summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorXiang, Haihao <haihao.xiang@intel.com>2014-10-08 08:48:53 +0800
committerXiang, Haihao <haihao.xiang@intel.com>2014-10-08 09:01:38 +0800
commit2b002e286c31c9b0c09c1f22237bb222ac77b97c (patch)
treed8ef471847df59fed762960e87cf6feedc8eb736
parent4568e24f0382d2d03215bcaf78eb0eae68f88ae5 (diff)
parentf11176415ec26eb5960ba6841d2d9c22f2cabc60 (diff)
Merge remote-tracking branch 'fdo/master' into g45-h264g45-h264
Some users still use this branch. Conflicts: src/i965_avc_bsd.c, src/i965_avc_ildb.c, src/i965_drv_video.c
-rw-r--r--NEWS64
-rw-r--r--configure.ac15
-rwxr-xr-xsrc/Makefile.am48
-rw-r--r--src/gen6_mfc.c148
-rw-r--r--src/gen6_mfc.h70
-rw-r--r--src/gen6_mfc_common.c1011
-rwxr-xr-xsrc/gen6_mfd.c156
-rw-r--r--src/gen6_mfd.h1
-rw-r--r--src/gen6_vme.c110
-rw-r--r--src/gen6_vme.h72
-rw-r--r--src/gen75_mfc.c861
-rw-r--r--src/gen75_mfd.c160
-rw-r--r--src/gen75_picture_process.c79
-rw-r--r--src/gen75_vme.c189
-rw-r--r--src/gen75_vpp_gpe.c499
-rw-r--r--src/gen75_vpp_gpe.h47
-rw-r--r--src/gen75_vpp_vebox.c558
-rw-r--r--src/gen75_vpp_vebox.h3
-rw-r--r--src/gen7_mfc.c245
-rwxr-xr-xsrc/gen7_mfd.c145
-rw-r--r--src/gen7_mfd.h2
-rw-r--r--src/gen7_vme.c465
-rw-r--r--src/gen8_mfc.c2478
-rw-r--r--src/gen8_mfd.c3190
-rw-r--r--src/gen8_post_processing.c1466
-rw-r--r--src/gen8_render.c1824
-rw-r--r--src/gen8_vme.c1214
-rw-r--r--src/i965_avc_bsd.c63
-rw-r--r--src/i965_avc_hw_scoreboard.c4
-rw-r--r--src/i965_avc_ildb.c12
-rw-r--r--src/i965_decoder.h18
-rw-r--r--src/i965_decoder_utils.c754
-rw-r--r--src/i965_decoder_utils.h71
-rwxr-xr-xsrc/i965_defines.h75
-rw-r--r--src/i965_device_info.c532
-rw-r--r--[-rwxr-xr-x]src/i965_drv_video.c2569
-rw-r--r--src/i965_drv_video.h133
-rw-r--r--src/i965_encoder.c89
-rw-r--r--src/i965_encoder.h6
-rw-r--r--src/i965_encoder_utils.c20
-rw-r--r--src/i965_fourcc.h68
-rw-r--r--src/i965_gpe_utils.c536
-rw-r--r--src/i965_gpe_utils.h58
-rw-r--r--src/i965_media.c10
-rw-r--r--src/i965_media_h264.c36
-rw-r--r--src/i965_media_h264.h1
-rw-r--r--src/i965_media_mpeg2.c6
-rw-r--r--src/i965_output_dri.c23
-rw-r--r--src/i965_output_wayland.c14
-rw-r--r--src/i965_pciids.h135
-rwxr-xr-xsrc/i965_post_processing.c1594
-rwxr-xr-xsrc/i965_post_processing.h73
-rw-r--r--src/i965_render.c314
-rw-r--r--src/i965_render.h54
-rw-r--r--src/i965_structs.h516
-rw-r--r--src/intel_batchbuffer.c73
-rw-r--r--src/intel_batchbuffer.h3
-rw-r--r--src/intel_driver.c20
-rw-r--r--src/intel_driver.h246
-rw-r--r--src/intel_media.h1
-rw-r--r--src/intel_version.h.in36
-rw-r--r--src/shaders/post_processing/Makefile.am2
-rw-r--r--src/shaders/post_processing/gen5_6/Core_Kernels/PL_DNDI_ALG_UVCopy_NV12.asm43
-rwxr-xr-xsrc/shaders/post_processing/gen5_6/Makefile.am5
-rw-r--r--src/shaders/post_processing/gen5_6/nv12_dndi_nv12.g4b.gen537
-rw-r--r--src/shaders/post_processing/gen5_6/nv12_dndi_nv12.g6b37
-rw-r--r--src/shaders/post_processing/gen5_6/pa_load_save_pa.asm17
-rw-r--r--src/shaders/post_processing/gen5_6/pa_load_save_pa.g4b.gen5115
-rw-r--r--src/shaders/post_processing/gen5_6/pa_load_save_pa.g6b188
-rw-r--r--src/shaders/post_processing/gen7/Makefile.am7
-rw-r--r--src/shaders/post_processing/gen7/pa_to_pa.asm17
-rw-r--r--src/shaders/post_processing/gen7/pa_to_pa.g75b677
-rw-r--r--src/shaders/post_processing/gen7/pa_to_pa.g7b677
-rw-r--r--src/shaders/post_processing/gen75/Makefile.am9
-rw-r--r--src/shaders/post_processing/gen8/EOT.g8a166
-rw-r--r--src/shaders/post_processing/gen8/Makefile.am79
-rw-r--r--src/shaders/post_processing/gen8/PA_AVS_Buf_0.g8a457
-rw-r--r--src/shaders/post_processing/gen8/PA_AVS_Buf_1.g8a457
-rw-r--r--src/shaders/post_processing/gen8/PA_AVS_Buf_2.g8a457
-rw-r--r--src/shaders/post_processing/gen8/PA_AVS_Buf_3.g8a457
-rw-r--r--src/shaders/post_processing/gen8/PL2_AVS_Buf_0.g8a462
-rw-r--r--src/shaders/post_processing/gen8/PL2_AVS_Buf_1.g8a458
-rw-r--r--src/shaders/post_processing/gen8/PL2_AVS_Buf_2.g8a458
-rw-r--r--src/shaders/post_processing/gen8/PL2_AVS_Buf_3.g8a460
-rw-r--r--src/shaders/post_processing/gen8/PL3_AVS_Buf_0.g8a470
-rw-r--r--src/shaders/post_processing/gen8/PL3_AVS_Buf_1.g8a470
-rw-r--r--src/shaders/post_processing/gen8/PL3_AVS_Buf_2.g8a470
-rw-r--r--src/shaders/post_processing/gen8/PL3_AVS_Buf_3.g8a470
-rw-r--r--src/shaders/post_processing/gen8/RGB_to_YUV.g8a910
-rw-r--r--src/shaders/post_processing/gen8/Save_AVS_NV12.g8a621
-rw-r--r--src/shaders/post_processing/gen8/Save_AVS_PA.g8a629
-rw-r--r--src/shaders/post_processing/gen8/Save_AVS_PL3.g8a565
-rw-r--r--src/shaders/post_processing/gen8/Save_AVS_RGBX.g8a641
-rw-r--r--src/shaders/post_processing/gen8/Set_AVS_Buf_0123_BGRA.g8a368
-rw-r--r--src/shaders/post_processing/gen8/Set_AVS_Buf_0123_PL2.g8a361
-rw-r--r--src/shaders/post_processing/gen8/Set_AVS_Buf_0123_PL3.g8a362
-rw-r--r--src/shaders/post_processing/gen8/Set_AVS_Buf_0123_VYUA.g8a366
-rw-r--r--src/shaders/post_processing/gen8/Set_Layer_0.g8a483
-rw-r--r--src/shaders/post_processing/gen8/VP_Setup.g8a440
-rw-r--r--src/shaders/post_processing/gen8/YUV_to_RGB.g8a971
-rw-r--r--src/shaders/post_processing/gen8/pa_to_pa.asm17
-rw-r--r--src/shaders/post_processing/gen8/pa_to_pa.g8b279
-rw-r--r--src/shaders/post_processing/gen8/pa_to_pl2.asm17
-rw-r--r--src/shaders/post_processing/gen8/pa_to_pl2.g8b236
-rw-r--r--src/shaders/post_processing/gen8/pa_to_pl3.asm17
-rw-r--r--src/shaders/post_processing/gen8/pa_to_pl3.g8b189
-rw-r--r--src/shaders/post_processing/gen8/pl2_to_pa.asm17
-rw-r--r--src/shaders/post_processing/gen8/pl2_to_pa.g8b287
-rw-r--r--src/shaders/post_processing/gen8/pl2_to_pl2.asm17
-rw-r--r--src/shaders/post_processing/gen8/pl2_to_pl2.g8b244
-rw-r--r--src/shaders/post_processing/gen8/pl2_to_pl3.asm17
-rw-r--r--src/shaders/post_processing/gen8/pl2_to_pl3.g8b197
-rw-r--r--src/shaders/post_processing/gen8/pl2_to_rgbx.asm18
-rw-r--r--src/shaders/post_processing/gen8/pl2_to_rgbx.g8b738
-rw-r--r--src/shaders/post_processing/gen8/pl3_to_pa.asm17
-rw-r--r--src/shaders/post_processing/gen8/pl3_to_pa.g8b303
-rw-r--r--src/shaders/post_processing/gen8/pl3_to_pl2.asm17
-rw-r--r--src/shaders/post_processing/gen8/pl3_to_pl2.g8b260
-rw-r--r--src/shaders/post_processing/gen8/pl3_to_pl3.asm17
-rw-r--r--src/shaders/post_processing/gen8/pl3_to_pl3.g8b213
-rw-r--r--src/shaders/post_processing/gen8/rgbx_to_nv12.asm18
-rw-r--r--src/shaders/post_processing/gen8/rgbx_to_nv12.g8b661
-rw-r--r--src/shaders/post_processing/gen8/sharpening_h_blur.g8b1718
-rw-r--r--src/shaders/post_processing/gen8/sharpening_unmask.g8b159
-rw-r--r--src/shaders/post_processing/gen8/sharpening_v_blur.g8b296
-rw-r--r--src/shaders/render/Makefile.am50
-rw-r--r--src/shaders/render/exa_wm.g4i71
-rw-r--r--src/shaders/render/exa_wm_src_affine.g4b12
-rw-r--r--src/shaders/render/exa_wm_src_affine.g4b.gen512
-rw-r--r--src/shaders/render/exa_wm_src_affine.g6a3
-rw-r--r--src/shaders/render/exa_wm_src_affine.g6b8
-rw-r--r--src/shaders/render/exa_wm_src_affine.g7a2
-rw-r--r--src/shaders/render/exa_wm_src_affine.g7b8
-rw-r--r--src/shaders/render/exa_wm_src_affine.g8a45
-rw-r--r--src/shaders/render/exa_wm_src_affine.g8b4
-rw-r--r--src/shaders/render/exa_wm_src_sample_argb.g4a4
-rw-r--r--src/shaders/render/exa_wm_src_sample_argb.g4b4
-rw-r--r--src/shaders/render/exa_wm_src_sample_argb.g4b.gen54
-rw-r--r--src/shaders/render/exa_wm_src_sample_argb.g6a2
-rw-r--r--src/shaders/render/exa_wm_src_sample_argb.g6b2
-rw-r--r--src/shaders/render/exa_wm_src_sample_argb.g7a4
-rw-r--r--src/shaders/render/exa_wm_src_sample_argb.g7b4
-rw-r--r--src/shaders/render/exa_wm_src_sample_argb.g8a59
-rw-r--r--src/shaders/render/exa_wm_src_sample_argb.g8b5
-rw-r--r--src/shaders/render/exa_wm_src_sample_planar.g8a106
-rw-r--r--src/shaders/render/exa_wm_src_sample_planar.g8b20
-rw-r--r--src/shaders/render/exa_wm_write.g7a16
-rw-r--r--src/shaders/render/exa_wm_write.g7b16
-rw-r--r--src/shaders/render/exa_wm_write.g8a83
-rw-r--r--src/shaders/render/exa_wm_write.g8b19
-rw-r--r--src/shaders/render/exa_wm_xy.g4b4
-rw-r--r--src/shaders/render/exa_wm_xy.g4b.gen54
-rw-r--r--src/shaders/render/exa_wm_yuv_color_balance.g4a38
-rw-r--r--src/shaders/render/exa_wm_yuv_color_balance.g4b15
-rw-r--r--src/shaders/render/exa_wm_yuv_color_balance.g4b.gen515
-rw-r--r--src/shaders/render/exa_wm_yuv_color_balance.g6a38
-rw-r--r--src/shaders/render/exa_wm_yuv_color_balance.g6b15
-rw-r--r--src/shaders/render/exa_wm_yuv_color_balance.g7a38
-rw-r--r--src/shaders/render/exa_wm_yuv_color_balance.g7b15
-rw-r--r--src/shaders/render/exa_wm_yuv_color_balance.g7b.haswell15
-rw-r--r--src/shaders/render/exa_wm_yuv_color_balance.g8a39
-rw-r--r--src/shaders/render/exa_wm_yuv_color_balance.g8b15
-rw-r--r--src/shaders/render/exa_wm_yuv_color_balance.gxa75
-rw-r--r--src/shaders/render/exa_wm_yuv_rgb.g4a72
-rw-r--r--src/shaders/render/exa_wm_yuv_rgb.g4b23
-rw-r--r--src/shaders/render/exa_wm_yuv_rgb.g4b.gen523
-rw-r--r--src/shaders/render/exa_wm_yuv_rgb.g6a73
-rw-r--r--src/shaders/render/exa_wm_yuv_rgb.g6b23
-rw-r--r--src/shaders/render/exa_wm_yuv_rgb.g7a73
-rw-r--r--src/shaders/render/exa_wm_yuv_rgb.g7b23
-rw-r--r--src/shaders/render/exa_wm_yuv_rgb.g8a32
-rw-r--r--src/shaders/render/exa_wm_yuv_rgb.g8b13
-rw-r--r--src/shaders/render/exa_yuv_gen4.g4i42
-rw-r--r--src/shaders/render/exa_yuv_gen6.g4i42
-rw-r--r--src/shaders/render/exa_yuv_rgb.gxa74
-rw-r--r--src/shaders/utils/Makefile.am25
-rw-r--r--src/shaders/utils/mfc_batchbuffer.inc2
-rw-r--r--src/shaders/utils/mfc_batchbuffer_avc_inter.asm6
-rw-r--r--src/shaders/utils/mfc_batchbuffer_avc_inter.g6b6
-rw-r--r--src/shaders/utils/mfc_batchbuffer_avc_inter.g7b6
-rw-r--r--src/shaders/utils/mfc_batchbuffer_hsw.asm296
-rw-r--r--src/shaders/utils/mfc_batchbuffer_hsw.g75a29
-rw-r--r--src/shaders/utils/mfc_batchbuffer_hsw.g75b105
-rw-r--r--src/shaders/utils/mfc_batchbuffer_hsw.inc195
-rw-r--r--src/shaders/vme/Makefile.am44
-rw-r--r--src/shaders/vme/inter_bframe_gen8.asm875
-rw-r--r--src/shaders/vme/inter_bframe_gen8.g8a2
-rw-r--r--src/shaders/vme/inter_bframe_gen8.g8b423
-rw-r--r--src/shaders/vme/inter_bframe_haswell.asm2
-rw-r--r--src/shaders/vme/inter_bframe_haswell.g75b2
-rw-r--r--src/shaders/vme/inter_bframe_ivb.asm2
-rw-r--r--src/shaders/vme/inter_bframe_ivb.g7b2
-rw-r--r--src/shaders/vme/inter_frame.asm25
-rw-r--r--src/shaders/vme/inter_frame.g6b9
-rw-r--r--src/shaders/vme/inter_frame.g7b8
-rw-r--r--src/shaders/vme/inter_frame_gen8.asm760
-rw-r--r--src/shaders/vme/inter_frame_gen8.g8a2
-rw-r--r--src/shaders/vme/inter_frame_gen8.g8b327
-rw-r--r--src/shaders/vme/inter_frame_haswell.asm71
-rw-r--r--src/shaders/vme/inter_frame_haswell.g75b54
-rw-r--r--src/shaders/vme/inter_frame_ivb.asm24
-rw-r--r--src/shaders/vme/inter_frame_ivb.g7b18
-rw-r--r--src/shaders/vme/intra_frame_gen8.asm185
-rw-r--r--src/shaders/vme/intra_frame_gen8.g8a2
-rw-r--r--src/shaders/vme/intra_frame_gen8.g8b72
-rw-r--r--src/shaders/vme/mpeg2_inter_frame.g7a3
-rw-r--r--src/shaders/vme/mpeg2_inter_frame.g7b105
-rw-r--r--src/shaders/vme/mpeg2_inter_gen8.asm868
-rw-r--r--src/shaders/vme/mpeg2_inter_gen8.g8a3
-rw-r--r--src/shaders/vme/mpeg2_inter_gen8.g8b371
-rw-r--r--src/shaders/vme/mpeg2_inter_haswell.asm860
-rw-r--r--src/shaders/vme/mpeg2_inter_haswell.g75a (renamed from src/shaders/vme/mpeg2_inter_frame_haswell.g75a)2
-rw-r--r--src/shaders/vme/mpeg2_inter_haswell.g75b (renamed from src/shaders/vme/mpeg2_inter_frame_haswell.g75b)233
-rw-r--r--src/shaders/vme/mpeg2_inter_ivb.asm705
-rw-r--r--src/shaders/vme/mpeg2_inter_ivb.g7a3
-rw-r--r--src/shaders/vme/mpeg2_inter_ivb.g7b308
-rw-r--r--src/shaders/vme/vme.inc5
-rw-r--r--src/shaders/vme/vme7.inc8
-rw-r--r--src/shaders/vme/vme75.inc8
-rw-r--r--src/shaders/vme/vme75_mpeg2.inc15
-rw-r--r--src/shaders/vme/vme7_mpeg2.inc17
-rw-r--r--src/shaders/vme/vme8.inc347
-rw-r--r--src/va_backend_compat.h5
223 files changed, 47715 insertions, 4723 deletions
diff --git a/NEWS b/NEWS
index 4ac3a3f..01b5464 100644
--- a/NEWS
+++ b/NEWS
@@ -1,5 +1,65 @@
-libva-intel-driver NEWS -- summary of changes. 2013-06-26
-Copyright (C) 2009-2013 Intel Corporation
+libva-intel-driver NEWS -- summary of changes. 2014-09-30
+Copyright (C) 2009-2014 Intel Corporation
+
+Version 1.4.0 - 30.Sep.2014
+* Add support for exporting VA buffer
+* Add support for MVC decoding/encoding
+* Add support for encoding quality level on Sandybridge and newer
+* Add support of inserting packed slice header & raw data for encoding
+* Add support for Cherryview
+* Fix the GPU hang issue on Ivybridge when using the gstreamer and mplayer to play back one H264 clip
+ (https://bugs.freedesktop.org/show_bug.cgi?id=80720)
+* Fix the GPU hang issue on Sandybridge and newer when playing back one H264 clip
+ (https://bugs.freedesktop.org/show_bug.cgi?id=76363)
+* Fix the GPU hang issue on Haswell when using XBMC to play back one H264 clip
+ (https://bugs.freedesktop.org/show_bug.cgi?id=81447)
+
+Version 1.3.2 - 16.Jun.2014
+* Export JPEG format by vaDeriveImage()
+* Add support for MADI on SNB
+* H.264: fix the support for grayscale format (Y800)
+* Fix vaGetConfigAttributes() to validate the profile/entrypoint pair
+* Fix vaCreateConfig() to not override user chroma format
+* Fix the scaling issue on IVB/HSW/BDW
+* Fix https://bugs.freedesktop.org/show_bug.cgi?id=73424
+* Fix https://bugs.freedesktop.org/show_bug.cgi?id=72522
+* Fix https://bugs.freedesktop.org/show_bug.cgi?id=77041
+* Quality improvement for H.264 encoding on BDW
+
+Version 1.3.1 - 09.May.2014
+* Add support for STE on Broadwell
+* Add support for YV16
+* Add support for user specified tiling and stride
+* Fix VP8 decoding on Broadwell
+* Fix the wrong alpha when converting NV12 into RGBA
+* Fix https://bugs.freedesktop.org/show_bug.cgi?id=77386
+
+Version 1.3.0 - 24.Mar.2014
+* Add support for Broadwell
+ - Decoding: H.264/MPEG-2/VC-1/JPEG/VP8
+ - Encoding: H.264/MPEG-2
+ - VPP: CSC/scaling/NoiseReduction/Deinterlacing{Bob, MotionAdaptive, MotionCompensated}/Sharpening/ColorBalance
+* Fix the wrong setting in MI_BATCH_BUFFER_START
+
+Version 1.2.2 - 16.Dec.2013
+* Motion compensation DI on HSW
+* Optimization of FPS for H.264 encoding on HSW
+* Add brightness/contrast/hue/saturation support for rendering.
+* Support BT601/BT709/SMPTE240 in vaPutSurface()
+* Expose Constrained Baseline Profile instead of Baseline Profile for H.264
+* Bug fixes
+
+Version 1.2.1 - 23.Sep.2013
+* Add PCI IDs for Bay Trail
+* Performance improvement for MPEG-2 Encoding on IVB/HSW
+* Add basic processing support for packed YUV to packed YUV on ILK+
+* Check the underlying OS support for VEBOX on HSW
+* Quality improvement for BobDI on SNB/IVB
+* Add support for Motion Adaptive Deinterlacing on IVB
+* vaDeriveImage() works for UYVY formats
+* Fix thread safety issue
+* Fix GPU hang issue when decoding some videos on SNB
+* Fix output filter count from QueryVideoProcFilters()
Version 1.2.0 - 26.Jun.2013
* The new H.264 encoding API on SNB/IVB/HSW
diff --git a/configure.ac b/configure.ac
index e39f1d5..d2bbe47 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1,6 +1,6 @@
# intel-driver package version number
m4_define([intel_driver_major_version], [1])
-m4_define([intel_driver_minor_version], [2])
+m4_define([intel_driver_minor_version], [4])
m4_define([intel_driver_micro_version], [1])
m4_define([intel_driver_pre_version], [1])
m4_define([intel_driver_version],
@@ -10,8 +10,8 @@ m4_append([intel_driver_version], intel_driver_pre_version, [.pre])
])
# libva minimum version requirement
-m4_define([va_api_version], [0.34])
-m4_define([libva_package_version], [1.2.0])
+m4_define([va_api_version], [0.36])
+m4_define([libva_package_version], [1.4.0])
# libdrm minimum version requirement
m4_define([libdrm_version], [2.4.45])
@@ -76,10 +76,15 @@ PKG_CHECK_MODULES([DRM], [libdrm >= $LIBDRM_VERSION])
AC_SUBST(LIBDRM_VERSION)
dnl Check for gen4asm
-PKG_CHECK_MODULES(GEN4ASM, [intel-gen4asm >= 1.3], [gen4asm=yes], [gen4asm=no])
+PKG_CHECK_MODULES(GEN4ASM, [intel-gen4asm >= 1.5], [gen4asm=yes], [gen4asm=no])
AM_CONDITIONAL(HAVE_GEN4ASM, test x$gen4asm = xyes)
AC_PATH_PROG([GEN4ASM], [intel-gen4asm])
+dnl Check for git
+AC_ARG_VAR([GIT], [Path to git program, if any])
+AC_PATH_PROG([GIT], [git])
+AM_CONDITIONAL([HAVE_GIT], [test -n "$GIT"])
+
dnl Check for VA-API
PKG_CHECK_MODULES(LIBVA_DEPS, [libva >= va_api_version])
@@ -178,6 +183,8 @@ AC_OUTPUT([
src/shaders/post_processing/Makefile
src/shaders/post_processing/gen5_6/Makefile
src/shaders/post_processing/gen7/Makefile
+ src/shaders/post_processing/gen75/Makefile
+ src/shaders/post_processing/gen8/Makefile
src/shaders/render/Makefile
src/shaders/utils/Makefile
src/shaders/vme/Makefile
diff --git a/src/Makefile.am b/src/Makefile.am
index 3299733..acfa849 100755
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -20,7 +20,10 @@
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-SUBDIRS = shaders
+SUBDIRS = shaders
+DIST_SUBDIRS = $(SUBDIRS)
+EXTRA_DIST =
+BUILT_SOURCES =
AM_CPPFLAGS = \
-DPTHREADS \
@@ -56,6 +59,9 @@ source_c = \
gen7_mfd.c \
gen75_mfd.c \
gen75_mfc.c \
+ gen8_mfc.c \
+ gen8_mfd.c \
+ gen8_vme.c \
gen75_picture_process.c \
gen75_vme.c \
gen75_vpp_gpe.c \
@@ -64,6 +70,7 @@ source_c = \
i965_avc_hw_scoreboard.c\
i965_avc_ildb.c \
i965_decoder_utils.c \
+ i965_device_info.c \
i965_drv_video.c \
i965_encoder.c \
i965_encoder_utils.c \
@@ -72,7 +79,9 @@ source_c = \
i965_media_mpeg2.c \
i965_gpe_utils.c \
i965_post_processing.c \
+ gen8_post_processing.c \
i965_render.c \
+ gen8_render.c \
intel_batchbuffer.c \
intel_batchbuffer_dump.c\
intel_driver.c \
@@ -104,6 +113,7 @@ source_h = \
i965_media_mpeg2.h \
i965_mutext.h \
i965_gpe_utils.h \
+ i965_pciids.h \
i965_post_processing.h \
i965_render.h \
i965_structs.h \
@@ -113,9 +123,11 @@ source_h = \
intel_driver.h \
intel_media.h \
intel_memman.h \
+ intel_version.h \
object_heap.h \
sysdeps.h \
va_backend_compat.h \
+ i965_fourcc.h \
$(NULL)
i965_drv_video_la_LTLIBRARIES = i965_drv_video.la
@@ -137,11 +149,43 @@ source_h += i965_output_wayland.h
driver_cflags += $(WAYLAND_CFLAGS)
endif
+# git version
+VERSION_FILE = .VERSION
+OLD_VERSION_FILE = $(VERSION_FILE).old
+NEW_VERSION_FILE = $(VERSION_FILE).new
+PKG_VERSION_FILE = $(VERSION_FILE).pkg
+
+intel_version.h: gen-version
+ $(AM_V_GEN) \
+ OV=`[ -f $(OLD_VERSION_FILE) ] && cat $(OLD_VERSION_FILE) || :`; \
+ NV=`cat $(NEW_VERSION_FILE)`; \
+ if [ "$$OV" != "$$NV" -o ! -f intel_version.h ]; then \
+ cp -f $(NEW_VERSION_FILE) $(OLD_VERSION_FILE); \
+ $(SED) -e "s|\@INTEL_DRIVER_GIT_VERSION\@|$${NV}|" \
+ $(srcdir)/intel_version.h.in > intel_version.h; \
+ fi
+
+gen-version:
+ @echo $(VERSION) > $(NEW_VERSION_FILE)
+if HAVE_GIT
+ @[ -d $(top_srcdir)/.git ] && \
+ (cd $(top_srcdir) && $(GIT) describe --tags) > $(NEW_VERSION_FILE) || :
+endif
+ @[ -f $(srcdir)/$(PKG_VERSION_FILE) ] && \
+ cp -f $(srcdir)/$(PKG_VERSION_FILE) $(NEW_VERSION_FILE) || :
+
+$(PKG_VERSION_FILE): $(NEW_VERSION_FILE)
+ @cp -f $< $@
+
+BUILT_SOURCES += intel_version.h
+EXTRA_DIST += intel_version.h.in $(PKG_VERSION_FILE)
+
# Wayland protocol
+protocol_source_h = wayland-drm-client-protocol.h
i965_output_wayland.c: $(protocol_source_h)
@wayland_scanner_rules@
-DIST_SUBDIRS = $(SUBDIRS) wayland
+DIST_SUBDIRS += wayland
# Extra clean files so that maintainer-clean removes *everything*
MAINTAINERCLEANFILES = Makefile.in config.h.in
diff --git a/src/gen6_mfc.c b/src/gen6_mfc.c
index 883a42b..455721f 100644
--- a/src/gen6_mfc.c
+++ b/src/gen6_mfc.c
@@ -42,6 +42,10 @@
#include "gen6_vme.h"
#include "intel_media.h"
+#define SURFACE_STATE_PADDED_SIZE MAX(SURFACE_STATE_PADDED_SIZE_GEN6, SURFACE_STATE_PADDED_SIZE_GEN7)
+#define SURFACE_STATE_OFFSET(index) (SURFACE_STATE_PADDED_SIZE * index)
+#define BINDING_TABLE_OFFSET(index) (SURFACE_STATE_OFFSET(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * index)
+
static const uint32_t gen6_mfc_batchbuffer_avc_intra[][4] = {
#include "shaders/utils/mfc_batchbuffer_avc_intra.g6b"
};
@@ -360,16 +364,29 @@ gen6_mfc_avc_slice_state(VADriverContextP ctx,
int weighted_pred_idc = 0;
unsigned int luma_log2_weight_denom = slice_param->luma_log2_weight_denom;
unsigned int chroma_log2_weight_denom = slice_param->chroma_log2_weight_denom;
- int bslice = 0;
+ int num_ref_l0 = 0, num_ref_l1 = 0;
if (batch == NULL)
batch = encoder_context->base.batch;
- if (slice_type == SLICE_TYPE_P) {
+ if (slice_type == SLICE_TYPE_I) {
+ luma_log2_weight_denom = 0;
+ chroma_log2_weight_denom = 0;
+ } else if (slice_type == SLICE_TYPE_P) {
weighted_pred_idc = pic_param->pic_fields.bits.weighted_pred_flag;
+ num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1;
+
+ if (slice_param->num_ref_idx_active_override_flag)
+ num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
} else if (slice_type == SLICE_TYPE_B) {
weighted_pred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
- bslice = 1;
+ num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1;
+ num_ref_l1 = pic_param->num_ref_idx_l1_active_minus1 + 1;
+
+ if (slice_param->num_ref_idx_active_override_flag) {
+ num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
+ num_ref_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
+ }
if (weighted_pred_idc == 2) {
/* 8.4.3 - Derivation process for prediction weights (8-279) */
@@ -394,14 +411,11 @@ gen6_mfc_avc_slice_state(VADriverContextP ctx,
OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2) );
OUT_BCS_BATCH(batch, slice_type); /*Slice Type: I:P:B Slice*/
- if (slice_type == SLICE_TYPE_I) {
- OUT_BCS_BATCH(batch, 0); /*no reference frames and pred_weight_table*/
- } else {
- OUT_BCS_BATCH(batch,
- (1 << 16) | (bslice << 24) | /*1 reference frame*/
- (chroma_log2_weight_denom << 8) |
- (luma_log2_weight_denom << 0));
- }
+ OUT_BCS_BATCH(batch,
+ (num_ref_l0 << 16) |
+ (num_ref_l1 << 24) |
+ (chroma_log2_weight_denom << 8) |
+ (luma_log2_weight_denom << 0));
OUT_BCS_BATCH(batch,
(weighted_pred_idc << 30) |
@@ -516,9 +530,25 @@ gen6_mfc_init(VADriverContextP ctx,
struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
dri_bo *bo;
int i;
- VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
- int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
- int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
+ int width_in_mbs = 0;
+ int height_in_mbs = 0;
+ int slice_batchbuffer_size;
+
+ if (encoder_context->codec == CODEC_H264) {
+ VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
+ width_in_mbs = pSequenceParameter->picture_width_in_mbs;
+ height_in_mbs = pSequenceParameter->picture_height_in_mbs;
+ } else {
+ VAEncSequenceParameterBufferMPEG2 *pSequenceParameter = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
+
+ assert(encoder_context->codec == CODEC_MPEG2);
+
+ width_in_mbs = ALIGN(pSequenceParameter->picture_width, 16) / 16;
+ height_in_mbs = ALIGN(pSequenceParameter->picture_height, 16) / 16;
+ }
+
+ slice_batchbuffer_size = 64 * width_in_mbs * height_in_mbs + 4096 +
+ (SLICE_HEADER + SLICE_TAIL) * encode_state->num_slice_params_ext;
/*Encode common setup for MFC*/
dri_bo_unreference(mfc_context->post_deblocking_output.bo);
@@ -586,7 +616,8 @@ gen6_mfc_init(VADriverContextP ctx,
if (mfc_context->aux_batchbuffer)
intel_batchbuffer_free(mfc_context->aux_batchbuffer);
- mfc_context->aux_batchbuffer = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, 0);
+ mfc_context->aux_batchbuffer = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD,
+ slice_batchbuffer_size);
mfc_context->aux_batchbuffer_surface.bo = mfc_context->aux_batchbuffer->buffer;
dri_bo_reference(mfc_context->aux_batchbuffer_surface.bo);
mfc_context->aux_batchbuffer_surface.pitch = 16;
@@ -597,8 +628,8 @@ gen6_mfc_init(VADriverContextP ctx,
}
static void gen6_mfc_avc_pipeline_picture_programing( VADriverContextP ctx,
- struct encode_state *encode_state,
- struct intel_encoder_context *encoder_context)
+ struct encode_state *encode_state,
+ struct intel_encoder_context *encoder_context)
{
struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
@@ -696,6 +727,7 @@ gen6_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb, in
unsigned char target_mb_size,unsigned char max_mb_size, int slice_type,
struct intel_batchbuffer *batch)
{
+ struct gen6_vme_context *vme_context = encoder_context->vme_context;
int len_in_dwords = 11;
if (batch == NULL)
@@ -725,8 +757,8 @@ gen6_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb, in
/*Stuff for Inter MB*/
OUT_BCS_BATCH(batch, msg[1]);
- OUT_BCS_BATCH(batch, 0x0);
- OUT_BCS_BATCH(batch, 0x0);
+ OUT_BCS_BATCH(batch, vme_context->ref_index_in_mb[0]);
+ OUT_BCS_BATCH(batch, vme_context->ref_index_in_mb[1]);
/*MaxSizeInWord and TargetSzieInWord*/
OUT_BCS_BATCH(batch, (max_mb_size << 24) |
@@ -756,15 +788,18 @@ gen6_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
int i,x,y;
int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta;
unsigned int rate_control_mode = encoder_context->rate_control_mode;
- unsigned char *slice_header = NULL;
- int slice_header_length_in_bits = 0;
unsigned int tail_data[] = { 0x0, 0x0 };
int slice_type = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type);
int is_intra = slice_type == SLICE_TYPE_I;
+ int qp_slice;
+ qp_slice = qp;
if (rate_control_mode == VA_RC_CBR) {
qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
- pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
+ if (encode_state->slice_header_index[slice_index] == 0) {
+ pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
+ qp_slice = qp;
+ }
}
/* only support for 8-bit pixel bit-depth */
@@ -777,18 +812,12 @@ gen6_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
pPicParameter,
pSliceParameter,
encode_state, encoder_context,
- (rate_control_mode == VA_RC_CBR), qp, slice_batch);
+ (rate_control_mode == VA_RC_CBR), qp_slice, slice_batch);
if ( slice_index == 0)
intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
- slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter, pPicParameter, pSliceParameter, &slice_header);
-
- // slice hander
- mfc_context->insert_object(ctx, encoder_context,
- (unsigned int *)slice_header, ALIGN(slice_header_length_in_bits, 32) >> 5, slice_header_length_in_bits & 0x1f,
- 5, /* first 5 bytes are start code + nal unit type */
- 1, 0, 1, slice_batch);
+ intel_avc_slice_insert_packed_data(ctx, encode_state, encoder_context, slice_index, slice_batch);
dri_bo_map(vme_context->vme_output.bo , 1);
msg = (unsigned int *)vme_context->vme_output.bo->virtual;
@@ -835,7 +864,6 @@ gen6_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
1, 1, 1, 0, slice_batch);
}
- free(slice_header);
}
@@ -845,10 +873,14 @@ gen6_mfc_avc_software_batchbuffer(VADriverContextP ctx,
struct intel_encoder_context *encoder_context)
{
struct i965_driver_data *i965 = i965_driver_data(ctx);
- struct intel_batchbuffer *batch = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, 0);
- dri_bo *batch_bo = batch->buffer;
+ struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
+ struct intel_batchbuffer *batch;;
+ dri_bo *batch_bo;
int i;
+ batch = mfc_context->aux_batchbuffer;
+ batch_bo = batch->buffer;
+
for (i = 0; i < encode_state->num_slice_params_ext; i++) {
gen6_mfc_avc_pipeline_slice_programing(ctx, encode_state, encoder_context, i, batch);
}
@@ -861,7 +893,9 @@ gen6_mfc_avc_software_batchbuffer(VADriverContextP ctx,
ADVANCE_BCS_BATCH(batch);
dri_bo_reference(batch_bo);
+
intel_batchbuffer_free(batch);
+ mfc_context->aux_batchbuffer = NULL;
return batch_bo;
}
@@ -992,11 +1026,12 @@ gen6_mfc_batchbuffer_emit_object_command(struct intel_batchbuffer *batch,
int mb_x,
int mb_y,
int width_in_mbs,
- int qp)
+ int qp,
+ unsigned int ref_index[2])
{
- BEGIN_BATCH(batch, 12);
+ BEGIN_BATCH(batch, 14);
- OUT_BATCH(batch, CMD_MEDIA_OBJECT | (12 - 2));
+ OUT_BATCH(batch, CMD_MEDIA_OBJECT | (14 - 2));
OUT_BATCH(batch, index);
OUT_BATCH(batch, 0);
OUT_BATCH(batch, 0);
@@ -1020,6 +1055,8 @@ gen6_mfc_batchbuffer_emit_object_command(struct intel_batchbuffer *batch,
OUT_BATCH(batch,
qp << 16 |
width_in_mbs);
+ OUT_BATCH(batch, ref_index[0]);
+ OUT_BATCH(batch, ref_index[1]);
ADVANCE_BATCH(batch);
}
@@ -1037,6 +1074,7 @@ gen6_mfc_avc_batchbuffer_slice_command(VADriverContextP ctx,
{
struct intel_batchbuffer *batch = encoder_context->base.batch;
struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
+ struct gen6_vme_context *vme_context = encoder_context->vme_context;
int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
int total_mbs = slice_param->num_macroblocks;
int number_mb_cmds = 128;
@@ -1068,7 +1106,8 @@ gen6_mfc_avc_batchbuffer_slice_command(VADriverContextP ctx,
mb_x,
mb_y,
width_in_mbs,
- qp);
+ qp,
+ vme_context->ref_index_in_mb);
if (first_object) {
head_offset += head_size;
@@ -1106,7 +1145,8 @@ gen6_mfc_avc_batchbuffer_slice_command(VADriverContextP ctx,
mb_x,
mb_y,
width_in_mbs,
- qp);
+ qp,
+ vme_context->ref_index_in_mb);
}
}
@@ -1130,17 +1170,21 @@ gen6_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
int last_slice = (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks) == (width_in_mbs * height_in_mbs);
int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta;
unsigned int rate_control_mode = encoder_context->rate_control_mode;
- unsigned char *slice_header = NULL;
- int slice_header_length_in_bits = 0;
unsigned int tail_data[] = { 0x0, 0x0 };
long head_offset;
int old_used = intel_batchbuffer_used_size(slice_batch), used;
unsigned short head_size, tail_size;
int slice_type = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type);
+ int qp_slice;
+ qp_slice = qp;
if (rate_control_mode == VA_RC_CBR) {
qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
- pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
+ if (encode_state->slice_header_index[slice_index] == 0) {
+ pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
+ /* Use the adjusted qp when slice_header is generated by driver */
+ qp_slice = qp;
+ }
}
/* only support for 8-bit pixel bit-depth */
@@ -1156,26 +1200,13 @@ gen6_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
encode_state,
encoder_context,
(rate_control_mode == VA_RC_CBR),
- qp,
+ qp_slice,
slice_batch);
if (slice_index == 0)
intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
- slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter, pPicParameter, pSliceParameter, &slice_header);
-
- // slice hander
- mfc_context->insert_object(ctx,
- encoder_context,
- (unsigned int *)slice_header,
- ALIGN(slice_header_length_in_bits, 32) >> 5,
- slice_header_length_in_bits & 0x1f,
- 5, /* first 5 bytes are start code + nal unit type */
- 1,
- 0,
- 1,
- slice_batch);
- free(slice_header);
+ intel_avc_slice_insert_packed_data(ctx, encode_state, encoder_context, slice_index, slice_batch);
intel_batchbuffer_align(slice_batch, 16); /* aligned by an Oword */
used = intel_batchbuffer_used_size(slice_batch);
@@ -1360,7 +1391,7 @@ gen6_mfc_pipeline(VADriverContextP ctx,
VAStatus vaStatus;
switch (profile) {
- case VAProfileH264Baseline:
+ case VAProfileH264ConstrainedBaseline:
case VAProfileH264Main:
case VAProfileH264High:
vaStatus = gen6_mfc_avc_encode_picture(ctx, encode_state, encoder_context);
@@ -1436,6 +1467,9 @@ Bool gen6_mfc_context_init(VADriverContextP ctx, struct intel_encoder_context *e
{
struct gen6_mfc_context *mfc_context = calloc(1, sizeof(struct gen6_mfc_context));
+ if (!mfc_context)
+ return False;
+
mfc_context->gpe_context.surface_state_binding_table.length = (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;
mfc_context->gpe_context.idrt.max_entries = MAX_GPE_KERNELS;
diff --git a/src/gen6_mfc.h b/src/gen6_mfc.h
index e6b04a1..67c62a4 100644
--- a/src/gen6_mfc.h
+++ b/src/gen6_mfc.h
@@ -42,19 +42,14 @@ struct encode_state;
#define INTRA_MB_FLAG_MASK 0x00002000
-#define __SOFTWARE__ 0
-
-#define SURFACE_STATE_PADDED_SIZE_0_GEN7 ALIGN(sizeof(struct gen7_surface_state), 32)
-#define SURFACE_STATE_PADDED_SIZE_1_GEN7 ALIGN(sizeof(struct gen7_surface_state2), 32)
-#define SURFACE_STATE_PADDED_SIZE_GEN7 MAX(SURFACE_STATE_PADDED_SIZE_0_GEN7, SURFACE_STATE_PADDED_SIZE_1_GEN7)
+/* The space required for slice header SLICE_STATE + header.
+ * Is it enough? */
+#define SLICE_HEADER 80
-#define SURFACE_STATE_PADDED_SIZE_0_GEN6 ALIGN(sizeof(struct i965_surface_state), 32)
-#define SURFACE_STATE_PADDED_SIZE_1_GEN6 ALIGN(sizeof(struct i965_surface_state2), 32)
-#define SURFACE_STATE_PADDED_SIZE_GEN6 MAX(SURFACE_STATE_PADDED_SIZE_0_GEN6, SURFACE_STATE_PADDED_SIZE_1_GEN6)
+/* the space required for slice tail. */
+#define SLICE_TAIL 16
-#define SURFACE_STATE_PADDED_SIZE MAX(SURFACE_STATE_PADDED_SIZE_GEN6, SURFACE_STATE_PADDED_SIZE_GEN7)
-#define SURFACE_STATE_OFFSET(index) (SURFACE_STATE_PADDED_SIZE * index)
-#define BINDING_TABLE_OFFSET(index) (SURFACE_STATE_OFFSET(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * index)
+#define __SOFTWARE__ 0
#define MFC_BATCHBUFFER_AVC_INTRA 0
#define MFC_BATCHBUFFER_AVC_INTER 1
@@ -164,6 +159,12 @@ struct gen6_mfc_context
int target_frame_size[3]; // I,P,B
double bits_per_frame;
double qpf_rounding_accumulator;
+
+ double saved_bps;
+ double saved_fps;
+ int saved_intra_period;
+ int saved_ip_period;
+ int saved_idr_period;
} brc;
struct {
@@ -200,10 +201,10 @@ struct gen6_mfc_context
void (*set_surface_state)(VADriverContextP ctx,
struct intel_encoder_context *encoder_context);
void (*ind_obj_base_addr_state)(VADriverContextP ctx,
- struct intel_encoder_context *encoder_context);
+ struct intel_encoder_context *encoder_context);
void (*avc_img_state)(VADriverContextP ctx,
- struct encode_state *encode_state,
- struct intel_encoder_context *encoder_context);
+ struct encode_state *encode_state,
+ struct intel_encoder_context *encoder_context);
void (*avc_qm_state)(VADriverContextP ctx,
struct intel_encoder_context *encoder_context);
void (*avc_fqm_state)(VADriverContextP ctx,
@@ -234,38 +235,47 @@ Bool gen75_mfc_context_init(VADriverContextP ctx, struct intel_encoder_context *
extern int intel_mfc_update_hrd(struct encode_state *encode_state,
- struct gen6_mfc_context *mfc_context,
- int frame_bits);
+ struct gen6_mfc_context *mfc_context,
+ int frame_bits);
extern int intel_mfc_brc_postpack(struct encode_state *encode_state,
- struct gen6_mfc_context *mfc_context,
- int frame_bits);
+ struct gen6_mfc_context *mfc_context,
+ int frame_bits);
extern void intel_mfc_hrd_context_update(struct encode_state *encode_state,
- struct gen6_mfc_context *mfc_context);
+ struct gen6_mfc_context *mfc_context);
extern int intel_mfc_interlace_check(VADriverContextP ctx,
- struct encode_state *encode_state,
- struct intel_encoder_context *encoder_context);
+ struct encode_state *encode_state,
+ struct intel_encoder_context *encoder_context);
extern void intel_mfc_brc_prepare(struct encode_state *encode_state,
- struct intel_encoder_context *encoder_context);
+ struct intel_encoder_context *encoder_context);
extern void intel_mfc_avc_pipeline_header_programing(VADriverContextP ctx,
- struct encode_state *encode_state,
- struct intel_encoder_context *encoder_context,
- struct intel_batchbuffer *slice_batch);
+ struct encode_state *encode_state,
+ struct intel_encoder_context *encoder_context,
+ struct intel_batchbuffer *slice_batch);
extern VAStatus intel_mfc_avc_prepare(VADriverContextP ctx,
- struct encode_state *encode_state,
- struct intel_encoder_context *encoder_context);
+ struct encode_state *encode_state,
+ struct intel_encoder_context *encoder_context);
extern int intel_avc_enc_slice_type_fixup(int type);
-
extern void
intel_mfc_avc_ref_idx_state(VADriverContextP ctx,
- struct encode_state *encode_state,
- struct intel_encoder_context *encoder_context);
+ struct encode_state *encode_state,
+ struct intel_encoder_context *encoder_context);
+
+extern
+Bool gen8_mfc_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context);
+
+extern void
+intel_avc_slice_insert_packed_data(VADriverContextP ctx,
+ struct encode_state *encode_state,
+ struct intel_encoder_context *encoder_context,
+ int slice_index,
+ struct intel_batchbuffer *slice_batch);
#endif /* _GEN6_MFC_BCS_H_ */
diff --git a/src/gen6_mfc_common.c b/src/gen6_mfc_common.c
index ab91c86..77c46dd 100644
--- a/src/gen6_mfc_common.c
+++ b/src/gen6_mfc_common.c
@@ -43,10 +43,10 @@
#include "gen6_vme.h"
#include "intel_media.h"
-#define BRC_CLIP(x, min, max) \
-{ \
- x = ((x > (max)) ? (max) : ((x < (min)) ? (min) : x)); \
-}
+#define BRC_CLIP(x, min, max) \
+ { \
+ x = ((x > (max)) ? (max) : ((x < (min)) ? (min) : x)); \
+ }
#define BRC_P_B_QP_DIFF 4
#define BRC_I_P_QP_DIFF 2
@@ -86,11 +86,11 @@ int intel_avc_enc_slice_type_fixup(int slice_type)
static void
intel_mfc_bit_rate_control_context_init(struct encode_state *encode_state,
- struct gen6_mfc_context *mfc_context)
+ struct gen6_mfc_context *mfc_context)
{
VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
- int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
- int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
+ int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
+ int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
float fps = pSequenceParameter->time_scale * 0.5 / pSequenceParameter->num_units_in_tick ;
int inter_mb_size = pSequenceParameter->bits_per_second * 1.0 / (fps+4.0) / width_in_mbs / height_in_mbs;
int intra_mb_size = inter_mb_size * 5.0;
@@ -130,7 +130,7 @@ intel_mfc_bit_rate_control_context_init(struct encode_state *encode_state,
}
static void intel_mfc_brc_init(struct encode_state *encode_state,
- struct intel_encoder_context* encoder_context)
+ struct intel_encoder_context* encoder_context)
{
struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
@@ -166,7 +166,7 @@ static void intel_mfc_brc_init(struct encode_state *encode_state,
mfc_context->hrd.buffer_size = (double)pParameterHRD->buffer_size;
mfc_context->hrd.current_buffer_fullness =
(double)(pParameterHRD->initial_buffer_fullness < mfc_context->hrd.buffer_size)?
- pParameterHRD->initial_buffer_fullness: mfc_context->hrd.buffer_size/2.;
+ pParameterHRD->initial_buffer_fullness: mfc_context->hrd.buffer_size/2.;
mfc_context->hrd.target_buffer_fullness = (double)mfc_context->hrd.buffer_size/2.;
mfc_context->hrd.buffer_capacity = (double)mfc_context->hrd.buffer_size/qp1_size;
mfc_context->hrd.violation_noted = 0;
@@ -188,8 +188,8 @@ static void intel_mfc_brc_init(struct encode_state *encode_state,
}
int intel_mfc_update_hrd(struct encode_state *encode_state,
- struct gen6_mfc_context *mfc_context,
- int frame_bits)
+ struct gen6_mfc_context *mfc_context,
+ int frame_bits)
{
double prev_bf = mfc_context->hrd.current_buffer_fullness;
@@ -213,8 +213,8 @@ int intel_mfc_update_hrd(struct encode_state *encode_state,
}
int intel_mfc_brc_postpack(struct encode_state *encode_state,
- struct gen6_mfc_context *mfc_context,
- int frame_bits)
+ struct gen6_mfc_context *mfc_context,
+ int frame_bits)
{
gen6_brc_status sts = BRC_NO_HRD_VIOLATION;
VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
@@ -243,7 +243,7 @@ int intel_mfc_brc_postpack(struct encode_state *encode_state,
frame_size_alpha = (double)mfc_context->brc.gop_nums[slicetype];
if (frame_size_alpha > 30) frame_size_alpha = 30;
frame_size_next = target_frame_size + (double)(target_frame_size - frame_bits) /
- (double)(frame_size_alpha + 1.);
+ (double)(frame_size_alpha + 1.);
/* frame_size_next: avoiding negative number and too small value */
if ((double)frame_size_next < (double)(target_frame_size * 0.25))
@@ -333,7 +333,7 @@ int intel_mfc_brc_postpack(struct encode_state *encode_state,
}
static void intel_mfc_hrd_context_init(struct encode_state *encode_state,
- struct intel_encoder_context *encoder_context)
+ struct intel_encoder_context *encoder_context)
{
struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
@@ -357,14 +357,14 @@ static void intel_mfc_hrd_context_init(struct encode_state *encode_state,
void
intel_mfc_hrd_context_update(struct encode_state *encode_state,
- struct gen6_mfc_context *mfc_context)
+ struct gen6_mfc_context *mfc_context)
{
mfc_context->vui_hrd.i_frame_number++;
}
int intel_mfc_interlace_check(VADriverContextP ctx,
- struct encode_state *encode_state,
- struct intel_encoder_context *encoder_context)
+ struct encode_state *encode_state,
+ struct intel_encoder_context *encoder_context)
{
struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
VAEncSliceParameterBufferH264 *pSliceParameter;
@@ -384,33 +384,136 @@ int intel_mfc_interlace_check(VADriverContextP ctx,
return 1;
}
+/*
+ * Check whether the parameters related to CBR have been updated and decide
+ * whether the CBR configuration needs to be reinitialized.
+ * The following sequence parameters are compared with the saved values:
+ *     bits_per_second
+ *     frame_rate
+ *     gop_configuration(intra_period, ip_period, intra_idr_period)
+ * Returns false if the mode is not CBR or nothing changed; otherwise saves
+ * the new values in mfc_context->brc and returns true.
+ */
+static bool intel_mfc_brc_updated_check(struct encode_state *encode_state,
+                                        struct intel_encoder_context *encoder_context)
+{
+    unsigned int rate_control_mode = encoder_context->rate_control_mode;
+    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
+    double cur_fps, cur_bitrate;
+    VAEncSequenceParameterBufferH264 *pSequenceParameter;
+
+    if (rate_control_mode != VA_RC_CBR)
+        return false;
+
+    pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
+
+    cur_bitrate = pSequenceParameter->bits_per_second;
+    cur_fps = (double)pSequenceParameter->time_scale /
+        (2 * (double)pSequenceParameter->num_units_in_tick);
+
+    if ((cur_bitrate == mfc_context->brc.saved_bps) &&
+        (cur_fps == mfc_context->brc.saved_fps) &&
+        (pSequenceParameter->intra_period == mfc_context->brc.saved_intra_period) &&
+        (pSequenceParameter->intra_idr_period == mfc_context->brc.saved_idr_period) &&
+        (pSequenceParameter->ip_period == mfc_context->brc.saved_ip_period)) {
+        /* the parameters related with CBR are not updated */
+        return false;
+    }
+
+    mfc_context->brc.saved_ip_period = pSequenceParameter->ip_period;
+    mfc_context->brc.saved_intra_period = pSequenceParameter->intra_period;
+    mfc_context->brc.saved_idr_period = pSequenceParameter->intra_idr_period;
+    mfc_context->brc.saved_fps = cur_fps;
+    mfc_context->brc.saved_bps = cur_bitrate;
+    return true;
+}
+
void intel_mfc_brc_prepare(struct encode_state *encode_state,
- struct intel_encoder_context *encoder_context)
+ struct intel_encoder_context *encoder_context)
{
unsigned int rate_control_mode = encoder_context->rate_control_mode;
struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
if (rate_control_mode == VA_RC_CBR) {
+ bool brc_updated;
+ assert(encoder_context->codec != CODEC_MPEG2);
+
+ brc_updated = intel_mfc_brc_updated_check(encode_state, encoder_context);
+
/*Programing bit rate control */
- if ( mfc_context->bit_rate_control_context[SLICE_TYPE_I].MaxSizeInWord == 0 ) {
+ if ((mfc_context->bit_rate_control_context[SLICE_TYPE_I].MaxSizeInWord == 0) ||
+ brc_updated) {
intel_mfc_bit_rate_control_context_init(encode_state, mfc_context);
intel_mfc_brc_init(encode_state, encoder_context);
}
/*Programing HRD control */
- if ( mfc_context->vui_hrd.i_cpb_size_value == 0 )
+ if ((mfc_context->vui_hrd.i_cpb_size_value == 0) || brc_updated )
intel_mfc_hrd_context_init(encode_state, encoder_context);
}
}
+/*
+ * Count the bytes of packed data up to and including the NAL unit header
+ * (leading bytes + start code prefix + header); 0 if no prefix is found.
+ */
+static int intel_avc_find_skipemulcnt(unsigned char *buf, int bits_length)
+{
+    int pos, total_bytes;
+    int prefix_len = 0;
+    int unit_type, skip_cnt;
+
+#define NAL_UNIT_TYPE_MASK 0x1f
+#define HW_MAX_SKIP_LENGTH 15
+
+    /* the scan covers the bit length rounded up to whole dwords */
+    total_bytes = ALIGN(bits_length, 32) >> 3;
+
+    /* look for the first 000001 or 00000001 start code prefix */
+    for (pos = 0; pos < total_bytes - 4; pos++) {
+        if (buf[pos] != 0 || buf[pos + 1] != 0)
+            continue;
+        if (buf[pos + 2] == 1) {
+            prefix_len = 3;
+            break;
+        }
+        if (buf[pos + 2] == 0 && buf[pos + 3] == 1) {
+            prefix_len = 4;
+            break;
+        }
+    }
+
+    if (prefix_len == 0) {
+        /* Only a warning is raised; nothing is reported as skippable. */
+        WARN_ONCE("Invalid packed header data. "
+                  "Can't find the 000001 start_prefix code\n");
+        return 0;
+    }
+
+    /* the byte right after the prefix is the unit header; it is accounted */
+    unit_type = buf[pos + prefix_len] & NAL_UNIT_TYPE_MASK;
+    skip_cnt = pos + prefix_len + 1;
+
+    /* three more unit header bytes are accounted for MVC/SVC units */
+    if (unit_type == 14 || unit_type == 20 || unit_type == 21)
+        skip_cnt += 3;
+
+    if (skip_cnt > HW_MAX_SKIP_LENGTH) {
+        WARN_ONCE("Too many leading zeros are padded for packed data. "
+                  "It is beyond the HW range.!!!\n");
+    }
+    return skip_cnt;
+}
+
void intel_mfc_avc_pipeline_header_programing(VADriverContextP ctx,
- struct encode_state *encode_state,
- struct intel_encoder_context *encoder_context,
- struct intel_batchbuffer *slice_batch)
+ struct encode_state *encode_state,
+ struct intel_encoder_context *encoder_context,
+ struct intel_batchbuffer *slice_batch)
{
struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
int idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SPS);
unsigned int rate_control_mode = encoder_context->rate_control_mode;
+ unsigned int skip_emul_byte_cnt;
if (encode_state->packed_header_data[idx]) {
VAEncPackedHeaderParameterBuffer *param = NULL;
@@ -421,12 +524,13 @@ void intel_mfc_avc_pipeline_header_programing(VADriverContextP ctx,
param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
length_in_bits = param->bit_length;
+ skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
mfc_context->insert_object(ctx,
encoder_context,
header_data,
ALIGN(length_in_bits, 32) >> 5,
length_in_bits & 0x1f,
- 5, /* FIXME: check it */
+ skip_emul_byte_cnt,
0,
0,
!param->has_emulation_bytes,
@@ -444,12 +548,14 @@ void intel_mfc_avc_pipeline_header_programing(VADriverContextP ctx,
param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
length_in_bits = param->bit_length;
+ skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
+
mfc_context->insert_object(ctx,
encoder_context,
header_data,
ALIGN(length_in_bits, 32) >> 5,
length_in_bits & 0x1f,
- 5, /* FIXME: check it */
+ skip_emul_byte_cnt,
0,
0,
!param->has_emulation_bytes,
@@ -467,12 +573,13 @@ void intel_mfc_avc_pipeline_header_programing(VADriverContextP ctx,
param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
length_in_bits = param->bit_length;
+ skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
mfc_context->insert_object(ctx,
encoder_context,
header_data,
ALIGN(length_in_bits, 32) >> 5,
length_in_bits & 0x1f,
- 5, /* FIXME: check it */
+ skip_emul_byte_cnt,
0,
0,
!param->has_emulation_bytes,
@@ -484,13 +591,13 @@ void intel_mfc_avc_pipeline_header_programing(VADriverContextP ctx,
unsigned char *sei_data = NULL;
int length_in_bits = build_avc_sei_buffer_timing(
- mfc_context->vui_hrd.i_initial_cpb_removal_delay_length,
- mfc_context->vui_hrd.i_initial_cpb_removal_delay,
- 0,
- mfc_context->vui_hrd.i_cpb_removal_delay_length, mfc_context->vui_hrd.i_cpb_removal_delay * mfc_context->vui_hrd.i_frame_number,
- mfc_context->vui_hrd.i_dpb_output_delay_length,
- 0,
- &sei_data);
+ mfc_context->vui_hrd.i_initial_cpb_removal_delay_length,
+ mfc_context->vui_hrd.i_initial_cpb_removal_delay,
+ 0,
+ mfc_context->vui_hrd.i_cpb_removal_delay_length, mfc_context->vui_hrd.i_cpb_removal_delay * mfc_context->vui_hrd.i_frame_number,
+ mfc_context->vui_hrd.i_dpb_output_delay_length,
+ 0,
+ &sei_data);
mfc_context->insert_object(ctx,
encoder_context,
(unsigned int *)sei_data,
@@ -506,8 +613,8 @@ void intel_mfc_avc_pipeline_header_programing(VADriverContextP ctx,
}
VAStatus intel_mfc_avc_prepare(VADriverContextP ctx,
- struct encode_state *encode_state,
- struct intel_encoder_context *encoder_context)
+ struct encode_state *encode_state,
+ struct intel_encoder_context *encoder_context)
{
struct i965_driver_data *i965 = i965_driver_data(ctx);
struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
@@ -523,7 +630,7 @@ VAStatus intel_mfc_avc_prepare(VADriverContextP ctx,
int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
- if (IS_GEN6(i965->intel.device_id)) {
+ if (IS_GEN6(i965->intel.device_info)) {
/* On the SNB it should be fixed to 128 for the DMV buffer */
width_in_mbs = 128;
}
@@ -552,7 +659,7 @@ VAStatus intel_mfc_avc_prepare(VADriverContextP ctx,
/* Setup current frame and current direct mv buffer*/
obj_surface = encode_state->reconstructed_object;
- i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
+ i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
if ( obj_surface->private_data == NULL) {
gen6_avc_surface = calloc(sizeof(GenAvcSurface), 1);
@@ -642,7 +749,7 @@ VAStatus intel_mfc_avc_prepare(VADriverContextP ctx,
dri_bo_map(bo, 1);
coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
coded_buffer_segment->mapped = 0;
- coded_buffer_segment->codec = CODED_H264;
+ coded_buffer_segment->codec = encoder_context->codec;
dri_bo_unmap(bo);
return vaStatus;
@@ -662,44 +769,44 @@ VAStatus intel_mfc_avc_prepare(VADriverContextP ctx,
*/
int intel_format_lutvalue(int value, int max)
{
- int ret;
- int logvalue, temp1, temp2;
+ int ret;
+ int logvalue, temp1, temp2;
- if (value <= 0)
- return 0;
+ if (value <= 0)
+ return 0;
- logvalue = (int)(log2f((float)value));
- if (logvalue < 4) {
- ret = value;
- } else {
- int error, temp_value, base, j, temp_err;
- error = value;
- j = logvalue - 4 + 1;
- ret = -1;
- for(; j <= logvalue; j++) {
- if (j == 0) {
- base = value >> j;
- } else {
- base = (value + (1 << (j - 1)) - 1) >> j;
- }
- if (base >= 16)
- continue;
-
- temp_value = base << j;
- temp_err = abs(value - temp_value);
- if (temp_err < error) {
- error = temp_err;
- ret = (j << 4) | base;
- if (temp_err == 0)
- break;
- }
- }
- }
- temp1 = (ret & 0xf) << ((ret & 0xf0) >> 4);
- temp2 = (max & 0xf) << ((max & 0xf0) >> 4);
- if (temp1 > temp2)
- ret = max;
- return ret;
+ logvalue = (int)(log2f((float)value));
+ if (logvalue < 4) {
+ ret = value;
+ } else {
+ int error, temp_value, base, j, temp_err;
+ error = value;
+ j = logvalue - 4 + 1;
+ ret = -1;
+ for(; j <= logvalue; j++) {
+ if (j == 0) {
+ base = value >> j;
+ } else {
+ base = (value + (1 << (j - 1)) - 1) >> j;
+ }
+ if (base >= 16)
+ continue;
+
+ temp_value = base << j;
+ temp_err = abs(value - temp_value);
+ if (temp_err < error) {
+ error = temp_err;
+ ret = (j << 4) | base;
+ if (temp_err == 0)
+ break;
+ }
+ }
+ }
+ temp1 = (ret & 0xf) << ((ret & 0xf0) >> 4);
+ temp2 = (max & 0xf) << ((max & 0xf0) >> 4);
+ if (temp1 > temp2)
+ ret = max;
+ return ret;
}
@@ -709,19 +816,19 @@ int intel_format_lutvalue(int value, int max)
static float intel_lambda_qp(int qp)
{
- float value, lambdaf;
- value = qp;
- value = value / 6 - 2;
- if (value < 0)
- value = 0;
- lambdaf = roundf(powf(2, value));
- return lambdaf;
+ float value, lambdaf;
+ value = qp;
+ value = value / 6 - 2;
+ if (value < 0)
+ value = 0;
+ lambdaf = roundf(powf(2, value));
+ return lambdaf;
}
void intel_vme_update_mbmv_cost(VADriverContextP ctx,
- struct encode_state *encode_state,
- struct intel_encoder_context *encoder_context)
+ struct encode_state *encode_state,
+ struct intel_encoder_context *encoder_context)
{
struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
struct gen6_vme_context *vme_context = encoder_context->vme_context;
@@ -756,30 +863,30 @@ void intel_vme_update_mbmv_cost(VADriverContextP ctx,
m_cost = 0;
vme_state_message[MODE_INTER_MV0] = intel_format_lutvalue(m_cost, 0x6f);
for (j = 1; j < 3; j++) {
- m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
- m_cost = (int)m_costf;
- vme_state_message[MODE_INTER_MV0 + j] = intel_format_lutvalue(m_cost, 0x6f);
+ m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
+ m_cost = (int)m_costf;
+ vme_state_message[MODE_INTER_MV0 + j] = intel_format_lutvalue(m_cost, 0x6f);
}
mv_count = 3;
for (j = 4; j <= 64; j *= 2) {
- m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
- m_cost = (int)m_costf;
- vme_state_message[MODE_INTER_MV0 + mv_count] = intel_format_lutvalue(m_cost, 0x6f);
- mv_count++;
+ m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
+ m_cost = (int)m_costf;
+ vme_state_message[MODE_INTER_MV0 + mv_count] = intel_format_lutvalue(m_cost, 0x6f);
+ mv_count++;
}
if (qp <= 25) {
- vme_state_message[MODE_INTRA_16X16] = 0x4a;
- vme_state_message[MODE_INTRA_8X8] = 0x4a;
- vme_state_message[MODE_INTRA_4X4] = 0x4a;
- vme_state_message[MODE_INTRA_NONPRED] = 0x4a;
- vme_state_message[MODE_INTER_16X16] = 0x4a;
- vme_state_message[MODE_INTER_16X8] = 0x4a;
- vme_state_message[MODE_INTER_8X8] = 0x4a;
- vme_state_message[MODE_INTER_8X4] = 0x4a;
- vme_state_message[MODE_INTER_4X4] = 0x4a;
- vme_state_message[MODE_INTER_BWD] = 0x2a;
- return;
+ vme_state_message[MODE_INTRA_16X16] = 0x4a;
+ vme_state_message[MODE_INTRA_8X8] = 0x4a;
+ vme_state_message[MODE_INTRA_4X4] = 0x4a;
+ vme_state_message[MODE_INTRA_NONPRED] = 0x4a;
+ vme_state_message[MODE_INTER_16X16] = 0x4a;
+ vme_state_message[MODE_INTER_16X8] = 0x4a;
+ vme_state_message[MODE_INTER_8X8] = 0x4a;
+ vme_state_message[MODE_INTER_8X4] = 0x4a;
+ vme_state_message[MODE_INTER_4X4] = 0x4a;
+ vme_state_message[MODE_INTER_BWD] = 0x2a;
+ return;
}
m_costf = lambda * 10;
vme_state_message[MODE_INTRA_16X16] = intel_format_lutvalue(m_cost, 0x8f);
@@ -791,42 +898,42 @@ void intel_vme_update_mbmv_cost(VADriverContextP ctx,
m_cost = m_costf;
vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
if (slice_type == SLICE_TYPE_P) {
- m_costf = lambda * 2.5;
- m_cost = m_costf;
- vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
- m_costf = lambda * 4;
- m_cost = m_costf;
- vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
- m_costf = lambda * 1.5;
- m_cost = m_costf;
- vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
- m_costf = lambda * 3;
- m_cost = m_costf;
- vme_state_message[MODE_INTER_8X4] = intel_format_lutvalue(m_cost, 0x6f);
- m_costf = lambda * 5;
- m_cost = m_costf;
- vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
- /* BWD is not used in P-frame */
- vme_state_message[MODE_INTER_BWD] = 0;
+ m_costf = lambda * 2.5;
+ m_cost = m_costf;
+ vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
+ m_costf = lambda * 4;
+ m_cost = m_costf;
+ vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
+ m_costf = lambda * 1.5;
+ m_cost = m_costf;
+ vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
+ m_costf = lambda * 3;
+ m_cost = m_costf;
+ vme_state_message[MODE_INTER_8X4] = intel_format_lutvalue(m_cost, 0x6f);
+ m_costf = lambda * 5;
+ m_cost = m_costf;
+ vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
+ /* BWD is not used in P-frame */
+ vme_state_message[MODE_INTER_BWD] = 0;
} else {
- m_costf = lambda * 2.5;
- m_cost = m_costf;
- vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
- m_costf = lambda * 5.5;
- m_cost = m_costf;
- vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
- m_costf = lambda * 3.5;
- m_cost = m_costf;
- vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
- m_costf = lambda * 5.0;
- m_cost = m_costf;
- vme_state_message[MODE_INTER_8X4] = intel_format_lutvalue(m_cost, 0x6f);
- m_costf = lambda * 6.5;
- m_cost = m_costf;
- vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
- m_costf = lambda * 1.5;
- m_cost = m_costf;
- vme_state_message[MODE_INTER_BWD] = intel_format_lutvalue(m_cost, 0x6f);
+ m_costf = lambda * 2.5;
+ m_cost = m_costf;
+ vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
+ m_costf = lambda * 5.5;
+ m_cost = m_costf;
+ vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
+ m_costf = lambda * 3.5;
+ m_cost = m_costf;
+ vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
+ m_costf = lambda * 5.0;
+ m_cost = m_costf;
+ vme_state_message[MODE_INTER_8X4] = intel_format_lutvalue(m_cost, 0x6f);
+ m_costf = lambda * 6.5;
+ m_cost = m_costf;
+ vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
+ m_costf = lambda * 1.5;
+ m_cost = m_costf;
+ vme_state_message[MODE_INTER_BWD] = intel_format_lutvalue(m_cost, 0x6f);
}
}
}
@@ -841,8 +948,8 @@ gen7_vme_scoreboard_init(VADriverContextP ctx, struct gen6_vme_context *vme_cont
vme_context->gpe_context.vfe_desc5.scoreboard0.enable = 1;
vme_context->gpe_context.vfe_desc5.scoreboard0.type = SCOREBOARD_STALLING;
vme_context->gpe_context.vfe_desc5.scoreboard0.mask = (MB_SCOREBOARD_A |
- MB_SCOREBOARD_B |
- MB_SCOREBOARD_C);
+ MB_SCOREBOARD_B |
+ MB_SCOREBOARD_C);
/* In VME prediction the current mb depends on the neighbour
* A/B/C macroblock. So the left/up/up-right dependency should
@@ -862,25 +969,25 @@ gen7_vme_scoreboard_init(VADriverContextP ctx, struct gen6_vme_context *vme_cont
/* check whether the mb of (x_index, y_index) is out of bound */
static inline int loop_in_bounds(int x_index, int y_index, int first_mb, int num_mb, int mb_width, int mb_height)
{
- int mb_index;
- if (x_index < 0 || x_index >= mb_width)
- return -1;
- if (y_index < 0 || y_index >= mb_height)
- return -1;
+ int mb_index;
+ if (x_index < 0 || x_index >= mb_width)
+ return -1;
+ if (y_index < 0 || y_index >= mb_height)
+ return -1;
- mb_index = y_index * mb_width + x_index;
- if (mb_index < first_mb || mb_index > (first_mb + num_mb))
- return -1;
- return 0;
+ mb_index = y_index * mb_width + x_index;
+ if (mb_index < first_mb || mb_index > (first_mb + num_mb))
+ return -1;
+ return 0;
}
void
gen7_vme_walker_fill_vme_batchbuffer(VADriverContextP ctx,
- struct encode_state *encode_state,
- int mb_width, int mb_height,
- int kernel,
- int transform_8x8_mode_flag,
- struct intel_encoder_context *encoder_context)
+ struct encode_state *encode_state,
+ int mb_width, int mb_height,
+ int kernel,
+ int transform_8x8_mode_flag,
+ struct intel_encoder_context *encoder_context)
{
struct gen6_vme_context *vme_context = encoder_context->vme_context;
int mb_row;
@@ -922,7 +1029,7 @@ gen7_vme_walker_fill_vme_batchbuffer(VADriverContextP ctx,
if (x_inner != (mb_width -1)) {
mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
score_dep |= MB_SCOREBOARD_C;
- }
+ }
}
*command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
@@ -944,7 +1051,7 @@ gen7_vme_walker_fill_vme_batchbuffer(VADriverContextP ctx,
xtemp_outer = mb_width - 2;
if (xtemp_outer < 0)
- xtemp_outer = 0;
+ xtemp_outer = 0;
x_outer = xtemp_outer;
y_outer = first_mb / mb_width;
for (;!loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
@@ -966,7 +1073,7 @@ gen7_vme_walker_fill_vme_batchbuffer(VADriverContextP ctx,
if (x_inner != (mb_width -1)) {
mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
score_dep |= MB_SCOREBOARD_C;
- }
+ }
}
*command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
@@ -1001,113 +1108,513 @@ gen7_vme_walker_fill_vme_batchbuffer(VADriverContextP ctx,
+/*
+ * Pack one reference entry byte from a picture's flags and its frame store id:
+ *   bit 6    - set for a long-term reference
+ *   bit 5    - set when the top-field and bottom-field flags are equal
+ *   bits 1.. - frame_store_id
+ *   bit 0    - set when only the bottom-field flag is present
+ * The result is truncated to 8 bits by the return type.
+ */
static uint8_t
intel_get_ref_idx_state_1(VAPictureH264 *va_pic, unsigned int frame_store_id)
{
- unsigned int is_long_term =
- !!(va_pic->flags & VA_PICTURE_H264_LONG_TERM_REFERENCE);
- unsigned int is_top_field =
- !!(va_pic->flags & VA_PICTURE_H264_TOP_FIELD);
- unsigned int is_bottom_field =
- !!(va_pic->flags & VA_PICTURE_H264_BOTTOM_FIELD);
-
- return ((is_long_term << 6) |
- ((is_top_field ^ is_bottom_field ^ 1) << 5) |
- (frame_store_id << 1) |
- ((is_top_field ^ 1) & is_bottom_field));
+    const unsigned int long_term =
+        (va_pic->flags & VA_PICTURE_H264_LONG_TERM_REFERENCE) ? 1 : 0;
+    const unsigned int top =
+        (va_pic->flags & VA_PICTURE_H264_TOP_FIELD) ? 1 : 0;
+    const unsigned int bottom =
+        (va_pic->flags & VA_PICTURE_H264_BOTTOM_FIELD) ? 1 : 0;
+    const unsigned int fields_equal = (top == bottom) ? 1 : 0;
+    const unsigned int bottom_only = (!top && bottom) ? 1 : 0;
+    unsigned int entry = 0;
+
+    entry |= long_term << 6;
+    entry |= fields_equal << 5;
+    entry |= frame_store_id << 1;
+    entry |= bottom_only;
+
+    return entry;
}
+/*
+ * Emit the two MFX_AVC_REF_IDX_STATE commands (list 0, then list 1).
+ *
+ * Every entry defaults to 0x80.  For P/B slices the byte selected by the low
+ * byte of vme_context->ref_index_in_mb[0] is replaced with the state of
+ * vme_context->used_reference_objects[0], provided that surface is found in
+ * encode_state->reference_objects[0..15]; B slices do the same for list 1.
+ * A reference index above 3 is reported once and treated as 0.
+ */
void
intel_mfc_avc_ref_idx_state(VADriverContextP ctx,
- struct encode_state *encode_state,
- struct intel_encoder_context *encoder_context)
+ struct encode_state *encode_state,
+ struct intel_encoder_context *encoder_context)
{
- struct intel_batchbuffer *batch = encoder_context->base.batch;
- struct i965_driver_data *i965 = i965_driver_data(ctx);
- int slice_type;
- struct object_surface *slice_obj_surface, *obj_surface;
- int ref_surface_id;
- unsigned int fref_entry, bref_entry;
- int frame_index, i;
- VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
- VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
-
- fref_entry = 0x80808080;
- bref_entry = 0x80808080;
- slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
-
- if (slice_type == SLICE_TYPE_P || slice_type == SLICE_TYPE_B) {
- slice_obj_surface = NULL;
- ref_surface_id = slice_param->RefPicList0[0].picture_id;
- if (ref_surface_id != 0 && ref_surface_id != VA_INVALID_SURFACE) {
- slice_obj_surface = SURFACE(ref_surface_id);
- }
- if (slice_obj_surface && slice_obj_surface->bo) {
- obj_surface = slice_obj_surface;
- } else {
- obj_surface = encode_state->reference_objects[0];
- }
- frame_index = -1;
- for (i = 0; i < 16; i++) {
- if (obj_surface == encode_state->reference_objects[i]) {
- frame_index = i;
- break;
- }
+    struct gen6_vme_context *vme_context = encoder_context->vme_context;
+    struct intel_batchbuffer *batch = encoder_context->base.batch;
+    int slice_type;
+    struct object_surface *obj_surface;
+    unsigned int fref_entry, bref_entry;
+    int frame_index, i;
+    VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
+
+    fref_entry = 0x80808080;
+    bref_entry = 0x80808080;
+    slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
+
+    if (slice_type == SLICE_TYPE_P || slice_type == SLICE_TYPE_B) {
+        int ref_idx_l0 = (vme_context->ref_index_in_mb[0] & 0xff);
+
+        if (ref_idx_l0 > 3) {
+            WARN_ONCE("ref_idx_l0 is out of range\n");
+            ref_idx_l0 = 0;
+        }
+
+        obj_surface = vme_context->used_reference_objects[0];
+        frame_index = -1;
+        for (i = 0; i < 16; i++) {
+            if (obj_surface &&
+                obj_surface == encode_state->reference_objects[i]) {
+                frame_index = i;
+                break;
+            }
+        }
+        if (frame_index == -1) {
+            WARN_ONCE("RefPicList0 is not found in DPB!\n");
+        } else {
+            /* shift may be 24: keep the mask and the entry unsigned so bit 31 is never reached through a signed int */
+            unsigned int ref_idx_l0_shift = ref_idx_l0 * 8;
+            fref_entry &= ~(0xFFU << ref_idx_l0_shift);
+            fref_entry |= ((unsigned int)intel_get_ref_idx_state_1(vme_context->used_references[0], frame_index) << ref_idx_l0_shift);
+        }
+    }
+
+    if (slice_type == SLICE_TYPE_B) {
+        int ref_idx_l1 = (vme_context->ref_index_in_mb[1] & 0xff);
+
+        if (ref_idx_l1 > 3) {
+            WARN_ONCE("ref_idx_l1 is out of range\n");
+            ref_idx_l1 = 0;
+        }
+
+        obj_surface = vme_context->used_reference_objects[1];
+        frame_index = -1;
+        for (i = 0; i < 16; i++) {
+            if (obj_surface &&
+                obj_surface == encode_state->reference_objects[i]) {
+                frame_index = i;
+                break;
+            }
+        }
+        if (frame_index == -1) {
+            WARN_ONCE("RefPicList1 is not found in DPB!\n");
+        } else {
+            /* same unsigned arithmetic as for list 0 */
+            unsigned int ref_idx_l1_shift = ref_idx_l1 * 8;
+            bref_entry &= ~(0xFFU << ref_idx_l1_shift);
+            bref_entry |= ((unsigned int)intel_get_ref_idx_state_1(vme_context->used_references[1], frame_index) << ref_idx_l1_shift);
+        }
+    }
+
+    BEGIN_BCS_BATCH(batch, 10);
+    OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
+    OUT_BCS_BATCH(batch, 0); //Select L0
+    OUT_BCS_BATCH(batch, fref_entry); //Only 1 reference
+    for(i = 0; i < 7; i++) {
+        OUT_BCS_BATCH(batch, 0x80808080);
+    }
+    ADVANCE_BCS_BATCH(batch);
+
+    BEGIN_BCS_BATCH(batch, 10);
+    OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
+    OUT_BCS_BATCH(batch, 1); //Select L1
+    OUT_BCS_BATCH(batch, bref_entry); //Only 1 reference
+    for(i = 0; i < 7; i++) {
+        OUT_BCS_BATCH(batch, 0x80808080);
+    }
+    ADVANCE_BCS_BATCH(batch);
+}
+
+
+/*
+ * Fill the MPEG-2 related entries of vme_context->vme_state_message.
+ *
+ * The motion vector range is chosen from vme_context->mpeg2_level (an unknown
+ * level is reported once and handled like the low level).  For non-intra
+ * pictures the mode/MV cost entries are derived from the quantiser scale code
+ * of the first slice; intra pictures leave those entries untouched.  The MV
+ * range and the picture size in macroblocks are always written.
+ */
+void intel_vme_mpeg2_state_setup(VADriverContextP ctx,
+                                 struct encode_state *encode_state,
+                                 struct intel_encoder_context *encoder_context)
+{
+    struct gen6_vme_context *vme_context = encoder_context->vme_context;
+    uint32_t *state_msg = (uint32_t *)(vme_context->vme_state_message);
+    VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
+    int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
+    int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
+    VAEncSliceParameterBufferMPEG2 *slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[0]->buffer;
+    VAEncPictureParameterBufferMPEG2 *pic_param;
+    /* low-level range doubles as the fallback for an unknown level */
+    uint32_t mv_x = 512;
+    uint32_t mv_y = 64;
+
+    if (vme_context->mpeg2_level == MPEG2_LEVEL_LOW) {
+        /* defaults already match */
+    } else if (vme_context->mpeg2_level == MPEG2_LEVEL_MAIN) {
+        mv_x = 1024;
+        mv_y = 128;
+    } else if (vme_context->mpeg2_level == MPEG2_LEVEL_HIGH) {
+        mv_x = 2048;
+        mv_y = 128;
+    } else {
+        WARN_ONCE("Incorrect Mpeg2 level setting!\n");
+    }
+
+    pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
+    if (pic_param->picture_type != VAEncPictureTypeIntra) {
+        int qp = slice_param->quantiser_scale_code;
+        float lambda = intel_lambda_qp(qp);
+        int mode_cost = (int)lambda;
+        int distance, slot;
+
+        /* No Intra prediction. So it is zero */
+        state_msg[MODE_INTRA_8X8] = 0;
+        state_msg[MODE_INTRA_4X4] = 0;
+        state_msg[MODE_INTER_MV0] = 0;
+
+        /* distances 1, 2, 4, ..., 64 fill the seven slots after MODE_INTER_MV0 */
+        for (distance = 1, slot = 1; distance <= 64; distance *= 2, slot++) {
+            float mv_costf = (log2f((float)(distance + 1)) + 1.718f) * lambda;
+
+            state_msg[MODE_INTER_MV0 + slot] = intel_format_lutvalue((int)mv_costf, 0x6f);
+        }
+
+        /* It can only perform the 16x16 search, so the mode cost of the
+         * other partitions (for example 16x8/8x8) is left at zero.
+         */
+        state_msg[MODE_INTRA_16X16] = intel_format_lutvalue(mode_cost, 0x8f);
+        state_msg[MODE_INTER_16X16] = intel_format_lutvalue(mode_cost, 0x8f);
+
+        state_msg[MODE_INTER_16X8] = 0;
+        state_msg[MODE_INTER_8X8] = 0;
+        state_msg[MODE_INTER_8X4] = 0;
+        state_msg[MODE_INTER_4X4] = 0;
+        state_msg[MODE_INTER_BWD] = intel_format_lutvalue(mode_cost, 0x6f);
+    }
+
+    state_msg[MPEG2_MV_RANGE] = (mv_y << 16) | (mv_x);
+    state_msg[MPEG2_PIC_WIDTH_HEIGHT] = (height_in_mbs << 16) | width_in_mbs;
+}
+
/*
 * Added function gen7_vme_mpeg2_walker_fill_vme_batchbuffer (the '-' lines
 * woven through it are the removed tail of the previous function).
 *
 * Maps vme_context->vme_batchbuffer.bo and writes one 8-dword
 * CMD_MEDIA_OBJECT per visited macroblock, followed by a zero dword and
 * MI_BATCH_BUFFER_END, then unmaps the buffer.  The walk covers the whole
 * picture (first_mb = 0, num_mb = mb_width * mb_height) and steps through
 * each run with x -= 2, y += 1 until loop_in_bounds() rejects the position.
 * A first pass starts runs on row 0 from columns 0 .. mb_width - 3; a second
 * pass starts runs from column max(mb_width - 2, 0) up to the last column of
 * every row.
 */
+void
+gen7_vme_mpeg2_walker_fill_vme_batchbuffer(VADriverContextP ctx,
+ struct encode_state *encode_state,
+ int mb_width, int mb_height,
+ int kernel,
+ struct intel_encoder_context *encoder_context)
+{
+ struct gen6_vme_context *vme_context = encoder_context->vme_context;
+ unsigned int *command_ptr;
+
+#define MPEG2_SCOREBOARD (1 << 21) /* written as the third dword of every MEDIA_OBJECT below */
+
+ dri_bo_map(vme_context->vme_batchbuffer.bo, 1); /* NOTE(review): second argument presumably requests a writable mapping - confirm */
+ command_ptr = vme_context->vme_batchbuffer.bo->virtual;
+
+ {
+ unsigned int mb_intra_ub, score_dep;
+ int x_outer, y_outer, x_inner, y_inner;
+ int xtemp_outer = 0;
+ int first_mb = 0; /* the walk always covers the whole picture */
+ int num_mb = mb_width * mb_height;
+
+ x_outer = 0;
+ y_outer = 0;
+
+
+ for (; x_outer < (mb_width -2 ) && !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) { /* first pass: run start points on row 0 */
+ x_inner = x_outer;
+ y_inner = y_outer;
+ for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) { /* one run: continues until the position leaves the picture */
+ mb_intra_ub = 0;
+ score_dep = 0;
+ if (x_inner != 0) { /* a left neighbour exists */
+ mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
+ score_dep |= MB_SCOREBOARD_A;
}
- if (frame_index == -1) {
- WARN_ONCE("RefPicList0 is not found in DPB!\n");
- } else if (slice_obj_surface && slice_obj_surface->bo) {
- /* This is passed by Slice_param->RefPicList0 */
- fref_entry &= ~(0xFF);
- fref_entry += intel_get_ref_idx_state_1(&slice_param->RefPicList0[0], frame_index);
- } else {
- /* This is passed by the hacked mode */
- fref_entry &= ~(0xFF);
- fref_entry += intel_get_ref_idx_state_1(&pic_param->ReferenceFrames[frame_index], frame_index);
+ if (y_inner != 0) { /* a row above exists */
+ mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
+ score_dep |= MB_SCOREBOARD_B;
+
+ if (x_inner != 0)
+ mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
+
+ if (x_inner != (mb_width -1)) { /* not the last column */
+ mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
+ score_dep |= MB_SCOREBOARD_C;
+ }
}
+
+ *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
+ *command_ptr++ = kernel;
+ *command_ptr++ = MPEG2_SCOREBOARD;
+ /* Indirect data: always written as 0 */
+ *command_ptr++ = 0;
+ /* the (X, Y) term of scoreboard: y in the upper 16 bits, x in the lower */
+ *command_ptr++ = ((y_inner << 16) | x_inner);
+ *command_ptr++ = score_dep;
+ /* inline data: picture width and macroblock position, then the neighbour flags */
+ *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
+ *command_ptr++ = ((1 << 18) | (1 << 16) | (mb_intra_ub << 8));
+ x_inner -= 2; /* step to the next macroblock of the run */
+ y_inner += 1;
+ }
+ x_outer += 1;
}
- if (slice_type == SLICE_TYPE_B) {
- slice_obj_surface = NULL;
- ref_surface_id = slice_param->RefPicList1[0].picture_id;
- if (ref_surface_id != 0 && ref_surface_id != VA_INVALID_SURFACE) {
- slice_obj_surface = SURFACE(ref_surface_id);
- }
- if (slice_obj_surface && slice_obj_surface->bo) {
- obj_surface = slice_obj_surface;
- } else {
- obj_surface = encode_state->reference_objects[1];
- }
- frame_index = -1;
- for (i = 0; i < 16; i++) {
- if (obj_surface == encode_state->reference_objects[i]) {
- frame_index = i;
- break;
- }
+ xtemp_outer = mb_width - 2; /* second pass starts two columns from the right edge */
+ if (xtemp_outer < 0)
+ xtemp_outer = 0;
+ x_outer = xtemp_outer;
+ y_outer = 0;
+ for (;!loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) { /* second pass: ends once the start point leaves the picture */
+ y_inner = y_outer;
+ x_inner = x_outer;
+ for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) { /* same per-macroblock emission as in the first pass */
+ mb_intra_ub = 0;
+ score_dep = 0;
+ if (x_inner != 0) {
+ mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
+ score_dep |= MB_SCOREBOARD_A;
}
- if (frame_index == -1) {
- WARN_ONCE("RefPicList1 is not found in DPB!\n");
- } else if (slice_obj_surface && slice_obj_surface->bo) {
- bref_entry &= ~(0xFF);
- bref_entry += intel_get_ref_idx_state_1(&slice_param->RefPicList1[0], frame_index);
- } else {
- bref_entry &= ~(0xFF);
- bref_entry += intel_get_ref_idx_state_1(&pic_param->ReferenceFrames[frame_index], frame_index);
+ if (y_inner != 0) {
+ mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
+ score_dep |= MB_SCOREBOARD_B;
+
+ if (x_inner != 0)
+ mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
+
+ if (x_inner != (mb_width -1)) {
+ mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
+ score_dep |= MB_SCOREBOARD_C;
+ }
}
- }
- BEGIN_BCS_BATCH(batch, 10);
- OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
- OUT_BCS_BATCH(batch, 0); //Select L0
- OUT_BCS_BATCH(batch, fref_entry); //Only 1 reference
- for(i = 0; i < 7; i++) {
- OUT_BCS_BATCH(batch, 0x80808080);
- }
- ADVANCE_BCS_BATCH(batch);
-
- BEGIN_BCS_BATCH(batch, 10);
- OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
- OUT_BCS_BATCH(batch, 1); //Select L1
- OUT_BCS_BATCH(batch, bref_entry); //Only 1 reference
- for(i = 0; i < 7; i++) {
- OUT_BCS_BATCH(batch, 0x80808080);
+ *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
+ *command_ptr++ = kernel;
+ *command_ptr++ = MPEG2_SCOREBOARD;
+ /* Indirect data: always written as 0 */
+ *command_ptr++ = 0;
+ /* the (X, Y) term of scoreboard: y in the upper 16 bits, x in the lower */
+ *command_ptr++ = ((y_inner << 16) | x_inner);
+ *command_ptr++ = score_dep;
+ /* inline data: picture width and macroblock position, then the neighbour flags */
+ *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
+ *command_ptr++ = ((1 << 18) | (1 << 16) | (mb_intra_ub << 8));
+
+ x_inner -= 2;
+ y_inner += 1;
+ }
+ x_outer++;
+ if (x_outer >= mb_width) { /* past the last column: restart on the next row */
+ y_outer += 1;
+ x_outer = xtemp_outer;
+ }
}
- ADVANCE_BCS_BATCH(batch);
+ }
+
+ *command_ptr++ = 0;
+ *command_ptr++ = MI_BATCH_BUFFER_END; /* terminates the generated batch */
+
+ dri_bo_unmap(vme_context->vme_batchbuffer.bo);
+ return;
+}
+
+/*
+ * Find the entry of ref_list whose TopFieldOrderCnt is closest to that of
+ * curr_pic on one side of it.
+ *
+ * With dir == 0 only entries with a smaller TopFieldOrderCnt than curr_pic
+ * qualify; with dir != 0 only entries with a larger one.  Scanning stops at
+ * the first entry that is flagged invalid or has no surface.  Returns the
+ * index of the closest qualifying entry, or -1 when there is none.
+ */
+static int
+avc_temporal_find_surface(VAPictureH264 *curr_pic,
+                          VAPictureH264 *ref_list,
+                          int num_pictures,
+                          int dir)
+{
+    int best_index = -1;
+    int best_distance = 0x7FFFFFFF;
+    int idx;
+
+    for (idx = 0; idx < num_pictures; idx++) {
+        VAPictureH264 *candidate = &ref_list[idx];
+        int distance;
+
+        if (candidate->flags & VA_PICTURE_H264_INVALID)
+            break;
+        if (candidate->picture_id == VA_INVALID_SURFACE)
+            break;
+
+        distance = curr_pic->TopFieldOrderCnt - candidate->TopFieldOrderCnt;
+        if (dir)
+            distance = -distance;
+
+        /* a strictly positive distance means the entry is on the requested side */
+        if (distance <= 0 || distance >= best_distance)
+            continue;
+
+        best_distance = distance;
+        best_index = idx;
+    }
+
+    return best_index;
+}
+
+/*
+ * Choose the reference picture used by the VME for one reference list and
+ * record the choice in the VME context.
+ *
+ * list_index selects RefPicList0 (0) or RefPicList1 (otherwise);
+ * surface_index is passed through to vme_source_surface_state, which is
+ * called for the chosen surface.
+ *
+ * With a single active reference the first list entry is used, falling back
+ * to encode_state->reference_objects[list_index] when that entry has no
+ * usable surface.  With several active references the temporally closest
+ * entry is picked.  When no surface with a buffer object is found,
+ * used_reference_objects/used_references are set to NULL and
+ * ref_index_in_mb to 0 for this list.
+ */
+void
+intel_avc_vme_reference_state(VADriverContextP ctx,
+                              struct encode_state *encode_state,
+                              struct intel_encoder_context *encoder_context,
+                              int list_index,
+                              int surface_index,
+                              void (* vme_source_surface_state)(
+                                  VADriverContextP ctx,
+                                  int index,
+                                  struct object_surface *obj_surface,
+                                  struct intel_encoder_context *encoder_context))
+{
+    struct gen6_vme_context *vme_context = encoder_context->vme_context;
+    struct object_surface *obj_surface = NULL;
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    VASurfaceID ref_surface_id;
+    VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
+    VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
+    int max_num_references;
+    VAPictureH264 *curr_pic;
+    VAPictureH264 *ref_list;
+    int ref_idx;
+
+    if (list_index == 0) {
+        max_num_references = pic_param->num_ref_idx_l0_active_minus1 + 1;
+        ref_list = slice_param->RefPicList0;
+    } else {
+        max_num_references = pic_param->num_ref_idx_l1_active_minus1 + 1;
+        ref_list = slice_param->RefPicList1;
+    }
+
+    if (max_num_references == 1) {
+        if (list_index == 0) {
+            ref_surface_id = slice_param->RefPicList0[0].picture_id;
+            vme_context->used_references[0] = &slice_param->RefPicList0[0];
+        } else {
+            ref_surface_id = slice_param->RefPicList1[0].picture_id;
+            vme_context->used_references[1] = &slice_param->RefPicList1[0];
+        }
+
+        if (ref_surface_id != VA_INVALID_SURFACE)
+            obj_surface = SURFACE(ref_surface_id);
+
+        /* the list entry is unusable: fall back to the picture-level reference */
+        if (!obj_surface ||
+            !obj_surface->bo) {
+            obj_surface = encode_state->reference_objects[list_index];
+            vme_context->used_references[list_index] = &pic_param->ReferenceFrames[list_index];
+        }
+
+        ref_idx = 0;
+    } else {
+        curr_pic = &pic_param->CurrPic;
+
+        /* select the reference frame in temporal space */
+        ref_idx = avc_temporal_find_surface(curr_pic, ref_list, max_num_references, list_index == 1);
+
+        /* -1 means no entry qualified: ref_list must not be indexed with it.
+         * obj_surface then stays NULL and the state is cleared below.
+         */
+        if (ref_idx >= 0) {
+            ref_surface_id = ref_list[ref_idx].picture_id;
+
+            if (ref_surface_id != VA_INVALID_SURFACE) /* otherwise warning later */
+                obj_surface = SURFACE(ref_surface_id);
+
+            vme_context->used_reference_objects[list_index] = obj_surface;
+            vme_context->used_references[list_index] = &ref_list[ref_idx];
+        }
+    }
+
+    if (obj_surface &&
+        obj_surface->bo) {
+        assert(ref_idx >= 0);
+        vme_context->used_reference_objects[list_index] = obj_surface;
+        vme_source_surface_state(ctx, surface_index, obj_surface, encoder_context);
+        /* the chosen index is replicated into all four bytes */
+        vme_context->ref_index_in_mb[list_index] = (ref_idx << 24 |
+                                                    ref_idx << 16 |
+                                                    ref_idx <<  8 |
+                                                    ref_idx);
+    } else {
+        vme_context->used_reference_objects[list_index] = NULL;
+        vme_context->used_references[list_index] = NULL;
+        vme_context->ref_index_in_mb[list_index] = 0;
+    }
+}
+
+/*
+ * Insert the packed data attached to slice slice_index into slice_batch
+ * through mfc_context->insert_object.
+ *
+ * All raw packed buffers of the slice except packed slice headers are
+ * inserted first, with the last-header flag cleared.  The slice header goes
+ * in last with the flag set: the packed one supplied by the application when
+ * encode_state->slice_header_index[slice_index] is non-zero, otherwise one
+ * generated with build_avc_slice_header().
+ */
+void intel_avc_slice_insert_packed_data(VADriverContextP ctx,
+                                        struct encode_state *encode_state,
+                                        struct intel_encoder_context *encoder_context,
+                                        int slice_index,
+                                        struct intel_batchbuffer *slice_batch)
+{
+    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
+    VAEncPackedHeaderParameterBuffer *packed_param = NULL;
+    unsigned int *packed_data = NULL;
+    unsigned int bit_count, emul_skip;
+    int packed_slice_hdr, raw_count, raw_first, n;
+
+    /* -1 marks "no packed slice header supplied for this slice" */
+    packed_slice_hdr = -1;
+    if (encode_state->slice_header_index[slice_index] != 0)
+        packed_slice_hdr = (encode_state->slice_header_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);
+
+    raw_count = encode_state->slice_rawdata_count[slice_index];
+    raw_first = (encode_state->slice_rawdata_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);
+
+    for (n = 0; n < raw_count; n++) {
+        packed_data = (unsigned int *)encode_state->packed_header_data_ext[raw_first + n]->buffer;
+        packed_param = (VAEncPackedHeaderParameterBuffer *)
+                       (encode_state->packed_header_params_ext[raw_first + n]->buffer);
+
+        /* packed slice headers are inserted last, after this loop */
+        if (packed_param->type != VAEncPackedHeaderSlice) {
+            bit_count = packed_param->bit_length;
+            emul_skip = intel_avc_find_skipemulcnt((unsigned char *)packed_data, bit_count);
+
+            /* the slice header still follows, so the last header flag is zero */
+            mfc_context->insert_object(ctx,
+                                       encoder_context,
+                                       packed_data,
+                                       ALIGN(bit_count, 32) >> 5,
+                                       bit_count & 0x1f,
+                                       emul_skip,
+                                       0,
+                                       0,
+                                       !packed_param->has_emulation_bytes,
+                                       slice_batch);
+        }
+    }
+
+    if (packed_slice_hdr != -1) {
+        packed_data = (unsigned int *)encode_state->packed_header_data_ext[packed_slice_hdr]->buffer;
+        packed_param = (VAEncPackedHeaderParameterBuffer *)
+                       (encode_state->packed_header_params_ext[packed_slice_hdr]->buffer);
+        bit_count = packed_param->bit_length;
+        emul_skip = intel_avc_find_skipemulcnt((unsigned char *)packed_data, bit_count);
+
+        /* the slice header is the last header data of the slice, so the
+         * last header flag is one
+         */
+        mfc_context->insert_object(ctx,
+                                   encoder_context,
+                                   packed_data,
+                                   ALIGN(bit_count, 32) >> 5,
+                                   bit_count & 0x1f,
+                                   emul_skip,
+                                   1,
+                                   0,
+                                   !packed_param->has_emulation_bytes,
+                                   slice_batch);
+    } else {
+        /* No slice header data is passed, so the driver generates the
+         * normal H264 slice header itself.
+         */
+        VAEncSequenceParameterBufferH264 *seq_param = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
+        VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
+        VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer;
+        unsigned char *generated_hdr = NULL;
+        int generated_bits;
+
+        generated_bits = build_avc_slice_header(seq_param,
+                                                pic_param,
+                                                slice_param,
+                                                &generated_hdr);
+        mfc_context->insert_object(ctx, encoder_context,
+                                   (unsigned int *)generated_hdr,
+                                   ALIGN(generated_bits, 32) >> 5,
+                                   generated_bits & 0x1f,
+                                   5, /* first 5 bytes are start code + nal unit type */
+                                   1, 0, 1, slice_batch);
+
+        free(generated_hdr);
+    }
}
diff --git a/src/gen6_mfd.c b/src/gen6_mfd.c
index 3f696dd..b6d19e8 100755
--- a/src/gen6_mfd.c
+++ b/src/gen6_mfd.c
@@ -61,6 +61,7 @@ gen6_mfd_init_avc_surface(VADriverContextP ctx,
if (!gen6_avc_surface) {
gen6_avc_surface = calloc(sizeof(GenAvcSurface), 1);
+ gen6_avc_surface->frame_store_id = -1;
assert((obj_surface->size & 0x3f) == 0);
obj_surface->private_data = gen6_avc_surface;
}
@@ -130,7 +131,11 @@ gen6_mfd_surface_state(VADriverContextP ctx,
{
struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
struct object_surface *obj_surface = decode_state->render_object;
-
+ unsigned int surface_format;
+
+ surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ?
+ MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8;
+
BEGIN_BCS_BATCH(batch, 6);
OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
OUT_BCS_BATCH(batch, 0);
@@ -138,7 +143,7 @@ gen6_mfd_surface_state(VADriverContextP ctx,
((obj_surface->orig_height - 1) << 19) |
((obj_surface->orig_width - 1) << 6));
OUT_BCS_BATCH(batch,
- (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
+ (surface_format << 28) | /* 420 planar YUV surface */
(1 << 27) | /* must be 1 for interleave U/V, hardware requirement */
(0 << 22) | /* surface object control state, FIXME??? */
((obj_surface->width - 1) << 3) | /* pitch */
@@ -416,7 +421,7 @@ gen6_mfd_avc_directmode_state(VADriverContextP ctx,
struct object_surface *obj_surface;
GenAvcSurface *gen6_avc_surface;
VAPictureH264 *va_pic;
- int i, j;
+ int i;
BEGIN_BCS_BATCH(batch, 69);
OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (69 - 2));
@@ -468,26 +473,14 @@ gen6_mfd_avc_directmode_state(VADriverContextP ctx,
/* POC List */
for (i = 0; i < ARRAY_ELEMS(gen6_mfd_context->reference_surface); i++) {
- if (gen6_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID) {
- int found = 0;
+ obj_surface = gen6_mfd_context->reference_surface[i].obj_surface;
- assert(gen6_mfd_context->reference_surface[i].obj_surface != NULL);
-
- for (j = 0; j < ARRAY_ELEMS(pic_param->ReferenceFrames); j++) {
- va_pic = &pic_param->ReferenceFrames[j];
-
- if (va_pic->flags & VA_PICTURE_H264_INVALID)
- continue;
-
- if (va_pic->picture_id == gen6_mfd_context->reference_surface[i].surface_id) {
- found = 1;
- break;
- }
- }
+ if (obj_surface) {
+ const VAPictureH264 * const va_pic = avc_find_picture(
+ obj_surface->base.id, pic_param->ReferenceFrames,
+ ARRAY_ELEMS(pic_param->ReferenceFrames));
- assert(found == 1);
- assert(!(va_pic->flags & VA_PICTURE_H264_INVALID));
-
+ assert(va_pic != NULL);
OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
} else {
@@ -603,32 +596,6 @@ gen6_mfd_avc_slice_state(VADriverContextP ctx,
ADVANCE_BCS_BATCH(batch);
}
-static void
-gen6_mfd_avc_phantom_slice_state(VADriverContextP ctx,
- VAPictureParameterBufferH264 *pic_param,
- struct gen6_mfd_context *gen6_mfd_context)
-{
- struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
- int width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
- int height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
-
- BEGIN_BCS_BATCH(batch, 11); /* FIXME: is it 10??? */
- OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
- OUT_BCS_BATCH(batch, 0);
- OUT_BCS_BATCH(batch, 0);
- OUT_BCS_BATCH(batch, 0);
- OUT_BCS_BATCH(batch,
- height_in_mbs << 24 |
- width_in_mbs * height_in_mbs / (1 + !!pic_param->pic_fields.bits.field_pic_flag));
- OUT_BCS_BATCH(batch, 0);
- OUT_BCS_BATCH(batch, 0);
- OUT_BCS_BATCH(batch, 0);
- OUT_BCS_BATCH(batch, 0);
- OUT_BCS_BATCH(batch, 0);
- OUT_BCS_BATCH(batch, 0);
- ADVANCE_BCS_BATCH(batch);
-}
-
static inline void
gen6_mfd_avc_ref_idx_state(VADriverContextP ctx,
VAPictureParameterBufferH264 *pic_param,
@@ -730,29 +697,20 @@ gen6_mfd_avc_bsd_object(VADriverContextP ctx,
}
/*
 * After this hunk the function is gen6_mfd_avc_phantom_slice_first: it
 * forwards to gen6_mfd_avc_phantom_slice() with next_slice_param and the
 * batch buffer of gen6_mfd_context.  The decode loop calls it for the first
 * slice group when that slice's first_mb_in_slice is non-zero.
 */
static void
-gen6_mfd_avc_phantom_slice_bsd_object(VADriverContextP ctx,
- VAPictureParameterBufferH264 *pic_param,
- struct gen6_mfd_context *gen6_mfd_context)
+gen6_mfd_avc_phantom_slice_first(VADriverContextP ctx,
+ VAPictureParameterBufferH264 *pic_param,
+ VASliceParameterBufferH264 *next_slice_param,
+ struct gen6_mfd_context *gen6_mfd_context)
{
- struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
-
- BEGIN_BCS_BATCH(batch, 6);
- OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
- OUT_BCS_BATCH(batch, 0);
- OUT_BCS_BATCH(batch, 0);
- OUT_BCS_BATCH(batch, 0);
- OUT_BCS_BATCH(batch, 0);
- OUT_BCS_BATCH(batch, 0);
- ADVANCE_BCS_BATCH(batch);
+ gen6_mfd_avc_phantom_slice(ctx, pic_param, next_slice_param, gen6_mfd_context->base.batch);
}
/*
 * After this hunk the function is gen6_mfd_avc_phantom_slice_last: it
 * forwards to gen6_mfd_avc_phantom_slice() with a NULL next-slice parameter
 * and the batch buffer of gen6_mfd_context.  The decode loop calls it once
 * after all slices have been submitted.
 */
static void
-gen6_mfd_avc_phantom_slice(VADriverContextP ctx,
- VAPictureParameterBufferH264 *pic_param,
- struct gen6_mfd_context *gen6_mfd_context)
+gen6_mfd_avc_phantom_slice_last(VADriverContextP ctx,
+ VAPictureParameterBufferH264 *pic_param,
+ struct gen6_mfd_context *gen6_mfd_context)
{
- gen6_mfd_avc_phantom_slice_state(ctx, pic_param, gen6_mfd_context);
- gen6_mfd_avc_phantom_slice_bsd_object(ctx, pic_param, gen6_mfd_context);
+ gen6_mfd_avc_phantom_slice(ctx, pic_param, NULL, gen6_mfd_context->base.batch);
}
static void
@@ -791,25 +749,18 @@ gen6_mfd_avc_decode_init(VADriverContextP ctx,
assert(decode_state->pic_param && decode_state->pic_param->buffer);
pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
- intel_update_avc_frame_store_index(ctx, decode_state, pic_param, gen6_mfd_context->reference_surface);
+ intel_update_avc_frame_store_index(ctx, decode_state, pic_param,
+ gen6_mfd_context->reference_surface, &gen6_mfd_context->fs_ctx);
width_in_mbs = ((pic_param->picture_width_in_mbs_minus1 + 1) & 0xff);
/* Current decoded picture */
obj_surface = decode_state->render_object;
- obj_surface->flags &= ~SURFACE_REF_DIS_MASK;
- obj_surface->flags |= (pic_param->pic_fields.bits.reference_pic_flag ? SURFACE_REFERENCED : 0);
- i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
-
- /* initial uv component for YUV400 case */
- if (pic_param->seq_fields.bits.chroma_format_idc == 0) {
- unsigned int uv_offset = obj_surface->width * obj_surface->height;
- unsigned int uv_size = obj_surface->width * obj_surface->height / 2;
-
- drm_intel_gem_bo_map_gtt(obj_surface->bo);
- memset(obj_surface->bo->virtual + uv_offset, 0x80, uv_size);
- drm_intel_gem_bo_unmap_gtt(obj_surface->bo);
- }
+ if (pic_param->pic_fields.bits.reference_pic_flag)
+ obj_surface->flags |= SURFACE_REFERENCED;
+ else
+ obj_surface->flags &= ~SURFACE_REFERENCED;
+ avc_ensure_surface_bo(ctx, decode_state, obj_surface, pic_param);
gen6_mfd_init_avc_surface(ctx, pic_param, obj_surface);
dri_bo_unreference(gen6_mfd_context->post_deblocking_output.bo);
@@ -896,6 +847,10 @@ gen6_mfd_avc_decode_picture(VADriverContextP ctx,
else
next_slice_group_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j + 1]->buffer;
+ if (j == 0 &&
+ slice_param->first_mb_in_slice)
+ gen6_mfd_avc_phantom_slice_first(ctx, pic_param, slice_param, gen6_mfd_context);
+
for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
assert((slice_param->slice_type == SLICE_TYPE_I) ||
@@ -918,7 +873,7 @@ gen6_mfd_avc_decode_picture(VADriverContextP ctx,
}
}
- gen6_mfd_avc_phantom_slice(ctx, pic_param, gen6_mfd_context);
+ gen6_mfd_avc_phantom_slice_last(ctx, pic_param, gen6_mfd_context);
intel_batchbuffer_end_atomic(batch);
intel_batchbuffer_flush(batch);
}
@@ -947,7 +902,7 @@ gen6_mfd_mpeg2_decode_init(VADriverContextP ctx,
/* Current decoded picture */
obj_surface = decode_state->render_object;
- i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
+ i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
dri_bo_unreference(gen6_mfd_context->pre_deblocking_output.bo);
gen6_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
@@ -1121,9 +1076,9 @@ gen6_mfd_mpeg2_decode_picture(VADriverContextP ctx,
{
struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
VAPictureParameterBufferMPEG2 *pic_param;
- VASliceParameterBufferMPEG2 *slice_param, *next_slice_param, *next_slice_group_param;
+ VASliceParameterBufferMPEG2 *slice_param, *next_slice_param;
dri_bo *slice_data_bo;
- int i, j;
+ int group_idx = 0, pre_group_idx = -1, element_idx = 0;
assert(decode_state->pic_param && decode_state->pic_param->buffer);
pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
@@ -1142,28 +1097,18 @@ gen6_mfd_mpeg2_decode_picture(VADriverContextP ctx,
gen6_mfd_context->wa_mpeg2_slice_vertical_position =
mpeg2_wa_slice_vertical_position(decode_state, pic_param);
- for (j = 0; j < decode_state->num_slice_params; j++) {
- assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
- slice_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j]->buffer;
- slice_data_bo = decode_state->slice_datas[j]->bo;
- gen6_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_MPEG2, gen6_mfd_context);
+ slice_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[group_idx]->buffer;
- if (j == decode_state->num_slice_params - 1)
- next_slice_group_param = NULL;
- else
- next_slice_group_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j + 1]->buffer;
-
- for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
- assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
-
- if (i < decode_state->slice_params[j]->num_elements - 1)
- next_slice_param = slice_param + 1;
- else
- next_slice_param = next_slice_group_param;
-
- gen6_mfd_mpeg2_bsd_object(ctx, pic_param, slice_param, next_slice_param, gen6_mfd_context);
- slice_param++;
+ for (; slice_param;) {
+ if (pre_group_idx != group_idx) {
+ slice_data_bo = decode_state->slice_datas[group_idx]->bo;
+ gen6_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_MPEG2, gen6_mfd_context);
+ pre_group_idx = group_idx;
}
+
+ next_slice_param = intel_mpeg2_find_next_slice(decode_state, pic_param, slice_param, &group_idx, &element_idx);
+ gen6_mfd_mpeg2_bsd_object(ctx, pic_param, slice_param, next_slice_param, gen6_mfd_context);
+ slice_param = next_slice_param;
}
intel_batchbuffer_end_atomic(batch);
@@ -1270,7 +1215,7 @@ gen6_mfd_vc1_decode_init(VADriverContextP ctx,
/* Current decoded picture */
obj_surface = decode_state->render_object;
- i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
+ i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
gen6_mfd_init_vc1_surface(ctx, pic_param, obj_surface);
dri_bo_unreference(gen6_mfd_context->post_deblocking_output.bo);
@@ -1837,9 +1782,10 @@ gen6_mfd_decode_picture(VADriverContextP ctx,
gen6_mfd_mpeg2_decode_picture(ctx, decode_state, gen6_mfd_context);
break;
- case VAProfileH264Baseline:
+ case VAProfileH264ConstrainedBaseline:
case VAProfileH264Main:
case VAProfileH264High:
+ case VAProfileH264StereoHigh:
gen6_mfd_avc_decode_picture(ctx, decode_state, gen6_mfd_context);
break;
diff --git a/src/gen6_mfd.h b/src/gen6_mfd.h
index de131d6..f499803 100644
--- a/src/gen6_mfd.h
+++ b/src/gen6_mfd.h
@@ -62,6 +62,7 @@ struct gen6_mfd_context
VAIQMatrixBufferMPEG2 mpeg2;
} iq_matrix;
+ GenFrameStoreContext fs_ctx;
GenFrameStore reference_surface[MAX_GEN_REFERENCE_FRAMES];
GenBuffer post_deblocking_output;
GenBuffer pre_deblocking_output;
diff --git a/src/gen6_vme.c b/src/gen6_vme.c
index 69c667d..2e02591 100644
--- a/src/gen6_vme.c
+++ b/src/gen6_vme.c
@@ -40,14 +40,6 @@
#include "gen6_vme.h"
#include "gen6_mfc.h"
-#define SURFACE_STATE_PADDED_SIZE_0_GEN7 ALIGN(sizeof(struct gen7_surface_state), 32)
-#define SURFACE_STATE_PADDED_SIZE_1_GEN7 ALIGN(sizeof(struct gen7_surface_state2), 32)
-#define SURFACE_STATE_PADDED_SIZE_GEN7 MAX(SURFACE_STATE_PADDED_SIZE_0_GEN7, SURFACE_STATE_PADDED_SIZE_1_GEN7)
-
-#define SURFACE_STATE_PADDED_SIZE_0_GEN6 ALIGN(sizeof(struct i965_surface_state), 32)
-#define SURFACE_STATE_PADDED_SIZE_1_GEN6 ALIGN(sizeof(struct i965_surface_state2), 32)
-#define SURFACE_STATE_PADDED_SIZE_GEN6 MAX(SURFACE_STATE_PADDED_SIZE_0_GEN6, SURFACE_STATE_PADDED_SIZE_1_GEN6)
-
#define SURFACE_STATE_PADDED_SIZE MAX(SURFACE_STATE_PADDED_SIZE_GEN6, SURFACE_STATE_PADDED_SIZE_GEN7)
#define SURFACE_STATE_OFFSET(index) (SURFACE_STATE_PADDED_SIZE * index)
#define BINDING_TABLE_OFFSET(index) (SURFACE_STATE_OFFSET(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * index)
@@ -204,7 +196,6 @@ gen6_vme_surface_setup(VADriverContextP ctx,
struct intel_encoder_context *encoder_context)
{
struct object_surface *obj_surface;
- struct i965_driver_data *i965 = i965_driver_data(ctx);
/*Setup surfaces state*/
/* current picture for encoding */
@@ -215,43 +206,14 @@ gen6_vme_surface_setup(VADriverContextP ctx,
if (!is_intra) {
VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
int slice_type;
- struct object_surface *slice_obj_surface;
- int ref_surface_id;
slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
+ assert(slice_type != SLICE_TYPE_I && slice_type != SLICE_TYPE_SI);
- if (slice_type == SLICE_TYPE_P || slice_type == SLICE_TYPE_B) {
- slice_obj_surface = NULL;
- ref_surface_id = slice_param->RefPicList0[0].picture_id;
- if (ref_surface_id != 0 && ref_surface_id != VA_INVALID_SURFACE) {
- slice_obj_surface = SURFACE(ref_surface_id);
- }
- if (slice_obj_surface && slice_obj_surface->bo) {
- obj_surface = slice_obj_surface;
- } else {
- obj_surface = encode_state->reference_objects[0];
- }
- /* reference 0 */
- if (obj_surface && obj_surface->bo)
- gen6_vme_source_surface_state(ctx, 1, obj_surface, encoder_context);
- }
- if (slice_type == SLICE_TYPE_B) {
- /* reference 1 */
- slice_obj_surface = NULL;
- ref_surface_id = slice_param->RefPicList1[0].picture_id;
- if (ref_surface_id != 0 && ref_surface_id != VA_INVALID_SURFACE) {
- slice_obj_surface = SURFACE(ref_surface_id);
- }
- if (slice_obj_surface && slice_obj_surface->bo) {
- obj_surface = slice_obj_surface;
- } else {
- obj_surface = encode_state->reference_objects[0];
- }
-
- obj_surface = encode_state->reference_objects[1];
- if (obj_surface && obj_surface->bo)
- gen6_vme_source_surface_state(ctx, 2, obj_surface, encoder_context);
- }
+ intel_avc_vme_reference_state(ctx, encode_state, encoder_context, 0, 1, gen6_vme_source_surface_state);
+
+ if (slice_type == SLICE_TYPE_B)
+ intel_avc_vme_reference_state(ctx, encode_state, encoder_context, 1, 2, gen6_vme_source_surface_state);
}
/* VME output */
@@ -319,7 +281,7 @@ static VAStatus gen6_vme_constant_setup(VADriverContextP ctx,
if (vme_context->h264_level >= 30) {
mv_num = 16;
if (vme_context->h264_level >= 31)
- mv_num = 8;
+ mv_num = 8;
}
dri_bo_map(vme_context->gpe_context.curbe.bo, 1);
@@ -422,21 +384,38 @@ static VAStatus gen6_vme_vme_state_setup(VADriverContextP ctx,
dri_bo_map(vme_context->vme_state.bo, 1);
assert(vme_context->vme_state.bo->virtual);
vme_state_message = (unsigned int *)vme_context->vme_state.bo->virtual;
-
- vme_state_message[0] = 0x01010101;
- vme_state_message[1] = 0x10010101;
- vme_state_message[2] = 0x0F0F0F0F;
- vme_state_message[3] = 0x100F0F0F;
- vme_state_message[4] = 0x01010101;
- vme_state_message[5] = 0x10010101;
- vme_state_message[6] = 0x0F0F0F0F;
- vme_state_message[7] = 0x100F0F0F;
- vme_state_message[8] = 0x01010101;
- vme_state_message[9] = 0x10010101;
- vme_state_message[10] = 0x0F0F0F0F;
- vme_state_message[11] = 0x000F0F0F;
- vme_state_message[12] = 0x00;
- vme_state_message[13] = 0x00;
+
+ if (encoder_context->quality_level != ENCODER_LOW_QUALITY) {
+ vme_state_message[0] = 0x01010101;
+ vme_state_message[1] = 0x10010101;
+ vme_state_message[2] = 0x0F0F0F0F;
+ vme_state_message[3] = 0x100F0F0F;
+ vme_state_message[4] = 0x01010101;
+ vme_state_message[5] = 0x10010101;
+ vme_state_message[6] = 0x0F0F0F0F;
+ vme_state_message[7] = 0x100F0F0F;
+ vme_state_message[8] = 0x01010101;
+ vme_state_message[9] = 0x10010101;
+ vme_state_message[10] = 0x0F0F0F0F;
+ vme_state_message[11] = 0x000F0F0F;
+ vme_state_message[12] = 0x00;
+ vme_state_message[13] = 0x00;
+ } else {
+ vme_state_message[0] = 0x10010101;
+ vme_state_message[1] = 0x100F0F0F;
+ vme_state_message[2] = 0x10010101;
+ vme_state_message[3] = 0x000F0F0F;
+ vme_state_message[4] = 0;
+ vme_state_message[5] = 0;
+ vme_state_message[6] = 0;
+ vme_state_message[7] = 0;
+ vme_state_message[8] = 0;
+ vme_state_message[9] = 0;
+ vme_state_message[10] = 0;
+ vme_state_message[11] = 0;
+ vme_state_message[12] = 0;
+ vme_state_message[13] = 0;
+ }
vme_state_message[14] = 0x4a4a;
vme_state_message[15] = 0x0;
@@ -490,7 +469,7 @@ gen6_vme_fill_vme_batchbuffer(VADriverContextP ctx,
number_mb_cmds = slice_mb_number - i;
}
- *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
+ *command_ptr++ = (CMD_MEDIA_OBJECT | (9 - 2));
*command_ptr++ = kernel;
*command_ptr++ = 0;
*command_ptr++ = 0;
@@ -500,6 +479,7 @@ gen6_vme_fill_vme_batchbuffer(VADriverContextP ctx,
/*inline data */
*command_ptr++ = (mb_width << 16 | mb_y << 8 | mb_x);
*command_ptr++ = (number_mb_cmds << 16 | transform_8x8_mode_flag | ((i==0) << 1));
+ *command_ptr++ = encoder_context->quality_level;
i += number_mb_cmds;
}
@@ -558,7 +538,7 @@ static void gen6_vme_pipeline_programing(VADriverContextP ctx,
intel_batchbuffer_start_atomic(batch, 0x1000);
gen6_gpe_pipeline_setup(ctx, &vme_context->gpe_context, batch);
BEGIN_BATCH(batch, 2);
- OUT_BATCH(batch, MI_BATCH_BUFFER_START | (2 << 6));
+ OUT_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8));
OUT_RELOC(batch,
vme_context->vme_batchbuffer.bo,
I915_GEM_DOMAIN_COMMAND, 0,
@@ -579,7 +559,7 @@ static VAStatus gen6_vme_prepare(VADriverContextP ctx,
struct gen6_vme_context *vme_context = encoder_context->vme_context;
if (!vme_context->h264_level ||
- (vme_context->h264_level != pSequenceParameter->level_idc)) {
+ (vme_context->h264_level != pSequenceParameter->level_idc)) {
vme_context->h264_level = pSequenceParameter->level_idc;
}
/*Setup all the memory object*/
@@ -649,9 +629,7 @@ Bool gen6_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *e
{
struct gen6_vme_context *vme_context = NULL;
- if (encoder_context->profile != VAProfileH264Baseline &&
- encoder_context->profile != VAProfileH264Main &&
- encoder_context->profile != VAProfileH264High) {
+ if (encoder_context->codec != CODEC_H264) {
/* Never get here */
assert(0);
return False;
@@ -659,7 +637,7 @@ Bool gen6_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *e
vme_context = calloc(1, sizeof(struct gen6_vme_context));
vme_context->gpe_context.surface_state_binding_table.length =
- (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;
+ (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;
vme_context->gpe_context.idrt.max_entries = MAX_INTERFACE_DESC_GEN6;
vme_context->gpe_context.idrt.entry_size = sizeof(struct gen6_interface_descriptor_data);
diff --git a/src/gen6_vme.h b/src/gen6_vme.h
index 17f199e..d461982 100644
--- a/src/gen6_vme.h
+++ b/src/gen6_vme.h
@@ -62,31 +62,44 @@ struct gen6_vme_context
void (*vme_surface2_setup)(VADriverContextP ctx,
- struct i965_gpe_context *gpe_context,
- struct object_surface *obj_surface,
- unsigned long binding_table_offset,
- unsigned long surface_state_offset);
+ struct i965_gpe_context *gpe_context,
+ struct object_surface *obj_surface,
+ unsigned long binding_table_offset,
+ unsigned long surface_state_offset);
void (*vme_media_rw_surface_setup)(VADriverContextP ctx,
- struct i965_gpe_context *gpe_context,
- struct object_surface *obj_surface,
- unsigned long binding_table_offset,
- unsigned long surface_state_offset);
+ struct i965_gpe_context *gpe_context,
+ struct object_surface *obj_surface,
+ unsigned long binding_table_offset,
+ unsigned long surface_state_offset);
void (*vme_buffer_suface_setup)(VADriverContextP ctx,
struct i965_gpe_context *gpe_context,
struct i965_buffer_surface *buffer_surface,
unsigned long binding_table_offset,
unsigned long surface_state_offset);
void (*vme_media_chroma_surface_setup)(VADriverContextP ctx,
- struct i965_gpe_context *gpe_context,
- struct object_surface *obj_surface,
- unsigned long binding_table_offset,
- unsigned long surface_state_offset);
+ struct i965_gpe_context *gpe_context,
+ struct object_surface *obj_surface,
+ unsigned long binding_table_offset,
+ unsigned long surface_state_offset);
void *vme_state_message;
unsigned int h264_level;
unsigned int video_coding_type;
unsigned int vme_kernel_sum;
+ unsigned int mpeg2_level;
+
+ struct object_surface *used_reference_objects[2];
+ void *used_references[2];
+ unsigned int ref_index_in_mb[2];
};
+#define MPEG2_PIC_WIDTH_HEIGHT 30
+#define MPEG2_MV_RANGE 29
+#define MPEG2_LEVEL_MASK 0x0f
+#define MPEG2_LEVEL_LOW 0x0a
+#define MPEG2_LEVEL_MAIN 0x08
+#define MPEG2_LEVEL_HIGH 0x04
+
+
Bool gen75_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context);
extern void intel_vme_update_mbmv_cost(VADriverContextP ctx,
@@ -128,13 +141,38 @@ Bool gen7_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *e
extern void
gen7_vme_walker_fill_vme_batchbuffer(VADriverContextP ctx,
- struct encode_state *encode_state,
- int mb_width, int mb_height,
- int kernel,
- int transform_8x8_mode_flag,
- struct intel_encoder_context *encoder_context);
+ struct encode_state *encode_state,
+ int mb_width, int mb_height,
+ int kernel,
+ int transform_8x8_mode_flag,
+ struct intel_encoder_context *encoder_context);
extern void
gen7_vme_scoreboard_init(VADriverContextP ctx, struct gen6_vme_context *vme_context);
+extern void
+intel_vme_mpeg2_state_setup(VADriverContextP ctx,
+ struct encode_state *encode_state,
+ struct intel_encoder_context *encoder_context);
+
+extern void
+gen7_vme_mpeg2_walker_fill_vme_batchbuffer(VADriverContextP ctx,
+ struct encode_state *encode_state,
+ int mb_width, int mb_height,
+ int kernel,
+ struct intel_encoder_context *encoder_context);
+
+void
+intel_avc_vme_reference_state(VADriverContextP ctx,
+ struct encode_state *encode_state,
+ struct intel_encoder_context *encoder_context,
+ int list_index,
+ int surface_index,
+ void (* vme_source_surface_state)(
+ VADriverContextP ctx,
+ int index,
+ struct object_surface *obj_surface,
+ struct intel_encoder_context *encoder_context));
+
+extern Bool gen8_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context);
#endif /* _GEN6_VME_H_ */
diff --git a/src/gen75_mfc.c b/src/gen75_mfc.c
index cfc3c22..a6a3c1d 100644
--- a/src/gen75_mfc.c
+++ b/src/gen75_mfc.c
@@ -43,33 +43,31 @@
#include "gen6_vme.h"
#include "intel_media.h"
-#define MFC_SOFTWARE_HASWELL 1
+#define AVC_INTRA_RDO_OFFSET 4
+#define AVC_INTER_RDO_OFFSET 10
+#define AVC_INTER_MSG_OFFSET 8
+#define AVC_INTER_MV_OFFSET 48
+#define AVC_RDO_MASK 0xFFFF
+
+#define MFC_SOFTWARE_HASWELL 0
+
+#define SURFACE_STATE_PADDED_SIZE MAX(SURFACE_STATE_PADDED_SIZE_GEN6, SURFACE_STATE_PADDED_SIZE_GEN7)
+#define SURFACE_STATE_OFFSET(index) (SURFACE_STATE_PADDED_SIZE * index)
+#define BINDING_TABLE_OFFSET(index) (SURFACE_STATE_OFFSET(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * index)
#define B0_STEP_REV 2
#define IS_STEPPING_BPLUS(i965) ((i965->intel.revision) >= B0_STEP_REV)
-static const uint32_t gen75_mfc_batchbuffer_avc_intra[][4] = {
-#include "shaders/utils/mfc_batchbuffer_avc_intra.g7b"
-};
-
-static const uint32_t gen75_mfc_batchbuffer_avc_inter[][4] = {
-#include "shaders/utils/mfc_batchbuffer_avc_inter.g7b"
+static const uint32_t gen75_mfc_batchbuffer_avc[][4] = {
+#include "shaders/utils/mfc_batchbuffer_hsw.g75b"
};
static struct i965_kernel gen75_mfc_kernels[] = {
{
"MFC AVC INTRA BATCHBUFFER ",
MFC_BATCHBUFFER_AVC_INTRA,
- gen75_mfc_batchbuffer_avc_intra,
- sizeof(gen75_mfc_batchbuffer_avc_intra),
- NULL
- },
-
- {
- "MFC AVC INTER BATCHBUFFER ",
- MFC_BATCHBUFFER_AVC_INTER,
- gen75_mfc_batchbuffer_avc_inter,
- sizeof(gen75_mfc_batchbuffer_avc_inter),
+ gen75_mfc_batchbuffer_avc,
+ sizeof(gen75_mfc_batchbuffer_avc),
NULL
},
};
@@ -86,8 +84,8 @@ static struct i965_kernel gen75_mfc_kernels[] = {
static void
gen75_mfc_pipe_mode_select(VADriverContextP ctx,
- int standard_select,
- struct intel_encoder_context *encoder_context)
+ int standard_select,
+ struct intel_encoder_context *encoder_context)
{
struct intel_batchbuffer *batch = encoder_context->base.batch;
struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
@@ -152,7 +150,7 @@ gen75_mfc_surface_state(VADriverContextP ctx, struct intel_encoder_context *enco
static void
gen75_mfc_ind_obj_base_addr_state_bplus(VADriverContextP ctx,
- struct intel_encoder_context *encoder_context)
+ struct intel_encoder_context *encoder_context)
{
struct intel_batchbuffer *batch = encoder_context->base.batch;
struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
@@ -161,11 +159,11 @@ gen75_mfc_ind_obj_base_addr_state_bplus(VADriverContextP ctx,
BEGIN_BCS_BATCH(batch, 26);
OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
- /* the DW1-3 is for the MFX indirect bistream offset */
+ /* the DW1-3 is for the MFX indirect bitstream offset */
OUT_BCS_BATCH(batch, 0);
OUT_BCS_BATCH(batch, 0);
OUT_BCS_BATCH(batch, 0);
- /* the DW4-5 is the MFX upper bound */
+ /* the DW4-5 is the MFX upper bound */
OUT_BCS_BATCH(batch, 0);
OUT_BCS_BATCH(batch, 0);
@@ -176,14 +174,14 @@ gen75_mfc_ind_obj_base_addr_state_bplus(VADriverContextP ctx,
OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
OUT_BCS_BATCH(batch, 0);
- /* the DW11-15 is for MFX IT-COFF. Not used on encoder */
+ /* the DW11-15 is for MFX IT-COFF. Not used on encoder */
OUT_BCS_BATCH(batch, 0);
OUT_BCS_BATCH(batch, 0);
OUT_BCS_BATCH(batch, 0);
OUT_BCS_BATCH(batch, 0);
OUT_BCS_BATCH(batch, 0);
- /* the DW16-20 is for MFX indirect DBLK. Not used on encoder */
+ /* the DW16-20 is for MFX indirect DBLK. Not used on encoder */
OUT_BCS_BATCH(batch, 0);
OUT_BCS_BATCH(batch, 0);
OUT_BCS_BATCH(batch, 0);
@@ -247,7 +245,7 @@ gen75_mfc_ind_obj_base_addr_state(VADriverContextP ctx, struct intel_encoder_con
static void
gen75_mfc_avc_img_state(VADriverContextP ctx, struct encode_state *encode_state,
- struct intel_encoder_context *encoder_context)
+ struct intel_encoder_context *encoder_context)
{
struct intel_batchbuffer *batch = encoder_context->base.batch;
struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
@@ -261,7 +259,7 @@ gen75_mfc_avc_img_state(VADriverContextP ctx, struct encode_state *encode_state,
OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
/*DW1. MB setting of frame */
OUT_BCS_BATCH(batch,
- ((width_in_mbs * height_in_mbs) & 0xFFFF));
+ ((width_in_mbs * height_in_mbs - 1) & 0xFFFF));
OUT_BCS_BATCH(batch,
((height_in_mbs - 1) << 16) |
((width_in_mbs - 1) << 0));
@@ -291,22 +289,22 @@ gen75_mfc_avc_img_state(VADriverContextP ctx, struct encode_state *encode_state,
(1 << 2) | /* Frame MB only flag */
(0 << 1) | /* MBAFF mode is in active */
(0 << 0)); /* Field picture flag */
- /* DW5 Trellis quantization */
+ /* DW5 Trellis quantization */
OUT_BCS_BATCH(batch, 0); /* Mainly about MB rate control and debug, just ignoring */
OUT_BCS_BATCH(batch, /* Inter and Intra Conformance Max size limit */
(0xBB8 << 16) | /* InterMbMaxSz */
(0xEE8) ); /* IntraMbMaxSz */
OUT_BCS_BATCH(batch, 0); /* Reserved */
- /* DW8. QP delta */
+ /* DW8. QP delta */
OUT_BCS_BATCH(batch, 0); /* Slice QP Delta for bitrate control */
OUT_BCS_BATCH(batch, 0); /* Slice QP Delta for bitrate control */
- /* DW10. Bit setting for MB */
+ /* DW10. Bit setting for MB */
OUT_BCS_BATCH(batch, 0x8C000000);
OUT_BCS_BATCH(batch, 0x00010000);
- /* DW12. */
+ /* DW12. */
OUT_BCS_BATCH(batch, 0);
OUT_BCS_BATCH(batch, 0x02010100);
- /* DW14. For short format */
+ /* DW14. For short format */
OUT_BCS_BATCH(batch, 0);
OUT_BCS_BATCH(batch, 0);
@@ -315,10 +313,10 @@ gen75_mfc_avc_img_state(VADriverContextP ctx, struct encode_state *encode_state,
static void
gen75_mfc_qm_state(VADriverContextP ctx,
- int qm_type,
- unsigned int *qm,
- int qm_length,
- struct intel_encoder_context *encoder_context)
+ int qm_type,
+ unsigned int *qm,
+ int qm_length,
+ struct intel_encoder_context *encoder_context)
{
struct intel_batchbuffer *batch = encoder_context->base.batch;
unsigned int qm_buffer[16];
@@ -352,10 +350,10 @@ gen75_mfc_avc_qm_state(VADriverContextP ctx, struct intel_encoder_context *encod
static void
gen75_mfc_fqm_state(VADriverContextP ctx,
- int fqm_type,
- unsigned int *fqm,
- int fqm_length,
- struct intel_encoder_context *encoder_context)
+ int fqm_type,
+ unsigned int *fqm,
+ int fqm_length,
+ struct intel_encoder_context *encoder_context)
{
struct intel_batchbuffer *batch = encoder_context->base.batch;
unsigned int fqm_buffer[32];
@@ -393,9 +391,9 @@ gen75_mfc_avc_fqm_state(VADriverContextP ctx, struct intel_encoder_context *enco
static void
gen75_mfc_avc_insert_object(VADriverContextP ctx, struct intel_encoder_context *encoder_context,
- unsigned int *insert_data, int lenght_in_dws, int data_bits_in_last_dw,
- int skip_emul_byte_count, int is_last_header, int is_end_of_slice, int emulation_flag,
- struct intel_batchbuffer *batch)
+ unsigned int *insert_data, int lenght_in_dws, int data_bits_in_last_dw,
+ int skip_emul_byte_count, int is_last_header, int is_end_of_slice, int emulation_flag,
+ struct intel_batchbuffer *batch)
{
if (batch == NULL)
batch = encoder_context->base.batch;
@@ -418,16 +416,33 @@ gen75_mfc_avc_insert_object(VADriverContextP ctx, struct intel_encoder_context *
static void gen75_mfc_init(VADriverContextP ctx,
- struct encode_state *encode_state,
- struct intel_encoder_context *encoder_context)
+ struct encode_state *encode_state,
+ struct intel_encoder_context *encoder_context)
{
struct i965_driver_data *i965 = i965_driver_data(ctx);
struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
dri_bo *bo;
int i;
- VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
- int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
- int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
+ int width_in_mbs = 0;
+ int height_in_mbs = 0;
+ int slice_batchbuffer_size;
+
+ if (encoder_context->codec == CODEC_H264 ||
+ encoder_context->codec == CODEC_H264_MVC) {
+ VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
+ width_in_mbs = pSequenceParameter->picture_width_in_mbs;
+ height_in_mbs = pSequenceParameter->picture_height_in_mbs;
+ } else {
+ VAEncSequenceParameterBufferMPEG2 *pSequenceParameter = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
+
+ assert(encoder_context->codec == CODEC_MPEG2);
+
+ width_in_mbs = ALIGN(pSequenceParameter->picture_width, 16) / 16;
+ height_in_mbs = ALIGN(pSequenceParameter->picture_height, 16) / 16;
+ }
+
+ slice_batchbuffer_size = 64 * width_in_mbs * height_in_mbs + 4096 +
+ (SLICE_HEADER + SLICE_TAIL) * encode_state->num_slice_params_ext;
/*Encode common setup for MFC*/
dri_bo_unreference(mfc_context->post_deblocking_output.bo);
@@ -495,7 +510,8 @@ static void gen75_mfc_init(VADriverContextP ctx,
if (mfc_context->aux_batchbuffer)
intel_batchbuffer_free(mfc_context->aux_batchbuffer);
- mfc_context->aux_batchbuffer = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, 0);
+ mfc_context->aux_batchbuffer = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD,
+ slice_batchbuffer_size);
mfc_context->aux_batchbuffer_surface.bo = mfc_context->aux_batchbuffer->buffer;
dri_bo_reference(mfc_context->aux_batchbuffer_surface.bo);
mfc_context->aux_batchbuffer_surface.pitch = 16;
@@ -507,7 +523,7 @@ static void gen75_mfc_init(VADriverContextP ctx,
static void
gen75_mfc_pipe_buf_addr_state_bplus(VADriverContextP ctx,
- struct intel_encoder_context *encoder_context)
+ struct intel_encoder_context *encoder_context)
{
struct intel_batchbuffer *batch = encoder_context->base.batch;
struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
@@ -525,9 +541,9 @@ gen75_mfc_pipe_buf_addr_state_bplus(VADriverContextP ctx,
else
OUT_BCS_BATCH(batch, 0); /* pre output addr */
- OUT_BCS_BATCH(batch, 0);
- OUT_BCS_BATCH(batch, 0);
- /* the DW4-6 is for the post_deblocking */
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+ /* the DW4-6 is for the post_deblocking */
if (mfc_context->post_deblocking_output.bo)
OUT_BCS_RELOC(batch, mfc_context->post_deblocking_output.bo,
@@ -535,37 +551,37 @@ gen75_mfc_pipe_buf_addr_state_bplus(VADriverContextP ctx,
0); /* post output addr */
else
OUT_BCS_BATCH(batch, 0);
- OUT_BCS_BATCH(batch, 0);
- OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
- /* the DW7-9 is for the uncompressed_picture */
+ /* the DW7-9 is for the uncompressed_picture */
OUT_BCS_RELOC(batch, mfc_context->uncompressed_picture_source.bo,
I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
0); /* uncompressed data */
- OUT_BCS_BATCH(batch, 0);
- OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
- /* the DW10-12 is for the mb status */
+ /* the DW10-12 is for the mb status */
OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
0); /* StreamOut data*/
- OUT_BCS_BATCH(batch, 0);
- OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
- /* the DW13-15 is for the intra_row_store_scratch */
+ /* the DW13-15 is for the intra_row_store_scratch */
OUT_BCS_RELOC(batch, mfc_context->intra_row_store_scratch_buffer.bo,
I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
0);
- OUT_BCS_BATCH(batch, 0);
- OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
- /* the DW16-18 is for the deblocking filter */
+ /* the DW16-18 is for the deblocking filter */
OUT_BCS_RELOC(batch, mfc_context->deblocking_filter_row_store_scratch_buffer.bo,
I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
0);
- OUT_BCS_BATCH(batch, 0);
- OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
/* the DW 19-50 is for Reference pictures*/
for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
@@ -578,25 +594,25 @@ gen75_mfc_pipe_buf_addr_state_bplus(VADriverContextP ctx,
}
OUT_BCS_BATCH(batch, 0);
}
- OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
- /* The DW 52-54 is for the MB status buffer */
+ /* The DW 52-54 is for the MB status buffer */
OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
0); /* Macroblock status buffer*/
- OUT_BCS_BATCH(batch, 0);
- OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
- /* the DW 55-57 is the ILDB buffer */
- OUT_BCS_BATCH(batch, 0);
- OUT_BCS_BATCH(batch, 0);
- OUT_BCS_BATCH(batch, 0);
+ /* the DW 55-57 is the ILDB buffer */
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
- /* the DW 58-60 is the second ILDB buffer */
- OUT_BCS_BATCH(batch, 0);
- OUT_BCS_BATCH(batch, 0);
- OUT_BCS_BATCH(batch, 0);
+ /* the DW 58-60 is the second ILDB buffer */
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
ADVANCE_BCS_BATCH(batch);
}
@@ -657,14 +673,14 @@ gen75_mfc_pipe_buf_addr_state(VADriverContextP ctx, struct intel_encoder_context
I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
0); /* Macroblock status buffer*/
- OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
ADVANCE_BCS_BATCH(batch);
}
static void
gen75_mfc_avc_directmode_state_bplus(VADriverContextP ctx,
- struct intel_encoder_context *encoder_context)
+ struct intel_encoder_context *encoder_context)
{
struct intel_batchbuffer *batch = encoder_context->base.batch;
struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
@@ -688,15 +704,15 @@ gen75_mfc_avc_directmode_state_bplus(VADriverContextP ctx,
OUT_BCS_BATCH(batch, 0);
}
}
- OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
- /* the DW34-36 is the MV for the current reference */
- OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 2].bo,
- I915_GEM_DOMAIN_INSTRUCTION, 0,
- 0);
+ /* the DW34-36 is the MV for the current reference */
+ OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 2].bo,
+ I915_GEM_DOMAIN_INSTRUCTION, 0,
+ 0);
- OUT_BCS_BATCH(batch, 0);
- OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
/* POL list */
for(i = 0; i < 32; i++) {
@@ -749,7 +765,7 @@ gen75_mfc_avc_directmode_state(VADriverContextP ctx, struct intel_encoder_contex
static void
gen75_mfc_bsp_buf_base_addr_state_bplus(VADriverContextP ctx,
- struct intel_encoder_context *encoder_context)
+ struct intel_encoder_context *encoder_context)
{
struct intel_batchbuffer *batch = encoder_context->base.batch;
struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
@@ -763,12 +779,12 @@ gen75_mfc_bsp_buf_base_addr_state_bplus(VADriverContextP ctx,
OUT_BCS_BATCH(batch, 0);
OUT_BCS_BATCH(batch, 0);
- /* the DW4-6 is for MPR Row Store Scratch Buffer Base Address */
+ /* the DW4-6 is for MPR Row Store Scratch Buffer Base Address */
OUT_BCS_BATCH(batch, 0);
OUT_BCS_BATCH(batch, 0);
OUT_BCS_BATCH(batch, 0);
- /* the DW7-9 is for Bitplane Read Buffer Base Address */
+ /* the DW7-9 is for Bitplane Read Buffer Base Address */
OUT_BCS_BATCH(batch, 0);
OUT_BCS_BATCH(batch, 0);
OUT_BCS_BATCH(batch, 0);
@@ -802,8 +818,8 @@ gen75_mfc_bsp_buf_base_addr_state(VADriverContextP ctx, struct intel_encoder_con
static void gen75_mfc_avc_pipeline_picture_programing( VADriverContextP ctx,
- struct encode_state *encode_state,
- struct intel_encoder_context *encoder_context)
+ struct encode_state *encode_state,
+ struct intel_encoder_context *encoder_context)
{
struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
@@ -821,8 +837,8 @@ static void gen75_mfc_avc_pipeline_picture_programing( VADriverContextP ctx,
static VAStatus gen75_mfc_run(VADriverContextP ctx,
- struct encode_state *encode_state,
- struct intel_encoder_context *encoder_context)
+ struct encode_state *encode_state,
+ struct intel_encoder_context *encoder_context)
{
struct intel_batchbuffer *batch = encoder_context->base.batch;
@@ -834,9 +850,9 @@ static VAStatus gen75_mfc_run(VADriverContextP ctx,
static VAStatus
gen75_mfc_stop(VADriverContextP ctx,
- struct encode_state *encode_state,
- struct intel_encoder_context *encoder_context,
- int *encoded_bits_size)
+ struct encode_state *encode_state,
+ struct intel_encoder_context *encoder_context,
+ int *encoded_bits_size)
{
VAStatus vaStatus = VA_STATUS_ERROR_UNKNOWN;
VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
@@ -853,13 +869,13 @@ gen75_mfc_stop(VADriverContextP ctx,
static void
gen75_mfc_avc_slice_state(VADriverContextP ctx,
- VAEncPictureParameterBufferH264 *pic_param,
- VAEncSliceParameterBufferH264 *slice_param,
- struct encode_state *encode_state,
- struct intel_encoder_context *encoder_context,
- int rate_control_enable,
- int qp,
- struct intel_batchbuffer *batch)
+ VAEncPictureParameterBufferH264 *pic_param,
+ VAEncSliceParameterBufferH264 *slice_param,
+ struct encode_state *encode_state,
+ struct intel_encoder_context *encoder_context,
+ int rate_control_enable,
+ int qp,
+ struct intel_batchbuffer *batch)
{
struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
@@ -875,19 +891,32 @@ gen75_mfc_avc_slice_state(VADriverContextP ctx,
int maxQpN, maxQpP;
unsigned char correct[6], grow, shrink;
int i;
- int bslice = 0;
int weighted_pred_idc = 0;
unsigned int luma_log2_weight_denom = slice_param->luma_log2_weight_denom;
unsigned int chroma_log2_weight_denom = slice_param->chroma_log2_weight_denom;
+ int num_ref_l0 = 0, num_ref_l1 = 0;
if (batch == NULL)
batch = encoder_context->base.batch;
- if (slice_type == SLICE_TYPE_P) {
+ if (slice_type == SLICE_TYPE_I) {
+ luma_log2_weight_denom = 0;
+ chroma_log2_weight_denom = 0;
+ } else if (slice_type == SLICE_TYPE_P) {
weighted_pred_idc = pic_param->pic_fields.bits.weighted_pred_flag;
+ num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1;
+
+ if (slice_param->num_ref_idx_active_override_flag)
+ num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
} else if (slice_type == SLICE_TYPE_B) {
weighted_pred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
- bslice = 1;
+ num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1;
+ num_ref_l1 = pic_param->num_ref_idx_l1_active_minus1 + 1;
+
+ if (slice_param->num_ref_idx_active_override_flag) {
+ num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
+ num_ref_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
+ }
if (weighted_pred_idc == 2) {
/* 8.4.3 - Derivation process for prediction weights (8-279) */
@@ -912,14 +941,11 @@ gen75_mfc_avc_slice_state(VADriverContextP ctx,
OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2) );
OUT_BCS_BATCH(batch, slice_type); /*Slice Type: I:P:B Slice*/
- if (slice_type == SLICE_TYPE_I) {
- OUT_BCS_BATCH(batch, 0); /*no reference frames and pred_weight_table*/
- } else {
- OUT_BCS_BATCH(batch,
- (1 << 16) | (bslice << 24) | /*1 reference frame*/
- (chroma_log2_weight_denom << 8) |
- (luma_log2_weight_denom << 0));
- }
+ OUT_BCS_BATCH(batch,
+ (num_ref_l0 << 16) |
+ (num_ref_l1 << 24) |
+ (chroma_log2_weight_denom << 8) |
+ (luma_log2_weight_denom << 0));
OUT_BCS_BATCH(batch,
(weighted_pred_idc << 30) |
@@ -969,14 +995,14 @@ gen75_mfc_avc_slice_state(VADriverContextP ctx,
}
-#ifdef MFC_SOFTWARE_HASWELL
+#if MFC_SOFTWARE_HASWELL
static int
gen75_mfc_avc_pak_object_intra(VADriverContextP ctx, int x, int y, int end_mb,
- int qp,unsigned int *msg,
- struct intel_encoder_context *encoder_context,
- unsigned char target_mb_size, unsigned char max_mb_size,
- struct intel_batchbuffer *batch)
+ int qp,unsigned int *msg,
+ struct intel_encoder_context *encoder_context,
+ unsigned char target_mb_size, unsigned char max_mb_size,
+ struct intel_batchbuffer *batch)
{
int len_in_dwords = 12;
unsigned int intra_msg;
@@ -1023,13 +1049,14 @@ gen75_mfc_avc_pak_object_intra(VADriverContextP ctx, int x, int y, int end_mb,
static int
gen75_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb, int qp,
- unsigned int *msg, unsigned int offset,
- struct intel_encoder_context *encoder_context,
- unsigned char target_mb_size,unsigned char max_mb_size, int slice_type,
- struct intel_batchbuffer *batch)
+ unsigned int *msg, unsigned int offset,
+ struct intel_encoder_context *encoder_context,
+ unsigned char target_mb_size,unsigned char max_mb_size, int slice_type,
+ struct intel_batchbuffer *batch)
{
+ struct gen6_vme_context *vme_context = encoder_context->vme_context;
int len_in_dwords = 12;
- unsigned int inter_msg = 0;
+ unsigned int inter_msg = 0;
if (batch == NULL)
batch = encoder_context->base.batch;
{
@@ -1041,30 +1068,30 @@ gen75_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb, i
* command.
*/
if ((msg[0] & INTER_MODE_MASK) == INTER_8X16) {
- /* MV[0] and MV[2] are replicated */
- mv_ptr[4] = mv_ptr[0];
- mv_ptr[5] = mv_ptr[1];
- mv_ptr[2] = mv_ptr[8];
- mv_ptr[3] = mv_ptr[9];
- mv_ptr[6] = mv_ptr[8];
- mv_ptr[7] = mv_ptr[9];
+ /* MV[0] and MV[2] are replicated */
+ mv_ptr[4] = mv_ptr[0];
+ mv_ptr[5] = mv_ptr[1];
+ mv_ptr[2] = mv_ptr[8];
+ mv_ptr[3] = mv_ptr[9];
+ mv_ptr[6] = mv_ptr[8];
+ mv_ptr[7] = mv_ptr[9];
} else if ((msg[0] & INTER_MODE_MASK) == INTER_16X8) {
- /* MV[0] and MV[1] are replicated */
- mv_ptr[2] = mv_ptr[0];
- mv_ptr[3] = mv_ptr[1];
- mv_ptr[4] = mv_ptr[16];
- mv_ptr[5] = mv_ptr[17];
- mv_ptr[6] = mv_ptr[24];
- mv_ptr[7] = mv_ptr[25];
+ /* MV[0] and MV[1] are replicated */
+ mv_ptr[2] = mv_ptr[0];
+ mv_ptr[3] = mv_ptr[1];
+ mv_ptr[4] = mv_ptr[16];
+ mv_ptr[5] = mv_ptr[17];
+ mv_ptr[6] = mv_ptr[24];
+ mv_ptr[7] = mv_ptr[25];
} else if (((msg[0] & INTER_MODE_MASK) == INTER_8X8) &&
- !(msg[1] & SUBMB_SHAPE_MASK)) {
- /* Don't touch MV[0] or MV[1] */
- mv_ptr[2] = mv_ptr[8];
- mv_ptr[3] = mv_ptr[9];
- mv_ptr[4] = mv_ptr[16];
- mv_ptr[5] = mv_ptr[17];
- mv_ptr[6] = mv_ptr[24];
- mv_ptr[7] = mv_ptr[25];
+ !(msg[1] & SUBMB_SHAPE_MASK)) {
+ /* Don't touch MV[0] or MV[1] */
+ mv_ptr[2] = mv_ptr[8];
+ mv_ptr[3] = mv_ptr[9];
+ mv_ptr[4] = mv_ptr[16];
+ mv_ptr[5] = mv_ptr[17];
+ mv_ptr[6] = mv_ptr[24];
+ mv_ptr[7] = mv_ptr[25];
}
}
@@ -1072,21 +1099,21 @@ gen75_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb, i
OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
- inter_msg = 32;
- /* MV quantity */
- if ((msg[0] & INTER_MODE_MASK) == INTER_8X8) {
- if (msg[1] & SUBMB_SHAPE_MASK)
- inter_msg = 128;
- }
+ inter_msg = 32;
+ /* MV quantity */
+ if ((msg[0] & INTER_MODE_MASK) == INTER_8X8) {
+ if (msg[1] & SUBMB_SHAPE_MASK)
+ inter_msg = 128;
+ }
OUT_BCS_BATCH(batch, inter_msg); /* 32 MV*/
OUT_BCS_BATCH(batch, offset);
- inter_msg = msg[0] & (0x1F00FFFF);
- inter_msg |= INTER_MV8;
- inter_msg |= ((1 << 19) | (1 << 18) | (1 << 17));
- if (((msg[0] & INTER_MODE_MASK) == INTER_8X8) &&
- (msg[1] & SUBMB_SHAPE_MASK)) {
- inter_msg |= INTER_MV32;
- }
+ inter_msg = msg[0] & (0x1F00FFFF);
+ inter_msg |= INTER_MV8;
+ inter_msg |= ((1 << 19) | (1 << 18) | (1 << 17));
+ if (((msg[0] & INTER_MODE_MASK) == INTER_8X8) &&
+ (msg[1] & SUBMB_SHAPE_MASK)) {
+ inter_msg |= INTER_MV32;
+ }
OUT_BCS_BATCH(batch, inter_msg);
@@ -1102,11 +1129,11 @@ gen75_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb, i
OUT_BCS_BATCH(batch, (end_mb << 26) | qp); /* Last MB */
#endif
- inter_msg = msg[1] >> 8;
+ inter_msg = msg[1] >> 8;
/*Stuff for Inter MB*/
OUT_BCS_BATCH(batch, inter_msg);
- OUT_BCS_BATCH(batch, 0x0);
- OUT_BCS_BATCH(batch, 0x0);
+ OUT_BCS_BATCH(batch, vme_context->ref_index_in_mb[0]);
+ OUT_BCS_BATCH(batch, vme_context->ref_index_in_mb[1]);
/*MaxSizeInWord and TargetSzieInWord*/
OUT_BCS_BATCH(batch, (max_mb_size << 24) |
@@ -1119,18 +1146,12 @@ gen75_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb, i
return len_in_dwords;
}
-#define AVC_INTRA_RDO_OFFSET 4
-#define AVC_INTER_RDO_OFFSET 10
-#define AVC_INTER_MSG_OFFSET 8
-#define AVC_INTER_MV_OFFSET 48
-#define AVC_RDO_MASK 0xFFFF
-
static void
gen75_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
- struct encode_state *encode_state,
- struct intel_encoder_context *encoder_context,
- int slice_index,
- struct intel_batchbuffer *slice_batch)
+ struct encode_state *encode_state,
+ struct intel_encoder_context *encoder_context,
+ int slice_index,
+ struct intel_batchbuffer *slice_batch)
{
struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
struct gen6_vme_context *vme_context = encoder_context->vme_context;
@@ -1145,15 +1166,18 @@ gen75_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
int i,x,y;
int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta;
unsigned int rate_control_mode = encoder_context->rate_control_mode;
- unsigned char *slice_header = NULL;
- int slice_header_length_in_bits = 0;
unsigned int tail_data[] = { 0x0, 0x0 };
int slice_type = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type);
int is_intra = slice_type == SLICE_TYPE_I;
+ int qp_slice;
+ qp_slice = qp;
if (rate_control_mode == VA_RC_CBR) {
qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
- pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
+ if (encode_state->slice_header_index[slice_index] == 0) {
+ pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
+ qp_slice = qp;
+ }
}
/* only support for 8-bit pixel bit-depth */
@@ -1162,22 +1186,16 @@ gen75_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52);
assert(qp >= 0 && qp < 52);
- gen75_mfc_avc_slice_state(ctx,
- pPicParameter,
- pSliceParameter,
- encode_state, encoder_context,
- (rate_control_mode == VA_RC_CBR), qp, slice_batch);
+ gen75_mfc_avc_slice_state(ctx,
+ pPicParameter,
+ pSliceParameter,
+ encode_state, encoder_context,
+ (rate_control_mode == VA_RC_CBR), qp_slice, slice_batch);
- if ( slice_index == 0)
+ if ( slice_index == 0)
intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
- slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter, pPicParameter, pSliceParameter, &slice_header);
-
- // slice hander
- mfc_context->insert_object(ctx, encoder_context,
- (unsigned int *)slice_header, ALIGN(slice_header_length_in_bits, 32) >> 5, slice_header_length_in_bits & 0x1f,
- 5, /* first 5 bytes are start code + nal unit type */
- 1, 0, 1, slice_batch);
+ intel_avc_slice_insert_packed_data(ctx, encode_state, encoder_context, slice_index, slice_batch);
dri_bo_map(vme_context->vme_output.bo , 1);
msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual;
@@ -1223,27 +1241,21 @@ gen75_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
tail_data, 1, 8,
1, 1, 1, 0, slice_batch);
}
-
- free(slice_header);
-
}
static dri_bo *
gen75_mfc_avc_software_batchbuffer(VADriverContextP ctx,
- struct encode_state *encode_state,
- struct intel_encoder_context *encoder_context)
+ struct encode_state *encode_state,
+ struct intel_encoder_context *encoder_context)
{
+ struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
struct i965_driver_data *i965 = i965_driver_data(ctx);
struct intel_batchbuffer *batch;
dri_bo *batch_bo;
int i;
int buffer_size;
- VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
- int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
- int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
- buffer_size = width_in_mbs * height_in_mbs * 64;
- batch = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, buffer_size);
+ batch = mfc_context->aux_batchbuffer;
batch_bo = batch->buffer;
for (i = 0; i < encode_state->num_slice_params_ext; i++) {
gen75_mfc_avc_pipeline_slice_programing(ctx, encode_state, encoder_context, i, batch);
@@ -1257,7 +1269,9 @@ gen75_mfc_avc_software_batchbuffer(VADriverContextP ctx,
ADVANCE_BCS_BATCH(batch);
dri_bo_reference(batch_bo);
+
intel_batchbuffer_free(batch);
+ mfc_context->aux_batchbuffer = NULL;
return batch_bo;
}
@@ -1266,8 +1280,8 @@ gen75_mfc_avc_software_batchbuffer(VADriverContextP ctx,
static void
gen75_mfc_batchbuffer_surfaces_input(VADriverContextP ctx,
- struct encode_state *encode_state,
- struct intel_encoder_context *encoder_context)
+ struct encode_state *encode_state,
+ struct intel_encoder_context *encoder_context)
{
struct gen6_vme_context *vme_context = encoder_context->vme_context;
@@ -1279,43 +1293,27 @@ gen75_mfc_batchbuffer_surfaces_input(VADriverContextP ctx,
&vme_context->vme_output,
BINDING_TABLE_OFFSET(BIND_IDX_VME_OUTPUT),
SURFACE_STATE_OFFSET(BIND_IDX_VME_OUTPUT));
- assert(mfc_context->aux_batchbuffer_surface.bo);
- mfc_context->buffer_suface_setup(ctx,
- &mfc_context->gpe_context,
- &mfc_context->aux_batchbuffer_surface,
- BINDING_TABLE_OFFSET(BIND_IDX_MFC_SLICE_HEADER),
- SURFACE_STATE_OFFSET(BIND_IDX_MFC_SLICE_HEADER));
}
static void
gen75_mfc_batchbuffer_surfaces_output(VADriverContextP ctx,
- struct encode_state *encode_state,
- struct intel_encoder_context *encoder_context)
+ struct encode_state *encode_state,
+ struct intel_encoder_context *encoder_context)
{
- struct i965_driver_data *i965 = i965_driver_data(ctx);
struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
- VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
- int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
- int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
- mfc_context->mfc_batchbuffer_surface.num_blocks = width_in_mbs * height_in_mbs + encode_state->num_slice_params_ext * 8 + 1;
- mfc_context->mfc_batchbuffer_surface.size_block = 16 * CMD_LEN_IN_OWORD; /* 3 OWORDs */
- mfc_context->mfc_batchbuffer_surface.pitch = 16;
- mfc_context->mfc_batchbuffer_surface.bo = dri_bo_alloc(i965->intel.bufmgr,
- "MFC batchbuffer",
- mfc_context->mfc_batchbuffer_surface.num_blocks * mfc_context->mfc_batchbuffer_surface.size_block,
- 0x1000);
+ assert(mfc_context->aux_batchbuffer_surface.bo);
mfc_context->buffer_suface_setup(ctx,
&mfc_context->gpe_context,
- &mfc_context->mfc_batchbuffer_surface,
+ &mfc_context->aux_batchbuffer_surface,
BINDING_TABLE_OFFSET(BIND_IDX_MFC_BATCHBUFFER),
SURFACE_STATE_OFFSET(BIND_IDX_MFC_BATCHBUFFER));
}
static void
gen75_mfc_batchbuffer_surfaces_setup(VADriverContextP ctx,
- struct encode_state *encode_state,
- struct intel_encoder_context *encoder_context)
+ struct encode_state *encode_state,
+ struct intel_encoder_context *encoder_context)
{
gen75_mfc_batchbuffer_surfaces_input(ctx, encode_state, encoder_context);
gen75_mfc_batchbuffer_surfaces_output(ctx, encode_state, encoder_context);
@@ -1323,8 +1321,8 @@ gen75_mfc_batchbuffer_surfaces_setup(VADriverContextP ctx,
static void
gen75_mfc_batchbuffer_idrt_setup(VADriverContextP ctx,
- struct encode_state *encode_state,
- struct intel_encoder_context *encoder_context)
+ struct encode_state *encode_state,
+ struct intel_encoder_context *encoder_context)
{
struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
struct gen6_interface_descriptor_data *desc;
@@ -1366,155 +1364,140 @@ gen75_mfc_batchbuffer_idrt_setup(VADriverContextP ctx,
static void
gen75_mfc_batchbuffer_constant_setup(VADriverContextP ctx,
- struct encode_state *encode_state,
- struct intel_encoder_context *encoder_context)
+ struct encode_state *encode_state,
+ struct intel_encoder_context *encoder_context)
{
struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
(void)mfc_context;
}
+#define AVC_PAK_LEN_IN_BYTE 48
+#define AVC_PAK_LEN_IN_OWORD 3
+
static void
gen75_mfc_batchbuffer_emit_object_command(struct intel_batchbuffer *batch,
- int index,
- int head_offset,
- int batchbuffer_offset,
- int head_size,
- int tail_size,
- int number_mb_cmds,
- int first_object,
- int last_object,
- int last_slice,
- int mb_x,
- int mb_y,
- int width_in_mbs,
- int qp)
+ uint32_t intra_flag,
+ int head_offset,
+ int number_mb_cmds,
+ int slice_end_x,
+ int slice_end_y,
+ int mb_x,
+ int mb_y,
+ int width_in_mbs,
+ int qp,
+ uint32_t fwd_ref,
+ uint32_t bwd_ref)
{
- BEGIN_BATCH(batch, 12);
+ uint32_t temp_value;
+ BEGIN_BATCH(batch, 14);
- OUT_BATCH(batch, CMD_MEDIA_OBJECT | (12 - 2));
- OUT_BATCH(batch, index);
+ OUT_BATCH(batch, CMD_MEDIA_OBJECT | (14 - 2));
+ OUT_BATCH(batch, 0);
OUT_BATCH(batch, 0);
OUT_BATCH(batch, 0);
OUT_BATCH(batch, 0);
OUT_BATCH(batch, 0);
/*inline data */
- OUT_BATCH(batch, head_offset);
- OUT_BATCH(batch, batchbuffer_offset);
- OUT_BATCH(batch,
- head_size << 16 |
- tail_size);
- OUT_BATCH(batch,
- number_mb_cmds << 16 |
- first_object << 2 |
- last_object << 1 |
- last_slice);
- OUT_BATCH(batch,
- mb_y << 8 |
- mb_x);
+ OUT_BATCH(batch, head_offset / 16);
+ OUT_BATCH(batch, (intra_flag) | (qp << 16));
+ temp_value = (mb_x | (mb_y << 8) | (width_in_mbs << 16));
+ OUT_BATCH(batch, temp_value);
+
+ OUT_BATCH(batch, number_mb_cmds);
+
OUT_BATCH(batch,
- qp << 16 |
- width_in_mbs);
+ ((slice_end_y << 8) | (slice_end_x)));
+ OUT_BATCH(batch, fwd_ref);
+ OUT_BATCH(batch, bwd_ref);
+
+ OUT_BATCH(batch, MI_NOOP);
ADVANCE_BATCH(batch);
}
static void
gen75_mfc_avc_batchbuffer_slice_command(VADriverContextP ctx,
- struct intel_encoder_context *encoder_context,
- VAEncSliceParameterBufferH264 *slice_param,
- int head_offset,
- unsigned short head_size,
- unsigned short tail_size,
- int batchbuffer_offset,
- int qp,
- int last_slice)
+ struct intel_encoder_context *encoder_context,
+ VAEncSliceParameterBufferH264 *slice_param,
+ int head_offset,
+ int qp,
+ int last_slice)
{
struct intel_batchbuffer *batch = encoder_context->base.batch;
+ struct gen6_vme_context *vme_context = encoder_context->vme_context;
struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
int total_mbs = slice_param->num_macroblocks;
+ int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
int number_mb_cmds = 128;
- int starting_mb = 0;
- int last_object = 0;
- int first_object = 1;
- int i;
+ int starting_offset = 0;
int mb_x, mb_y;
- int index = (slice_param->slice_type == SLICE_TYPE_I) ? MFC_BATCHBUFFER_AVC_INTRA : MFC_BATCHBUFFER_AVC_INTER;
-
- for (i = 0; i < total_mbs / number_mb_cmds; i++) {
- last_object = (total_mbs - starting_mb) == number_mb_cmds;
- mb_x = (slice_param->macroblock_address + starting_mb) % width_in_mbs;
- mb_y = (slice_param->macroblock_address + starting_mb) / width_in_mbs;
- assert(mb_x <= 255 && mb_y <= 255);
+ int last_mb, slice_end_x, slice_end_y;
+ int remaining_mb = total_mbs;
+ uint32_t fwd_ref , bwd_ref, mb_flag;
- starting_mb += number_mb_cmds;
+ last_mb = slice_param->macroblock_address + total_mbs - 1;
+ slice_end_x = last_mb % width_in_mbs;
+ slice_end_y = last_mb / width_in_mbs;
- gen75_mfc_batchbuffer_emit_object_command(batch,
- index,
- head_offset,
- batchbuffer_offset,
- head_size,
- tail_size,
- number_mb_cmds,
- first_object,
- last_object,
- last_slice,
- mb_x,
- mb_y,
- width_in_mbs,
- qp);
-
- if (first_object) {
- head_offset += head_size;
- batchbuffer_offset += head_size;
- }
-
- if (last_object) {
- head_offset += tail_size;
- batchbuffer_offset += tail_size;
- }
-
- batchbuffer_offset += number_mb_cmds * CMD_LEN_IN_OWORD;
+ if (slice_type == SLICE_TYPE_I) {
+ fwd_ref = 0;
+ bwd_ref = 0;
+ mb_flag = 1;
+ } else {
+ fwd_ref = vme_context->ref_index_in_mb[0];
+ bwd_ref = vme_context->ref_index_in_mb[1];
+ mb_flag = 0;
+ }
- first_object = 0;
+ if (width_in_mbs >= 100) {
+ number_mb_cmds = width_in_mbs / 5;
+ } else if (width_in_mbs >= 80) {
+ number_mb_cmds = width_in_mbs / 4;
+ } else if (width_in_mbs >= 60) {
+ number_mb_cmds = width_in_mbs / 3;
+ } else if (width_in_mbs >= 40) {
+ number_mb_cmds = width_in_mbs / 2;
+ } else {
+ number_mb_cmds = width_in_mbs;
}
- if (!last_object) {
- last_object = 1;
- number_mb_cmds = total_mbs % number_mb_cmds;
- mb_x = (slice_param->macroblock_address + starting_mb) % width_in_mbs;
- mb_y = (slice_param->macroblock_address + starting_mb) / width_in_mbs;
- assert(mb_x <= 255 && mb_y <= 255);
- starting_mb += number_mb_cmds;
+ do {
+ if (number_mb_cmds >= remaining_mb) {
+ number_mb_cmds = remaining_mb;
+ }
+ mb_x = (slice_param->macroblock_address + starting_offset) % width_in_mbs;
+ mb_y = (slice_param->macroblock_address + starting_offset) / width_in_mbs;
gen75_mfc_batchbuffer_emit_object_command(batch,
- index,
- head_offset,
- batchbuffer_offset,
- head_size,
- tail_size,
- number_mb_cmds,
- first_object,
- last_object,
- last_slice,
- mb_x,
- mb_y,
- width_in_mbs,
- qp);
- }
+ mb_flag,
+ head_offset,
+ number_mb_cmds,
+ slice_end_x,
+ slice_end_y,
+ mb_x,
+ mb_y,
+ width_in_mbs,
+ qp,
+ fwd_ref,
+ bwd_ref);
+
+ head_offset += (number_mb_cmds * AVC_PAK_LEN_IN_BYTE);
+ remaining_mb -= number_mb_cmds;
+ starting_offset += number_mb_cmds;
+ } while (remaining_mb > 0);
}
/*
* return size in Owords (16bytes)
*/
-static int
+static void
gen75_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
- struct encode_state *encode_state,
- struct intel_encoder_context *encoder_context,
- int slice_index,
- int batchbuffer_offset)
+ struct encode_state *encode_state,
+ struct intel_encoder_context *encoder_context,
+ int slice_index)
{
struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
struct intel_batchbuffer *slice_batch = mfc_context->aux_batchbuffer;
@@ -1526,17 +1509,18 @@ gen75_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
int last_slice = (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks) == (width_in_mbs * height_in_mbs);
int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta;
unsigned int rate_control_mode = encoder_context->rate_control_mode;
- unsigned char *slice_header = NULL;
- int slice_header_length_in_bits = 0;
unsigned int tail_data[] = { 0x0, 0x0 };
long head_offset;
- int old_used = intel_batchbuffer_used_size(slice_batch), used;
- unsigned short head_size, tail_size;
int slice_type = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type);
+ int qp_slice;
+ qp_slice = qp;
if (rate_control_mode == VA_RC_CBR) {
qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
- pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
+ if (encode_state->slice_header_index[slice_index] == 0) {
+ pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
+ qp_slice = qp;
+ }
}
/* only support for 8-bit pixel bit-depth */
@@ -1545,40 +1529,35 @@ gen75_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52);
assert(qp >= 0 && qp < 52);
- head_offset = old_used / 16;
gen75_mfc_avc_slice_state(ctx,
- pPicParameter,
- pSliceParameter,
- encode_state,
- encoder_context,
- (rate_control_mode == VA_RC_CBR),
- qp,
- slice_batch);
+ pPicParameter,
+ pSliceParameter,
+ encode_state,
+ encoder_context,
+ (rate_control_mode == VA_RC_CBR),
+ qp_slice,
+ slice_batch);
if (slice_index == 0)
intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
- slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter, pPicParameter, pSliceParameter, &slice_header);
-
- // slice hander
- mfc_context->insert_object(ctx,
- encoder_context,
- (unsigned int *)slice_header,
- ALIGN(slice_header_length_in_bits, 32) >> 5,
- slice_header_length_in_bits & 0x1f,
- 5, /* first 5 bytes are start code + nal unit type */
- 1,
- 0,
- 1,
- slice_batch);
- free(slice_header);
+ intel_avc_slice_insert_packed_data(ctx, encode_state, encoder_context, slice_index, slice_batch);
intel_batchbuffer_align(slice_batch, 16); /* aligned by an Oword */
- used = intel_batchbuffer_used_size(slice_batch);
- head_size = (used - old_used) / 16;
- old_used = used;
+ head_offset = intel_batchbuffer_used_size(slice_batch);
+
+ slice_batch->ptr += pSliceParameter->num_macroblocks * AVC_PAK_LEN_IN_BYTE;
+
+ gen75_mfc_avc_batchbuffer_slice_command(ctx,
+ encoder_context,
+ pSliceParameter,
+ head_offset,
+ qp,
+ last_slice);
- /* tail */
+
+ /* Aligned for tail */
+ intel_batchbuffer_align(slice_batch, 16); /* aligned by an Oword */
if (last_slice) {
mfc_context->insert_object(ctx,
encoder_context,
@@ -1603,48 +1582,41 @@ gen75_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
slice_batch);
}
- intel_batchbuffer_align(slice_batch, 16); /* aligned by an Oword */
- used = intel_batchbuffer_used_size(slice_batch);
- tail_size = (used - old_used) / 16;
-
-
- gen75_mfc_avc_batchbuffer_slice_command(ctx,
- encoder_context,
- pSliceParameter,
- head_offset,
- head_size,
- tail_size,
- batchbuffer_offset,
- qp,
- last_slice);
-
- return head_size + tail_size + pSliceParameter->num_macroblocks * CMD_LEN_IN_OWORD;
+ return;
}
static void
gen75_mfc_avc_batchbuffer_pipeline(VADriverContextP ctx,
- struct encode_state *encode_state,
- struct intel_encoder_context *encoder_context)
+ struct encode_state *encode_state,
+ struct intel_encoder_context *encoder_context)
{
struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
struct intel_batchbuffer *batch = encoder_context->base.batch;
- int i, size, offset = 0;
+ int i;
intel_batchbuffer_start_atomic(batch, 0x4000);
gen6_gpe_pipeline_setup(ctx, &mfc_context->gpe_context, batch);
for ( i = 0; i < encode_state->num_slice_params_ext; i++) {
- size = gen75_mfc_avc_batchbuffer_slice(ctx, encode_state, encoder_context, i, offset);
- offset += size;
+ gen75_mfc_avc_batchbuffer_slice(ctx, encode_state, encoder_context, i);
+ }
+ {
+ struct intel_batchbuffer *slice_batch = mfc_context->aux_batchbuffer;
+ intel_batchbuffer_align(slice_batch, 8);
+ BEGIN_BCS_BATCH(slice_batch, 2);
+ OUT_BCS_BATCH(slice_batch, 0);
+ OUT_BCS_BATCH(slice_batch, MI_BATCH_BUFFER_END);
+ ADVANCE_BCS_BATCH(slice_batch);
+ mfc_context->aux_batchbuffer = NULL;
+ intel_batchbuffer_free(slice_batch);
}
-
intel_batchbuffer_end_atomic(batch);
intel_batchbuffer_flush(batch);
}
static void
gen75_mfc_build_avc_batchbuffer(VADriverContextP ctx,
- struct encode_state *encode_state,
- struct intel_encoder_context *encoder_context)
+ struct encode_state *encode_state,
+ struct intel_encoder_context *encoder_context)
{
gen75_mfc_batchbuffer_surfaces_setup(ctx, encode_state, encoder_context);
gen75_mfc_batchbuffer_idrt_setup(ctx, encode_state, encoder_context);
@@ -1654,23 +1626,23 @@ gen75_mfc_build_avc_batchbuffer(VADriverContextP ctx,
static dri_bo *
gen75_mfc_avc_hardware_batchbuffer(VADriverContextP ctx,
- struct encode_state *encode_state,
- struct intel_encoder_context *encoder_context)
+ struct encode_state *encode_state,
+ struct intel_encoder_context *encoder_context)
{
struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
+ dri_bo_reference(mfc_context->aux_batchbuffer_surface.bo);
gen75_mfc_build_avc_batchbuffer(ctx, encode_state, encoder_context);
- dri_bo_reference(mfc_context->mfc_batchbuffer_surface.bo);
- return mfc_context->mfc_batchbuffer_surface.bo;
+ return mfc_context->aux_batchbuffer_surface.bo;
}
#endif
static void
gen75_mfc_avc_pipeline_programing(VADriverContextP ctx,
- struct encode_state *encode_state,
- struct intel_encoder_context *encoder_context)
+ struct encode_state *encode_state,
+ struct intel_encoder_context *encoder_context)
{
struct intel_batchbuffer *batch = encoder_context->base.batch;
dri_bo *slice_batch_bo;
@@ -1681,7 +1653,7 @@ gen75_mfc_avc_pipeline_programing(VADriverContextP ctx,
return;
}
-#ifdef MFC_SOFTWARE_HASWELL
+#if MFC_SOFTWARE_HASWELL
slice_batch_bo = gen75_mfc_avc_software_batchbuffer(ctx, encode_state, encoder_context);
#else
slice_batch_bo = gen75_mfc_avc_hardware_batchbuffer(ctx, encode_state, encoder_context);
@@ -1711,8 +1683,8 @@ gen75_mfc_avc_pipeline_programing(VADriverContextP ctx,
static VAStatus
gen75_mfc_avc_encode_picture(VADriverContextP ctx,
- struct encode_state *encode_state,
- struct intel_encoder_context *encoder_context)
+ struct encode_state *encode_state,
+ struct intel_encoder_context *encoder_context)
{
struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
unsigned int rate_control_mode = encoder_context->rate_control_mode;
@@ -1768,9 +1740,11 @@ gen75_mfc_mpeg2_pic_state(VADriverContextP ctx,
VAEncPictureParameterBufferMPEG2 *pic_param;
int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
+ VAEncSliceParameterBufferMPEG2 *slice_param = NULL;
assert(encode_state->pic_param_ext && encode_state->pic_param_ext->buffer);
pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
+ slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[0]->buffer;
BEGIN_BCS_BATCH(batch, 13);
OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2));
@@ -1795,7 +1769,11 @@ gen75_mfc_mpeg2_pic_state(VADriverContextP ctx,
1 << 31 | /* slice concealment */
(height_in_mbs - 1) << 16 |
(width_in_mbs - 1));
- OUT_BCS_BATCH(batch, 0);
+ if (slice_param && slice_param->quantiser_scale_code >= 14)
+ OUT_BCS_BATCH(batch, (3 << 1) | (1 << 4) | (5 << 8) | (1 << 12));
+ else
+ OUT_BCS_BATCH(batch, 0);
+
OUT_BCS_BATCH(batch, 0);
OUT_BCS_BATCH(batch,
0xFFF << 16 | /* InterMBMaxSize */
@@ -1814,7 +1792,7 @@ static void
gen75_mfc_mpeg2_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
{
unsigned char intra_qm[64] = {
- 8, 16, 19, 22, 26, 27, 29, 34,
+ 8, 16, 19, 22, 26, 27, 29, 34,
16, 16, 22, 24, 27, 29, 34, 37,
19, 22, 26, 27, 29, 34, 34, 38,
22, 22, 26, 27, 29, 34, 37, 40,
@@ -1843,14 +1821,14 @@ static void
gen75_mfc_mpeg2_fqm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
{
unsigned short intra_fqm[64] = {
- 65536/0x8, 65536/0x10, 65536/0x13, 65536/0x16, 65536/0x16, 65536/0x1a, 65536/0x1a, 65536/0x1b,
- 65536/0x10, 65536/0x10, 65536/0x16, 65536/0x16, 65536/0x1a, 65536/0x1b, 65536/0x1b, 65536/0x1d,
- 65536/0x13, 65536/0x16, 65536/0x1a, 65536/0x1a, 65536/0x1b, 65536/0x1d, 65536/0x1d, 65536/0x23,
- 65536/0x16, 65536/0x18, 65536/0x1b, 65536/0x1b, 65536/0x13, 65536/0x20, 65536/0x22, 65536/0x26,
- 65536/0x1a, 65536/0x1b, 65536/0x13, 65536/0x13, 65536/0x20, 65536/0x23, 65536/0x26, 65536/0x2e,
- 65536/0x1b, 65536/0x1d, 65536/0x22, 65536/0x22, 65536/0x23, 65536/0x28, 65536/0x2e, 65536/0x38,
- 65536/0x1d, 65536/0x22, 65536/0x22, 65536/0x25, 65536/0x28, 65536/0x30, 65536/0x38, 65536/0x45,
- 65536/0x22, 65536/0x25, 65536/0x26, 65536/0x28, 65536/0x30, 65536/0x3a, 65536/0x45, 65536/0x53,
+ 65536/0x8, 65536/0x10, 65536/0x13, 65536/0x16, 65536/0x16, 65536/0x1a, 65536/0x1a, 65536/0x1b,
+ 65536/0x10, 65536/0x10, 65536/0x16, 65536/0x16, 65536/0x1a, 65536/0x1b, 65536/0x1b, 65536/0x1d,
+ 65536/0x13, 65536/0x16, 65536/0x1a, 65536/0x1a, 65536/0x1b, 65536/0x1d, 65536/0x1d, 65536/0x23,
+ 65536/0x16, 65536/0x18, 65536/0x1b, 65536/0x1b, 65536/0x13, 65536/0x20, 65536/0x22, 65536/0x26,
+ 65536/0x1a, 65536/0x1b, 65536/0x13, 65536/0x13, 65536/0x20, 65536/0x23, 65536/0x26, 65536/0x2e,
+ 65536/0x1b, 65536/0x1d, 65536/0x22, 65536/0x22, 65536/0x23, 65536/0x28, 65536/0x2e, 65536/0x38,
+ 65536/0x1d, 65536/0x22, 65536/0x22, 65536/0x25, 65536/0x28, 65536/0x30, 65536/0x38, 65536/0x45,
+ 65536/0x22, 65536/0x25, 65536/0x26, 65536/0x28, 65536/0x30, 65536/0x3a, 65536/0x45, 65536/0x53,
};
unsigned short non_intra_fqm[64] = {
@@ -2228,20 +2206,39 @@ gen75_mfc_mpeg2_pipeline_slice_group(VADriverContextP ctx,
0xff,
slice_batch);
} else {
- gen75_mfc_mpeg2_pak_object_inter(ctx,
- encode_state,
- encoder_context,
- msg,
- width_in_mbs, height_in_mbs,
- h_pos, v_pos,
- first_mb_in_slice,
- last_mb_in_slice,
- first_mb_in_slice_group,
- last_mb_in_slice_group,
- slice_param->quantiser_scale_code,
- 0,
- 0xff,
- slice_batch);
+ int inter_rdo, intra_rdo;
+ inter_rdo = msg[AVC_INTER_RDO_OFFSET] & AVC_RDO_MASK;
+ intra_rdo = msg[AVC_INTRA_RDO_OFFSET] & AVC_RDO_MASK;
+
+ if (intra_rdo < inter_rdo)
+ gen75_mfc_mpeg2_pak_object_intra(ctx,
+ encoder_context,
+ h_pos, v_pos,
+ first_mb_in_slice,
+ last_mb_in_slice,
+ first_mb_in_slice_group,
+ last_mb_in_slice_group,
+ 0x1a,
+ slice_param->quantiser_scale_code,
+ 0x3f,
+ 0,
+ 0xff,
+ slice_batch);
+ else
+ gen75_mfc_mpeg2_pak_object_inter(ctx,
+ encode_state,
+ encoder_context,
+ msg,
+ width_in_mbs, height_in_mbs,
+ h_pos, v_pos,
+ first_mb_in_slice,
+ last_mb_in_slice,
+ first_mb_in_slice_group,
+ last_mb_in_slice_group,
+ slice_param->quantiser_scale_code,
+ 0,
+ 0xff,
+ slice_batch);
}
}
@@ -2286,18 +2283,13 @@ gen75_mfc_mpeg2_software_slice_batchbuffer(VADriverContextP ctx,
struct encode_state *encode_state,
struct intel_encoder_context *encoder_context)
{
- struct i965_driver_data *i965 = i965_driver_data(ctx);
+ struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
struct intel_batchbuffer *batch;
- VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
VAEncSliceParameterBufferMPEG2 *next_slice_group_param = NULL;
dri_bo *batch_bo;
int i;
- int buffer_size;
- int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
- int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
- buffer_size = width_in_mbs * height_in_mbs * 64;
- batch = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, buffer_size);
+ batch = mfc_context->aux_batchbuffer;
batch_bo = batch->buffer;
for (i = 0; i < encode_state->num_slice_params_ext; i++) {
@@ -2318,6 +2310,7 @@ gen75_mfc_mpeg2_software_slice_batchbuffer(VADriverContextP ctx,
dri_bo_reference(batch_bo);
intel_batchbuffer_free(batch);
+ mfc_context->aux_batchbuffer = NULL;
return batch_bo;
}
@@ -2385,7 +2378,7 @@ intel_mfc_mpeg2_prepare(VADriverContextP ctx,
/* reconstructed surface */
obj_surface = encode_state->reconstructed_object;
- i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
+ i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
mfc_context->pre_deblocking_output.bo = obj_surface->bo;
dri_bo_reference(mfc_context->pre_deblocking_output.bo);
mfc_context->surface_state.width = obj_surface->orig_width;
@@ -2439,7 +2432,7 @@ intel_mfc_mpeg2_prepare(VADriverContextP ctx,
dri_bo_map(bo, 1);
coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
coded_buffer_segment->mapped = 0;
- coded_buffer_segment->codec = CODED_MPEG2;
+ coded_buffer_segment->codec = encoder_context->codec;
dri_bo_unmap(bo);
return vaStatus;
@@ -2516,16 +2509,18 @@ gen75_mfc_context_destroy(void *context)
}
static VAStatus gen75_mfc_pipeline(VADriverContextP ctx,
- VAProfile profile,
- struct encode_state *encode_state,
- struct intel_encoder_context *encoder_context)
+ VAProfile profile,
+ struct encode_state *encode_state,
+ struct intel_encoder_context *encoder_context)
{
VAStatus vaStatus;
switch (profile) {
- case VAProfileH264Baseline:
+ case VAProfileH264ConstrainedBaseline:
case VAProfileH264Main:
case VAProfileH264High:
+ case VAProfileH264MultiviewHigh:
+ case VAProfileH264StereoHigh:
vaStatus = gen75_mfc_avc_encode_picture(ctx, encode_state, encoder_context);
break;
@@ -2563,7 +2558,7 @@ Bool gen75_mfc_context_init(VADriverContextP ctx, struct intel_encoder_context *
i965_gpe_load_kernels(ctx,
&mfc_context->gpe_context,
gen75_mfc_kernels,
- NUM_MFC_KERNEL);
+ 1);
mfc_context->pipe_mode_select = gen75_mfc_pipe_mode_select;
mfc_context->set_surface_state = gen75_mfc_surface_state;
diff --git a/src/gen75_mfd.c b/src/gen75_mfd.c
index 11644d6..299f2b5 100644
--- a/src/gen75_mfd.c
+++ b/src/gen75_mfd.c
@@ -67,6 +67,7 @@ gen75_mfd_init_avc_surface(VADriverContextP ctx,
if (!gen7_avc_surface) {
gen7_avc_surface = calloc(sizeof(GenAvcSurface), 1);
+ gen7_avc_surface->frame_store_id = -1;
assert((obj_surface->size & 0x3f) == 0);
obj_surface->private_data = gen7_avc_surface;
}
@@ -137,12 +138,16 @@ gen75_mfd_surface_state(VADriverContextP ctx,
struct object_surface *obj_surface = decode_state->render_object;
unsigned int y_cb_offset;
unsigned int y_cr_offset;
+ unsigned int surface_format;
assert(obj_surface);
y_cb_offset = obj_surface->y_cb_offset;
y_cr_offset = obj_surface->y_cr_offset;
+ surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ?
+ MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8;
+
BEGIN_BCS_BATCH(batch, 6);
OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
OUT_BCS_BATCH(batch, 0);
@@ -150,7 +155,7 @@ gen75_mfd_surface_state(VADriverContextP ctx,
((obj_surface->orig_height - 1) << 18) |
((obj_surface->orig_width - 1) << 4));
OUT_BCS_BATCH(batch,
- (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
+ (surface_format << 28) | /* 420 planar YUV surface */
((standard_select != MFX_FORMAT_JPEG) << 27) | /* interleave chroma, set to 0 for JPEG */
(0 << 22) | /* surface object control state, ignored */
((obj_surface->width - 1) << 3) | /* pitch */
@@ -566,7 +571,7 @@ gen75_mfd_avc_img_state(VADriverContextP ctx,
BEGIN_BCS_BATCH(batch, 17);
OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (17 - 2));
OUT_BCS_BATCH(batch,
- width_in_mbs * height_in_mbs);
+ (width_in_mbs * height_in_mbs - 1));
OUT_BCS_BATCH(batch,
((height_in_mbs - 1) << 16) |
((width_in_mbs - 1) << 0));
@@ -628,25 +633,13 @@ gen75_mfd_avc_qm_state(VADriverContextP ctx,
}
}
-static void
+static inline void
gen75_mfd_avc_picid_state(VADriverContextP ctx,
struct decode_state *decode_state,
struct gen7_mfd_context *gen7_mfd_context)
{
- struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
-
- BEGIN_BCS_BATCH(batch, 10);
- OUT_BCS_BATCH(batch, MFD_AVC_PICID_STATE | (10 - 2));
- OUT_BCS_BATCH(batch, 1); // disable Picture ID Remapping
- OUT_BCS_BATCH(batch, 0);
- OUT_BCS_BATCH(batch, 0);
- OUT_BCS_BATCH(batch, 0);
- OUT_BCS_BATCH(batch, 0);
- OUT_BCS_BATCH(batch, 0);
- OUT_BCS_BATCH(batch, 0);
- OUT_BCS_BATCH(batch, 0);
- OUT_BCS_BATCH(batch, 0);
- ADVANCE_BCS_BATCH(batch);
+ gen75_send_avc_picid_state(gen7_mfd_context->base.batch,
+ gen7_mfd_context->reference_surface);
}
static void
@@ -660,7 +653,7 @@ gen75_mfd_avc_directmode_state_bplus(VADriverContextP ctx,
struct object_surface *obj_surface;
GenAvcSurface *gen7_avc_surface;
VAPictureH264 *va_pic;
- int i, j;
+ int i;
BEGIN_BCS_BATCH(batch, 71);
OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
@@ -700,26 +693,14 @@ gen75_mfd_avc_directmode_state_bplus(VADriverContextP ctx,
/* POC List */
for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
- if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID) {
- int found = 0;
-
- assert(gen7_mfd_context->reference_surface[i].obj_surface != NULL);
+ obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
- for (j = 0; j < ARRAY_ELEMS(pic_param->ReferenceFrames); j++) {
- va_pic = &pic_param->ReferenceFrames[j];
-
- if (va_pic->flags & VA_PICTURE_H264_INVALID)
- continue;
+ if (obj_surface) {
+ const VAPictureH264 * const va_pic = avc_find_picture(
+ obj_surface->base.id, pic_param->ReferenceFrames,
+ ARRAY_ELEMS(pic_param->ReferenceFrames));
- if (va_pic->picture_id == gen7_mfd_context->reference_surface[i].surface_id) {
- found = 1;
- break;
- }
- }
-
- assert(found == 1);
- assert(!(va_pic->flags & VA_PICTURE_H264_INVALID));
-
+ assert(va_pic != NULL);
OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
} else {
@@ -747,7 +728,7 @@ gen75_mfd_avc_directmode_state(VADriverContextP ctx,
struct object_surface *obj_surface;
GenAvcSurface *gen7_avc_surface;
VAPictureH264 *va_pic;
- int i, j;
+ int i;
if (IS_STEPPING_BPLUS(i965)) {
gen75_mfd_avc_directmode_state_bplus(ctx, decode_state, pic_param, slice_param,
@@ -807,26 +788,14 @@ gen75_mfd_avc_directmode_state(VADriverContextP ctx,
/* POC List */
for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
- if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID) {
- int found = 0;
+ obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
- assert(gen7_mfd_context->reference_surface[i].obj_surface != NULL);
+ if (obj_surface) {
+ const VAPictureH264 * const va_pic = avc_find_picture(
+ obj_surface->base.id, pic_param->ReferenceFrames,
+ ARRAY_ELEMS(pic_param->ReferenceFrames));
- for (j = 0; j < ARRAY_ELEMS(pic_param->ReferenceFrames); j++) {
- va_pic = &pic_param->ReferenceFrames[j];
-
- if (va_pic->flags & VA_PICTURE_H264_INVALID)
- continue;
-
- if (va_pic->picture_id == gen7_mfd_context->reference_surface[i].surface_id) {
- found = 1;
- break;
- }
- }
-
- assert(found == 1);
- assert(!(va_pic->flags & VA_PICTURE_H264_INVALID));
-
+ assert(va_pic != NULL);
OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
} else {
@@ -843,6 +812,15 @@ gen75_mfd_avc_directmode_state(VADriverContextP ctx,
}
+/*
+ * Thin wrapper around gen6_mfd_avc_phantom_slice(): passes ctx, pic_param
+ * and next_slice_param through unchanged and supplies this context's batch
+ * buffer (gen7_mfd_context->base.batch) as the last argument.
+ */
static void
+gen75_mfd_avc_phantom_slice_first(VADriverContextP ctx,
+ VAPictureParameterBufferH264 *pic_param,
+ VASliceParameterBufferH264 *next_slice_param,
+ struct gen7_mfd_context *gen7_mfd_context)
+{
+ gen6_mfd_avc_phantom_slice(ctx, pic_param, next_slice_param, gen7_mfd_context->base.batch);
+}
+
+static void
gen75_mfd_avc_slice_state(VADriverContextP ctx,
VAPictureParameterBufferH264 *pic_param,
VASliceParameterBufferH264 *slice_param,
@@ -1074,7 +1052,8 @@ gen75_mfd_avc_decode_init(VADriverContextP ctx,
assert(decode_state->pic_param && decode_state->pic_param->buffer);
pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
- intel_update_avc_frame_store_index(ctx, decode_state, pic_param, gen7_mfd_context->reference_surface);
+ gen75_update_avc_frame_store_index(ctx, decode_state, pic_param,
+ gen7_mfd_context->reference_surface);
width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
@@ -1082,20 +1061,12 @@ gen75_mfd_avc_decode_init(VADriverContextP ctx,
/* Current decoded picture */
obj_surface = decode_state->render_object;
- obj_surface->flags &= ~SURFACE_REF_DIS_MASK;
- obj_surface->flags |= (pic_param->pic_fields.bits.reference_pic_flag ? SURFACE_REFERENCED : 0);
- i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
-
- /* initial uv component for YUV400 case */
- if (pic_param->seq_fields.bits.chroma_format_idc == 0) {
- unsigned int uv_offset = obj_surface->width * obj_surface->height;
- unsigned int uv_size = obj_surface->width * obj_surface->height / 2;
-
- drm_intel_gem_bo_map_gtt(obj_surface->bo);
- memset(obj_surface->bo->virtual + uv_offset, 0x80, uv_size);
- drm_intel_gem_bo_unmap_gtt(obj_surface->bo);
- }
+ if (pic_param->pic_fields.bits.reference_pic_flag)
+ obj_surface->flags |= SURFACE_REFERENCED;
+ else
+ obj_surface->flags &= ~SURFACE_REFERENCED;
+ avc_ensure_surface_bo(ctx, decode_state, obj_surface, pic_param);
gen75_mfd_init_avc_surface(ctx, pic_param, obj_surface);
dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
@@ -1169,8 +1140,8 @@ gen75_mfd_avc_decode_picture(VADriverContextP ctx,
gen75_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
gen75_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
gen75_mfd_avc_qm_state(ctx, decode_state, gen7_mfd_context);
- gen75_mfd_avc_img_state(ctx, decode_state, gen7_mfd_context);
gen75_mfd_avc_picid_state(ctx, decode_state, gen7_mfd_context);
+ gen75_mfd_avc_img_state(ctx, decode_state, gen7_mfd_context);
for (j = 0; j < decode_state->num_slice_params; j++) {
assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
@@ -1183,6 +1154,9 @@ gen75_mfd_avc_decode_picture(VADriverContextP ctx,
else
next_slice_group_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j + 1]->buffer;
+ if (j == 0 && slice_param->first_mb_in_slice)
+ gen75_mfd_avc_phantom_slice_first(ctx, pic_param, slice_param, gen7_mfd_context);
+
for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
assert((slice_param->slice_type == SLICE_TYPE_I) ||
@@ -1233,7 +1207,7 @@ gen75_mfd_mpeg2_decode_init(VADriverContextP ctx,
/* Current decoded picture */
obj_surface = decode_state->render_object;
- i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
+ i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
@@ -1569,7 +1543,7 @@ gen75_mfd_vc1_decode_init(VADriverContextP ctx,
/* Current decoded picture */
obj_surface = decode_state->render_object;
- i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
+ i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
gen75_mfd_init_vc1_surface(ctx, pic_param, obj_surface);
dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
@@ -2171,13 +2145,13 @@ gen75_mfd_jpeg_decode_init(VADriverContextP ctx,
struct object_surface *obj_surface;
VAPictureParameterBufferJPEGBaseline *pic_param;
int subsampling = SUBSAMPLE_YUV420;
- int fourcc = VA_FOURCC('I', 'M', 'C', '3');
+ int fourcc = VA_FOURCC_IMC3;
pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
if (pic_param->num_components == 1) {
subsampling = SUBSAMPLE_YUV400;
- fourcc = VA_FOURCC('Y', '8', '0', '0');
+ fourcc = VA_FOURCC_Y800;
} else if (pic_param->num_components == 3) {
int h1 = pic_param->components[0].h_sampling_factor;
int h2 = pic_param->components[1].h_sampling_factor;
@@ -2189,31 +2163,31 @@ gen75_mfd_jpeg_decode_init(VADriverContextP ctx,
if (h1 == 2 && h2 == 1 && h3 == 1 &&
v1 == 2 && v2 == 1 && v3 == 1) {
subsampling = SUBSAMPLE_YUV420;
- fourcc = VA_FOURCC('I', 'M', 'C', '3');
+ fourcc = VA_FOURCC_IMC3;
} else if (h1 == 2 && h2 == 1 && h3 == 1 &&
v1 == 1 && v2 == 1 && v3 == 1) {
subsampling = SUBSAMPLE_YUV422H;
- fourcc = VA_FOURCC('4', '2', '2', 'H');
+ fourcc = VA_FOURCC_422H;
} else if (h1 == 1 && h2 == 1 && h3 == 1 &&
v1 == 1 && v2 == 1 && v3 == 1) {
subsampling = SUBSAMPLE_YUV444;
- fourcc = VA_FOURCC('4', '4', '4', 'P');
+ fourcc = VA_FOURCC_444P;
} else if (h1 == 4 && h2 == 1 && h3 == 1 &&
v1 == 1 && v2 == 1 && v3 == 1) {
subsampling = SUBSAMPLE_YUV411;
- fourcc = VA_FOURCC('4', '1', '1', 'P');
+ fourcc = VA_FOURCC_411P;
} else if (h1 == 1 && h2 == 1 && h3 == 1 &&
v1 == 2 && v2 == 1 && v3 == 1) {
subsampling = SUBSAMPLE_YUV422V;
- fourcc = VA_FOURCC('4', '2', '2', 'V');
+ fourcc = VA_FOURCC_422V;
} else if (h1 == 2 && h2 == 1 && h3 == 1 &&
v1 == 2 && v2 == 2 && v3 == 2) {
subsampling = SUBSAMPLE_YUV422H;
- fourcc = VA_FOURCC('4', '2', '2', 'H');
+ fourcc = VA_FOURCC_422H;
- } else if (h2 == 2 && h2 == 2 && h3 == 2 &&
+ } else if (h1 == 2 && h2 == 2 && h3 == 2 && /* was "h2 == 2" twice, which left h1 unchecked */
v1 == 2 && v2 == 1 && v3 == 1) {
subsampling = SUBSAMPLE_YUV422V;
- fourcc = VA_FOURCC('4', '2', '2', 'V');
+ fourcc = VA_FOURCC_422V;
} else
assert(0);
} else {
@@ -2466,18 +2440,6 @@ gen75_mfd_jpeg_bsd_object(VADriverContextP ctx,
/* Workaround for JPEG decoding on Ivybridge */
-VAStatus
-i965_DestroySurfaces(VADriverContextP ctx,
- VASurfaceID *surface_list,
- int num_surfaces);
-VAStatus
-i965_CreateSurfaces(VADriverContextP ctx,
- int width,
- int height,
- int format,
- int num_surfaces,
- VASurfaceID *surfaces);
-
static struct {
int width;
int height;
@@ -2520,7 +2482,7 @@ gen75_jpeg_wa_init(VADriverContextP ctx,
obj_surface = SURFACE(gen7_mfd_context->jpeg_wa_surface_id);
assert(obj_surface);
- i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
+ i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
gen7_mfd_context->jpeg_wa_surface_object = obj_surface;
if (!gen7_mfd_context->jpeg_wa_slice_data_bo) {
@@ -2826,7 +2788,7 @@ gen75_jpeg_wa_avc_img_state(VADriverContextP ctx,
BEGIN_BCS_BATCH(batch, 16);
OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
OUT_BCS_BATCH(batch,
- width_in_mbs * height_in_mbs);
+ (width_in_mbs * height_in_mbs - 1));
OUT_BCS_BATCH(batch,
((height_in_mbs - 1) << 16) |
((width_in_mbs - 1) << 0));
@@ -3191,9 +3153,11 @@ gen75_mfd_decode_picture(VADriverContextP ctx,
gen75_mfd_mpeg2_decode_picture(ctx, decode_state, gen7_mfd_context);
break;
- case VAProfileH264Baseline:
+ case VAProfileH264ConstrainedBaseline:
case VAProfileH264Main:
case VAProfileH264High:
+ case VAProfileH264StereoHigh:
+ case VAProfileH264MultiviewHigh:
gen75_mfd_avc_decode_picture(ctx, decode_state, gen7_mfd_context);
break;
@@ -3285,9 +3249,11 @@ gen75_dec_hw_context_init(VADriverContextP ctx, struct object_config *obj_config
gen75_mfd_mpeg2_context_init(ctx, gen7_mfd_context);
break;
- case VAProfileH264Baseline:
+ case VAProfileH264ConstrainedBaseline:
case VAProfileH264Main:
case VAProfileH264High:
+ case VAProfileH264StereoHigh:
+ case VAProfileH264MultiviewHigh:
gen75_mfd_avc_context_init(ctx, gen7_mfd_context);
break;
default:
diff --git a/src/gen75_picture_process.c b/src/gen75_picture_process.c
index e2344aa..6978d4b 100644
--- a/src/gen75_picture_process.c
+++ b/src/gen75_picture_process.c
@@ -38,12 +38,6 @@
#include "i965_post_processing.h"
#include "gen75_picture_process.h"
-extern void
-i965_proc_picture(VADriverContextP ctx,
- VAProfile profile,
- union codec_state *codec_state,
- struct hw_context *hw_context);
-
extern struct hw_context *
i965_proc_context_init(VADriverContextP ctx,
struct object_config *obj_config);
@@ -63,8 +57,8 @@ gen75_vpp_fmt_cvt(VADriverContextP ctx,
proc_ctx->vpp_fmt_cvt_ctx = i965_proc_context_init(ctx, NULL);
}
- i965_proc_picture(ctx, profile, codec_state,
- proc_ctx->vpp_fmt_cvt_ctx);
+ va_status = i965_proc_picture(ctx, profile, codec_state,
+ proc_ctx->vpp_fmt_cvt_ctx);
return va_status;
}
@@ -75,6 +69,7 @@ gen75_vpp_vebox(VADriverContextP ctx,
{
VAStatus va_status = VA_STATUS_SUCCESS;
VAProcPipelineParameterBuffer* pipeline_param = proc_ctx->pipeline_param;
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
/* vpp features based on VEBox fixed function */
if(proc_ctx->vpp_vebox_ctx == NULL) {
@@ -85,7 +80,10 @@ gen75_vpp_vebox(VADriverContextP ctx,
proc_ctx->vpp_vebox_ctx->surface_input_object = proc_ctx->surface_pipeline_input_object;
proc_ctx->vpp_vebox_ctx->surface_output_object = proc_ctx->surface_render_output_object;
- va_status = gen75_vebox_process_picture(ctx, proc_ctx->vpp_vebox_ctx);
+ if (IS_HASWELL(i965->intel.device_info))
+ va_status = gen75_vebox_process_picture(ctx, proc_ctx->vpp_vebox_ctx);
+ else if (IS_GEN8(i965->intel.device_info))
+ va_status = gen8_vebox_process_picture(ctx, proc_ctx->vpp_vebox_ctx);
return va_status;
}
@@ -97,14 +95,14 @@ gen75_vpp_gpe(VADriverContextP ctx,
VAStatus va_status = VA_STATUS_SUCCESS;
if(proc_ctx->vpp_gpe_ctx == NULL){
- proc_ctx->vpp_gpe_ctx = gen75_gpe_context_init(ctx);
+ proc_ctx->vpp_gpe_ctx = vpp_gpe_context_init(ctx);
}
proc_ctx->vpp_gpe_ctx->pipeline_param = proc_ctx->pipeline_param;
proc_ctx->vpp_gpe_ctx->surface_pipeline_input_object = proc_ctx->surface_pipeline_input_object;
proc_ctx->vpp_gpe_ctx->surface_output_object = proc_ctx->surface_render_output_object;
- va_status = gen75_gpe_process_picture(ctx, proc_ctx->vpp_gpe_ctx);
+ va_status = vpp_gpe_process_picture(ctx, proc_ctx->vpp_gpe_ctx);
return va_status;
}
@@ -123,29 +121,43 @@ gen75_proc_picture(VADriverContextP ctx,
(VAProcPipelineParameterBuffer *)proc_st->pipeline_param->buffer;
struct object_surface *obj_dst_surf = NULL;
struct object_surface *obj_src_surf = NULL;
+ VAStatus status;
+
proc_ctx->pipeline_param = pipeline_param;
- assert(proc_st->current_render_target != VA_INVALID_SURFACE);
if (proc_st->current_render_target == VA_INVALID_SURFACE ||
- pipeline_param->surface == VA_INVALID_SURFACE)
+ pipeline_param->surface == VA_INVALID_SURFACE) {
+ status = VA_STATUS_ERROR_INVALID_SURFACE;
goto error;
+ }
obj_dst_surf = SURFACE(proc_st->current_render_target);
- if (!obj_dst_surf)
+ if (!obj_dst_surf) {
+ status = VA_STATUS_ERROR_INVALID_SURFACE;
goto error;
+ }
obj_src_surf = SURFACE(proc_ctx->pipeline_param->surface);
- if (!obj_src_surf)
+ if (!obj_src_surf) {
+ status = VA_STATUS_ERROR_INVALID_SURFACE;
+ goto error;
+ }
+
+ if (!obj_src_surf->bo) {
+ status = VA_STATUS_ERROR_INVALID_VALUE; /* The input surface is created without valid content */
goto error;
+ }
- if (pipeline_param->num_filters && !pipeline_param->filters)
+ if (pipeline_param->num_filters && !pipeline_param->filters) {
+ status = VA_STATUS_ERROR_INVALID_PARAMETER;
goto error;
+ }
if (!obj_dst_surf->bo) {
unsigned int is_tiled = 0;
- unsigned int fourcc = VA_FOURCC('N','V','1','2');
+ unsigned int fourcc = VA_FOURCC_NV12;
int sampling = SUBSAMPLE_YUV420;
i965_check_alloc_surface_bo(ctx, obj_dst_surf, is_tiled, fourcc, sampling);
}
@@ -166,23 +178,25 @@ gen75_proc_picture(VADriverContextP ctx,
if (!obj_buf ||
!obj_buf->buffer_store ||
- !obj_buf->buffer_store->buffer)
+ !obj_buf->buffer_store->buffer) {
+ status = VA_STATUS_ERROR_INVALID_FILTER_CHAIN;
goto error;
+ }
VAProcFilterParameterBuffer* filter =
(VAProcFilterParameterBuffer*)obj_buf-> buffer_store->buffer;
- if (filter->type == VAProcFilterNoiseReduction ||
- filter->type == VAProcFilterDeinterlacing ||
+ if (filter->type == VAProcFilterNoiseReduction ||
+ filter->type == VAProcFilterDeinterlacing ||
+ filter->type == VAProcFilterSkinToneEnhancement ||
filter->type == VAProcFilterColorBalance){
gen75_vpp_vebox(ctx, proc_ctx);
}else if(filter->type == VAProcFilterSharpening){
- assert(obj_src_surf->fourcc == VA_FOURCC('N','V','1','2') &&
- obj_dst_surf->fourcc == VA_FOURCC('N','V','1','2'));
-
- if (obj_src_surf->fourcc != VA_FOURCC('N', 'V', '1', '2') ||
- obj_dst_surf->fourcc != VA_FOURCC('N', 'V', '1', '2'))
+ if (obj_src_surf->fourcc != VA_FOURCC_NV12 ||
+ obj_dst_surf->fourcc != VA_FOURCC_NV12) {
+ status = VA_STATUS_ERROR_UNIMPLEMENTED;
goto error;
+ }
gen75_vpp_gpe(ctx, proc_ctx);
}
@@ -191,18 +205,19 @@ gen75_proc_picture(VADriverContextP ctx,
for (i = 0; i < pipeline_param->num_filters; i++){
struct object_buffer * obj_buf = BUFFER(pipeline_param->filters[i]);
- assert(obj_buf && obj_buf->buffer_store && obj_buf->buffer_store->buffer);
-
if (!obj_buf ||
!obj_buf->buffer_store ||
- !obj_buf->buffer_store->buffer)
+ !obj_buf->buffer_store->buffer) {
+ status = VA_STATUS_ERROR_INVALID_FILTER_CHAIN;
goto error;
+ }
VAProcFilterParameterBuffer* filter =
(VAProcFilterParameterBuffer*)obj_buf-> buffer_store->buffer;
- if (filter->type != VAProcFilterNoiseReduction &&
- filter->type != VAProcFilterDeinterlacing &&
+ if (filter->type != VAProcFilterNoiseReduction &&
+ filter->type != VAProcFilterDeinterlacing &&
+ filter->type != VAProcFilterSkinToneEnhancement &&
filter->type != VAProcFilterColorBalance) {
printf("Do not support multiply filters outside vebox pipeline \n");
assert(0);
@@ -214,7 +229,7 @@ gen75_proc_picture(VADriverContextP ctx,
return VA_STATUS_SUCCESS;
error:
- return VA_STATUS_ERROR_INVALID_PARAMETER;
+ return status;
}
static void
@@ -235,7 +250,7 @@ gen75_proc_context_destroy(void *hw_context)
}
if(proc_ctx->vpp_gpe_ctx){
- gen75_gpe_context_destroy(ctx,proc_ctx->vpp_gpe_ctx);
+ vpp_gpe_context_destroy(ctx,proc_ctx->vpp_gpe_ctx);
proc_ctx->vpp_gpe_ctx = NULL;
}
diff --git a/src/gen75_vme.c b/src/gen75_vme.c
index b796505..576e91a 100644
--- a/src/gen75_vme.c
+++ b/src/gen75_vme.c
@@ -38,14 +38,6 @@
#include "gen6_vme.h"
#include "gen6_mfc.h"
-#define SURFACE_STATE_PADDED_SIZE_0_GEN7 ALIGN(sizeof(struct gen7_surface_state), 32)
-#define SURFACE_STATE_PADDED_SIZE_1_GEN7 ALIGN(sizeof(struct gen7_surface_state2), 32)
-#define SURFACE_STATE_PADDED_SIZE_GEN7 MAX(SURFACE_STATE_PADDED_SIZE_0_GEN7, SURFACE_STATE_PADDED_SIZE_1_GEN7)
-
-#define SURFACE_STATE_PADDED_SIZE_0_GEN6 ALIGN(sizeof(struct i965_surface_state), 32)
-#define SURFACE_STATE_PADDED_SIZE_1_GEN6 ALIGN(sizeof(struct i965_surface_state2), 32)
-#define SURFACE_STATE_PADDED_SIZE_GEN6 MAX(SURFACE_STATE_PADDED_SIZE_0_GEN6, SURFACE_STATE_PADDED_SIZE_1_GEN6)
-
#define SURFACE_STATE_PADDED_SIZE MAX(SURFACE_STATE_PADDED_SIZE_GEN6, SURFACE_STATE_PADDED_SIZE_GEN7)
#define SURFACE_STATE_OFFSET(index) (SURFACE_STATE_PADDED_SIZE * index)
#define BINDING_TABLE_OFFSET(index) (SURFACE_STATE_OFFSET(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * index)
@@ -113,7 +105,7 @@ static const uint32_t gen75_vme_mpeg2_intra_frame[][4] = {
};
static const uint32_t gen75_vme_mpeg2_inter_frame[][4] = {
-#include "shaders/vme/mpeg2_inter_frame_haswell.g75b"
+#include "shaders/vme/mpeg2_inter_haswell.g75b"
};
static const uint32_t gen75_vme_mpeg2_batchbuffer[][4] = {
@@ -264,7 +256,6 @@ gen75_vme_surface_setup(VADriverContextP ctx,
struct intel_encoder_context *encoder_context)
{
struct object_surface *obj_surface;
- struct i965_driver_data *i965 = i965_driver_data(ctx);
/*Setup surfaces state*/
/* current picture for encoding */
@@ -276,43 +267,14 @@ gen75_vme_surface_setup(VADriverContextP ctx,
if (!is_intra) {
VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
int slice_type;
- struct object_surface *slice_obj_surface;
- int ref_surface_id;
slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
+ assert(slice_type != SLICE_TYPE_I && slice_type != SLICE_TYPE_SI);
- if (slice_type == SLICE_TYPE_P || slice_type == SLICE_TYPE_B) {
- slice_obj_surface = NULL;
- ref_surface_id = slice_param->RefPicList0[0].picture_id;
- if (ref_surface_id != 0 && ref_surface_id != VA_INVALID_SURFACE) {
- slice_obj_surface = SURFACE(ref_surface_id);
- }
- if (slice_obj_surface && slice_obj_surface->bo) {
- obj_surface = slice_obj_surface;
- } else {
- obj_surface = encode_state->reference_objects[0];
- }
- /* reference 0 */
- if (obj_surface && obj_surface->bo)
- gen75_vme_source_surface_state(ctx, 1, obj_surface, encoder_context);
- }
- if (slice_type == SLICE_TYPE_B) {
- /* reference 1 */
- slice_obj_surface = NULL;
- ref_surface_id = slice_param->RefPicList1[0].picture_id;
- if (ref_surface_id != 0 && ref_surface_id != VA_INVALID_SURFACE) {
- slice_obj_surface = SURFACE(ref_surface_id);
- }
- if (slice_obj_surface && slice_obj_surface->bo) {
- obj_surface = slice_obj_surface;
- } else {
- obj_surface = encode_state->reference_objects[0];
- }
+ intel_avc_vme_reference_state(ctx, encode_state, encoder_context, 0, 1, gen75_vme_source_surface_state);
- obj_surface = encode_state->reference_objects[1];
- if (obj_surface && obj_surface->bo)
- gen75_vme_source_surface_state(ctx, 2, obj_surface, encoder_context);
- }
+ if (slice_type == SLICE_TYPE_B)
+ intel_avc_vme_reference_state(ctx, encode_state, encoder_context, 1, 2, gen75_vme_source_surface_state);
}
/* VME output */
@@ -374,17 +336,15 @@ static VAStatus gen75_vme_constant_setup(VADriverContextP ctx,
vme_state_message = (unsigned int *)vme_context->vme_state_message;
- if (encoder_context->profile == VAProfileH264Baseline ||
- encoder_context->profile == VAProfileH264Main ||
- encoder_context->profile == VAProfileH264High) {
+ if (encoder_context->codec == CODEC_H264 ||
+ encoder_context->codec == CODEC_H264_MVC) {
if (vme_context->h264_level >= 30) {
mv_num = 16;
if (vme_context->h264_level >= 31)
mv_num = 8;
}
- } else if (encoder_context->profile == VAProfileMPEG2Simple ||
- encoder_context->profile == VAProfileMPEG2Main) {
+ } else if (encoder_context->codec == CODEC_MPEG2) {
mv_num = 2;
}
@@ -501,10 +461,9 @@ static VAStatus gen75_vme_vme_state_setup(VADriverContextP ctx,
vme_state_message[i] = 0;
}
- switch (encoder_context->profile) {
- case VAProfileH264Baseline:
- case VAProfileH264Main:
- case VAProfileH264High:
+ switch (encoder_context->codec) {
+ case CODEC_H264:
+ case CODEC_H264_MVC:
gen75_vme_state_setup_fixup(ctx, encode_state, encoder_context, vme_state_message);
break;
@@ -575,7 +534,7 @@ gen75_vme_fill_vme_batchbuffer(VADriverContextP ctx,
/*inline data */
*command_ptr++ = (mb_width << 16 | mb_y << 8 | mb_x);
- *command_ptr++ = ( (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
+ *command_ptr++ = ((encoder_context->quality_level << 24) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
i += 1;
}
@@ -619,44 +578,50 @@ static void gen75_vme_pipeline_programing(VADriverContextP ctx,
int kernel_shader;
bool allow_hwscore = true;
int s;
-
- for (s = 0; s < encode_state->num_slice_params_ext; s++) {
- pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer;
- if ((pSliceParameter->macroblock_address % width_in_mbs)) {
- allow_hwscore = false;
- break;
- }
+ unsigned int is_low_quality = (encoder_context->quality_level == ENCODER_LOW_QUALITY);
+
+ if (is_low_quality)
+ allow_hwscore = false;
+ else {
+ for (s = 0; s < encode_state->num_slice_params_ext; s++) {
+ pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer;
+ if ((pSliceParameter->macroblock_address % width_in_mbs)) {
+ allow_hwscore = false;
+ break;
+ }
+ }
}
+
if ((pSliceParameter->slice_type == SLICE_TYPE_I) ||
- (pSliceParameter->slice_type == SLICE_TYPE_I)) {
+ (pSliceParameter->slice_type == SLICE_TYPE_SI)) { /* was SLICE_TYPE_I repeated; SI slices must also take the intra shader */
kernel_shader = VME_INTRA_SHADER;
- } else if ((pSliceParameter->slice_type == SLICE_TYPE_P) ||
- (pSliceParameter->slice_type == SLICE_TYPE_SP)) {
+ } else if ((pSliceParameter->slice_type == SLICE_TYPE_P) ||
+ (pSliceParameter->slice_type == SLICE_TYPE_SP)) {
kernel_shader = VME_INTER_SHADER;
- } else {
+ } else {
kernel_shader = VME_BINTER_SHADER;
if (!allow_hwscore)
- kernel_shader = VME_INTER_SHADER;
- }
+ kernel_shader = VME_INTER_SHADER;
+ }
if (allow_hwscore)
gen7_vme_walker_fill_vme_batchbuffer(ctx,
- encode_state,
- width_in_mbs, height_in_mbs,
- kernel_shader,
- pPicParameter->pic_fields.bits.transform_8x8_mode_flag,
- encoder_context);
+ encode_state,
+ width_in_mbs, height_in_mbs,
+ kernel_shader,
+ pPicParameter->pic_fields.bits.transform_8x8_mode_flag,
+ encoder_context);
else
gen75_vme_fill_vme_batchbuffer(ctx,
- encode_state,
- width_in_mbs, height_in_mbs,
- kernel_shader,
- pPicParameter->pic_fields.bits.transform_8x8_mode_flag,
- encoder_context);
+ encode_state,
+ width_in_mbs, height_in_mbs,
+ kernel_shader,
+ pPicParameter->pic_fields.bits.transform_8x8_mode_flag,
+ encoder_context);
intel_batchbuffer_start_atomic(batch, 0x1000);
gen6_gpe_pipeline_setup(ctx, &vme_context->gpe_context, batch);
BEGIN_BATCH(batch, 2);
- OUT_BATCH(batch, MI_BATCH_BUFFER_START | (2 << 6));
+ OUT_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8));
OUT_RELOC(batch,
vme_context->vme_batchbuffer.bo,
I915_GEM_DOMAIN_COMMAND, 0,
@@ -921,21 +886,54 @@ gen75_vme_mpeg2_pipeline_programing(VADriverContextP ctx,
{
struct gen6_vme_context *vme_context = encoder_context->vme_context;
struct intel_batchbuffer *batch = encoder_context->base.batch;
+ VAEncPictureParameterBufferMPEG2 *pic_param = NULL;
VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
+ bool allow_hwscore = true;
+ int s;
+ int kernel_shader;
- gen75_vme_mpeg2_fill_vme_batchbuffer(ctx,
- encode_state,
- width_in_mbs, height_in_mbs,
- is_intra ? VME_INTRA_SHADER : VME_INTER_SHADER,
- 0,
- encoder_context);
+ pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
+
+ for (s = 0; s < encode_state->num_slice_params_ext; s++) {
+ int j;
+ VAEncSliceParameterBufferMPEG2 *slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[s]->buffer;
+
+ for (j = 0; j < encode_state->slice_params_ext[s]->num_elements; j++) {
+ if (slice_param->macroblock_address % width_in_mbs) {
+ allow_hwscore = false;
+ break;
+ }
+ }
+ }
+
+ pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
+ if (pic_param->picture_type == VAEncPictureTypeIntra) {
+ allow_hwscore = false;
+ kernel_shader = VME_INTRA_SHADER;
+ } else {
+ kernel_shader = VME_INTER_SHADER;
+ }
+
+ if (allow_hwscore)
+ gen7_vme_mpeg2_walker_fill_vme_batchbuffer(ctx,
+ encode_state,
+ width_in_mbs, height_in_mbs,
+ kernel_shader,
+ encoder_context);
+ else
+ gen75_vme_mpeg2_fill_vme_batchbuffer(ctx,
+ encode_state,
+ width_in_mbs, height_in_mbs,
+ kernel_shader,
+ 0,
+ encoder_context);
intel_batchbuffer_start_atomic(batch, 0x1000);
gen6_gpe_pipeline_setup(ctx, &vme_context->gpe_context, batch);
BEGIN_BATCH(batch, 2);
- OUT_BATCH(batch, MI_BATCH_BUFFER_START | (2 << 6));
+ OUT_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8));
OUT_RELOC(batch,
vme_context->vme_batchbuffer.bo,
I915_GEM_DOMAIN_COMMAND, 0,
@@ -953,10 +951,19 @@ gen75_vme_mpeg2_prepare(VADriverContextP ctx,
VAStatus vaStatus = VA_STATUS_SUCCESS;
VAEncSliceParameterBufferMPEG2 *slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[0]->buffer;
+ VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
+ struct gen6_vme_context *vme_context = encoder_context->vme_context;
+
+ if ((!vme_context->mpeg2_level) ||
+ (vme_context->mpeg2_level != (seq_param->sequence_extension.bits.profile_and_level_indication & MPEG2_LEVEL_MASK))) {
+ vme_context->mpeg2_level = seq_param->sequence_extension.bits.profile_and_level_indication & MPEG2_LEVEL_MASK;
+ }
+
/*Setup all the memory object*/
gen75_vme_mpeg2_surface_setup(ctx, encode_state, slice_param->is_intra_slice, encoder_context);
gen75_vme_interface_setup(ctx, encode_state, encoder_context);
gen75_vme_vme_state_setup(ctx, encode_state, slice_param->is_intra_slice, encoder_context);
+ intel_vme_mpeg2_state_setup(ctx, encode_state, encoder_context);
gen75_vme_constant_setup(ctx, encode_state, encoder_context);
/*Programing media pipeline*/
@@ -1007,19 +1014,17 @@ Bool gen75_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *
{
struct gen6_vme_context *vme_context = calloc(1, sizeof(struct gen6_vme_context));
struct i965_kernel *vme_kernel_list = NULL;
- int i965_kernel_num;
+ int i965_kernel_num;
- switch (encoder_context->profile) {
- case VAProfileH264Baseline:
- case VAProfileH264Main:
- case VAProfileH264High:
+ switch (encoder_context->codec) {
+ case CODEC_H264:
+ case CODEC_H264_MVC:
vme_kernel_list = gen75_vme_kernels;
encoder_context->vme_pipeline = gen75_vme_pipeline;
i965_kernel_num = sizeof(gen75_vme_kernels) / sizeof(struct i965_kernel);
break;
- case VAProfileMPEG2Simple:
- case VAProfileMPEG2Main:
+ case CODEC_MPEG2:
vme_kernel_list = gen75_vme_mpeg2_kernels;
encoder_context->vme_pipeline = gen75_vme_mpeg2_pipeline;
i965_kernel_num = sizeof(gen75_vme_mpeg2_kernels) / sizeof(struct i965_kernel);
@@ -1041,9 +1046,9 @@ Bool gen75_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *
vme_context->gpe_context.curbe.length = CURBE_TOTAL_DATA_LENGTH;
vme_context->gpe_context.vfe_state.max_num_threads = 60 - 1;
- vme_context->gpe_context.vfe_state.num_urb_entries = 16;
+ vme_context->gpe_context.vfe_state.num_urb_entries = 64;
vme_context->gpe_context.vfe_state.gpgpu_mode = 0;
- vme_context->gpe_context.vfe_state.urb_entry_size = 59 - 1;
+ vme_context->gpe_context.vfe_state.urb_entry_size = 16;
vme_context->gpe_context.vfe_state.curbe_allocation_size = CURBE_ALLOCATION_SIZE - 1;
gen7_vme_scoreboard_init(ctx, vme_context);
diff --git a/src/gen75_vpp_gpe.c b/src/gen75_vpp_gpe.c
index 70f229b..042e4e6 100644
--- a/src/gen75_vpp_gpe.c
+++ b/src/gen75_vpp_gpe.c
@@ -33,6 +33,7 @@
#include "intel_batchbuffer.h"
#include "intel_driver.h"
+#include "i965_structs.h"
#include "i965_defines.h"
#include "i965_drv_video.h"
#include "gen75_vpp_gpe.h"
@@ -40,30 +41,16 @@
#define MAX_INTERFACE_DESC_GEN6 MAX_GPE_KERNELS
#define MAX_MEDIA_SURFACES_GEN6 34
-#define SURFACE_STATE_PADDED_SIZE_0_GEN7 ALIGN(sizeof(struct gen7_surface_state), 32)
-#define SURFACE_STATE_PADDED_SIZE_1_GEN7 ALIGN(sizeof(struct gen7_surface_state2), 32)
-#define SURFACE_STATE_PADDED_SIZE MAX(SURFACE_STATE_PADDED_SIZE_0_GEN7, SURFACE_STATE_PADDED_SIZE_1_GEN7)
+#define SURFACE_STATE_OFFSET_GEN7(index) (SURFACE_STATE_PADDED_SIZE_GEN7 * (index))
+#define BINDING_TABLE_OFFSET_GEN7(index) (SURFACE_STATE_OFFSET_GEN7(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * (index))
-#define SURFACE_STATE_OFFSET(index) (SURFACE_STATE_PADDED_SIZE * (index))
-#define BINDING_TABLE_OFFSET(index) (SURFACE_STATE_OFFSET(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * (index))
+#define SURFACE_STATE_OFFSET_GEN8(index) (SURFACE_STATE_PADDED_SIZE_GEN8 * (index))
+#define BINDING_TABLE_OFFSET_GEN8(index) (SURFACE_STATE_OFFSET_GEN8(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * (index))
#define CURBE_ALLOCATION_SIZE 37
#define CURBE_TOTAL_DATA_LENGTH (4 * 32)
#define CURBE_URB_ENTRY_LENGTH 4
-extern VAStatus
-i965_CreateSurfaces(VADriverContextP ctx,
- int width,
- int height,
- int format,
- int num_surfaces,
- VASurfaceID *surfaces);
-
-extern VAStatus
-i965_DestroySurfaces(VADriverContextP ctx,
- VASurfaceID *surface_list,
- int num_surfaces);
-
/* Shaders information for sharpening */
static const unsigned int gen75_gpe_sharpening_h_blur[][4] = {
#include "shaders/post_processing/gen75/sharpening_h_blur.g75b"
@@ -98,8 +85,43 @@ static struct i965_kernel gen75_vpp_sharpening_kernels[] = {
},
};
+/* sharpening kernels for Broadwell */
+/* Each array below is filled from a prebuilt .g8b shader listing, four 32-bit words per row. */
+static const unsigned int gen8_gpe_sharpening_h_blur[][4] = {
+ #include "shaders/post_processing/gen8/sharpening_h_blur.g8b"
+};
+static const unsigned int gen8_gpe_sharpening_v_blur[][4] = {
+ #include "shaders/post_processing/gen8/sharpening_v_blur.g8b"
+};
+static const unsigned int gen8_gpe_sharpening_unmask[][4] = {
+ #include "shaders/post_processing/gen8/sharpening_unmask.g8b"
+};
+
+/*
+ * Descriptor table pairing each gen8 sharpening shader array above with a
+ * display name and its byte size. All three entries share the
+ * VPP_GPE_SHARPENING identifier and leave the final field NULL.
+ */
+static struct i965_kernel gen8_vpp_sharpening_kernels[] = {
+ {
+ "vpp: sharpening(horizontal blur)",
+ VPP_GPE_SHARPENING,
+ gen8_gpe_sharpening_h_blur,
+ sizeof(gen8_gpe_sharpening_h_blur),
+ NULL
+ },
+ {
+ "vpp: sharpening(vertical blur)",
+ VPP_GPE_SHARPENING,
+ gen8_gpe_sharpening_v_blur,
+ sizeof(gen8_gpe_sharpening_v_blur),
+ NULL
+ },
+ {
+ "vpp: sharpening(unmask)",
+ VPP_GPE_SHARPENING,
+ gen8_gpe_sharpening_unmask,
+ sizeof(gen8_gpe_sharpening_unmask),
+ NULL
+ },
+};
+
static VAStatus
-gpe_surfaces_setup(VADriverContextP ctx,
+gen75_gpe_process_surfaces_setup(VADriverContextP ctx,
struct vpp_gpe_context *vpp_gpe_ctx)
{
struct object_surface *obj_surface;
@@ -111,44 +133,44 @@ gpe_surfaces_setup(VADriverContextP ctx,
for( i = 0; i < input_surface_sum; i += 2){
obj_surface = vpp_gpe_ctx->surface_input_object[i/2];
assert(obj_surface);
- vpp_gpe_ctx->vpp_media_rw_surface_setup(ctx,
- &vpp_gpe_ctx->gpe_ctx,
- obj_surface,
- BINDING_TABLE_OFFSET(i),
- SURFACE_STATE_OFFSET(i));
-
- vpp_gpe_ctx->vpp_media_chroma_surface_setup(ctx,
- &vpp_gpe_ctx->gpe_ctx,
- obj_surface,
- BINDING_TABLE_OFFSET(i + 1),
- SURFACE_STATE_OFFSET(i + 1));
+ gen7_gpe_media_rw_surface_setup(ctx,
+ &vpp_gpe_ctx->gpe_ctx,
+ obj_surface,
+ BINDING_TABLE_OFFSET_GEN7(i),
+ SURFACE_STATE_OFFSET_GEN7(i));
+
+ gen75_gpe_media_chroma_surface_setup(ctx,
+ &vpp_gpe_ctx->gpe_ctx,
+ obj_surface,
+ BINDING_TABLE_OFFSET_GEN7(i + 1),
+ SURFACE_STATE_OFFSET_GEN7(i + 1));
}
/* Binding output NV12 surface(Luma + Chroma) */
obj_surface = vpp_gpe_ctx->surface_output_object;
assert(obj_surface);
- vpp_gpe_ctx->vpp_media_rw_surface_setup(ctx,
- &vpp_gpe_ctx->gpe_ctx,
- obj_surface,
- BINDING_TABLE_OFFSET(input_surface_sum),
- SURFACE_STATE_OFFSET(input_surface_sum));
- vpp_gpe_ctx->vpp_media_chroma_surface_setup(ctx,
- &vpp_gpe_ctx->gpe_ctx,
- obj_surface,
- BINDING_TABLE_OFFSET(input_surface_sum + 1),
- SURFACE_STATE_OFFSET(input_surface_sum + 1));
+ gen7_gpe_media_rw_surface_setup(ctx,
+ &vpp_gpe_ctx->gpe_ctx,
+ obj_surface,
+ BINDING_TABLE_OFFSET_GEN7(input_surface_sum),
+ SURFACE_STATE_OFFSET_GEN7(input_surface_sum));
+ gen75_gpe_media_chroma_surface_setup(ctx,
+ &vpp_gpe_ctx->gpe_ctx,
+ obj_surface,
+ BINDING_TABLE_OFFSET_GEN7(input_surface_sum + 1),
+ SURFACE_STATE_OFFSET_GEN7(input_surface_sum + 1));
/* Bind kernel return buffer surface */
- vpp_gpe_ctx->vpp_buffer_surface_setup(ctx,
- &vpp_gpe_ctx->gpe_ctx,
- &vpp_gpe_ctx->vpp_kernel_return,
- BINDING_TABLE_OFFSET((input_surface_sum + 2)),
- SURFACE_STATE_OFFSET(input_surface_sum + 2));
+ gen7_gpe_buffer_suface_setup(ctx,
+ &vpp_gpe_ctx->gpe_ctx,
+ &vpp_gpe_ctx->vpp_kernel_return,
+ BINDING_TABLE_OFFSET_GEN7((input_surface_sum + 2)),
+ SURFACE_STATE_OFFSET_GEN7(input_surface_sum + 2));
return VA_STATUS_SUCCESS;
}
static VAStatus
-gpe_interface_setup(VADriverContextP ctx,
+gen75_gpe_process_interface_setup(VADriverContextP ctx,
struct vpp_gpe_context *vpp_gpe_ctx)
{
struct gen6_interface_descriptor_data *desc;
@@ -168,7 +190,7 @@ gpe_interface_setup(VADriverContextP ctx,
desc->desc2.sampler_count = 0; /* FIXME: */
desc->desc2.sampler_state_pointer = 0;
desc->desc3.binding_table_entry_count = 6; /* FIXME: */
- desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET(0) >> 5);
+ desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET_GEN7(0) >> 5);
desc->desc4.constant_urb_entry_read_offset = 0;
desc->desc4.constant_urb_entry_read_length = 0;
@@ -186,23 +208,21 @@ gpe_interface_setup(VADriverContextP ctx,
}
static VAStatus
-gpe_constant_setup(VADriverContextP ctx,
- struct vpp_gpe_context *vpp_gpe_ctx){
+gen75_gpe_process_constant_fill(VADriverContextP ctx,
+ struct vpp_gpe_context *vpp_gpe_ctx)
+{
dri_bo_map(vpp_gpe_ctx->gpe_ctx.curbe.bo, 1);
assert(vpp_gpe_ctx->gpe_ctx.curbe.bo->virtual);
- /*Copy buffer into CURB*/
- /*
unsigned char* constant_buffer = vpp_gpe_ctx->gpe_ctx.curbe.bo->virtual;
memcpy(constant_buffer, vpp_gpe_ctx->kernel_param,
vpp_gpe_ctx->kernel_param_size);
- */
dri_bo_unmap(vpp_gpe_ctx->gpe_ctx.curbe.bo);
return VA_STATUS_SUCCESS;
}
static VAStatus
-gpe_fill_thread_parameters(VADriverContextP ctx,
+gen75_gpe_process_parameters_fill(VADriverContextP ctx,
struct vpp_gpe_context *vpp_gpe_ctx)
{
unsigned int *command_ptr;
@@ -237,7 +257,7 @@ gpe_fill_thread_parameters(VADriverContextP ctx,
}
static VAStatus
-gpe_pipeline_setup(VADriverContextP ctx,
+gen75_gpe_process_pipeline_setup(VADriverContextP ctx,
struct vpp_gpe_context *vpp_gpe_ctx)
{
intel_batchbuffer_start_atomic(vpp_gpe_ctx->batch, 0x1000);
@@ -245,10 +265,10 @@ gpe_pipeline_setup(VADriverContextP ctx,
gen6_gpe_pipeline_setup(ctx, &vpp_gpe_ctx->gpe_ctx, vpp_gpe_ctx->batch);
- gpe_fill_thread_parameters(ctx, vpp_gpe_ctx);
+ gen75_gpe_process_parameters_fill(ctx, vpp_gpe_ctx);
BEGIN_BATCH(vpp_gpe_ctx->batch, 2);
- OUT_BATCH(vpp_gpe_ctx->batch, MI_BATCH_BUFFER_START | (2 << 6));
+ OUT_BATCH(vpp_gpe_ctx->batch, MI_BATCH_BUFFER_START | (1 << 8));
OUT_RELOC(vpp_gpe_ctx->batch,
vpp_gpe_ctx->vpp_batchbuffer.bo,
I915_GEM_DOMAIN_COMMAND, 0,
@@ -261,7 +281,7 @@ gpe_pipeline_setup(VADriverContextP ctx,
}
static VAStatus
-gpe_process_init(VADriverContextP ctx,
+gen75_gpe_process_init(VADriverContextP ctx,
struct vpp_gpe_context *vpp_gpe_ctx)
{
struct i965_driver_data *i965 = i965_driver_data(ctx);
@@ -290,28 +310,28 @@ gpe_process_init(VADriverContextP ctx,
vpp_gpe_ctx->vpp_kernel_return.bo = bo;
dri_bo_reference(vpp_gpe_ctx->vpp_kernel_return.bo);
- i965_gpe_context_init(ctx, &vpp_gpe_ctx->gpe_ctx);
+ vpp_gpe_ctx->gpe_context_init(ctx, &vpp_gpe_ctx->gpe_ctx);
return VA_STATUS_SUCCESS;
}
static VAStatus
-gpe_process_prepare(VADriverContextP ctx,
+gen75_gpe_process_prepare(VADriverContextP ctx,
struct vpp_gpe_context *vpp_gpe_ctx)
{
/*Setup all the memory object*/
- gpe_surfaces_setup(ctx, vpp_gpe_ctx);
- gpe_interface_setup(ctx, vpp_gpe_ctx);
- gpe_constant_setup(ctx, vpp_gpe_ctx);
+ gen75_gpe_process_surfaces_setup(ctx, vpp_gpe_ctx);
+ gen75_gpe_process_interface_setup(ctx, vpp_gpe_ctx);
+ //gen75_gpe_process_constant_setup(ctx, vpp_gpe_ctx);
/*Programing media pipeline*/
- gpe_pipeline_setup(ctx, vpp_gpe_ctx);
+ gen75_gpe_process_pipeline_setup(ctx, vpp_gpe_ctx);
return VA_STATUS_SUCCESS;
}
static VAStatus
-gpe_process_run(VADriverContextP ctx,
+gen75_gpe_process_run(VADriverContextP ctx,
struct vpp_gpe_context *vpp_gpe_ctx)
{
intel_batchbuffer_flush(vpp_gpe_ctx->batch);
@@ -320,19 +340,285 @@ gpe_process_run(VADriverContextP ctx,
}
static VAStatus
-gen75_gpe_process(VADriverContextP ctx,
+gen75_gpe_process(VADriverContextP ctx,
struct vpp_gpe_context * vpp_gpe_ctx)
{
VAStatus va_status = VA_STATUS_SUCCESS;
- va_status = gpe_process_init(ctx, vpp_gpe_ctx);
- va_status |=gpe_process_prepare(ctx, vpp_gpe_ctx);
- va_status |=gpe_process_run(ctx, vpp_gpe_ctx);
-
- return va_status;
+
+ va_status = gen75_gpe_process_init(ctx, vpp_gpe_ctx);
+ if (va_status != VA_STATUS_SUCCESS)
+ return va_status;
+
+ va_status = gen75_gpe_process_prepare(ctx, vpp_gpe_ctx);
+ if (va_status != VA_STATUS_SUCCESS)
+ return va_status;
+
+ va_status = gen75_gpe_process_run(ctx, vpp_gpe_ctx);
+ if (va_status != VA_STATUS_SUCCESS)
+ return va_status;
+
+ return VA_STATUS_SUCCESS;
}
static VAStatus
-gen75_gpe_process_sharpening(VADriverContextP ctx,
+gen8_gpe_process_surfaces_setup(VADriverContextP ctx,
+                                struct vpp_gpe_context *vpp_gpe_ctx)
+{
+    /*
+     * Bind every surface the kernel touches into the gen8 binding table.
+     *
+     * Slot layout: two slots (luma, chroma) per input NV12 surface, then two
+     * slots for the output NV12 surface, then one slot for the kernel return
+     * buffer.  Always returns VA_STATUS_SUCCESS.
+     */
+    struct i965_gpe_context *gpe = &vpp_gpe_ctx->gpe_ctx;
+    struct object_surface *surf;
+    /* Number of input slots, which is also the first output slot. */
+    unsigned char out_slot = (1 + vpp_gpe_ctx->forward_surf_sum +
+                              vpp_gpe_ctx->backward_surf_sum) * 2;
+    unsigned int slot;
+
+    /* Input NV12 surfaces: even slot = luma, odd slot = chroma. */
+    for (slot = 0; slot < out_slot; slot += 2) {
+        surf = vpp_gpe_ctx->surface_input_object[slot / 2];
+        assert(surf);
+
+        gen8_gpe_media_rw_surface_setup(ctx, gpe, surf,
+                                        BINDING_TABLE_OFFSET_GEN8(slot),
+                                        SURFACE_STATE_OFFSET_GEN8(slot));
+        gen8_gpe_media_chroma_surface_setup(ctx, gpe, surf,
+                                            BINDING_TABLE_OFFSET_GEN8(slot + 1),
+                                            SURFACE_STATE_OFFSET_GEN8(slot + 1));
+    }
+
+    /* Output NV12 surface, again luma followed by chroma. */
+    surf = vpp_gpe_ctx->surface_output_object;
+    assert(surf);
+    gen8_gpe_media_rw_surface_setup(ctx, gpe, surf,
+                                    BINDING_TABLE_OFFSET_GEN8(out_slot),
+                                    SURFACE_STATE_OFFSET_GEN8(out_slot));
+    gen8_gpe_media_chroma_surface_setup(ctx, gpe, surf,
+                                        BINDING_TABLE_OFFSET_GEN8(out_slot + 1),
+                                        SURFACE_STATE_OFFSET_GEN8(out_slot + 1));
+
+    /*
+     * Kernel return buffer.
+     * NOTE(review): this is the gen7 helper although the offsets are the GEN8
+     * ones and every other surface here goes through a gen8 helper -- confirm
+     * that a gen7-format buffer surface state is what gen8 expects.
+     */
+    gen7_gpe_buffer_suface_setup(ctx, gpe,
+                                 &vpp_gpe_ctx->vpp_kernel_return,
+                                 BINDING_TABLE_OFFSET_GEN8((out_slot + 2)),
+                                 SURFACE_STATE_OFFSET_GEN8(out_slot + 2));
+
+    return VA_STATUS_SUCCESS;
+}
+
+/*
+ * Fill the interface descriptor table inside the dynamic state buffer, one
+ * descriptor per loaded sub-shader.  Always returns VA_STATUS_SUCCESS.
+ */
+static VAStatus
+gen8_gpe_process_interface_setup(VADriverContextP ctx,
+                                 struct vpp_gpe_context *vpp_gpe_ctx)
+{
+    struct i965_gpe_context *gpe = &vpp_gpe_ctx->gpe_ctx;
+    dri_bo *state_bo = gpe->dynamic_state.bo;
+    struct gen8_interface_descriptor_data *entry;
+    int k;
+
+    dri_bo_map(state_bo, 1);
+    assert(state_bo->virtual);
+
+    /* The descriptor table lives at idrt_offset within the mapped buffer. */
+    entry = (struct gen8_interface_descriptor_data *)(state_bo->virtual
+                                                       + gpe->idrt_offset);
+
+    for (k = 0; k < vpp_gpe_ctx->sub_shader_sum; k++, entry++) {
+        struct i965_kernel *kernel = &gpe->kernels[k];
+
+        assert(sizeof(*entry) == 32);
+
+        /* Start from an all-zero descriptor and set only what is needed. */
+        memset(entry, 0, sizeof(*entry));
+        entry->desc0.kernel_start_pointer = kernel->kernel_offset >> 6;
+        entry->desc3.sampler_count = 0; /* FIXME: */
+        entry->desc3.sampler_state_pointer = 0;
+        entry->desc4.binding_table_entry_count = 6; /* FIXME: */
+        entry->desc4.binding_table_pointer = (BINDING_TABLE_OFFSET_GEN8(0) >> 5);
+        entry->desc5.constant_urb_entry_read_offset = 0;
+        entry->desc5.constant_urb_entry_read_length = 0;
+    }
+
+    dri_bo_unmap(state_bo);
+
+    return VA_STATUS_SUCCESS;
+}
+
+/*
+ * Copy the kernel parameter block (kernel_param, kernel_param_size bytes)
+ * into the mapped dynamic state buffer.  Always returns VA_STATUS_SUCCESS.
+ *
+ * NOTE(review): the copy lands at offset 0 of the dynamic state buffer,
+ * whereas the interface descriptors are written at idrt_offset -- confirm
+ * that the constant (CURBE) region really starts at offset 0.
+ */
+static VAStatus
+gen8_gpe_process_constant_fill(VADriverContextP ctx,
+                               struct vpp_gpe_context *vpp_gpe_ctx)
+{
+    dri_bo *state_bo = vpp_gpe_ctx->gpe_ctx.dynamic_state.bo;
+    unsigned char *dst;
+
+    dri_bo_map(state_bo, 1);
+    assert(state_bo->virtual);
+
+    dst = state_bo->virtual;
+    memcpy(dst, vpp_gpe_ctx->kernel_param, vpp_gpe_ctx->kernel_param_size);
+
+    dri_bo_unmap(state_bo);
+
+    return VA_STATUS_SUCCESS;
+}
+
+/*
+ * Build the second-level batch buffer: for every thread emit a MEDIA_OBJECT
+ * command carrying that thread's inline data, followed by a
+ * MEDIA_STATE_FLUSH, and close the buffer with MI_BATCH_BUFFER_END.
+ * Always returns VA_STATUS_SUCCESS.
+ */
+static VAStatus
+gen8_gpe_process_parameters_fill(VADriverContextP ctx,
+                                 struct vpp_gpe_context *vpp_gpe_ctx)
+{
+    dri_bo *cmd_bo = vpp_gpe_ctx->vpp_batchbuffer.bo;
+    unsigned int size = vpp_gpe_ctx->thread_param_size; /* inline bytes per thread */
+    unsigned int *cmd;
+    unsigned int t;
+
+    dri_bo_map(cmd_bo, 1);
+    cmd = cmd_bo->virtual;
+
+    for (t = 0; t < vpp_gpe_ctx->thread_num; t++) {
+        /* 6-dword header; the length field counts header + inline data - 2. */
+        cmd[0] = (CMD_MEDIA_OBJECT | (size/sizeof(int) + 6 - 2));
+        cmd[1] = vpp_gpe_ctx->sub_shader_index;
+        cmd[2] = 0;
+        cmd[3] = 0;
+        cmd[4] = 0;
+        cmd[5] = 0;
+        cmd += 6;
+
+        /* Inline data for thread t directly follows the header. */
+        memcpy(cmd, (unsigned char *)(vpp_gpe_ctx->thread_param + size * t), size);
+        cmd += size/sizeof(int);
+
+        cmd[0] = CMD_MEDIA_STATE_FLUSH;
+        cmd[1] = 0;
+        cmd += 2;
+    }
+
+    /* One zero dword, then the end-of-batch marker. */
+    cmd[0] = 0;
+    cmd[1] = MI_BATCH_BUFFER_END;
+
+    dri_bo_unmap(cmd_bo);
+
+    return VA_STATUS_SUCCESS;
+}
+
+/*
+ * Program the media pipeline into the main batch and chain to the per-thread
+ * command buffer (vpp_batchbuffer) that gen8_gpe_process_parameters_fill()
+ * writes.  Everything is emitted inside one atomic batch section.
+ * Always returns VA_STATUS_SUCCESS.
+ */
+static VAStatus
+gen8_gpe_process_pipeline_setup(VADriverContextP ctx,
+ struct vpp_gpe_context *vpp_gpe_ctx)
+{
+ intel_batchbuffer_start_atomic(vpp_gpe_ctx->batch, 0x1000);
+ intel_batchbuffer_emit_mi_flush(vpp_gpe_ctx->batch);
+
+ gen8_gpe_pipeline_setup(ctx, &vpp_gpe_ctx->gpe_ctx, vpp_gpe_ctx->batch);
+
+ /* Fill the second-level buffer before the command that jumps into it. */
+ gen8_gpe_process_parameters_fill(ctx, vpp_gpe_ctx);
+
+ /*
+  * Three dwords: the MI_BATCH_BUFFER_START command, the relocated address
+  * of vpp_batchbuffer, and a zero dword (presumably the upper half of the
+  * address on gen8 -- TODO confirm against the hardware documentation).
+  */
+ BEGIN_BATCH(vpp_gpe_ctx->batch, 3);
+ OUT_BATCH(vpp_gpe_ctx->batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
+ OUT_RELOC(vpp_gpe_ctx->batch,
+ vpp_gpe_ctx->vpp_batchbuffer.bo,
+ I915_GEM_DOMAIN_COMMAND, 0,
+ 0);
+ OUT_BATCH(vpp_gpe_ctx->batch, 0);
+
+ ADVANCE_BATCH(vpp_gpe_ctx->batch);
+
+ intel_batchbuffer_end_atomic(vpp_gpe_ctx->batch);
+
+ return VA_STATUS_SUCCESS;
+}
+
+/*
+ * Allocate the per-run buffers of a gen8 VPP GPE pass (the second-level
+ * batch buffer and the kernel return buffer) and initialise the GPE context
+ * through the gpe_context_init hook.
+ *
+ * Buffers left over from a previous run are released first.
+ *
+ * Returns VA_STATUS_ERROR_ALLOCATION_FAILED if either buffer cannot be
+ * allocated, VA_STATUS_SUCCESS otherwise.
+ */
+static VAStatus
+gen8_gpe_process_init(VADriverContextP ctx,
+                      struct vpp_gpe_context *vpp_gpe_ctx)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    unsigned int batch_buf_size;
+    unsigned int kernel_return_size;
+
+    /*
+     * Must cover what gen8_gpe_process_parameters_fill() writes: per thread
+     * a 6-dword MEDIA_OBJECT header, the inline data and a 2-dword
+     * MEDIA_STATE_FLUSH (8 dwords of overhead, not 6).  The 16 trailing
+     * bytes cover the two closing dwords.
+     */
+    batch_buf_size = vpp_gpe_ctx->thread_num *
+                     (vpp_gpe_ctx->thread_param_size + 8 * sizeof(int)) + 16;
+
+    /* One 16-byte block of return data per thread. */
+    vpp_gpe_ctx->vpp_kernel_return.num_blocks = vpp_gpe_ctx->thread_num;
+    vpp_gpe_ctx->vpp_kernel_return.size_block = 16;
+    vpp_gpe_ctx->vpp_kernel_return.pitch = 1;
+
+    kernel_return_size = vpp_gpe_ctx->vpp_kernel_return.num_blocks
+                         * vpp_gpe_ctx->vpp_kernel_return.size_block;
+
+    /*
+     * dri_bo_alloc() returns a buffer that already carries the caller's
+     * reference, so it is stored as-is.  Taking a second reference would
+     * never be balanced: the buffer is unreferenced exactly once, either
+     * here on the next run or in vpp_gpe_context_destroy().
+     */
+    dri_bo_unreference(vpp_gpe_ctx->vpp_batchbuffer.bo);
+    vpp_gpe_ctx->vpp_batchbuffer.bo = dri_bo_alloc(i965->intel.bufmgr,
+                                                   "vpp batch buffer",
+                                                   batch_buf_size, 0x1000);
+    if (!vpp_gpe_ctx->vpp_batchbuffer.bo)
+        return VA_STATUS_ERROR_ALLOCATION_FAILED;
+
+    dri_bo_unreference(vpp_gpe_ctx->vpp_kernel_return.bo);
+    vpp_gpe_ctx->vpp_kernel_return.bo = dri_bo_alloc(i965->intel.bufmgr,
+                                                     "vpp kernel return buffer",
+                                                     kernel_return_size, 0x1000);
+    if (!vpp_gpe_ctx->vpp_kernel_return.bo)
+        return VA_STATUS_ERROR_ALLOCATION_FAILED;
+
+    vpp_gpe_ctx->gpe_context_init(ctx, &vpp_gpe_ctx->gpe_ctx);
+
+    return VA_STATUS_SUCCESS;
+}
+
+/*
+ * Set up all state for one gen8 GPE pass: surface bindings, interface
+ * descriptors, then the media pipeline commands.
+ *
+ * Returns the status of the first step that fails, or VA_STATUS_SUCCESS.
+ */
+static VAStatus
+gen8_gpe_process_prepare(VADriverContextP ctx,
+                         struct vpp_gpe_context *vpp_gpe_ctx)
+{
+    VAStatus va_status;
+
+    /* Set up all the memory objects. */
+    va_status = gen8_gpe_process_surfaces_setup(ctx, vpp_gpe_ctx);
+    if (va_status != VA_STATUS_SUCCESS)
+        return va_status;
+
+    va_status = gen8_gpe_process_interface_setup(ctx, vpp_gpe_ctx);
+    if (va_status != VA_STATUS_SUCCESS)
+        return va_status;
+
+    /*
+     * The constant upload step is currently disabled:
+     * gen8_gpe_process_constant_fill(ctx, vpp_gpe_ctx);
+     */
+
+    /* Program the media pipeline. */
+    return gen8_gpe_process_pipeline_setup(ctx, vpp_gpe_ctx);
+}
+
+/*
+ * Submit the prepared work by flushing the context's batch buffer.
+ * Always returns VA_STATUS_SUCCESS.
+ */
+static VAStatus
+gen8_gpe_process_run(VADriverContextP ctx,
+ struct vpp_gpe_context *vpp_gpe_ctx)
+{
+ intel_batchbuffer_flush(vpp_gpe_ctx->batch);
+
+ return VA_STATUS_SUCCESS;
+}
+
+/*
+ * Run one complete gen8 GPE pass: init, prepare, run.  A later stage is
+ * only attempted when every earlier one succeeded; the status of the first
+ * failing stage (or VA_STATUS_SUCCESS) is returned.
+ */
+static VAStatus
+gen8_gpe_process(VADriverContextP ctx,
+                 struct vpp_gpe_context * vpp_gpe_ctx)
+{
+    VAStatus va_status;
+
+    va_status = gen8_gpe_process_init(ctx, vpp_gpe_ctx);
+
+    if (va_status == VA_STATUS_SUCCESS)
+        va_status = gen8_gpe_process_prepare(ctx, vpp_gpe_ctx);
+
+    if (va_status == VA_STATUS_SUCCESS)
+        va_status = gen8_gpe_process_run(ctx, vpp_gpe_ctx);
+
+    return va_status;
+}
+
+/*
+ * Dispatch one GPE pass to the implementation matching the device:
+ * Haswell uses the gen75 path, gen8 devices use the gen8 path.  Any other
+ * device yields VA_STATUS_ERROR_UNIMPLEMENTED.
+ */
+static VAStatus
+vpp_gpe_process(VADriverContextP ctx,
+                struct vpp_gpe_context * vpp_gpe_ctx)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    VAStatus va_status = VA_STATUS_ERROR_UNIMPLEMENTED;
+
+    if (IS_HASWELL(i965->intel.device_info)) {
+        va_status = gen75_gpe_process(ctx, vpp_gpe_ctx);
+    } else if (IS_GEN8(i965->intel.device_info)) {
+        va_status = gen8_gpe_process(ctx, vpp_gpe_ctx);
+    }
+
+    return va_status;
+}
+
+static VAStatus
+vpp_gpe_process_sharpening(VADriverContextP ctx,
struct vpp_gpe_context * vpp_gpe_ctx)
{
VAStatus va_status = VA_STATUS_SUCCESS;
@@ -362,9 +648,15 @@ gen75_gpe_process_sharpening(VADriverContextP ctx,
if(vpp_gpe_ctx->is_first_frame){
vpp_gpe_ctx->sub_shader_sum = 3;
- i965_gpe_load_kernels(ctx,
+ struct i965_kernel * vpp_kernels;
+ if (IS_HASWELL(i965->intel.device_info))
+ vpp_kernels = gen75_vpp_sharpening_kernels;
+ else if (IS_GEN8(i965->intel.device_info))
+ vpp_kernels = gen8_vpp_sharpening_kernels;
+
+ vpp_gpe_ctx->gpe_load_kernels(ctx,
&vpp_gpe_ctx->gpe_ctx,
- gen75_vpp_sharpening_kernels,
+ vpp_kernels,
vpp_gpe_ctx->sub_shader_sum);
}
@@ -381,7 +673,7 @@ gen75_gpe_process_sharpening(VADriverContextP ctx,
assert(obj_surf);
if (obj_surf) {
- i965_check_alloc_surface_bo(ctx, obj_surf, 1, VA_FOURCC('N','V','1','2'),
+ i965_check_alloc_surface_bo(ctx, obj_surf, 1, VA_FOURCC_NV12,
SUBSAMPLE_YUV420);
vpp_gpe_ctx->surface_tmp_object = obj_surf;
}
@@ -416,10 +708,10 @@ gen75_gpe_process_sharpening(VADriverContextP ctx,
}
vpp_gpe_ctx->sub_shader_index = 0;
- va_status = gen75_gpe_process(ctx, vpp_gpe_ctx);
+ va_status = vpp_gpe_process(ctx, vpp_gpe_ctx);
free(vpp_gpe_ctx->thread_param);
- /* Step 2: vertical blur process */
+ /* Step 2: vertical blur process */
vpp_gpe_ctx->surface_input_object[0] = vpp_gpe_ctx->surface_output_object;
vpp_gpe_ctx->surface_output_object = vpp_gpe_ctx->surface_tmp_object;
vpp_gpe_ctx->forward_surf_sum = 0;
@@ -443,7 +735,7 @@ gen75_gpe_process_sharpening(VADriverContextP ctx,
}
vpp_gpe_ctx->sub_shader_index = 1;
- gen75_gpe_process(ctx, vpp_gpe_ctx);
+ vpp_gpe_process(ctx, vpp_gpe_ctx);
free(vpp_gpe_ctx->thread_param);
/* Step 3: apply the blur to original surface */
@@ -471,7 +763,7 @@ gen75_gpe_process_sharpening(VADriverContextP ctx,
}
vpp_gpe_ctx->sub_shader_index = 2;
- va_status = gen75_gpe_process(ctx, vpp_gpe_ctx);
+ va_status = vpp_gpe_process(ctx, vpp_gpe_ctx);
free(vpp_gpe_ctx->thread_param);
return va_status;
@@ -480,7 +772,7 @@ error:
return VA_STATUS_ERROR_INVALID_PARAMETER;
}
-VAStatus gen75_gpe_process_picture(VADriverContextP ctx,
+VAStatus vpp_gpe_process_picture(VADriverContextP ctx,
struct vpp_gpe_context * vpp_gpe_ctx)
{
VAStatus va_status = VA_STATUS_SUCCESS;
@@ -538,7 +830,7 @@ VAStatus gen75_gpe_process_picture(VADriverContextP ctx,
vpp_gpe_ctx->in_frame_h = obj_surface->orig_height;
if(filter && filter->type == VAProcFilterSharpening) {
- va_status = gen75_gpe_process_sharpening(ctx, vpp_gpe_ctx);
+ va_status = vpp_gpe_process_sharpening(ctx, vpp_gpe_ctx);
} else {
va_status = VA_STATUS_ERROR_ATTR_NOT_SUPPORTED;
}
@@ -552,7 +844,7 @@ error:
}
void
-gen75_gpe_context_destroy(VADriverContextP ctx,
+vpp_gpe_context_destroy(VADriverContextP ctx,
struct vpp_gpe_context *vpp_gpe_ctx)
{
dri_bo_unreference(vpp_gpe_ctx->vpp_batchbuffer.bo);
@@ -561,7 +853,7 @@ gen75_gpe_context_destroy(VADriverContextP ctx,
dri_bo_unreference(vpp_gpe_ctx->vpp_kernel_return.bo);
vpp_gpe_ctx->vpp_kernel_return.bo = NULL;
- i965_gpe_context_destroy(&vpp_gpe_ctx->gpe_ctx);
+ vpp_gpe_ctx->gpe_context_destroy(&vpp_gpe_ctx->gpe_ctx);
if(vpp_gpe_ctx->surface_tmp != VA_INVALID_ID){
assert(vpp_gpe_ctx->surface_tmp_object != NULL);
@@ -576,18 +868,19 @@ gen75_gpe_context_destroy(VADriverContextP ctx,
}
struct vpp_gpe_context *
-gen75_gpe_context_init(VADriverContextP ctx)
+vpp_gpe_context_init(VADriverContextP ctx)
{
struct i965_driver_data *i965 = i965_driver_data(ctx);
struct vpp_gpe_context *vpp_gpe_ctx = calloc(1, sizeof(struct vpp_gpe_context));
struct i965_gpe_context *gpe_ctx = &(vpp_gpe_ctx->gpe_ctx);
- gpe_ctx->surface_state_binding_table.length =
- (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;
- gpe_ctx->idrt.max_entries = MAX_INTERFACE_DESC_GEN6;
- gpe_ctx->idrt.entry_size = sizeof(struct gen6_interface_descriptor_data);
+ assert(IS_HASWELL(i965->intel.device_info) ||
+ IS_GEN8(i965->intel.device_info));
- gpe_ctx->curbe.length = CURBE_TOTAL_DATA_LENGTH;
+ vpp_gpe_ctx->surface_tmp = VA_INVALID_ID;
+ vpp_gpe_ctx->surface_tmp_object = NULL;
+ vpp_gpe_ctx->batch = intel_batchbuffer_new(&i965->intel, I915_EXEC_RENDER, 0);
+ vpp_gpe_ctx->is_first_frame = 1;
gpe_ctx->vfe_state.max_num_threads = 60 - 1;
gpe_ctx->vfe_state.num_urb_entries = 16;
@@ -595,16 +888,28 @@ gen75_gpe_context_init(VADriverContextP ctx)
gpe_ctx->vfe_state.urb_entry_size = 59 - 1;
gpe_ctx->vfe_state.curbe_allocation_size = CURBE_ALLOCATION_SIZE - 1;
- vpp_gpe_ctx->vpp_surface2_setup = gen7_gpe_surface2_setup;
- vpp_gpe_ctx->vpp_media_rw_surface_setup = gen7_gpe_media_rw_surface_setup;
- vpp_gpe_ctx->vpp_buffer_surface_setup = gen7_gpe_buffer_suface_setup;
- vpp_gpe_ctx->vpp_media_chroma_surface_setup = gen75_gpe_media_chroma_surface_setup;
- vpp_gpe_ctx->surface_tmp = VA_INVALID_ID;
- vpp_gpe_ctx->surface_tmp_object = NULL;
+ if (IS_HASWELL(i965->intel.device_info)) {
+ vpp_gpe_ctx->gpe_context_init = i965_gpe_context_init;
+ vpp_gpe_ctx->gpe_context_destroy = i965_gpe_context_destroy;
+ vpp_gpe_ctx->gpe_load_kernels = i965_gpe_load_kernels;
+ gpe_ctx->surface_state_binding_table.length =
+ (SURFACE_STATE_PADDED_SIZE_GEN7 + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;
+
+ gpe_ctx->curbe.length = CURBE_TOTAL_DATA_LENGTH;
+ gpe_ctx->idrt.max_entries = MAX_INTERFACE_DESC_GEN6;
+ gpe_ctx->idrt.entry_size = sizeof(struct gen6_interface_descriptor_data);
+
+ } else if (IS_GEN8(i965->intel.device_info)) {
+ vpp_gpe_ctx->gpe_context_init = gen8_gpe_context_init;
+ vpp_gpe_ctx->gpe_context_destroy = gen8_gpe_context_destroy;
+ vpp_gpe_ctx->gpe_load_kernels = gen8_gpe_load_kernels;
+ gpe_ctx->surface_state_binding_table.length =
+ (SURFACE_STATE_PADDED_SIZE_GEN8 + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;
+
+ gpe_ctx->curbe_size = CURBE_TOTAL_DATA_LENGTH;
+ gpe_ctx->idrt_size = sizeof(struct gen8_interface_descriptor_data) * MAX_INTERFACE_DESC_GEN6;
- vpp_gpe_ctx->batch = intel_batchbuffer_new(&i965->intel, I915_EXEC_RENDER, 0);
-
- vpp_gpe_ctx->is_first_frame = 1;
+ }
return vpp_gpe_ctx;
}
diff --git a/src/gen75_vpp_gpe.h b/src/gen75_vpp_gpe.h
index 5232214..5ffee2c 100644
--- a/src/gen75_vpp_gpe.h
+++ b/src/gen75_vpp_gpe.h
@@ -79,6 +79,7 @@ struct vpp_gpe_context{
unsigned char * kernel_param;
unsigned int kernel_param_size;
+
unsigned char * thread_param;
unsigned int thread_param_size;
unsigned int thread_num;
@@ -91,46 +92,30 @@ struct vpp_gpe_context{
unsigned int forward_surf_sum;
unsigned int backward_surf_sum;
- unsigned int x_step;
- unsigned int y_step;
-
unsigned int in_frame_w;
unsigned int in_frame_h;
unsigned int is_first_frame;
- void (*vpp_surface2_setup)(VADriverContextP ctx,
- struct i965_gpe_context *gpe_context,
- struct object_surface *obj_surface,
- unsigned long binding_table_offset,
- unsigned long surface_state_offset);
-
- void (*vpp_media_rw_surface_setup)(VADriverContextP ctx,
- struct i965_gpe_context *gpe_context,
- struct object_surface *obj_surface,
- unsigned long binding_table_offset,
- unsigned long surface_state_offset);
-
- void (*vpp_buffer_surface_setup)(VADriverContextP ctx,
- struct i965_gpe_context *gpe_context,
- struct i965_buffer_surface *buffer_surface,
- unsigned long binding_table_offset,
- unsigned long surface_state_offset);
-
- void (*vpp_media_chroma_surface_setup)(VADriverContextP ctx,
- struct i965_gpe_context *gpe_context,
- struct object_surface *obj_surface,
- unsigned long binding_table_offset,
- unsigned long surface_state_offset);
+ void (*gpe_context_init)(VADriverContextP ctx,
+ struct i965_gpe_context *gpe_context);
+
+ void (*gpe_context_destroy)(struct i965_gpe_context *gpe_context);
+
+ void (*gpe_load_kernels)(VADriverContextP ctx,
+ struct i965_gpe_context *gpe_context,
+ struct i965_kernel *kernel_list,
+ unsigned int num_kernels);
+
};
struct vpp_gpe_context *
-gen75_gpe_context_init(VADriverContextP ctx);
+vpp_gpe_context_init(VADriverContextP ctx);
void
-gen75_gpe_context_destroy(VADriverContextP ctx,
- struct vpp_gpe_context* vpp_context);
+vpp_gpe_context_destroy(VADriverContextP ctx,
+ struct vpp_gpe_context* vpp_context);
VAStatus
-gen75_gpe_process_picture(VADriverContextP ctx,
- struct vpp_gpe_context * vpp_context);
+vpp_gpe_process_picture(VADriverContextP ctx,
+ struct vpp_gpe_context * vpp_context);
#endif
diff --git a/src/gen75_vpp_vebox.c b/src/gen75_vpp_vebox.c
index 50df627..1113c90 100644
--- a/src/gen75_vpp_vebox.c
+++ b/src/gen75_vpp_vebox.c
@@ -23,6 +23,7 @@
*
* Authors:
* Li Xiaowei <xiaowei.a.li@intel.com>
+ * Li Zhong <zhong.li@intel.com>
*/
#include <stdio.h>
@@ -52,18 +53,6 @@ i965_DeriveImage(VADriverContextP ctx, VABufferID surface, VAImage *out_image);
extern VAStatus
i965_DestroyImage(VADriverContextP ctx, VAImageID image);
-extern VAStatus
-i965_DestroySurfaces(VADriverContextP ctx,
- VASurfaceID *surface_list,
- int num_surfaces);
-
-extern VAStatus
-i965_CreateSurfaces(VADriverContextP ctx,
- int width,
- int height,
- int format,
- int num_surfaces,
- VASurfaceID *surfaces);
VAStatus vpp_surface_convert(VADriverContextP ctx,
struct object_surface *src_obj_surf,
@@ -104,8 +93,8 @@ VAStatus vpp_surface_scaling(VADriverContextP ctx,
VAStatus va_status = VA_STATUS_SUCCESS;
int flags = I965_PP_FLAG_AVS;
- assert(src_obj_surf->fourcc == VA_FOURCC('N','V','1','2'));
- assert(dst_obj_surf->fourcc == VA_FOURCC('N','V','1','2'));
+ assert(src_obj_surf->fourcc == VA_FOURCC_NV12);
+ assert(dst_obj_surf->fourcc == VA_FOURCC_NV12);
VARectangle src_rect, dst_rect;
src_rect.x = 0;
@@ -130,9 +119,11 @@ VAStatus vpp_surface_scaling(VADriverContextP ctx,
void hsw_veb_dndi_table(VADriverContextP ctx, struct intel_vebox_context *proc_ctx)
{
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
unsigned int* p_table ;
int progressive_dn = 1;
int dndi_top_first = 0;
+ int motion_compensated_enable = 0;
if (proc_ctx->filters_mask & VPP_DNDI_DI) {
VAProcFilterParameterBufferDeinterlacing *di_param =
@@ -140,7 +131,8 @@ void hsw_veb_dndi_table(VADriverContextP ctx, struct intel_vebox_context *proc_c
assert(di_param);
progressive_dn = 0;
- dndi_top_first = !(di_param->flags & VA_DEINTERLACING_BOTTOM_FIELD_FIRST);
+ dndi_top_first = !(di_param->flags & VA_DEINTERLACING_BOTTOM_FIELD);
+ motion_compensated_enable = (di_param->algorithm == VAProcDeinterlacingMotionCompensated);
}
/*
@@ -152,7 +144,9 @@ void hsw_veb_dndi_table(VADriverContextP ctx, struct intel_vebox_context *proc_c
*/
p_table = (unsigned int *)proc_ctx->dndi_state_table.ptr;
- *p_table ++ = 0; // reserved . w0
+ if (IS_HASWELL(i965->intel.device_info))
+ *p_table ++ = 0; // reserved . w0
+
*p_table ++ = ( 140 << 24 | // denoise STAD threshold . w1
192 << 16 | // dnmh_history_max
0 << 12 | // reserved
@@ -199,7 +193,7 @@ void hsw_veb_dndi_table(VADriverContextP ctx, struct intel_vebox_context *proc_c
100<< 16 | // FMD #2 vertical difference th
0 << 14 | // CAT th1
2 << 8 | // FMD tear threshold
- 0 << 7 | // MCDI Enable, use motion compensated deinterlace algorithm
+ motion_compensated_enable << 7 | // MCDI Enable, use motion compensated deinterlace algorithm
progressive_dn << 6 | // progressive DN
0 << 4 | // reserved
dndi_top_first << 3 | // DN/DI Top First
@@ -222,6 +216,8 @@ void hsw_veb_dndi_table(VADriverContextP ctx, struct intel_vebox_context *proc_c
13 << 6 | // chr temp diff th
7 ); // chr temp diff low
+ if (IS_GEN8(i965->intel.device_info))
+ *p_table ++ = 0; // parameters for hot pixel,
}
void hsw_veb_iecp_std_table(VADriverContextP ctx, struct intel_vebox_context *proc_ctx)
@@ -233,40 +229,179 @@ void hsw_veb_iecp_std_table(VADriverContextP ctx, struct intel_vebox_context *pr
if(!(proc_ctx->filters_mask & VPP_IECP_STD_STE)){
memset(p_table, 0, 29 * 4);
}else{
- *p_table ++ = 0x9a6e39f0;
- *p_table ++ = 0x400c0000;
- *p_table ++ = 0x00001180;
- *p_table ++ = 0xfe2f2e00;
- *p_table ++ = 0x000000ff;
-
- *p_table ++ = 0x00140000;
- *p_table ++ = 0xd82e0000;
- *p_table ++ = 0x8285ecec;
- *p_table ++ = 0x00008282;
- *p_table ++ = 0x00000000;
-
- *p_table ++ = 0x02117000;
- *p_table ++ = 0xa38fec96;
- *p_table ++ = 0x0000c8c8;
- *p_table ++ = 0x00000000;
- *p_table ++ = 0x01478000;
-
- *p_table ++ = 0x0007c306;
- *p_table ++ = 0x00000000;
- *p_table ++ = 0x00000000;
- *p_table ++ = 0x1c1bd000;
- *p_table ++ = 0x00000000;
-
- *p_table ++ = 0x00000000;
- *p_table ++ = 0x00000000;
- *p_table ++ = 0x0007cf80;
- *p_table ++ = 0x00000000;
- *p_table ++ = 0x00000000;
-
- *p_table ++ = 0x1c080000;
- *p_table ++ = 0x00000000;
- *p_table ++ = 0x00000000;
- *p_table ++ = 0x00000000;
+ //DWord 0
+ *p_table ++ = ( 154 << 24 | // V_Mid
+ 110 << 16 | // U_Mid
+ 14 << 10 | // Hue_Max
+ 31 << 4 | // Sat_Max
+ 0 << 3 | // Reserved
+ 0 << 2 | // Output Control is set to output the 1=STD score /0=Output Pixels
+ 1 << 1 | // Set STE Enable
+ 1 ); // Set STD Enable
+
+ //DWord 1
+ *p_table ++ = ( 0 << 31 | // Reserved
+ 4 << 28 | // Diamond Margin
+ 0 << 21 | // Diamond_du
+ 3 << 18 | // HS_Margin
+ 79 << 10 | // Cos(alpha)
+ 0 << 8 | // Reserved
+ 101 ); // Sin(alpha)
+
+ //DWord 2
+ *p_table ++ = ( 0 << 21 | // Reserved
+ 100 << 13 | // Diamond_alpha
+ 35 << 7 | // Diamond_Th
+ 0 );
+
+ //DWord 3
+ *p_table ++ = ( 254 << 24 | // Y_point_3
+ 47 << 16 | // Y_point_2
+ 46 << 8 | // Y_point_1
+ 1 << 7 | // VY_STD_Enable
+ 0 ); // Reserved
+
+ //DWord 4
+ *p_table ++ = ( 0 << 18 | // Reserved
+ 31 << 13 | // Y_slope_2
+ 31 << 8 | // Y_slope_1
+ 255 ); // Y_point_4
+
+ //DWord 5
+ *p_table ++ = ( 400 << 16 | // INV_Skin_types_margin = 20* Skin_Type_margin => 20*20
+ 3300 ); // INV_Margin_VYL => 1/Margin_VYL
+
+ //DWord 6
+ *p_table ++ = ( 216 << 24 | // P1L
+ 46 << 16 | // P0L
+ 1600 ); // INV_Margin_VYU
+
+ //DWord 7
+ *p_table ++ = ( 130 << 24 | // B1L
+ 133 << 16 | // B0L
+ 236 << 8 | // P3L
+ 236 ); // P2L
+
+ //DWord 8
+ *p_table ++ = ( 0 << 27 | // Reserved
+ 0x7FB << 16 | // S0L (11 bits, Default value: -5 = FBh, pad it with 1s to make it 11bits)
+ 130 << 8 | // B3L
+ 130 );
+
+ //DWord 9
+ *p_table ++ = ( 0 << 22 | // Reserved
+ 0 << 11 | // S2L
+ 0); // S1L
+
+ //DWord 10
+ *p_table ++ = ( 0 << 27 | // Reserved
+ 66 << 19 | // P1U
+ 46 << 11 | // P0U
+ 0 ); // S3
+
+ //DWord 11
+ *p_table ++ = ( 163 << 24 | // B1U
+ 143 << 16 | // B0U
+ 236 << 8 | // P3U
+ 150 ); // P2U
+
+ //DWord 12
+ *p_table ++ = ( 0 << 27 | // Reserved
+ 256 << 16 | // S0U
+ 200 << 8 | // B3U
+ 200 ); // B2U
+
+ //DWord 13
+ *p_table ++ = ( 0 << 22 | // Reserved
+ 0x74D << 11 | // S2U (11 bits, Default value -179 = F4Dh)
+ 113 ); // S1U
+
+//DWord 14
+ *p_table ++ = ( 0 << 28 | // Reserved
+ 20 << 20 | // Skin_types_margin
+ 120 << 12 | // Skin_types_thresh
+ 1 << 11 | // Skin_Types_Enable
+ 0 ); // S3U
+
+ //DWord 15
+ *p_table ++ = ( 0 << 31 | // Reserved
+ 0x3F8 << 21 | // SATB1 (10 bits, default 8, optimized value -8)
+ 31 << 14 | // SATP3
+ 6 << 7 | // SATP2
+ 0x7A ); // SATP1 (7 bits, default 6, optimized value -6)
+
+ //DWord 16
+ *p_table ++ = ( 0 << 31 | // Reserved
+ 297 << 20 | // SATS0
+ 124 << 10 | // SATB3
+ 8 ); // SATB2
+
+ //DWord 17
+ *p_table ++ = ( 0 << 22 | // Reserved
+ 297 << 11 | // SATS2
+ 85 ); // SATS1
+
+ //DWord 18
+ *p_table ++ = ( 14 << 25 | // HUEP3
+ 6 << 18 | // HUEP2
+ 0x7A << 11 | // HUEP1 (7 bits, default value -6 = 7Ah)
+ 256 ); // SATS3
+
+ //DWord 19
+ *p_table ++ = ( 0 << 30 | // Reserved
+ 256 << 20 | // HUEB3
+ 8 << 10 | // HUEB2
+ 0x3F8 ); // HUEB1 (10 bits, default value 8, optimized value -8)
+
+ //DWord 20
+ *p_table ++ = ( 0 << 22 | // Reserved
+ 85 << 11 | // HUES1
+ 384 ); // HUES
+
+ //DWord 21
+ *p_table ++ = ( 0 << 22 | // Reserved
+ 256 << 11 | // HUES3
+ 384 ); // HUES2
+
+ //DWord 22
+ *p_table ++ = ( 0 << 31 | // Reserved
+ 0 << 21 | // SATB1_DARK
+ 31 << 14 | // SATP3_DARK
+ 31 << 7 | // SATP2_DARK
+ 0x7B ); // SATP1_DARK (7 bits, default value -11 = FF5h, optimized value -5)
+
+ //DWord 23
+ *p_table ++ = ( 0 << 31 | // Reserved
+ 305 << 20 | // SATS0_DARK
+ 124 << 10 | // SATB3_DARK
+ 124 ); // SATB2_DARK
+
+ //DWord 24
+ *p_table ++ = ( 0 << 22 | // Reserved
+ 256 << 11 | // SATS2_DARK
+ 220 ); // SATS1_DARK
+
+ //DWord 25
+ *p_table ++ = ( 14 << 25 | // HUEP3_DARK
+ 14 << 18 | // HUEP2_DARK
+ 14 << 11 | // HUEP1_DARK
+ 256 ); // SATS3_DARK
+
+ //DWord 26
+ *p_table ++ = ( 0 << 30 | // Reserved
+ 56 << 20 | // HUEB3_DARK
+ 56 << 10 | // HUEB2_DARK
+ 56 ); // HUEB1_DARK
+
+ //DWord 27
+ *p_table ++ = ( 0 << 22 | // Reserved
+ 256 << 11 | // HUES1_DARK
+ 256 ); // HUES0_DARK
+
+ //DWord 28
+ *p_table ++ = ( 0 << 22 | // Reserved
+ 256 << 11 | // HUES3_DARK
+ 256 ); // HUES2_DARK
}
}
@@ -389,11 +524,11 @@ void hsw_veb_iecp_csc_table(VADriverContextP ctx, struct intel_vebox_context *pr
return;
}
- if(proc_ctx->fourcc_input == VA_FOURCC('R','G','B','A') &&
- (proc_ctx->fourcc_output == VA_FOURCC('N','V','1','2') ||
- proc_ctx->fourcc_output == VA_FOURCC('Y','V','1','2') ||
- proc_ctx->fourcc_output == VA_FOURCC('Y','V','Y','2') ||
- proc_ctx->fourcc_output == VA_FOURCC('A','Y','U','V'))) {
+ if(proc_ctx->fourcc_input == VA_FOURCC_RGBA &&
+ (proc_ctx->fourcc_output == VA_FOURCC_NV12 ||
+ proc_ctx->fourcc_output == VA_FOURCC_YV12 ||
+ proc_ctx->fourcc_output == VA_FOURCC_YVY2 ||
+ proc_ctx->fourcc_output == VA_FOURCC_AYUV)) {
tran_coef[0] = 0.257;
tran_coef[1] = 0.504;
@@ -410,12 +545,11 @@ void hsw_veb_iecp_csc_table(VADriverContextP ctx, struct intel_vebox_context *pr
u_coef[2] = 128 * 4;
is_transform_enabled = 1;
- }else if((proc_ctx->fourcc_input == VA_FOURCC('N','V','1','2') ||
- proc_ctx->fourcc_input == VA_FOURCC('Y','V','1','2') ||
- proc_ctx->fourcc_input == VA_FOURCC('Y','U','Y','2') ||
- proc_ctx->fourcc_input == VA_FOURCC('A','Y','U','V'))&&
- proc_ctx->fourcc_output == VA_FOURCC('R','G','B','A')) {
-
+ }else if((proc_ctx->fourcc_input == VA_FOURCC_NV12 ||
+ proc_ctx->fourcc_input == VA_FOURCC_YV12 ||
+ proc_ctx->fourcc_input == VA_FOURCC_YUY2 ||
+ proc_ctx->fourcc_input == VA_FOURCC_AYUV) &&
+ proc_ctx->fourcc_output == VA_FOURCC_RGBA) {
tran_coef[0] = 1.164;
tran_coef[1] = 0.000;
tran_coef[2] = 1.569;
@@ -543,7 +677,8 @@ void hsw_veb_state_command(VADriverContextP ctx, struct intel_vebox_context *pro
if (di_param->algorithm == VAProcDeinterlacingBob)
is_first_frame = 1;
- if (di_param->algorithm == VAProcDeinterlacingMotionAdaptive &&
+ if ((di_param->algorithm == VAProcDeinterlacingMotionAdaptive ||
+ di_param->algorithm == VAProcDeinterlacingMotionCompensated) &&
proc_ctx->frame_order != -1)
di_output_frames_flag = 0; /* Output both Current Frame and Previous Frame */
}
@@ -750,7 +885,7 @@ void hsw_veb_resource_prepare(VADriverContextP ctx,
}
if(obj_surf_in->bo == NULL){
- input_fourcc = VA_FOURCC('N','V','1','2');
+ input_fourcc = VA_FOURCC_NV12;
input_sampling = SUBSAMPLE_YUV420;
input_tiling = 0;
i965_check_alloc_surface_bo(ctx, obj_surf_in, input_tiling, input_fourcc, input_sampling);
@@ -762,7 +897,7 @@ void hsw_veb_resource_prepare(VADriverContextP ctx,
}
if(obj_surf_out->bo == NULL){
- output_fourcc = VA_FOURCC('N','V','1','2');
+ output_fourcc = VA_FOURCC_NV12;
output_sampling = SUBSAMPLE_YUV420;
output_tiling = 0;
i965_check_alloc_surface_bo(ctx, obj_surf_out, output_tiling, output_fourcc, output_sampling);
@@ -844,8 +979,9 @@ void hsw_veb_resource_prepare(VADriverContextP ctx,
}
-void hsw_veb_surface_reference(VADriverContextP ctx,
- struct intel_vebox_context *proc_ctx)
+static VAStatus
+hsw_veb_surface_reference(VADriverContextP ctx,
+ struct intel_vebox_context *proc_ctx)
{
struct object_surface * obj_surf;
VEBFrameStore tmp_store;
@@ -870,7 +1006,8 @@ void hsw_veb_surface_reference(VADriverContextP ctx,
(VAProcFilterParameterBufferDeinterlacing *)proc_ctx->filter_di;
if (di_param &&
- di_param->algorithm == VAProcDeinterlacingMotionAdaptive) {
+ (di_param->algorithm == VAProcDeinterlacingMotionAdaptive ||
+ di_param->algorithm == VAProcDeinterlacingMotionCompensated)) {
if ((proc_ctx->filters_mask & VPP_DNDI_DN) &&
proc_ctx->frame_order == 0) { /* DNDI */
tmp_store = proc_ctx->frame_store[FRAME_OUT_CURRENT_DN];
@@ -880,9 +1017,14 @@ void hsw_veb_surface_reference(VADriverContextP ctx,
VAProcPipelineParameterBuffer *pipe = proc_ctx->pipeline_param;
struct object_surface *obj_surf = NULL;
struct i965_driver_data * const i965 = i965_driver_data(ctx);
-
- assert(pipe->num_forward_references == 1);
- assert(pipe->forward_references[0] != VA_INVALID_ID);
+
+ if (!pipe ||
+ !pipe->num_forward_references ||
+ pipe->forward_references[0] == VA_INVALID_ID) {
+ WARN_ONCE("A forward temporal reference is needed for Motion adaptive/compensated deinterlacing !!!\n");
+
+ return VA_STATUS_ERROR_INVALID_PARAMETER;
+ }
obj_surf = SURFACE(pipe->forward_references[0]);
assert(obj_surf && obj_surf->bo);
@@ -919,7 +1061,8 @@ void hsw_veb_surface_reference(VADriverContextP ctx,
(VAProcFilterParameterBufferDeinterlacing *)proc_ctx->filter_di;
if (di_param &&
- di_param->algorithm == VAProcDeinterlacingMotionAdaptive) {
+ (di_param->algorithm == VAProcDeinterlacingMotionAdaptive ||
+ di_param->algorithm == VAProcDeinterlacingMotionCompensated)) {
if (proc_ctx->frame_order == -1) {
proc_ctx->frame_store[FRAME_OUT_CURRENT].surface_id = VA_INVALID_ID;
proc_ctx->frame_store[FRAME_OUT_CURRENT].is_internal_surface = 0;
@@ -946,6 +1089,8 @@ void hsw_veb_surface_reference(VADriverContextP ctx,
proc_ctx->frame_store[FRAME_OUT_CURRENT].obj_surface = obj_surf;
proc_ctx->current_output = FRAME_OUT_CURRENT;
}
+
+ return VA_STATUS_SUCCESS;
}
void hsw_veb_surface_unreference(VADriverContextP ctx,
@@ -999,17 +1144,17 @@ int hsw_veb_pre_format_convert(VADriverContextP ctx,
}
/* convert the following format to NV12 format */
- if(obj_surf_input->fourcc == VA_FOURCC('Y','V','1','2') ||
- obj_surf_input->fourcc == VA_FOURCC('I','4','2','0') ||
- obj_surf_input->fourcc == VA_FOURCC('I','M','C','1') ||
- obj_surf_input->fourcc == VA_FOURCC('I','M','C','3') ||
- obj_surf_input->fourcc == VA_FOURCC('R','G','B','A')){
+ if(obj_surf_input->fourcc == VA_FOURCC_YV12 ||
+ obj_surf_input->fourcc == VA_FOURCC_I420 ||
+ obj_surf_input->fourcc == VA_FOURCC_IMC1 ||
+ obj_surf_input->fourcc == VA_FOURCC_IMC3 ||
+ obj_surf_input->fourcc == VA_FOURCC_RGBA){
proc_ctx->format_convert_flags |= PRE_FORMAT_CONVERT;
- } else if(obj_surf_input->fourcc == VA_FOURCC('A','Y','U','V') ||
- obj_surf_input->fourcc == VA_FOURCC('Y','U','Y','2') ||
- obj_surf_input->fourcc == VA_FOURCC('N','V','1','2')){
+ } else if(obj_surf_input->fourcc == VA_FOURCC_AYUV ||
+ obj_surf_input->fourcc == VA_FOURCC_YUY2 ||
+ obj_surf_input->fourcc == VA_FOURCC_NV12){
// nothing to do here
} else {
/* not support other format as input */
@@ -1030,7 +1175,7 @@ int hsw_veb_pre_format_convert(VADriverContextP ctx,
if (obj_surf_input_vebox) {
proc_ctx->surface_input_vebox_object = obj_surf_input_vebox;
- i965_check_alloc_surface_bo(ctx, obj_surf_input_vebox, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
+ i965_check_alloc_surface_bo(ctx, obj_surf_input_vebox, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
}
}
@@ -1038,16 +1183,16 @@ int hsw_veb_pre_format_convert(VADriverContextP ctx,
}
/* create one temporary NV12 surfaces for conversion*/
- if(obj_surf_output->fourcc == VA_FOURCC('Y','V','1','2') ||
- obj_surf_output->fourcc == VA_FOURCC('I','4','2','0') ||
- obj_surf_output->fourcc == VA_FOURCC('I','M','C','1') ||
- obj_surf_output->fourcc == VA_FOURCC('I','M','C','3') ||
- obj_surf_output->fourcc == VA_FOURCC('R','G','B','A')) {
+ if(obj_surf_output->fourcc == VA_FOURCC_YV12 ||
+ obj_surf_output->fourcc == VA_FOURCC_I420 ||
+ obj_surf_output->fourcc == VA_FOURCC_IMC1 ||
+ obj_surf_output->fourcc == VA_FOURCC_IMC3 ||
+ obj_surf_output->fourcc == VA_FOURCC_RGBA) {
proc_ctx->format_convert_flags |= POST_FORMAT_CONVERT;
- } else if(obj_surf_output->fourcc == VA_FOURCC('A','Y','U','V') ||
- obj_surf_output->fourcc == VA_FOURCC('Y','U','Y','2') ||
- obj_surf_output->fourcc == VA_FOURCC('N','V','1','2')){
+ } else if(obj_surf_output->fourcc == VA_FOURCC_AYUV ||
+ obj_surf_output->fourcc == VA_FOURCC_YUY2 ||
+ obj_surf_output->fourcc == VA_FOURCC_NV12){
/* Nothing to do here */
} else {
/* not support other format as input */
@@ -1069,7 +1214,7 @@ int hsw_veb_pre_format_convert(VADriverContextP ctx,
if (obj_surf_output_vebox) {
proc_ctx->surface_output_vebox_object = obj_surf_output_vebox;
- i965_check_alloc_surface_bo(ctx, obj_surf_output_vebox, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
+ i965_check_alloc_surface_bo(ctx, obj_surf_output_vebox, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
}
}
}
@@ -1088,7 +1233,7 @@ int hsw_veb_pre_format_convert(VADriverContextP ctx,
if (obj_surf_output_vebox) {
proc_ctx->surface_output_scaled_object = obj_surf_output_vebox;
- i965_check_alloc_surface_bo(ctx, obj_surf_output_vebox, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
+ i965_check_alloc_surface_bo(ctx, obj_surf_output_vebox, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
}
}
}
@@ -1118,7 +1263,7 @@ int hsw_veb_post_format_convert(VADriverContextP ctx,
} else if(proc_ctx->format_convert_flags & POST_SCALING_CONVERT) {
/* scaling, convert and copy NV12 to YV12/IMC3/IMC2/RGBA output*/
- assert(obj_surface->fourcc == VA_FOURCC('N','V','1','2'));
+ assert(obj_surface->fourcc == VA_FOURCC_NV12);
/* first step :surface scaling */
vpp_surface_scaling(ctx,proc_ctx->surface_output_scaled_object, obj_surface);
@@ -1126,13 +1271,13 @@ int hsw_veb_post_format_convert(VADriverContextP ctx,
/* second step: color format convert and copy to output */
obj_surface = proc_ctx->surface_output_object;
- if(obj_surface->fourcc == VA_FOURCC('N','V','1','2') ||
- obj_surface->fourcc == VA_FOURCC('Y','V','1','2') ||
- obj_surface->fourcc == VA_FOURCC('I','4','2','0') ||
- obj_surface->fourcc == VA_FOURCC('Y','U','Y','2') ||
- obj_surface->fourcc == VA_FOURCC('I','M','C','1') ||
- obj_surface->fourcc == VA_FOURCC('I','M','C','3') ||
- obj_surface->fourcc == VA_FOURCC('R','G','B','A')) {
+ if(obj_surface->fourcc == VA_FOURCC_NV12 ||
+ obj_surface->fourcc == VA_FOURCC_YV12 ||
+ obj_surface->fourcc == VA_FOURCC_I420 ||
+ obj_surface->fourcc == VA_FOURCC_YUY2 ||
+ obj_surface->fourcc == VA_FOURCC_IMC1 ||
+ obj_surface->fourcc == VA_FOURCC_IMC3 ||
+ obj_surface->fourcc == VA_FOURCC_RGBA) {
vpp_surface_convert(ctx, proc_ctx->surface_output_object, proc_ctx->surface_output_scaled_object);
}else {
assert(0);
@@ -1172,6 +1317,9 @@ VAStatus gen75_vebox_process_picture(VADriverContextP ctx,
proc_ctx->filters_mask |= VPP_IECP_PRO_AMP;
proc_ctx->filter_iecp_amp = filter;
proc_ctx->filter_iecp_amp_num_elements = obj_buf->num_elements;
+ } else if (filter->type == VAProcFilterSkinToneEnhancement) {
+ proc_ctx->filters_mask |= VPP_IECP_STD_STE;
+ proc_ctx->filter_iecp_std = filter;
}
}
@@ -1297,3 +1445,215 @@ struct intel_vebox_context * gen75_vebox_context_init(VADriverContextP ctx)
return proc_context;
}
+/*
+ * Emit the VEB_STATE command (0xc DWords) into proc_ctx->batch.
+ *
+ * The enable bits are derived from proc_ctx->filters_mask, the input/output
+ * fourcc pair and the deinterlacing parameters in proc_ctx->filter_di.  The
+ * command then points at the DNDI, IECP, gamut and vertex state tables.
+ * ctx is not used by this function.
+ */
+void bdw_veb_state_command(VADriverContextP ctx, struct intel_vebox_context *proc_ctx)
+{
+    struct intel_batchbuffer *batch = proc_ctx->batch;
+    const unsigned int dn_on = (proc_ctx->filters_mask & 0x01) != 0;
+    const unsigned int di_on = (proc_ctx->filters_mask & 0x02) != 0;
+    unsigned int iecp_on = (proc_ctx->filters_mask & 0xff00) != 0;
+    unsigned int first_frame = 0;
+    unsigned int di_frames_out = 2; /* Output Current Frame Only */
+    unsigned int state_dw1;
+
+    /* frame_order == -1 marks the first frame of a DN/DI sequence */
+    if (proc_ctx->frame_order == -1 && (di_on || dn_on))
+        first_frame = 1;
+
+    /* IECP is forced on for format conversion or when neither DN nor DI runs */
+    if (proc_ctx->fourcc_input != proc_ctx->fourcc_output ||
+        (!dn_on && !di_on))
+        iecp_on = 1;
+
+    if (di_on) {
+        VAProcFilterParameterBufferDeinterlacing *di_param =
+            (VAProcFilterParameterBufferDeinterlacing *)proc_ctx->filter_di;
+
+        assert(di_param);
+
+        if (di_param->algorithm == VAProcDeinterlacingBob) {
+            /* Bob is always programmed as a first frame */
+            first_frame = 1;
+        } else if ((di_param->algorithm == VAProcDeinterlacingMotionAdaptive ||
+                    di_param->algorithm == VAProcDeinterlacingMotionCompensated) &&
+                   proc_ctx->frame_order != -1) {
+            di_frames_out = 0; /* Output both Current Frame and Previous Frame */
+        }
+    }
+
+    /*
+     * DWord 1 layout.  Fields that are always programmed to 0 are omitted
+     * from the expression below: state surface control bits (25), gamut
+     * expansion position (22), single slice vebox enable (14), hot pixel
+     * filter enable (13), alpha plane enable (12), vignette enable (11),
+     * demosaic enable (10), ColorGamutCompressionEnable (1),
+     * ColorGamutExpansionEnable (0) and the reserved bits.
+     */
+    state_dw1 = (di_frames_out << 8) |  // DI output frame
+                (1 << 7) |              // 444->422 downsample method
+                (1 << 6) |              // 422->420 downsample method
+                (first_frame << 5) |    // DN/DI first frame
+                (di_on << 4) |          // DI enable
+                (dn_on << 3) |          // DN enable
+                (iecp_on << 2);         // global IECP enabled
+
+    BEGIN_VEB_BATCH(batch, 0xc);
+    OUT_VEB_BATCH(batch, VEB_STATE | (0xc - 2));
+    OUT_VEB_BATCH(batch, state_dw1);
+
+    /* Each state table pointer is followed by one zero DWord */
+    OUT_RELOC(batch, proc_ctx->dndi_state_table.bo,
+              I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+    OUT_VEB_BATCH(batch, 0);
+
+    OUT_RELOC(batch, proc_ctx->iecp_state_table.bo,
+              I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+    OUT_VEB_BATCH(batch, 0);
+
+    OUT_RELOC(batch, proc_ctx->gamut_state_table.bo,
+              I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+    OUT_VEB_BATCH(batch, 0);
+
+    OUT_RELOC(batch, proc_ctx->vertex_state_table.bo,
+              I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+    OUT_VEB_BATCH(batch, 0);
+
+    /* capture pipe state pointer: left as zero */
+    OUT_VEB_BATCH(batch, 0);
+    OUT_VEB_BATCH(batch, 0);
+
+    ADVANCE_VEB_BATCH(batch);
+}
+
+/*
+ * Emit the VEB_DNDI_IECP_STATE command (0x14 DWords) into proc_ctx->batch.
+ *
+ * DWord 1 carries the processed column range: start column 0 and an end
+ * column of (input width rounded up to a multiple of 64) - 1.  DWords 2..17
+ * hold one relocation plus one zero DWord per frame store.  DWords 18..19
+ * are zero.  ctx is not used by this function.
+ */
+void bdw_veb_dndi_iecp_command(VADriverContextP ctx, struct intel_vebox_context *proc_ctx)
+{
+    /* Frame stores in emission order; write_domain is 0 for inputs. */
+    const struct {
+        int slot;
+        unsigned int write_domain;
+    } surfaces[] = {
+        { FRAME_IN_CURRENT,      0 },                       /* DWord 2-3   */
+        { FRAME_IN_PREVIOUS,     0 },                       /* DWord 4-5   */
+        { FRAME_IN_STMM,         0 },                       /* DWord 6-7   */
+        { FRAME_OUT_STMM,        I915_GEM_DOMAIN_RENDER },  /* DWord 8-9   */
+        { FRAME_OUT_CURRENT_DN,  I915_GEM_DOMAIN_RENDER },  /* DWord 10-11 */
+        { FRAME_OUT_CURRENT,     I915_GEM_DOMAIN_RENDER },  /* DWord 12-13 */
+        { FRAME_OUT_PREVIOUS,    I915_GEM_DOMAIN_RENDER },  /* DWord 14-15 */
+        { FRAME_OUT_STATISTIC,   I915_GEM_DOMAIN_RENDER },  /* DWord 16-17 */
+    };
+    struct intel_batchbuffer *batch = proc_ctx->batch;
+    unsigned char frame_ctrl_bits = 0;
+    unsigned int first_column = 0;
+    unsigned int aligned_width = (proc_ctx->width_input + 63) / 64 * 64;
+    unsigned int n;
+
+    BEGIN_VEB_BATCH(batch, 0x14);
+    OUT_VEB_BATCH(batch, VEB_DNDI_IECP_STATE | (0x14 - 2)); /* DWord 0 */
+    OUT_VEB_BATCH(batch,
+                  (first_column << 16) |
+                  (aligned_width - 1));                     /* DWord 1 */
+
+    for (n = 0; n < sizeof(surfaces) / sizeof(surfaces[0]); n++) {
+        OUT_RELOC(batch,
+                  proc_ctx->frame_store[surfaces[n].slot].obj_surface->bo,
+                  I915_GEM_DOMAIN_RENDER, surfaces[n].write_domain,
+                  frame_ctrl_bits);
+        OUT_VEB_BATCH(batch, 0);
+    }
+
+    OUT_VEB_BATCH(batch, 0);                                /* DWord 18 */
+    OUT_VEB_BATCH(batch, 0);                                /* DWord 19 */
+
+    ADVANCE_VEB_BATCH(batch);
+}
+
+/*
+ * Run one VEBOX pass for the pipeline described by proc_ctx->pipeline_param.
+ *
+ * Steps: record which filters were supplied, set up format conversion and
+ * frame references, prepare resources on the first frame, build and flush
+ * the VEBOX batch, then perform any post conversion.
+ *
+ * Returns VA_STATUS_SUCCESS on success, VA_STATUS_ERROR_INVALID_PARAMETER
+ * when a filter buffer cannot be resolved, or the status reported by
+ * hsw_veb_surface_reference() when the frame references cannot be set up.
+ */
+VAStatus gen8_vebox_process_picture(VADriverContextP ctx,
+                                    struct intel_vebox_context *proc_ctx)
+{
+    /* Not referenced by name below; presumably consumed by the BUFFER() macro. */
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+
+    VAProcPipelineParameterBuffer *pipe = proc_ctx->pipeline_param;
+    VAProcFilterParameterBuffer* filter = NULL;
+    struct object_buffer *obj_buf = NULL;
+    VAStatus status;
+    unsigned int i;
+
+    /* Record every supplied filter in filters_mask and keep its parameters. */
+    for (i = 0; i < pipe->num_filters; i ++) {
+        obj_buf = BUFFER(pipe->filters[i]);
+
+        assert(obj_buf && obj_buf->buffer_store);
+
+        if (!obj_buf || !obj_buf->buffer_store)
+            goto error;
+
+        filter = (VAProcFilterParameterBuffer*)obj_buf-> buffer_store->buffer;
+
+        if (filter->type == VAProcFilterNoiseReduction) {
+            proc_ctx->filters_mask |= VPP_DNDI_DN;
+            proc_ctx->filter_dn = filter;
+        } else if (filter->type == VAProcFilterDeinterlacing) {
+            proc_ctx->filters_mask |= VPP_DNDI_DI;
+            proc_ctx->filter_di = filter;
+        } else if (filter->type == VAProcFilterColorBalance) {
+            proc_ctx->filters_mask |= VPP_IECP_PRO_AMP;
+            proc_ctx->filter_iecp_amp = filter;
+            proc_ctx->filter_iecp_amp_num_elements = obj_buf->num_elements;
+        } else if (filter->type == VAProcFilterSkinToneEnhancement) {
+            proc_ctx->filters_mask |= VPP_IECP_STD_STE;
+            proc_ctx->filter_iecp_std = filter;
+        }
+    }
+
+    hsw_veb_pre_format_convert(ctx, proc_ctx);
+
+    /*
+     * hsw_veb_surface_reference() fails when motion adaptive/compensated
+     * deinterlacing is requested without a forward reference.  Stop here in
+     * that case: the frame stores are not set up, so no batch must be built
+     * from them.
+     */
+    status = hsw_veb_surface_reference(ctx, proc_ctx);
+    if (status != VA_STATUS_SUCCESS)
+        return status;
+
+    /* frame_order == -1 marks the first call on this context */
+    if (proc_ctx->frame_order == -1) {
+        hsw_veb_resource_prepare(ctx, proc_ctx);
+    }
+
+    if (proc_ctx->format_convert_flags & POST_COPY_CONVERT) {
+        assert(proc_ctx->frame_order == 1);
+        /* directly copy the saved frame in the second call */
+    } else {
+        intel_batchbuffer_start_atomic_veb(proc_ctx->batch, 0x1000);
+        intel_batchbuffer_emit_mi_flush(proc_ctx->batch);
+        hsw_veb_surface_state(ctx, proc_ctx, INPUT_SURFACE);
+        hsw_veb_surface_state(ctx, proc_ctx, OUTPUT_SURFACE);
+        hsw_veb_state_table_setup(ctx, proc_ctx);
+
+        bdw_veb_state_command(ctx, proc_ctx);
+        bdw_veb_dndi_iecp_command(ctx, proc_ctx);
+        intel_batchbuffer_end_atomic(proc_ctx->batch);
+        intel_batchbuffer_flush(proc_ctx->batch);
+    }
+
+    hsw_veb_post_format_convert(ctx, proc_ctx);
+    // hsw_veb_surface_unreference(ctx, proc_ctx);
+
+    /* -1 -> 0 on the first call, then alternates between 0 and 1 */
+    proc_ctx->frame_order = (proc_ctx->frame_order + 1) % 2;
+
+    return VA_STATUS_SUCCESS;
+
+error:
+    return VA_STATUS_ERROR_INVALID_PARAMETER;
+}
+
diff --git a/src/gen75_vpp_vebox.h b/src/gen75_vpp_vebox.h
index f1061c7..a78a165 100644
--- a/src/gen75_vpp_vebox.h
+++ b/src/gen75_vpp_vebox.h
@@ -150,4 +150,7 @@ void gen75_vebox_context_destroy(VADriverContextP ctx,
struct intel_vebox_context * gen75_vebox_context_init(VADriverContextP ctx);
+VAStatus gen8_vebox_process_picture(VADriverContextP ctx,
+ struct intel_vebox_context *proc_ctx);
+
#endif
diff --git a/src/gen7_mfc.c b/src/gen7_mfc.c
index 8572b89..78b1096 100644
--- a/src/gen7_mfc.c
+++ b/src/gen7_mfc.c
@@ -41,12 +41,16 @@
#include "gen6_mfc.h"
#include "gen6_vme.h"
+/* Surface-state slots are padded to the larger of the GEN6 and GEN7 sizes. */
+#define SURFACE_STATE_PADDED_SIZE MAX(SURFACE_STATE_PADDED_SIZE_GEN6, SURFACE_STATE_PADDED_SIZE_GEN7)
+/* Byte offset of surface-state slot `index`; the argument is parenthesized so expressions expand correctly. */
+#define SURFACE_STATE_OFFSET(index) (SURFACE_STATE_PADDED_SIZE * (index))
+/* Byte offset of binding-table entry `index`; the table starts after MAX_MEDIA_SURFACES_GEN6 surface-state slots. */
+#define BINDING_TABLE_OFFSET(index) (SURFACE_STATE_OFFSET(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * (index))
+
extern void
gen6_mfc_pipe_buf_addr_state(VADriverContextP ctx,
struct intel_encoder_context *encoder_context);
extern void
gen6_mfc_bsp_buf_base_addr_state(VADriverContextP ctx,
- struct intel_encoder_context *encoder_context);
+ struct intel_encoder_context *encoder_context);
extern void
gen6_mfc_init(VADriverContextP ctx,
struct encode_state *encode_state,
@@ -208,13 +212,13 @@ gen7_mfc_avc_img_state(VADriverContextP ctx, struct encode_state *encode_state,
BEGIN_BCS_BATCH(batch, 16);
OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
- /*DW1 frame size */
+ /*DW1 frame size */
OUT_BCS_BATCH(batch,
- ((width_in_mbs * height_in_mbs) & 0xFFFF));
+ ((width_in_mbs * height_in_mbs - 1) & 0xFFFF));
OUT_BCS_BATCH(batch,
((height_in_mbs - 1) << 16) |
((width_in_mbs - 1) << 0));
- /*DW3 Qp setting */
+ /*DW3 Qp setting */
OUT_BCS_BATCH(batch,
(0 << 24) | /* Second Chroma QP Offset */
(0 << 16) | /* Chroma QP Offset */
@@ -240,20 +244,20 @@ gen7_mfc_avc_img_state(VADriverContextP ctx, struct encode_state *encode_state,
(1 << 2) | /* Frame MB only flag */
(0 << 1) | /* MBAFF mode is in active */
(0 << 0)); /* Field picture flag */
- /*DW5 trequllis quantization */
+ /*DW5 trellis quantization */
OUT_BCS_BATCH(batch, 0); /* Mainly about MB rate control and debug, just ignoring */
OUT_BCS_BATCH(batch, /* Inter and Intra Conformance Max size limit */
(0xBB8 << 16) | /* InterMbMaxSz */
(0xEE8) ); /* IntraMbMaxSz */
- /* DW7 */
+ /* DW7 */
OUT_BCS_BATCH(batch, 0); /* Reserved */
OUT_BCS_BATCH(batch, 0); /* Slice QP Delta for bitrate control */
OUT_BCS_BATCH(batch, 0); /* Slice QP Delta for bitrate control */
- /* DW10 frame bit setting */
+ /* DW10 frame bit setting */
OUT_BCS_BATCH(batch, 0x8C000000);
OUT_BCS_BATCH(batch, 0x00010000);
OUT_BCS_BATCH(batch, 0);
- /* DW13 Ref setting */
+ /* DW13 Ref setting */
OUT_BCS_BATCH(batch, 0x02010100);
OUT_BCS_BATCH(batch, 0);
OUT_BCS_BATCH(batch, 0);
@@ -373,17 +377,19 @@ va_to_gen7_mpeg2_picture_type[3] = {
static void
gen7_mfc_mpeg2_pic_state(VADriverContextP ctx,
- struct intel_encoder_context *encoder_context,
- struct encode_state *encode_state)
+ struct intel_encoder_context *encoder_context,
+ struct encode_state *encode_state)
{
struct intel_batchbuffer *batch = encoder_context->base.batch;
struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
VAEncPictureParameterBufferMPEG2 *pic_param;
int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
+ VAEncSliceParameterBufferMPEG2 *slice_param = NULL;
assert(encode_state->pic_param_ext && encode_state->pic_param_ext->buffer);
pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
+ slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[0]->buffer;
BEGIN_BCS_BATCH(batch, 13);
OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2));
@@ -408,7 +414,12 @@ gen7_mfc_mpeg2_pic_state(VADriverContextP ctx,
1 << 31 | /* slice concealment */
(height_in_mbs - 1) << 16 |
(width_in_mbs - 1));
- OUT_BCS_BATCH(batch, 0);
+
+ if (slice_param && slice_param->quantiser_scale_code >= 14)
+ OUT_BCS_BATCH(batch, (3 << 1) | (1 << 4) | (5 << 8) | (1 << 12));
+ else
+ OUT_BCS_BATCH(batch, 0);
+
OUT_BCS_BATCH(batch, 0);
OUT_BCS_BATCH(batch,
0xFFF << 16 | /* InterMBMaxSize */
@@ -427,7 +438,7 @@ static void
gen7_mfc_mpeg2_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
{
unsigned char intra_qm[64] = {
- 8, 16, 19, 22, 26, 27, 29, 34,
+ 8, 16, 19, 22, 26, 27, 29, 34,
16, 16, 22, 24, 27, 29, 34, 37,
19, 22, 26, 27, 29, 34, 34, 38,
22, 22, 26, 27, 29, 34, 37, 40,
@@ -456,14 +467,14 @@ static void
gen7_mfc_mpeg2_fqm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
{
unsigned short intra_fqm[64] = {
- 65536/0x8, 65536/0x10, 65536/0x13, 65536/0x16, 65536/0x16, 65536/0x1a, 65536/0x1a, 65536/0x1b,
- 65536/0x10, 65536/0x10, 65536/0x16, 65536/0x16, 65536/0x1a, 65536/0x1b, 65536/0x1b, 65536/0x1d,
- 65536/0x13, 65536/0x16, 65536/0x1a, 65536/0x1a, 65536/0x1b, 65536/0x1d, 65536/0x1d, 65536/0x23,
- 65536/0x16, 65536/0x18, 65536/0x1b, 65536/0x1b, 65536/0x13, 65536/0x20, 65536/0x22, 65536/0x26,
- 65536/0x1a, 65536/0x1b, 65536/0x13, 65536/0x13, 65536/0x20, 65536/0x23, 65536/0x26, 65536/0x2e,
- 65536/0x1b, 65536/0x1d, 65536/0x22, 65536/0x22, 65536/0x23, 65536/0x28, 65536/0x2e, 65536/0x38,
- 65536/0x1d, 65536/0x22, 65536/0x22, 65536/0x25, 65536/0x28, 65536/0x30, 65536/0x38, 65536/0x45,
- 65536/0x22, 65536/0x25, 65536/0x26, 65536/0x28, 65536/0x30, 65536/0x3a, 65536/0x45, 65536/0x53,
+ 65536/0x8, 65536/0x10, 65536/0x13, 65536/0x16, 65536/0x16, 65536/0x1a, 65536/0x1a, 65536/0x1b,
+ 65536/0x10, 65536/0x10, 65536/0x16, 65536/0x16, 65536/0x1a, 65536/0x1b, 65536/0x1b, 65536/0x1d,
+ 65536/0x13, 65536/0x16, 65536/0x1a, 65536/0x1a, 65536/0x1b, 65536/0x1d, 65536/0x1d, 65536/0x23,
+ 65536/0x16, 65536/0x18, 65536/0x1b, 65536/0x1b, 65536/0x13, 65536/0x20, 65536/0x22, 65536/0x26,
+ 65536/0x1a, 65536/0x1b, 65536/0x13, 65536/0x13, 65536/0x20, 65536/0x23, 65536/0x26, 65536/0x2e,
+ 65536/0x1b, 65536/0x1d, 65536/0x22, 65536/0x22, 65536/0x23, 65536/0x28, 65536/0x2e, 65536/0x38,
+ 65536/0x1d, 65536/0x22, 65536/0x22, 65536/0x25, 65536/0x28, 65536/0x30, 65536/0x38, 65536/0x45,
+ 65536/0x22, 65536/0x25, 65536/0x26, 65536/0x28, 65536/0x30, 65536/0x3a, 65536/0x45, 65536/0x53,
};
unsigned short non_intra_fqm[64] = {
@@ -483,14 +494,14 @@ gen7_mfc_mpeg2_fqm_state(VADriverContextP ctx, struct intel_encoder_context *enc
static void
gen7_mfc_mpeg2_slicegroup_state(VADriverContextP ctx,
- struct intel_encoder_context *encoder_context,
- int x, int y,
- int next_x, int next_y,
- int is_fisrt_slice_group,
- int is_last_slice_group,
- int intra_slice,
- int qp,
- struct intel_batchbuffer *batch)
+ struct intel_encoder_context *encoder_context,
+ int x, int y,
+ int next_x, int next_y,
+ int is_fisrt_slice_group,
+ int is_last_slice_group,
+ int intra_slice,
+ int qp,
+ struct intel_batchbuffer *batch)
{
struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
@@ -530,18 +541,18 @@ gen7_mfc_mpeg2_slicegroup_state(VADriverContextP ctx,
static int
gen7_mfc_mpeg2_pak_object_intra(VADriverContextP ctx,
- struct intel_encoder_context *encoder_context,
- int x, int y,
- int first_mb_in_slice,
- int last_mb_in_slice,
- int first_mb_in_slice_group,
- int last_mb_in_slice_group,
- int mb_type,
- int qp_scale_code,
- int coded_block_pattern,
- unsigned char target_size_in_word,
- unsigned char max_size_in_word,
- struct intel_batchbuffer *batch)
+ struct intel_encoder_context *encoder_context,
+ int x, int y,
+ int first_mb_in_slice,
+ int last_mb_in_slice,
+ int first_mb_in_slice_group,
+ int last_mb_in_slice_group,
+ int mb_type,
+ int qp_scale_code,
+ int coded_block_pattern,
+ unsigned char target_size_in_word,
+ unsigned char max_size_in_word,
+ struct intel_batchbuffer *batch)
{
int len_in_dwords = 9;
@@ -627,19 +638,19 @@ mpeg2_motion_vector(int mv, int pos, int display_max, int f_code)
static int
gen7_mfc_mpeg2_pak_object_inter(VADriverContextP ctx,
- struct encode_state *encode_state,
- struct intel_encoder_context *encoder_context,
- unsigned int *msg,
- int width_in_mbs, int height_in_mbs,
- int x, int y,
- int first_mb_in_slice,
- int last_mb_in_slice,
- int first_mb_in_slice_group,
- int last_mb_in_slice_group,
- int qp_scale_code,
- unsigned char target_size_in_word,
- unsigned char max_size_in_word,
- struct intel_batchbuffer *batch)
+ struct encode_state *encode_state,
+ struct intel_encoder_context *encoder_context,
+ unsigned int *msg,
+ int width_in_mbs, int height_in_mbs,
+ int x, int y,
+ int first_mb_in_slice,
+ int last_mb_in_slice,
+ int first_mb_in_slice_group,
+ int last_mb_in_slice_group,
+ int qp_scale_code,
+ unsigned char target_size_in_word,
+ unsigned char max_size_in_word,
+ struct intel_batchbuffer *batch)
{
VAEncPictureParameterBufferMPEG2 *pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
int len_in_dwords = 9;
@@ -697,9 +708,9 @@ gen7_mfc_mpeg2_pak_object_inter(VADriverContextP ctx,
static void
gen7_mfc_mpeg2_pipeline_header_programing(VADriverContextP ctx,
- struct encode_state *encode_state,
- struct intel_encoder_context *encoder_context,
- struct intel_batchbuffer *slice_batch)
+ struct encode_state *encode_state,
+ struct intel_encoder_context *encoder_context,
+ struct intel_batchbuffer *slice_batch)
{
struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
int idx = va_enc_packed_type_to_idx(VAEncPackedHeaderMPEG2_SPS);
@@ -751,11 +762,11 @@ gen7_mfc_mpeg2_pipeline_header_programing(VADriverContextP ctx,
static void
gen7_mfc_mpeg2_pipeline_slice_group(VADriverContextP ctx,
- struct encode_state *encode_state,
- struct intel_encoder_context *encoder_context,
- int slice_index,
- VAEncSliceParameterBufferMPEG2 *next_slice_group_param,
- struct intel_batchbuffer *slice_batch)
+ struct encode_state *encode_state,
+ struct intel_encoder_context *encoder_context,
+ int slice_index,
+ VAEncSliceParameterBufferMPEG2 *next_slice_group_param,
+ struct intel_batchbuffer *slice_batch)
{
struct gen6_vme_context *vme_context = encoder_context->vme_context;
struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
@@ -787,16 +798,16 @@ gen7_mfc_mpeg2_pipeline_slice_group(VADriverContextP ctx,
}
gen7_mfc_mpeg2_slicegroup_state(ctx,
- encoder_context,
- h_start_pos,
- v_start_pos,
- h_next_start_pos,
- v_next_start_pos,
- slice_index == 0,
- next_slice_group_param == NULL,
- slice_param->is_intra_slice,
- slice_param->quantiser_scale_code,
- slice_batch);
+ encoder_context,
+ h_start_pos,
+ v_start_pos,
+ h_next_start_pos,
+ v_next_start_pos,
+ slice_index == 0,
+ next_slice_group_param == NULL,
+ slice_param->is_intra_slice,
+ slice_param->quantiser_scale_code,
+ slice_batch);
if (slice_index == 0)
gen7_mfc_mpeg2_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
@@ -826,36 +837,36 @@ gen7_mfc_mpeg2_pipeline_slice_group(VADriverContextP ctx,
if (slice_param->is_intra_slice) {
gen7_mfc_mpeg2_pak_object_intra(ctx,
- encoder_context,
- h_pos, v_pos,
- first_mb_in_slice,
- last_mb_in_slice,
- first_mb_in_slice_group,
- last_mb_in_slice_group,
- 0x1a,
- slice_param->quantiser_scale_code,
- 0x3f,
- 0,
- 0xff,
- slice_batch);
+ encoder_context,
+ h_pos, v_pos,
+ first_mb_in_slice,
+ last_mb_in_slice,
+ first_mb_in_slice_group,
+ last_mb_in_slice_group,
+ 0x1a,
+ slice_param->quantiser_scale_code,
+ 0x3f,
+ 0,
+ 0xff,
+ slice_batch);
} else {
msg = (unsigned int *)(msg_ptr + (slice_param->macroblock_address + j) * vme_context->vme_output.size_block);
if(msg[32] & INTRA_MB_FLAG_MASK) {
- gen7_mfc_mpeg2_pak_object_intra(ctx,
- encoder_context,
- h_pos, v_pos,
- first_mb_in_slice,
- last_mb_in_slice,
- first_mb_in_slice_group,
- last_mb_in_slice_group,
- 0x1a,
- slice_param->quantiser_scale_code,
- 0x3f,
- 0,
- 0xff,
- slice_batch);
- } else {
+ gen7_mfc_mpeg2_pak_object_intra(ctx,
+ encoder_context,
+ h_pos, v_pos,
+ first_mb_in_slice,
+ last_mb_in_slice,
+ first_mb_in_slice_group,
+ last_mb_in_slice_group,
+ 0x1a,
+ slice_param->quantiser_scale_code,
+ 0x3f,
+ 0,
+ 0xff,
+ slice_batch);
+ } else {
gen7_mfc_mpeg2_pak_object_inter(ctx,
encode_state,
@@ -871,8 +882,8 @@ gen7_mfc_mpeg2_pipeline_slice_group(VADriverContextP ctx,
0,
0xff,
slice_batch);
- }
- }
+ }
+ }
}
slice_param++;
@@ -913,21 +924,16 @@ gen7_mfc_mpeg2_pipeline_slice_group(VADriverContextP ctx,
*/
static dri_bo *
gen7_mfc_mpeg2_software_slice_batchbuffer(VADriverContextP ctx,
- struct encode_state *encode_state,
- struct intel_encoder_context *encoder_context)
+ struct encode_state *encode_state,
+ struct intel_encoder_context *encoder_context)
{
- struct i965_driver_data *i965 = i965_driver_data(ctx);
+ struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
struct intel_batchbuffer *batch;
- VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
VAEncSliceParameterBufferMPEG2 *next_slice_group_param = NULL;
dri_bo *batch_bo;
int i;
- int buffer_size;
- int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
- int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
- buffer_size = width_in_mbs * height_in_mbs * 64;
- batch = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, buffer_size);
+ batch = mfc_context->aux_batchbuffer;
batch_bo = batch->buffer;
for (i = 0; i < encode_state->num_slice_params_ext; i++) {
@@ -948,14 +954,15 @@ gen7_mfc_mpeg2_software_slice_batchbuffer(VADriverContextP ctx,
dri_bo_reference(batch_bo);
intel_batchbuffer_free(batch);
+ mfc_context->aux_batchbuffer = NULL;
return batch_bo;
}
static void
gen7_mfc_mpeg2_pipeline_picture_programing(VADriverContextP ctx,
- struct encode_state *encode_state,
- struct intel_encoder_context *encoder_context)
+ struct encode_state *encode_state,
+ struct intel_encoder_context *encoder_context)
{
struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
@@ -971,8 +978,8 @@ gen7_mfc_mpeg2_pipeline_picture_programing(VADriverContextP ctx,
static void
gen7_mfc_mpeg2_pipeline_programing(VADriverContextP ctx,
- struct encode_state *encode_state,
- struct intel_encoder_context *encoder_context)
+ struct encode_state *encode_state,
+ struct intel_encoder_context *encoder_context)
{
struct intel_batchbuffer *batch = encoder_context->base.batch;
dri_bo *slice_batch_bo;
@@ -1002,8 +1009,8 @@ gen7_mfc_mpeg2_pipeline_programing(VADriverContextP ctx,
static VAStatus
gen7_mfc_mpeg2_prepare(VADriverContextP ctx,
- struct encode_state *encode_state,
- struct intel_encoder_context *encoder_context)
+ struct encode_state *encode_state,
+ struct intel_encoder_context *encoder_context)
{
struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
struct object_surface *obj_surface;
@@ -1015,7 +1022,7 @@ gen7_mfc_mpeg2_prepare(VADriverContextP ctx,
/* reconstructed surface */
obj_surface = encode_state->reconstructed_object;
- i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
+ i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
mfc_context->pre_deblocking_output.bo = obj_surface->bo;
dri_bo_reference(mfc_context->pre_deblocking_output.bo);
mfc_context->surface_state.width = obj_surface->orig_width;
@@ -1069,7 +1076,7 @@ gen7_mfc_mpeg2_prepare(VADriverContextP ctx,
dri_bo_map(bo, 1);
coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
coded_buffer_segment->mapped = 0;
- coded_buffer_segment->codec = CODED_MPEG2;
+ coded_buffer_segment->codec = encoder_context->codec;
dri_bo_unmap(bo);
return vaStatus;
@@ -1077,8 +1084,8 @@ gen7_mfc_mpeg2_prepare(VADriverContextP ctx,
static VAStatus
gen7_mfc_mpeg2_encode_picture(VADriverContextP ctx,
- struct encode_state *encode_state,
- struct intel_encoder_context *encoder_context)
+ struct encode_state *encode_state,
+ struct intel_encoder_context *encoder_context)
{
gen6_mfc_init(ctx, encode_state, encoder_context);
gen7_mfc_mpeg2_prepare(ctx, encode_state, encoder_context);
@@ -1098,7 +1105,7 @@ gen7_mfc_pipeline(VADriverContextP ctx,
VAStatus vaStatus;
switch (profile) {
- case VAProfileH264Baseline:
+ case VAProfileH264ConstrainedBaseline:
case VAProfileH264Main:
case VAProfileH264High:
vaStatus = gen6_mfc_avc_encode_picture(ctx, encode_state, encoder_context);
diff --git a/src/gen7_mfd.c b/src/gen7_mfd.c
index 8e0d503..bfb95bf 100755
--- a/src/gen7_mfd.c
+++ b/src/gen7_mfd.c
@@ -65,6 +65,7 @@ gen7_mfd_init_avc_surface(VADriverContextP ctx,
if (!gen7_avc_surface) {
gen7_avc_surface = calloc(sizeof(GenAvcSurface), 1);
+ gen7_avc_surface->frame_store_id = -1;
assert((obj_surface->size & 0x3f) == 0);
obj_surface->private_data = gen7_avc_surface;
}
@@ -135,12 +136,16 @@ gen7_mfd_surface_state(VADriverContextP ctx,
struct object_surface *obj_surface = decode_state->render_object;
unsigned int y_cb_offset;
unsigned int y_cr_offset;
+ unsigned int surface_format;
assert(obj_surface);
y_cb_offset = obj_surface->y_cb_offset;
y_cr_offset = obj_surface->y_cr_offset;
+ surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ?
+ MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8;
+
BEGIN_BCS_BATCH(batch, 6);
OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
OUT_BCS_BATCH(batch, 0);
@@ -148,7 +153,7 @@ gen7_mfd_surface_state(VADriverContextP ctx,
((obj_surface->orig_height - 1) << 18) |
((obj_surface->orig_width - 1) << 4));
OUT_BCS_BATCH(batch,
- (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
+ (surface_format << 28) | /* 420 planar YUV surface */
((standard_select != MFX_FORMAT_JPEG) << 27) | /* interleave chroma, set to 0 for JPEG */
(0 << 22) | /* surface object control state, ignored */
((obj_surface->width - 1) << 3) | /* pitch */
@@ -353,7 +358,7 @@ gen7_mfd_avc_img_state(VADriverContextP ctx,
BEGIN_BCS_BATCH(batch, 16);
OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
OUT_BCS_BATCH(batch,
- width_in_mbs * height_in_mbs);
+ (width_in_mbs * height_in_mbs - 1));
OUT_BCS_BATCH(batch,
((height_in_mbs - 1) << 16) |
((width_in_mbs - 1) << 0));
@@ -425,7 +430,7 @@ gen7_mfd_avc_directmode_state(VADriverContextP ctx,
struct object_surface *obj_surface;
GenAvcSurface *gen7_avc_surface;
VAPictureH264 *va_pic;
- int i, j;
+ int i;
BEGIN_BCS_BATCH(batch, 69);
OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (69 - 2));
@@ -477,26 +482,14 @@ gen7_mfd_avc_directmode_state(VADriverContextP ctx,
/* POC List */
for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
- if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID) {
- int found = 0;
-
- assert(gen7_mfd_context->reference_surface[i].obj_surface != NULL);
-
- for (j = 0; j < ARRAY_ELEMS(pic_param->ReferenceFrames); j++) {
- va_pic = &pic_param->ReferenceFrames[j];
-
- if (va_pic->flags & VA_PICTURE_H264_INVALID)
- continue;
+ obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
- if (va_pic->picture_id == gen7_mfd_context->reference_surface[i].surface_id) {
- found = 1;
- break;
- }
- }
+ if (obj_surface) {
+ const VAPictureH264 * const va_pic = avc_find_picture(
+ obj_surface->base.id, pic_param->ReferenceFrames,
+ ARRAY_ELEMS(pic_param->ReferenceFrames));
- assert(found == 1);
- assert(!(va_pic->flags & VA_PICTURE_H264_INVALID));
-
+ assert(va_pic != NULL);
OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
} else {
@@ -513,6 +506,15 @@ gen7_mfd_avc_directmode_state(VADriverContextP ctx,
}
static void
+gen7_mfd_avc_phantom_slice_first(VADriverContextP ctx, /* Thin wrapper: forwards to gen6_mfd_avc_phantom_slice() using this context's base.batch. */
+ VAPictureParameterBufferH264 *pic_param,
+ VASliceParameterBufferH264 *next_slice_param, /* passed through unchanged; the caller in this patch hands in the first slice parameter (when j == 0 and first_mb_in_slice is non-zero) */
+ struct gen7_mfd_context *gen7_mfd_context) /* only its base.batch member is read here */
+{
+ gen6_mfd_avc_phantom_slice(ctx, pic_param, next_slice_param, gen7_mfd_context->base.batch); /* the helper is given the batch buffer rather than the gen7 context itself */
+}
+
+static void
gen7_mfd_avc_slice_state(VADriverContextP ctx,
VAPictureParameterBufferH264 *pic_param,
VASliceParameterBufferH264 *slice_param,
@@ -748,7 +750,8 @@ gen7_mfd_avc_decode_init(VADriverContextP ctx,
assert(decode_state->pic_param && decode_state->pic_param->buffer);
pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
- intel_update_avc_frame_store_index(ctx, decode_state, pic_param, gen7_mfd_context->reference_surface);
+ intel_update_avc_frame_store_index(ctx, decode_state, pic_param,
+ gen7_mfd_context->reference_surface, &gen7_mfd_context->fs_ctx);
width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
@@ -756,20 +759,12 @@ gen7_mfd_avc_decode_init(VADriverContextP ctx,
/* Current decoded picture */
obj_surface = decode_state->render_object;
- obj_surface->flags &= ~SURFACE_REF_DIS_MASK;
- obj_surface->flags |= (pic_param->pic_fields.bits.reference_pic_flag ? SURFACE_REFERENCED : 0);
- i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
-
- /* initial uv component for YUV400 case */
- if (pic_param->seq_fields.bits.chroma_format_idc == 0) {
- unsigned int uv_offset = obj_surface->width * obj_surface->height;
- unsigned int uv_size = obj_surface->width * obj_surface->height / 2;
-
- drm_intel_gem_bo_map_gtt(obj_surface->bo);
- memset(obj_surface->bo->virtual + uv_offset, 0x80, uv_size);
- drm_intel_gem_bo_unmap_gtt(obj_surface->bo);
- }
+ if (pic_param->pic_fields.bits.reference_pic_flag)
+ obj_surface->flags |= SURFACE_REFERENCED;
+ else
+ obj_surface->flags &= ~SURFACE_REFERENCED;
+ avc_ensure_surface_bo(ctx, decode_state, obj_surface, pic_param);
gen7_mfd_init_avc_surface(ctx, pic_param, obj_surface);
dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
@@ -856,6 +851,9 @@ gen7_mfd_avc_decode_picture(VADriverContextP ctx,
else
next_slice_group_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j + 1]->buffer;
+ if (j == 0 && slice_param->first_mb_in_slice)
+ gen7_mfd_avc_phantom_slice_first(ctx, pic_param, slice_param, gen7_mfd_context);
+
for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
assert((slice_param->slice_type == SLICE_TYPE_I) ||
@@ -906,7 +904,7 @@ gen7_mfd_mpeg2_decode_init(VADriverContextP ctx,
/* Current decoded picture */
obj_surface = decode_state->render_object;
- i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
+ i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
@@ -942,7 +940,7 @@ gen7_mfd_mpeg2_pic_state(VADriverContextP ctx,
assert(decode_state->pic_param && decode_state->pic_param->buffer);
pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
- if (IS_HASWELL(i965->intel.device_id)) {
+ if (IS_HASWELL(i965->intel.device_info)) {
/* XXX: disable concealment for now */
slice_concealment_disable_bit = 1;
}
@@ -1040,10 +1038,35 @@ gen7_mfd_mpeg2_qm_state(VADriverContextP ctx,
}
}
+uint32_t mpeg2_get_slice_data_length(dri_bo *slice_data_bo, VASliceParameterBufferMPEG2 *slice_param) /* Returns the byte length to use for this slice: the data size, cut short at the first non-zero byte that is followed by three zero bytes. */
+{
+ uint8_t *buf;
+ uint32_t buf_offset = slice_param->slice_data_offset + (slice_param->macroblock_offset >> 3); /* byte offset of the scanned region: slice_data_offset + macroblock_offset / 8 */
+ uint32_t buf_size = slice_param->slice_data_size - (slice_param->macroblock_offset >> 3); /* NOTE(review): unsigned subtraction; wraps if (macroblock_offset >> 3) > slice_data_size -- confirm callers validate this */
+ uint32_t i;
+
+ dri_bo_map(slice_data_bo, 0); /* return value is not checked; presumably a read-only mapping (second argument 0) -- TODO confirm */
+ buf = (uint8_t *)slice_data_bo->virtual + buf_offset;
+
+ for (i = 3; i < buf_size; i++) { /* i is the last index of the 4-byte window buf[i-3..i]; loop body never runs when buf_size < 4 */
+ if (buf[i - 3] && /* window matches when its first byte is non-zero ... */
+ !buf[i - 2] &&
+ !buf[i - 1] &&
+ !buf[i]) { /* ... and the next three bytes are all zero */
+ dri_bo_unmap(slice_data_bo); /* unmap before the early return so the map/unmap pair stays balanced */
+ return i - 3 + 1; /* number of bytes up to and including the non-zero byte at buf[i - 3] */
+ }
+ }
+
+ dri_bo_unmap(slice_data_bo);
+ return buf_size; /* no matching window found: fall back to the full computed size */
+}
+
static void
gen7_mfd_mpeg2_bsd_object(VADriverContextP ctx,
VAPictureParameterBufferMPEG2 *pic_param,
VASliceParameterBufferMPEG2 *slice_param,
+ dri_bo *slice_data_bo,
VASliceParameterBufferMPEG2 *next_slice_param,
struct gen7_mfd_context *gen7_mfd_context)
{
@@ -1074,7 +1097,7 @@ gen7_mfd_mpeg2_bsd_object(VADriverContextP ctx,
BEGIN_BCS_BATCH(batch, 5);
OUT_BCS_BATCH(batch, MFD_MPEG2_BSD_OBJECT | (5 - 2));
OUT_BCS_BATCH(batch,
- slice_param->slice_data_size - (slice_param->macroblock_offset >> 3));
+ mpeg2_get_slice_data_length(slice_data_bo, slice_param));
OUT_BCS_BATCH(batch,
slice_param->slice_data_offset + (slice_param->macroblock_offset >> 3));
OUT_BCS_BATCH(batch,
@@ -1086,7 +1109,7 @@ gen7_mfd_mpeg2_bsd_object(VADriverContextP ctx,
(slice_param->macroblock_offset & 0x7));
OUT_BCS_BATCH(batch,
(slice_param->quantiser_scale_code << 24) |
- (IS_HASWELL(i965->intel.device_id) ? (vpos1 << 8 | hpos1) : 0));
+ (IS_HASWELL(i965->intel.device_info) ? (vpos1 << 8 | hpos1) : 0));
ADVANCE_BCS_BATCH(batch);
}
@@ -1137,7 +1160,7 @@ gen7_mfd_mpeg2_decode_picture(VADriverContextP ctx,
else
next_slice_param = next_slice_group_param;
- gen7_mfd_mpeg2_bsd_object(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
+ gen7_mfd_mpeg2_bsd_object(ctx, pic_param, slice_param, slice_data_bo, next_slice_param, gen7_mfd_context);
slice_param++;
}
}
@@ -1247,7 +1270,7 @@ gen7_mfd_vc1_decode_init(VADriverContextP ctx,
/* Current decoded picture */
obj_surface = decode_state->render_object;
- i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
+ i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
gen7_mfd_init_vc1_surface(ctx, pic_param, obj_surface);
dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
@@ -1798,13 +1821,13 @@ gen7_mfd_jpeg_decode_init(VADriverContextP ctx,
struct object_surface *obj_surface;
VAPictureParameterBufferJPEGBaseline *pic_param;
int subsampling = SUBSAMPLE_YUV420;
- int fourcc = VA_FOURCC('I', 'M', 'C', '3');
+ int fourcc = VA_FOURCC_IMC3;
pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
if (pic_param->num_components == 1) {
subsampling = SUBSAMPLE_YUV400;
- fourcc = VA_FOURCC('Y', '8', '0', '0');
+ fourcc = VA_FOURCC_Y800;
} else if (pic_param->num_components == 3) {
int h1 = pic_param->components[0].h_sampling_factor;
int h2 = pic_param->components[1].h_sampling_factor;
@@ -1816,31 +1839,31 @@ gen7_mfd_jpeg_decode_init(VADriverContextP ctx,
if (h1 == 2 && h2 == 1 && h3 == 1 &&
v1 == 2 && v2 == 1 && v3 == 1) {
subsampling = SUBSAMPLE_YUV420;
- fourcc = VA_FOURCC('I', 'M', 'C', '3');
+ fourcc = VA_FOURCC_IMC3;
} else if (h1 == 2 && h2 == 1 && h3 == 1 &&
v1 == 1 && v2 == 1 && v3 == 1) {
subsampling = SUBSAMPLE_YUV422H;
- fourcc = VA_FOURCC('4', '2', '2', 'H');
+ fourcc = VA_FOURCC_422H;
} else if (h1 == 1 && h2 == 1 && h3 == 1 &&
v1 == 1 && v2 == 1 && v3 == 1) {
subsampling = SUBSAMPLE_YUV444;
- fourcc = VA_FOURCC('4', '4', '4', 'P');
+ fourcc = VA_FOURCC_444P;
} else if (h1 == 4 && h2 == 1 && h3 == 1 &&
v1 == 1 && v2 == 1 && v3 == 1) {
subsampling = SUBSAMPLE_YUV411;
- fourcc = VA_FOURCC('4', '1', '1', 'P');
+ fourcc = VA_FOURCC_411P;
} else if (h1 == 1 && h2 == 1 && h3 == 1 &&
v1 == 2 && v2 == 1 && v3 == 1) {
subsampling = SUBSAMPLE_YUV422V;
- fourcc = VA_FOURCC('4', '2', '2', 'V');
+ fourcc = VA_FOURCC_422V;
} else if (h1 == 2 && h2 == 1 && h3 == 1 &&
v1 == 2 && v2 == 2 && v3 == 2) {
subsampling = SUBSAMPLE_YUV422H;
- fourcc = VA_FOURCC('4', '2', '2', 'H');
+ fourcc = VA_FOURCC_422H;
} else if (h2 == 2 && h2 == 2 && h3 == 2 &&
v1 == 2 && v2 == 1 && v3 == 1) {
subsampling = SUBSAMPLE_YUV422V;
- fourcc = VA_FOURCC('4', '2', '2', 'V');
+ fourcc = VA_FOURCC_422V;
} else
assert(0);
} else {
@@ -2091,18 +2114,6 @@ gen7_mfd_jpeg_bsd_object(VADriverContextP ctx,
/* Workaround for JPEG decoding on Ivybridge */
-VAStatus
-i965_DestroySurfaces(VADriverContextP ctx,
- VASurfaceID *surface_list,
- int num_surfaces);
-VAStatus
-i965_CreateSurfaces(VADriverContextP ctx,
- int width,
- int height,
- int format,
- int num_surfaces,
- VASurfaceID *surfaces);
-
static struct {
int width;
int height;
@@ -2145,7 +2156,7 @@ gen7_jpeg_wa_init(VADriverContextP ctx,
obj_surface = SURFACE(gen7_mfd_context->jpeg_wa_surface_id);
assert(obj_surface);
- i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
+ i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
gen7_mfd_context->jpeg_wa_surface_object = obj_surface;
if (!gen7_mfd_context->jpeg_wa_slice_data_bo) {
@@ -2320,7 +2331,7 @@ gen7_jpeg_wa_avc_img_state(VADriverContextP ctx,
BEGIN_BCS_BATCH(batch, 16);
OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
OUT_BCS_BATCH(batch,
- width_in_mbs * height_in_mbs);
+ (width_in_mbs * height_in_mbs - 1));
OUT_BCS_BATCH(batch,
((height_in_mbs - 1) << 16) |
((width_in_mbs - 1) << 0));
@@ -2614,9 +2625,10 @@ gen7_mfd_decode_picture(VADriverContextP ctx,
gen7_mfd_mpeg2_decode_picture(ctx, decode_state, gen7_mfd_context);
break;
- case VAProfileH264Baseline:
+ case VAProfileH264ConstrainedBaseline:
case VAProfileH264Main:
case VAProfileH264High:
+ case VAProfileH264StereoHigh:
gen7_mfd_avc_decode_picture(ctx, decode_state, gen7_mfd_context);
break;
@@ -2708,9 +2720,10 @@ gen7_dec_hw_context_init(VADriverContextP ctx, struct object_config *obj_config)
gen7_mfd_mpeg2_context_init(ctx, gen7_mfd_context);
break;
- case VAProfileH264Baseline:
+ case VAProfileH264ConstrainedBaseline:
case VAProfileH264Main:
case VAProfileH264High:
+ case VAProfileH264StereoHigh:
gen7_mfd_avc_context_init(ctx, gen7_mfd_context);
break;
default:
diff --git a/src/gen7_mfd.h b/src/gen7_mfd.h
index e3111ab..af8e960 100644
--- a/src/gen7_mfd.h
+++ b/src/gen7_mfd.h
@@ -77,6 +77,7 @@ struct gen7_mfd_context
VAIQMatrixBufferH264 h264; /* flat scaling lists (default) */
} iq_matrix;
+ GenFrameStoreContext fs_ctx;
GenFrameStore reference_surface[MAX_GEN_REFERENCE_FRAMES];
GenBuffer post_deblocking_output;
GenBuffer pre_deblocking_output;
@@ -85,6 +86,7 @@ struct gen7_mfd_context
GenBuffer bsd_mpc_row_store_scratch_buffer;
GenBuffer mpr_row_store_scratch_buffer;
GenBuffer bitplane_read_buffer;
+ GenBuffer segmentation_buffer;
VASurfaceID jpeg_wa_surface_id;
struct object_surface *jpeg_wa_surface_object;
diff --git a/src/gen7_vme.c b/src/gen7_vme.c
index 88eb484..dc15445 100644
--- a/src/gen7_vme.c
+++ b/src/gen7_vme.c
@@ -45,9 +45,6 @@
#endif
#define VME_MSG_LENGTH 32
-#define SURFACE_STATE_PADDED_SIZE_0_GEN7 ALIGN(sizeof(struct gen7_surface_state), 32)
-#define SURFACE_STATE_PADDED_SIZE_1_GEN7 ALIGN(sizeof(struct gen7_surface_state2), 32)
-#define SURFACE_STATE_PADDED_SIZE_GEN7 MAX(SURFACE_STATE_PADDED_SIZE_0_GEN7, SURFACE_STATE_PADDED_SIZE_1_GEN7)
#define SURFACE_STATE_PADDED_SIZE SURFACE_STATE_PADDED_SIZE_GEN7
#define SURFACE_STATE_OFFSET(index) (SURFACE_STATE_PADDED_SIZE * index)
@@ -126,7 +123,7 @@ static struct i965_kernel gen7_vme_kernels[] = {
};
static const uint32_t gen7_vme_mpeg2_inter_frame[][4] = {
-#include "shaders/vme/mpeg2_inter_frame.g7b"
+#include "shaders/vme/mpeg2_inter_ivb.g7b"
};
static const uint32_t gen7_vme_mpeg2_batchbuffer[][4] = {
@@ -250,7 +247,6 @@ gen7_vme_surface_setup(VADriverContextP ctx,
struct intel_encoder_context *encoder_context)
{
struct object_surface *obj_surface;
- struct i965_driver_data *i965 = i965_driver_data(ctx);
/*Setup surfaces state*/
/* current picture for encoding */
@@ -261,43 +257,14 @@ gen7_vme_surface_setup(VADriverContextP ctx,
if (!is_intra) {
VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
int slice_type;
- struct object_surface *slice_obj_surface;
- int ref_surface_id;
slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
+ assert(slice_type != SLICE_TYPE_I && slice_type != SLICE_TYPE_SI);
- if (slice_type == SLICE_TYPE_P || slice_type == SLICE_TYPE_B) {
- slice_obj_surface = NULL;
- ref_surface_id = slice_param->RefPicList0[0].picture_id;
- if (ref_surface_id != 0 && ref_surface_id != VA_INVALID_SURFACE) {
- slice_obj_surface = SURFACE(ref_surface_id);
- }
- if (slice_obj_surface && slice_obj_surface->bo) {
- obj_surface = slice_obj_surface;
- } else {
- obj_surface = encode_state->reference_objects[0];
- }
- /* reference 0 */
- if (obj_surface && obj_surface->bo)
- gen7_vme_source_surface_state(ctx, 1, obj_surface, encoder_context);
- }
- if (slice_type == SLICE_TYPE_B) {
- /* reference 1 */
- slice_obj_surface = NULL;
- ref_surface_id = slice_param->RefPicList1[0].picture_id;
- if (ref_surface_id != 0 && ref_surface_id != VA_INVALID_SURFACE) {
- slice_obj_surface = SURFACE(ref_surface_id);
- }
- if (slice_obj_surface && slice_obj_surface->bo) {
- obj_surface = slice_obj_surface;
- } else {
- obj_surface = encode_state->reference_objects[0];
- }
-
- obj_surface = encode_state->reference_objects[1];
- if (obj_surface && obj_surface->bo)
- gen7_vme_source_surface_state(ctx, 2, obj_surface, encoder_context);
- }
+ intel_avc_vme_reference_state(ctx, encode_state, encoder_context, 0, 1, gen7_vme_source_surface_state);
+
+ if (slice_type == SLICE_TYPE_B)
+ intel_avc_vme_reference_state(ctx, encode_state, encoder_context, 1, 2, gen7_vme_source_surface_state);
}
/* VME output */
@@ -359,100 +326,39 @@ static VAStatus gen7_vme_constant_setup(VADriverContextP ctx,
struct intel_encoder_context *encoder_context)
{
struct gen6_vme_context *vme_context = encoder_context->vme_context;
- // unsigned char *constant_buffer;
+ unsigned char *constant_buffer;
unsigned int *vme_state_message;
- int mv_num = 32;
- if (vme_context->h264_level >= 30) {
- mv_num = 16;
- if (vme_context->h264_level >= 31)
- mv_num = 8;
- }
+ int mv_num;
+
+ vme_state_message = (unsigned int *)vme_context->vme_state_message;
+ mv_num = 32;
+
+ if (encoder_context->codec == CODEC_H264) {
+ if (vme_context->h264_level >= 30) {
+ mv_num = 16;
+
+ if (vme_context->h264_level >= 31)
+ mv_num = 8;
+ }
+ } else if (encoder_context->codec == CODEC_MPEG2) {
+ mv_num = 2;
+ }
+
+
+ vme_state_message[31] = mv_num;
dri_bo_map(vme_context->gpe_context.curbe.bo, 1);
assert(vme_context->gpe_context.curbe.bo->virtual);
- // constant_buffer = vme_context->curbe.bo->virtual;
- vme_state_message = (unsigned int *)vme_context->gpe_context.curbe.bo->virtual;
- vme_state_message[31] = mv_num;
-
- /*TODO copy buffer into CURB*/
+ constant_buffer = vme_context->gpe_context.curbe.bo->virtual;
+ /* Pass the required constant info into the constant buffer */
+ memcpy(constant_buffer, (char *)vme_context->vme_state_message, 128);
+
dri_bo_unmap( vme_context->gpe_context.curbe.bo);
return VA_STATUS_SUCCESS;
}
-static const unsigned int intra_mb_mode_cost_table[] = {
- 0x31110001, // for qp0
- 0x09110001, // for qp1
- 0x15030001, // for qp2
- 0x0b030001, // for qp3
- 0x0d030011, // for qp4
- 0x17210011, // for qp5
- 0x41210011, // for qp6
- 0x19210011, // for qp7
- 0x25050003, // for qp8
- 0x1b130003, // for qp9
- 0x1d130003, // for qp10
- 0x27070021, // for qp11
- 0x51310021, // for qp12
- 0x29090021, // for qp13
- 0x35150005, // for qp14
- 0x2b0b0013, // for qp15
- 0x2d0d0013, // for qp16
- 0x37170007, // for qp17
- 0x61410031, // for qp18
- 0x39190009, // for qp19
- 0x45250015, // for qp20
- 0x3b1b000b, // for qp21
- 0x3d1d000d, // for qp22
- 0x47270017, // for qp23
- 0x71510041, // for qp24 ! center for qp=0..30
- 0x49290019, // for qp25
- 0x55350025, // for qp26
- 0x4b2b001b, // for qp27
- 0x4d2d001d, // for qp28
- 0x57370027, // for qp29
- 0x81610051, // for qp30
- 0x57270017, // for qp31
- 0x81510041, // for qp32 ! center for qp=31..51
- 0x59290019, // for qp33
- 0x65350025, // for qp34
- 0x5b2b001b, // for qp35
- 0x5d2d001d, // for qp36
- 0x67370027, // for qp37
- 0x91610051, // for qp38
- 0x69390029, // for qp39
- 0x75450035, // for qp40
- 0x6b3b002b, // for qp41
- 0x6d3d002d, // for qp42
- 0x77470037, // for qp43
- 0xa1710061, // for qp44
- 0x79490039, // for qp45
- 0x85550045, // for qp46
- 0x7b4b003b, // for qp47
- 0x7d4d003d, // for qp48
- 0x87570047, // for qp49
- 0xb1810071, // for qp50
- 0x89590049 // for qp51
-};
-
-static void gen7_vme_state_setup_fixup(VADriverContextP ctx,
- struct encode_state *encode_state,
- struct intel_encoder_context *encoder_context,
- unsigned int *vme_state_message)
-{
- struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
- VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
- VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
-
- if (slice_param->slice_type != SLICE_TYPE_I &&
- slice_param->slice_type != SLICE_TYPE_SI)
- return;
- if (encoder_context->rate_control_mode == VA_RC_CQP)
- vme_state_message[16] = intra_mb_mode_cost_table[pic_param->pic_init_qp + slice_param->slice_qp_delta];
- else
- vme_state_message[16] = intra_mb_mode_cost_table[mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY];
-}
static VAStatus gen7_vme_avc_state_setup(VADriverContextP ctx,
struct encode_state *encode_state,
@@ -461,48 +367,50 @@ static VAStatus gen7_vme_avc_state_setup(VADriverContextP ctx,
{
struct gen6_vme_context *vme_context = encoder_context->vme_context;
unsigned int *vme_state_message;
- unsigned int *mb_cost_table;
+ unsigned int *mb_cost_table;
int i;
VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
+ unsigned int is_low_quality = (encoder_context->quality_level == ENCODER_LOW_QUALITY);
- mb_cost_table = (unsigned int *)vme_context->vme_state_message;
+ mb_cost_table = (unsigned int *)vme_context->vme_state_message;
//building VME state message
dri_bo_map(vme_context->vme_state.bo, 1);
assert(vme_context->vme_state.bo->virtual);
vme_state_message = (unsigned int *)vme_context->vme_state.bo->virtual;
- if ((slice_param->slice_type == SLICE_TYPE_P) ||
- (slice_param->slice_type == SLICE_TYPE_SP)) {
- vme_state_message[0] = 0x01010101;
- vme_state_message[1] = 0x10010101;
- vme_state_message[2] = 0x0F0F0F0F;
- vme_state_message[3] = 0x100F0F0F;
- vme_state_message[4] = 0x01010101;
- vme_state_message[5] = 0x10010101;
- vme_state_message[6] = 0x0F0F0F0F;
- vme_state_message[7] = 0x100F0F0F;
- vme_state_message[8] = 0x01010101;
- vme_state_message[9] = 0x10010101;
- vme_state_message[10] = 0x0F0F0F0F;
- vme_state_message[11] = 0x000F0F0F;
- vme_state_message[12] = 0x00;
- vme_state_message[13] = 0x00;
- } else {
- vme_state_message[0] = 0x10010101;
- vme_state_message[1] = 0x100F0F0F;
- vme_state_message[2] = 0x10010101;
- vme_state_message[3] = 0x000F0F0F;
- vme_state_message[4] = 0;
- vme_state_message[5] = 0;
- vme_state_message[6] = 0;
- vme_state_message[7] = 0;
- vme_state_message[8] = 0;
- vme_state_message[9] = 0;
- vme_state_message[10] = 0;
- vme_state_message[11] = 0;
- vme_state_message[12] = 0;
- vme_state_message[13] = 0;
- }
+ if (((slice_param->slice_type == SLICE_TYPE_P) ||
+ (slice_param->slice_type == SLICE_TYPE_SP) &&
+ !is_low_quality)) {
+ vme_state_message[0] = 0x01010101;
+ vme_state_message[1] = 0x10010101;
+ vme_state_message[2] = 0x0F0F0F0F;
+ vme_state_message[3] = 0x100F0F0F;
+ vme_state_message[4] = 0x01010101;
+ vme_state_message[5] = 0x10010101;
+ vme_state_message[6] = 0x0F0F0F0F;
+ vme_state_message[7] = 0x100F0F0F;
+ vme_state_message[8] = 0x01010101;
+ vme_state_message[9] = 0x10010101;
+ vme_state_message[10] = 0x0F0F0F0F;
+ vme_state_message[11] = 0x000F0F0F;
+ vme_state_message[12] = 0x00;
+ vme_state_message[13] = 0x00;
+ } else {
+ vme_state_message[0] = 0x10010101;
+ vme_state_message[1] = 0x100F0F0F;
+ vme_state_message[2] = 0x10010101;
+ vme_state_message[3] = 0x000F0F0F;
+ vme_state_message[4] = 0;
+ vme_state_message[5] = 0;
+ vme_state_message[6] = 0;
+ vme_state_message[7] = 0;
+ vme_state_message[8] = 0;
+ vme_state_message[9] = 0;
+ vme_state_message[10] = 0;
+ vme_state_message[11] = 0;
+ vme_state_message[12] = 0;
+ vme_state_message[13] = 0;
+ }
vme_state_message[14] = (mb_cost_table[2] & 0xFFFF);
vme_state_message[15] = 0;
@@ -519,14 +427,17 @@ static VAStatus gen7_vme_avc_state_setup(VADriverContextP ctx,
return VA_STATUS_SUCCESS;
}
-static VAStatus gen7_vme_vme_state_setup(VADriverContextP ctx,
- struct encode_state *encode_state,
- int is_intra,
- struct intel_encoder_context *encoder_context)
+static VAStatus gen7_vme_mpeg2_state_setup(VADriverContextP ctx,
+ struct encode_state *encode_state,
+ int is_intra,
+ struct intel_encoder_context *encoder_context)
{
struct gen6_vme_context *vme_context = encoder_context->vme_context;
unsigned int *vme_state_message;
int i;
+ unsigned int *mb_cost_table;
+
+ mb_cost_table = (unsigned int *)vme_context->vme_state_message;
//building VME state message
dri_bo_map(vme_context->vme_state.bo, 1);
@@ -548,20 +459,18 @@ static VAStatus gen7_vme_vme_state_setup(VADriverContextP ctx,
vme_state_message[12] = 0x00;
vme_state_message[13] = 0x00;
- vme_state_message[14] = 0x4a4a;
- vme_state_message[15] = 0x0;
- vme_state_message[16] = 0x4a4a4a4a;
- vme_state_message[17] = 0x4a4a4a4a;
- vme_state_message[18] = 0x21110100;
- vme_state_message[19] = 0x61514131;
+ vme_state_message[14] = (mb_cost_table[2] & 0xFFFF);
+ vme_state_message[15] = 0;
+ vme_state_message[16] = mb_cost_table[0];
+ vme_state_message[17] = 0;
+ vme_state_message[18] = mb_cost_table[3];
+ vme_state_message[19] = mb_cost_table[4];
for(i = 20; i < 32; i++) {
vme_state_message[i] = 0;
}
//vme_state_message[16] = 0x42424242; //cost function LUT set 0 for Intra
- gen7_vme_state_setup_fixup(ctx, encode_state, encoder_context, vme_state_message);
-
dri_bo_unmap( vme_context->vme_state.bo);
return VA_STATUS_SUCCESS;
}
@@ -637,7 +546,7 @@ gen7_vme_fill_vme_batchbuffer(VADriverContextP ctx,
/*inline data */
*command_ptr++ = (mb_width << 16 | mb_y << 8 | mb_x);
- *command_ptr++ = ( (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
+ *command_ptr++ = ((encoder_context->quality_level << 24) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
i += 1;
}
@@ -691,47 +600,52 @@ static void gen7_vme_pipeline_programing(VADriverContextP ctx,
int s;
bool allow_hwscore = true;
int kernel_shader;
-
- for (s = 0; s < encode_state->num_slice_params_ext; s++) {
- pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer;
- if ((pSliceParameter->macroblock_address % width_in_mbs)) {
- allow_hwscore = false;
- break;
- }
+ unsigned int is_low_quality = (encoder_context->quality_level == ENCODER_LOW_QUALITY);
+
+ if (is_low_quality)
+ allow_hwscore = false;
+ else {
+ for (s = 0; s < encode_state->num_slice_params_ext; s++) {
+ pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer;
+ if ((pSliceParameter->macroblock_address % width_in_mbs)) {
+ allow_hwscore = false;
+ break;
+ }
+ }
}
if ((pSliceParameter->slice_type == SLICE_TYPE_I) ||
(pSliceParameter->slice_type == SLICE_TYPE_I)) {
kernel_shader = AVC_VME_INTRA_SHADER;
} else if ((pSliceParameter->slice_type == SLICE_TYPE_P) ||
- (pSliceParameter->slice_type == SLICE_TYPE_SP)) {
+ (pSliceParameter->slice_type == SLICE_TYPE_SP)) {
kernel_shader = AVC_VME_INTER_SHADER;
} else {
kernel_shader = AVC_VME_BINTER_SHADER;
if (!allow_hwscore)
- kernel_shader = AVC_VME_INTER_SHADER;
+ kernel_shader = AVC_VME_INTER_SHADER;
}
if (allow_hwscore)
gen7_vme_walker_fill_vme_batchbuffer(ctx,
- encode_state,
- width_in_mbs, height_in_mbs,
- kernel_shader,
- pPicParameter->pic_fields.bits.transform_8x8_mode_flag,
- encoder_context);
+ encode_state,
+ width_in_mbs, height_in_mbs,
+ kernel_shader,
+ pPicParameter->pic_fields.bits.transform_8x8_mode_flag,
+ encoder_context);
else
gen7_vme_fill_vme_batchbuffer(ctx,
- encode_state,
- width_in_mbs, height_in_mbs,
- kernel_shader,
- pPicParameter->pic_fields.bits.transform_8x8_mode_flag,
- encoder_context);
+ encode_state,
+ width_in_mbs, height_in_mbs,
+ kernel_shader,
+ pPicParameter->pic_fields.bits.transform_8x8_mode_flag,
+ encoder_context);
intel_batchbuffer_start_atomic(batch, 0x1000);
gen6_gpe_pipeline_setup(ctx, &vme_context->gpe_context, batch);
BEGIN_BATCH(batch, 2);
- OUT_BATCH(batch, MI_BATCH_BUFFER_START | (2 << 6));
+ OUT_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8));
OUT_RELOC(batch,
vme_context->vme_batchbuffer.bo,
I915_GEM_DOMAIN_COMMAND, 0,
@@ -752,7 +666,7 @@ static VAStatus gen7_vme_prepare(VADriverContextP ctx,
struct gen6_vme_context *vme_context = encoder_context->vme_context;
if (!vme_context->h264_level ||
- (vme_context->h264_level != pSequenceParameter->level_idc)) {
+ (vme_context->h264_level != pSequenceParameter->level_idc)) {
vme_context->h264_level = pSequenceParameter->level_idc;
}
@@ -803,10 +717,10 @@ gen7_vme_pipeline(VADriverContextP ctx,
static void
gen7_vme_mpeg2_output_buffer_setup(VADriverContextP ctx,
- struct encode_state *encode_state,
- int index,
- int is_intra,
- struct intel_encoder_context *encoder_context)
+ struct encode_state *encode_state,
+ int index,
+ int is_intra,
+ struct intel_encoder_context *encoder_context)
{
struct i965_driver_data *i965 = i965_driver_data(ctx);
@@ -837,9 +751,9 @@ gen7_vme_mpeg2_output_buffer_setup(VADriverContextP ctx,
static void
gen7_vme_mpeg2_output_vme_batchbuffer_setup(VADriverContextP ctx,
- struct encode_state *encode_state,
- int index,
- struct intel_encoder_context *encoder_context)
+ struct encode_state *encode_state,
+ int index,
+ struct intel_encoder_context *encoder_context)
{
struct i965_driver_data *i965 = i965_driver_data(ctx);
@@ -864,9 +778,9 @@ gen7_vme_mpeg2_output_vme_batchbuffer_setup(VADriverContextP ctx,
static VAStatus
gen7_vme_mpeg2_surface_setup(VADriverContextP ctx,
- struct encode_state *encode_state,
- int is_intra,
- struct intel_encoder_context *encoder_context)
+ struct encode_state *encode_state,
+ int is_intra,
+ struct intel_encoder_context *encoder_context)
{
struct object_surface *obj_surface;
@@ -897,14 +811,13 @@ gen7_vme_mpeg2_surface_setup(VADriverContextP ctx,
static void
gen7_vme_mpeg2_fill_vme_batchbuffer(VADriverContextP ctx,
- struct encode_state *encode_state,
- int mb_width, int mb_height,
- int kernel,
- int transform_8x8_mode_flag,
- struct intel_encoder_context *encoder_context)
+ struct encode_state *encode_state,
+ int mb_width, int mb_height,
+ int kernel,
+ int transform_8x8_mode_flag,
+ struct intel_encoder_context *encoder_context)
{
struct gen6_vme_context *vme_context = encoder_context->vme_context;
- int number_mb_cmds;
int mb_x = 0, mb_y = 0;
int i, s, j;
unsigned int *command_ptr;
@@ -918,33 +831,43 @@ gen7_vme_mpeg2_fill_vme_batchbuffer(VADriverContextP ctx,
for (j = 0; j < encode_state->slice_params_ext[s]->num_elements; j++) {
int slice_mb_begin = slice_param->macroblock_address;
int slice_mb_number = slice_param->num_macroblocks;
+ unsigned int mb_intra_ub;
for (i = 0; i < slice_mb_number;) {
- int mb_count = i + slice_mb_begin;
+ int mb_count = i + slice_mb_begin;
mb_x = mb_count % mb_width;
mb_y = mb_count / mb_width;
+ mb_intra_ub = 0;
- if( i == 0) {
- number_mb_cmds = mb_width;
- } else if ((i + 128) <= slice_mb_number) {
- number_mb_cmds = 128;
- } else {
- number_mb_cmds = slice_mb_number - i;
+ if (mb_x != 0) {
+ mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
}
+ if (mb_y != 0) {
+ mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
+
+ if (mb_x != 0)
+ mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
+
+ if (mb_x != (mb_width -1))
+ mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
+ }
+
+
+
*command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
*command_ptr++ = kernel;
*command_ptr++ = 0;
*command_ptr++ = 0;
*command_ptr++ = 0;
*command_ptr++ = 0;
-
+
/*inline data */
*command_ptr++ = (mb_width << 16 | mb_y << 8 | mb_x);
- *command_ptr++ = ( (number_mb_cmds << 16) | transform_8x8_mode_flag | ((i == 0) << 1));
+ *command_ptr++ = ( (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
- i += number_mb_cmds;
+ i += 1;
}
slice_param++;
@@ -959,9 +882,9 @@ gen7_vme_mpeg2_fill_vme_batchbuffer(VADriverContextP ctx,
static void
gen7_vme_mpeg2_pipeline_programing(VADriverContextP ctx,
- struct encode_state *encode_state,
- int is_intra,
- struct intel_encoder_context *encoder_context)
+ struct encode_state *encode_state,
+ int is_intra,
+ struct intel_encoder_context *encoder_context)
{
struct gen6_vme_context *vme_context = encoder_context->vme_context;
struct intel_batchbuffer *batch = encoder_context->base.batch;
@@ -969,17 +892,39 @@ gen7_vme_mpeg2_pipeline_programing(VADriverContextP ctx,
int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
- gen7_vme_mpeg2_fill_vme_batchbuffer(ctx,
- encode_state,
- width_in_mbs, height_in_mbs,
- MPEG2_VME_INTER_SHADER,
- 0,
- encoder_context);
+ bool allow_hwscore = true;
+ int s;
+
+ for (s = 0; s < encode_state->num_slice_params_ext; s++) {
+ int j;
+ VAEncSliceParameterBufferMPEG2 *slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[s]->buffer;
+
+ for (j = 0; j < encode_state->slice_params_ext[s]->num_elements; j++) {
+ if (slice_param->macroblock_address % width_in_mbs) {
+ allow_hwscore = false;
+ break;
+ }
+ }
+ }
+
+ if (allow_hwscore)
+ gen7_vme_mpeg2_walker_fill_vme_batchbuffer(ctx,
+ encode_state,
+ width_in_mbs, height_in_mbs,
+ MPEG2_VME_INTER_SHADER,
+ encoder_context);
+ else
+ gen7_vme_mpeg2_fill_vme_batchbuffer(ctx,
+ encode_state,
+ width_in_mbs, height_in_mbs,
+ MPEG2_VME_INTER_SHADER,
+ 0,
+ encoder_context);
intel_batchbuffer_start_atomic(batch, 0x1000);
gen6_gpe_pipeline_setup(ctx, &vme_context->gpe_context, batch);
BEGIN_BATCH(batch, 2);
- OUT_BATCH(batch, MI_BATCH_BUFFER_START | (2 << 6));
+ OUT_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8));
OUT_RELOC(batch,
vme_context->vme_batchbuffer.bo,
I915_GEM_DOMAIN_COMMAND, 0,
@@ -991,16 +936,25 @@ gen7_vme_mpeg2_pipeline_programing(VADriverContextP ctx,
static VAStatus
gen7_vme_mpeg2_prepare(VADriverContextP ctx,
- struct encode_state *encode_state,
- struct intel_encoder_context *encoder_context)
+ struct encode_state *encode_state,
+ struct intel_encoder_context *encoder_context)
{
VAStatus vaStatus = VA_STATUS_SUCCESS;
+ VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
+ struct gen6_vme_context *vme_context = encoder_context->vme_context;
+
+ if ((!vme_context->mpeg2_level) ||
+ (vme_context->mpeg2_level != (seq_param->sequence_extension.bits.profile_and_level_indication & MPEG2_LEVEL_MASK))) {
+ vme_context->mpeg2_level = seq_param->sequence_extension.bits.profile_and_level_indication & MPEG2_LEVEL_MASK;
+ }
- /*Setup all the memory object*/
+ /*Setup all the memory object*/
+
+ intel_vme_mpeg2_state_setup(ctx, encode_state, encoder_context);
gen7_vme_mpeg2_surface_setup(ctx, encode_state, 0, encoder_context);
gen7_vme_interface_setup(ctx, encode_state, encoder_context);
- gen7_vme_vme_state_setup(ctx, encode_state, 0, encoder_context);
gen7_vme_constant_setup(ctx, encode_state, encoder_context);
+ gen7_vme_mpeg2_state_setup(ctx, encode_state, 0, encoder_context);
/*Programing media pipeline*/
gen7_vme_mpeg2_pipeline_programing(ctx, encode_state, 0, encoder_context);
@@ -1010,34 +964,34 @@ gen7_vme_mpeg2_prepare(VADriverContextP ctx,
static VAStatus
gen7_vme_mpeg2_pipeline(VADriverContextP ctx,
- VAProfile profile,
- struct encode_state *encode_state,
- struct intel_encoder_context *encoder_context)
+ VAProfile profile,
+ struct encode_state *encode_state,
+ struct intel_encoder_context *encoder_context)
{
struct i965_driver_data *i965 = i965_driver_data(ctx);
struct gen6_vme_context *vme_context = encoder_context->vme_context;
VAEncSliceParameterBufferMPEG2 *slice_param =
(VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[0]->buffer;
VAEncSequenceParameterBufferMPEG2 *seq_param =
- (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
+ (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
/*No need of to exec VME for Intra slice */
if (slice_param->is_intra_slice) {
- if(!vme_context->vme_output.bo) {
- int w_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
- int h_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
-
- vme_context->vme_output.num_blocks = w_in_mbs * h_in_mbs;
- vme_context->vme_output.pitch = 16; /* in bytes, always 16 */
- vme_context->vme_output.size_block = INTRA_VME_OUTPUT_IN_BYTES;
- vme_context->vme_output.bo = dri_bo_alloc(i965->intel.bufmgr,
- "MPEG2 VME output buffer",
- vme_context->vme_output.num_blocks
- * vme_context->vme_output.size_block,
- 0x1000);
- }
-
- return VA_STATUS_SUCCESS;
+ if(!vme_context->vme_output.bo) {
+ int w_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
+ int h_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
+
+ vme_context->vme_output.num_blocks = w_in_mbs * h_in_mbs;
+ vme_context->vme_output.pitch = 16; /* in bytes, always 16 */
+ vme_context->vme_output.size_block = INTRA_VME_OUTPUT_IN_BYTES;
+ vme_context->vme_output.bo = dri_bo_alloc(i965->intel.bufmgr,
+ "MPEG2 VME output buffer",
+ vme_context->vme_output.num_blocks
+ * vme_context->vme_output.size_block,
+ 0x1000);
+ }
+
+ return VA_STATUS_SUCCESS;
}
gen7_vme_media_init(ctx, encoder_context);
@@ -1078,7 +1032,7 @@ Bool gen7_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *e
struct i965_kernel *vme_kernel_list = NULL;
vme_context->gpe_context.surface_state_binding_table.length =
- (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;
+ (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;
vme_context->gpe_context.idrt.max_entries = MAX_INTERFACE_DESC_GEN6;
vme_context->gpe_context.idrt.entry_size = sizeof(struct gen6_interface_descriptor_data);
@@ -1092,21 +1046,18 @@ Bool gen7_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *e
gen7_vme_scoreboard_init(ctx, vme_context);
- if(encoder_context->profile == VAProfileH264Baseline ||
- encoder_context->profile == VAProfileH264Main ||
- encoder_context->profile == VAProfileH264High ){
+ if (encoder_context->codec == CODEC_H264) {
vme_kernel_list = gen7_vme_kernels;
vme_context->video_coding_type = VIDEO_CODING_AVC;
vme_context->vme_kernel_sum = AVC_VME_KERNEL_SUM;
encoder_context->vme_pipeline = gen7_vme_pipeline;
- } else if (encoder_context->profile == VAProfileMPEG2Simple ||
- encoder_context->profile == VAProfileMPEG2Main ){
+ } else if (encoder_context->codec == CODEC_MPEG2) {
vme_kernel_list = gen7_vme_mpeg2_kernels;
vme_context->video_coding_type = VIDEO_CODING_MPEG2;
vme_context->vme_kernel_sum = MPEG2_VME_KERNEL_SUM;
encoder_context->vme_pipeline = gen7_vme_mpeg2_pipeline;
} else {
- /* Unsupported encoding profile */
+ /* Unsupported codec */
assert(0);
}
diff --git a/src/gen8_mfc.c b/src/gen8_mfc.c
new file mode 100644
index 0000000..2d76816
--- /dev/null
+++ b/src/gen8_mfc.c
@@ -0,0 +1,2478 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Zhao Yakui <yakui.zhao@intel.com>
+ * Xiang Haihao <haihao.xiang@intel.com>
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+#include <assert.h>
+
+#include "intel_batchbuffer.h"
+#include "i965_defines.h"
+#include "i965_structs.h"
+#include "i965_drv_video.h"
+#include "i965_encoder.h"
+#include "i965_encoder_utils.h"
+#include "gen6_mfc.h"
+#include "gen6_vme.h"
+#include "intel_media.h"
+
+/* Per-surface stride used for the surface-state area of this file's GPE context. */
+#define SURFACE_STATE_PADDED_SIZE SURFACE_STATE_PADDED_SIZE_GEN8
+/* Byte offset of surface state number `index`; the argument is parenthesized
+ * so that expression arguments (e.g. `a + 1`) expand correctly. */
+#define SURFACE_STATE_OFFSET(index) (SURFACE_STATE_PADDED_SIZE * (index))
+/* Byte offset of binding-table entry `index`; the table starts right after
+ * MAX_MEDIA_SURFACES_GEN6 padded surface states. */
+#define BINDING_TABLE_OFFSET(index) (SURFACE_STATE_OFFSET(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * (index))
+
+#define MFC_SOFTWARE_HASWELL 1
+
+#define B0_STEP_REV 2
+/* True when the device revision is at least B0_STEP_REV. */
+#define IS_STEPPING_BPLUS(i965) (((i965)->intel.revision) >= B0_STEP_REV)
+
+/*
+ * Instruction words for the two batchbuffer-building kernels, pulled in from
+ * the pre-assembled shader files.
+ * NOTE(review): the included files carry the .g7b suffix; confirm that these
+ * binaries are the ones intended for this gen8 file.
+ */
+static const uint32_t gen8_mfc_batchbuffer_avc_intra[][4] = {
+#include "shaders/utils/mfc_batchbuffer_avc_intra.g7b"
+};
+
+static const uint32_t gen8_mfc_batchbuffer_avc_inter[][4] = {
+#include "shaders/utils/mfc_batchbuffer_avc_inter.g7b"
+};
+
+/*
+ * Kernel table for the MFC path. Each entry gives a label string, the
+ * MFC_BATCHBUFFER_AVC_* identifier, the instruction array and its size in
+ * bytes, followed by NULL (presumably the not-yet-allocated buffer object --
+ * verify against struct i965_kernel).
+ */
+static struct i965_kernel gen8_mfc_kernels[] = {
+ {
+ "MFC AVC INTRA BATCHBUFFER ",
+ MFC_BATCHBUFFER_AVC_INTRA,
+ gen8_mfc_batchbuffer_avc_intra,
+ sizeof(gen8_mfc_batchbuffer_avc_intra),
+ NULL
+ },
+
+ {
+ "MFC AVC INTER BATCHBUFFER ",
+ MFC_BATCHBUFFER_AVC_INTER,
+ gen8_mfc_batchbuffer_avc_inter,
+ sizeof(gen8_mfc_batchbuffer_avc_inter),
+ NULL
+ },
+};
+
+/*
+ * Masks and values for inter macroblock partition mode and sub-MB shape.
+ * NOTE(review): the code using these is outside this view; presumably they
+ * decode fields of the VME output message -- confirm the bit positions there.
+ */
+#define INTER_MODE_MASK 0x03
+#define INTER_8X8 0x03
+#define INTER_16X8 0x01
+#define INTER_8X16 0x02
+#define SUBMB_SHAPE_MASK 0x00FF00
+
+/* Values pre-shifted to bit 20; presumably motion-vector count selectors -- TODO confirm. */
+#define INTER_MV8 (4 << 20)
+#define INTER_MV32 (6 << 20)
+
+
+/*
+ * Emit the 5-dword MFX_PIPE_MODE_SELECT command on the encoder's BCS batch.
+ *
+ * standard_select must be MFX_FORMAT_MPEG2 or MFX_FORMAT_AVC (asserted).
+ * The post- and pre-deblocking output enable bits are derived from whether
+ * the matching buffer objects in mfc_context are non-NULL.
+ * ctx is not used here.
+ */
+static void
+gen8_mfc_pipe_mode_select(VADriverContextP ctx,
+ int standard_select,
+ struct intel_encoder_context *encoder_context)
+{
+ struct intel_batchbuffer *batch = encoder_context->base.batch;
+ struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
+
+ assert(standard_select == MFX_FORMAT_MPEG2 ||
+ standard_select == MFX_FORMAT_AVC);
+
+ BEGIN_BCS_BATCH(batch, 5);
+
+ OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
+ /* DW1: mode, output enables and codec selection */
+ OUT_BCS_BATCH(batch,
+ (MFX_LONG_MODE << 17) | /* Must be long format for encoder */
+ (MFD_MODE_VLD << 15) | /* VLD mode */
+ (0 << 10) | /* Stream-Out Enable */
+ ((!!mfc_context->post_deblocking_output.bo) << 9) | /* Post Deblocking Output */
+ ((!!mfc_context->pre_deblocking_output.bo) << 8) | /* Pre Deblocking Output */
+ (0 << 5) | /* not in stitch mode */
+ (1 << 4) | /* encoding mode */
+ (standard_select << 0)); /* standard select: avc or mpeg2 */
+ /* DW2: every field is written as 0 */
+ OUT_BCS_BATCH(batch,
+ (0 << 7) | /* expand NOA bus flag */
+ (0 << 6) | /* disable slice-level clock gating */
+ (0 << 5) | /* disable clock gating for NOA */
+ (0 << 4) | /* terminate if AVC motion and POC table error occurs */
+ (0 << 3) | /* terminate if AVC mbdata error occurs */
+ (0 << 2) | /* terminate if AVC CABAC/CAVLC decode error occurs */
+ (0 << 1) |
+ (0 << 0));
+ /* DW3-4: written as zero */
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+
+ ADVANCE_BCS_BATCH(batch);
+}
+
+/*
+ * Emit the 6-dword MFX_SURFACE_STATE command describing the surface recorded
+ * in mfc_context->surface_state (width, height, w_pitch, h_pitch).
+ * The format is hard-wired to planar 4:2:0 with interleaved U/V, tiled,
+ * Y-major tile walk. ctx is not used here.
+ */
+static void
+gen8_mfc_surface_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
+{
+ struct intel_batchbuffer *batch = encoder_context->base.batch;
+ struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
+
+ BEGIN_BCS_BATCH(batch, 6);
+
+ OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
+ OUT_BCS_BATCH(batch, 0);
+ /* DW2: height and width, each encoded as (value - 1) */
+ OUT_BCS_BATCH(batch,
+ ((mfc_context->surface_state.height - 1) << 18) |
+ ((mfc_context->surface_state.width - 1) << 4));
+ /* DW3: format, pitch and tiling */
+ OUT_BCS_BATCH(batch,
+ (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
+ (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */
+ (0 << 22) | /* surface object control state, FIXME??? */
+ ((mfc_context->surface_state.w_pitch - 1) << 3) | /* pitch */
+ (0 << 2) | /* must be 0 for interleave U/V */
+ (1 << 1) | /* must be tiled */
+ (I965_TILEWALK_YMAJOR << 0)); /* tile walk, TILEWALK_YMAJOR */
+ /* DW4: chroma plane placement */
+ OUT_BCS_BATCH(batch,
+ (0 << 16) | /* must be 0 for interleave U/V */
+ (mfc_context->surface_state.h_pitch)); /* y offset for U(cb) */
+ OUT_BCS_BATCH(batch, 0);
+
+ ADVANCE_BCS_BATCH(batch);
+}
+
+/*
+ * Emit the 26-dword MFX_IND_OBJ_BASE_ADDR_STATE command.
+ *
+ * Only two address ranges are programmed: the indirect MV object range points
+ * at the VME output buffer (base, and base + size_block * num_blocks as the
+ * upper bound), and the PAK-BSE object range points at
+ * mfc_indirect_pak_bse_object (base and end_offset). All other dwords are 0.
+ * ctx is not used here.
+ */
+static void
+gen8_mfc_ind_obj_base_addr_state(VADriverContextP ctx,
+ struct intel_encoder_context *encoder_context)
+{
+ struct intel_batchbuffer *batch = encoder_context->base.batch;
+ struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
+ struct gen6_vme_context *vme_context = encoder_context->vme_context;
+ int vme_size;
+
+ BEGIN_BCS_BATCH(batch, 26);
+
+ OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
+ /* the DW1-3 is for the MFX indirect bitstream offset */
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+ /* the DW4-5 is the MFX upper bound */
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+
+ /* total byte size of the VME output, used below as the upper-bound offset */
+ vme_size = vme_context->vme_output.size_block * vme_context->vme_output.num_blocks;
+ /* the DW6-10 is for MFX Indirect MV Object Base Address */
+ OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, vme_size);
+ OUT_BCS_BATCH(batch, 0);
+
+ /* the DW11-15 is for MFX IT-COFF. Not used on encoder */
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+
+ /* the DW16-20 is for MFX indirect DBLK. Not used on encoder */
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+
+ /* the DW21-25 is for MFC Indirect PAK-BSE Object Base Address for Encoder*/
+ OUT_BCS_RELOC(batch,
+ mfc_context->mfc_indirect_pak_bse_object.bo,
+ I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+ 0);
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+
+ /* upper bound of the PAK-BSE range: same bo, offset end_offset */
+ OUT_BCS_RELOC(batch,
+ mfc_context->mfc_indirect_pak_bse_object.bo,
+ I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+ mfc_context->mfc_indirect_pak_bse_object.end_offset);
+ OUT_BCS_BATCH(batch, 0);
+
+ ADVANCE_BCS_BATCH(batch);
+}
+
+/*
+ * Emit the 16-dword MFX_AVC_IMG_STATE command for the current picture.
+ *
+ * Frame dimensions in macroblocks are derived from mfc_context->surface_state
+ * (rounded up to a multiple of 16 pixels). The weighted-prediction, entropy
+ * coding and 8x8 transform flags are taken from the H.264 picture parameter
+ * buffer in encode_state; everything else is a fixed value. ctx is not used.
+ */
+static void
+gen8_mfc_avc_img_state(VADriverContextP ctx, struct encode_state *encode_state,
+ struct intel_encoder_context *encoder_context)
+{
+ struct intel_batchbuffer *batch = encoder_context->base.batch;
+ struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
+ VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
+
+ int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
+ int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
+
+ BEGIN_BCS_BATCH(batch, 16);
+
+ OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
+ /*DW1. MB setting of frame */
+ /* total MB count minus one, masked to 16 bits */
+ OUT_BCS_BATCH(batch,
+ ((width_in_mbs * height_in_mbs - 1) & 0xFFFF));
+ OUT_BCS_BATCH(batch,
+ ((height_in_mbs - 1) << 16) |
+ ((width_in_mbs - 1) << 0));
+ /* DW3 QP setting */
+ OUT_BCS_BATCH(batch,
+ (0 << 24) | /* Second Chroma QP Offset */
+ (0 << 16) | /* Chroma QP Offset */
+ (0 << 14) | /* Max-bit conformance Intra flag */
+ (0 << 13) | /* Max Macroblock size conformance Inter flag */
+ (pPicParameter->pic_fields.bits.weighted_pred_flag << 12) | /*Weighted_Pred_Flag */
+ (pPicParameter->pic_fields.bits.weighted_bipred_idc << 10) | /* Weighted_BiPred_Idc */
+ (0 << 8) | /* FIXME: Image Structure */
+ (0 << 0) ); /* Current Decoded Image Frame Store ID, reserved in Encode mode */
+ OUT_BCS_BATCH(batch,
+ (0 << 16) | /* Minimum Frame size */
+ (0 << 15) | /* Disable reading of Macroblock Status Buffer */
+ (0 << 14) | /* Load BitStream Pointer only once, 1 slice 1 frame */
+ (0 << 13) | /* CABAC 0 word insertion test enable */
+ (1 << 12) | /* MVUnpackedEnable,compliant to DXVA */
+ (1 << 10) | /* Chroma Format IDC, 4:2:0 */
+ (0 << 8) | /* FIXME: MbMvFormatFlag */
+ (pPicParameter->pic_fields.bits.entropy_coding_mode_flag << 7) | /*0:CAVLC encoding mode,1:CABAC*/
+ (0 << 6) | /* Only valid for VLD decoding mode */
+ (0 << 5) | /* Constrained Intra Prediction Flag, from PPS */
+ (0 << 4) | /* Direct 8x8 inference flag */
+ (pPicParameter->pic_fields.bits.transform_8x8_mode_flag << 3) | /*8x8 or 4x4 IDCT Transform Mode Flag*/
+ (1 << 2) | /* Frame MB only flag */
+ (0 << 1) | /* MBAFF mode is in active */
+ (0 << 0)); /* Field picture flag */
+ /* DW5 Trellis quantization */
+ OUT_BCS_BATCH(batch, 0); /* Mainly about MB rate control and debug, just ignoring */
+ OUT_BCS_BATCH(batch, /* Inter and Intra Conformance Max size limit */
+ (0xBB8 << 16) | /* InterMbMaxSz */
+ (0xEE8) ); /* IntraMbMaxSz */
+ OUT_BCS_BATCH(batch, 0); /* Reserved */
+ /* DW8. QP delta */
+ OUT_BCS_BATCH(batch, 0); /* Slice QP Delta for bitrate control */
+ OUT_BCS_BATCH(batch, 0); /* Slice QP Delta for bitrate control */
+ /* DW10. Bit setting for MB */
+ /* NOTE(review): the four constants in DW10-13 are undocumented here; confirm against the hardware spec */
+ OUT_BCS_BATCH(batch, 0x8C000000);
+ OUT_BCS_BATCH(batch, 0x00010000);
+ /* DW12. */
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0x02010100);
+ /* DW14. For short format */
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+
+ ADVANCE_BCS_BATCH(batch);
+}
+
+/*
+ * Emit one 18-dword MFX_QM_STATE command.
+ *
+ * qm_type   - matrix selector written to DW1.
+ * qm        - source matrix data, qm_length dwords.
+ * qm_length - number of dwords to copy from qm; must be <= 16 (asserted).
+ *
+ * The command always carries 16 payload dwords. The staging buffer is
+ * zero-initialized so that, when qm_length < 16, the remaining dwords are
+ * zero rather than uninitialized stack contents. ctx is not used here.
+ */
+static void
+gen8_mfc_qm_state(VADriverContextP ctx,
+                  int qm_type,
+                  unsigned int *qm,
+                  int qm_length,
+                  struct intel_encoder_context *encoder_context)
+{
+    struct intel_batchbuffer *batch = encoder_context->base.batch;
+    unsigned int qm_buffer[16] = { 0 };
+
+    assert(qm_length <= 16);
+    assert(sizeof(*qm) == 4);
+    memcpy(qm_buffer, qm, qm_length * 4);
+
+    BEGIN_BCS_BATCH(batch, 18);
+    OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
+    OUT_BCS_BATCH(batch, qm_type << 0);
+    intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
+    ADVANCE_BCS_BATCH(batch);
+}
+
+/*
+ * Program the four AVC quantizer matrices (4x4 intra/inter, 8x8 intra/inter)
+ * with a flat matrix in which every byte is 0x10.
+ */
+static void
+gen8_mfc_avc_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
+{
+    /* { matrix type, payload length in dwords }, in emission order */
+    const int matrices[4][2] = {
+        { MFX_QM_AVC_4X4_INTRA_MATRIX, 12 },
+        { MFX_QM_AVC_4X4_INTER_MATRIX, 12 },
+        { MFX_QM_AVC_8x8_INTRA_MATRIX, 16 },
+        { MFX_QM_AVC_8x8_INTER_MATRIX, 16 },
+    };
+    unsigned int flat_qm[16];
+    unsigned int n;
+
+    for (n = 0; n < 16; n++)
+        flat_qm[n] = 0x10101010;
+
+    for (n = 0; n < 4; n++)
+        gen8_mfc_qm_state(ctx, matrices[n][0], flat_qm, matrices[n][1], encoder_context);
+}
+
+/*
+ * Emit one 34-dword MFX_FQM_STATE command.
+ *
+ * fqm_type   - matrix selector written to DW1.
+ * fqm        - source matrix data, fqm_length dwords.
+ * fqm_length - number of dwords to copy from fqm; must be <= 32 (asserted).
+ *
+ * The command always carries 32 payload dwords. The staging buffer is
+ * zero-initialized so that, when fqm_length < 32, the remaining dwords are
+ * zero rather than uninitialized stack contents. ctx is not used here.
+ */
+static void
+gen8_mfc_fqm_state(VADriverContextP ctx,
+                   int fqm_type,
+                   unsigned int *fqm,
+                   int fqm_length,
+                   struct intel_encoder_context *encoder_context)
+{
+    struct intel_batchbuffer *batch = encoder_context->base.batch;
+    unsigned int fqm_buffer[32] = { 0 };
+
+    assert(fqm_length <= 32);
+    assert(sizeof(*fqm) == 4);
+    memcpy(fqm_buffer, fqm, fqm_length * 4);
+
+    BEGIN_BCS_BATCH(batch, 34);
+    OUT_BCS_BATCH(batch, MFX_FQM_STATE | (34 - 2));
+    OUT_BCS_BATCH(batch, fqm_type << 0);
+    intel_batchbuffer_data(batch, fqm_buffer, 32 * 4);
+    ADVANCE_BCS_BATCH(batch);
+}
+
+/*
+ * Program the four AVC forward quantizer matrices (4x4 intra/inter,
+ * 8x8 intra/inter) with a flat matrix in which every dword is 0x10001000.
+ */
+static void
+gen8_mfc_avc_fqm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
+{
+    /* { matrix type, payload length in dwords }, in emission order */
+    const int matrices[4][2] = {
+        { MFX_QM_AVC_4X4_INTRA_MATRIX, 24 },
+        { MFX_QM_AVC_4X4_INTER_MATRIX, 24 },
+        { MFX_QM_AVC_8x8_INTRA_MATRIX, 32 },
+        { MFX_QM_AVC_8x8_INTER_MATRIX, 32 },
+    };
+    unsigned int flat_fqm[32];
+    unsigned int n;
+
+    for (n = 0; n < 32; n++)
+        flat_fqm[n] = 0x10001000;
+
+    for (n = 0; n < 4; n++)
+        gen8_mfc_fqm_state(ctx, matrices[n][0], flat_fqm, matrices[n][1], encoder_context);
+}
+
+/*
+ * Emit an MFX_INSERT_OBJECT command carrying lenght_in_dws dwords of inline
+ * data taken from insert_data.
+ *
+ * insert_data          - payload to copy into the batch.
+ * lenght_in_dws        - payload length in dwords [sic: parameter name].
+ * data_bits_in_last_dw - valid bits in the final dword; 0 is treated as 32.
+ * skip_emul_byte_count - written to bits 4+ of DW1.
+ * is_last_header, is_end_of_slice, emulation_flag - normalized to 0/1 and
+ *                        written to bits 2, 1 and 3 of DW1 respectively.
+ * batch                - target batch; when NULL the encoder's base batch
+ *                        is used.
+ * ctx is not used here.
+ */
+static void
+gen8_mfc_avc_insert_object(VADriverContextP ctx, struct intel_encoder_context *encoder_context,
+ unsigned int *insert_data, int lenght_in_dws, int data_bits_in_last_dw,
+ int skip_emul_byte_count, int is_last_header, int is_end_of_slice, int emulation_flag,
+ struct intel_batchbuffer *batch)
+{
+ if (batch == NULL)
+ batch = encoder_context->base.batch;
+
+ /* a fully used last dword is expressed as 32 bits, not 0 */
+ if (data_bits_in_last_dw == 0)
+ data_bits_in_last_dw = 32;
+
+ BEGIN_BCS_BATCH(batch, lenght_in_dws + 2);
+
+ OUT_BCS_BATCH(batch, MFX_INSERT_OBJECT | (lenght_in_dws + 2 - 2));
+ OUT_BCS_BATCH(batch,
+ (0 << 16) | /* always start at offset 0 */
+ (data_bits_in_last_dw << 8) |
+ (skip_emul_byte_count << 4) |
+ (!!emulation_flag << 3) |
+ ((!!is_last_header) << 2) |
+ ((!!is_end_of_slice) << 1) |
+ (0 << 0)); /* FIXME: ??? */
+ intel_batchbuffer_data(batch, insert_data, lenght_in_dws * 4);
+
+ ADVANCE_BCS_BATCH(batch);
+}
+
+
+/*
+ * Reset the MFC context for a new encode pass.
+ *
+ * Frame size in macroblocks comes from the H.264 sequence parameters
+ * (CODEC_H264 / CODEC_H264_MVC) or is computed from the MPEG-2 picture size
+ * (any other codec is asserted to be CODEC_MPEG2).
+ *
+ * The function then:
+ *  - drops references to the per-frame input/output, direct-MV and reference
+ *    buffer objects and clears the pointers;
+ *  - re-allocates the row-store / macroblock-status scratch buffers, sized
+ *    from the macroblock dimensions;
+ *  - replaces the auxiliary (slice) batchbuffer and exposes its buffer object
+ *    through aux_batchbuffer_surface as 16-byte blocks;
+ *  - initializes the GPE context.
+ */
+static void gen8_mfc_init(VADriverContextP ctx,
+                          struct encode_state *encode_state,
+                          struct intel_encoder_context *encoder_context)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
+    dri_bo *bo;
+    int i;
+    int width_in_mbs = 0;
+    int height_in_mbs = 0;
+    int slice_batchbuffer_size;
+
+    if (encoder_context->codec == CODEC_H264 ||
+        encoder_context->codec == CODEC_H264_MVC) {
+        VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
+        width_in_mbs = pSequenceParameter->picture_width_in_mbs;
+        height_in_mbs = pSequenceParameter->picture_height_in_mbs;
+    } else {
+        VAEncSequenceParameterBufferMPEG2 *pSequenceParameter = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
+
+        assert(encoder_context->codec == CODEC_MPEG2);
+
+        width_in_mbs = ALIGN(pSequenceParameter->picture_width, 16) / 16;
+        height_in_mbs = ALIGN(pSequenceParameter->picture_height, 16) / 16;
+    }
+
+    /* 64 bytes per macroblock, 4096 bytes of slack, plus header/tail room per slice */
+    slice_batchbuffer_size = 64 * width_in_mbs * height_in_mbs + 4096 +
+        (SLICE_HEADER + SLICE_TAIL) * encode_state->num_slice_params_ext;
+
+    /*Encode common setup for MFC*/
+    dri_bo_unreference(mfc_context->post_deblocking_output.bo);
+    mfc_context->post_deblocking_output.bo = NULL;
+
+    dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
+    mfc_context->pre_deblocking_output.bo = NULL;
+
+    dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
+    mfc_context->uncompressed_picture_source.bo = NULL;
+
+    dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo);
+    mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
+
+    for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++) {
+        /* The original had a stray ';' after this condition, which turned
+         * the guard into an empty statement; it now matches the reference
+         * surface loop below. */
+        if (mfc_context->direct_mv_buffers[i].bo != NULL)
+            dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
+        mfc_context->direct_mv_buffers[i].bo = NULL;
+    }
+
+    for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++) {
+        if (mfc_context->reference_surfaces[i].bo != NULL)
+            dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
+        mfc_context->reference_surfaces[i].bo = NULL;
+    }
+
+    dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
+    bo = dri_bo_alloc(i965->intel.bufmgr,
+                      "Buffer",
+                      width_in_mbs * 64,
+                      64);
+    assert(bo);
+    mfc_context->intra_row_store_scratch_buffer.bo = bo;
+
+    dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
+    bo = dri_bo_alloc(i965->intel.bufmgr,
+                      "Buffer",
+                      width_in_mbs * height_in_mbs * 16,
+                      64);
+    assert(bo);
+    mfc_context->macroblock_status_buffer.bo = bo;
+
+    dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
+    bo = dri_bo_alloc(i965->intel.bufmgr,
+                      "Buffer",
+                      4 * width_in_mbs * 64,  /* 4 * width_in_mbs * 64 */
+                      64);
+    assert(bo);
+    mfc_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
+
+    dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
+    bo = dri_bo_alloc(i965->intel.bufmgr,
+                      "Buffer",
+                      2 * width_in_mbs * 64,  /* 2 * width_in_mbs * 64 */
+                      0x1000);
+    assert(bo);
+    mfc_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
+
+    dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo);
+    mfc_context->mfc_batchbuffer_surface.bo = NULL;
+
+    dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
+    mfc_context->aux_batchbuffer_surface.bo = NULL;
+
+    if (mfc_context->aux_batchbuffer)
+        intel_batchbuffer_free(mfc_context->aux_batchbuffer);
+
+    /* New slice batchbuffer; the surface takes its own reference on the bo
+     * and views it as 16-byte blocks. */
+    mfc_context->aux_batchbuffer = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, slice_batchbuffer_size);
+    mfc_context->aux_batchbuffer_surface.bo = mfc_context->aux_batchbuffer->buffer;
+    dri_bo_reference(mfc_context->aux_batchbuffer_surface.bo);
+    mfc_context->aux_batchbuffer_surface.pitch = 16;
+    mfc_context->aux_batchbuffer_surface.num_blocks = mfc_context->aux_batchbuffer->size / 16;
+    mfc_context->aux_batchbuffer_surface.size_block = 16;
+
+    i965_gpe_context_init(ctx, &mfc_context->gpe_context);
+}
+
+/*
+ * Emit the 61-dword MFX_PIPE_BUF_ADDR_STATE command.
+ *
+ * Each buffer occupies a group of dwords whose first dword is a relocation
+ * to the buffer object (or 0 when the optional object is NULL) and whose
+ * remaining dwords are written as 0. Pre/post deblocking outputs and the
+ * reference surfaces are optional; the uncompressed source, macroblock
+ * status, intra row-store and deblocking row-store buffers are emitted
+ * unconditionally. ctx is not used here.
+ */
+static void
+gen8_mfc_pipe_buf_addr_state(VADriverContextP ctx,
+ struct intel_encoder_context *encoder_context)
+{
+ struct intel_batchbuffer *batch = encoder_context->base.batch;
+ struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
+ int i;
+
+ BEGIN_BCS_BATCH(batch, 61);
+
+ OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
+
+ /* the DW1-3 is for pre_deblocking */
+ if (mfc_context->pre_deblocking_output.bo)
+ OUT_BCS_RELOC(batch, mfc_context->pre_deblocking_output.bo,
+ I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+ 0);
+ else
+ OUT_BCS_BATCH(batch, 0); /* pre output addr */
+
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+ /* the DW4-6 is for the post_deblocking */
+
+ if (mfc_context->post_deblocking_output.bo)
+ OUT_BCS_RELOC(batch, mfc_context->post_deblocking_output.bo,
+ I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+ 0); /* post output addr */
+ else
+ OUT_BCS_BATCH(batch, 0);
+
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+
+ /* the DW7-9 is for the uncompressed_picture */
+ OUT_BCS_RELOC(batch, mfc_context->uncompressed_picture_source.bo,
+ I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+ 0); /* uncompressed data */
+
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+
+ /* the DW10-12 is for the mb status */
+ OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
+ I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+ 0); /* StreamOut data*/
+
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+
+ /* the DW13-15 is for the intra_row_store_scratch */
+ OUT_BCS_RELOC(batch, mfc_context->intra_row_store_scratch_buffer.bo,
+ I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+ 0);
+
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+
+ /* the DW16-18 is for the deblocking filter */
+ OUT_BCS_RELOC(batch, mfc_context->deblocking_filter_row_store_scratch_buffer.bo,
+ I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+ 0);
+
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+
+ /* the DW 19-50 is for Reference pictures*/
+ /* two dwords per reference surface slot: address (or 0) followed by 0 */
+ for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
+ if ( mfc_context->reference_surfaces[i].bo != NULL) {
+ OUT_BCS_RELOC(batch, mfc_context->reference_surfaces[i].bo,
+ I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+ 0);
+ } else {
+ OUT_BCS_BATCH(batch, 0);
+ }
+
+ OUT_BCS_BATCH(batch, 0);
+ }
+
+ /* DW 51: written as 0 */
+ OUT_BCS_BATCH(batch, 0);
+
+ /* The DW 52-54 is for the MB status buffer */
+ OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
+ I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+ 0); /* Macroblock status buffer*/
+
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+
+ /* the DW 55-57 is the ILDB buffer */
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+
+ /* the DW 58-60 is the second ILDB buffer */
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+
+ ADVANCE_BCS_BATCH(batch);
+}
+
+/*
+ * Emit the 71-dword MFX_AVC_DIRECTMODE_STATE command.
+ *
+ * The reference section is filled from the even-indexed entries of
+ * direct_mv_buffers[0 .. NUM_MFC_DMV_BUFFERS - 3] (the loop steps by 2),
+ * two dwords per entry. The buffer at index NUM_MFC_DMV_BUFFERS - 2 is then
+ * emitted unconditionally, followed by a 32-entry table holding i / 2 and
+ * two trailing zero dwords. ctx is not used here.
+ */
+static void
+gen8_mfc_avc_directmode_state(VADriverContextP ctx,
+ struct intel_encoder_context *encoder_context)
+{
+ struct intel_batchbuffer *batch = encoder_context->base.batch;
+ struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
+
+ int i;
+
+ BEGIN_BCS_BATCH(batch, 71);
+
+ OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
+
+ /* Reference frames and Current frames */
+ /* the DW1-32 is for the direct MV for reference */
+ for(i = 0; i < NUM_MFC_DMV_BUFFERS - 2; i += 2) {
+ if ( mfc_context->direct_mv_buffers[i].bo != NULL) {
+ OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[i].bo,
+ I915_GEM_DOMAIN_INSTRUCTION, 0,
+ 0);
+ OUT_BCS_BATCH(batch, 0);
+ } else {
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+ }
+ }
+
+ OUT_BCS_BATCH(batch, 0);
+
+ /* the DW34-36 is the MV for the current reference */
+ /* NOTE(review): emitted without a NULL check; assumes this buffer is always allocated by the caller */
+ OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 2].bo,
+ I915_GEM_DOMAIN_INSTRUCTION, 0,
+ 0);
+
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+
+ /* POL list -- presumably the POC (picture order count) table; TODO confirm */
+ for(i = 0; i < 32; i++) {
+ OUT_BCS_BATCH(batch, i/2);
+ }
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+
+ ADVANCE_BCS_BATCH(batch);
+}
+
+
+/*
+ * Emit the 10-dword MFX_BSP_BUF_BASE_ADDR_STATE command.
+ * Only the first address group is programmed (a relocation to
+ * bsd_mpc_row_store_scratch_buffer); the MPR row-store and bitplane-read
+ * groups are written as zero. ctx is not used here.
+ */
+static void
+gen8_mfc_bsp_buf_base_addr_state(VADriverContextP ctx,
+ struct intel_encoder_context *encoder_context)
+{
+ struct intel_batchbuffer *batch = encoder_context->base.batch;
+ struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
+
+ BEGIN_BCS_BATCH(batch, 10);
+
+ OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
+ /* the DW1-3 is for the BSD/MPC row store scratch buffer */
+ OUT_BCS_RELOC(batch, mfc_context->bsd_mpc_row_store_scratch_buffer.bo,
+ I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+ 0);
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+
+ /* the DW4-6 is for MPR Row Store Scratch Buffer Base Address */
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+
+ /* the DW7-9 is for Bitplane Read Buffer Base Address */
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+
+ ADVANCE_BCS_BATCH(batch);
+}
+
+
+/*
+ * Emit the picture-level AVC state commands in a fixed order.
+ * Some steps go through function pointers stored in mfc_context
+ * (pipe_mode_select, set_surface_state, ind_obj_base_addr_state,
+ * avc_img_state, avc_qm_state, avc_fqm_state); the others call the gen8 or
+ * shared helpers directly.
+ */
+static void gen8_mfc_avc_pipeline_picture_programing( VADriverContextP ctx,
+ struct encode_state *encode_state,
+ struct intel_encoder_context *encoder_context)
+{
+ struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
+
+ mfc_context->pipe_mode_select(ctx, MFX_FORMAT_AVC, encoder_context);
+ mfc_context->set_surface_state(ctx, encoder_context);
+ mfc_context->ind_obj_base_addr_state(ctx, encoder_context);
+ gen8_mfc_pipe_buf_addr_state(ctx, encoder_context);
+ gen8_mfc_bsp_buf_base_addr_state(ctx, encoder_context);
+ mfc_context->avc_img_state(ctx, encode_state, encoder_context);
+ mfc_context->avc_qm_state(ctx, encoder_context);
+ mfc_context->avc_fqm_state(ctx, encoder_context);
+ gen8_mfc_avc_directmode_state(ctx, encoder_context);
+ intel_mfc_avc_ref_idx_state(ctx, encode_state, encoder_context);
+}
+
+
+/*
+ * Submit the encoder's batchbuffer. Always returns VA_STATUS_SUCCESS;
+ * ctx and encode_state are not used.
+ */
+static VAStatus gen8_mfc_run(VADriverContextP ctx,
+                             struct encode_state *encode_state,
+                             struct intel_encoder_context *encoder_context)
+{
+    /* Flushing the batch is what runs the pipeline. */
+    intel_batchbuffer_flush(encoder_context->base.batch);
+
+    return VA_STATUS_SUCCESS;
+}
+
+
+/*
+ * Report the size of the encoded output.
+ *
+ * Maps the coded buffer named by the H.264 picture parameters, stores the
+ * first segment's size multiplied by 8 in *encoded_bits_size, and unmaps
+ * the buffer.
+ *
+ * Returns VA_STATUS_SUCCESS, or the i965_MapBuffer() status when mapping
+ * fails. The assert is kept so debug builds still stop at the failure; the
+ * explicit check keeps release (NDEBUG) builds from dereferencing an unset
+ * segment pointer. encoder_context is not used here.
+ */
+static VAStatus
+gen8_mfc_stop(VADriverContextP ctx,
+              struct encode_state *encode_state,
+              struct intel_encoder_context *encoder_context,
+              int *encoded_bits_size)
+{
+    VAStatus vaStatus;
+    VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
+    VACodedBufferSegment *coded_buffer_segment = NULL;
+
+    vaStatus = i965_MapBuffer(ctx, pPicParameter->coded_buf, (void **)&coded_buffer_segment);
+    assert(vaStatus == VA_STATUS_SUCCESS);
+    if (vaStatus != VA_STATUS_SUCCESS)
+        return vaStatus;
+
+    *encoded_bits_size = coded_buffer_segment->size * 8;
+    i965_UnmapBuffer(ctx, pPicParameter->coded_buf);
+
+    return VA_STATUS_SUCCESS;
+}
+
+
+/*
+ * Emit one 11-dword MFX_AVC_SLICE_STATE command describing a slice.
+ *
+ * pic_param and slice_param supply the weighted-prediction mode, the active
+ * reference counts and the deblocking/CABAC fields; qp is written as the
+ * slice quantization parameter. The rate-control modifiers (MaxQp*, Correct[],
+ * Grow*, Shrink*) are read from
+ * mfc_context->bit_rate_control_context[slice_type].
+ *
+ * If batch is NULL the encoder context's own batch buffer is used.
+ *
+ * NOTE(review): rate_control_enable is not consumed; both fields that would
+ * take it are hard-coded to 0 in the command below.
+ */
+static void
+gen8_mfc_avc_slice_state(VADriverContextP ctx,
+ VAEncPictureParameterBufferH264 *pic_param,
+ VAEncSliceParameterBufferH264 *slice_param,
+ struct encode_state *encode_state,
+ struct intel_encoder_context *encoder_context,
+ int rate_control_enable,
+ int qp,
+ struct intel_batchbuffer *batch)
+{
+ struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
+ int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
+ int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
+ /* Slice extent in macroblock units; endmb is one past the last MB. */
+ int beginmb = slice_param->macroblock_address;
+ int endmb = beginmb + slice_param->num_macroblocks;
+ int beginx = beginmb % width_in_mbs;
+ int beginy = beginmb / width_in_mbs;
+ int nextx = endmb % width_in_mbs;
+ int nexty = endmb / width_in_mbs;
+ int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
+ /* The slice is the last one when it ends exactly at the end of the picture. */
+ int last_slice = (endmb == (width_in_mbs * height_in_mbs));
+ int maxQpN, maxQpP;
+ unsigned char correct[6], grow, shrink;
+ int i;
+ int weighted_pred_idc = 0;
+ unsigned int luma_log2_weight_denom = slice_param->luma_log2_weight_denom;
+ unsigned int chroma_log2_weight_denom = slice_param->chroma_log2_weight_denom;
+ int num_ref_l0 = 0, num_ref_l1 = 0;
+
+ /* Fall back to the context's batch buffer when the caller passes none. */
+ if (batch == NULL)
+ batch = encoder_context->base.batch;
+
+ /*
+  * Per slice type: pick the weighted-prediction mode and the number of
+  * active references. Picture-level counts are used unless the slice sets
+  * num_ref_idx_active_override_flag.
+  */
+ if (slice_type == SLICE_TYPE_I) {
+ luma_log2_weight_denom = 0;
+ chroma_log2_weight_denom = 0;
+ } else if (slice_type == SLICE_TYPE_P) {
+ weighted_pred_idc = pic_param->pic_fields.bits.weighted_pred_flag;
+ num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1;
+
+ if (slice_param->num_ref_idx_active_override_flag)
+ num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
+ } else if (slice_type == SLICE_TYPE_B) {
+ weighted_pred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
+ num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1;
+ num_ref_l1 = pic_param->num_ref_idx_l1_active_minus1 + 1;
+
+ if (slice_param->num_ref_idx_active_override_flag) {
+ num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
+ num_ref_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
+ }
+
+ if (weighted_pred_idc == 2) {
+ /* 8.4.3 - Derivation process for prediction weights (8-279) */
+ luma_log2_weight_denom = 5;
+ chroma_log2_weight_denom = 5;
+ }
+ }
+
+ /* Snapshot the rate-control parameters kept for this slice type. */
+ maxQpN = mfc_context->bit_rate_control_context[slice_type].MaxQpNegModifier;
+ maxQpP = mfc_context->bit_rate_control_context[slice_type].MaxQpPosModifier;
+
+ for (i = 0; i < 6; i++)
+ correct[i] = mfc_context->bit_rate_control_context[slice_type].Correct[i];
+
+ /* grow/shrink pack the init value in the low nibble and the resistance above it. */
+ grow = mfc_context->bit_rate_control_context[slice_type].GrowInit +
+ (mfc_context->bit_rate_control_context[slice_type].GrowResistance << 4);
+ shrink = mfc_context->bit_rate_control_context[slice_type].ShrinkInit +
+ (mfc_context->bit_rate_control_context[slice_type].ShrinkResistance << 4);
+
+ BEGIN_BCS_BATCH(batch, 11);;
+
+ OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2) );
+ OUT_BCS_BATCH(batch, slice_type); /*Slice Type: I:P:B Slice*/
+
+ OUT_BCS_BATCH(batch,
+ (num_ref_l0 << 16) |
+ (num_ref_l1 << 24) |
+ (chroma_log2_weight_denom << 8) |
+ (luma_log2_weight_denom << 0));
+
+ OUT_BCS_BATCH(batch,
+ (weighted_pred_idc << 30) |
+ (slice_param->direct_spatial_mv_pred_flag<<29) | /*Direct Prediction Type*/
+ (slice_param->disable_deblocking_filter_idc << 27) |
+ (slice_param->cabac_init_idc << 24) |
+ (qp<<16) | /*Slice Quantization Parameter*/
+ ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
+ ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
+ OUT_BCS_BATCH(batch,
+ (beginy << 24) | /*First MB X&Y, the begin position of the current slice*/
+ (beginx << 16) |
+ slice_param->macroblock_address );
+ OUT_BCS_BATCH(batch, (nexty << 16) | nextx); /*Next slice first MB X&Y*/
+ OUT_BCS_BATCH(batch,
+ (0/*rate_control_enable*/ << 31) | /*in CBR mode RateControlCounterEnable = enable*/
+ (1 << 30) | /*ResetRateControlCounter*/
+ (0 << 28) | /*RC Trigger Mode = Always Rate Control*/
+ (4 << 24) | /*RC Stable Tolerance, middle level*/
+ (0/*rate_control_enable*/ << 23) | /*RC Panic Enable*/
+ (0 << 22) | /*QP mode, don't modify CBP*/
+ (0 << 21) | /*MB Type Direct Conversion Enabled*/
+ (0 << 20) | /*MB Type Skip Conversion Enabled*/
+ (last_slice << 19) | /*IsLastSlice*/
+ (0 << 18) | /*BitstreamOutputFlag Compressed BitStream Output Disable Flag 0:enable 1:disable*/
+ (1 << 17) | /*HeaderPresentFlag*/
+ (1 << 16) | /*SliceData PresentFlag*/
+ (1 << 15) | /*TailPresentFlag*/
+ (1 << 13) | /*RBSP NAL TYPE*/
+ (0 << 12) ); /*CabacZeroWordInsertionEnable*/
+ OUT_BCS_BATCH(batch, mfc_context->mfc_indirect_pak_bse_object.offset);
+ OUT_BCS_BATCH(batch,
+ (maxQpN << 24) | /*Target QP - 24 is lowest QP*/
+ (maxQpP << 16) | /*Target QP + 20 is highest QP*/
+ (shrink << 8) |
+ (grow << 0));
+ OUT_BCS_BATCH(batch,
+ (correct[5] << 20) |
+ (correct[4] << 16) |
+ (correct[3] << 12) |
+ (correct[2] << 8) |
+ (correct[1] << 4) |
+ (correct[0] << 0));
+ OUT_BCS_BATCH(batch, 0);
+
+ ADVANCE_BCS_BATCH(batch);
+}
+
+
+#ifdef MFC_SOFTWARE_HASWELL
+
+/*
+ * Emit a 12-dword MFC_AVC_PAK_OBJECT command for an intra-coded macroblock.
+ *
+ * x, y            macroblock position written into the command
+ * end_mb          non-zero for the last macroblock (placed at bit 26)
+ * qp              quantization parameter for this macroblock
+ * msg             per-macroblock message words; msg[0..3] are read
+ * target_mb_size,
+ * max_mb_size     written into the size dword (bits 16 and 24)
+ * batch           destination batch; NULL selects encoder_context->base.batch
+ *
+ * Returns the command length in dwords.
+ */
+static int
+gen8_mfc_avc_pak_object_intra(VADriverContextP ctx, int x, int y, int end_mb,
+ int qp,unsigned int *msg,
+ struct intel_encoder_context *encoder_context,
+ unsigned char target_mb_size, unsigned char max_mb_size,
+ struct intel_batchbuffer *batch)
+{
+ int len_in_dwords = 12;
+ unsigned int intra_msg;
+#define INTRA_MSG_FLAG (1 << 13)
+#define INTRA_MBTYPE_MASK (0x1F0000)
+ if (batch == NULL)
+ batch = encoder_context->base.batch;
+
+ BEGIN_BCS_BATCH(batch, len_in_dwords);
+
+ /*
+  * Build the MB-info dword: keep the bits selected by 0xC0FF from msg[0],
+  * set INTRA_MSG_FLAG, and move the MB type field (bits 16..20 of msg[0])
+  * down by 8 bits.
+  */
+ intra_msg = msg[0] & 0xC0FF;
+ intra_msg |= INTRA_MSG_FLAG;
+ intra_msg |= ((msg[0] & INTRA_MBTYPE_MASK) >> 8);
+ OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch,
+ (0 << 24) | /* PackedMvNum, Debug*/
+ (0 << 20) | /* No motion vector */
+ (1 << 19) | /* CbpDcY */
+ (1 << 18) | /* CbpDcU */
+ (1 << 17) | /* CbpDcV */
+ intra_msg);
+
+ OUT_BCS_BATCH(batch, (0xFFFF << 16) | (y << 8) | x); /* Code Block Pattern for Y*/
+ OUT_BCS_BATCH(batch, 0x000F000F); /* Code Block Pattern */
+ OUT_BCS_BATCH(batch, (0 << 27) | (end_mb << 26) | qp); /* Last MB */
+
+ /*Stuff for Intra MB*/
+ OUT_BCS_BATCH(batch, msg[1]); /* We are using Intra16x16, no 4x4 predmode*/
+ OUT_BCS_BATCH(batch, msg[2]);
+ OUT_BCS_BATCH(batch, msg[3]&0xFF);
+
+ /*MaxSizeInWord and TargetSizeInWord*/
+ OUT_BCS_BATCH(batch, (max_mb_size << 24) |
+ (target_mb_size << 16) );
+
+ OUT_BCS_BATCH(batch, 0);
+
+ ADVANCE_BCS_BATCH(batch);
+
+ return len_in_dwords;
+}
+
+/*
+ * Emit a 12-dword MFC_AVC_PAK_OBJECT command for an inter-coded macroblock.
+ *
+ * msg points at the inter part of the per-macroblock message; the motion
+ * vectors start MSG_MV_OFFSET dwords into it. Note that this function
+ * rewrites some of those motion-vector words IN PLACE (see the first block
+ * below) before emitting the command, so the caller's buffer is modified.
+ *
+ * offset is emitted verbatim as the third dword of the command. end_mb, qp,
+ * x, y, target_mb_size and max_mb_size are packed as in the intra variant.
+ * batch == NULL selects encoder_context->base.batch.
+ *
+ * NOTE(review): slice_type is only referenced inside the disabled "#if 0"
+ * branch, so it currently has no effect.
+ *
+ * Returns the command length in dwords.
+ */
+static int
+gen8_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb, int qp,
+ unsigned int *msg, unsigned int offset,
+ struct intel_encoder_context *encoder_context,
+ unsigned char target_mb_size,unsigned char max_mb_size, int slice_type,
+ struct intel_batchbuffer *batch)
+{
+ struct gen6_vme_context *vme_context = encoder_context->vme_context;
+ int len_in_dwords = 12;
+ unsigned int inter_msg = 0;
+ if (batch == NULL)
+ batch = encoder_context->base.batch;
+ {
+#define MSG_MV_OFFSET 4
+ unsigned int *mv_ptr;
+ mv_ptr = msg + MSG_MV_OFFSET;
+ /* MV of VME output is based on 16 sub-blocks. So it is necessary
+ * to convert them to be compatible with the format of AVC_PAK
+ * command.
+ */
+ if ((msg[0] & INTER_MODE_MASK) == INTER_8X16) {
+ /* MV[0] and MV[2] are replicated */
+ mv_ptr[4] = mv_ptr[0];
+ mv_ptr[5] = mv_ptr[1];
+ mv_ptr[2] = mv_ptr[8];
+ mv_ptr[3] = mv_ptr[9];
+ mv_ptr[6] = mv_ptr[8];
+ mv_ptr[7] = mv_ptr[9];
+ } else if ((msg[0] & INTER_MODE_MASK) == INTER_16X8) {
+ /* MV[0] and MV[1] are replicated */
+ mv_ptr[2] = mv_ptr[0];
+ mv_ptr[3] = mv_ptr[1];
+ mv_ptr[4] = mv_ptr[16];
+ mv_ptr[5] = mv_ptr[17];
+ mv_ptr[6] = mv_ptr[24];
+ mv_ptr[7] = mv_ptr[25];
+ } else if (((msg[0] & INTER_MODE_MASK) == INTER_8X8) &&
+ !(msg[1] & SUBMB_SHAPE_MASK)) {
+ /* Don't touch MV[0] or MV[1] */
+ mv_ptr[2] = mv_ptr[8];
+ mv_ptr[3] = mv_ptr[9];
+ mv_ptr[4] = mv_ptr[16];
+ mv_ptr[5] = mv_ptr[17];
+ mv_ptr[6] = mv_ptr[24];
+ mv_ptr[7] = mv_ptr[25];
+ }
+ }
+
+ BEGIN_BCS_BATCH(batch, len_in_dwords);
+
+ OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
+
+ /* Second dword: 32 by default, 128 for 8x8 mode with a sub-MB shape set. */
+ inter_msg = 32;
+ /* MV quantity */
+ if ((msg[0] & INTER_MODE_MASK) == INTER_8X8) {
+ if (msg[1] & SUBMB_SHAPE_MASK)
+ inter_msg = 128;
+ }
+ OUT_BCS_BATCH(batch, inter_msg); /* 32 MV*/
+ OUT_BCS_BATCH(batch, offset);
+ /* MB-info dword: selected bits of msg[0], the MV-count flag and bits 17..19. */
+ inter_msg = msg[0] & (0x1F00FFFF);
+ inter_msg |= INTER_MV8;
+ inter_msg |= ((1 << 19) | (1 << 18) | (1 << 17));
+ if (((msg[0] & INTER_MODE_MASK) == INTER_8X8) &&
+ (msg[1] & SUBMB_SHAPE_MASK)) {
+ inter_msg |= INTER_MV32;
+ }
+
+ OUT_BCS_BATCH(batch, inter_msg);
+
+ OUT_BCS_BATCH(batch, (0xFFFF<<16) | (y << 8) | x); /* Code Block Pattern for Y*/
+ OUT_BCS_BATCH(batch, 0x000F000F); /* Code Block Pattern */
+#if 0
+ if ( slice_type == SLICE_TYPE_B) {
+ OUT_BCS_BATCH(batch, (0xF<<28) | (end_mb << 26) | qp); /* Last MB */
+ } else {
+ OUT_BCS_BATCH(batch, (end_mb << 26) | qp); /* Last MB */
+ }
+#else
+ OUT_BCS_BATCH(batch, (end_mb << 26) | qp); /* Last MB */
+#endif
+
+ inter_msg = msg[1] >> 8;
+ /*Stuff for Inter MB*/
+ OUT_BCS_BATCH(batch, inter_msg);
+ OUT_BCS_BATCH(batch, vme_context->ref_index_in_mb[0]);
+ OUT_BCS_BATCH(batch, vme_context->ref_index_in_mb[1]);
+
+ /*MaxSizeInWord and TargetSizeInWord*/
+ OUT_BCS_BATCH(batch, (max_mb_size << 24) |
+ (target_mb_size << 16) );
+
+ OUT_BCS_BATCH(batch, 0x0);
+
+ ADVANCE_BCS_BATCH(batch);
+
+ return len_in_dwords;
+}
+
+#define AVC_INTRA_RDO_OFFSET 4
+#define AVC_INTER_RDO_OFFSET 10
+#define AVC_INTER_MSG_OFFSET 8
+#define AVC_INTER_MV_OFFSET 48
+#define AVC_RDO_MASK 0xFFFF
+
+/*
+ * Program one AVC slice into slice_batch on the CPU.
+ *
+ * Emits, in order: the slice state, the picture-level headers (first slice
+ * only), the packed slice data, one PAK object per macroblock built from the
+ * mapped VME output, and finally the tail data (two dwords for the last
+ * slice of the picture, one otherwise).
+ *
+ * In CBR mode the QP comes from the bit-rate-control context; when the slice
+ * has no application-supplied packed header (slice_header_index == 0) the
+ * slice parameter's slice_qp_delta is rewritten to match.
+ *
+ * For non-intra slices each macroblock is coded as intra when its intra RDO
+ * cost is lower than the inter one, otherwise as inter.
+ */
+static void
+gen8_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
+                                       struct encode_state *encode_state,
+                                       struct intel_encoder_context *encoder_context,
+                                       int slice_index,
+                                       struct intel_batchbuffer *slice_batch)
+{
+    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
+    struct gen6_vme_context *vme_context = encoder_context->vme_context;
+    VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
+    VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
+    VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer;
+    unsigned int *msg = NULL, offset = 0;
+    unsigned char *msg_ptr = NULL;
+    int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
+    int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
+    int last_slice = (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks) == (width_in_mbs * height_in_mbs);
+    int i, x, y;
+    int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta;
+    unsigned int rate_control_mode = encoder_context->rate_control_mode;
+    unsigned int tail_data[] = { 0x0, 0x0 };
+    int slice_type = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type);
+    int is_intra = slice_type == SLICE_TYPE_I;
+    int qp_slice;
+
+    qp_slice = qp;
+    if (rate_control_mode == VA_RC_CBR) {
+        qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
+        if (encode_state->slice_header_index[slice_index] == 0) {
+            pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
+            qp_slice = qp;
+        }
+    }
+
+    /* only support for 8-bit pixel bit-depth */
+    assert(pSequenceParameter->bit_depth_luma_minus8 == 0);
+    assert(pSequenceParameter->bit_depth_chroma_minus8 == 0);
+    assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52);
+    assert(qp >= 0 && qp < 52);
+
+    gen8_mfc_avc_slice_state(ctx,
+                             pPicParameter,
+                             pSliceParameter,
+                             encode_state, encoder_context,
+                             (rate_control_mode == VA_RC_CBR), qp_slice, slice_batch);
+
+    if (slice_index == 0)
+        intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
+
+    intel_avc_slice_insert_packed_data(ctx, encode_state, encoder_context, slice_index, slice_batch);
+
+    dri_bo_map(vme_context->vme_output.bo, 1);
+    msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual;
+    /*
+     * Check the mapping once, for intra and inter slices alike. The old
+     * per-macroblock assert(msg) only covered the intra path and could only
+     * fire for macroblock 0.
+     */
+    assert(msg_ptr);
+
+    for (i = pSliceParameter->macroblock_address;
+         i < pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks; i++) {
+        int last_mb = (i == (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks - 1));
+
+        x = i % width_in_mbs;
+        y = i / width_in_mbs;
+        /* Each macroblock owns one size_block-sized record in the VME output. */
+        msg = (unsigned int *)(msg_ptr + i * vme_context->vme_output.size_block);
+
+        if (is_intra) {
+            gen8_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, encoder_context, 0, 0, slice_batch);
+        } else {
+            int inter_rdo, intra_rdo;
+
+            inter_rdo = msg[AVC_INTER_RDO_OFFSET] & AVC_RDO_MASK;
+            intra_rdo = msg[AVC_INTRA_RDO_OFFSET] & AVC_RDO_MASK;
+            offset = i * vme_context->vme_output.size_block + AVC_INTER_MV_OFFSET;
+            if (intra_rdo < inter_rdo) {
+                gen8_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, encoder_context, 0, 0, slice_batch);
+            } else {
+                msg += AVC_INTER_MSG_OFFSET;
+                /* Pass the normalised slice type, matching the is_intra test above. */
+                gen8_mfc_avc_pak_object_inter(ctx, x, y, last_mb, qp, msg, offset, encoder_context, 0, 0, slice_type, slice_batch);
+            }
+        }
+    }
+
+    dri_bo_unmap(vme_context->vme_output.bo);
+
+    if (last_slice) {
+        mfc_context->insert_object(ctx, encoder_context,
+                                   tail_data, 2, 8,
+                                   2, 1, 1, 0, slice_batch);
+    } else {
+        mfc_context->insert_object(ctx, encoder_context,
+                                   tail_data, 1, 8,
+                                   1, 1, 1, 0, slice_batch);
+    }
+}
+
+/*
+ * Build the slice-level batch buffer on the CPU.
+ *
+ * Programs every slice into mfc_context->aux_batchbuffer, aligns the buffer
+ * to 8 bytes and appends a zero dword followed by MI_BATCH_BUFFER_END. The
+ * underlying buffer object then gets an extra reference, the batchbuffer
+ * wrapper is freed and mfc_context->aux_batchbuffer is cleared.
+ *
+ * Returns the buffer object; the caller is responsible for dropping the
+ * reference taken here.
+ *
+ * NOTE(review): the local "i965" is initialised but never read in this
+ * function.
+ */
+static dri_bo *
+gen8_mfc_avc_software_batchbuffer(VADriverContextP ctx,
+ struct encode_state *encode_state,
+ struct intel_encoder_context *encoder_context)
+{
+ struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
+ struct intel_batchbuffer *batch;
+ dri_bo *batch_bo;
+ int i;
+
+ batch = mfc_context->aux_batchbuffer;
+ batch_bo = batch->buffer;
+ for (i = 0; i < encode_state->num_slice_params_ext; i++) {
+ gen8_mfc_avc_pipeline_slice_programing(ctx, encode_state, encoder_context, i, batch);
+ }
+
+ intel_batchbuffer_align(batch, 8);
+
+ BEGIN_BCS_BATCH(batch, 2);
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END);
+ ADVANCE_BCS_BATCH(batch);
+
+ /* Keep the bo alive past the free of its batchbuffer wrapper. */
+ dri_bo_reference(batch_bo);
+ intel_batchbuffer_free(batch);
+ mfc_context->aux_batchbuffer = NULL;
+
+ return batch_bo;
+}
+
+#else
+
+/*
+ * Bind the two input buffer surfaces used when the batch buffer is built on
+ * the GPU: the VME output (BIND_IDX_VME_OUTPUT) and the auxiliary batch
+ * buffer surface (BIND_IDX_MFC_SLICE_HEADER). Both buffer objects must
+ * already exist; this is asserted.
+ */
+static void
+gen8_mfc_batchbuffer_surfaces_input(VADriverContextP ctx,
+ struct encode_state *encode_state,
+ struct intel_encoder_context *encoder_context)
+
+{
+ struct gen6_vme_context *vme_context = encoder_context->vme_context;
+ struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
+
+ assert(vme_context->vme_output.bo);
+ mfc_context->buffer_suface_setup(ctx,
+ &mfc_context->gpe_context,
+ &vme_context->vme_output,
+ BINDING_TABLE_OFFSET(BIND_IDX_VME_OUTPUT),
+ SURFACE_STATE_OFFSET(BIND_IDX_VME_OUTPUT));
+ assert(mfc_context->aux_batchbuffer_surface.bo);
+ mfc_context->buffer_suface_setup(ctx,
+ &mfc_context->gpe_context,
+ &mfc_context->aux_batchbuffer_surface,
+ BINDING_TABLE_OFFSET(BIND_IDX_MFC_SLICE_HEADER),
+ SURFACE_STATE_OFFSET(BIND_IDX_MFC_SLICE_HEADER));
+}
+
+/*
+ * Allocate and bind the output surface that receives the generated MFC
+ * batch buffer.
+ *
+ * The surface is sized for one block per macroblock of the picture (taken
+ * from the sequence parameter) plus 8 blocks per slice plus 1; each block is
+ * 16 * CMD_LEN_IN_OWORD bytes. The new buffer object is stored in
+ * mfc_context->mfc_batchbuffer_surface.bo and bound at
+ * BIND_IDX_MFC_BATCHBUFFER.
+ *
+ * The allocation result is asserted, in line with the input-surface setup,
+ * so a failed allocation is caught here rather than inside the surface
+ * setup callback.
+ */
+static void
+gen8_mfc_batchbuffer_surfaces_output(VADriverContextP ctx,
+                                     struct encode_state *encode_state,
+                                     struct intel_encoder_context *encoder_context)
+
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
+    VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
+    int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
+    int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
+
+    mfc_context->mfc_batchbuffer_surface.num_blocks = width_in_mbs * height_in_mbs + encode_state->num_slice_params_ext * 8 + 1;
+    mfc_context->mfc_batchbuffer_surface.size_block = 16 * CMD_LEN_IN_OWORD; /* CMD_LEN_IN_OWORD OWORDs of 16 bytes each */
+    mfc_context->mfc_batchbuffer_surface.pitch = 16;
+    mfc_context->mfc_batchbuffer_surface.bo = dri_bo_alloc(i965->intel.bufmgr,
+                                                           "MFC batchbuffer",
+                                                           mfc_context->mfc_batchbuffer_surface.num_blocks * mfc_context->mfc_batchbuffer_surface.size_block,
+                                                           0x1000);
+    assert(mfc_context->mfc_batchbuffer_surface.bo);
+    mfc_context->buffer_suface_setup(ctx,
+                                     &mfc_context->gpe_context,
+                                     &mfc_context->mfc_batchbuffer_surface,
+                                     BINDING_TABLE_OFFSET(BIND_IDX_MFC_BATCHBUFFER),
+                                     SURFACE_STATE_OFFSET(BIND_IDX_MFC_BATCHBUFFER));
+}
+
+/*
+ * Set up all surfaces for the GPU-built batch buffer: inputs first, then
+ * the output surface.
+ */
+static void
+gen8_mfc_batchbuffer_surfaces_setup(VADriverContextP ctx,
+ struct encode_state *encode_state,
+ struct intel_encoder_context *encoder_context)
+{
+ gen8_mfc_batchbuffer_surfaces_input(ctx, encode_state, encoder_context);
+ gen8_mfc_batchbuffer_surfaces_output(ctx, encode_state, encoder_context);
+}
+
+/*
+ * Populate the interface descriptor table of the MFC GPE context.
+ *
+ * One 32-byte descriptor is written per kernel in gpe_context.kernels[]:
+ * it is zeroed, pointed at the kernel's buffer object, given a two-entry
+ * binding table and a constant URB read length of 4, and a relocation is
+ * emitted for the kernel start pointer in desc0.
+ */
+static void
+gen8_mfc_batchbuffer_idrt_setup(VADriverContextP ctx,
+                                struct encode_state *encode_state,
+                                struct intel_encoder_context *encoder_context)
+{
+    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
+    dri_bo *idrt_bo = mfc_context->gpe_context.idrt.bo;
+    struct gen6_interface_descriptor_data *table;
+    int k;
+
+    dri_bo_map(idrt_bo, 1);
+    assert(idrt_bo->virtual);
+    table = idrt_bo->virtual;
+
+    for (k = 0; k < mfc_context->gpe_context.num_kernels; k++) {
+        struct i965_kernel *kern = &mfc_context->gpe_context.kernels[k];
+        struct gen6_interface_descriptor_data *entry = &table[k];
+
+        assert(sizeof(*entry) == 32);
+
+        /* Start from a clean descriptor, then fill in the used fields. */
+        memset(entry, 0, sizeof(*entry));
+        entry->desc0.kernel_start_pointer = (kern->bo->offset >> 6);
+        entry->desc2.sampler_count = 0;
+        entry->desc2.sampler_state_pointer = 0;
+        entry->desc3.binding_table_entry_count = 2;
+        entry->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET(0) >> 5);
+        entry->desc4.constant_urb_entry_read_offset = 0;
+        entry->desc4.constant_urb_entry_read_length = 4;
+
+        /* Relocate the kernel start pointer stored in desc0. */
+        dri_bo_emit_reloc(idrt_bo,
+                          I915_GEM_DOMAIN_INSTRUCTION, 0,
+                          0,
+                          k * sizeof(*entry) + offsetof(struct gen6_interface_descriptor_data, desc0),
+                          kern->bo);
+    }
+
+    dri_bo_unmap(idrt_bo);
+}
+
+/*
+ * Placeholder for constant-buffer setup: currently does nothing. The
+ * (void) cast only keeps the otherwise unused local from triggering a
+ * warning.
+ */
+static void
+gen8_mfc_batchbuffer_constant_setup(VADriverContextP ctx,
+ struct encode_state *encode_state,
+ struct intel_encoder_context *encoder_context)
+{
+ struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
+
+ (void)mfc_context;
+}
+
+/*
+ * Emit one 12-dword CMD_MEDIA_OBJECT into batch.
+ *
+ * Dword 1 carries the kernel index, dwords 2..5 are zero, and the remaining
+ * six dwords are inline data for the kernel:
+ *   head_offset, batchbuffer_offset,
+ *   head_size:tail_size (16 bits each),
+ *   number_mb_cmds plus the first_object / last_object / last_slice flags,
+ *   mb_y:mb_x, and qp:width_in_mbs.
+ */
+static void
+gen8_mfc_batchbuffer_emit_object_command(struct intel_batchbuffer *batch,
+                                         int index,
+                                         int head_offset,
+                                         int batchbuffer_offset,
+                                         int head_size,
+                                         int tail_size,
+                                         int number_mb_cmds,
+                                         int first_object,
+                                         int last_object,
+                                         int last_slice,
+                                         int mb_x,
+                                         int mb_y,
+                                         int width_in_mbs,
+                                         int qp)
+{
+    /* Pre-pack the composite inline dwords so the emission reads linearly. */
+    const int sizes_dw = (head_size << 16) | tail_size;
+    const int flags_dw = (number_mb_cmds << 16) |
+                         (first_object << 2) |
+                         (last_object << 1) |
+                         last_slice;
+    const int mb_pos_dw = (mb_y << 8) | mb_x;
+    const int qp_width_dw = (qp << 16) | width_in_mbs;
+
+    BEGIN_BATCH(batch, 12);
+
+    /* Command header and kernel selection. */
+    OUT_BATCH(batch, CMD_MEDIA_OBJECT | (12 - 2));
+    OUT_BATCH(batch, index);
+    OUT_BATCH(batch, 0);
+    OUT_BATCH(batch, 0);
+    OUT_BATCH(batch, 0);
+    OUT_BATCH(batch, 0);
+
+    /* Inline data. */
+    OUT_BATCH(batch, head_offset);
+    OUT_BATCH(batch, batchbuffer_offset);
+    OUT_BATCH(batch, sizes_dw);
+    OUT_BATCH(batch, flags_dw);
+    OUT_BATCH(batch, mb_pos_dw);
+    OUT_BATCH(batch, qp_width_dw);
+
+    ADVANCE_BATCH(batch);
+}
+
+/*
+ * Emit the media-object commands that make the GPU kernel generate the PAK
+ * commands of one slice.
+ *
+ * The slice's macroblocks are processed in chunks of 128: one media object
+ * per full chunk, followed by one more for the remainder when the last full
+ * chunk did not end the slice. The first object also accounts for the slice
+ * head (head_size OWords) and the last one for the tail (tail_size OWords);
+ * head_offset and batchbuffer_offset are advanced accordingly.
+ *
+ * The intra or inter kernel is chosen from the NORMALISED slice type
+ * (intel_avc_enc_slice_type_fixup), as everywhere else in this file, so
+ * that an I slice signalled with an alternate slice_type code is not sent
+ * to the inter kernel.
+ */
+static void
+gen8_mfc_avc_batchbuffer_slice_command(VADriverContextP ctx,
+                                       struct intel_encoder_context *encoder_context,
+                                       VAEncSliceParameterBufferH264 *slice_param,
+                                       int head_offset,
+                                       unsigned short head_size,
+                                       unsigned short tail_size,
+                                       int batchbuffer_offset,
+                                       int qp,
+                                       int last_slice)
+{
+    struct intel_batchbuffer *batch = encoder_context->base.batch;
+    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
+    int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
+    int total_mbs = slice_param->num_macroblocks;
+    int number_mb_cmds = 128;
+    int starting_mb = 0;
+    int last_object = 0;
+    int first_object = 1;
+    int i;
+    int mb_x, mb_y;
+    int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
+    int index = (slice_type == SLICE_TYPE_I) ? MFC_BATCHBUFFER_AVC_INTRA : MFC_BATCHBUFFER_AVC_INTER;
+
+    /* Full chunks of number_mb_cmds macroblocks. */
+    for (i = 0; i < total_mbs / number_mb_cmds; i++) {
+        last_object = (total_mbs - starting_mb) == number_mb_cmds;
+        mb_x = (slice_param->macroblock_address + starting_mb) % width_in_mbs;
+        mb_y = (slice_param->macroblock_address + starting_mb) / width_in_mbs;
+        /* Each coordinate is packed into 8 bits of the inline data. */
+        assert(mb_x <= 255 && mb_y <= 255);
+
+        starting_mb += number_mb_cmds;
+
+        gen8_mfc_batchbuffer_emit_object_command(batch,
+                                                 index,
+                                                 head_offset,
+                                                 batchbuffer_offset,
+                                                 head_size,
+                                                 tail_size,
+                                                 number_mb_cmds,
+                                                 first_object,
+                                                 last_object,
+                                                 last_slice,
+                                                 mb_x,
+                                                 mb_y,
+                                                 width_in_mbs,
+                                                 qp);
+
+        if (first_object) {
+            head_offset += head_size;
+            batchbuffer_offset += head_size;
+        }
+
+        if (last_object) {
+            head_offset += tail_size;
+            batchbuffer_offset += tail_size;
+        }
+
+        batchbuffer_offset += number_mb_cmds * CMD_LEN_IN_OWORD;
+
+        first_object = 0;
+    }
+
+    /* Remainder chunk (also the only chunk for slices shorter than 128 MBs). */
+    if (!last_object) {
+        last_object = 1;
+        number_mb_cmds = total_mbs % number_mb_cmds;
+        mb_x = (slice_param->macroblock_address + starting_mb) % width_in_mbs;
+        mb_y = (slice_param->macroblock_address + starting_mb) / width_in_mbs;
+        assert(mb_x <= 255 && mb_y <= 255);
+        starting_mb += number_mb_cmds;
+
+        gen8_mfc_batchbuffer_emit_object_command(batch,
+                                                 index,
+                                                 head_offset,
+                                                 batchbuffer_offset,
+                                                 head_size,
+                                                 tail_size,
+                                                 number_mb_cmds,
+                                                 first_object,
+                                                 last_object,
+                                                 last_slice,
+                                                 mb_x,
+                                                 mb_y,
+                                                 width_in_mbs,
+                                                 qp);
+    }
+}
+
+/*
+ * Prepare one slice for GPU-side batch buffer generation.
+ *
+ * Writes the slice "head" (slice state, picture headers for slice 0, packed
+ * slice data) and "tail" (tail data object) into the auxiliary batch buffer,
+ * each padded to a 16-byte boundary, then emits the media-object commands
+ * for the slice via gen8_mfc_avc_batchbuffer_slice_command().
+ *
+ * batchbuffer_offset is where this slice starts in the generated batch
+ * buffer, in OWords.
+ *
+ * Returns the size in OWords (16 bytes) that the slice occupies there:
+ * head + tail + num_macroblocks * CMD_LEN_IN_OWORD.
+ */
+static int
+gen8_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
+ struct encode_state *encode_state,
+ struct intel_encoder_context *encoder_context,
+ int slice_index,
+ int batchbuffer_offset)
+{
+ struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
+ struct intel_batchbuffer *slice_batch = mfc_context->aux_batchbuffer;
+ VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
+ VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
+ VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer;
+ int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
+ int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
+ int last_slice = (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks) == (width_in_mbs * height_in_mbs);
+ int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta;
+ unsigned int rate_control_mode = encoder_context->rate_control_mode;
+ unsigned int tail_data[] = { 0x0, 0x0 };
+ long head_offset;
+ int old_used = intel_batchbuffer_used_size(slice_batch), used;
+ unsigned short head_size, tail_size;
+ int slice_type = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type);
+ int qp_slice;
+
+ /*
+  * In CBR mode the QP comes from the rate-control context. The slice's
+  * qp delta is only rewritten when slice_header_index is 0 for this slice.
+  */
+ qp_slice = qp;
+ if (rate_control_mode == VA_RC_CBR) {
+ qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
+ if (encode_state->slice_header_index[slice_index] == 0) {
+ pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
+ qp_slice = qp;
+ }
+ }
+
+ /* only support for 8-bit pixel bit-depth */
+ assert(pSequenceParameter->bit_depth_luma_minus8 == 0);
+ assert(pSequenceParameter->bit_depth_chroma_minus8 == 0);
+ assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52);
+ assert(qp >= 0 && qp < 52);
+
+ /* Head starts at the current fill level of the aux batch, in OWords. */
+ head_offset = old_used / 16;
+ gen8_mfc_avc_slice_state(ctx,
+ pPicParameter,
+ pSliceParameter,
+ encode_state,
+ encoder_context,
+ (rate_control_mode == VA_RC_CBR),
+ qp_slice,
+ slice_batch);
+
+ if (slice_index == 0)
+ intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
+
+ intel_avc_slice_insert_packed_data(ctx, encode_state, encoder_context, slice_index, slice_batch);
+
+ intel_batchbuffer_align(slice_batch, 16); /* aligned by an Oword */
+ used = intel_batchbuffer_used_size(slice_batch);
+ head_size = (used - old_used) / 16;
+ old_used = used;
+
+ /* tail */
+ if (last_slice) {
+ mfc_context->insert_object(ctx,
+ encoder_context,
+ tail_data,
+ 2,
+ 8,
+ 2,
+ 1,
+ 1,
+ 0,
+ slice_batch);
+ } else {
+ mfc_context->insert_object(ctx,
+ encoder_context,
+ tail_data,
+ 1,
+ 8,
+ 1,
+ 1,
+ 1,
+ 0,
+ slice_batch);
+ }
+
+ intel_batchbuffer_align(slice_batch, 16); /* aligned by an Oword */
+ used = intel_batchbuffer_used_size(slice_batch);
+ tail_size = (used - old_used) / 16;
+
+
+ gen8_mfc_avc_batchbuffer_slice_command(ctx,
+ encoder_context,
+ pSliceParameter,
+ head_offset,
+ head_size,
+ tail_size,
+ batchbuffer_offset,
+ qp,
+ last_slice);
+
+ return head_size + tail_size + pSliceParameter->num_macroblocks * CMD_LEN_IN_OWORD;
+}
+
+/*
+ * Build and submit the media pipeline that generates the MFC batch buffer.
+ *
+ * Inside one atomic section of the encoder batch: set up the GPE pipeline,
+ * then emit the commands for every slice, feeding each slice the running
+ * offset (sum of the sizes returned for the previous slices). The batch is
+ * flushed afterwards.
+ */
+static void
+gen8_mfc_avc_batchbuffer_pipeline(VADriverContextP ctx,
+ struct encode_state *encode_state,
+ struct intel_encoder_context *encoder_context)
+{
+ struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
+ struct intel_batchbuffer *batch = encoder_context->base.batch;
+ int i, size, offset = 0;
+ intel_batchbuffer_start_atomic(batch, 0x4000);
+ gen6_gpe_pipeline_setup(ctx, &mfc_context->gpe_context, batch);
+
+ for ( i = 0; i < encode_state->num_slice_params_ext; i++) {
+ size = gen8_mfc_avc_batchbuffer_slice(ctx, encode_state, encoder_context, i, offset);
+ offset += size;
+ }
+
+ intel_batchbuffer_end_atomic(batch);
+ intel_batchbuffer_flush(batch);
+}
+
+/*
+ * Run the four steps of GPU-side batch buffer construction in order:
+ * surfaces, interface descriptors, constants, then the pipeline itself.
+ */
+static void
+gen8_mfc_build_avc_batchbuffer(VADriverContextP ctx,
+ struct encode_state *encode_state,
+ struct intel_encoder_context *encoder_context)
+{
+ gen8_mfc_batchbuffer_surfaces_setup(ctx, encode_state, encoder_context);
+ gen8_mfc_batchbuffer_idrt_setup(ctx, encode_state, encoder_context);
+ gen8_mfc_batchbuffer_constant_setup(ctx, encode_state, encoder_context);
+ gen8_mfc_avc_batchbuffer_pipeline(ctx, encode_state, encoder_context);
+}
+
+/*
+ * Build the slice-level batch buffer on the GPU and return its buffer
+ * object (mfc_context->mfc_batchbuffer_surface.bo) with one extra reference
+ * taken for the caller, who must drop it.
+ */
+static dri_bo *
+gen8_mfc_avc_hardware_batchbuffer(VADriverContextP ctx,
+ struct encode_state *encode_state,
+ struct intel_encoder_context *encoder_context)
+{
+ struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
+
+ gen8_mfc_build_avc_batchbuffer(ctx, encode_state, encoder_context);
+ dri_bo_reference(mfc_context->mfc_batchbuffer_surface.bo);
+
+ return mfc_context->mfc_batchbuffer_surface.bo;
+}
+
+#endif
+
+/*
+ * Fill the encoder batch buffer for one AVC picture.
+ *
+ * Interlaced input is rejected (message on stderr, assert, early return).
+ * Otherwise the slice-level batch buffer is built, on the CPU when
+ * MFC_SOFTWARE_HASWELL is defined and on the GPU when it is not, and the
+ * main batch is programmed with: a flush, the picture-level state, and an
+ * MI_BATCH_BUFFER_START that chains to the slice batch buffer. The
+ * reference on the slice batch buffer is dropped before returning.
+ */
+static void
+gen8_mfc_avc_pipeline_programing(VADriverContextP ctx,
+ struct encode_state *encode_state,
+ struct intel_encoder_context *encoder_context)
+{
+ struct intel_batchbuffer *batch = encoder_context->base.batch;
+ dri_bo *slice_batch_bo;
+
+ if ( intel_mfc_interlace_check(ctx, encode_state, encoder_context) ) {
+ fprintf(stderr, "Current VA driver don't support interlace mode!\n");
+ assert(0);
+ return;
+ }
+
+#ifdef MFC_SOFTWARE_HASWELL
+ slice_batch_bo = gen8_mfc_avc_software_batchbuffer(ctx, encode_state, encoder_context);
+#else
+ slice_batch_bo = gen8_mfc_avc_hardware_batchbuffer(ctx, encode_state, encoder_context);
+#endif
+
+ // begin programing
+ intel_batchbuffer_start_atomic_bcs(batch, 0x4000);
+ intel_batchbuffer_emit_mi_flush(batch);
+
+ // picture level programing
+ gen8_mfc_avc_pipeline_picture_programing(ctx, encode_state, encoder_context);
+
+ /* Chain to the slice-level batch buffer (3-dword command with a relocation). */
+ BEGIN_BCS_BATCH(batch, 3);
+ OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
+ OUT_BCS_RELOC(batch,
+ slice_batch_bo,
+ I915_GEM_DOMAIN_COMMAND, 0,
+ 0);
+ OUT_BCS_BATCH(batch, 0);
+ ADVANCE_BCS_BATCH(batch);
+
+ // end programing
+ intel_batchbuffer_end_atomic(batch);
+
+ /* Drop the reference handed over by the batch buffer builder. */
+ dri_bo_unreference(slice_batch_bo);
+}
+
+
+/*
+ * Encode one AVC picture.
+ *
+ * Each pass initialises the MFC state, prepares the surfaces, programs the
+ * pipeline and runs it. Without CBR rate control a single pass is made.
+ * With CBR the coded size is read back and handed to the bit-rate
+ * controller:
+ *   - BRC_NO_HRD_VIOLATION: update the HRD context and finish;
+ *   - BRC_OVERFLOW_WITH_MIN_QP / BRC_UNDERFLOW_WITH_MAX_QP: cannot be
+ *     repaired, report it once on stderr and finish;
+ *   - any other status: encode the picture again.
+ *
+ * Returns VA_STATUS_SUCCESS, or the error reported by gen8_mfc_stop() when
+ * the coded size cannot be read back (previously that status was dropped
+ * and an uninitialised size could reach the rate controller).
+ */
+static VAStatus
+gen8_mfc_avc_encode_picture(VADriverContextP ctx,
+                            struct encode_state *encode_state,
+                            struct intel_encoder_context *encoder_context)
+{
+    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
+    unsigned int rate_control_mode = encoder_context->rate_control_mode;
+    int current_frame_bits_size = 0;
+    VAStatus va_status;
+    int sts;
+
+    for (;;) {
+        gen8_mfc_init(ctx, encode_state, encoder_context);
+        intel_mfc_avc_prepare(ctx, encode_state, encoder_context);
+        /*Programing bcs pipeline*/
+        gen8_mfc_avc_pipeline_programing(ctx, encode_state, encoder_context); //filling the pipeline
+        gen8_mfc_run(ctx, encode_state, encoder_context);
+        if (rate_control_mode == VA_RC_CBR /*|| rate_control_mode == VA_RC_VBR*/) {
+            va_status = gen8_mfc_stop(ctx, encode_state, encoder_context, &current_frame_bits_size);
+            if (va_status != VA_STATUS_SUCCESS)
+                return va_status;
+
+            sts = intel_mfc_brc_postpack(encode_state, mfc_context, current_frame_bits_size);
+            if (sts == BRC_NO_HRD_VIOLATION) {
+                intel_mfc_hrd_context_update(encode_state, mfc_context);
+                break;
+            }
+            else if (sts == BRC_OVERFLOW_WITH_MIN_QP || sts == BRC_UNDERFLOW_WITH_MAX_QP) {
+                /* Warn only once per context about an unrepairable violation. */
+                if (!mfc_context->hrd.violation_noted) {
+                    fprintf(stderr, "Unrepairable %s!\n", (sts == BRC_OVERFLOW_WITH_MIN_QP)? "overflow": "underflow");
+                    mfc_context->hrd.violation_noted = 1;
+                }
+                return VA_STATUS_SUCCESS;
+            }
+        } else {
+            break;
+        }
+    }
+
+    return VA_STATUS_SUCCESS;
+}
+
+/*
+ * MPEG-2
+ */
+
+/*
+ * Picture type code written into MFX_MPEG2_PIC_STATE, indexed by the
+ * picture_type field of the VA MPEG-2 picture parameter. The table has
+ * exactly three entries (I, P, B).
+ */
+static const int
+va_to_gen8_mpeg2_picture_type[3] = {
+ 1, /* I */
+ 2, /* P */
+ 3 /* B */
+};
+
+/*
+ * Emit the 13-dword MFX_MPEG2_PIC_STATE command for the current picture.
+ *
+ * The f_codes and picture coding extension bits come from the MPEG-2
+ * picture parameter, the picture size (in macroblocks, minus one) from the
+ * MFC surface state. Dword 4 is set to a fixed non-zero pattern when the
+ * first slice's quantiser_scale_code is at least 14, and to 0 otherwise.
+ *
+ * NOTE(review): pic_param->picture_type indexes the 3-entry
+ * va_to_gen8_mpeg2_picture_type table without a range check, and
+ * slice_params_ext[0] is dereferenced without the NULL check applied to
+ * pic_param_ext.
+ */
+static void
+gen8_mfc_mpeg2_pic_state(VADriverContextP ctx,
+ struct intel_encoder_context *encoder_context,
+ struct encode_state *encode_state)
+{
+ struct intel_batchbuffer *batch = encoder_context->base.batch;
+ struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
+ VAEncPictureParameterBufferMPEG2 *pic_param;
+ int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
+ int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
+ VAEncSliceParameterBufferMPEG2 *slice_param = NULL;
+
+ assert(encode_state->pic_param_ext && encode_state->pic_param_ext->buffer);
+ pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
+ slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[0]->buffer;
+
+ BEGIN_BCS_BATCH(batch, 13);
+ OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2));
+ OUT_BCS_BATCH(batch,
+ (pic_param->f_code[1][1] & 0xf) << 28 | /* f_code[1][1] */
+ (pic_param->f_code[1][0] & 0xf) << 24 | /* f_code[1][0] */
+ (pic_param->f_code[0][1] & 0xf) << 20 | /* f_code[0][1] */
+ (pic_param->f_code[0][0] & 0xf) << 16 | /* f_code[0][0] */
+ pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
+ pic_param->picture_coding_extension.bits.picture_structure << 12 |
+ pic_param->picture_coding_extension.bits.top_field_first << 11 |
+ pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
+ pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
+ pic_param->picture_coding_extension.bits.q_scale_type << 8 |
+ pic_param->picture_coding_extension.bits.intra_vlc_format << 7 |
+ pic_param->picture_coding_extension.bits.alternate_scan << 6);
+ OUT_BCS_BATCH(batch,
+ 0 << 14 | /* LoadSlicePointerFlag, 0 means only loading bitstream pointer once */
+ va_to_gen8_mpeg2_picture_type[pic_param->picture_type] << 9 |
+ 0);
+ OUT_BCS_BATCH(batch,
+ 1 << 31 | /* slice concealment */
+ (height_in_mbs - 1) << 16 |
+ (width_in_mbs - 1));
+
+ /* Fixed bit pattern used only for coarse quantisation (scale code >= 14). */
+ if (slice_param && slice_param->quantiser_scale_code >= 14)
+ OUT_BCS_BATCH(batch, (3 << 1) | (1 << 4) | (5 << 8) | (1 << 12));
+ else
+ OUT_BCS_BATCH(batch, 0);
+
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch,
+ 0xFFF << 16 | /* InterMBMaxSize */
+ 0xFFF << 0 | /* IntraMBMaxSize */
+ 0);
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+ ADVANCE_BCS_BATCH(batch);
+}
+
+/*
+ * Load the MPEG-2 intra and non-intra quantiser matrices.
+ *
+ * Both matrices are fixed tables of 64 bytes (the non-intra one is flat 16)
+ * and are handed to gen8_mfc_qm_state() as 16 dwords each.
+ *
+ * NOTE(review): the byte arrays are passed through an (unsigned int *)
+ * cast; this presumes the callee only copies the bytes - confirm.
+ */
+static void
+gen8_mfc_mpeg2_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
+{
+ unsigned char intra_qm[64] = {
+ 8, 16, 19, 22, 26, 27, 29, 34,
+ 16, 16, 22, 24, 27, 29, 34, 37,
+ 19, 22, 26, 27, 29, 34, 34, 38,
+ 22, 22, 26, 27, 29, 34, 37, 40,
+ 22, 26, 27, 29, 32, 35, 40, 48,
+ 26, 27, 29, 32, 35, 40, 48, 58,
+ 26, 27, 29, 34, 38, 46, 56, 69,
+ 27, 29, 35, 38, 46, 56, 69, 83
+ };
+
+ unsigned char non_intra_qm[64] = {
+ 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16
+ };
+
+ gen8_mfc_qm_state(ctx, MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX, (unsigned int *)intra_qm, 16, encoder_context);
+ gen8_mfc_qm_state(ctx, MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX, (unsigned int *)non_intra_qm, 16,encoder_context);
+}
+
+/*
+ * Load the MPEG-2 forward quantiser matrices (intra and non-intra).
+ *
+ * Each entry is the 16-bit reciprocal 65536 / q of the matching quantiser
+ * matrix entry; the intra table is stored transposed with respect to the
+ * intra quantiser matrix loaded by gen8_mfc_mpeg2_qm_state(). The
+ * non-intra table is flat 0x1000 (65536 / 16). Both tables are 64 shorts
+ * and are handed to gen8_mfc_fqm_state() as 32 dwords.
+ *
+ * Three intra entries used to be 65536/0x13 where the quantiser matrix
+ * holds 29 (0x1d): intra_qm[4][3], [2][4] and [3][4]. They now use 0x1d so
+ * that forward and inverse quantisation agree.
+ */
+static void
+gen8_mfc_mpeg2_fqm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
+{
+    unsigned short intra_fqm[64] = {
+        65536/0x8,  65536/0x10, 65536/0x13, 65536/0x16, 65536/0x16, 65536/0x1a, 65536/0x1a, 65536/0x1b,
+        65536/0x10, 65536/0x10, 65536/0x16, 65536/0x16, 65536/0x1a, 65536/0x1b, 65536/0x1b, 65536/0x1d,
+        65536/0x13, 65536/0x16, 65536/0x1a, 65536/0x1a, 65536/0x1b, 65536/0x1d, 65536/0x1d, 65536/0x23,
+        65536/0x16, 65536/0x18, 65536/0x1b, 65536/0x1b, 65536/0x1d, 65536/0x20, 65536/0x22, 65536/0x26,
+        65536/0x1a, 65536/0x1b, 65536/0x1d, 65536/0x1d, 65536/0x20, 65536/0x23, 65536/0x26, 65536/0x2e,
+        65536/0x1b, 65536/0x1d, 65536/0x22, 65536/0x22, 65536/0x23, 65536/0x28, 65536/0x2e, 65536/0x38,
+        65536/0x1d, 65536/0x22, 65536/0x22, 65536/0x25, 65536/0x28, 65536/0x30, 65536/0x38, 65536/0x45,
+        65536/0x22, 65536/0x25, 65536/0x26, 65536/0x28, 65536/0x30, 65536/0x3a, 65536/0x45, 65536/0x53,
+    };
+
+    unsigned short non_intra_fqm[64] = {
+        0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
+        0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
+        0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
+        0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
+        0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
+        0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
+        0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
+        0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
+    };
+
+    gen8_mfc_fqm_state(ctx, MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX, (unsigned int *)intra_fqm, 32, encoder_context);
+    gen8_mfc_fqm_state(ctx, MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX, (unsigned int *)non_intra_fqm, 32, encoder_context);
+}
+
+/*
+ * Emit one 8-dword MFC_MPEG2_SLICEGROUP_STATE command.
+ *
+ * x/y and next_x/next_y are packed one byte each into DW2.  When batch is
+ * NULL the encoder's base batch buffer is used.  is_fisrt_slice_group is
+ * accepted but not used by this function.
+ */
+static void
+gen8_mfc_mpeg2_slicegroup_state(VADriverContextP ctx,
+                                struct intel_encoder_context *encoder_context,
+                                int x, int y,
+                                int next_x, int next_y,
+                                int is_fisrt_slice_group,
+                                int is_last_slice_group,
+                                int intra_slice,
+                                int qp,
+                                struct intel_batchbuffer *batch)
+{
+    struct gen6_mfc_context *mfc = encoder_context->mfc_context;
+    int group_flags;
+    int group_bounds;
+
+    if (!batch)
+        batch = encoder_context->base.batch;
+
+    /* Bits that are always set; MbRateCtrlFlag (31) and
+     * FirstSliceHdrDisabled (14) stay 0. */
+    group_flags = (1 << 17) |   /* Insert Header before the first slice group data */
+                  (1 << 16) |   /* SliceData PresentFlag: always 1 */
+                  (1 << 15);    /* TailPresentFlag: always 1 */
+
+    if (is_last_slice_group)
+        group_flags |= 1 << 19; /* IsLastSliceGrp */
+
+    if (intra_slice)
+        group_flags |= (1 << 13) |  /* IntraSlice */
+                       (1 << 12);   /* IntraSliceFlag */
+
+    group_bounds = next_y << 24 | next_x << 16 | y << 8 | x << 0;
+
+    BEGIN_BCS_BATCH(batch, 8);
+
+    OUT_BCS_BATCH(batch, MFC_MPEG2_SLICEGROUP_STATE | (8 - 2));
+    OUT_BCS_BATCH(batch, group_flags);
+    OUT_BCS_BATCH(batch, group_bounds);
+    OUT_BCS_BATCH(batch, qp);   /* FIXME: SliceGroupQp */
+    /* bitstream pointer is only loaded once for the first slice of a frame when
+     * LoadSlicePointerFlag is 0
+     */
+    OUT_BCS_BATCH(batch, mfc->mfc_indirect_pak_bse_object.offset);
+    OUT_BCS_BATCH(batch, 0);    /* FIXME: */
+    OUT_BCS_BATCH(batch, 0);    /* FIXME: CorrectPoints */
+    OUT_BCS_BATCH(batch, 0);    /* FIXME: CVxxx */
+
+    ADVANCE_BCS_BATCH(batch);
+}
+
+/*
+ * Emit a 9-dword MFC_MPEG2_PAK_OBJECT command for one intra macroblock.
+ *
+ * All four motion-vector dwords are written as 0.  When batch is NULL the
+ * encoder's base batch buffer is used.
+ *
+ * The dwords are assembled in unsigned arithmetic: shifting 0xff by 24 or a
+ * flag by 31 in a signed int would overflow into the sign bit, which is
+ * undefined behaviour in C.  The emitted bit patterns are unchanged.
+ *
+ * Returns the command length in dwords.
+ */
+static int
+gen8_mfc_mpeg2_pak_object_intra(VADriverContextP ctx,
+                                struct intel_encoder_context *encoder_context,
+                                int x, int y,
+                                int first_mb_in_slice,
+                                int last_mb_in_slice,
+                                int first_mb_in_slice_group,
+                                int last_mb_in_slice_group,
+                                int mb_type,
+                                int qp_scale_code,
+                                int coded_block_pattern,
+                                unsigned char target_size_in_word,
+                                unsigned char max_size_in_word,
+                                struct intel_batchbuffer *batch)
+{
+    int len_in_dwords = 9;
+    unsigned int mb_info, mb_position, size_and_cbp, slice_info;
+
+    if (batch == NULL)
+        batch = encoder_context->base.batch;
+
+    /* PackedMvNum (24), MvFormat (20), TransformFlag (15, frame DCT),
+     * FieldMbFlag (14), SkipMbFlag (2) and InterMbMode (0) are all 0. */
+    mb_info = 7u << 17 |                        /* CbpDcY/CbpDcU/CbpDcV */
+              1u << 13 |                        /* IntraMbFlag */
+              (unsigned int)mb_type << 8;       /* MbType: Intra */
+
+    mb_position = (unsigned int)y << 16 | (unsigned int)x;
+
+    size_and_cbp = (unsigned int)max_size_in_word << 24 |
+                   (unsigned int)target_size_in_word << 16 |
+                   (unsigned int)coded_block_pattern << 6;  /* CBP */
+
+    /* EnableCoeffClamp (27), MbSkipConvDisable (25) and MvFieldSelect (16)
+     * are left at 0. */
+    slice_info = (unsigned int)last_mb_in_slice << 31 |
+                 (unsigned int)first_mb_in_slice << 30 |
+                 (unsigned int)last_mb_in_slice_group << 26 |
+                 (unsigned int)first_mb_in_slice_group << 24 |
+                 (unsigned int)qp_scale_code << 0;
+
+    BEGIN_BCS_BATCH(batch, len_in_dwords);
+
+    OUT_BCS_BATCH(batch, MFC_MPEG2_PAK_OBJECT | (len_in_dwords - 2));
+    OUT_BCS_BATCH(batch, mb_info);
+    OUT_BCS_BATCH(batch, mb_position);
+    OUT_BCS_BATCH(batch, size_and_cbp);
+    OUT_BCS_BATCH(batch, slice_info);
+    OUT_BCS_BATCH(batch, 0);    /* MV[0][0] */
+    OUT_BCS_BATCH(batch, 0);    /* MV[1][0] */
+    OUT_BCS_BATCH(batch, 0);    /* MV[0][1] */
+    OUT_BCS_BATCH(batch, 0);    /* MV[1][1] */
+
+    ADVANCE_BCS_BATCH(batch);
+
+    return len_in_dwords;
+}
+
+/* Byte offset, from the start of a per-macroblock message, at which
+ * gen8_mfc_mpeg2_pak_object_inter() reads its motion vectors. */
+#define MPEG2_INTER_MV_OFFSET 48
+
+/*
+ * Inclusive motion-vector limits, indexed by f_code.
+ * mpeg2_motion_vector() clamps to entries 1..9; entry 0 is a placeholder.
+ */
+static struct _mv_ranges
+{
+ int low; /* in the unit of 1/2 pixel */
+ int high; /* in the unit of 1/2 pixel */
+} mv_ranges[] = {
+ {0, 0},
+ {-16, 15},
+ {-32, 31},
+ {-64, 63},
+ {-128, 127},
+ {-256, 255},
+ {-512, 511},
+ {-1024, 1023},
+ {-2048, 2047},
+ {-4096, 4095}
+};
+
+/*
+ * Sanitise one motion-vector component (half-pel units).
+ *
+ * pos is the macroblock coordinate on the same axis and display_max the
+ * picture extent in pixels on that axis.  A vector that would move the
+ * 16-pixel macroblock outside [0, display_max] is replaced by 0.  The result
+ * is then clamped to mv_ranges[f_code] when f_code is in 1..9; other f_code
+ * values leave it unclamped.
+ */
+static int
+mpeg2_motion_vector(int mv, int pos, int display_max, int f_code)
+{
+    /* one macroblock spans 16 pixels, i.e. 32 half-pels */
+    if (mv + pos * 32 < 0 ||
+        mv + (pos + 1) * 32 > display_max * 2)
+        mv = 0;
+
+    if (f_code <= 0 || f_code >= 10)
+        return mv;
+
+    if (mv < mv_ranges[f_code].low)
+        return mv_ranges[f_code].low;
+
+    return (mv > mv_ranges[f_code].high) ? mv_ranges[f_code].high : mv;
+}
+
+/*
+ * Emit a 9-dword MFC_MPEG2_PAK_OBJECT command for one inter macroblock.
+ *
+ * Four 16-bit motion-vector components are read from msg at byte offset
+ * MPEG2_INTER_MV_OFFSET, halved, and passed through mpeg2_motion_vector().
+ * When batch is NULL the encoder's base batch buffer is used.
+ *
+ * Changes from the previous version:
+ *  - vertical components are clamped with f_code[s][1] rather than the
+ *    horizontal f_code[s][0] (MPEG-2 f_code[s][t]: t == 1 is vertical;
+ *    NOTE(review): confirm against the f_code values programmed in the
+ *    picture state);
+ *  - dwords are built in unsigned arithmetic, so a negative vertical
+ *    component or a flag shifted to bit 31 no longer relies on undefined
+ *    signed shifts.  The emitted bit patterns are otherwise unchanged.
+ *
+ * Returns the command length in dwords.
+ */
+static int
+gen8_mfc_mpeg2_pak_object_inter(VADriverContextP ctx,
+                                struct encode_state *encode_state,
+                                struct intel_encoder_context *encoder_context,
+                                unsigned int *msg,
+                                int width_in_mbs, int height_in_mbs,
+                                int x, int y,
+                                int first_mb_in_slice,
+                                int last_mb_in_slice,
+                                int first_mb_in_slice_group,
+                                int last_mb_in_slice_group,
+                                int qp_scale_code,
+                                unsigned char target_size_in_word,
+                                unsigned char max_size_in_word,
+                                struct intel_batchbuffer *batch)
+{
+    VAEncPictureParameterBufferMPEG2 *pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
+    int len_in_dwords = 9;
+    short *mvptr, mvx0, mvy0, mvx1, mvy1;
+    unsigned int mb_info, mb_position, size_and_cbp, slice_info;
+
+    if (batch == NULL)
+        batch = encoder_context->base.batch;
+
+    mvptr = (short *)((unsigned char *)msg + MPEG2_INTER_MV_OFFSET);
+    mvx0 = mpeg2_motion_vector(mvptr[0] / 2, x, width_in_mbs * 16, pic_param->f_code[0][0]);
+    mvy0 = mpeg2_motion_vector(mvptr[1] / 2, y, height_in_mbs * 16, pic_param->f_code[0][1]);
+    mvx1 = mpeg2_motion_vector(mvptr[2] / 2, x, width_in_mbs * 16, pic_param->f_code[1][0]);
+    mvy1 = mpeg2_motion_vector(mvptr[3] / 2, y, height_in_mbs * 16, pic_param->f_code[1][1]);
+
+    /* TransformFlag (15, frame DCT), FieldMbFlag (14), IntraMbFlag (13),
+     * SkipMbFlag (2) and InterMbMode (0) are all 0. */
+    mb_info = 2u << 24 |    /* PackedMvNum */
+              7u << 20 |    /* MvFormat */
+              7u << 17 |    /* CbpDcY/CbpDcU/CbpDcV */
+              1u << 8;      /* MbType: Frame-based */
+
+    mb_position = (unsigned int)y << 16 | (unsigned int)x;
+
+    size_and_cbp = (unsigned int)max_size_in_word << 24 |
+                   (unsigned int)target_size_in_word << 16 |
+                   0x3fu << 6;  /* CBP */
+
+    /* EnableCoeffClamp (27), MbSkipConvDisable (25) and MvFieldSelect (16)
+     * are left at 0. */
+    slice_info = (unsigned int)last_mb_in_slice << 31 |
+                 (unsigned int)first_mb_in_slice << 30 |
+                 (unsigned int)last_mb_in_slice_group << 26 |
+                 (unsigned int)first_mb_in_slice_group << 24 |
+                 (unsigned int)qp_scale_code << 0;
+
+    BEGIN_BCS_BATCH(batch, len_in_dwords);
+
+    OUT_BCS_BATCH(batch, MFC_MPEG2_PAK_OBJECT | (len_in_dwords - 2));
+    OUT_BCS_BATCH(batch, mb_info);
+    OUT_BCS_BATCH(batch, mb_position);
+    OUT_BCS_BATCH(batch, size_and_cbp);
+    OUT_BCS_BATCH(batch, slice_info);
+
+    /* x in the low half-word, y in the high half-word */
+    OUT_BCS_BATCH(batch, ((unsigned int)mvx0 & 0xFFFF) | ((unsigned int)mvy0 & 0xFFFF) << 16);  /* MV[0][0] */
+    OUT_BCS_BATCH(batch, ((unsigned int)mvx1 & 0xFFFF) | ((unsigned int)mvy1 & 0xFFFF) << 16);  /* MV[1][0] */
+    OUT_BCS_BATCH(batch, 0);    /* MV[0][1] */
+    OUT_BCS_BATCH(batch, 0);    /* MV[1][1] */
+
+    ADVANCE_BCS_BATCH(batch);
+
+    return len_in_dwords;
+}
+
+/*
+ * Insert the packed header stored at slot idx of encode_state, if the
+ * application supplied one.  The bit length comes from the matching
+ * packed-header parameter buffer, which must be present.
+ */
+static void
+intel_mfc_mpeg2_insert_packed_header(VADriverContextP ctx,
+                                     struct encode_state *encode_state,
+                                     struct intel_encoder_context *encoder_context,
+                                     int idx,
+                                     struct intel_batchbuffer *slice_batch)
+{
+    struct gen6_mfc_context *mfc = encoder_context->mfc_context;
+    VAEncPackedHeaderParameterBuffer *header_param;
+    unsigned int *payload;
+    unsigned int bits;
+
+    if (!encode_state->packed_header_data[idx])
+        return;
+
+    payload = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
+
+    assert(encode_state->packed_header_param[idx]);
+    header_param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
+    bits = header_param->bit_length;
+
+    mfc->insert_object(ctx,
+                       encoder_context,
+                       payload,
+                       ALIGN(bits, 32) >> 5,    /* length in dwords, rounded up */
+                       bits & 0x1f,             /* bits used in the last dword */
+                       5,                       /* FIXME: check it */
+                       0,
+                       0,
+                       0,                       /* Needn't insert emulation bytes for MPEG-2 */
+                       slice_batch);
+}
+
+/*
+ * Insert the application-provided packed MPEG-2 SPS and PPS headers, in
+ * that order, into slice_batch.  A header that was not supplied is skipped.
+ */
+static void
+intel_mfc_mpeg2_pipeline_header_programing(VADriverContextP ctx,
+                                           struct encode_state *encode_state,
+                                           struct intel_encoder_context *encoder_context,
+                                           struct intel_batchbuffer *slice_batch)
+{
+    intel_mfc_mpeg2_insert_packed_header(ctx, encode_state, encoder_context,
+                                         va_enc_packed_type_to_idx(VAEncPackedHeaderMPEG2_SPS),
+                                         slice_batch);
+    intel_mfc_mpeg2_insert_packed_header(ctx, encode_state, encoder_context,
+                                         va_enc_packed_type_to_idx(VAEncPackedHeaderMPEG2_PPS),
+                                         slice_batch);
+}
+
+/*
+ * Emit all commands for one slice group into slice_batch:
+ *   1. the slice-group state,
+ *   2. the packed headers (first slice group only),
+ *   3. a one-byte zero delimiter,
+ *   4. one PAK object per macroblock of every slice in the group,
+ *   5. tail data: the picture delimiter when this is the last group
+ *      (next_slice_group_param == NULL), otherwise a one-byte delimiter.
+ *
+ * The VME output buffer is mapped for the duration of step 4 to read the
+ * per-macroblock messages.
+ */
+static void
+gen8_mfc_mpeg2_pipeline_slice_group(VADriverContextP ctx,
+                                    struct encode_state *encode_state,
+                                    struct intel_encoder_context *encoder_context,
+                                    int slice_index,
+                                    VAEncSliceParameterBufferMPEG2 *next_slice_group_param,
+                                    struct intel_batchbuffer *slice_batch)
+{
+    struct gen6_vme_context *vme_context = encoder_context->vme_context;
+    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
+    VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
+    VAEncSliceParameterBufferMPEG2 *slice_param;
+    unsigned char tail_delimiter[] = {MPEG2_DELIMITER0, MPEG2_DELIMITER1, MPEG2_DELIMITER2, MPEG2_DELIMITER3, MPEG2_DELIMITER4, 0, 0, 0};
+    unsigned char section_delimiter[] = {0x0, 0x0, 0x0, 0x0};
+    int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
+    int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
+    int end_of_picture = (next_slice_group_param == NULL);
+    int start_x, start_y, next_x, next_y;
+    int slice_no, mb;
+    unsigned char *vme_output;
+
+    slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[slice_index]->buffer;
+    start_x = slice_param->macroblock_address % width_in_mbs;
+    start_y = slice_param->macroblock_address / width_in_mbs;
+    /* a slice must not wrap past the end of its macroblock row */
+    assert(start_x + slice_param->num_macroblocks <= width_in_mbs);
+
+    dri_bo_map(vme_context->vme_output.bo , 0);
+    vme_output = (unsigned char *)vme_context->vme_output.bo->virtual;
+
+    if (end_of_picture) {
+        /* no following group: it would start just below the picture */
+        next_x = 0;
+        next_y = height_in_mbs;
+    } else {
+        next_x = next_slice_group_param->macroblock_address % width_in_mbs;
+        next_y = next_slice_group_param->macroblock_address / width_in_mbs;
+    }
+
+    gen8_mfc_mpeg2_slicegroup_state(ctx,
+                                    encoder_context,
+                                    start_x,
+                                    start_y,
+                                    next_x,
+                                    next_y,
+                                    slice_index == 0,
+                                    end_of_picture,
+                                    slice_param->is_intra_slice,
+                                    slice_param->quantiser_scale_code,
+                                    slice_batch);
+
+    if (slice_index == 0)
+        intel_mfc_mpeg2_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
+
+    /* Insert '00' to make sure the header is valid */
+    mfc_context->insert_object(ctx,
+                               encoder_context,
+                               (unsigned int*)section_delimiter,
+                               1,
+                               8,   /* 8bits in the last DWORD */
+                               1,   /* 1 byte */
+                               1,
+                               0,
+                               0,
+                               slice_batch);
+
+    for (slice_no = 0;
+         slice_no < encode_state->slice_params_ext[slice_index]->num_elements;
+         slice_no++, slice_param++) {
+        /* PAK for each macroblocks */
+        for (mb = 0; mb < slice_param->num_macroblocks; mb++) {
+            int mb_x = (slice_param->macroblock_address + mb) % width_in_mbs;
+            int mb_y = (slice_param->macroblock_address + mb) / width_in_mbs;
+            int first_mb_in_slice = (mb == 0);
+            int last_mb_in_slice = (mb == slice_param->num_macroblocks - 1);
+            int first_mb_in_slice_group = (slice_no == 0 && mb == 0);
+            int last_mb_in_slice_group = (slice_no == encode_state->slice_params_ext[slice_index]->num_elements - 1 &&
+                                          mb == slice_param->num_macroblocks - 1);
+            /* VME message belonging to this macroblock */
+            unsigned int *msg = (unsigned int *)(vme_output + (slice_param->macroblock_address + mb) * vme_context->vme_output.size_block);
+            int code_as_intra = slice_param->is_intra_slice ? 1 : 0;
+
+            if (!code_as_intra) {
+                /* non-intra slice: pick whichever mode has the lower cost
+                 * field in the VME message */
+                int inter_rdo = msg[AVC_INTER_RDO_OFFSET] & AVC_RDO_MASK;
+                int intra_rdo = msg[AVC_INTRA_RDO_OFFSET] & AVC_RDO_MASK;
+
+                code_as_intra = (intra_rdo < inter_rdo);
+            }
+
+            if (code_as_intra)
+                gen8_mfc_mpeg2_pak_object_intra(ctx,
+                                                encoder_context,
+                                                mb_x, mb_y,
+                                                first_mb_in_slice,
+                                                last_mb_in_slice,
+                                                first_mb_in_slice_group,
+                                                last_mb_in_slice_group,
+                                                0x1a,
+                                                slice_param->quantiser_scale_code,
+                                                0x3f,
+                                                0,
+                                                0xff,
+                                                slice_batch);
+            else
+                gen8_mfc_mpeg2_pak_object_inter(ctx,
+                                                encode_state,
+                                                encoder_context,
+                                                msg,
+                                                width_in_mbs, height_in_mbs,
+                                                mb_x, mb_y,
+                                                first_mb_in_slice,
+                                                last_mb_in_slice,
+                                                first_mb_in_slice_group,
+                                                last_mb_in_slice_group,
+                                                slice_param->quantiser_scale_code,
+                                                0,
+                                                0xff,
+                                                slice_batch);
+        }
+    }
+
+    dri_bo_unmap(vme_context->vme_output.bo);
+
+    /* tail data: picture delimiter at the end of a picture, otherwise a
+     * one-byte delimiter at the end of a slice group */
+    mfc_context->insert_object(ctx,
+                               encoder_context,
+                               end_of_picture ? (unsigned int *)tail_delimiter
+                                              : (unsigned int *)section_delimiter,
+                               end_of_picture ? 2 : 1,
+                               8,                       /* 8bits in the last DWORD */
+                               end_of_picture ? 5 : 1,  /* 5 bytes / 1 byte */
+                               1,
+                               1,
+                               0,
+                               slice_batch);
+}
+
+/*
+ * Build, on the CPU, one batch buffer holding the commands of every slice
+ * group (slice state, insert-object and PAK-object commands), terminated by
+ * MI_BATCH_BUFFER_END.
+ *
+ * The auxiliary batch buffer of the MFC context is consumed: it is freed and
+ * the context pointer cleared.  The returned buffer object carries one
+ * reference owned by the caller.
+ */
+static dri_bo *
+gen8_mfc_mpeg2_software_slice_batchbuffer(VADriverContextP ctx,
+                                          struct encode_state *encode_state,
+                                          struct intel_encoder_context *encoder_context)
+{
+    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct intel_batchbuffer *aux = mfc_context->aux_batchbuffer;
+    dri_bo *aux_bo = aux->buffer;
+    int group;
+
+    for (group = 0; group < encode_state->num_slice_params_ext; group++) {
+        /* NULL marks the last slice group of the picture */
+        VAEncSliceParameterBufferMPEG2 *following =
+            (group == encode_state->num_slice_params_ext - 1) ?
+            NULL :
+            (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[group + 1]->buffer;
+
+        gen8_mfc_mpeg2_pipeline_slice_group(ctx, encode_state, encoder_context, group, following, aux);
+    }
+
+    intel_batchbuffer_align(aux, 8);
+
+    BEGIN_BCS_BATCH(aux, 2);
+    OUT_BCS_BATCH(aux, 0);
+    OUT_BCS_BATCH(aux, MI_BATCH_BUFFER_END);
+    ADVANCE_BCS_BATCH(aux);
+
+    /* take our own reference before the batch buffer wrapper is freed */
+    dri_bo_reference(aux_bo);
+    intel_batchbuffer_free(aux);
+    mfc_context->aux_batchbuffer = NULL;
+
+    return aux_bo;
+}
+
+/*
+ * Emit the picture-level MFX state for an MPEG-2 frame, in this order:
+ * pipe mode select, surface state, indirect object base addresses, pipe
+ * buffer addresses, BSP buffer base addresses, MPEG-2 picture state, and
+ * finally the QM and FQM tables.
+ */
+static void
+gen8_mfc_mpeg2_pipeline_picture_programing(VADriverContextP ctx,
+                                           struct encode_state *encode_state,
+                                           struct intel_encoder_context *encoder_context)
+{
+    struct gen6_mfc_context *mfc = encoder_context->mfc_context;
+
+    /* stages reached through the context's function pointers */
+    mfc->pipe_mode_select(ctx, MFX_FORMAT_MPEG2, encoder_context);
+    mfc->set_surface_state(ctx, encoder_context);
+    mfc->ind_obj_base_addr_state(ctx, encoder_context);
+
+    /* stages called directly */
+    gen8_mfc_pipe_buf_addr_state(ctx, encoder_context);
+    gen8_mfc_bsp_buf_base_addr_state(ctx, encoder_context);
+    gen8_mfc_mpeg2_pic_state(ctx, encoder_context, encode_state);
+    gen8_mfc_mpeg2_qm_state(ctx, encoder_context);
+    gen8_mfc_mpeg2_fqm_state(ctx, encoder_context);
+}
+
+/*
+ * Program the BCS pipeline for one MPEG-2 picture.
+ *
+ * The slice-level commands are first built into a separate buffer object.
+ * The main batch then receives a flush, the picture-level state, and an
+ * MI_BATCH_BUFFER_START that points at the slice buffer.  The local
+ * reference on the slice buffer is dropped before returning.
+ */
+static void
+gen8_mfc_mpeg2_pipeline_programing(VADriverContextP ctx,
+                                   struct encode_state *encode_state,
+                                   struct intel_encoder_context *encoder_context)
+{
+    struct intel_batchbuffer *main_batch = encoder_context->base.batch;
+    dri_bo *slices_bo = gen8_mfc_mpeg2_software_slice_batchbuffer(ctx, encode_state, encoder_context);
+
+    /* open the atomic section */
+    intel_batchbuffer_start_atomic_bcs(main_batch, 0x4000);
+    intel_batchbuffer_emit_mi_flush(main_batch);
+
+    /* picture-level state */
+    gen8_mfc_mpeg2_pipeline_picture_programing(ctx, encode_state, encoder_context);
+
+    /* chain to the slice-level batch buffer */
+    BEGIN_BCS_BATCH(main_batch, 4);
+    OUT_BCS_BATCH(main_batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
+    OUT_BCS_RELOC(main_batch,
+                  slices_bo,
+                  I915_GEM_DOMAIN_COMMAND, 0,
+                  0);
+    OUT_BCS_BATCH(main_batch, 0);
+    OUT_BCS_BATCH(main_batch, 0);
+    ADVANCE_BCS_BATCH(main_batch);
+
+    /* close the atomic section */
+    intel_batchbuffer_end_atomic(main_batch);
+
+    dri_bo_unreference(slices_bo);
+}
+
+/*
+ * Bind the buffers needed to encode one MPEG-2 picture to the MFC context:
+ * the reconstructed surface (as pre-deblocking output), the forward and
+ * backward reference surfaces, the input YUV surface and the coded buffer.
+ * A reference is taken on every buffer object stored in the context.
+ *
+ * A missing backward reference falls back to the forward one, and the
+ * remaining reference slots repeat slots 0 and 1 alternately.
+ *
+ * Always returns VA_STATUS_SUCCESS.
+ */
+static VAStatus
+intel_mfc_mpeg2_prepare(VADriverContextP ctx,
+                        struct encode_state *encode_state,
+                        struct intel_encoder_context *encoder_context)
+{
+    struct gen6_mfc_context *mfc = encoder_context->mfc_context;
+    struct object_surface *recon, *forward, *backward, *input;
+    struct object_buffer *coded;
+    struct i965_coded_buffer_segment *segment;
+    dri_bo *coded_bo;
+    int slot;
+
+    /* reconstructed surface */
+    recon = encode_state->reconstructed_object;
+    i965_check_alloc_surface_bo(ctx, recon, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
+    mfc->pre_deblocking_output.bo = recon->bo;
+    dri_bo_reference(mfc->pre_deblocking_output.bo);
+    mfc->surface_state.width = recon->orig_width;
+    mfc->surface_state.height = recon->orig_height;
+    mfc->surface_state.w_pitch = recon->width;
+    mfc->surface_state.h_pitch = recon->height;
+
+    /* forward reference: NULL when absent */
+    forward = encode_state->reference_objects[0];
+    mfc->reference_surfaces[0].bo = (forward && forward->bo) ? forward->bo : NULL;
+
+    if (mfc->reference_surfaces[0].bo)
+        dri_bo_reference(mfc->reference_surfaces[0].bo);
+
+    /* backward reference: falls back to the forward one when absent */
+    backward = encode_state->reference_objects[1];
+
+    if (backward && backward->bo)
+        mfc->reference_surfaces[1].bo = backward->bo;
+    else
+        mfc->reference_surfaces[1].bo = mfc->reference_surfaces[0].bo;
+
+    if (mfc->reference_surfaces[1].bo)
+        dri_bo_reference(mfc->reference_surfaces[1].bo);
+
+    /* remaining slots mirror slot 0 (even) or slot 1 (odd) */
+    for (slot = 2; slot < ARRAY_ELEMS(mfc->reference_surfaces); slot++) {
+        mfc->reference_surfaces[slot].bo = mfc->reference_surfaces[slot & 1].bo;
+
+        if (mfc->reference_surfaces[slot].bo)
+            dri_bo_reference(mfc->reference_surfaces[slot].bo);
+    }
+
+    /* input YUV surface */
+    input = encode_state->input_yuv_object;
+    mfc->uncompressed_picture_source.bo = input->bo;
+    dri_bo_reference(mfc->uncompressed_picture_source.bo);
+
+    /* coded buffer: bitstream starts after the driver's header area */
+    coded = encode_state->coded_buf_object;
+    coded_bo = coded->buffer_store->bo;
+    mfc->mfc_indirect_pak_bse_object.bo = coded_bo;
+    mfc->mfc_indirect_pak_bse_object.offset = I965_CODEDBUFFER_HEADER_SIZE;
+    mfc->mfc_indirect_pak_bse_object.end_offset = ALIGN(coded->size_element - 0x1000, 0x1000);
+    dri_bo_reference(mfc->mfc_indirect_pak_bse_object.bo);
+
+    /* set the internal flag to 0 to indicate the coded size is unknown */
+    dri_bo_map(coded_bo, 1);
+    segment = (struct i965_coded_buffer_segment *)coded_bo->virtual;
+    segment->mapped = 0;
+    segment->codec = encoder_context->codec;
+    dri_bo_unmap(coded_bo);
+
+    return VA_STATUS_SUCCESS;
+}
+
+/*
+ * Encode one MPEG-2 picture: initialise the MFC context, bind the buffers,
+ * program the BCS pipeline and submit it.
+ *
+ * Returns the status reported by intel_mfc_mpeg2_prepare() if it is not
+ * VA_STATUS_SUCCESS (previously that status was discarded), otherwise
+ * VA_STATUS_SUCCESS.
+ */
+static VAStatus
+gen8_mfc_mpeg2_encode_picture(VADriverContextP ctx,
+                              struct encode_state *encode_state,
+                              struct intel_encoder_context *encoder_context)
+{
+    VAStatus vaStatus;
+
+    gen8_mfc_init(ctx, encode_state, encoder_context);
+
+    vaStatus = intel_mfc_mpeg2_prepare(ctx, encode_state, encoder_context);
+
+    /* do not program or run the pipeline on buffers that failed to bind */
+    if (vaStatus != VA_STATUS_SUCCESS)
+        return vaStatus;
+
+    /*Programing bcs pipeline*/
+    gen8_mfc_mpeg2_pipeline_programing(ctx, encode_state, encoder_context);
+    gen8_mfc_run(ctx, encode_state, encoder_context);
+
+    return VA_STATUS_SUCCESS;
+}
+
+/*
+ * Tear down an MFC encoder context: drop the reference held on every
+ * buffer object stored in it, destroy the GPE context, free the auxiliary
+ * batch buffer if one is still attached, then free the context itself.
+ * Each pointer is cleared right after it is released.
+ */
+static void
+gen8_mfc_context_destroy(void *context)
+{
+    struct gen6_mfc_context *mfc = context;
+    int n;
+
+    /* picture-level buffers */
+    dri_bo_unreference(mfc->post_deblocking_output.bo);
+    mfc->post_deblocking_output.bo = NULL;
+
+    dri_bo_unreference(mfc->pre_deblocking_output.bo);
+    mfc->pre_deblocking_output.bo = NULL;
+
+    dri_bo_unreference(mfc->uncompressed_picture_source.bo);
+    mfc->uncompressed_picture_source.bo = NULL;
+
+    dri_bo_unreference(mfc->mfc_indirect_pak_bse_object.bo);
+    mfc->mfc_indirect_pak_bse_object.bo = NULL;
+
+    /* direct MV buffers */
+    n = 0;
+    while (n < NUM_MFC_DMV_BUFFERS) {
+        dri_bo_unreference(mfc->direct_mv_buffers[n].bo);
+        mfc->direct_mv_buffers[n].bo = NULL;
+        n++;
+    }
+
+    /* scratch and status buffers */
+    dri_bo_unreference(mfc->intra_row_store_scratch_buffer.bo);
+    mfc->intra_row_store_scratch_buffer.bo = NULL;
+
+    dri_bo_unreference(mfc->macroblock_status_buffer.bo);
+    mfc->macroblock_status_buffer.bo = NULL;
+
+    dri_bo_unreference(mfc->deblocking_filter_row_store_scratch_buffer.bo);
+    mfc->deblocking_filter_row_store_scratch_buffer.bo = NULL;
+
+    dri_bo_unreference(mfc->bsd_mpc_row_store_scratch_buffer.bo);
+    mfc->bsd_mpc_row_store_scratch_buffer.bo = NULL;
+
+    /* reference surfaces */
+    n = 0;
+    while (n < MAX_MFC_REFERENCE_SURFACES) {
+        dri_bo_unreference(mfc->reference_surfaces[n].bo);
+        mfc->reference_surfaces[n].bo = NULL;
+        n++;
+    }
+
+    i965_gpe_context_destroy(&mfc->gpe_context);
+
+    /* batch buffer surfaces */
+    dri_bo_unreference(mfc->mfc_batchbuffer_surface.bo);
+    mfc->mfc_batchbuffer_surface.bo = NULL;
+
+    dri_bo_unreference(mfc->aux_batchbuffer_surface.bo);
+    mfc->aux_batchbuffer_surface.bo = NULL;
+
+    if (mfc->aux_batchbuffer)
+        intel_batchbuffer_free(mfc->aux_batchbuffer);
+
+    mfc->aux_batchbuffer = NULL;
+
+    free(mfc);
+}
+
+/*
+ * Dispatch one picture to the encoder matching profile.
+ *
+ * H.264 profiles go to gen8_mfc_avc_encode_picture(), MPEG-2 Simple/Main to
+ * gen8_mfc_mpeg2_encode_picture().  Any other profile yields
+ * VA_STATUS_ERROR_UNSUPPORTED_PROFILE.
+ */
+static VAStatus gen8_mfc_pipeline(VADriverContextP ctx,
+                                  VAProfile profile,
+                                  struct encode_state *encode_state,
+                                  struct intel_encoder_context *encoder_context)
+{
+    switch (profile) {
+    case VAProfileH264ConstrainedBaseline:
+    case VAProfileH264Main:
+    case VAProfileH264High:
+    case VAProfileH264MultiviewHigh:
+    case VAProfileH264StereoHigh:
+        return gen8_mfc_avc_encode_picture(ctx, encode_state, encoder_context);
+
+    /* FIXME: add for other profile */
+    case VAProfileMPEG2Simple:
+    case VAProfileMPEG2Main:
+        return gen8_mfc_mpeg2_encode_picture(ctx, encode_state, encoder_context);
+
+    default:
+        return VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
+    }
+}
+
+/*
+ * Allocate and initialise the gen8 MFC encoder context and attach it, with
+ * its destroy/pipeline/BRC hooks, to encoder_context.
+ *
+ * Returns True on success.  Returns False, leaving encoder_context
+ * untouched, when the context cannot be allocated (previously the NULL
+ * result of calloc() was dereferenced).
+ */
+Bool gen8_mfc_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
+{
+    struct gen6_mfc_context *mfc_context = calloc(1, sizeof(struct gen6_mfc_context));
+
+    if (!mfc_context)
+        return False;
+
+    /* GPE (media kernel) resources */
+    mfc_context->gpe_context.surface_state_binding_table.length = (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;
+
+    mfc_context->gpe_context.idrt.max_entries = MAX_GPE_KERNELS;
+    mfc_context->gpe_context.idrt.entry_size = sizeof(struct gen6_interface_descriptor_data);
+
+    mfc_context->gpe_context.curbe.length = 32 * 4;
+
+    mfc_context->gpe_context.vfe_state.max_num_threads = 60 - 1;
+    mfc_context->gpe_context.vfe_state.num_urb_entries = 16;
+    mfc_context->gpe_context.vfe_state.gpgpu_mode = 0;
+    mfc_context->gpe_context.vfe_state.urb_entry_size = 59 - 1;
+    mfc_context->gpe_context.vfe_state.curbe_allocation_size = 37 - 1;
+
+    i965_gpe_load_kernels(ctx,
+                          &mfc_context->gpe_context,
+                          gen8_mfc_kernels,
+                          NUM_MFC_KERNEL);
+
+    /* gen8 implementations of the per-generation hooks */
+    mfc_context->pipe_mode_select = gen8_mfc_pipe_mode_select;
+    mfc_context->set_surface_state = gen8_mfc_surface_state;
+    mfc_context->ind_obj_base_addr_state = gen8_mfc_ind_obj_base_addr_state;
+    mfc_context->avc_img_state = gen8_mfc_avc_img_state;
+    mfc_context->avc_qm_state = gen8_mfc_avc_qm_state;
+    mfc_context->avc_fqm_state = gen8_mfc_avc_fqm_state;
+    mfc_context->insert_object = gen8_mfc_avc_insert_object;
+    mfc_context->buffer_suface_setup = gen8_gpe_buffer_suface_setup;
+
+    encoder_context->mfc_context = mfc_context;
+    encoder_context->mfc_context_destroy = gen8_mfc_context_destroy;
+    encoder_context->mfc_pipeline = gen8_mfc_pipeline;
+    encoder_context->mfc_brc_prepare = intel_mfc_brc_prepare;
+
+    return True;
+}
diff --git a/src/gen8_mfd.c b/src/gen8_mfd.c
new file mode 100644
index 0000000..b482846
--- /dev/null
+++ b/src/gen8_mfd.c
@@ -0,0 +1,3190 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Xiang Haihao <haihao.xiang@intel.com>
+ * Zhao Yakui <yakui.zhao@intel.com>
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include <math.h>
+#include <va/va_dec_jpeg.h>
+#include <va/va_dec_vp8.h>
+
+#include "intel_batchbuffer.h"
+#include "intel_driver.h"
+
+#include "i965_defines.h"
+#include "i965_drv_video.h"
+#include "i965_decoder_utils.h"
+
+#include "gen7_mfd.h"
+#include "intel_media.h"
+
+#define B0_STEP_REV 2
+#define IS_STEPPING_BPLUS(i965) ((i965->intel.revision) >= B0_STEP_REV)
+
+/*
+ * 64-entry index table over an 8x8 block stored row-major (index = row * 8
+ * + column).  The values trace the classic zig-zag path from the top-left
+ * to the bottom-right corner.
+ */
+static const uint32_t zigzag_direct[64] = {
+ 0, 1, 8, 16, 9, 2, 3, 10,
+ 17, 24, 32, 25, 18, 11, 4, 5,
+ 12, 19, 26, 33, 40, 48, 41, 34,
+ 27, 20, 13, 6, 7, 14, 21, 28,
+ 35, 42, 49, 56, 57, 50, 43, 36,
+ 29, 22, 15, 23, 30, 37, 44, 51,
+ 58, 59, 52, 45, 38, 31, 39, 46,
+ 53, 60, 61, 54, 47, 55, 62, 63
+};
+
+/*
+ * Make sure obj_surface carries the per-surface AVC decoding data: a
+ * GenAvcSurface record (created on first use with frame_store_id == -1) and
+ * a direct-MV buffer of 128 bytes per macroblock of the frame.
+ *
+ * If the GenAvcSurface record cannot be allocated the function asserts and,
+ * in builds without assertions, returns with private_data still NULL
+ * instead of dereferencing the NULL pointer as it did before.
+ */
+static void
+gen8_mfd_init_avc_surface(VADriverContextP ctx,
+                          VAPictureParameterBufferH264 *pic_param,
+                          struct object_surface *obj_surface)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    GenAvcSurface *gen7_avc_surface = obj_surface->private_data;
+    int width_in_mbs, height_in_mbs;
+
+    obj_surface->free_private_data = gen_free_avc_surface;
+    width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
+    height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
+
+    if (!gen7_avc_surface) {
+        gen7_avc_surface = calloc(1, sizeof(GenAvcSurface));
+        assert(gen7_avc_surface);
+
+        if (!gen7_avc_surface)
+            return;
+
+        gen7_avc_surface->frame_store_id = -1;
+        assert((obj_surface->size & 0x3f) == 0);
+        obj_surface->private_data = gen7_avc_surface;
+    }
+
+    /* DMV buffers now relate to the whole frame, irrespective of
+       field coding modes */
+    if (gen7_avc_surface->dmv_top == NULL) {
+        gen7_avc_surface->dmv_top = dri_bo_alloc(i965->intel.bufmgr,
+                                                 "direct mv w/r buffer",
+                                                 width_in_mbs * height_in_mbs * 128,
+                                                 0x1000);
+        assert(gen7_avc_surface->dmv_top);
+    }
+}
+
+/*
+ * Emit the 5-dword MFX_PIPE_MODE_SELECT command that puts the MFX engine
+ * into long-format VLD decoding mode for the codec given by
+ * standard_select.  The pre/post deblocking output enables are taken from
+ * the decoder context.
+ */
+static void
+gen8_mfd_pipe_mode_select(VADriverContextP ctx,
+                          struct decode_state *decode_state,
+                          int standard_select,
+                          struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct intel_batchbuffer *bcs = gen7_mfd_context->base.batch;
+    unsigned int mode_dw;
+
+    assert(standard_select == MFX_FORMAT_MPEG2 ||
+           standard_select == MFX_FORMAT_AVC ||
+           standard_select == MFX_FORMAT_VC1 ||
+           standard_select == MFX_FORMAT_JPEG ||
+           standard_select == MFX_FORMAT_VP8);
+
+    /* Stream-Out (bit 10) stays disabled and stitch mode (bit 5) is off. */
+    mode_dw = (MFX_LONG_MODE << 17) |   /* Currently only support long format */
+              (MFD_MODE_VLD << 15) |    /* VLD mode */
+              (gen7_mfd_context->post_deblocking_output.valid << 9) |   /* Post Deblocking Output */
+              (gen7_mfd_context->pre_deblocking_output.valid << 8) |    /* Pre Deblocking Output */
+              (MFX_CODEC_DECODE << 4) | /* decoding mode */
+              (standard_select << 0);
+
+    BEGIN_BCS_BATCH(bcs, 5);
+    OUT_BCS_BATCH(bcs, MFX_PIPE_MODE_SELECT | (5 - 2));
+    OUT_BCS_BATCH(bcs, mode_dw);
+    /* none of the "terminate on AVC error" bits (4..2) is set */
+    OUT_BCS_BATCH(bcs, 0);
+    OUT_BCS_BATCH(bcs, 0);  /* pic status/error report id */
+    OUT_BCS_BATCH(bcs, 0);  /* reserved */
+    ADVANCE_BCS_BATCH(bcs);
+}
+
+/*
+ * Emit the 6-dword MFX_SURFACE_STATE command describing the render target
+ * of the current decode: dimensions, pitch, format and chroma plane
+ * offsets.  Chroma is programmed as interleaved for every codec except
+ * JPEG.
+ */
+static void
+gen8_mfd_surface_state(VADriverContextP ctx,
+                       struct decode_state *decode_state,
+                       int standard_select,
+                       struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct intel_batchbuffer *bcs = gen7_mfd_context->base.batch;
+    struct object_surface *target = decode_state->render_object;
+    unsigned int cb_row, cr_row;
+    unsigned int format;
+    unsigned int size_dw, layout_dw;
+
+    assert(target);
+
+    cb_row = target->y_cb_offset;
+    cr_row = target->y_cr_offset;
+
+    /* Y800 surfaces are luma-only; everything else is 4:2:0 planar */
+    if (target->fourcc == VA_FOURCC_Y800)
+        format = MFX_SURFACE_MONOCHROME;
+    else
+        format = MFX_SURFACE_PLANAR_420_8;
+
+    size_dw = ((target->orig_height - 1) << 18) |
+              ((target->orig_width - 1) << 4);
+
+    /* surface object control state (bit 22) is ignored; bit 2 must be 0 */
+    layout_dw = (format << 28) |                /* 420 planar YUV surface */
+                ((standard_select != MFX_FORMAT_JPEG) << 27) |  /* interleave chroma, set to 0 for JPEG */
+                ((target->width - 1) << 3) |    /* pitch */
+                (1 << 1) |                      /* must be tiled */
+                (I965_TILEWALK_YMAJOR << 0);    /* tile walk, must be 1 */
+
+    BEGIN_BCS_BATCH(bcs, 6);
+    OUT_BCS_BATCH(bcs, MFX_SURFACE_STATE | (6 - 2));
+    OUT_BCS_BATCH(bcs, 0);
+    OUT_BCS_BATCH(bcs, size_dw);
+    OUT_BCS_BATCH(bcs, layout_dw);
+    /* X offsets (bits 16+) must be 0 for both chroma planes */
+    OUT_BCS_BATCH(bcs, (cb_row << 0));  /* Y offset for U(Cb) */
+    OUT_BCS_BATCH(bcs, (cr_row << 0));  /* Y offset for V(Cr), must be 0 for video codec, non-zero for JPEG */
+    ADVANCE_BCS_BATCH(bcs);
+}
+
+/*
+ * Emit the 61-dword MFX_PIPE_BUF_ADDR_STATE command for decoding.
+ *
+ * For each buffer slot a relocation is written when the corresponding
+ * buffer in gen7_mfd_context is marked valid (or, for reference surfaces,
+ * has a valid id, object and bo); otherwise the slot is written as 0.
+ * Every address is followed by zero dwords, and the slots not used for
+ * decoding are written as 0.  The DW numbers in the comments below give
+ * each group's position within the command.
+ */
+static void
+gen8_mfd_pipe_buf_addr_state(VADriverContextP ctx,
+ struct decode_state *decode_state,
+ int standard_select,
+ struct gen7_mfd_context *gen7_mfd_context)
+{
+ struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+ int i;
+
+ BEGIN_BCS_BATCH(batch, 61);
+ OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
+ /* Pre-deblock 1-3 */
+ if (gen7_mfd_context->pre_deblocking_output.valid)
+ OUT_BCS_RELOC(batch, gen7_mfd_context->pre_deblocking_output.bo,
+ I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+ 0);
+ else
+ OUT_BCS_BATCH(batch, 0);
+
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+ /* Post-deblocking 4-6 */
+ if (gen7_mfd_context->post_deblocking_output.valid)
+ OUT_BCS_RELOC(batch, gen7_mfd_context->post_deblocking_output.bo,
+ I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+ 0);
+ else
+ OUT_BCS_BATCH(batch, 0);
+
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+
+ /* uncompressed-video & stream out 7-12 */
+ OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
+ OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+
+ /* intra row-store scratch 13-15 */
+ if (gen7_mfd_context->intra_row_store_scratch_buffer.valid)
+ OUT_BCS_RELOC(batch, gen7_mfd_context->intra_row_store_scratch_buffer.bo,
+ I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+ 0);
+ else
+ OUT_BCS_BATCH(batch, 0);
+
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+ /* deblocking-filter-row-store 16-18 */
+ if (gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid)
+ OUT_BCS_RELOC(batch, gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo,
+ I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+ 0);
+ else
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+
+ /* DW 19..50: two dwords per reference surface entry (address, then 0) */
+ for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
+ struct object_surface *obj_surface;
+
+ if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
+ gen7_mfd_context->reference_surface[i].obj_surface &&
+ gen7_mfd_context->reference_surface[i].obj_surface->bo) {
+ obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
+
+ OUT_BCS_RELOC(batch, obj_surface->bo,
+ I915_GEM_DOMAIN_INSTRUCTION, 0,
+ 0);
+ } else {
+ OUT_BCS_BATCH(batch, 0);
+ }
+
+ OUT_BCS_BATCH(batch, 0);
+ }
+
+ /* reference property 51 */
+ OUT_BCS_BATCH(batch, 0);
+
+ /* Macroblock status & ILDB 52-57 */
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+
+ /* the second Macroblock status 58-60 */
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+
+ ADVANCE_BCS_BATCH(batch);
+}
+
+/*
+ * Emit the 26-dword MFX_IND_OBJ_BASE_ADDR_STATE command.
+ *
+ * Only the indirect bitstream object is programmed: its base is a
+ * relocation to slice_data_bo and its upper bound is 0x80000000.  The
+ * remaining four five-dword groups (indirect MV, IT_COFF, IT_DBLK and the
+ * encoder's PAK_BSE object) are written as 0.
+ *
+ * ctx and standard_select are not read here.
+ */
+static void
+gen8_mfd_ind_obj_base_addr_state(VADriverContextP ctx,
+                                 dri_bo *slice_data_bo,
+                                 int standard_select,
+                                 struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+    int dw;
+
+    BEGIN_BCS_BATCH(batch, 26);
+    OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
+
+    /* DW 1..3: MFX Indirect Bitstream Object Base Address */
+    OUT_BCS_RELOC(batch, slice_data_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+
+    /* DW 4..5: upper bound; must be set, up to 2G */
+    OUT_BCS_BATCH(batch, 0x80000000);
+    OUT_BCS_BATCH(batch, 0);
+
+    /*
+     * DW  6..10: MFX indirect MV
+     * DW 11..15: MFX IT_COFF
+     * DW 16..20: MFX IT_DBLK
+     * DW 21..25: MFX PAK_BSE object for encoder
+     */
+    for (dw = 6; dw <= 25; dw++)
+        OUT_BCS_BATCH(batch, 0);
+
+    ADVANCE_BCS_BATCH(batch);
+}
+
+/*
+ * Emit the 10-dword MFX_BSP_BUF_BASE_ADDR_STATE command.
+ *
+ * Three three-dword slots follow the header: BSD/MPC row store scratch,
+ * MPR row store scratch and the bitplane read buffer.  For each slot the
+ * first dword is a relocation when the buffer is flagged valid (0
+ * otherwise); the other two dwords are 0.
+ *
+ * ctx, decode_state and standard_select are not read here.
+ */
+static void
+gen8_mfd_bsp_buf_base_addr_state(VADriverContextP ctx,
+                                 struct decode_state *decode_state,
+                                 int standard_select,
+                                 struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+
+    BEGIN_BCS_BATCH(batch, 10);
+    OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
+
+    /* DW 1..3: BSD/MPC row store scratch buffer */
+    if (!gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid) {
+        OUT_BCS_BATCH(batch, 0);
+    } else {
+        OUT_BCS_RELOC(batch, gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo,
+                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+                      0);
+    }
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+
+    /* DW 4..6: MPR row store scratch buffer */
+    if (!gen7_mfd_context->mpr_row_store_scratch_buffer.valid) {
+        OUT_BCS_BATCH(batch, 0);
+    } else {
+        OUT_BCS_RELOC(batch, gen7_mfd_context->mpr_row_store_scratch_buffer.bo,
+                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+                      0);
+    }
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+
+    /* DW 7..9: bitplane read buffer (read-only: no write domain) */
+    if (!gen7_mfd_context->bitplane_read_buffer.valid) {
+        OUT_BCS_BATCH(batch, 0);
+    } else {
+        OUT_BCS_RELOC(batch, gen7_mfd_context->bitplane_read_buffer.bo,
+                      I915_GEM_DOMAIN_INSTRUCTION, 0,
+                      0);
+    }
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+
+    ADVANCE_BCS_BATCH(batch);
+}
+
+/*
+ * Emit one 18-dword MFX_QM_STATE command carrying a quantiser matrix.
+ *
+ * qm_type   - matrix selector, written to DW1
+ * qm        - source matrix bytes
+ * qm_length - number of valid bytes at qm; must not exceed 64
+ *
+ * The command always carries a 64-byte payload, but callers in this file
+ * pass as few as 48 bytes (three 4x4 AVC scaling lists).  The staging
+ * buffer is therefore zero-initialised so the unused tail is deterministic
+ * instead of leftover stack contents.  The length is also clamped after
+ * the assertion so that a build with assertions compiled out cannot
+ * overrun the staging buffer.
+ *
+ * ctx is not read here.
+ */
+static void
+gen8_mfd_qm_state(VADriverContextP ctx,
+                  int qm_type,
+                  unsigned char *qm,
+                  int qm_length,
+                  struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+    unsigned int qm_buffer[16] = { 0 };
+
+    assert(qm_length <= 16 * 4);
+
+    /* Defensive bounds for NDEBUG builds. */
+    if (qm_length < 0)
+        qm_length = 0;
+    else if (qm_length > (int)sizeof(qm_buffer))
+        qm_length = (int)sizeof(qm_buffer);
+
+    memcpy(qm_buffer, qm, qm_length);
+
+    BEGIN_BCS_BATCH(batch, 18);
+    OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
+    OUT_BCS_BATCH(batch, qm_type << 0);
+    intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
+    ADVANCE_BCS_BATCH(batch);
+}
+
+/*
+ * Emit the 17-dword MFX_AVC_IMG_STATE command from the H.264 picture
+ * parameter buffer held in decode_state.
+ *
+ * The picture structure is derived from CurrPic.flags (1 for a top field,
+ * 3 for a bottom field, 0 for a frame) and is cross-checked against
+ * field_pic_flag with assertions.  Only monochrome and 4:2:0 content is
+ * accepted (asserted).  DW 5..16 are written as 0.
+ *
+ * ctx is not read here.
+ */
+static void
+gen8_mfd_avc_img_state(VADriverContextP ctx,
+                       struct decode_state *decode_state,
+                       struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+    VAPictureParameterBufferH264 *pic_param;
+    unsigned int width_in_mbs, height_in_mbs;
+    int img_struct = 0;     /* frame unless a field flag says otherwise */
+    int mbaff_frame_flag;
+    int n;
+
+    assert(decode_state->pic_param && decode_state->pic_param->buffer);
+    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
+    assert(!(pic_param->CurrPic.flags & VA_PICTURE_H264_INVALID));
+
+    if (pic_param->CurrPic.flags & VA_PICTURE_H264_TOP_FIELD)
+        img_struct = 1;
+    else if (pic_param->CurrPic.flags & VA_PICTURE_H264_BOTTOM_FIELD)
+        img_struct = 3;
+
+    /* img_struct is 0, 1 or 3, so "non-zero" is the same as "bit 0 set". */
+    if (img_struct == 0) {
+        assert(pic_param->pic_fields.bits.field_pic_flag == 0x0);
+    } else {
+        assert(pic_param->pic_fields.bits.field_pic_flag == 0x1);
+    }
+
+    if (!pic_param->seq_fields.bits.frame_mbs_only_flag) {
+        assert(pic_param->seq_fields.bits.direct_8x8_inference_flag == 1); /* see H.264 spec */
+    } else {
+        /* a frame containing only frame macroblocks */
+        assert(pic_param->seq_fields.bits.mb_adaptive_frame_field_flag == 0);
+        assert(pic_param->pic_fields.bits.field_pic_flag == 0);
+    }
+
+    mbaff_frame_flag = (!pic_param->pic_fields.bits.field_pic_flag &&
+                        pic_param->seq_fields.bits.mb_adaptive_frame_field_flag);
+
+    height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
+    width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
+
+    /* MFX unit doesn't support 4:2:2 and 4:4:4 picture */
+    assert(pic_param->seq_fields.bits.chroma_format_idc == 0 || /* monochrome picture */
+           pic_param->seq_fields.bits.chroma_format_idc == 1); /* 4:2:0 */
+    assert(pic_param->seq_fields.bits.residual_colour_transform_flag == 0); /* only available for 4:4:4 */
+
+    BEGIN_BCS_BATCH(batch, 17);
+    OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (17 - 2));
+
+    /* DW 1: macroblock count minus one */
+    OUT_BCS_BATCH(batch,
+                  (width_in_mbs * height_in_mbs - 1));
+
+    /* DW 2: picture dimensions in macroblocks, minus one */
+    OUT_BCS_BATCH(batch,
+                  ((height_in_mbs - 1) << 16) |
+                  ((width_in_mbs - 1) << 0));
+
+    /* DW 3: chroma QP offsets, weighted prediction and picture structure */
+    OUT_BCS_BATCH(batch,
+                  ((pic_param->second_chroma_qp_index_offset & 0x1f) << 24) |
+                  ((pic_param->chroma_qp_index_offset & 0x1f) << 16) |
+                  (0 << 14) | /* Max-bit conformance Intra flag ??? FIXME */
+                  (0 << 13) | /* Max Macroblock size conformance Inter flag ??? FIXME */
+                  (pic_param->pic_fields.bits.weighted_pred_flag << 12) | /* differ from GEN6 */
+                  (pic_param->pic_fields.bits.weighted_bipred_idc << 10) |
+                  (img_struct << 8));
+
+    /* DW 4: sequence/picture level coding flags */
+    OUT_BCS_BATCH(batch,
+                  (pic_param->seq_fields.bits.chroma_format_idc << 10) |
+                  (pic_param->pic_fields.bits.entropy_coding_mode_flag << 7) |
+                  ((!pic_param->pic_fields.bits.reference_pic_flag) << 6) |
+                  (pic_param->pic_fields.bits.constrained_intra_pred_flag << 5) |
+                  (pic_param->seq_fields.bits.direct_8x8_inference_flag << 4) |
+                  (pic_param->pic_fields.bits.transform_8x8_mode_flag << 3) |
+                  (pic_param->seq_fields.bits.frame_mbs_only_flag << 2) |
+                  (mbaff_frame_flag << 1) |
+                  (pic_param->pic_fields.bits.field_pic_flag << 0));
+
+    /* DW 5..16: written as 0 */
+    for (n = 5; n <= 16; n++)
+        OUT_BCS_BATCH(batch, 0);
+
+    ADVANCE_BCS_BATCH(batch);
+}
+
+/*
+ * Emit the AVC quantiser matrices through gen8_mfd_qm_state().
+ *
+ * The matrices come from the application's IQ matrix buffer when one is
+ * attached to decode_state, otherwise from the copy kept in
+ * gen7_mfd_context.  The two 4x4 groups (lists 0..2 and 3..5, 48 bytes
+ * each) are always sent; the two 8x8 lists are sent only when
+ * transform_8x8_mode_flag is set in the picture parameters.
+ */
+static void
+gen8_mfd_avc_qm_state(VADriverContextP ctx,
+                      struct decode_state *decode_state,
+                      struct gen7_mfd_context *gen7_mfd_context)
+{
+    VAIQMatrixBufferH264 *iq_matrix = &gen7_mfd_context->iq_matrix.h264;
+    VAPictureParameterBufferH264 *pic_param;
+
+    /* Prefer the matrix supplied with this picture, if any. */
+    if (decode_state->iq_matrix && decode_state->iq_matrix->buffer)
+        iq_matrix = (VAIQMatrixBufferH264 *)decode_state->iq_matrix->buffer;
+
+    assert(decode_state->pic_param && decode_state->pic_param->buffer);
+    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
+
+    gen8_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX,
+                      &iq_matrix->ScalingList4x4[0][0], 3 * 16, gen7_mfd_context);
+    gen8_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX,
+                      &iq_matrix->ScalingList4x4[3][0], 3 * 16, gen7_mfd_context);
+
+    if (!pic_param->pic_fields.bits.transform_8x8_mode_flag)
+        return;
+
+    gen8_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX,
+                      &iq_matrix->ScalingList8x8[0][0], 64, gen7_mfd_context);
+    gen8_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX,
+                      &iq_matrix->ScalingList8x8[1][0], 64, gen7_mfd_context);
+}
+
+/*
+ * Forward to gen75_send_avc_picid_state() with this context's batch buffer
+ * and reference surface table.  ctx and decode_state are not read here.
+ */
+static inline void
+gen8_mfd_avc_picid_state(VADriverContextP ctx,
+                         struct decode_state *decode_state,
+                         struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+
+    gen75_send_avc_picid_state(batch, gen7_mfd_context->reference_surface);
+}
+
+/*
+ * Emit the 71-dword MFX_AVC_DIRECTMODE_STATE command.
+ *
+ * Layout after the header:
+ *   - one two-dword slot per reference surface: a relocation to that
+ *     surface's dmv_top buffer plus 0, or 0/0 when the slot is unused or
+ *     the surface carries no private data;
+ *   - one dword of 0;
+ *   - three dwords for the picture being decoded (relocation to its
+ *     dmv_top buffer, then 0, 0);
+ *   - top/bottom field order counts for every reference slot (0/0 for
+ *     empty slots), looked up in pic_param->ReferenceFrames;
+ *   - top/bottom field order counts of the current picture.
+ *
+ * ctx and slice_param are not read here.
+ */
+static void
+gen8_mfd_avc_directmode_state(VADriverContextP ctx,
+                              struct decode_state *decode_state,
+                              VAPictureParameterBufferH264 *pic_param,
+                              VASliceParameterBufferH264 *slice_param,
+                              struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+    struct object_surface *obj_surface;
+    GenAvcSurface *gen7_avc_surface;
+    VAPictureH264 *curr_pic;
+    int i;
+
+    BEGIN_BCS_BATCH(batch, 71);
+    OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
+
+    /* reference surfaces 0..15 */
+    for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
+        obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
+
+        if (gen7_mfd_context->reference_surface[i].surface_id == VA_INVALID_ID ||
+            !obj_surface ||
+            !obj_surface->private_data) {
+            OUT_BCS_BATCH(batch, 0);
+            OUT_BCS_BATCH(batch, 0);
+            continue;
+        }
+
+        gen7_avc_surface = obj_surface->private_data;
+
+        OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
+                      I915_GEM_DOMAIN_INSTRUCTION, 0,
+                      0);
+        OUT_BCS_BATCH(batch, 0);
+    }
+
+    OUT_BCS_BATCH(batch, 0);
+
+    /* the current decoding frame/field */
+    obj_surface = decode_state->render_object;
+    assert(obj_surface->bo && obj_surface->private_data);
+    gen7_avc_surface = obj_surface->private_data;
+
+    OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
+                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+                  0);
+
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+
+    /* POC List */
+    for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
+        obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
+
+        if (!obj_surface) {
+            OUT_BCS_BATCH(batch, 0);
+            OUT_BCS_BATCH(batch, 0);
+        } else {
+            const VAPictureH264 * const va_pic = avc_find_picture(
+                obj_surface->base.id, pic_param->ReferenceFrames,
+                ARRAY_ELEMS(pic_param->ReferenceFrames));
+
+            assert(va_pic != NULL);
+            OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
+            OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
+        }
+    }
+
+    /* POC of the picture being decoded */
+    curr_pic = &pic_param->CurrPic;
+    OUT_BCS_BATCH(batch, curr_pic->TopFieldOrderCnt);
+    OUT_BCS_BATCH(batch, curr_pic->BottomFieldOrderCnt);
+
+    ADVANCE_BCS_BATCH(batch);
+}
+
+/*
+ * Forward to gen6_mfd_avc_phantom_slice() using this context's batch
+ * buffer, passing next_slice_param through unchanged.
+ */
+static void
+gen8_mfd_avc_phantom_slice_first(VADriverContextP ctx,
+                                 VAPictureParameterBufferH264 *pic_param,
+                                 VASliceParameterBufferH264 *next_slice_param,
+                                 struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+
+    gen6_mfd_avc_phantom_slice(ctx, pic_param, next_slice_param, batch);
+}
+
+/*
+ * Emit the 11-dword MFX_AVC_SLICE_STATE command for one slice.
+ *
+ * SI slices are programmed as I and SP slices as P.  The active reference
+ * counts are 0/0 for I, (l0 + 1)/0 for P and (l0 + 1)/(l1 + 1) for B.
+ * Macroblock addresses are doubled (shifted left by one) for MBAFF
+ * pictures before being split into horizontal/vertical positions.
+ *
+ * next_slice_param may be NULL, which marks the last slice: the "next"
+ * position is then column 0 of the row just past the picture (the frame
+ * height in macroblocks, halved for field pictures) and the last-slice
+ * flag is set.
+ *
+ * ctx is not read here.
+ */
+static void
+gen8_mfd_avc_slice_state(VADriverContextP ctx,
+                         VAPictureParameterBufferH264 *pic_param,
+                         VASliceParameterBufferH264 *slice_param,
+                         VASliceParameterBufferH264 *next_slice_param,
+                         struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+    int width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
+    int height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
+    int mbaff_picture = (!pic_param->pic_fields.bits.field_pic_flag &&
+                         pic_param->seq_fields.bits.mb_adaptive_frame_field_flag);
+    int is_intra = (slice_param->slice_type == SLICE_TYPE_I ||
+                    slice_param->slice_type == SLICE_TYPE_SI);
+    int is_pred = (slice_param->slice_type == SLICE_TYPE_P ||
+                   slice_param->slice_type == SLICE_TYPE_SP);
+    int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
+    int num_ref_idx_l0, num_ref_idx_l1;
+    int first_mb_in_slice, first_mb_in_next_slice;
+    int slice_type;
+
+    /* Collapse the slice type and derive the active reference counts. */
+    if (is_intra) {
+        slice_type = SLICE_TYPE_I;
+        assert(slice_param->num_ref_idx_l0_active_minus1 == 0);
+        assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
+        num_ref_idx_l0 = 0;
+        num_ref_idx_l1 = 0;
+    } else if (is_pred) {
+        slice_type = SLICE_TYPE_P;
+        assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
+        num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
+        num_ref_idx_l1 = 0;
+    } else {
+        assert(slice_param->slice_type == SLICE_TYPE_B);
+        slice_type = SLICE_TYPE_B;
+        num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
+        num_ref_idx_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
+    }
+
+    /* Position of this slice. */
+    first_mb_in_slice = slice_param->first_mb_in_slice << mbaff_picture;
+    slice_ver_pos = first_mb_in_slice / width_in_mbs;
+    slice_hor_pos = first_mb_in_slice % width_in_mbs;
+
+    /* Position of the following slice, or the end of the picture. */
+    if (!next_slice_param) {
+        next_slice_ver_pos = height_in_mbs / (1 + !!pic_param->pic_fields.bits.field_pic_flag);
+        next_slice_hor_pos = 0;
+    } else {
+        first_mb_in_next_slice = next_slice_param->first_mb_in_slice << mbaff_picture;
+        next_slice_ver_pos = first_mb_in_next_slice / width_in_mbs;
+        next_slice_hor_pos = first_mb_in_next_slice % width_in_mbs;
+    }
+
+    BEGIN_BCS_BATCH(batch, 11); /* FIXME: is it 10??? */
+    OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
+
+    /* DW 1: slice type */
+    OUT_BCS_BATCH(batch, slice_type);
+
+    /* DW 2: reference counts and weight denominators */
+    OUT_BCS_BATCH(batch,
+                  (num_ref_idx_l1 << 24) |
+                  (num_ref_idx_l0 << 16) |
+                  (slice_param->chroma_log2_weight_denom << 8) |
+                  (slice_param->luma_log2_weight_denom << 0));
+
+    /* DW 3: prediction, deblocking, CABAC and QP controls */
+    OUT_BCS_BATCH(batch,
+                  (slice_param->direct_spatial_mv_pred_flag << 29) |
+                  (slice_param->disable_deblocking_filter_idc << 27) |
+                  (slice_param->cabac_init_idc << 24) |
+                  ((pic_param->pic_init_qp_minus26 + 26 + slice_param->slice_qp_delta) << 16) |
+                  ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
+                  ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
+
+    /* DW 4: start position of this slice */
+    OUT_BCS_BATCH(batch,
+                  (slice_ver_pos << 24) |
+                  (slice_hor_pos << 16) |
+                  (first_mb_in_slice << 0));
+
+    /* DW 5: start position of the next slice */
+    OUT_BCS_BATCH(batch,
+                  (next_slice_ver_pos << 16) |
+                  (next_slice_hor_pos << 0));
+
+    /* DW 6: last slice flag */
+    OUT_BCS_BATCH(batch,
+                  (next_slice_param == NULL) << 19);
+
+    /* DW 7..10: written as 0 */
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+
+    ADVANCE_BCS_BATCH(batch);
+}
+
+/*
+ * Forward to gen6_send_avc_ref_idx_state() with this context's batch
+ * buffer and reference surface table.  ctx and pic_param are not read.
+ */
+static inline void
+gen8_mfd_avc_ref_idx_state(VADriverContextP ctx,
+                           VAPictureParameterBufferH264 *pic_param,
+                           VASliceParameterBufferH264 *slice_param,
+                           struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+
+    gen6_send_avc_ref_idx_state(batch,
+                                slice_param,
+                                gen7_mfd_context->reference_surface);
+}
+
+/*
+ * Emit MFX_AVC_WEIGHTOFFSET_STATE commands (98 dwords each) for a slice.
+ *
+ * No table is sent unless explicit weighting applies:
+ *   - P/SP slice with weighted_pred_flag == 1  -> one table  (list 0)
+ *   - B slice with weighted_bipred_idc == 1    -> two tables (list 0, 1)
+ *
+ * Each table packs, for 32 entries, six shorts in this order:
+ * luma weight, luma offset, Cb weight, Cb offset, Cr weight, Cr offset
+ * (chroma index 0 first, then index 1).
+ *
+ * ctx is not read here.
+ */
+static void
+gen8_mfd_avc_weightoffset_state(VADriverContextP ctx,
+                                VAPictureParameterBufferH264 *pic_param,
+                                VASliceParameterBufferH264 *slice_param,
+                                struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+    short weightoffsets[32 * 6];
+    int num_weight_offset_table = 0;
+    int table, entry;
+
+    if ((slice_param->slice_type == SLICE_TYPE_P ||
+         slice_param->slice_type == SLICE_TYPE_SP) &&
+        (pic_param->pic_fields.bits.weighted_pred_flag == 1)) {
+        num_weight_offset_table = 1;
+    }
+
+    if ((slice_param->slice_type == SLICE_TYPE_B) &&
+        (pic_param->pic_fields.bits.weighted_bipred_idc == 1)) {
+        num_weight_offset_table = 2;
+    }
+
+    for (table = 0; table < num_weight_offset_table; table++) {
+        short *dst = weightoffsets;
+
+        BEGIN_BCS_BATCH(batch, 98);
+        OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
+        OUT_BCS_BATCH(batch, table);
+
+        /* Table 0 is built from the l0 arrays, table 1 from the l1 arrays. */
+        for (entry = 0; entry < 32; entry++) {
+            if (table == 0) {
+                *dst++ = slice_param->luma_weight_l0[entry];
+                *dst++ = slice_param->luma_offset_l0[entry];
+                *dst++ = slice_param->chroma_weight_l0[entry][0];
+                *dst++ = slice_param->chroma_offset_l0[entry][0];
+                *dst++ = slice_param->chroma_weight_l0[entry][1];
+                *dst++ = slice_param->chroma_offset_l0[entry][1];
+            } else {
+                *dst++ = slice_param->luma_weight_l1[entry];
+                *dst++ = slice_param->luma_offset_l1[entry];
+                *dst++ = slice_param->chroma_weight_l1[entry][0];
+                *dst++ = slice_param->chroma_offset_l1[entry][0];
+                *dst++ = slice_param->chroma_weight_l1[entry][1];
+                *dst++ = slice_param->chroma_offset_l1[entry][1];
+            }
+        }
+
+        intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
+        ADVANCE_BCS_BATCH(batch);
+    }
+}
+
+/*
+ * Emit the 6-dword MFD_AVC_BSD_OBJECT command that starts decoding of one
+ * slice.
+ *
+ * The slice data size and offset are taken from slice_param as they are.
+ * The bit offset of the first macroblock, as returned by
+ * avc_get_first_mb_bit_offset(), is split into a byte part (bits 16 and
+ * up of DW4) and a 0..7 bit remainder (low three bits of DW4).
+ * next_slice_param == NULL sets the LastSlice flag.
+ *
+ * ctx is not read here.
+ */
+static void
+gen8_mfd_avc_bsd_object(VADriverContextP ctx,
+                        VAPictureParameterBufferH264 *pic_param,
+                        VASliceParameterBufferH264 *slice_param,
+                        dri_bo *slice_data_bo,
+                        VASliceParameterBufferH264 *next_slice_param,
+                        struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+    int slice_data_bit_offset;
+
+    slice_data_bit_offset = avc_get_first_mb_bit_offset(
+        slice_data_bo,
+        slice_param,
+        pic_param->pic_fields.bits.entropy_coding_mode_flag);
+
+    /* the input bitstream format on GEN7 differs from GEN6 */
+    BEGIN_BCS_BATCH(batch, 6);
+    OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
+
+    /* DW 1..2: slice data size and offset */
+    OUT_BCS_BATCH(batch,
+                  (slice_param->slice_data_size));
+    OUT_BCS_BATCH(batch, slice_param->slice_data_offset);
+
+    /* DW 3: every field is written as 0 */
+    OUT_BCS_BATCH(batch,
+                  (0 << 31) |
+                  (0 << 14) |
+                  (0 << 12) |
+                  (0 << 10) |
+                  (0 << 8));
+
+    /* DW 4: first macroblock byte/bit offset and LastSlice flag */
+    OUT_BCS_BATCH(batch,
+                  ((slice_data_bit_offset >> 3) << 16) |
+                  (1 << 7) |
+                  (0 << 5) |
+                  (0 << 4) |
+                  ((next_slice_param == NULL) << 3) | /* LastSlice Flag */
+                  (slice_data_bit_offset & 0x7));
+
+    /* DW 5 */
+    OUT_BCS_BATCH(batch, 0);
+
+    ADVANCE_BCS_BATCH(batch);
+}
+
+/*
+ * Fill the context's private H.264 IQ matrix with the defaults produced by
+ * avc_gen_default_iq_matrix() (flat scaling lists).  ctx is not read here.
+ */
+static inline void
+gen8_mfd_avc_context_init(
+    VADriverContextP         ctx,
+    struct gen7_mfd_context *gen7_mfd_context
+)
+{
+    /* Initialize flat scaling lists */
+    avc_gen_default_iq_matrix(&gen7_mfd_context->iq_matrix.h264);
+}
+
+/*
+ * Prepare the decoder context for one H.264 picture.
+ *
+ * Steps, in order:
+ *   1. Scan the slice parameters; in-loop deblocking is considered enabled
+ *      as soon as one slice has disable_deblocking_filter_idc != 1.
+ *   2. Refresh the frame store index table and validate the picture size
+ *      (1..256 macroblocks in each dimension, asserted).
+ *   3. Flag the render target as referenced or not, make sure it has a
+ *      buffer object and set up its AVC private data.
+ *   4. Point both the pre- and post-deblocking outputs at the render
+ *      target; exactly one of them is marked valid depending on step 1.
+ *   5. Replace the four row-store scratch buffers with freshly allocated
+ *      ones sized from the picture width, and mark the bitplane buffer
+ *      unused.
+ */
+static void
+gen8_mfd_avc_decode_init(VADriverContextP ctx,
+                         struct decode_state *decode_state,
+                         struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    VAPictureParameterBufferH264 *pic_param;
+    VASliceParameterBufferH264 *slice_param;
+    struct object_surface *obj_surface;
+    unsigned int width_in_mbs, height_in_mbs;
+    int enable_avc_ildb = 0;
+    dri_bo *bo;
+    int i, j;
+
+    /* 1. Does any slice leave the deblocking filter on? */
+    for (j = 0; j < decode_state->num_slice_params && enable_avc_ildb == 0; j++) {
+        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
+        slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
+
+        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++, slice_param++) {
+            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
+            assert((slice_param->slice_type == SLICE_TYPE_I) ||
+                   (slice_param->slice_type == SLICE_TYPE_SI) ||
+                   (slice_param->slice_type == SLICE_TYPE_P) ||
+                   (slice_param->slice_type == SLICE_TYPE_SP) ||
+                   (slice_param->slice_type == SLICE_TYPE_B));
+
+            if (slice_param->disable_deblocking_filter_idc != 1) {
+                enable_avc_ildb = 1;
+                break;
+            }
+        }
+    }
+
+    /* 2. Reference bookkeeping and picture size */
+    assert(decode_state->pic_param && decode_state->pic_param->buffer);
+    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
+    gen75_update_avc_frame_store_index(ctx, decode_state, pic_param,
+                                       gen7_mfd_context->reference_surface);
+    width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
+    height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
+    assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
+    assert(height_in_mbs > 0 && height_in_mbs <= 256);
+
+    /* 3. Current decoded picture */
+    obj_surface = decode_state->render_object;
+    if (!pic_param->pic_fields.bits.reference_pic_flag)
+        obj_surface->flags &= ~SURFACE_REFERENCED;
+    else
+        obj_surface->flags |= SURFACE_REFERENCED;
+
+    avc_ensure_surface_bo(ctx, decode_state, obj_surface, pic_param);
+    gen8_mfd_init_avc_surface(ctx, pic_param, obj_surface);
+
+    /* 4. Both outputs alias the render target; only one is enabled. */
+    dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
+    gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
+    dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
+    gen7_mfd_context->post_deblocking_output.valid = enable_avc_ildb;
+
+    dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
+    gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
+    dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
+    gen7_mfd_context->pre_deblocking_output.valid = !enable_avc_ildb;
+
+    /* 5. Row-store scratch buffers, re-allocated for every picture */
+    dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
+    bo = dri_bo_alloc(i965->intel.bufmgr,
+                      "intra row store",
+                      width_in_mbs * 64,
+                      0x1000);
+    assert(bo);
+    gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
+    gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
+
+    dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
+    bo = dri_bo_alloc(i965->intel.bufmgr,
+                      "deblocking filter row store",
+                      width_in_mbs * 64 * 4,
+                      0x1000);
+    assert(bo);
+    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
+    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
+
+    dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
+    bo = dri_bo_alloc(i965->intel.bufmgr,
+                      "bsd mpc row store",
+                      width_in_mbs * 64 * 2,
+                      0x1000);
+    assert(bo);
+    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
+    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
+
+    dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
+    bo = dri_bo_alloc(i965->intel.bufmgr,
+                      "mpr row store",
+                      width_in_mbs * 64 * 2,
+                      0x1000);
+    assert(bo);
+    gen7_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
+    gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1;
+
+    /* No bitplane buffer is set up for AVC here. */
+    gen7_mfd_context->bitplane_read_buffer.valid = 0;
+}
+
+/*
+ * Build and submit the BCS batch that decodes one H.264 picture.
+ *
+ * After gen8_mfd_avc_decode_init() the picture-level state is emitted
+ * once (pipe mode, surface, buffer addresses, QM, pic-id and image
+ * state).  Then, for every slice parameter buffer, the indirect object
+ * base is pointed at the matching slice data buffer and each slice in the
+ * buffer gets its direct-mode, ref-idx, weight/offset and slice state
+ * followed by the BSD object.  A phantom slice is emitted first when the
+ * very first slice does not start at macroblock 0.
+ *
+ * The "next slice" handed to the per-slice emitters is the following
+ * element in the same buffer, or the first element of the next buffer, or
+ * NULL for the final slice of the picture.
+ */
+static void
+gen8_mfd_avc_decode_picture(VADriverContextP ctx,
+                            struct decode_state *decode_state,
+                            struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+    VAPictureParameterBufferH264 *pic_param;
+    VASliceParameterBufferH264 *slice_param, *next_slice_param, *next_slice_group_param;
+    dri_bo *slice_data_bo;
+    int i, j;
+
+    assert(decode_state->pic_param && decode_state->pic_param->buffer);
+    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
+    gen8_mfd_avc_decode_init(ctx, decode_state, gen7_mfd_context);
+
+    intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
+    intel_batchbuffer_emit_mi_flush(batch);
+
+    /* Picture-level state */
+    gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
+    gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
+    gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
+    gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
+    gen8_mfd_avc_qm_state(ctx, decode_state, gen7_mfd_context);
+    gen8_mfd_avc_picid_state(ctx, decode_state, gen7_mfd_context);
+    gen8_mfd_avc_img_state(ctx, decode_state, gen7_mfd_context);
+
+    /* Slice-level state, one slice parameter buffer at a time */
+    for (j = 0; j < decode_state->num_slice_params; j++) {
+        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
+        slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
+        slice_data_bo = decode_state->slice_datas[j]->bo;
+        gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_AVC, gen7_mfd_context);
+
+        /* First slice of the following buffer, if there is one. */
+        if (j != decode_state->num_slice_params - 1)
+            next_slice_group_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j + 1]->buffer;
+        else
+            next_slice_group_param = NULL;
+
+        if (j == 0 && slice_param->first_mb_in_slice)
+            gen8_mfd_avc_phantom_slice_first(ctx, pic_param, slice_param, gen7_mfd_context);
+
+        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++, slice_param++) {
+            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
+            assert((slice_param->slice_type == SLICE_TYPE_I) ||
+                   (slice_param->slice_type == SLICE_TYPE_SI) ||
+                   (slice_param->slice_type == SLICE_TYPE_P) ||
+                   (slice_param->slice_type == SLICE_TYPE_SP) ||
+                   (slice_param->slice_type == SLICE_TYPE_B));
+
+            if (i < decode_state->slice_params[j]->num_elements - 1)
+                next_slice_param = slice_param + 1;
+            else
+                next_slice_param = next_slice_group_param;
+
+            gen8_mfd_avc_directmode_state(ctx, decode_state, pic_param, slice_param, gen7_mfd_context);
+            gen8_mfd_avc_ref_idx_state(ctx, pic_param, slice_param, gen7_mfd_context);
+            gen8_mfd_avc_weightoffset_state(ctx, pic_param, slice_param, gen7_mfd_context);
+            gen8_mfd_avc_slice_state(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
+            gen8_mfd_avc_bsd_object(ctx, pic_param, slice_param, slice_data_bo, next_slice_param, gen7_mfd_context);
+        }
+    }
+
+    intel_batchbuffer_end_atomic(batch);
+    intel_batchbuffer_flush(batch);
+}
+
+/*
+ * Prepare the decoder context for one MPEG-2 picture.
+ *
+ * The reference surface table is refreshed through
+ * mpeg2_set_reference_surfaces(), the render target is given an NV12
+ * buffer object if needed and becomes the pre-deblocking output, and a
+ * new BSD/MPC row-store scratch buffer of width_in_mbs * 96 bytes is
+ * allocated.  Every other buffer slot is marked invalid.
+ */
+static void
+gen8_mfd_mpeg2_decode_init(VADriverContextP ctx,
+                           struct decode_state *decode_state,
+                           struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    VAPictureParameterBufferMPEG2 *pic_param;
+    struct object_surface *obj_surface;
+    unsigned int width_in_mbs;
+    dri_bo *bo;
+
+    assert(decode_state->pic_param && decode_state->pic_param->buffer);
+    pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
+    width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
+
+    mpeg2_set_reference_surfaces(ctx,
+                                 gen7_mfd_context->reference_surface,
+                                 decode_state,
+                                 pic_param);
+
+    /* Current decoded picture */
+    obj_surface = decode_state->render_object;
+    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
+
+    /* The render target is written through the pre-deblocking output. */
+    dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
+    gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
+    dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
+    gen7_mfd_context->pre_deblocking_output.valid = 1;
+
+    /* Fresh BSD/MPC row-store scratch buffer for this picture */
+    dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
+    bo = dri_bo_alloc(i965->intel.bufmgr,
+                      "bsd mpc row store",
+                      width_in_mbs * 96,
+                      0x1000);
+    assert(bo);
+    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
+    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
+
+    /* Slots not used on this path */
+    gen7_mfd_context->post_deblocking_output.valid = 0;
+    gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
+    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
+    gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
+    gen7_mfd_context->bitplane_read_buffer.valid = 0;
+}
+
+/*
+ * Emit the 13-dword MFX_MPEG2_PIC_STATE command from the MPEG-2 picture
+ * parameter buffer held in decode_state.
+ *
+ * DW1 packs the four f_code nibbles and the picture coding extension
+ * bits, DW2 the picture coding type, DW3 the picture size in macroblocks
+ * minus one together with the slice concealment disable bit, which is
+ * always set here.  DW 4..12 are written as 0.
+ *
+ * ctx is not read here.
+ */
+static void
+gen8_mfd_mpeg2_pic_state(VADriverContextP ctx,
+                         struct decode_state *decode_state,
+                         struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+    VAPictureParameterBufferMPEG2 *pic_param;
+    unsigned int slice_concealment_disable_bit = 1;
+    int n;
+
+    assert(decode_state->pic_param && decode_state->pic_param->buffer);
+    pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
+
+    BEGIN_BCS_BATCH(batch, 13);
+    OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2));
+
+    /* DW 1: f_codes and picture coding extension */
+    OUT_BCS_BATCH(batch,
+                  ((pic_param->f_code & 0xf) << 28) | /* f_code[1][1] */
+                  (((pic_param->f_code >> 4) & 0xf) << 24) | /* f_code[1][0] */
+                  (((pic_param->f_code >> 8) & 0xf) << 20) | /* f_code[0][1] */
+                  (((pic_param->f_code >> 12) & 0xf) << 16) | /* f_code[0][0] */
+                  (pic_param->picture_coding_extension.bits.intra_dc_precision << 14) |
+                  (pic_param->picture_coding_extension.bits.picture_structure << 12) |
+                  (pic_param->picture_coding_extension.bits.top_field_first << 11) |
+                  (pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10) |
+                  (pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9) |
+                  (pic_param->picture_coding_extension.bits.q_scale_type << 8) |
+                  (pic_param->picture_coding_extension.bits.intra_vlc_format << 7) |
+                  (pic_param->picture_coding_extension.bits.alternate_scan << 6));
+
+    /* DW 2: picture coding type */
+    OUT_BCS_BATCH(batch,
+                  pic_param->picture_coding_type << 9);
+
+    /* DW 3: concealment control and picture size in macroblocks minus one */
+    OUT_BCS_BATCH(batch,
+                  (slice_concealment_disable_bit << 31) |
+                  (((ALIGN(pic_param->vertical_size, 16) / 16) - 1) << 16) |
+                  ((ALIGN(pic_param->horizontal_size, 16) / 16) - 1));
+
+    /* DW 4..12: written as 0 */
+    for (n = 4; n <= 12; n++)
+        OUT_BCS_BATCH(batch, 0);
+
+    ADVANCE_BCS_BATCH(batch);
+}
+
+/*
+ * Update the context's cached MPEG-2 quantiser matrices and emit them.
+ *
+ * When decode_state carries an IQ matrix buffer, each of the intra and
+ * non-intra matrices is taken over if the application marks it as loaded
+ * or if the cached load flag is still -1; loaded matrices are copied into
+ * the cache with every input entry j stored at index zigzag_direct[j].
+ *
+ * Afterwards the cached intra matrix and then the cached non-intra matrix
+ * are each sent with gen8_mfd_qm_state() when the corresponding cached
+ * load flag is non-zero.
+ */
+static void
+gen8_mfd_mpeg2_qm_state(VADriverContextP ctx,
+                        struct decode_state *decode_state,
+                        struct gen7_mfd_context *gen7_mfd_context)
+{
+    VAIQMatrixBufferMPEG2 * const gen_iq_matrix = &gen7_mfd_context->iq_matrix.mpeg2;
+    int j;
+
+    /* Update internal QM state */
+    if (decode_state->iq_matrix && decode_state->iq_matrix->buffer) {
+        VAIQMatrixBufferMPEG2 * const iq_matrix =
+            (VAIQMatrixBufferMPEG2 *)decode_state->iq_matrix->buffer;
+
+        if (iq_matrix->load_intra_quantiser_matrix ||
+            gen_iq_matrix->load_intra_quantiser_matrix == -1) {
+            gen_iq_matrix->load_intra_quantiser_matrix =
+                iq_matrix->load_intra_quantiser_matrix;
+
+            if (iq_matrix->load_intra_quantiser_matrix) {
+                for (j = 0; j < 64; j++) {
+                    gen_iq_matrix->intra_quantiser_matrix[zigzag_direct[j]] =
+                        iq_matrix->intra_quantiser_matrix[j];
+                }
+            }
+        }
+
+        if (iq_matrix->load_non_intra_quantiser_matrix ||
+            gen_iq_matrix->load_non_intra_quantiser_matrix == -1) {
+            gen_iq_matrix->load_non_intra_quantiser_matrix =
+                iq_matrix->load_non_intra_quantiser_matrix;
+
+            if (iq_matrix->load_non_intra_quantiser_matrix) {
+                for (j = 0; j < 64; j++) {
+                    gen_iq_matrix->non_intra_quantiser_matrix[zigzag_direct[j]] =
+                        iq_matrix->non_intra_quantiser_matrix[j];
+                }
+            }
+        }
+    }
+
+    /* Commit QM state to HW: intra first, then non-intra */
+    if (gen_iq_matrix->load_intra_quantiser_matrix) {
+        gen8_mfd_qm_state(ctx, MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX,
+                          gen_iq_matrix->intra_quantiser_matrix, 64,
+                          gen7_mfd_context);
+    }
+
+    if (gen_iq_matrix->load_non_intra_quantiser_matrix) {
+        gen8_mfd_qm_state(ctx, MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX,
+                          gen_iq_matrix->non_intra_quantiser_matrix, 64,
+                          gen7_mfd_context);
+    }
+}
+
+/*
+ * Emit the 5-dword MFD_MPEG2_BSD_OBJECT command for one slice.
+ *
+ * The macroblock count of the slice is the distance, in raster order,
+ * between its start position and the start of the next slice; when
+ * next_slice_param is NULL the end of the picture is used instead (row =
+ * picture height in macroblocks, halved for field pictures, column 0) and
+ * both last-slice bits are set.
+ *
+ * For field pictures, slice vertical positions are halved only when the
+ * context's wa_mpeg2_slice_vertical_position value is greater than 0.
+ *
+ * The bitstream window skips the whole bytes covered by
+ * macroblock_offset; the remaining 0..7 bits go into DW3.
+ *
+ * ctx is not read here.
+ */
+static void
+gen8_mfd_mpeg2_bsd_object(VADriverContextP ctx,
+                          VAPictureParameterBufferMPEG2 *pic_param,
+                          VASliceParameterBufferMPEG2 *slice_param,
+                          VASliceParameterBufferMPEG2 *next_slice_param,
+                          struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+    unsigned int width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
+    int is_field_pic = 0;
+    int is_field_pic_wa;
+    int vpos0, hpos0, vpos1, hpos1;
+    int mb_count;
+
+    if (pic_param->picture_coding_extension.bits.picture_structure == MPEG_TOP_FIELD ||
+        pic_param->picture_coding_extension.bits.picture_structure == MPEG_BOTTOM_FIELD)
+        is_field_pic = 1;
+
+    is_field_pic_wa = is_field_pic &&
+                      gen7_mfd_context->wa_mpeg2_slice_vertical_position > 0;
+
+    /* Start of this slice */
+    hpos0 = slice_param->slice_horizontal_position;
+    vpos0 = slice_param->slice_vertical_position / (1 + is_field_pic_wa);
+
+    /* Start of the next slice, or the end of the picture */
+    if (next_slice_param != NULL) {
+        hpos1 = next_slice_param->slice_horizontal_position;
+        vpos1 = next_slice_param->slice_vertical_position / (1 + is_field_pic_wa);
+    } else {
+        hpos1 = 0;
+        vpos1 = ALIGN(pic_param->vertical_size, 16) / 16 / (1 + is_field_pic);
+    }
+
+    mb_count = (vpos1 * width_in_mbs + hpos1) - (vpos0 * width_in_mbs + hpos0);
+
+    BEGIN_BCS_BATCH(batch, 5);
+    OUT_BCS_BATCH(batch, MFD_MPEG2_BSD_OBJECT | (5 - 2));
+
+    /* DW 1..2: size and offset of the data after the skipped whole bytes */
+    OUT_BCS_BATCH(batch,
+                  slice_param->slice_data_size - (slice_param->macroblock_offset >> 3));
+    OUT_BCS_BATCH(batch,
+                  slice_param->slice_data_offset + (slice_param->macroblock_offset >> 3));
+
+    /* DW 3: start position, macroblock count, last-slice bits, bit offset */
+    OUT_BCS_BATCH(batch,
+                  (hpos0 << 24) |
+                  (vpos0 << 16) |
+                  (mb_count << 8) |
+                  ((next_slice_param == NULL) << 5) |
+                  ((next_slice_param == NULL) << 3) |
+                  (slice_param->macroblock_offset & 0x7));
+
+    /* DW 4: quantiser scale code and next slice position */
+    OUT_BCS_BATCH(batch,
+                  (slice_param->quantiser_scale_code << 24) |
+                  ((vpos1 << 8) | hpos1));
+
+    ADVANCE_BCS_BATCH(batch);
+}
+
+/*
+ * Build and submit the BCS batch that decodes one MPEG-2 picture:
+ * common MFX pipeline state, MPEG-2 picture and quantiser-matrix state,
+ * then one BSD object per slice found in the slice parameter buffers.
+ */
+static void
+gen8_mfd_mpeg2_decode_picture(VADriverContextP ctx,
+                              struct decode_state *decode_state,
+                              struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+    VAPictureParameterBufferMPEG2 *pic_param;
+    int group, n;
+
+    assert(decode_state->pic_param && decode_state->pic_param->buffer);
+    pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
+
+    gen8_mfd_mpeg2_decode_init(ctx, decode_state, gen7_mfd_context);
+    intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
+    intel_batchbuffer_emit_mi_flush(batch);
+    gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
+    gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
+    gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
+    gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
+    gen8_mfd_mpeg2_pic_state(ctx, decode_state, gen7_mfd_context);
+    gen8_mfd_mpeg2_qm_state(ctx, decode_state, gen7_mfd_context);
+
+    /* A negative value means the workaround has not been evaluated yet. */
+    if (gen7_mfd_context->wa_mpeg2_slice_vertical_position < 0)
+        gen7_mfd_context->wa_mpeg2_slice_vertical_position =
+            mpeg2_wa_slice_vertical_position(decode_state, pic_param);
+
+    for (group = 0; group < decode_state->num_slice_params; group++) {
+        VASliceParameterBufferMPEG2 *slice;
+        VASliceParameterBufferMPEG2 *first_of_next_group = NULL;
+        dri_bo *slice_data_bo;
+
+        assert(decode_state->slice_params && decode_state->slice_params[group]->buffer);
+        slice = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[group]->buffer;
+        slice_data_bo = decode_state->slice_datas[group]->bo;
+        gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_MPEG2, gen7_mfd_context);
+
+        /* The last slice of a group is followed by the first slice of the next group. */
+        if (group != decode_state->num_slice_params - 1)
+            first_of_next_group =
+                (VASliceParameterBufferMPEG2 *)decode_state->slice_params[group + 1]->buffer;
+
+        for (n = 0; n < decode_state->slice_params[group]->num_elements; n++, slice++) {
+            VASliceParameterBufferMPEG2 *following;
+
+            assert(slice->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
+
+            following = (n < decode_state->slice_params[group]->num_elements - 1) ?
+                slice + 1 : first_of_next_group;
+
+            gen8_mfd_mpeg2_bsd_object(ctx, pic_param, slice, following, gen7_mfd_context);
+        }
+    }
+
+    intel_batchbuffer_end_atomic(batch);
+    intel_batchbuffer_flush(batch);
+}
+
+/* Indexed by the VA-API picture_fields.bits.picture_type value; yields the GEN7 picture type. */
+static const int va_to_gen7_vc1_pic_type[5] = {
+ GEN7_VC1_I_PICTURE,
+ GEN7_VC1_P_PICTURE,
+ GEN7_VC1_B_PICTURE,
+ GEN7_VC1_BI_PICTURE,
+ GEN7_VC1_P_PICTURE,
+};
+
+/* Indexed by mv_fields.bits.mv_mode (or mv_mode2); yields the unified MV mode field value. */
+static const int va_to_gen7_vc1_mv[4] = {
+ 1, /* 1-MV */
+ 2, /* 1-MV half-pel */
+ 3, /* 1-MV half-pel bilinear */
+ 0, /* Mixed MV */
+};
+
+/* Indexed by pic_param->b_picture_fraction; yields the B-picture scale factor. */
+static const int b_picture_scale_factor[21] = {
+ 128, 85, 170, 64, 192,
+ 51, 102, 153, 204, 43,
+ 215, 37, 74, 111, 148,
+ 185, 222, 32, 96, 160,
+ 224,
+};
+
+/* Indexed by pic_param->conditional_overlap_flag (callers assert it is < 3). */
+static const int va_to_gen7_vc1_condover[3] = {
+ 0,
+ 2,
+ 3
+};
+
+/* Indexed by sequence_fields.bits.profile; yields the GEN7 profile value. */
+static const int va_to_gen7_vc1_profile[4] = {
+ GEN7_VC1_SIMPLE_PROFILE,
+ GEN7_VC1_MAIN_PROFILE,
+ GEN7_VC1_RESERVED_PROFILE,
+ GEN7_VC1_ADVANCED_PROFILE
+};
+
+/*
+ * Destructor for the per-surface VC-1 private data: drops the reference on
+ * the direct-MV buffer, frees the structure and clears the caller's pointer.
+ * Does nothing when *data is already NULL.
+ */
+static void
+gen8_mfd_free_vc1_surface(void **data)
+{
+    struct gen7_vc1_surface *vc1_private = *data;
+
+    if (vc1_private != NULL) {
+        dri_bo_unreference(vc1_private->dmv);
+        free(vc1_private);
+        *data = NULL;
+    }
+}
+
+/*
+ * Attach (or refresh) the VC-1 private data of obj_surface.
+ *
+ * Allocates the private structure on first use, records the picture type of
+ * the picture being decoded into the surface and makes sure a direct-MV
+ * buffer sized for the coded picture exists.
+ *
+ * If the private structure cannot be allocated the surface is left without
+ * private data; the users of private_data in this file check it for NULL.
+ */
+static void
+gen8_mfd_init_vc1_surface(VADriverContextP ctx,
+                          VAPictureParameterBufferVC1 *pic_param,
+                          struct object_surface *obj_surface)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct gen7_vc1_surface *gen7_vc1_surface = obj_surface->private_data;
+    int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
+    int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
+
+    obj_surface->free_private_data = gen8_mfd_free_vc1_surface;
+
+    if (!gen7_vc1_surface) {
+        /* calloc(nmemb, size): one zero-initialised element. */
+        gen7_vc1_surface = calloc(1, sizeof(struct gen7_vc1_surface));
+        assert((obj_surface->size & 0x3f) == 0);
+        obj_surface->private_data = gen7_vc1_surface;
+
+        /* Out of memory: do not dereference the NULL pointer below. */
+        if (!gen7_vc1_surface)
+            return;
+    }
+
+    gen7_vc1_surface->picture_type = pic_param->picture_fields.bits.picture_type;
+
+    /* The direct-MV buffer is allocated once and then reused. */
+    if (gen7_vc1_surface->dmv == NULL) {
+        gen7_vc1_surface->dmv = dri_bo_alloc(i965->intel.bufmgr,
+                                             "direct mv w/r buffer",
+                                             width_in_mbs * height_in_mbs * 64,
+                                             0x1000);
+    }
+}
+
+/*
+ * Per-picture setup for VC-1 decoding.
+ *
+ * Updates the reference frame store, makes sure the render target has an
+ * NV12 buffer and VC-1 private data, (re)allocates the row-store scratch
+ * buffers and, when any bitplane is present, repacks the application's
+ * bitplane buffer into a freshly allocated buffer object.
+ */
+static void
+gen8_mfd_vc1_decode_init(VADriverContextP ctx,
+                         struct decode_state *decode_state,
+                         struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    VAPictureParameterBufferVC1 *pic_param;
+    struct object_surface *obj_surface;
+    dri_bo *bo;
+    int width_in_mbs;
+    int picture_type;
+
+    assert(decode_state->pic_param && decode_state->pic_param->buffer);
+    pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
+    width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
+    picture_type = pic_param->picture_fields.bits.picture_type;
+
+    intel_update_vc1_frame_store_index(ctx,
+                                       decode_state,
+                                       pic_param,
+                                       gen7_mfd_context->reference_surface);
+
+    /* Current decoded picture */
+    obj_surface = decode_state->render_object;
+    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
+    gen8_mfd_init_vc1_surface(ctx, pic_param, obj_surface);
+
+    /*
+     * Both outputs point at the render target; the loop filter flag selects
+     * which of the two is marked valid.
+     */
+    dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
+    gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
+    dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
+    gen7_mfd_context->post_deblocking_output.valid =
+        pic_param->entrypoint_fields.bits.loopfilter;
+
+    dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
+    gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
+    dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
+    gen7_mfd_context->pre_deblocking_output.valid =
+        !pic_param->entrypoint_fields.bits.loopfilter;
+
+    /* Row-store scratch buffers, all sized from the picture width in MBs. */
+    dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
+    bo = dri_bo_alloc(i965->intel.bufmgr,
+                      "intra row store",
+                      width_in_mbs * 64,
+                      0x1000);
+    assert(bo);
+    gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
+    gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
+
+    dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
+    bo = dri_bo_alloc(i965->intel.bufmgr,
+                      "deblocking filter row store",
+                      width_in_mbs * 7 * 64,
+                      0x1000);
+    assert(bo);
+    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
+    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
+
+    dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
+    bo = dri_bo_alloc(i965->intel.bufmgr,
+                      "bsd mpc row store",
+                      width_in_mbs * 96,
+                      0x1000);
+    assert(bo);
+    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
+    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
+
+    gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
+
+    gen7_mfd_context->bitplane_read_buffer.valid = !!pic_param->bitplane_present.value;
+    dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
+
+    if (!gen7_mfd_context->bitplane_read_buffer.valid) {
+        gen7_mfd_context->bitplane_read_buffer.bo = NULL;
+    } else {
+        int mb_rows = ALIGN(pic_param->coded_height, 16) / 16;
+        int row_bytes = ALIGN(width_in_mbs, 2) / 2; /* two macroblocks per byte */
+        int mb_x, mb_y;
+        uint8_t *src = NULL, *dst = NULL;
+
+        assert(decode_state->bit_plane->buffer);
+        src = decode_state->bit_plane->buffer;
+
+        bo = dri_bo_alloc(i965->intel.bufmgr,
+                          "VC-1 Bitplane",
+                          row_bytes * mb_rows,
+                          0x1000);
+        assert(bo);
+        gen7_mfd_context->bitplane_read_buffer.bo = bo;
+
+        dri_bo_map(bo, True);
+        assert(bo->virtual);
+        dst = bo->virtual;
+
+        /*
+         * The source packs macroblocks contiguously across rows, two per
+         * byte with the even-indexed one in the high nibble.  The
+         * destination starts every row on a byte boundary and is filled by
+         * shifting each new nibble in from the top.
+         */
+        for (mb_y = 0; mb_y < mb_rows; mb_y++) {
+            for (mb_x = 0; mb_x < width_in_mbs; mb_x++) {
+                int mb_index = mb_y * width_in_mbs + mb_x;
+                int nibble_shift = (mb_index & 1) ? 0 : 4;
+                int out_byte = mb_x / 2;
+                uint8_t nibble;
+
+                nibble = ((src[mb_index / 2] >> nibble_shift) & 0xf);
+
+                if (picture_type == GEN7_VC1_SKIPPED_PICTURE) {
+                    nibble |= 0x2;
+                }
+
+                dst[out_byte] = ((dst[out_byte] >> 4) | (nibble << 4));
+            }
+
+            /* Odd width: move the lone nibble of the last byte down. */
+            if (mb_x & 1)
+                dst[mb_x / 2] >>= 4;
+
+            dst += row_bytes;
+        }
+
+        dri_bo_unmap(bo);
+    }
+}
+
+/*
+ * Translate the VA-API VC-1 picture parameters into one
+ * MFD_VC1_LONG_PIC_STATE command and emit it on the BCS batch.
+ *
+ * Note that this function writes back into the picture parameter buffer:
+ * for non-intra pictures without variable-sized transform it forces the
+ * transform-type fields to the values the hardware command requires.
+ */
+static void
+gen8_mfd_vc1_pic_state(VADriverContextP ctx,
+                       struct decode_state *decode_state,
+                       struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+    VAPictureParameterBufferVC1 *pic_param;
+    struct object_surface *obj_surface;
+    int alt_pquant_config = 0, alt_pquant_edge_mask = 0, alt_pq;
+    int dquant, dquantfrm, dqprofile, dqdbedge, dqsbedge, dqbilevel;
+    int unified_mv_mode;
+    int ref_field_pic_polarity = 0;
+    int scale_factor = 0;
+    int trans_ac_y = 0;
+    int dmv_surface_valid = 0;
+    int brfd = 0; /* NOTE(review): computed below but never emitted in the command */
+    int fcm = 0;
+    int picture_type;
+    int profile;
+    int overlap;
+    int interpolation_mode = 0;
+
+    assert(decode_state->pic_param && decode_state->pic_param->buffer);
+    pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
+
+    /*
+     * Validate the table indices before the lookup tables are read, rather
+     * than after the fact.
+     */
+    assert(pic_param->conditional_overlap_flag < 3);
+    assert(pic_param->picture_fields.bits.picture_type <
+           sizeof(va_to_gen7_vc1_pic_type) / sizeof(va_to_gen7_vc1_pic_type[0]));
+
+    profile = va_to_gen7_vc1_profile[pic_param->sequence_fields.bits.profile];
+    dquant = pic_param->pic_quantizer_fields.bits.dquant;
+    dquantfrm = pic_param->pic_quantizer_fields.bits.dq_frame;
+    dqprofile = pic_param->pic_quantizer_fields.bits.dq_profile;
+    dqdbedge = pic_param->pic_quantizer_fields.bits.dq_db_edge;
+    dqsbedge = pic_param->pic_quantizer_fields.bits.dq_sb_edge;
+    dqbilevel = pic_param->pic_quantizer_fields.bits.dq_binary_level;
+    alt_pq = pic_param->pic_quantizer_fields.bits.alt_pic_quantizer;
+
+    /* Derive the alternate-quantizer configuration and edge mask from DQUANT. */
+    if (dquant == 0) {
+        alt_pquant_config = 0;
+        alt_pquant_edge_mask = 0;
+    } else if (dquant == 2) {
+        alt_pquant_config = 1;
+        alt_pquant_edge_mask = 0xf;
+    } else {
+        assert(dquant == 1);
+        if (dquantfrm == 0) {
+            alt_pquant_config = 0;
+            alt_pquant_edge_mask = 0;
+            alt_pq = 0;
+        } else {
+            assert(dquantfrm == 1);
+            alt_pquant_config = 1;
+
+            switch (dqprofile) {
+            case 3:
+                if (dqbilevel == 0) {
+                    alt_pquant_config = 2;
+                    alt_pquant_edge_mask = 0;
+                } else {
+                    assert(dqbilevel == 1);
+                    alt_pquant_config = 3;
+                    alt_pquant_edge_mask = 0;
+                }
+                break;
+
+            case 0:
+                alt_pquant_edge_mask = 0xf;
+                break;
+
+            case 1:
+                if (dqdbedge == 3)
+                    alt_pquant_edge_mask = 0x9;
+                else
+                    alt_pquant_edge_mask = (0x3 << dqdbedge);
+
+                break;
+
+            case 2:
+                alt_pquant_edge_mask = (0x1 << dqsbedge);
+                break;
+
+            default:
+                assert(0);
+            }
+        }
+    }
+
+    /* With intensity compensation the effective MV mode is carried in mv_mode2. */
+    if (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation) {
+        assert(pic_param->mv_fields.bits.mv_mode2 < 4);
+        unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode2];
+    } else {
+        assert(pic_param->mv_fields.bits.mv_mode < 4);
+        unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode];
+    }
+
+    if (pic_param->sequence_fields.bits.interlace == 1 &&
+        pic_param->picture_fields.bits.frame_coding_mode != 0) { /* frame-interlace or field-interlace */
+        /* FIXME: calculate reference field picture polarity */
+        assert(0);
+        ref_field_pic_polarity = 0;
+    }
+
+    if (pic_param->b_picture_fraction < 21)
+        scale_factor = b_picture_scale_factor[pic_param->b_picture_fraction];
+
+    picture_type = va_to_gen7_vc1_pic_type[pic_param->picture_fields.bits.picture_type];
+
+    /* Advanced-profile I pictures are programmed as BI pictures. */
+    if (profile == GEN7_VC1_ADVANCED_PROFILE &&
+        picture_type == GEN7_VC1_I_PICTURE)
+        picture_type = GEN7_VC1_BI_PICTURE;
+
+    if (picture_type == GEN7_VC1_I_PICTURE || picture_type == GEN7_VC1_BI_PICTURE) /* I picture */
+        trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx2;
+    else {
+        trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx1;
+
+        /*
+         * 8.3.6.2.1 Transform Type Selection
+         * If variable-sized transform coding is not enabled,
+         * then the 8x8 transform shall be used for all blocks.
+         * it is also MFX_VC1_PIC_STATE requirement.
+         */
+        if (pic_param->transform_fields.bits.variable_sized_transform_flag == 0) {
+            pic_param->transform_fields.bits.mb_level_transform_type_flag = 1;
+            pic_param->transform_fields.bits.frame_level_transform_type = 0;
+        }
+    }
+
+    /*
+     * For B pictures the direct-MV surface is only usable when the backward
+     * reference has private data and was not an I/BI picture.
+     */
+    if (picture_type == GEN7_VC1_B_PICTURE) {
+        struct gen7_vc1_surface *gen7_vc1_surface = NULL;
+
+        obj_surface = decode_state->reference_objects[1];
+
+        if (obj_surface)
+            gen7_vc1_surface = obj_surface->private_data;
+
+        if (!gen7_vc1_surface ||
+            (va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_I_PICTURE ||
+             va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_BI_PICTURE))
+            dmv_surface_valid = 0;
+        else
+            dmv_surface_valid = 1;
+    }
+
+    assert(pic_param->picture_fields.bits.frame_coding_mode < 3);
+
+    /* Field-interlace is split into two hardware values by field order. */
+    if (pic_param->picture_fields.bits.frame_coding_mode < 2)
+        fcm = pic_param->picture_fields.bits.frame_coding_mode;
+    else {
+        if (pic_param->picture_fields.bits.top_field_first)
+            fcm = 2;
+        else
+            fcm = 3;
+    }
+
+    if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_B_PICTURE) { /* B picture */
+        brfd = pic_param->reference_fields.bits.reference_distance;
+        brfd = (scale_factor * brfd) >> 8;
+        brfd = pic_param->reference_fields.bits.reference_distance - brfd - 1;
+
+        if (brfd < 0)
+            brfd = 0;
+    }
+
+    /* Decide whether overlap smoothing is enabled for this picture. */
+    overlap = 0;
+    if (profile != GEN7_VC1_ADVANCED_PROFILE) {
+        if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9 &&
+            pic_param->picture_fields.bits.picture_type != GEN7_VC1_B_PICTURE) {
+            overlap = 1;
+        }
+    } else {
+        if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_P_PICTURE &&
+            pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9) {
+            overlap = 1;
+        }
+        if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_I_PICTURE ||
+            pic_param->picture_fields.bits.picture_type == GEN7_VC1_BI_PICTURE) {
+            if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9) {
+                overlap = 1;
+            } else if (va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] == 2 ||
+                       va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] == 3) {
+                overlap = 1;
+            }
+        }
+    }
+
+    assert(pic_param->mv_fields.bits.mv_table < 4); /* FIXME: interlace mode */
+
+    if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPelBilinear ||
+        (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
+         pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPelBilinear))
+        interpolation_mode = 9; /* Half-pel bilinear */
+    else if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPel ||
+             (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
+              pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPel))
+        interpolation_mode = 1; /* Half-pel bicubic */
+    else
+        interpolation_mode = 0; /* Quarter-pel bicubic */
+
+    BEGIN_BCS_BATCH(batch, 6);
+    OUT_BCS_BATCH(batch, MFD_VC1_LONG_PIC_STATE | (6 - 2));
+    OUT_BCS_BATCH(batch,
+                  (((ALIGN(pic_param->coded_height, 16) / 16) - 1) << 16) |
+                  ((ALIGN(pic_param->coded_width, 16) / 16) - 1));
+    OUT_BCS_BATCH(batch,
+                  ((ALIGN(pic_param->coded_width, 16) / 16 + 1) / 2 - 1) << 24 |
+                  dmv_surface_valid << 15 |
+                  (pic_param->pic_quantizer_fields.bits.quantizer == 0) << 14 | /* implicit quantizer */
+                  pic_param->rounding_control << 13 |
+                  pic_param->sequence_fields.bits.syncmarker << 12 |
+                  interpolation_mode << 8 |
+                  0 << 7 | /* FIXME: scale up or down ??? */
+                  pic_param->range_reduction_frame << 6 |
+                  pic_param->entrypoint_fields.bits.loopfilter << 5 |
+                  overlap << 4 |
+                  !pic_param->picture_fields.bits.is_first_field << 3 |
+                  (pic_param->sequence_fields.bits.profile == 3) << 0);
+    OUT_BCS_BATCH(batch,
+                  va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] << 29 |
+                  picture_type << 26 |
+                  fcm << 24 |
+                  alt_pq << 16 |
+                  pic_param->pic_quantizer_fields.bits.pic_quantizer_scale << 8 |
+                  scale_factor << 0);
+    OUT_BCS_BATCH(batch,
+                  unified_mv_mode << 28 |
+                  pic_param->mv_fields.bits.four_mv_switch << 27 |
+                  pic_param->fast_uvmc_flag << 26 |
+                  ref_field_pic_polarity << 25 |
+                  pic_param->reference_fields.bits.num_reference_pictures << 24 |
+                  pic_param->reference_fields.bits.reference_distance << 20 |
+                  pic_param->reference_fields.bits.reference_distance << 16 | /* FIXME: ??? */
+                  pic_param->mv_fields.bits.extended_dmv_range << 10 |
+                  pic_param->mv_fields.bits.extended_mv_range << 8 |
+                  alt_pquant_edge_mask << 4 |
+                  alt_pquant_config << 2 |
+                  pic_param->pic_quantizer_fields.bits.half_qp << 1 |
+                  pic_param->pic_quantizer_fields.bits.pic_quantizer_type << 0);
+    /* The per-bitplane bits are inverted: a bit is set when that plane is absent. */
+    OUT_BCS_BATCH(batch,
+                  !!pic_param->bitplane_present.value << 31 |
+                  !pic_param->bitplane_present.flags.bp_forward_mb << 30 |
+                  !pic_param->bitplane_present.flags.bp_mv_type_mb << 29 |
+                  !pic_param->bitplane_present.flags.bp_skip_mb << 28 |
+                  !pic_param->bitplane_present.flags.bp_direct_mb << 27 |
+                  !pic_param->bitplane_present.flags.bp_overflags << 26 |
+                  !pic_param->bitplane_present.flags.bp_ac_pred << 25 |
+                  !pic_param->bitplane_present.flags.bp_field_tx << 24 |
+                  pic_param->mv_fields.bits.mv_table << 20 |
+                  pic_param->mv_fields.bits.four_mv_block_pattern_table << 18 |
+                  pic_param->mv_fields.bits.two_mv_block_pattern_table << 16 |
+                  pic_param->transform_fields.bits.frame_level_transform_type << 12 |
+                  pic_param->transform_fields.bits.mb_level_transform_type_flag << 11 |
+                  pic_param->mb_mode_table << 8 |
+                  trans_ac_y << 6 |
+                  pic_param->transform_fields.bits.transform_ac_codingset_idx1 << 4 |
+                  pic_param->transform_fields.bits.intra_transform_dc_table << 3 |
+                  pic_param->cbp_table << 0);
+    ADVANCE_BCS_BATCH(batch);
+}
+
+/*
+ * Emit MFX_VC1_PRED_PIPE_STATE for the current picture.
+ *
+ * Intensity compensation is flagged when the picture's MV mode is
+ * VAMvModeIntensityCompensation; the luma scale and shift are taken from
+ * the picture parameters.  The remaining dwords are programmed as zero.
+ */
+static void
+gen8_mfd_vc1_pred_pipe_state(VADriverContextP ctx,
+                             struct decode_state *decode_state,
+                             struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+    VAPictureParameterBufferVC1 *pic_param;
+    int intensitycomp_single;
+
+    assert(decode_state->pic_param && decode_state->pic_param->buffer);
+    pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
+    intensitycomp_single = (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation);
+
+    BEGIN_BCS_BATCH(batch, 6);
+    OUT_BCS_BATCH(batch, MFX_VC1_PRED_PIPE_STATE | (6 - 2));
+    OUT_BCS_BATCH(batch,
+                  0 << 14 | /* FIXME: double ??? */
+                  0 << 12 |
+                  intensitycomp_single << 10 |
+                  intensitycomp_single << 8 |
+                  0 << 4 | /* FIXME: interlace mode */
+                  0);
+    OUT_BCS_BATCH(batch,
+                  pic_param->luma_shift << 16 |
+                  pic_param->luma_scale << 0); /* FIXME: Luma Scaling */
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+    ADVANCE_BCS_BATCH(batch);
+}
+
+/*
+ * Emit MFX_VC1_DIRECTMODE_STATE.
+ *
+ * The write buffer is the direct-MV buffer of the render target and the
+ * read buffer is the one of reference_objects[1]; either address is
+ * programmed as zero when the corresponding surface has no private data.
+ */
+static void
+gen8_mfd_vc1_directmode_state(VADriverContextP ctx,
+                              struct decode_state *decode_state,
+                              struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+    struct object_surface *cur_surface = decode_state->render_object;
+    struct object_surface *ref_surface = decode_state->reference_objects[1];
+    dri_bo *dmv_write_buffer = NULL;
+    dri_bo *dmv_read_buffer = NULL;
+
+    if (cur_surface && cur_surface->private_data) {
+        struct gen7_vc1_surface *cur_private = cur_surface->private_data;
+
+        dmv_write_buffer = cur_private->dmv;
+    }
+
+    if (ref_surface && ref_surface->private_data) {
+        struct gen7_vc1_surface *ref_private = ref_surface->private_data;
+
+        dmv_read_buffer = ref_private->dmv;
+    }
+
+    BEGIN_BCS_BATCH(batch, 7);
+    OUT_BCS_BATCH(batch, MFX_VC1_DIRECTMODE_STATE | (7 - 2));
+
+    /* Direct MV write buffer: address dword followed by two zero dwords. */
+    if (dmv_write_buffer) {
+        OUT_BCS_RELOC(batch, dmv_write_buffer,
+                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+                      0);
+    } else {
+        OUT_BCS_BATCH(batch, 0);
+    }
+
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+
+    /* Direct MV read buffer: same layout, relocated read-only. */
+    if (dmv_read_buffer) {
+        OUT_BCS_RELOC(batch, dmv_read_buffer,
+                      I915_GEM_DOMAIN_INSTRUCTION, 0,
+                      0);
+    } else {
+        OUT_BCS_BATCH(batch, 0);
+    }
+
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+
+    ADVANCE_BCS_BATCH(batch);
+}
+
+/*
+ * Convert a macroblock bit offset into an offset within the raw slice data.
+ *
+ * For profile values other than 3 the input offset is returned unchanged.
+ * For profile 3 the whole header bytes are walked and every byte sequence
+ * 00 00 03 followed by a byte below 4 is treated as two header bytes plus
+ * one extra raw byte, so the returned offset is moved forward by one byte
+ * for each such sequence.  The sub-byte part of the offset is preserved.
+ *
+ * buf must be readable up to three bytes past the examined header byte.
+ */
+static int
+gen8_mfd_vc1_get_macroblock_bit_offset(uint8_t *buf, int in_slice_data_bit_offset, int profile)
+{
+    int header_bytes = in_slice_data_bit_offset / 8;
+    int counted = 0; /* header bytes accounted for so far */
+    int raw_pos = 0; /* matching position in the raw buffer */
+
+    if (profile != 3)
+        return in_slice_data_bit_offset;
+
+    while (counted < header_bytes) {
+        if (buf[raw_pos] == 0 && buf[raw_pos + 1] == 0 &&
+            buf[raw_pos + 2] == 3 && buf[raw_pos + 3] < 4) {
+            /* 00 00 03 0x: two header bytes occupy three raw bytes. */
+            counted += 2;
+            raw_pos += 3;
+        } else {
+            counted += 1;
+            raw_pos += 1;
+        }
+    }
+
+    return 8 * raw_pos + in_slice_data_bit_offset % 8;
+}
+
+/*
+ * Emit one MFD_VC1_BSD_OBJECT command for a single VC-1 slice.
+ *
+ * The slice data buffer is mapped briefly so the macroblock bit offset can
+ * be adjusted by gen8_mfd_vc1_get_macroblock_bit_offset().  next_slice_param
+ * is the following slice, or NULL for the last slice of the picture, in
+ * which case the slice extends to the bottom macroblock row.
+ */
+static void
+gen8_mfd_vc1_bsd_object(VADriverContextP ctx,
+                        VAPictureParameterBufferVC1 *pic_param,
+                        VASliceParameterBufferVC1 *slice_param,
+                        VASliceParameterBufferVC1 *next_slice_param,
+                        dri_bo *slice_data_bo,
+                        struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+    uint8_t *slice_bytes;
+    int mb_bit_offset;
+    int end_row;
+
+    dri_bo_map(slice_data_bo, 0);
+    slice_bytes = (uint8_t *)slice_data_bo->virtual + slice_param->slice_data_offset;
+    mb_bit_offset = gen8_mfd_vc1_get_macroblock_bit_offset(slice_bytes,
+                                                           slice_param->macroblock_offset,
+                                                           pic_param->sequence_fields.bits.profile);
+    dri_bo_unmap(slice_data_bo);
+
+    end_row = next_slice_param ?
+        next_slice_param->slice_vertical_position :
+        ALIGN(pic_param->coded_height, 16) / 16;
+
+    BEGIN_BCS_BATCH(batch, 5);
+    OUT_BCS_BATCH(batch, MFD_VC1_BSD_OBJECT | (5 - 2));
+    /* Size and offset of the slice data, skipping whole header bytes. */
+    OUT_BCS_BATCH(batch,
+                  slice_param->slice_data_size - (mb_bit_offset >> 3));
+    OUT_BCS_BATCH(batch,
+                  slice_param->slice_data_offset + (mb_bit_offset >> 3));
+    OUT_BCS_BATCH(batch,
+                  slice_param->slice_vertical_position << 16 |
+                  end_row << 0);
+    OUT_BCS_BATCH(batch,
+                  (mb_bit_offset & 0x7)); /* residual bit offset */
+    ADVANCE_BCS_BATCH(batch);
+}
+
+/*
+ * Build and submit the BCS batch that decodes one VC-1 picture:
+ * common MFX pipeline state, the VC-1 picture, prediction-pipe and
+ * direct-mode state, then one BSD object per slice.
+ */
+static void
+gen8_mfd_vc1_decode_picture(VADriverContextP ctx,
+                            struct decode_state *decode_state,
+                            struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+    VAPictureParameterBufferVC1 *pic_param;
+    int group, n;
+
+    assert(decode_state->pic_param && decode_state->pic_param->buffer);
+    pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
+
+    gen8_mfd_vc1_decode_init(ctx, decode_state, gen7_mfd_context);
+    intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
+    intel_batchbuffer_emit_mi_flush(batch);
+    gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
+    gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
+    gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
+    gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
+    gen8_mfd_vc1_pic_state(ctx, decode_state, gen7_mfd_context);
+    gen8_mfd_vc1_pred_pipe_state(ctx, decode_state, gen7_mfd_context);
+    gen8_mfd_vc1_directmode_state(ctx, decode_state, gen7_mfd_context);
+
+    for (group = 0; group < decode_state->num_slice_params; group++) {
+        VASliceParameterBufferVC1 *slice;
+        VASliceParameterBufferVC1 *first_of_next_group = NULL;
+        dri_bo *slice_data_bo;
+
+        assert(decode_state->slice_params && decode_state->slice_params[group]->buffer);
+        slice = (VASliceParameterBufferVC1 *)decode_state->slice_params[group]->buffer;
+        slice_data_bo = decode_state->slice_datas[group]->bo;
+        gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VC1, gen7_mfd_context);
+
+        /* The last slice of a group is followed by the first slice of the next group. */
+        if (group != decode_state->num_slice_params - 1)
+            first_of_next_group =
+                (VASliceParameterBufferVC1 *)decode_state->slice_params[group + 1]->buffer;
+
+        for (n = 0; n < decode_state->slice_params[group]->num_elements; n++, slice++) {
+            VASliceParameterBufferVC1 *following;
+
+            assert(slice->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
+
+            following = (n < decode_state->slice_params[group]->num_elements - 1) ?
+                slice + 1 : first_of_next_group;
+
+            gen8_mfd_vc1_bsd_object(ctx, pic_param, slice, following, slice_data_bo, gen7_mfd_context);
+        }
+    }
+
+    intel_batchbuffer_end_atomic(batch);
+    intel_batchbuffer_flush(batch);
+}
+
+/*
+ * Per-picture setup for baseline JPEG decoding.
+ *
+ * Chooses the surface fourcc and subsampling from the component count and
+ * the per-component sampling factors, makes sure the render target has a
+ * buffer of that format, routes the decoder output to it and marks every
+ * scratch buffer as unused.
+ */
+static void
+gen8_mfd_jpeg_decode_init(VADriverContextP ctx,
+                          struct decode_state *decode_state,
+                          struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct object_surface *obj_surface;
+    VAPictureParameterBufferJPEGBaseline *pic_param;
+    int subsampling = SUBSAMPLE_YUV420;
+    int fourcc = VA_FOURCC_IMC3;
+
+    /* Same precondition the other JPEG state functions assert. */
+    assert(decode_state->pic_param && decode_state->pic_param->buffer);
+    pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
+
+    if (pic_param->num_components == 1)
+        subsampling = SUBSAMPLE_YUV400;
+    else if (pic_param->num_components == 3) {
+        int h1 = pic_param->components[0].h_sampling_factor;
+        int h2 = pic_param->components[1].h_sampling_factor;
+        int h3 = pic_param->components[2].h_sampling_factor;
+        int v1 = pic_param->components[0].v_sampling_factor;
+        int v2 = pic_param->components[1].v_sampling_factor;
+        int v3 = pic_param->components[2].v_sampling_factor;
+
+        if (h1 == 2 && h2 == 1 && h3 == 1 &&
+            v1 == 2 && v2 == 1 && v3 == 1) {
+            subsampling = SUBSAMPLE_YUV420;
+            fourcc = VA_FOURCC_IMC3;
+        } else if (h1 == 2 && h2 == 1 && h3 == 1 &&
+                   v1 == 1 && v2 == 1 && v3 == 1) {
+            subsampling = SUBSAMPLE_YUV422H;
+            fourcc = VA_FOURCC_422H;
+        } else if (h1 == 1 && h2 == 1 && h3 == 1 &&
+                   v1 == 1 && v2 == 1 && v3 == 1) {
+            subsampling = SUBSAMPLE_YUV444;
+            fourcc = VA_FOURCC_444P;
+        } else if (h1 == 4 && h2 == 1 && h3 == 1 &&
+                   v1 == 1 && v2 == 1 && v3 == 1) {
+            subsampling = SUBSAMPLE_YUV411;
+            fourcc = VA_FOURCC_411P;
+        } else if (h1 == 1 && h2 == 1 && h3 == 1 &&
+                   v1 == 2 && v2 == 1 && v3 == 1) {
+            subsampling = SUBSAMPLE_YUV422V;
+            fourcc = VA_FOURCC_422V;
+        } else if (h1 == 2 && h2 == 1 && h3 == 1 &&
+                   v1 == 2 && v2 == 2 && v3 == 2) {
+            subsampling = SUBSAMPLE_YUV422H;
+            fourcc = VA_FOURCC_422H;
+        } else if (h1 == 2 && h2 == 2 && h3 == 2 && /* was "h2 == 2 && h2 == 2": h1 went unchecked */
+                   v1 == 2 && v2 == 1 && v3 == 1) {
+            subsampling = SUBSAMPLE_YUV422V;
+            fourcc = VA_FOURCC_422V;
+        } else
+            assert(0);
+    }
+    else {
+        assert(0);
+    }
+
+    /* Current decoded picture */
+    obj_surface = decode_state->render_object;
+    i965_check_alloc_surface_bo(ctx, obj_surface, 1, fourcc, subsampling);
+
+    dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
+    gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
+    dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
+    gen7_mfd_context->pre_deblocking_output.valid = 1;
+
+    /* Only the pre-deblocking output is used here; everything else is off. */
+    gen7_mfd_context->post_deblocking_output.bo = NULL;
+    gen7_mfd_context->post_deblocking_output.valid = 0;
+
+    gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
+    gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
+
+    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
+    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
+
+    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
+    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 0;
+
+    gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
+    gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
+
+    gen7_mfd_context->bitplane_read_buffer.bo = NULL;
+    gen7_mfd_context->bitplane_read_buffer.valid = 0;
+}
+
+/* GEN7 JPEG rotation values in increasing-angle order; the picture state below only uses entry 0. */
+static const int va_to_gen7_jpeg_rotation[4] = {
+ GEN7_JPEG_ROTATION_0,
+ GEN7_JPEG_ROTATION_90,
+ GEN7_JPEG_ROTATION_180,
+ GEN7_JPEG_ROTATION_270
+};
+
+/*
+ * Emit MFX_JPEG_PIC_STATE for the current picture.
+ *
+ * The chroma type is derived from the component count and sampling
+ * factors, and the frame size is programmed in 8x8 blocks, rounded up to
+ * the block grouping used for that chroma type.  Rotation is always 0.
+ */
+static void
+gen8_mfd_jpeg_pic_state(VADriverContextP ctx,
+                        struct decode_state *decode_state,
+                        struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+    VAPictureParameterBufferJPEGBaseline *pic_param;
+    int chroma_type = GEN7_YUV420;
+    int frame_width_in_blks;
+    int frame_height_in_blks;
+
+    assert(decode_state->pic_param && decode_state->pic_param->buffer);
+    pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
+
+    if (pic_param->num_components == 1)
+        chroma_type = GEN7_YUV400;
+    else if (pic_param->num_components == 3) {
+        int h1 = pic_param->components[0].h_sampling_factor;
+        int h2 = pic_param->components[1].h_sampling_factor;
+        int h3 = pic_param->components[2].h_sampling_factor;
+        int v1 = pic_param->components[0].v_sampling_factor;
+        int v2 = pic_param->components[1].v_sampling_factor;
+        int v3 = pic_param->components[2].v_sampling_factor;
+
+        if (h1 == 2 && h2 == 1 && h3 == 1 &&
+            v1 == 2 && v2 == 1 && v3 == 1)
+            chroma_type = GEN7_YUV420;
+        else if (h1 == 2 && h2 == 1 && h3 == 1 &&
+                 v1 == 1 && v2 == 1 && v3 == 1)
+            chroma_type = GEN7_YUV422H_2Y;
+        else if (h1 == 1 && h2 == 1 && h3 == 1 &&
+                 v1 == 1 && v2 == 1 && v3 == 1)
+            chroma_type = GEN7_YUV444;
+        else if (h1 == 4 && h2 == 1 && h3 == 1 &&
+                 v1 == 1 && v2 == 1 && v3 == 1)
+            chroma_type = GEN7_YUV411;
+        else if (h1 == 1 && h2 == 1 && h3 == 1 &&
+                 v1 == 2 && v2 == 1 && v3 == 1)
+            chroma_type = GEN7_YUV422V_2Y;
+        else if (h1 == 2 && h2 == 1 && h3 == 1 &&
+                 v1 == 2 && v2 == 2 && v3 == 2)
+            chroma_type = GEN7_YUV422H_4Y;
+        else if (h1 == 2 && h2 == 2 && h3 == 2 && /* was "h2 == 2 && h2 == 2": h1 went unchecked */
+                 v1 == 2 && v2 == 1 && v3 == 1)
+            chroma_type = GEN7_YUV422V_4Y;
+        else
+            assert(0);
+    }
+
+    /* Round the frame size up to the block grouping of the chroma type. */
+    if (chroma_type == GEN7_YUV400 ||
+        chroma_type == GEN7_YUV444 ||
+        chroma_type == GEN7_YUV422V_2Y) {
+        frame_width_in_blks = ((pic_param->picture_width + 7) / 8);
+        frame_height_in_blks = ((pic_param->picture_height + 7) / 8);
+    } else if (chroma_type == GEN7_YUV411) {
+        frame_width_in_blks = ((pic_param->picture_width + 31) / 32) * 4;
+        frame_height_in_blks = ((pic_param->picture_height + 31) / 32) * 4;
+    } else {
+        frame_width_in_blks = ((pic_param->picture_width + 15) / 16) * 2;
+        frame_height_in_blks = ((pic_param->picture_height + 15) / 16) * 2;
+    }
+
+    BEGIN_BCS_BATCH(batch, 3);
+    OUT_BCS_BATCH(batch, MFX_JPEG_PIC_STATE | (3 - 2));
+    OUT_BCS_BATCH(batch,
+                  (va_to_gen7_jpeg_rotation[0] << 4) | /* without rotation */
+                  (chroma_type << 0));
+    OUT_BCS_BATCH(batch,
+                  ((frame_height_in_blks - 1) << 16) | /* FrameHeightInBlks */
+                  ((frame_width_in_blks - 1) << 0)); /* FrameWidthInBlks */
+    ADVANCE_BCS_BATCH(batch);
+}
+
+/* Indexed by the VA-API Huffman table index; yields the hardware table id written to the command. */
+static const int va_to_gen7_jpeg_hufftable[2] = {
+ MFX_HUFFTABLE_ID_Y,
+ MFX_HUFFTABLE_ID_UV
+};
+
+/*
+ * Emit one MFX_JPEG_HUFF_TABLE_STATE command for each of the first
+ * num_tables Huffman tables that the application flagged for loading.
+ * Returns without emitting anything when no Huffman table buffer exists.
+ */
+static void
+gen8_mfd_jpeg_huff_table_state(VADriverContextP ctx,
+                               struct decode_state *decode_state,
+                               struct gen7_mfd_context *gen7_mfd_context,
+                               int num_tables)
+{
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+    VAHuffmanTableBufferJPEGBaseline *tables;
+    int t;
+
+    if (!decode_state->huffman_table || !decode_state->huffman_table->buffer)
+        return;
+
+    tables = (VAHuffmanTableBufferJPEGBaseline *)decode_state->huffman_table->buffer;
+
+    for (t = 0; t < num_tables; t++) {
+        if (tables->load_huffman_table[t]) {
+            /* 2 header dwords + (12 + 12 + 16 + 164) bytes of table data. */
+            BEGIN_BCS_BATCH(batch, 53);
+            OUT_BCS_BATCH(batch, MFX_JPEG_HUFF_TABLE_STATE | (53 - 2));
+            OUT_BCS_BATCH(batch, va_to_gen7_jpeg_hufftable[t]);
+            intel_batchbuffer_data(batch, tables->huffman_table[t].num_dc_codes, 12);
+            intel_batchbuffer_data(batch, tables->huffman_table[t].dc_values, 12);
+            intel_batchbuffer_data(batch, tables->huffman_table[t].num_ac_codes, 16);
+            intel_batchbuffer_data(batch, tables->huffman_table[t].ac_values, 164);
+            ADVANCE_BCS_BATCH(batch);
+        }
+    }
+}
+
+/*
+ * Indexed by the 1-based component number computed in
+ * gen8_mfd_jpeg_qm_state(); entry 0 is a placeholder that is never
+ * selected because that function skips ids below 1.
+ */
+static const int va_to_gen7_jpeg_qm[5] = {
+ -1,
+ MFX_QM_JPEG_LUMA_Y_QUANTIZER_MATRIX,
+ MFX_QM_JPEG_CHROMA_CB_QUANTIZER_MATRIX,
+ MFX_QM_JPEG_CHROMA_CR_QUANTIZER_MATRIX,
+ MFX_QM_JPEG_ALPHA_QUANTIZER_MATRIX
+};
+
+/*
+ * Load the JPEG quantiser matrices into the hardware.
+ *
+ * For every picture component whose selected quantiser table is flagged
+ * for loading, the table is reordered through zigzag_direct[] and handed
+ * to gen8_mfd_qm_state().  Returns without doing anything when no IQ
+ * matrix buffer exists.
+ */
+static void
+gen8_mfd_jpeg_qm_state(VADriverContextP ctx,
+                       struct decode_state *decode_state,
+                       struct gen7_mfd_context *gen7_mfd_context)
+{
+    VAPictureParameterBufferJPEGBaseline *pic_param;
+    VAIQMatrixBufferJPEGBaseline *iq_matrix;
+    int comp;
+
+    if (!decode_state->iq_matrix || !decode_state->iq_matrix->buffer)
+        return;
+
+    iq_matrix = (VAIQMatrixBufferJPEGBaseline *)decode_state->iq_matrix->buffer;
+    pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
+
+    assert(pic_param->num_components <= 3);
+
+    for (comp = 0; comp < pic_param->num_components; comp++) {
+        /* 1-based component number relative to the first component's id. */
+        int id = pic_param->components[comp].component_id - pic_param->components[0].component_id + 1;
+        int selector = pic_param->components[comp].quantiser_table_selector;
+        unsigned char raster_qm[64];
+        unsigned char *zigzag_qm;
+        int k;
+
+        if (id < 1 || id > 4)
+            continue;
+
+        if (!iq_matrix->load_quantiser_table[selector])
+            continue;
+
+        /* Reorder the table through zigzag_direct[] before uploading. */
+        zigzag_qm = iq_matrix->quantiser_table[selector];
+        for (k = 0; k < 64; k++)
+            raster_qm[zigzag_direct[k]] = zigzag_qm[k];
+
+        gen8_mfd_qm_state(ctx, va_to_gen7_jpeg_qm[id], raster_qm, 64, gen7_mfd_context);
+    }
+}
+
+/*
+ * Emit the MFD_JPEG_BSD_OBJECT command for one scan.
+ *
+ * The scan component mask gets one bit per scan component; the bit position
+ * is the component selector's offset from the first picture component id
+ * (offsets 0, 1 and 2 are accepted, anything else trips an assert).
+ * next_slice_param and slice_data_bo are accepted but not used here.
+ */
+static void
+gen8_mfd_jpeg_bsd_object(VADriverContextP ctx,
+                         VAPictureParameterBufferJPEGBaseline *pic_param,
+                         VASliceParameterBufferJPEGBaseline *slice_param,
+                         VASliceParameterBufferJPEGBaseline *next_slice_param,
+                         dri_bo *slice_data_bo,
+                         struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+    int component_mask = 0;
+    int interleaved;
+    int n;
+
+    assert(slice_param->num_components > 0);
+    assert(slice_param->num_components < 4);
+    assert(slice_param->num_components <= pic_param->num_components);
+
+    for (n = 0; n < slice_param->num_components; n++) {
+        int position = slice_param->components[n].component_selector -
+                       pic_param->components[0].component_id;
+
+        if (position >= 0 && position <= 2)
+            component_mask |= (1 << position);
+        else
+            assert(0);
+    }
+
+    /* a scan carrying more than one component is flagged as interleaved */
+    interleaved = (slice_param->num_components != 1);
+
+    BEGIN_BCS_BATCH(batch, 6);
+    OUT_BCS_BATCH(batch, MFD_JPEG_BSD_OBJECT | (6 - 2));
+    OUT_BCS_BATCH(batch, slice_param->slice_data_size);
+    OUT_BCS_BATCH(batch, slice_param->slice_data_offset);
+    OUT_BCS_BATCH(batch,
+                  (slice_param->slice_horizontal_position << 16) |
+                  (slice_param->slice_vertical_position << 0));
+    OUT_BCS_BATCH(batch,
+                  (interleaved << 30) |               /* interleaved */
+                  (component_mask << 27) |            /* scan components */
+                  (0 << 26) |                         /* disable interrupt allowed */
+                  (slice_param->num_mcus << 0));      /* MCU count */
+    OUT_BCS_BATCH(batch,
+                  (slice_param->restart_interval << 0)); /* RestartInterval */
+    ADVANCE_BCS_BATCH(batch);
+}
+
+/* Workaround for JPEG decoding on Ivybridge */
+#ifdef JPEG_WA
+
+static struct { /* canned bitstream and parameters consumed by the gen8_jpeg_wa_* helpers */
+ int width; /* surface width passed to i965_CreateSurfaces() */
+ int height; /* surface height passed to i965_CreateSurfaces() */
+ unsigned char data[32]; /* bitstream bytes uploaded to jpeg_wa_slice_data_bo */
+ int data_size; /* number of bytes of data[] uploaded and reported to MFD_AVC_BSD_OBJECT */
+ int data_bit_offset; /* split into a byte offset (>> 3) and a bit offset (& 0x7) in MFD_AVC_BSD_OBJECT */
+ int qp; /* written into MFX_AVC_SLICE_STATE */
+} gen7_jpeg_wa_clip = {
+ 16,
+ 16,
+ {
+ 0x65, 0xb8, 0x40, 0x32, 0x13, 0xfd, 0x06, 0x6c,
+ 0xfc, 0x0a, 0x50, 0x71, 0x5c, 0x00
+ },
+ 14,
+ 40,
+ 28,
+};
+
+/*
+ * Prepare the resources used by the JPEG workaround pass:
+ *  - destroy any previous workaround surface and create a fresh one with the
+ *    dimensions of gen7_jpeg_wa_clip, backed by an NV12 buffer object;
+ *  - on first use, allocate the slice data buffer object and upload the
+ *    canned clip bytes into it.
+ */
+static void
+gen8_jpeg_wa_init(VADriverContextP ctx,
+                  struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct object_surface *wa_surface;
+    VAStatus va_status;
+
+    /* the workaround surface is recreated on every call */
+    if (gen7_mfd_context->jpeg_wa_surface_id != VA_INVALID_SURFACE)
+        i965_DestroySurfaces(ctx, &gen7_mfd_context->jpeg_wa_surface_id, 1);
+
+    va_status = i965_CreateSurfaces(ctx,
+                                    gen7_jpeg_wa_clip.width,
+                                    gen7_jpeg_wa_clip.height,
+                                    VA_RT_FORMAT_YUV420,
+                                    1,
+                                    &gen7_mfd_context->jpeg_wa_surface_id);
+    assert(va_status == VA_STATUS_SUCCESS);
+
+    wa_surface = SURFACE(gen7_mfd_context->jpeg_wa_surface_id);
+    assert(wa_surface);
+    i965_check_alloc_surface_bo(ctx, wa_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
+    gen7_mfd_context->jpeg_wa_surface_object = wa_surface;
+
+    /* the clip data buffer is created once and then kept */
+    if (gen7_mfd_context->jpeg_wa_slice_data_bo)
+        return;
+
+    gen7_mfd_context->jpeg_wa_slice_data_bo = dri_bo_alloc(i965->intel.bufmgr,
+                                                           "JPEG WA data",
+                                                           0x1000,
+                                                           0x1000);
+    dri_bo_subdata(gen7_mfd_context->jpeg_wa_slice_data_bo,
+                   0,
+                   gen7_jpeg_wa_clip.data_size,
+                   gen7_jpeg_wa_clip.data);
+}
+
+/*
+ * Workaround step: emit MFX_PIPE_MODE_SELECT selecting long-format AVC VLD
+ * decoding with only the pre-deblocking output enabled.
+ */
+static void
+gen8_jpeg_wa_pipe_mode_select(VADriverContextP ctx,
+                              struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+    const unsigned int mode_dw =
+        (MFX_LONG_MODE << 17) |     /* Currently only support long format */
+        (MFD_MODE_VLD << 15) |      /* VLD mode */
+        (0 << 10) |                 /* disable Stream-Out */
+        (0 << 9) |                  /* Post Deblocking Output */
+        (1 << 8) |                  /* Pre Deblocking Output */
+        (0 << 5) |                  /* not in stitch mode */
+        (MFX_CODEC_DECODE << 4) |   /* decoding mode */
+        (MFX_FORMAT_AVC << 0);
+    /* none of the "terminate on AVC error" bits (bits 4..2) are set */
+    const unsigned int error_handling_dw = 0;
+
+    BEGIN_BCS_BATCH(batch, 5);
+    OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
+    OUT_BCS_BATCH(batch, mode_dw);
+    OUT_BCS_BATCH(batch, error_handling_dw);
+    OUT_BCS_BATCH(batch, 0); /* pic status/error report id */
+    OUT_BCS_BATCH(batch, 0); /* reserved */
+    ADVANCE_BCS_BATCH(batch);
+}
+
+/*
+ * Workaround step: emit MFX_SURFACE_STATE describing the workaround surface
+ * as a Y-major tiled 4:2:0 surface with interleaved chroma.
+ */
+static void
+gen8_jpeg_wa_surface_state(VADriverContextP ctx,
+                           struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+    struct object_surface *wa_surface = gen7_mfd_context->jpeg_wa_surface_object;
+
+    BEGIN_BCS_BATCH(batch, 6);
+    OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
+    OUT_BCS_BATCH(batch, 0);
+
+    /* width and height, both stored minus one */
+    OUT_BCS_BATCH(batch,
+                  ((wa_surface->orig_width - 1) << 18) |
+                  ((wa_surface->orig_height - 1) << 4));
+
+    /* format, pitch and tiling */
+    OUT_BCS_BATCH(batch,
+                  (MFX_SURFACE_PLANAR_420_8 << 28) |  /* 420 planar YUV surface */
+                  (1 << 27) |                         /* interleave chroma, set to 0 for JPEG */
+                  (0 << 22) |                         /* surface object control state, ignored */
+                  ((wa_surface->width - 1) << 3) |    /* pitch */
+                  (0 << 2) |                          /* must be 0 */
+                  (1 << 1) |                          /* must be tiled */
+                  (I965_TILEWALK_YMAJOR << 0));       /* tile walk, must be 1 */
+
+    /* chroma plane offsets */
+    OUT_BCS_BATCH(batch,
+                  (0 << 16) |                         /* X offset for U(Cb), must be 0 */
+                  (wa_surface->y_cb_offset << 0));    /* Y offset for U(Cb) */
+    OUT_BCS_BATCH(batch,
+                  (0 << 16) |                         /* X offset for V(Cr), must be 0 */
+                  (0 << 0));                          /* Y offset for V(Cr), must be 0 for video codec, non-zero for JPEG */
+    ADVANCE_BCS_BATCH(batch);
+}
+
+/*
+ * Workaround step: emit the 61-dword MFX_PIPE_BUF_ADDR_STATE.
+ *
+ * Only two entries carry a buffer: the pre-deblocking output (the workaround
+ * surface) and a temporary intra row store scratch buffer.  Every other
+ * dword is written as zero.  The function drops its own reference to the
+ * scratch buffer after the relocation has been emitted.
+ */
+static void
+gen8_jpeg_wa_pipe_buf_addr_state(VADriverContextP ctx,
+                                 struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+    struct object_surface *wa_surface = gen7_mfd_context->jpeg_wa_surface_object;
+    dri_bo *intra_row_bo;
+    int n;
+
+    intra_row_bo = dri_bo_alloc(i965->intel.bufmgr,
+                                "intra row store",
+                                128 * 64,
+                                0x1000);
+
+    BEGIN_BCS_BATCH(batch, 61);
+    OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
+
+    /* DW1: pre-deblocking output */
+    OUT_BCS_RELOC(batch,
+                  wa_surface->bo,
+                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+                  0);
+
+    /* DW2-3: rest of the pre-deblocking entry,
+     * DW4-6: post-deblocking output,
+     * DW7-12: uncompressed video & stream out, ignored for decoding */
+    for (n = 0; n < 2 + 3 + 6; n++) {
+        OUT_BCS_BATCH(batch, 0);
+    }
+
+    /* DW13: intra row store scratch */
+    OUT_BCS_RELOC(batch,
+                  intra_row_bo,
+                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+                  0);
+
+    /* DW14-15: rest of the intra row store entry,
+     * DW16-18: deblocking filter row store */
+    for (n = 0; n < 2 + 3; n++) {
+        OUT_BCS_BATCH(batch, 0);
+    }
+
+    /* DW19 onwards: two dwords per reference frame slot */
+    for (n = 0; n < 2 * MAX_GEN_REFERENCE_FRAMES; n++) {
+        OUT_BCS_BATCH(batch, 0);
+    }
+
+    /* one dword closing the reference list, three dwords for the mb status
+     * address, six dwords for the ILDB and second ILDB addresses */
+    for (n = 0; n < 1 + 3 + 6; n++) {
+        OUT_BCS_BATCH(batch, 0);
+    }
+
+    ADVANCE_BCS_BATCH(batch);
+
+    dri_bo_unreference(intra_row_bo);
+}
+
+/*
+ * Workaround step: emit the 10-dword MFX_BSP_BUF_BASE_ADDR_STATE using two
+ * temporary scratch buffers (BSD/MPC row store and MPR row store).  The
+ * function drops its own references to both buffers once the command has
+ * been emitted.
+ */
+static void
+gen8_jpeg_wa_bsp_buf_base_addr_state(VADriverContextP ctx,
+                                     struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+    dri_bo *bsd_mpc_row_bo;
+    dri_bo *mpr_row_bo;
+    int n;
+
+    bsd_mpc_row_bo = dri_bo_alloc(i965->intel.bufmgr,
+                                  "bsd mpc row store",
+                                  11520, /* 1.5 * 120 * 64 */
+                                  0x1000);
+
+    mpr_row_bo = dri_bo_alloc(i965->intel.bufmgr,
+                              "mpr row store",
+                              7680, /* 1.0 * 120 * 64 */
+                              0x1000);
+
+    BEGIN_BCS_BATCH(batch, 10);
+    OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
+
+    /* DW1-3: BSD/MPC row store */
+    OUT_BCS_RELOC(batch,
+                  bsd_mpc_row_bo,
+                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+                  0);
+    for (n = 0; n < 2; n++) {
+        OUT_BCS_BATCH(batch, 0);
+    }
+
+    /* DW4-6: MPR row store */
+    OUT_BCS_RELOC(batch,
+                  mpr_row_bo,
+                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+                  0);
+    for (n = 0; n < 2; n++) {
+        OUT_BCS_BATCH(batch, 0);
+    }
+
+    /* DW7-9: left as zero */
+    for (n = 0; n < 3; n++) {
+        OUT_BCS_BATCH(batch, 0);
+    }
+
+    ADVANCE_BCS_BATCH(batch);
+
+    dri_bo_unreference(bsd_mpc_row_bo);
+    dri_bo_unreference(mpr_row_bo);
+}
+
+static void
+gen8_jpeg_wa_avc_qm_state(VADriverContextP ctx,
+ struct gen7_mfd_context *gen7_mfd_context)
+{
+ /* Step of the JPEG workaround sequence that emits no command: the body is empty. */
+}
+
+/*
+ * Workaround step: emit the 16-dword MFX_AVC_IMG_STATE for a picture that is
+ * one macroblock wide and one macroblock high, with the 4:2:0 and CABAC bits
+ * set and MBAFF disabled.
+ */
+static void
+gen8_jpeg_wa_avc_img_state(VADriverContextP ctx,
+                           struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+    const unsigned int mbs_wide = 1;
+    const unsigned int mbs_high = 1;
+    const int picture_structure = 0;
+    const int mbaff = 0;
+    int n;
+
+    BEGIN_BCS_BATCH(batch, 16);
+    OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
+
+    /* DW1: total number of macroblocks */
+    OUT_BCS_BATCH(batch, mbs_wide * mbs_high);
+
+    /* DW2: picture size in macroblocks, minus one */
+    OUT_BCS_BATCH(batch,
+                  ((mbs_high - 1) << 16) |
+                  ((mbs_wide - 1) << 0));
+
+    /* DW3: everything zero; bit 12 differs from GEN6 */
+    OUT_BCS_BATCH(batch,
+                  (0 << 24) |
+                  (0 << 16) |
+                  (0 << 14) |
+                  (0 << 13) |
+                  (0 << 12) |
+                  (0 << 10) |
+                  (picture_structure << 8));
+
+    /* DW4: coding flags */
+    OUT_BCS_BATCH(batch,
+                  (1 << 10) |       /* 4:2:0 */
+                  (1 << 7) |        /* CABAC */
+                  (0 << 6) |
+                  (0 << 5) |
+                  (0 << 4) |
+                  (0 << 3) |
+                  (1 << 2) |
+                  (mbaff << 1) |
+                  (0 << 0));
+
+    /* DW5-15: left as zero */
+    for (n = 5; n < 16; n++) {
+        OUT_BCS_BATCH(batch, 0);
+    }
+
+    ADVANCE_BCS_BATCH(batch);
+}
+
+/*
+ * Workaround step: emit a 71-dword MFX_AVC_DIRECTMODE_STATE whose payload is
+ * entirely zero (no reference surfaces, no current-frame buffer, empty POC
+ * list).
+ */
+static void
+gen8_jpeg_wa_avc_directmode_state(VADriverContextP ctx,
+                                  struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+    int n;
+
+    BEGIN_BCS_BATCH(batch, 71);
+    OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
+
+    /* reference surfaces 0..15: a top and a bottom dword for each */
+    for (n = 0; n < 2 * MAX_GEN_REFERENCE_FRAMES; n++) {
+        OUT_BCS_BATCH(batch, 0);
+    }
+
+    /* one dword after the reference list, then three dwords for the
+     * current decoding frame/field */
+    for (n = 0; n < 1 + 3; n++) {
+        OUT_BCS_BATCH(batch, 0);
+    }
+
+    /* POC list: two dwords per reference slot */
+    for (n = 0; n < 2 * MAX_GEN_REFERENCE_FRAMES; n++) {
+        OUT_BCS_BATCH(batch, 0);
+    }
+
+    /* two trailing dwords */
+    for (n = 0; n < 2; n++) {
+        OUT_BCS_BATCH(batch, 0);
+    }
+
+    ADVANCE_BCS_BATCH(batch);
+}
+
+/*
+ * Workaround step: emit the 11-dword MFX_IND_OBJ_BASE_ADDR_STATE pointing
+ * the indirect bitstream base at the workaround clip buffer.  All remaining
+ * entries are zero.
+ */
+static void
+gen8_jpeg_wa_ind_obj_base_addr_state(VADriverContextP ctx,
+                                     struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+    int n;
+
+    BEGIN_BCS_BATCH(batch, 11);
+    OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
+
+    /* DW1: clip data buffer, read-only for the GPU */
+    OUT_BCS_RELOC(batch,
+                  gen7_mfd_context->jpeg_wa_slice_data_bo,
+                  I915_GEM_DOMAIN_INSTRUCTION, 0,
+                  0);
+    OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
+
+    /* DW3-10: ignored for VLD mode */
+    for (n = 3; n < 11; n++) {
+        OUT_BCS_BATCH(batch, 0);
+    }
+
+    ADVANCE_BCS_BATCH(batch);
+}
+
+/*
+ * Workaround step: emit MFD_AVC_BSD_OBJECT for the canned clip.  The clip's
+ * bit offset is split into a whole-byte part and a remaining-bits part, and
+ * the slice is flagged as the last one.
+ */
+static void
+gen8_jpeg_wa_avc_bsd_object(VADriverContextP ctx,
+                            struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+    const int first_mb_byte_offset = gen7_jpeg_wa_clip.data_bit_offset >> 3;
+    const int first_mb_bit_offset = gen7_jpeg_wa_clip.data_bit_offset & 0x7;
+
+    /* the input bitstream format on GEN7 differs from GEN6 */
+    BEGIN_BCS_BATCH(batch, 6);
+    OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
+    OUT_BCS_BATCH(batch, gen7_jpeg_wa_clip.data_size);  /* DW1: data size */
+    OUT_BCS_BATCH(batch, 0);                            /* DW2 */
+    OUT_BCS_BATCH(batch,                                /* DW3: all fields zero */
+                  (0 << 31) |
+                  (0 << 14) |
+                  (0 << 12) |
+                  (0 << 10) |
+                  (0 << 8));
+    OUT_BCS_BATCH(batch,                                /* DW4 */
+                  (first_mb_byte_offset << 16) |
+                  (0 << 5) |
+                  (0 << 4) |
+                  (1 << 3) |                            /* LastSlice Flag */
+                  first_mb_bit_offset);
+    OUT_BCS_BATCH(batch, 0);                            /* DW5 */
+    ADVANCE_BCS_BATCH(batch);
+}
+
+/*
+ * Workaround step: emit the 11-dword MFX_AVC_SLICE_STATE for a single I
+ * slice starting at macroblock (0, 0), with deblocking disabled, the QP
+ * taken from gen7_jpeg_wa_clip and the last-slice flag set.
+ */
+static void
+gen8_jpeg_wa_avc_slice_state(VADriverContextP ctx,
+                             struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+    const int mb_x = 0, mb_y = 0;               /* where this slice starts */
+    const int next_mb_x = 0, next_mb_y = 1;     /* where the next slice would start */
+    const int ref_count_l0 = 0, ref_count_l1 = 0;
+    const int first_mb = 0;
+    int n;
+
+    BEGIN_BCS_BATCH(batch, 11);
+    OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
+    OUT_BCS_BATCH(batch, SLICE_TYPE_I);
+    OUT_BCS_BATCH(batch,
+                  (ref_count_l1 << 24) |
+                  (ref_count_l0 << 16) |
+                  (0 << 8) |
+                  (0 << 0));
+    OUT_BCS_BATCH(batch,
+                  (0 << 29) |
+                  (1 << 27) |   /* disable Deblocking */
+                  (0 << 24) |
+                  (gen7_jpeg_wa_clip.qp << 16) |
+                  (0 << 8) |
+                  (0 << 0));
+    OUT_BCS_BATCH(batch,
+                  (mb_y << 24) |
+                  (mb_x << 16) |
+                  (first_mb << 0));
+    OUT_BCS_BATCH(batch,
+                  (next_mb_y << 16) |
+                  (next_mb_x << 0));
+    OUT_BCS_BATCH(batch, (1 << 19)); /* last slice flag */
+
+    /* DW7-10: left as zero */
+    for (n = 7; n < 11; n++) {
+        OUT_BCS_BATCH(batch, 0);
+    }
+
+    ADVANCE_BCS_BATCH(batch);
+}
+
+static void
+gen8_mfd_jpeg_wa(VADriverContextP ctx,
+ struct gen7_mfd_context *gen7_mfd_context)
+{ /* Queue the JPEG workaround: prepare its resources, then emit an AVC-format decode of gen7_jpeg_wa_clip into the workaround surface. */
+ struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+ gen8_jpeg_wa_init(ctx, gen7_mfd_context); /* (re)create the workaround surface and, on first use, the clip data buffer */
+ intel_batchbuffer_emit_mi_flush(batch);
+ gen8_jpeg_wa_pipe_mode_select(ctx, gen7_mfd_context);
+ gen8_jpeg_wa_surface_state(ctx, gen7_mfd_context);
+ gen8_jpeg_wa_pipe_buf_addr_state(ctx, gen7_mfd_context);
+ gen8_jpeg_wa_bsp_buf_base_addr_state(ctx, gen7_mfd_context);
+ gen8_jpeg_wa_avc_qm_state(ctx, gen7_mfd_context); /* currently emits nothing */
+ gen8_jpeg_wa_avc_img_state(ctx, gen7_mfd_context);
+ gen8_jpeg_wa_ind_obj_base_addr_state(ctx, gen7_mfd_context);
+ /* remaining AVC state, followed by the BSD object that consumes the clip data */
+ gen8_jpeg_wa_avc_directmode_state(ctx, gen7_mfd_context);
+ gen8_jpeg_wa_avc_slice_state(ctx, gen7_mfd_context);
+ gen8_jpeg_wa_avc_bsd_object(ctx, gen7_mfd_context);
+}
+
+#endif
+
+/*
+ * Build and submit the BCS batch that decodes one baseline JPEG picture.
+ *
+ * Sequence: per-picture init, optional JPEG_WA workaround pass, pipe/surface/
+ * buffer state, picture and quantiser state, a first pass over all scans to
+ * find the highest Huffman table selector in use, the Huffman table state,
+ * and finally one MFD_JPEG_BSD_OBJECT per scan.
+ *
+ * decode_state must carry a picture parameter buffer (asserted) plus slice
+ * parameter and slice data buffers for every slice group.
+ */
+void
+gen8_mfd_jpeg_decode_picture(VADriverContextP ctx,
+                             struct decode_state *decode_state,
+                             struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+    VAPictureParameterBufferJPEGBaseline *pic_param;
+    VASliceParameterBufferJPEGBaseline *slice_param, *next_slice_param, *next_slice_group_param;
+    dri_bo *slice_data_bo;
+    int i, j, max_selector = 0;
+
+    assert(decode_state->pic_param && decode_state->pic_param->buffer);
+    pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
+
+    /* Currently only support Baseline DCT */
+    gen8_mfd_jpeg_decode_init(ctx, decode_state, gen7_mfd_context);
+    intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
+#ifdef JPEG_WA
+    gen8_mfd_jpeg_wa(ctx, gen7_mfd_context);
+#endif
+    intel_batchbuffer_emit_mi_flush(batch);
+    gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
+    gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
+    gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
+    gen8_mfd_jpeg_pic_state(ctx, decode_state, gen7_mfd_context);
+    gen8_mfd_jpeg_qm_state(ctx, decode_state, gen7_mfd_context);
+
+    /* Pass 1: find the highest DC/AC Huffman table selector used by any scan.
+     * NOTE(review): the indirect object base address is also emitted once per
+     * slice group here, as in the original code, so the command stream is
+     * unchanged; confirm whether that emission is actually required. */
+    for (j = 0; j < decode_state->num_slice_params; j++) {
+        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
+        slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
+        slice_data_bo = decode_state->slice_datas[j]->bo;
+        gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
+
+        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
+            int component;
+
+            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
+
+            for (component = 0; component < slice_param->num_components; component++) {
+                if (max_selector < slice_param->components[component].dc_table_selector)
+                    max_selector = slice_param->components[component].dc_table_selector;
+
+                if (max_selector < slice_param->components[component].ac_table_selector)
+                    max_selector = slice_param->components[component].ac_table_selector;
+            }
+
+            slice_param++;
+        }
+    }
+
+    assert(max_selector < 2);
+
+    /* The selectors come from the application and the assert above is gone in
+     * NDEBUG builds; only two Huffman tables exist, so never ask for more. */
+    if (max_selector > 1)
+        max_selector = 1;
+
+    gen8_mfd_jpeg_huff_table_state(ctx, decode_state, gen7_mfd_context, max_selector + 1);
+
+    /* Pass 2: emit one BSD object per scan. */
+    for (j = 0; j < decode_state->num_slice_params; j++) {
+        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
+        slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
+        slice_data_bo = decode_state->slice_datas[j]->bo;
+        gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
+
+        if (j == decode_state->num_slice_params - 1)
+            next_slice_group_param = NULL;
+        else
+            next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
+
+        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
+            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
+
+            /* the scan that follows this one, or NULL for the very last scan */
+            if (i < decode_state->slice_params[j]->num_elements - 1)
+                next_slice_param = slice_param + 1;
+            else
+                next_slice_param = next_slice_group_param;
+
+            gen8_mfd_jpeg_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
+            slice_param++;
+        }
+    }
+
+    intel_batchbuffer_end_atomic(batch);
+    intel_batchbuffer_flush(batch);
+}
+
+static const int vp8_dc_qlookup[128] = /* DC quantizer value for each quantization index 0..127 (callers clip the index first) */
+{
+ 4, 5, 6, 7, 8, 9, 10, 10, 11, 12, 13, 14, 15, 16, 17, 17,
+ 18, 19, 20, 20, 21, 21, 22, 22, 23, 23, 24, 25, 25, 26, 27, 28,
+ 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40, 41, 42, 43,
+ 44, 45, 46, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58,
+ 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74,
+ 75, 76, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
+ 91, 93, 95, 96, 98, 100, 101, 102, 104, 106, 108, 110, 112, 114, 116, 118,
+ 122, 124, 126, 128, 130, 132, 134, 136, 138, 140, 143, 145, 148, 151, 154, 157,
+};
+
+static const int vp8_ac_qlookup[128] = /* AC quantizer value for each quantization index 0..127 (callers clip the index first) */
+{
+ 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
+ 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
+ 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,
+ 52, 53, 54, 55, 56, 57, 58, 60, 62, 64, 66, 68, 70, 72, 74, 76,
+ 78, 80, 82, 84, 86, 88, 90, 92, 94, 96, 98, 100, 102, 104, 106, 108,
+ 110, 112, 114, 116, 119, 122, 125, 128, 131, 134, 137, 140, 143, 146, 149, 152,
+ 155, 158, 161, 164, 167, 170, 173, 177, 181, 185, 189, 193, 197, 201, 205, 209,
+ 213, 217, 221, 225, 229, 234, 239, 245, 249, 254, 259, 264, 269, 274, 279, 284,
+};
+
+/* Clamp a quantization index into the range [0, 127] covered by the VP8 lookup tables. */
+static inline unsigned int vp8_clip_quantization_index(int index)
+{
+    const int lowest = 0;
+    const int highest = 127;
+    int clipped = index;
+
+    if (clipped < lowest)
+        clipped = lowest;
+    if (clipped > highest)
+        clipped = highest;
+
+    return clipped;
+}
+
+/*
+ * Per-picture setup for VP8 decoding.
+ *
+ * Updates the frame store index, makes sure the render target has an NV12
+ * buffer object, points both deblocking outputs at that buffer (exactly one
+ * of them is marked valid, depending on loop_filter_disable), ensures the
+ * segmentation buffer, and reallocates the four row-store scratch buffers
+ * sized from the picture width in macroblocks.
+ */
+static void
+gen8_mfd_vp8_decode_init(VADriverContextP ctx,
+                         struct decode_state *decode_state,
+                         struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    VAPictureParameterBufferVP8 *pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
+    struct object_surface *render_surface;
+    dri_bo *scratch_bo;
+    int mb_cols = (pic_param->frame_width + 15) / 16;
+    int mb_rows = (pic_param->frame_height + 15) / 16;
+
+    assert(mb_cols > 0 && mb_cols <= 256); /* 4K */
+    assert(mb_rows > 0 && mb_rows <= 256);
+
+    intel_update_vp8_frame_store_index(ctx,
+                                       decode_state,
+                                       pic_param,
+                                       gen7_mfd_context->reference_surface);
+
+    /* Current decoded picture */
+    render_surface = decode_state->render_object;
+    i965_check_alloc_surface_bo(ctx, render_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
+
+    /* post-deblocking output is used when the loop filter is enabled */
+    dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
+    gen7_mfd_context->post_deblocking_output.bo = render_surface->bo;
+    dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
+    gen7_mfd_context->post_deblocking_output.valid = !pic_param->pic_fields.bits.loop_filter_disable;
+
+    /* pre-deblocking output is used when the loop filter is disabled */
+    dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
+    gen7_mfd_context->pre_deblocking_output.bo = render_surface->bo;
+    dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
+    gen7_mfd_context->pre_deblocking_output.valid = pic_param->pic_fields.bits.loop_filter_disable;
+
+    intel_ensure_vp8_segmentation_buffer(ctx,
+                                         &gen7_mfd_context->segmentation_buffer,
+                                         mb_cols,
+                                         mb_rows);
+
+    /* The same as AVC: each scratch buffer is dropped and allocated anew */
+    dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
+    scratch_bo = dri_bo_alloc(i965->intel.bufmgr,
+                              "intra row store",
+                              mb_cols * 64,
+                              0x1000);
+    assert(scratch_bo);
+    gen7_mfd_context->intra_row_store_scratch_buffer.bo = scratch_bo;
+    gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
+
+    dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
+    scratch_bo = dri_bo_alloc(i965->intel.bufmgr,
+                              "deblocking filter row store",
+                              mb_cols * 64 * 4,
+                              0x1000);
+    assert(scratch_bo);
+    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = scratch_bo;
+    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
+
+    dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
+    scratch_bo = dri_bo_alloc(i965->intel.bufmgr,
+                              "bsd mpc row store",
+                              mb_cols * 64 * 2,
+                              0x1000);
+    assert(scratch_bo);
+    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = scratch_bo;
+    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
+
+    dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
+    scratch_bo = dri_bo_alloc(i965->intel.bufmgr,
+                              "mpr row store",
+                              mb_cols * 64 * 2,
+                              0x1000);
+    assert(scratch_bo);
+    gen7_mfd_context->mpr_row_store_scratch_buffer.bo = scratch_bo;
+    gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1;
+
+    /* no bitplane buffer is used for VP8 */
+    gen7_mfd_context->bitplane_read_buffer.valid = 0;
+}
+
+/*
+ * Emit the 38-dword MFX_VP8_PIC_STATE command.
+ *
+ * Reads the picture parameters, the IQ matrix, the first slice parameter
+ * buffer (VP8 uses one slice per frame) and the probability data buffer
+ * from decode_state; the caller is expected to have validated that these
+ * buffers exist.
+ *
+ * The token partition count is encoded as floor(log2(num_of_partitions - 1)).
+ * It is computed with integer arithmetic so that a bogus partition count of
+ * 0 or 1 yields 0 instead of converting log2()'s -inf/NaN result to int,
+ * which is undefined.
+ */
+static void
+gen8_mfd_vp8_pic_state(VADriverContextP ctx,
+                       struct decode_state *decode_state,
+                       struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+    VAPictureParameterBufferVP8 *pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
+    VAIQMatrixBufferVP8 *iq_matrix = (VAIQMatrixBufferVP8 *)decode_state->iq_matrix->buffer;
+    VASliceParameterBufferVP8 *slice_param = (VASliceParameterBufferVP8 *)decode_state->slice_params[0]->buffer; /* one slice per frame */
+    dri_bo *probs_bo = decode_state->probability_data->bo;
+    int i, j, log2num;
+    int token_partitions;
+    unsigned int quantization_value[4][6];
+
+    /* There is no safe way to error out if the segmentation buffer
+       could not be allocated. So, instead of aborting, simply decode
+       something even if the result may look totally inaccurate */
+    const unsigned int enable_segmentation =
+        pic_param->pic_fields.bits.segmentation_enabled &&
+        gen7_mfd_context->segmentation_buffer.valid;
+
+    /* integer floor(log2(n)); n <= 1 (including invalid input) maps to 0 */
+    token_partitions = slice_param->num_of_partitions - 1;
+    for (log2num = 0; token_partitions > 1; token_partitions >>= 1)
+        log2num++;
+
+    BEGIN_BCS_BATCH(batch, 38);
+    OUT_BCS_BATCH(batch, MFX_VP8_PIC_STATE | (38 - 2));
+    OUT_BCS_BATCH(batch,
+                  (ALIGN(pic_param->frame_height, 16) / 16 - 1) << 16 |
+                  (ALIGN(pic_param->frame_width, 16) / 16 - 1) << 0);
+    OUT_BCS_BATCH(batch,
+                  log2num << 24 |
+                  pic_param->pic_fields.bits.sharpness_level << 16 |
+                  pic_param->pic_fields.bits.sign_bias_alternate << 13 |
+                  pic_param->pic_fields.bits.sign_bias_golden << 12 |
+                  pic_param->pic_fields.bits.loop_filter_adj_enable << 11 |
+                  pic_param->pic_fields.bits.mb_no_coeff_skip << 10 |
+                  pic_param->pic_fields.bits.update_mb_segmentation_map << 9 |
+                  pic_param->pic_fields.bits.segmentation_enabled << 8 |
+                  (enable_segmentation &&
+                   !pic_param->pic_fields.bits.update_mb_segmentation_map) << 7 |
+                  (enable_segmentation &&
+                   pic_param->pic_fields.bits.update_mb_segmentation_map) << 6 |
+                  (pic_param->pic_fields.bits.key_frame == 0 ? 1 : 0) << 5 |    /* 0 indicate an intra frame in VP8 stream/spec($9.1)*/
+                  pic_param->pic_fields.bits.filter_type << 4 |
+                  (pic_param->pic_fields.bits.version == 3) << 1 | /* full pixel mode for version 3 */
+                  !!pic_param->pic_fields.bits.version << 0); /* version 0: 6 tap */
+
+    OUT_BCS_BATCH(batch,
+                  pic_param->loop_filter_level[3] << 24 |
+                  pic_param->loop_filter_level[2] << 16 |
+                  pic_param->loop_filter_level[1] <<  8 |
+                  pic_param->loop_filter_level[0] <<  0);
+
+    /* Quantizer Value for 4 segments, DW4-DW15 */
+    for (i = 0; i < 4; i++) {
+        quantization_value[i][0] = vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][0])];/*yac*/
+        quantization_value[i][1] = vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][1])];/*ydc*/
+        quantization_value[i][2] = 2*vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][2])];/*y2dc*/
+        /* 101581>>16 is equivalent to 155/100 */
+        quantization_value[i][3] = (101581*vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][3])]) >> 16;/*y2ac*/
+        quantization_value[i][4] = vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][4])];/*uvdc*/
+        quantization_value[i][5] = vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][5])];/*uvac*/
+
+        /* y2ac has a floor of 8, uvdc a ceiling of 132 */
+        quantization_value[i][3] = (quantization_value[i][3] > 8 ? quantization_value[i][3] : 8);
+        quantization_value[i][4] = (quantization_value[i][4] < 132 ? quantization_value[i][4] : 132);
+
+        OUT_BCS_BATCH(batch,
+                      quantization_value[i][0] << 16 | /* Y1AC */
+                      quantization_value[i][1] <<  0); /* Y1DC */
+        OUT_BCS_BATCH(batch,
+                      quantization_value[i][5] << 16 | /* UVAC */
+                      quantization_value[i][4] <<  0); /* UVDC */
+        OUT_BCS_BATCH(batch,
+                      quantization_value[i][3] << 16 | /* Y2AC */
+                      quantization_value[i][2] <<  0); /* Y2DC */
+    }
+
+    /* CoeffProbability table for non-key frame, DW16-DW18 */
+    if (probs_bo) {
+        OUT_BCS_RELOC(batch, probs_bo,
+                      0, I915_GEM_DOMAIN_INSTRUCTION,
+                      0);
+        OUT_BCS_BATCH(batch, 0);
+        OUT_BCS_BATCH(batch, 0);
+    } else {
+        OUT_BCS_BATCH(batch, 0);
+        OUT_BCS_BATCH(batch, 0);
+        OUT_BCS_BATCH(batch, 0);
+    }
+
+    OUT_BCS_BATCH(batch,
+                  pic_param->mb_segment_tree_probs[2] << 16 |
+                  pic_param->mb_segment_tree_probs[1] <<  8 |
+                  pic_param->mb_segment_tree_probs[0] <<  0);
+
+    OUT_BCS_BATCH(batch,
+                  pic_param->prob_skip_false << 24 |
+                  pic_param->prob_intra      << 16 |
+                  pic_param->prob_last       <<  8 |
+                  pic_param->prob_gf         <<  0);
+
+    OUT_BCS_BATCH(batch,
+                  pic_param->y_mode_probs[3] << 24 |
+                  pic_param->y_mode_probs[2] << 16 |
+                  pic_param->y_mode_probs[1] <<  8 |
+                  pic_param->y_mode_probs[0] <<  0);
+
+    OUT_BCS_BATCH(batch,
+                  pic_param->uv_mode_probs[2] << 16 |
+                  pic_param->uv_mode_probs[1] <<  8 |
+                  pic_param->uv_mode_probs[0] <<  0);
+
+    /* MV update value, DW23-DW32 */
+    for (i = 0; i < 2; i++) {
+        for (j = 0; j < 20; j += 4) {
+            /* the last group would reference element 19, which is written
+             * as 0 instead of being read from mv_probs */
+            OUT_BCS_BATCH(batch,
+                          (j + 3 == 19 ? 0 : pic_param->mv_probs[i][j + 3]) << 24 |
+                          pic_param->mv_probs[i][j + 2] << 16 |
+                          pic_param->mv_probs[i][j + 1] <<  8 |
+                          pic_param->mv_probs[i][j + 0] <<  0);
+        }
+    }
+
+    OUT_BCS_BATCH(batch,
+                  (pic_param->loop_filter_deltas_ref_frame[3] & 0x7f) << 24 |
+                  (pic_param->loop_filter_deltas_ref_frame[2] & 0x7f) << 16 |
+                  (pic_param->loop_filter_deltas_ref_frame[1] & 0x7f) <<  8 |
+                  (pic_param->loop_filter_deltas_ref_frame[0] & 0x7f) <<  0);
+
+    OUT_BCS_BATCH(batch,
+                  (pic_param->loop_filter_deltas_mode[3] & 0x7f) << 24 |
+                  (pic_param->loop_filter_deltas_mode[2] & 0x7f) << 16 |
+                  (pic_param->loop_filter_deltas_mode[1] & 0x7f) <<  8 |
+                  (pic_param->loop_filter_deltas_mode[0] & 0x7f) <<  0);
+
+    /* segmentation id stream base address, DW35-DW37 */
+    if (enable_segmentation) {
+        OUT_BCS_RELOC(batch, gen7_mfd_context->segmentation_buffer.bo,
+                      0, I915_GEM_DOMAIN_INSTRUCTION,
+                      0);
+        OUT_BCS_BATCH(batch, 0);
+        OUT_BCS_BATCH(batch, 0);
+    }
+    else {
+        OUT_BCS_BATCH(batch, 0);
+        OUT_BCS_BATCH(batch, 0);
+        OUT_BCS_BATCH(batch, 0);
+    }
+    ADVANCE_BCS_BATCH(batch);
+}
+
+/*
+ * Emit the 22-dword MFD_VP8_BSD_OBJECT command describing where each VP8
+ * partition lives inside the slice data buffer.
+ *
+ * pic_param     - supplies the boolean coder state of partition 0
+ * slice_param   - supplies the data offset, macroblock bit offset, partition
+ *                 count and partition sizes
+ * slice_data_bo - accepted but not used here
+ *
+ * The partition count field is floor(log2(num_of_partitions - 1)), computed
+ * with integer arithmetic so that an out-of-range count cannot reach an
+ * undefined float-to-int conversion when asserts are compiled out.
+ */
+static void
+gen8_mfd_vp8_bsd_object(VADriverContextP ctx,
+                        VAPictureParameterBufferVP8 *pic_param,
+                        VASliceParameterBufferVP8 *slice_param,
+                        dri_bo *slice_data_bo,
+                        struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+    int i, log2num;
+    int token_partitions;
+    /* byte offset of the first partition: data offset plus the macroblock bit offset rounded up to bytes */
+    unsigned int offset = slice_param->slice_data_offset + ((slice_param->macroblock_offset + 7 ) >> 3);
+    unsigned int used_bits = 8 - pic_param->bool_coder_ctx.count;
+    unsigned int partition_size_0 = slice_param->partition_size[0];
+
+    assert(pic_param->bool_coder_ctx.count >= 0 && pic_param->bool_coder_ctx.count <= 7);
+    /* a whole byte was consumed: step over it instead of reporting 8 used bits */
+    if (used_bits == 8) {
+        used_bits = 0;
+        offset += 1;
+        partition_size_0 -= 1;
+    }
+
+    assert(slice_param->num_of_partitions >= 2);
+    assert(slice_param->num_of_partitions <= 9);
+
+    /* integer floor(log2(n)); n <= 1 (invalid input) maps to 0 */
+    token_partitions = slice_param->num_of_partitions - 1;
+    for (log2num = 0; token_partitions > 1; token_partitions >>= 1)
+        log2num++;
+
+    BEGIN_BCS_BATCH(batch, 22);
+    OUT_BCS_BATCH(batch, MFD_VP8_BSD_OBJECT | (22 - 2));
+    OUT_BCS_BATCH(batch,
+                  used_bits << 16 | /* Partition 0 CPBAC Entropy Count */
+                  pic_param->bool_coder_ctx.range <<  8 | /* Partition 0 Count Entropy Range */
+                  log2num << 4 |
+                  (slice_param->macroblock_offset & 0x7));
+    OUT_BCS_BATCH(batch,
+                  pic_param->bool_coder_ctx.value << 24 | /* Partition 0 Count Entropy Value */
+                  0);
+
+    OUT_BCS_BATCH(batch, partition_size_0);
+    OUT_BCS_BATCH(batch, offset);
+    /* partition sizes in bytes are present after the above first partition when there are more than one token partition */
+    offset += (partition_size_0 + 3 * (slice_param->num_of_partitions - 2));
+    for (i = 1; i < 9; i++) {
+        if (i < slice_param->num_of_partitions) {
+            OUT_BCS_BATCH(batch, slice_param->partition_size[i]);
+            OUT_BCS_BATCH(batch, offset);
+            /* only declared partitions advance the running offset; sizes of
+             * unused slots are never read */
+            offset += slice_param->partition_size[i];
+        } else {
+            OUT_BCS_BATCH(batch, 0);
+            OUT_BCS_BATCH(batch, 0);
+        }
+    }
+
+    OUT_BCS_BATCH(batch,
+                  1 << 31 | /* concealment method */
+                  0);
+
+    ADVANCE_BCS_BATCH(batch);
+}
+
+/*
+ * Build and submit the BCS batch that decodes one VP8 frame.
+ *
+ * VP8 is decoded as exactly one slice per frame.  The function warns once
+ * and returns without emitting anything when the slice parameter buffer,
+ * the slice data buffer, the probability data or the IQ matrix buffer is
+ * missing, or when more than one slice was submitted.
+ */
+void
+gen8_mfd_vp8_decode_picture(VADriverContextP ctx,
+                            struct decode_state *decode_state,
+                            struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+    VAPictureParameterBufferVP8 *pic_param;
+    VASliceParameterBufferVP8 *slice_param;
+    dri_bo *slice_data_bo;
+
+    assert(decode_state->pic_param && decode_state->pic_param->buffer);
+    pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
+
+    /* one slice per frame */
+    if (decode_state->num_slice_params != 1 ||
+        (!decode_state->slice_params ||
+         !decode_state->slice_params[0] ||
+         (decode_state->slice_params[0]->num_elements != 1 || decode_state->slice_params[0]->buffer == NULL)) ||
+        (!decode_state->slice_datas ||
+         !decode_state->slice_datas[0] ||
+         !decode_state->slice_datas[0]->bo) ||
+        !decode_state->probability_data ||
+        /* gen8_mfd_vp8_pic_state() reads the IQ matrix unconditionally */
+        (!decode_state->iq_matrix ||
+         !decode_state->iq_matrix->buffer)) {
+        WARN_ONCE("Wrong parameters for VP8 decoding\n");
+
+        return;
+    }
+
+    slice_param = (VASliceParameterBufferVP8 *)decode_state->slice_params[0]->buffer;
+    slice_data_bo = decode_state->slice_datas[0]->bo;
+
+    gen8_mfd_vp8_decode_init(ctx, decode_state, gen7_mfd_context);
+    intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
+    intel_batchbuffer_emit_mi_flush(batch);
+    gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
+    gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
+    gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
+    gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
+    gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VP8, gen7_mfd_context);
+    gen8_mfd_vp8_pic_state(ctx, decode_state, gen7_mfd_context);
+    gen8_mfd_vp8_bsd_object(ctx, pic_param, slice_param, slice_data_bo, gen7_mfd_context);
+    intel_batchbuffer_end_atomic(batch);
+    intel_batchbuffer_flush(batch);
+}
+
+/*
+ * hw_context "run" hook for the GEN8 decoder.
+ *
+ * Runs the generic input sanity check and then dispatches to the codec
+ * specific decode routine selected by `profile`.  Returns the status of the
+ * sanity check if it fails, VA_STATUS_SUCCESS otherwise.  An unexpected
+ * profile trips an assert.
+ */
+static VAStatus
+gen8_mfd_decode_picture(VADriverContextP ctx,
+                        VAProfile profile,
+                        union codec_state *codec_state,
+                        struct hw_context *hw_context)
+{
+    struct gen7_mfd_context *mfd_context = (struct gen7_mfd_context *)hw_context;
+    struct decode_state *state = &codec_state->decode;
+    VAStatus status;
+
+    assert(mfd_context);
+
+    status = intel_decoder_sanity_check_input(ctx, profile, state);
+    if (status != VA_STATUS_SUCCESS)
+        return status;
+
+    mfd_context->wa_mpeg2_slice_vertical_position = -1;
+
+    switch (profile) {
+    case VAProfileMPEG2Simple:
+    case VAProfileMPEG2Main:
+        gen8_mfd_mpeg2_decode_picture(ctx, state, mfd_context);
+        break;
+
+    case VAProfileH264ConstrainedBaseline:
+    case VAProfileH264Main:
+    case VAProfileH264High:
+    case VAProfileH264StereoHigh:
+    case VAProfileH264MultiviewHigh:
+        gen8_mfd_avc_decode_picture(ctx, state, mfd_context);
+        break;
+
+    case VAProfileVC1Simple:
+    case VAProfileVC1Main:
+    case VAProfileVC1Advanced:
+        gen8_mfd_vc1_decode_picture(ctx, state, mfd_context);
+        break;
+
+    case VAProfileJPEGBaseline:
+        gen8_mfd_jpeg_decode_picture(ctx, state, mfd_context);
+        break;
+
+    case VAProfileVP8Version0_3:
+        gen8_mfd_vp8_decode_picture(ctx, state, mfd_context);
+        break;
+
+    default:
+        /* no decode routine is wired up for this profile */
+        assert(0);
+        break;
+    }
+
+    return VA_STATUS_SUCCESS;
+}
+
+/*
+ * hw_context "destroy" hook: drops the references held on the decoder's
+ * buffer objects, frees the batch buffer and finally the context itself.
+ */
+static void
+gen8_mfd_context_destroy(void *hw_context)
+{
+    struct gen7_mfd_context *mfd_context = (struct gen7_mfd_context *)hw_context;
+    /* buffer objects that are unreferenced and then reset to NULL, in order */
+    dri_bo **scratch_bos[] = {
+        &mfd_context->post_deblocking_output.bo,
+        &mfd_context->pre_deblocking_output.bo,
+        &mfd_context->intra_row_store_scratch_buffer.bo,
+        &mfd_context->deblocking_filter_row_store_scratch_buffer.bo,
+        &mfd_context->bsd_mpc_row_store_scratch_buffer.bo,
+        &mfd_context->mpr_row_store_scratch_buffer.bo,
+        &mfd_context->bitplane_read_buffer.bo,
+        &mfd_context->segmentation_buffer.bo,
+    };
+    unsigned int n;
+
+    for (n = 0; n < sizeof(scratch_bos) / sizeof(scratch_bos[0]); n++) {
+        dri_bo_unreference(*scratch_bos[n]);
+        *scratch_bos[n] = NULL;
+    }
+
+    dri_bo_unreference(mfd_context->jpeg_wa_slice_data_bo);
+
+    intel_batchbuffer_free(mfd_context->base.batch);
+    free(mfd_context);
+}
+
+/*
+ * MPEG-2 specific context setup: every load_*_quantiser_matrix flag of the
+ * cached IQ matrix is set to -1.
+ */
+static void gen8_mfd_mpeg2_context_init(VADriverContextP ctx,
+                                        struct gen7_mfd_context *gen7_mfd_context)
+{
+    gen7_mfd_context->iq_matrix.mpeg2.load_chroma_non_intra_quantiser_matrix = -1;
+    gen7_mfd_context->iq_matrix.mpeg2.load_chroma_intra_quantiser_matrix = -1;
+    gen7_mfd_context->iq_matrix.mpeg2.load_non_intra_quantiser_matrix = -1;
+    gen7_mfd_context->iq_matrix.mpeg2.load_intra_quantiser_matrix = -1;
+}
+
+/*
+ * Allocate and initialise the GEN8 decoder hardware context.
+ *
+ * The context is zero-allocated, its destroy/run hooks and batch buffer are
+ * set up, all reference surface slots are marked invalid, and codec specific
+ * initialisation is performed for the MPEG-2 and H.264 profiles.
+ *
+ * Returns the new context, or NULL if the allocation fails.
+ */
+struct hw_context *
+gen8_dec_hw_context_init(VADriverContextP ctx, struct object_config *obj_config)
+{
+    struct intel_driver_data *intel = intel_driver_data(ctx);
+    struct gen7_mfd_context *gen7_mfd_context = calloc(1, sizeof(struct gen7_mfd_context));
+    int i;
+
+    /* calloc() may fail; do not dereference a NULL context below */
+    if (!gen7_mfd_context)
+        return NULL;
+
+    gen7_mfd_context->base.destroy = gen8_mfd_context_destroy;
+    gen7_mfd_context->base.run = gen8_mfd_decode_picture;
+    gen7_mfd_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER, 0);
+
+    /* no reference surface is bound to any frame store yet */
+    for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
+        gen7_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID;
+        gen7_mfd_context->reference_surface[i].frame_store_id = -1;
+    }
+
+    gen7_mfd_context->jpeg_wa_surface_id = VA_INVALID_SURFACE;
+    gen7_mfd_context->segmentation_buffer.valid = 0;
+
+    switch (obj_config->profile) {
+    case VAProfileMPEG2Simple:
+    case VAProfileMPEG2Main:
+        gen8_mfd_mpeg2_context_init(ctx, gen7_mfd_context);
+        break;
+
+    case VAProfileH264ConstrainedBaseline:
+    case VAProfileH264Main:
+    case VAProfileH264High:
+    case VAProfileH264StereoHigh:
+    case VAProfileH264MultiviewHigh:
+        gen8_mfd_avc_context_init(ctx, gen7_mfd_context);
+        break;
+    default:
+        /* the remaining profiles need no extra setup here */
+        break;
+    }
+    return (struct hw_context *)gen7_mfd_context;
+}
diff --git a/src/gen8_post_processing.c b/src/gen8_post_processing.c
new file mode 100644
index 0000000..1e6068d
--- /dev/null
+++ b/src/gen8_post_processing.c
@@ -0,0 +1,1466 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Xiang Haihao <haihao.xiang@intel.com>
+ * Zhao Yakui <yakui.zhao@intel.com>
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+
+#include "intel_batchbuffer.h"
+#include "intel_driver.h"
+#include "i965_defines.h"
+#include "i965_structs.h"
+#include "i965_drv_video.h"
+#include "i965_post_processing.h"
+#include "i965_render.h"
+#include "intel_media.h"
+
+/* Size of one surface state slot on GEN8 */
+#define SURFACE_STATE_PADDED_SIZE SURFACE_STATE_PADDED_SIZE_GEN8
+
+/* Byte offset of surface state slot `index`; the argument is parenthesized so that expressions such as `base + 1` expand correctly */
+#define SURFACE_STATE_OFFSET(index) (SURFACE_STATE_PADDED_SIZE * (index))
+/* The binding table starts right after the MAX_PP_SURFACES surface state slots */
+#define BINDING_TABLE_OFFSET SURFACE_STATE_OFFSET(MAX_PP_SURFACES)
+
+#define GPU_ASM_BLOCK_WIDTH 16
+#define GPU_ASM_BLOCK_HEIGHT 8
+#define GPU_ASM_X_OFFSET_ALIGNMENT 4
+
+#define VA_STATUS_SUCCESS_1 0xFFFFFFFE
+
+static VAStatus pp_null_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
+ const struct i965_surface *src_surface,
+ const VARectangle *src_rect,
+ struct i965_surface *dst_surface,
+ const VARectangle *dst_rect,
+ void *filter_param);
+
+static VAStatus gen8_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
+ const struct i965_surface *src_surface,
+ const VARectangle *src_rect,
+ struct i965_surface *dst_surface,
+ const VARectangle *dst_rect,
+ void *filter_param);
+
+/* TODO: Modify the shaders and compile them again.
+ * Currently they are derived from Haswell. */
+static const uint32_t pp_null_gen8[][4] = { /* empty: no kernel binary is included */
+};
+
+static const uint32_t pp_nv12_load_save_nv12_gen8[][4] = {
+#include "shaders/post_processing/gen8/pl2_to_pl2.g8b"
+};
+
+static const uint32_t pp_nv12_load_save_pl3_gen8[][4] = {
+#include "shaders/post_processing/gen8/pl2_to_pl3.g8b"
+};
+
+static const uint32_t pp_pl3_load_save_nv12_gen8[][4] = {
+#include "shaders/post_processing/gen8/pl3_to_pl2.g8b"
+};
+
+static const uint32_t pp_pl3_load_save_pl3_gen8[][4] = {
+#include "shaders/post_processing/gen8/pl3_to_pl3.g8b"
+};
+
+static const uint32_t pp_nv12_scaling_gen8[][4] = { /* same pl2_to_pl2.g8b binary as pp_nv12_load_save_nv12_gen8 */
+#include "shaders/post_processing/gen8/pl2_to_pl2.g8b"
+};
+
+static const uint32_t pp_nv12_avs_gen8[][4] = { /* same pl2_to_pl2.g8b binary as pp_nv12_load_save_nv12_gen8 */
+#include "shaders/post_processing/gen8/pl2_to_pl2.g8b"
+};
+
+static const uint32_t pp_nv12_dndi_gen8[][4] = { /* empty: the gen7 include below is commented out */
+// #include "shaders/post_processing/gen7/dndi.g75b"
+};
+
+static const uint32_t pp_nv12_dn_gen8[][4] = { /* empty: the gen7 include below is commented out */
+// #include "shaders/post_processing/gen7/nv12_dn_nv12.g75b"
+};
+static const uint32_t pp_nv12_load_save_pa_gen8[][4] = {
+#include "shaders/post_processing/gen8/pl2_to_pa.g8b"
+};
+static const uint32_t pp_pl3_load_save_pa_gen8[][4] = {
+#include "shaders/post_processing/gen8/pl3_to_pa.g8b"
+};
+static const uint32_t pp_pa_load_save_nv12_gen8[][4] = {
+#include "shaders/post_processing/gen8/pa_to_pl2.g8b"
+};
+static const uint32_t pp_pa_load_save_pl3_gen8[][4] = {
+#include "shaders/post_processing/gen8/pa_to_pl3.g8b"
+};
+static const uint32_t pp_pa_load_save_pa_gen8[][4] = {
+#include "shaders/post_processing/gen8/pa_to_pa.g8b"
+};
+static const uint32_t pp_rgbx_load_save_nv12_gen8[][4] = {
+#include "shaders/post_processing/gen8/rgbx_to_nv12.g8b"
+};
+static const uint32_t pp_nv12_load_save_rgbx_gen8[][4] = {
+#include "shaders/post_processing/gen8/pl2_to_rgbx.g8b"
+};
+
+/*
+ * GEN8 post-processing module table.
+ *
+ * Each entry pairs a kernel description (name, PP_* id, binary, binary size,
+ * NULL) with the initialize callback used for that module.  Modules whose
+ * kernel binary is empty use pp_null_initialize; all others go through
+ * gen8_pp_plx_avs_initialize.
+ *
+ * NOTE(review): the entries presumably have to stay in PP_* id order because
+ * the table is indexed by that id elsewhere; confirm against the users of
+ * this table before reordering.
+ */
+static struct pp_module pp_modules_gen8[] = {
+    {
+        {
+            "NULL module (for testing)",
+            PP_NULL,
+            pp_null_gen8,
+            sizeof(pp_null_gen8),
+            NULL,
+        },
+
+        pp_null_initialize,
+    },
+
+    {
+        {
+            "NV12_NV12",
+            PP_NV12_LOAD_SAVE_N12,
+            pp_nv12_load_save_nv12_gen8,
+            sizeof(pp_nv12_load_save_nv12_gen8),
+            NULL,
+        },
+
+        gen8_pp_plx_avs_initialize,
+    },
+
+    {
+        {
+            "NV12_PL3",
+            PP_NV12_LOAD_SAVE_PL3,
+            pp_nv12_load_save_pl3_gen8,
+            sizeof(pp_nv12_load_save_pl3_gen8),
+            NULL,
+        },
+        gen8_pp_plx_avs_initialize,
+    },
+
+    {
+        {
+            "PL3_NV12",
+            PP_PL3_LOAD_SAVE_N12,
+            pp_pl3_load_save_nv12_gen8,
+            sizeof(pp_pl3_load_save_nv12_gen8),
+            NULL,
+        },
+
+        gen8_pp_plx_avs_initialize,
+    },
+
+    {
+        {
+            "PL3_PL3",
+            /* was PP_PL3_LOAD_SAVE_N12, a copy-paste of the previous entry */
+            PP_PL3_LOAD_SAVE_PL3,
+            pp_pl3_load_save_pl3_gen8,
+            sizeof(pp_pl3_load_save_pl3_gen8),
+            NULL,
+        },
+
+        gen8_pp_plx_avs_initialize,
+    },
+
+    {
+        {
+            "NV12 Scaling module",
+            PP_NV12_SCALING,
+            pp_nv12_scaling_gen8,
+            sizeof(pp_nv12_scaling_gen8),
+            NULL,
+        },
+
+        gen8_pp_plx_avs_initialize,
+    },
+
+    {
+        {
+            "NV12 AVS module",
+            PP_NV12_AVS,
+            pp_nv12_avs_gen8,
+            sizeof(pp_nv12_avs_gen8),
+            NULL,
+        },
+
+        gen8_pp_plx_avs_initialize,
+    },
+
+    {
+        {
+            "NV12 DNDI module",
+            PP_NV12_DNDI,
+            pp_nv12_dndi_gen8,
+            sizeof(pp_nv12_dndi_gen8),
+            NULL,
+        },
+
+        pp_null_initialize,
+    },
+
+    {
+        {
+            "NV12 DN module",
+            PP_NV12_DN,
+            pp_nv12_dn_gen8,
+            sizeof(pp_nv12_dn_gen8),
+            NULL,
+        },
+
+        pp_null_initialize,
+    },
+    {
+        {
+            "NV12_PA module",
+            PP_NV12_LOAD_SAVE_PA,
+            pp_nv12_load_save_pa_gen8,
+            sizeof(pp_nv12_load_save_pa_gen8),
+            NULL,
+        },
+
+        gen8_pp_plx_avs_initialize,
+    },
+
+    {
+        {
+            "PL3_PA module",
+            PP_PL3_LOAD_SAVE_PA,
+            pp_pl3_load_save_pa_gen8,
+            sizeof(pp_pl3_load_save_pa_gen8),
+            NULL,
+        },
+
+        gen8_pp_plx_avs_initialize,
+    },
+
+    {
+        {
+            "PA_NV12 module",
+            PP_PA_LOAD_SAVE_NV12,
+            pp_pa_load_save_nv12_gen8,
+            sizeof(pp_pa_load_save_nv12_gen8),
+            NULL,
+        },
+
+        gen8_pp_plx_avs_initialize,
+    },
+
+    {
+        {
+            "PA_PL3 module",
+            PP_PA_LOAD_SAVE_PL3,
+            pp_pa_load_save_pl3_gen8,
+            sizeof(pp_pa_load_save_pl3_gen8),
+            NULL,
+        },
+
+        gen8_pp_plx_avs_initialize,
+    },
+
+    {
+        {
+            "PA_PA module",
+            PP_PA_LOAD_SAVE_PA,
+            pp_pa_load_save_pa_gen8,
+            sizeof(pp_pa_load_save_pa_gen8),
+            NULL,
+        },
+
+        gen8_pp_plx_avs_initialize,
+    },
+
+    {
+        {
+            "RGBX_NV12 module",
+            PP_RGBX_LOAD_SAVE_NV12,
+            pp_rgbx_load_save_nv12_gen8,
+            sizeof(pp_rgbx_load_save_nv12_gen8),
+            NULL,
+        },
+
+        gen8_pp_plx_avs_initialize,
+    },
+
+    {
+        {
+            "NV12_RGBX module",
+            PP_NV12_LOAD_SAVE_RGBX,
+            pp_nv12_load_save_rgbx_gen8,
+            sizeof(pp_nv12_load_save_rgbx_gen8),
+            NULL,
+        },
+
+        gen8_pp_plx_avs_initialize,
+    },
+};
+
+/*
+ * Return the fourcc of `surface`: taken from the image format when the
+ * surface wraps a VA image, from the object_surface otherwise.
+ */
+static int
+pp_get_surface_fourcc(VADriverContextP ctx, const struct i965_surface *surface)
+{
+    if (surface->type == I965_SURFACE_TYPE_IMAGE) {
+        struct object_image *image = (struct object_image *)surface->base;
+
+        return image->image.format.fourcc;
+    }
+
+    return ((struct object_surface *)surface->base)->fourcc;
+}
+
+/*
+ * Translate an I915_TILING_* mode into the tiled_surface/tile_walk fields of
+ * a gen8 surface state.  Any other tiling value leaves the state untouched.
+ */
+static void
+gen8_pp_set_surface_tiling(struct gen8_surface_state *ss, unsigned int tiling)
+{
+    if (tiling == I915_TILING_NONE) {
+        ss->ss0.tiled_surface = 0;
+        ss->ss0.tile_walk = 0;
+    } else if (tiling == I915_TILING_X) {
+        ss->ss0.tiled_surface = 1;
+        ss->ss0.tile_walk = I965_TILEWALK_XMAJOR;
+    } else if (tiling == I915_TILING_Y) {
+        ss->ss0.tiled_surface = 1;
+        ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
+    }
+}
+
+/*
+ * Same as gen8_pp_set_surface_tiling(), but for the surface_state2 layout,
+ * where the tiling fields live in ss2.  Any other tiling value leaves the
+ * state untouched.
+ */
+static void
+gen8_pp_set_surface2_tiling(struct gen8_surface_state2 *ss, unsigned int tiling)
+{
+    if (tiling == I915_TILING_NONE) {
+        ss->ss2.tiled_surface = 0;
+        ss->ss2.tile_walk = 0;
+    } else if (tiling == I915_TILING_X) {
+        ss->ss2.tiled_surface = 1;
+        ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
+    } else if (tiling == I915_TILING_Y) {
+        ss->ss2.tiled_surface = 1;
+        ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
+    }
+}
+
+
+/*
+ * Write a 2D gen8 surface state for `surf_bo` (+ surf_bo_offset) into slot
+ * `index` of the surface state / binding table buffer, emit the relocation
+ * for its base address and point binding table entry `index` at the slot.
+ *
+ * width, height and pitch are given as real sizes; the state stores them
+ * minus one.  When is_target is non-zero the relocation also declares a
+ * render write domain.
+ */
+static void
+gen8_pp_set_surface_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
+                          dri_bo *surf_bo, unsigned long surf_bo_offset,
+                          int width, int height, int pitch, int format,
+                          int index, int is_target)
+{
+    struct gen8_surface_state *state;
+    dri_bo *table_bo;
+    unsigned int *binding_table;
+    unsigned int tiling_mode;
+    unsigned int swizzle_mode;
+
+    dri_bo_get_tiling(surf_bo, &tiling_mode, &swizzle_mode);
+    table_bo = pp_context->surface_state_binding_table.bo;
+    assert(table_bo);
+
+    dri_bo_map(table_bo, True);
+    assert(table_bo->virtual);
+
+    state = (struct gen8_surface_state *)((char *)table_bo->virtual + SURFACE_STATE_OFFSET(index));
+    memset(state, 0, sizeof(*state));
+
+    state->ss0.surface_type = I965_SURFACE_2D;
+    state->ss0.surface_format = format;
+    /* Always set 1(align 4 mode) per B-spec */
+    state->ss0.vertical_alignment = 1;
+    state->ss0.horizontal_alignment = 1;
+
+    state->ss2.width = width - 1;
+    state->ss2.height = height - 1;
+    state->ss3.pitch = pitch - 1;
+    state->ss8.base_addr = surf_bo->offset + surf_bo_offset;
+
+    gen8_pp_set_surface_tiling(state, tiling_mode);
+    gen8_render_set_surface_scs(state);
+
+    dri_bo_emit_reloc(table_bo,
+                      I915_GEM_DOMAIN_RENDER, is_target ? I915_GEM_DOMAIN_RENDER : 0,
+                      surf_bo_offset,
+                      SURFACE_STATE_OFFSET(index) + offsetof(struct gen8_surface_state, ss8),
+                      surf_bo);
+
+    binding_table = (unsigned int *)((char *)table_bo->virtual + BINDING_TABLE_OFFSET);
+    binding_table[index] = SURFACE_STATE_OFFSET(index);
+
+    dri_bo_unmap(table_bo);
+}
+
+
+/*
+ * Write a gen8 surface_state2 for `surf_bo` (+ surf_bo_offset) into slot
+ * `index` of the surface state / binding table buffer, emit the relocation
+ * for its base address (read-only: no write domain) and point binding table
+ * entry `index` at the slot.
+ *
+ * width, height and wpitch are given as real sizes; the state stores them
+ * minus one.  xoffset/yoffset are stored as the Cb offsets.
+ */
+static void
+gen8_pp_set_surface2_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
+                           dri_bo *surf_bo, unsigned long surf_bo_offset,
+                           int width, int height, int wpitch,
+                           int xoffset, int yoffset,
+                           int format, int interleave_chroma,
+                           int index)
+{
+    struct gen8_surface_state2 *state;
+    dri_bo *table_bo;
+    unsigned int *binding_table;
+    unsigned int tiling_mode;
+    unsigned int swizzle_mode;
+
+    dri_bo_get_tiling(surf_bo, &tiling_mode, &swizzle_mode);
+    table_bo = pp_context->surface_state_binding_table.bo;
+    assert(table_bo);
+
+    dri_bo_map(table_bo, True);
+    assert(table_bo->virtual);
+
+    state = (struct gen8_surface_state2 *)((char *)table_bo->virtual + SURFACE_STATE_OFFSET(index));
+    memset(state, 0, sizeof(*state));
+
+    state->ss1.cbcr_pixel_offset_v_direction = 0;
+    state->ss1.width = width - 1;
+    state->ss1.height = height - 1;
+
+    state->ss2.pitch = wpitch - 1;
+    state->ss2.interleave_chroma = interleave_chroma;
+    state->ss2.surface_format = format;
+
+    state->ss3.x_offset_for_cb = xoffset;
+    state->ss3.y_offset_for_cb = yoffset;
+
+    state->ss6.base_addr = surf_bo->offset + surf_bo_offset;
+
+    gen8_pp_set_surface2_tiling(state, tiling_mode);
+
+    dri_bo_emit_reloc(table_bo,
+                      I915_GEM_DOMAIN_RENDER, 0,
+                      surf_bo_offset,
+                      SURFACE_STATE_OFFSET(index) + offsetof(struct gen8_surface_state2, ss6),
+                      surf_bo);
+
+    binding_table = (unsigned int *)((char *)table_bo->virtual + BINDING_TABLE_OFFSET);
+    binding_table[index] = SURFACE_STATE_OFFSET(index);
+
+    dri_bo_unmap(table_bo);
+}
+
+static void
+gen8_pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
+ const struct i965_surface *surface,
+ int base_index, int is_target,
+ const VARectangle *rect,
+ int *width, int *height, int *pitch, int *offset)
+{ /* Derives per-plane width/height/pitch/offset (three entries each) for
+   * `surface` from `rect`, stores them in the caller's arrays, and programs
+   * the surface states starting at slot `base_index`: with
+   * gen8_pp_set_surface_state() when is_target is set, with
+   * gen8_pp_set_surface2_state() otherwise.
+   * NOTE(review): when get_fourcc_info() returns NULL the function returns
+   * without writing the output arrays or any surface state, so callers
+   * that read width[]/height[] afterwards see whatever they held before. */
+ struct object_surface *obj_surface;
+ struct object_image *obj_image;
+ dri_bo *bo;
+ int fourcc = pp_get_surface_fourcc(ctx, surface);
+ const i965_fourcc_info *fourcc_info = get_fourcc_info(fourcc);
+
+ if (fourcc_info == NULL)
+ return;
+ /* VA surface: geometry comes from the object_surface fields */
+ if (surface->type == I965_SURFACE_TYPE_SURFACE) {
+ obj_surface = (struct object_surface *)surface->base;
+ bo = obj_surface->bo;
+ width[0] = MIN(rect->x + rect->width, obj_surface->orig_width);
+ height[0] = MIN(rect->y + rect->height, obj_surface->orig_height);
+ pitch[0] = obj_surface->width;
+ offset[0] = 0;
+
+ if (fourcc_info->num_planes == 1 && is_target)
+ width[0] = width[0] * (fourcc_info->bpp[0] / 8); /* surface format is R8 */
+
+ width[1] = MIN(rect->x / fourcc_info->hfactor + rect->width / fourcc_info->hfactor, obj_surface->cb_cr_width);
+ height[1] = MIN(rect->y / fourcc_info->vfactor + rect->height / fourcc_info->vfactor, obj_surface->cb_cr_height);
+ pitch[1] = obj_surface->cb_cr_pitch;
+ offset[1] = obj_surface->y_cb_offset * obj_surface->width;
+
+ width[2] = MIN(rect->x / fourcc_info->hfactor + rect->width / fourcc_info->hfactor, obj_surface->cb_cr_width);
+ height[2] = MIN(rect->y / fourcc_info->vfactor + rect->height / fourcc_info->vfactor, obj_surface->cb_cr_height);
+ pitch[2] = obj_surface->cb_cr_pitch;
+ offset[2] = obj_surface->y_cr_offset * obj_surface->width;
+ } else {
+ int U = 0, V = 0; /* indices into image.pitches[]/offsets[] used for the U and V planes */
+
+ /* FIXME: add support for ARGB/ABGR image */
+ obj_image = (struct object_image *)surface->base;
+ bo = obj_image->bo;
+ width[0] = MIN(rect->x + rect->width, obj_image->image.width);
+ height[0] = MIN(rect->y + rect->height, obj_image->image.height);
+ pitch[0] = obj_image->image.pitches[0];
+ offset[0] = obj_image->image.offsets[0];
+
+ if (fourcc_info->num_planes == 1) {
+ if (is_target)
+ width[0] = width[0] * (fourcc_info->bpp[0] / 8); /* surface format is R8 */
+ } else if (fourcc_info->num_planes == 2) {
+ U = 1, V = 1; /* two planes: U and V both use plane 1 */
+ } else {
+ assert(fourcc_info->num_components == 3);
+
+ U = fourcc_info->components[1].plane;
+ V = fourcc_info->components[2].plane;
+ assert((U == 1 && V == 2) ||
+ (U == 2 && V == 1));
+ }
+
+ /* Always set width/height although they aren't used for fourcc_info->num_planes == 1 */
+ width[1] = MIN(rect->x / fourcc_info->hfactor + rect->width / fourcc_info->hfactor, obj_image->image.width / fourcc_info->hfactor);
+ height[1] = MIN(rect->y / fourcc_info->vfactor + rect->height / fourcc_info->vfactor, obj_image->image.height / fourcc_info->vfactor);
+ pitch[1] = obj_image->image.pitches[U];
+ offset[1] = obj_image->image.offsets[U];
+
+ width[2] = MIN(rect->x / fourcc_info->hfactor + rect->width / fourcc_info->hfactor, obj_image->image.width / fourcc_info->hfactor);
+ height[2] = MIN(rect->y / fourcc_info->vfactor + rect->height / fourcc_info->vfactor, obj_image->image.height / fourcc_info->vfactor);
+ pitch[2] = obj_image->image.pitches[V];
+ offset[2] = obj_image->image.offsets[V];
+ }
+ /* Destination: gen8_pp_set_surface_state() with is_target = 1, one state per plane */
+ if (is_target) {
+ gen8_pp_set_surface_state(ctx, pp_context,
+ bo, 0,
+ width[0] / 4, height[0], pitch[0], /* NOTE(review): width is divided by 4 (2 for the interleaved plane below); presumably the kernel addresses these surfaces in wider units - confirm */
+ I965_SURFACEFORMAT_R8_UINT,
+ base_index, 1);
+
+ if (fourcc_info->num_planes == 2) {
+ gen8_pp_set_surface_state(ctx, pp_context,
+ bo, offset[1],
+ width[1] / 2, height[1], pitch[1],
+ I965_SURFACEFORMAT_R8G8_SINT,
+ base_index + 1, 1);
+ } else if (fourcc_info->num_planes == 3) {
+ gen8_pp_set_surface_state(ctx, pp_context,
+ bo, offset[1],
+ width[1] / 4, height[1], pitch[1],
+ I965_SURFACEFORMAT_R8_SINT,
+ base_index + 1, 1);
+ gen8_pp_set_surface_state(ctx, pp_context,
+ bo, offset[2],
+ width[2] / 4, height[2], pitch[2],
+ I965_SURFACEFORMAT_R8_SINT,
+ base_index + 2, 1);
+ }
+ /* RGB destination: save_avs_rgb_swap is set only for BGRA/BGRX */
+ if (fourcc_info->format == I965_COLOR_RGB) {
+ struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
+ /* the format is MSB: X-B-G-R */
+ pp_static_parameter->grf2.save_avs_rgb_swap = 0;
+ if ((fourcc == VA_FOURCC_BGRA) ||
+ (fourcc == VA_FOURCC_BGRX)) {
+ /* It is stored as MSB: X-R-G-B */
+ pp_static_parameter->grf2.save_avs_rgb_swap = 1;
+ }
+ }
+ } else {
+ int format0 = SURFACE_FORMAT_Y8_UNORM; /* default format of plane 0; overridden below for YUY2, UYVY and RGB */
+
+ switch (fourcc) {
+ case VA_FOURCC_YUY2:
+ format0 = SURFACE_FORMAT_YCRCB_NORMAL;
+ break;
+
+ case VA_FOURCC_UYVY:
+ format0 = SURFACE_FORMAT_YCRCB_SWAPY;
+ break;
+
+ default:
+ break;
+ }
+ /* RGB source: src_avs_rgb_swap is set only for BGRA/BGRX */
+ if (fourcc_info->format == I965_COLOR_RGB) {
+ struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
+ /* Only R8G8B8A8_UNORM is supported for BGRX or RGBX */
+ format0 = SURFACE_FORMAT_R8G8B8A8_UNORM;
+ pp_static_parameter->grf2.src_avs_rgb_swap = 0;
+ if ((fourcc == VA_FOURCC_BGRA) ||
+ (fourcc == VA_FOURCC_BGRX)) {
+ pp_static_parameter->grf2.src_avs_rgb_swap = 1;
+ }
+ }
+ /* Source: gen8_pp_set_surface2_state(), one state per plane */
+ gen8_pp_set_surface2_state(ctx, pp_context,
+ bo, offset[0],
+ width[0], height[0], pitch[0],
+ 0, 0,
+ format0, 0,
+ base_index);
+
+ if (fourcc_info->num_planes == 2) {
+ gen8_pp_set_surface2_state(ctx, pp_context,
+ bo, offset[1],
+ width[1], height[1], pitch[1],
+ 0, 0,
+ SURFACE_FORMAT_R8B8_UNORM, 0,
+ base_index + 1);
+ } else if (fourcc_info->num_planes == 3) {
+ gen8_pp_set_surface2_state(ctx, pp_context,
+ bo, offset[1],
+ width[1], height[1], pitch[1],
+ 0, 0,
+ SURFACE_FORMAT_R8_UNORM, 0,
+ base_index + 1);
+ gen8_pp_set_surface2_state(ctx, pp_context,
+ bo, offset[2],
+ width[2], height[2], pitch[2],
+ 0, 0,
+ SURFACE_FORMAT_R8_UNORM, 0,
+ base_index + 2);
+ }
+ }
+}
+
+/* Step callback of the NULL module: always reports one horizontal step. */
+static int
+pp_null_x_steps(void *private_context)
+{
+    (void)private_context;
+
+    return 1;
+}
+
+/* Step callback of the NULL module: always reports one vertical step. */
+static int
+pp_null_y_steps(void *private_context)
+{
+    (void)private_context;
+
+    return 1;
+}
+
+/* Block parameter callback of the NULL module: does nothing and returns 0. */
+static int
+pp_null_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
+{
+    (void)pp_context;
+    (void)x;
+    (void)y;
+
+    return 0;
+}
+
+/*
+ * Initialize callback of the NULL module: installs the pp_null_* callbacks,
+ * clears the private context and copies the source surface flags to the
+ * destination surface.  Always returns VA_STATUS_SUCCESS.
+ */
+static VAStatus
+pp_null_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
+                   const struct i965_surface *src_surface,
+                   const VARectangle *src_rect,
+                   struct i965_surface *dst_surface,
+                   const VARectangle *dst_rect,
+                   void *filter_param)
+{
+    /* no per-module state is needed */
+    pp_context->private_context = NULL;
+
+    /* callbacks */
+    pp_context->pp_set_block_parameter = pp_null_set_block_parameter;
+    pp_context->pp_x_steps = pp_null_x_steps;
+    pp_context->pp_y_steps = pp_null_y_steps;
+
+    dst_surface->flags = src_surface->flags;
+
+    return VA_STATUS_SUCCESS;
+}
+
+/*
+ * Compute the left/right horizontal and the bottom vertical block masks for
+ * `dst_rect` and store them in pp_context.
+ *
+ * The x offset of the destination surface must be dword aligned, so the
+ * destination is extended on its left edge and the pixels that are not of
+ * interest are masked out.
+ */
+static void calculate_boundary_block_mask(struct i965_post_processing_context *pp_context, const VARectangle *dst_rect)
+{
+    const int left_pad = dst_rect->x % GPU_ASM_X_OFFSET_ALIGNMENT;
+    const int right_rem = (dst_rect->width + left_pad) % GPU_ASM_BLOCK_WIDTH;
+    const int bottom_rem = dst_rect->height % GPU_ASM_BLOCK_HEIGHT;
+    unsigned int left_mask = 0xffff;
+    int bit;
+
+    /* left edge: enable every column from the misalignment up to the block width */
+    if (left_pad) {
+        left_mask = 0;
+        for (bit = left_pad; bit < GPU_ASM_BLOCK_WIDTH; bit++)
+            left_mask |= 1 << bit;
+    }
+    pp_context->block_horizontal_mask_left = left_mask;
+
+    /* right edge: only the leftover columns of the last block are enabled */
+    if (right_rem)
+        pp_context->block_horizontal_mask_right = (1 << right_rem) - 1;
+    else
+        pp_context->block_horizontal_mask_right = 0xffff;
+
+    /* bottom edge: only the leftover rows of the last block are enabled */
+    if (bottom_rem)
+        pp_context->block_vertical_mask_bottom = (1 << bottom_rem) - 1;
+    else
+        pp_context->block_vertical_mask_bottom = 0xff;
+}
+
+/* Number of horizontal steps of the AVS module: destination width / 16. */
+static int
+gen7_pp_avs_x_steps(void *private_context)
+{
+    return ((struct pp_avs_context *)private_context)->dest_w / 16;
+}
+
+/* Number of vertical steps of the AVS module: destination height / 16. */
+static int
+gen7_pp_avs_y_steps(void *private_context)
+{
+    return ((struct pp_avs_context *)private_context)->dest_h / 16;
+}
+
+/*
+ * Fill the inline parameters for block (x, y) of the AVS module: the block
+ * origin inside the destination (16 pixels per step plus the destination
+ * offset), the all-ones constant and the horizontal scaling step.
+ * Always returns 0.
+ */
+static int
+gen7_pp_avs_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
+{
+    struct gen7_pp_inline_parameter *inline_param = pp_context->pp_inline_parameter;
+    struct pp_avs_context *avs = (struct pp_avs_context *)pp_context->private_context;
+
+    inline_param->grf7.destination_block_horizontal_origin = x * 16 + avs->dest_x;
+    inline_param->grf7.destination_block_vertical_origin = y * 16 + avs->dest_y;
+    inline_param->grf7.constant_0 = 0xffffffff;
+    inline_param->grf7.sampler_load_main_video_x_scaling_step = avs->horiz_range / avs->src_w;
+
+    return 0;
+}
+
+/*
+ * Set the packed Y/U/V component offsets in the static parameters for the
+ * YUY2 and UYVY formats.  Any other fourcc leaves the parameters untouched.
+ */
+static void gen7_update_src_surface_uv_offset(VADriverContextP ctx,
+                                              struct i965_post_processing_context *pp_context,
+                                              const struct i965_surface *surface)
+{
+    struct gen7_pp_static_parameter *static_param = pp_context->pp_static_parameter;
+
+    switch (pp_get_surface_fourcc(ctx, surface)) {
+    case VA_FOURCC_YUY2:
+        static_param->grf2.di_destination_packed_y_component_offset = 0;
+        static_param->grf2.di_destination_packed_u_component_offset = 1;
+        static_param->grf2.di_destination_packed_v_component_offset = 3;
+        break;
+
+    case VA_FOURCC_UYVY:
+        static_param->grf2.di_destination_packed_y_component_offset = 1;
+        static_param->grf2.di_destination_packed_u_component_offset = 0;
+        static_param->grf2.di_destination_packed_v_component_offset = 2;
+        break;
+
+    default:
+        break;
+    }
+}
+
+static VAStatus
+gen8_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
+ const struct i965_surface *src_surface,
+ const VARectangle *src_rect,
+ struct i965_surface *dst_surface,
+ const VARectangle *dst_rect,
+ void *filter_param)
+{
+/* TODO: Add the sampler_8x8 state */
+ struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->pp_avs_context;
+ struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
+ struct gen8_sampler_8x8_avs *sampler_8x8;
+ struct i965_sampler_8x8_coefficient *sampler_8x8_state;
+ int i;
+ int width[3], height[3], pitch[3], offset[3];
+ int src_width, src_height;
+ unsigned char *cc_ptr;
+
+ memset(pp_static_parameter, 0, sizeof(struct gen7_pp_static_parameter));
+
+ /* source surface */
+ gen8_pp_set_media_rw_message_surface(ctx, pp_context, src_surface, 0, 0,
+ src_rect,
+ width, height, pitch, offset);
+ src_height = height[0];
+ src_width = width[0];
+
+ /* destination surface */
+ gen8_pp_set_media_rw_message_surface(ctx, pp_context, dst_surface, 24, 1,
+ dst_rect,
+ width, height, pitch, offset);
+
+ /* sampler 8x8 state */
+ dri_bo_map(pp_context->dynamic_state.bo, True);
+ assert(pp_context->dynamic_state.bo->virtual);
+
+ cc_ptr = (unsigned char *) pp_context->dynamic_state.bo->virtual +
+ pp_context->sampler_offset;
+ /* Currently only one gen8 sampler_8x8 is initialized */
+ sampler_8x8 = (struct gen8_sampler_8x8_avs *) cc_ptr;
+ memset(sampler_8x8, 0, sizeof(*sampler_8x8));
+
+ sampler_8x8->dw0.gain_factor = 44;
+ sampler_8x8->dw0.weak_edge_threshold = 1;
+ sampler_8x8->dw0.strong_edge_threshold = 8;
+ /* Use the value like that on Ivy instead of default
+ * sampler_8x8->dw0.r3x_coefficient = 5;
+ */
+ sampler_8x8->dw0.r3x_coefficient = 27;
+ sampler_8x8->dw0.r3c_coefficient = 5;
+
+ sampler_8x8->dw2.global_noise_estimation = 255;
+ sampler_8x8->dw2.non_edge_weight = 1;
+ sampler_8x8->dw2.regular_weight = 2;
+ sampler_8x8->dw2.strong_edge_weight = 7;
+ /* Use the value like that on Ivy instead of default
+ * sampler_8x8->dw2.r5x_coefficient = 7;
+ * sampler_8x8->dw2.r5cx_coefficient = 7;
+ * sampler_8x8->dw2.r5c_coefficient = 7;
+ */
+ sampler_8x8->dw2.r5x_coefficient = 9;
+ sampler_8x8->dw2.r5cx_coefficient = 8;
+ sampler_8x8->dw2.r5c_coefficient = 3;
+
+ sampler_8x8->dw3.sin_alpha = 101; /* sin_alpha = 0 */
+ sampler_8x8->dw3.cos_alpha = 79; /* cos_alpha = 0 */
+ sampler_8x8->dw3.sat_max = 0x1f;
+ sampler_8x8->dw3.hue_max = 14;
+ /* The 8tap filter will determine whether the adaptive Filter is
+ * applied for all channels(dw153).
+ * If the 8tap filter is disabled, the adaptive filter should be disabled.
+ * Only when 8tap filter is enabled, it can be enabled or not.
+ */
+ sampler_8x8->dw3.enable_8tap_filter = 3;
+ sampler_8x8->dw3.ief4_smooth_enable = 0;
+
+ sampler_8x8->dw4.s3u = 0;
+ sampler_8x8->dw4.diamond_margin = 4;
+ sampler_8x8->dw4.vy_std_enable = 0;
+ sampler_8x8->dw4.umid = 110;
+ sampler_8x8->dw4.vmid = 154;
+
+ sampler_8x8->dw5.diamond_dv = 0;
+ sampler_8x8->dw5.diamond_th = 35;
+ sampler_8x8->dw5.diamond_alpha = 100; /* diamond_alpha = 0 */
+ sampler_8x8->dw5.hs_margin = 3;
+ sampler_8x8->dw5.diamond_du = 2;
+
+ sampler_8x8->dw6.y_point1 = 46;
+ sampler_8x8->dw6.y_point2 = 47;
+ sampler_8x8->dw6.y_point3 = 254;
+ sampler_8x8->dw6.y_point4 = 255;
+
+ sampler_8x8->dw7.inv_margin_vyl = 3300; /* inv_margin_vyl = 0 */
+
+ sampler_8x8->dw8.inv_margin_vyu = 1600; /* inv_margin_vyu = 0 */
+ sampler_8x8->dw8.p0l = 46;
+ sampler_8x8->dw8.p1l = 216;
+
+ sampler_8x8->dw9.p2l = 236;
+ sampler_8x8->dw9.p3l = 236;
+ sampler_8x8->dw9.b0l = 133;
+ sampler_8x8->dw9.b1l = 130;
+
+ sampler_8x8->dw10.b2l = 130;
+ sampler_8x8->dw10.b3l = 130;
+ /* s0l = -5 / 256. s2.8 */
+ sampler_8x8->dw10.s0l = 1029; /* s0l = 0 */
+ sampler_8x8->dw10.y_slope2 = 31; /* y_slop2 = 0 */
+
+ sampler_8x8->dw11.s1l = 0;
+ sampler_8x8->dw11.s2l = 0;
+
+ sampler_8x8->dw12.s3l = 0;
+ sampler_8x8->dw12.p0u = 46;
+ sampler_8x8->dw12.p1u = 66;
+ sampler_8x8->dw12.y_slope1 = 31; /* y_slope1 = 0 */
+
+ sampler_8x8->dw13.p2u = 130;
+ sampler_8x8->dw13.p3u = 236;
+ sampler_8x8->dw13.b0u = 143;
+ sampler_8x8->dw13.b1u = 163;
+
+ sampler_8x8->dw14.b2u = 200;
+ sampler_8x8->dw14.b3u = 140;
+ sampler_8x8->dw14.s0u = 256; /* s0u = 0 */
+
+ sampler_8x8->dw15.s1u = 113; /* s1u = 0 */
+ sampler_8x8->dw15.s2u = 1203; /* s2u = 0 */
+
+ sampler_8x8_state = sampler_8x8->coefficients;
+
+ for (i = 0; i < 17; i++) {
+ float coff;
+ coff = i;
+ coff = coff / 16;
+
+ memset(sampler_8x8_state, 0, sizeof(*sampler_8x8_state));
+ /* for Y channel, currently ignore */
+ sampler_8x8_state->dw0.table_0x_filter_c0 = 0x0;
+ sampler_8x8_state->dw0.table_0x_filter_c1 = 0x0;
+ sampler_8x8_state->dw0.table_0x_filter_c2 = 0x0;
+ sampler_8x8_state->dw0.table_0x_filter_c3 =
+ intel_format_convert(1 - coff, 1, 6, 0);
+ sampler_8x8_state->dw1.table_0x_filter_c4 =
+ intel_format_convert(coff, 1, 6, 0);
+ sampler_8x8_state->dw1.table_0x_filter_c5 = 0x0;
+ sampler_8x8_state->dw1.table_0x_filter_c6 = 0x0;
+ sampler_8x8_state->dw1.table_0x_filter_c7 = 0x0;
+ sampler_8x8_state->dw2.table_0y_filter_c0 = 0x0;
+ sampler_8x8_state->dw2.table_0y_filter_c1 = 0x0;
+ sampler_8x8_state->dw2.table_0y_filter_c2 = 0x0;
+ sampler_8x8_state->dw2.table_0y_filter_c3 =
+ intel_format_convert(1 - coff, 1, 6, 0);
+ sampler_8x8_state->dw3.table_0y_filter_c4 =
+ intel_format_convert(coff, 1, 6, 0);
+ sampler_8x8_state->dw3.table_0y_filter_c5 = 0x0;
+ sampler_8x8_state->dw3.table_0y_filter_c6 = 0x0;
+ sampler_8x8_state->dw3.table_0y_filter_c7 = 0x0;
+ /* for U/V channel, 0.25 */
+ sampler_8x8_state->dw4.table_1x_filter_c0 = 0x0;
+ sampler_8x8_state->dw4.table_1x_filter_c1 = 0x0;
+ sampler_8x8_state->dw4.table_1x_filter_c2 = 0x0;
+ sampler_8x8_state->dw4.table_1x_filter_c3 =
+ intel_format_convert(1 - coff, 1, 6, 0);
+ sampler_8x8_state->dw5.table_1x_filter_c4 =
+ intel_format_convert(coff, 1, 6, 0);
+ sampler_8x8_state->dw5.table_1x_filter_c5 = 0x00;
+ sampler_8x8_state->dw5.table_1x_filter_c6 = 0x0;
+ sampler_8x8_state->dw5.table_1x_filter_c7 = 0x0;
+ sampler_8x8_state->dw6.table_1y_filter_c0 = 0x0;
+ sampler_8x8_state->dw6.table_1y_filter_c1 = 0x0;
+ sampler_8x8_state->dw6.table_1y_filter_c2 = 0x0;
+ sampler_8x8_state->dw6.table_1y_filter_c3 =
+ intel_format_convert(1 - coff, 1, 6, 0);
+ sampler_8x8_state->dw7.table_1y_filter_c4 =
+ intel_format_convert(coff, 1, 6,0);
+ sampler_8x8_state->dw7.table_1y_filter_c5 = 0x0;
+ sampler_8x8_state->dw7.table_1y_filter_c6 = 0x0;
+ sampler_8x8_state->dw7.table_1y_filter_c7 = 0x0;
+ sampler_8x8_state++;
+ }
+
+ sampler_8x8->dw152.default_sharpness_level = 0;
+ sampler_8x8->dw153.adaptive_filter_for_all_channel = 1;
+ sampler_8x8->dw153.bypass_y_adaptive_filtering = 1;
+ sampler_8x8->dw153.bypass_x_adaptive_filtering = 1;
+
+ dri_bo_unmap(pp_context->dynamic_state.bo);
+
+
+ /* private function & data */
+ pp_context->pp_x_steps = gen7_pp_avs_x_steps;
+ pp_context->pp_y_steps = gen7_pp_avs_y_steps;
+ pp_context->private_context = &pp_context->pp_avs_context;
+ pp_context->pp_set_block_parameter = gen7_pp_avs_set_block_parameter;
+
+ pp_avs_context->dest_x = dst_rect->x;
+ pp_avs_context->dest_y = dst_rect->y;
+ pp_avs_context->dest_w = ALIGN(dst_rect->width, 16);
+ pp_avs_context->dest_h = ALIGN(dst_rect->height, 16);
+ pp_avs_context->src_w = src_rect->width;
+ pp_avs_context->src_h = src_rect->height;
+ pp_avs_context->horiz_range = (float)src_rect->width / src_width;
+
+ int dw = (pp_avs_context->src_w - 1) / 16 + 1;
+ dw = MAX(dw, dst_rect->width);
+
+ pp_static_parameter->grf1.pointer_to_inline_parameter = 7;
+ pp_static_parameter->grf2.avs_wa_enable = 0; /* It is not required on GEN8+ */
+ pp_static_parameter->grf2.alpha = 255;
+
+ pp_static_parameter->grf3.sampler_load_horizontal_scaling_step_ratio = (float) pp_avs_context->src_w / dw;
+ pp_static_parameter->grf4.sampler_load_vertical_scaling_step = (float) src_rect->height / src_height / dst_rect->height;
+ pp_static_parameter->grf5.sampler_load_vertical_frame_origin = (float) src_rect->y / src_height -
+ (float) pp_avs_context->dest_y * pp_static_parameter->grf4.sampler_load_vertical_scaling_step;
+ pp_static_parameter->grf6.sampler_load_horizontal_frame_origin = (float) src_rect->x / src_width -
+ (float) pp_avs_context->dest_x * pp_avs_context->horiz_range / dw;
+
+ gen7_update_src_surface_uv_offset(ctx, pp_context, dst_surface);
+
+ dst_surface->flags = src_surface->flags;
+
+ return VA_STATUS_SUCCESS;
+}
+
+/*
+ * Prepare all per-run state for one post-processing pass.
+ *
+ * Re-allocates the surface-state/binding-table buffer and the dynamic state
+ * buffer, lays out the CURBE, interface descriptor and sampler regions inside
+ * the dynamic state buffer (each region starts 64-byte aligned), clears the
+ * static and inline kernel parameters, and finally runs the initializer of
+ * the module selected by pp_index.
+ *
+ * Returns the status reported by the module initializer,
+ * VA_STATUS_ERROR_UNIMPLEMENTED when the module has no initializer,
+ * VA_STATUS_ERROR_INVALID_PARAMETER for an out-of-range pp_index, or
+ * VA_STATUS_ERROR_ALLOCATION_FAILED when a required buffer is unavailable.
+ */
+static VAStatus
+gen8_pp_initialize(
+    VADriverContextP ctx,
+    struct i965_post_processing_context *pp_context,
+    const struct i965_surface *src_surface,
+    const VARectangle *src_rect,
+    struct i965_surface *dst_surface,
+    const VARectangle *dst_rect,
+    int pp_index,
+    void * filter_param
+)
+{
+    VAStatus va_status;
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    dri_bo *bo;
+    int bo_size;
+    unsigned int end_offset;
+    struct pp_module *pp_module;
+    int static_param_size, inline_param_size;
+
+    /* Keep the assert for debug builds, but never index pp_modules[] out of
+     * range when asserts are compiled out. */
+    assert(pp_index >= PP_NULL && pp_index < NUM_PP_MODULES);
+    if (pp_index < PP_NULL || pp_index >= NUM_PP_MODULES)
+        return VA_STATUS_ERROR_INVALID_PARAMETER;
+
+    /* The parameter blocks come from unchecked calloc() calls at context
+     * init; they are memset below, so refuse to continue without them. */
+    if (!pp_context->pp_static_parameter || !pp_context->pp_inline_parameter)
+        return VA_STATUS_ERROR_ALLOCATION_FAILED;
+
+    dri_bo_unreference(pp_context->surface_state_binding_table.bo);
+    bo = dri_bo_alloc(i965->intel.bufmgr,
+                      "surface state & binding table",
+                      (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_PP_SURFACES,
+                      4096);
+    assert(bo);
+    /* Store the result before checking it so the context never keeps a
+     * pointer to the buffer that was just released. */
+    pp_context->surface_state_binding_table.bo = bo;
+    if (!bo)
+        return VA_STATUS_ERROR_ALLOCATION_FAILED;
+
+    pp_context->idrt.num_interface_descriptors = 0;
+
+    pp_context->sampler_size = 2 * 4096;
+
+    bo_size = 4096 + pp_context->curbe_size + pp_context->sampler_size
+        + pp_context->idrt_size;
+
+    dri_bo_unreference(pp_context->dynamic_state.bo);
+    bo = dri_bo_alloc(i965->intel.bufmgr,
+                      "dynamic_state",
+                      bo_size,
+                      4096);
+
+    assert(bo);
+    pp_context->dynamic_state.bo = bo;
+    if (!bo)
+        return VA_STATUS_ERROR_ALLOCATION_FAILED;
+    pp_context->dynamic_state.bo_size = bo_size;
+
+    end_offset = 0;
+    pp_context->dynamic_state.end_offset = 0;
+
+    /* Constant buffer offset */
+    pp_context->curbe_offset = ALIGN(end_offset, 64);
+    end_offset = pp_context->curbe_offset + pp_context->curbe_size;
+
+    /* Interface descriptor offset */
+    pp_context->idrt_offset = ALIGN(end_offset, 64);
+    end_offset = pp_context->idrt_offset + pp_context->idrt_size;
+
+    /* Sampler state offset */
+    pp_context->sampler_offset = ALIGN(end_offset, 64);
+    end_offset = pp_context->sampler_offset + pp_context->sampler_size;
+
+    /* update the end offset of dynamic_state */
+    pp_context->dynamic_state.end_offset = ALIGN(end_offset, 64);
+
+    static_param_size = sizeof(struct gen7_pp_static_parameter);
+    inline_param_size = sizeof(struct gen7_pp_inline_parameter);
+
+    memset(pp_context->pp_static_parameter, 0, static_param_size);
+    memset(pp_context->pp_inline_parameter, 0, inline_param_size);
+
+    pp_context->current_pp = pp_index;
+    pp_module = &pp_context->pp_modules[pp_index];
+
+    if (pp_module->initialize)
+        va_status = pp_module->initialize(ctx, pp_context,
+                                          src_surface,
+                                          src_rect,
+                                          dst_surface,
+                                          dst_rect,
+                                          filter_param);
+    else
+        va_status = VA_STATUS_ERROR_UNIMPLEMENTED;
+
+    /* Computed regardless of the initializer result, as before */
+    calculate_boundary_block_mask(pp_context, dst_rect);
+
+    return va_status;
+}
+
+/*
+ * Append one interface descriptor for the currently selected post-processing
+ * module to the descriptor table kept in the dynamic state buffer.
+ *
+ * The descriptor is written at idrt_offset, indexed by the number of
+ * descriptors already recorded; that count is incremented before returning.
+ */
+static void
+gen8_pp_interface_descriptor_table(VADriverContextP ctx,
+ struct i965_post_processing_context *pp_context)
+{
+ struct gen8_interface_descriptor_data *desc;
+ dri_bo *bo;
+ int pp_index = pp_context->current_pp;
+ unsigned char *cc_ptr;
+
+ bo = pp_context->dynamic_state.bo;
+
+ dri_bo_map(bo, 1);
+ assert(bo->virtual);
+ cc_ptr = (unsigned char *)bo->virtual + pp_context->idrt_offset;
+
+ /* The addition is in descriptor units: skip the entries already written */
+ desc = (struct gen8_interface_descriptor_data *) cc_ptr +
+ pp_context->idrt.num_interface_descriptors;
+
+ memset(desc, 0, sizeof(*desc));
+ /* Kernel offsets are assigned 64-byte aligned at context init, hence >> 6 */
+ desc->desc0.kernel_start_pointer =
+ pp_context->pp_modules[pp_index].kernel.kernel_offset >> 6; /* reloc */
+ desc->desc2.single_program_flow = 1;
+ desc->desc2.floating_point_mode = FLOATING_POINT_IEEE_754;
+ desc->desc3.sampler_count = 0; /* 1 - 4 samplers used */
+ /* Offset within the dynamic state buffer, stored in 32-byte units */
+ desc->desc3.sampler_state_pointer = pp_context->sampler_offset >> 5;
+ desc->desc4.binding_table_entry_count = 0;
+ desc->desc4.binding_table_pointer = (BINDING_TABLE_OFFSET >> 5);
+ desc->desc5.constant_urb_entry_read_offset = 0;
+
+ desc->desc5.constant_urb_entry_read_length = 6; /* grf 1-6 */
+
+ dri_bo_unmap(bo);
+ pp_context->idrt.num_interface_descriptors++;
+}
+
+
+/*
+ * Copy the static kernel parameters (the CURBE payload) into the dynamic
+ * state buffer at curbe_offset.
+ */
+static void
+gen8_pp_upload_constants(VADriverContextP ctx,
+                         struct i965_post_processing_context *pp_context)
+{
+    const int curbe_bytes = sizeof(struct gen7_pp_static_parameter);
+    unsigned char *curbe_dst;
+
+    /* The kernels expect exactly this parameter block size */
+    assert(sizeof(struct gen7_pp_static_parameter) == 192);
+
+    dri_bo_map(pp_context->dynamic_state.bo, 1);
+    assert(pp_context->dynamic_state.bo->virtual);
+
+    curbe_dst = (unsigned char *) pp_context->dynamic_state.bo->virtual;
+    curbe_dst += pp_context->curbe_offset;
+    memcpy(curbe_dst, pp_context->pp_static_parameter, curbe_bytes);
+
+    dri_bo_unmap(pp_context->dynamic_state.bo);
+}
+
+/*
+ * Fill the CPU-written parts of the dynamic state buffer for the current
+ * pass: the interface descriptor first, then the static kernel constants.
+ */
+static void
+gen8_pp_states_setup(VADriverContextP ctx,
+ struct i965_post_processing_context *pp_context)
+{
+ gen8_pp_interface_descriptor_table(ctx, pp_context);
+ gen8_pp_upload_constants(ctx, pp_context);
+}
+
+/*
+ * Emit a one-DWord PIPELINE_SELECT command choosing the media pipeline.
+ * ctx is not used.
+ */
+static void
+gen6_pp_pipeline_select(VADriverContextP ctx,
+ struct i965_post_processing_context *pp_context)
+{
+ struct intel_batchbuffer *batch = pp_context->batch;
+
+ BEGIN_BATCH(batch, 1);
+ OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
+ ADVANCE_BATCH(batch);
+}
+
+/*
+ * Emit the 16-DWord STATE_BASE_ADDRESS command.
+ *
+ * Surface state, dynamic state and instruction bases are relocated to the
+ * context's surface_state_binding_table, dynamic_state and instruction_state
+ * buffers; the general and indirect object bases are programmed as zero.
+ */
+static void
+gen8_pp_state_base_address(VADriverContextP ctx,
+ struct i965_post_processing_context *pp_context)
+{
+ struct intel_batchbuffer *batch = pp_context->batch;
+
+ BEGIN_BATCH(batch, 16);
+ OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (16 - 2));
+ /* DW1 General state address */
+ OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
+ OUT_BATCH(batch, 0);
+ OUT_BATCH(batch, 0);
+ /* DW4. Surface state address */
+ OUT_RELOC(batch, pp_context->surface_state_binding_table.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
+ OUT_BATCH(batch, 0);
+ /* DW6. Dynamic state address */
+ OUT_RELOC(batch, pp_context->dynamic_state.bo, I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_SAMPLER,
+ 0, 0 | BASE_ADDRESS_MODIFY);
+ OUT_BATCH(batch, 0);
+
+ /* DW8. Indirect object address */
+ OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
+ OUT_BATCH(batch, 0);
+
+ /* DW10. Instruction base address */
+ OUT_RELOC(batch, pp_context->instruction_state.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
+ OUT_BATCH(batch, 0);
+
+ /* DW12-15. NOTE(review): presumably the four buffer-size/upper-bound
+ * fields, all set to the same maximum - confirm against the hardware docs */
+ OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY);
+ OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY);
+ OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY);
+ OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY);
+ ADVANCE_BATCH(batch);
+}
+
+/*
+ * Emit the 9-DWord MEDIA_VFE_STATE command from the values cached in
+ * pp_context->vfe_gpu_state (thread count, URB entry count, URB entry size
+ * and CURBE allocation size). All other DWords are written as zero.
+ */
+static void
+gen8_pp_vfe_state(VADriverContextP ctx,
+ struct i965_post_processing_context *pp_context)
+{
+ struct intel_batchbuffer *batch = pp_context->batch;
+
+ BEGIN_BATCH(batch, 9);
+ OUT_BATCH(batch, CMD_MEDIA_VFE_STATE | (9 - 2));
+ OUT_BATCH(batch, 0);
+ OUT_BATCH(batch, 0);
+ /* Thread count is programmed as (max_num_threads - 1) */
+ OUT_BATCH(batch,
+ (pp_context->vfe_gpu_state.max_num_threads - 1) << 16 |
+ pp_context->vfe_gpu_state.num_urb_entries << 8);
+ OUT_BATCH(batch, 0);
+ OUT_BATCH(batch,
+ (pp_context->vfe_gpu_state.urb_entry_size) << 16 |
+ /* URB Entry Allocation Size, in 256 bits unit */
+ (pp_context->vfe_gpu_state.curbe_allocation_size));
+ /* CURBE Allocation Size, in 256 bits unit */
+ OUT_BATCH(batch, 0);
+ OUT_BATCH(batch, 0);
+ OUT_BATCH(batch, 0);
+ ADVANCE_BATCH(batch);
+}
+
+/*
+ * Emit MEDIA_STATE_FLUSH followed by MEDIA_INTERFACE_DESCRIPTOR_LOAD.
+ *
+ * The load command carries the total byte size of the descriptors recorded
+ * so far and the table's offset (idrt_offset) in the dynamic state buffer.
+ */
+static void
+gen8_interface_descriptor_load(VADriverContextP ctx,
+ struct i965_post_processing_context *pp_context)
+{
+ struct intel_batchbuffer *batch = pp_context->batch;
+
+ /* 2 DWords for the flush + 4 DWords for the descriptor load */
+ BEGIN_BATCH(batch, 6);
+
+ OUT_BATCH(batch, CMD_MEDIA_STATE_FLUSH);
+ OUT_BATCH(batch, 0);
+
+ OUT_BATCH(batch, CMD_MEDIA_INTERFACE_DESCRIPTOR_LOAD | (4 - 2));
+ OUT_BATCH(batch, 0);
+ OUT_BATCH(batch,
+ pp_context->idrt.num_interface_descriptors * sizeof(struct gen8_interface_descriptor_data));
+ OUT_BATCH(batch, pp_context->idrt_offset);
+ ADVANCE_BATCH(batch);
+}
+
+/*
+ * Emit the 4-DWord MEDIA_CURBE_LOAD command.
+ *
+ * The command carries the byte size of the static parameter block and its
+ * offset (curbe_offset) inside the dynamic state buffer. ctx is not used.
+ */
+static void
+gen8_pp_curbe_load(VADriverContextP ctx,
+                   struct i965_post_processing_context *pp_context)
+{
+    struct intel_batchbuffer *batch = pp_context->batch;
+    const int param_size = sizeof(struct gen7_pp_static_parameter);
+
+    BEGIN_BATCH(batch, 4);
+    OUT_BATCH(batch, CMD_MEDIA_CURBE_LOAD | (4 - 2));
+    OUT_BATCH(batch, 0);
+    OUT_BATCH(batch,
+              param_size);
+    OUT_BATCH(batch, pp_context->curbe_offset);
+    ADVANCE_BATCH(batch);
+}
+
+/*
+ * Build a second-level batch with one MEDIA_OBJECT (plus a trailing
+ * MEDIA_STATE_FLUSH) per block that the current module wants processed, chain
+ * to it from the main batch with MI_BATCH_BUFFER_START, and submit.
+ *
+ * The block grid is x_steps by y_steps as reported by the module; a block is
+ * emitted only when pp_set_block_parameter() returns 0 for it. The inline
+ * parameters are copied into each MEDIA_OBJECT after its 6 header DWords.
+ *
+ * On buffer allocation or mapping failure a warning is logged and nothing is
+ * emitted for this pass.
+ */
+static void
+gen8_pp_object_walker(VADriverContextP ctx,
+                      struct i965_post_processing_context *pp_context)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct intel_batchbuffer *batch = pp_context->batch;
+    int x, x_steps, y, y_steps;
+    int param_size, command_length_in_dws, extra_cmd_in_dws;
+    dri_bo *command_buffer;
+    unsigned int *command_ptr;
+
+    param_size = sizeof(struct gen7_pp_inline_parameter);
+
+    x_steps = pp_context->pp_x_steps(pp_context->private_context);
+    y_steps = pp_context->pp_y_steps(pp_context->private_context);
+    command_length_in_dws = 6 + (param_size >> 2);
+    extra_cmd_in_dws = 2;
+    /* Worst case: every block is emitted; 64 spare bytes cover the padding
+     * and the MI_BATCH_BUFFER_END written after the loop. */
+    command_buffer = dri_bo_alloc(i965->intel.bufmgr,
+                                  "command objects buffer",
+                                  (command_length_in_dws + extra_cmd_in_dws) * 4 * x_steps * y_steps + 64,
+                                  4096);
+    if (command_buffer == NULL) {
+        WARN_ONCE("failure to allocate the command objects buffer in VPP\n");
+        return;
+    }
+
+    if (dri_bo_map(command_buffer, 1) != 0 || command_buffer->virtual == NULL) {
+        WARN_ONCE("failure to map the command objects buffer in VPP\n");
+        dri_bo_unreference(command_buffer);
+        return;
+    }
+    command_ptr = command_buffer->virtual;
+
+    for (y = 0; y < y_steps; y++) {
+        for (x = 0; x < x_steps; x++) {
+            if (!pp_context->pp_set_block_parameter(pp_context, x, y)) {
+
+                *command_ptr++ = (CMD_MEDIA_OBJECT | (command_length_in_dws - 2));
+                *command_ptr++ = 0;
+                *command_ptr++ = 0;
+                *command_ptr++ = 0;
+                *command_ptr++ = 0;
+                *command_ptr++ = 0;
+                memcpy(command_ptr, pp_context->pp_inline_parameter, param_size);
+                command_ptr += (param_size >> 2);
+
+                *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
+                *command_ptr++ = 0;
+            }
+        }
+    }
+
+    /* NOTE(review): the padding decision uses the full grid size, not the
+     * number of blocks actually emitted above - confirm this is intended. */
+    if ((command_length_in_dws + extra_cmd_in_dws) * x_steps * y_steps % 2 == 0)
+        *command_ptr++ = 0;
+
+    *command_ptr++ = MI_BATCH_BUFFER_END;
+    *command_ptr++ = 0;
+
+    dri_bo_unmap(command_buffer);
+
+    BEGIN_BATCH(batch, 3);
+    OUT_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
+    OUT_RELOC(batch, command_buffer,
+              I915_GEM_DOMAIN_COMMAND, 0, 0);
+    OUT_BATCH(batch, 0);
+    ADVANCE_BATCH(batch);
+
+    /* Drop our reference once the relocation has been emitted */
+    dri_bo_unreference(command_buffer);
+
+    /* Have to execute the batch buffer here because MI_BATCH_BUFFER_END
+     * will cause control to pass back to ring buffer
+     */
+    intel_batchbuffer_end_atomic(batch);
+    intel_batchbuffer_flush(batch);
+    intel_batchbuffer_start_atomic(batch, 0x1000);
+}
+
+/*
+ * Emit the complete media pipeline command sequence for one pass inside an
+ * atomic batch section: flush, pipeline select, base addresses, VFE state,
+ * CURBE load, interface descriptor load and finally the object walker.
+ *
+ * The object walker flushes the batch itself and re-opens an atomic section,
+ * which the final intel_batchbuffer_end_atomic() here closes.
+ */
+static void
+gen8_pp_pipeline_setup(VADriverContextP ctx,
+ struct i965_post_processing_context *pp_context)
+{
+ struct intel_batchbuffer *batch = pp_context->batch;
+
+ intel_batchbuffer_start_atomic(batch, 0x1000);
+ intel_batchbuffer_emit_mi_flush(batch);
+ gen6_pp_pipeline_select(ctx, pp_context);
+ gen8_pp_state_base_address(ctx, pp_context);
+ gen8_pp_vfe_state(ctx, pp_context);
+ gen8_pp_curbe_load(ctx, pp_context);
+ gen8_interface_descriptor_load(ctx, pp_context);
+ /* NOTE(review): VFE state is emitted a second time here - confirm whether
+ * this is a required re-programming or a leftover duplicate */
+ gen8_pp_vfe_state(ctx, pp_context);
+ gen8_pp_object_walker(ctx, pp_context);
+ intel_batchbuffer_end_atomic(batch);
+}
+
+/*
+ * Run one post-processing pass with the module selected by pp_index.
+ *
+ * The state setup and pipeline submission happen only when initialization
+ * succeeds; otherwise the initialization status is returned unchanged.
+ */
+static VAStatus
+gen8_post_processing(
+    VADriverContextP ctx,
+    struct i965_post_processing_context *pp_context,
+    const struct i965_surface *src_surface,
+    const VARectangle *src_rect,
+    struct i965_surface *dst_surface,
+    const VARectangle *dst_rect,
+    int pp_index,
+    void * filter_param
+)
+{
+    const VAStatus init_status = gen8_pp_initialize(ctx, pp_context,
+                                                    src_surface,
+                                                    src_rect,
+                                                    dst_surface,
+                                                    dst_rect,
+                                                    pp_index,
+                                                    filter_param);
+
+    /* Nothing is submitted to the GPU when the pass could not be prepared */
+    if (init_status != VA_STATUS_SUCCESS)
+        return init_status;
+
+    gen8_pp_states_setup(ctx, pp_context);
+    gen8_pp_pipeline_setup(ctx, pp_context);
+
+    return init_status;
+}
+
+/*
+ * Release every resource owned by the post-processing context: the
+ * surface-state, STMM, instruction, indirect and dynamic state buffers and
+ * the two heap-allocated kernel parameter blocks. Each pointer is reset to
+ * NULL after it is released.
+ */
+static void
+gen8_post_processing_context_finalize(struct i965_post_processing_context *pp_context)
+{
+ dri_bo_unreference(pp_context->surface_state_binding_table.bo);
+ pp_context->surface_state_binding_table.bo = NULL;
+
+ dri_bo_unreference(pp_context->pp_dndi_context.stmm_bo);
+ pp_context->pp_dndi_context.stmm_bo = NULL;
+
+ dri_bo_unreference(pp_context->pp_dn_context.stmm_bo);
+ pp_context->pp_dn_context.stmm_bo = NULL;
+
+ if (pp_context->instruction_state.bo) {
+ dri_bo_unreference(pp_context->instruction_state.bo);
+ pp_context->instruction_state.bo = NULL;
+ }
+
+ if (pp_context->indirect_state.bo) {
+ dri_bo_unreference(pp_context->indirect_state.bo);
+ pp_context->indirect_state.bo = NULL;
+ }
+
+ if (pp_context->dynamic_state.bo) {
+ dri_bo_unreference(pp_context->dynamic_state.bo);
+ pp_context->dynamic_state.bo = NULL;
+ }
+
+ free(pp_context->pp_static_parameter);
+ free(pp_context->pp_inline_parameter);
+ pp_context->pp_static_parameter = NULL;
+ pp_context->pp_inline_parameter = NULL;
+}
+
+/* CURBE allocation size programmed into MEDIA_VFE_STATE (256-bit units there) */
+#define VPP_CURBE_ALLOCATION_SIZE 32
+
+/*
+ * Initialize a Gen8 post-processing context.
+ *
+ * data points to the struct i965_post_processing_context to set up and batch
+ * is the batch buffer later used for command emission. The function installs
+ * the Gen8 entry points, copies the module table, uploads every module kernel
+ * into a single instruction buffer (each at a 64-byte aligned offset) and
+ * allocates the static/inline parameter blocks.
+ *
+ * NOTE(review): when the kernel buffer cannot be allocated the function
+ * returns early, leaving batch, idrt_size, curbe_size and the parameter
+ * blocks unset. The calloc() results below are not checked either.
+ */
+void
+gen8_post_processing_context_init(VADriverContextP ctx,
+ void *data,
+ struct intel_batchbuffer *batch)
+{
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
+ int i, kernel_size;
+ unsigned int kernel_offset, end_offset;
+ unsigned char *kernel_ptr;
+ struct pp_module *pp_module;
+ struct i965_post_processing_context *pp_context = data;
+
+ {
+ pp_context->vfe_gpu_state.max_num_threads = 60;
+ pp_context->vfe_gpu_state.num_urb_entries = 59;
+ pp_context->vfe_gpu_state.gpgpu_mode = 0;
+ pp_context->vfe_gpu_state.urb_entry_size = 16 - 1;
+ pp_context->vfe_gpu_state.curbe_allocation_size = VPP_CURBE_ALLOCATION_SIZE;
+ }
+
+ pp_context->intel_post_processing = gen8_post_processing;
+ pp_context->finalize = gen8_post_processing_context_finalize;
+
+ assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen8));
+
+ memcpy(pp_context->pp_modules, pp_modules_gen8, sizeof(pp_context->pp_modules));
+
+ /* Start from 4096 spare bytes, then add the size of every kernel binary;
+ * presumably the spare space absorbs the per-kernel alignment padding */
+ kernel_size = 4096 ;
+
+ for (i = 0; i < NUM_PP_MODULES; i++) {
+ pp_module = &pp_context->pp_modules[i];
+
+ if (pp_module->kernel.bin && pp_module->kernel.size) {
+ kernel_size += pp_module->kernel.size;
+ }
+ }
+
+ pp_context->instruction_state.bo = dri_bo_alloc(i965->intel.bufmgr,
+ "kernel shader",
+ kernel_size,
+ 0x1000);
+ if (pp_context->instruction_state.bo == NULL) {
+ WARN_ONCE("failure to allocate the buffer space for kernel shader in VPP\n");
+ return;
+ }
+
+ assert(pp_context->instruction_state.bo);
+
+
+ pp_context->instruction_state.bo_size = kernel_size;
+ pp_context->instruction_state.end_offset = 0;
+ end_offset = 0;
+
+ dri_bo_map(pp_context->instruction_state.bo, 1);
+ kernel_ptr = (unsigned char *)(pp_context->instruction_state.bo->virtual);
+
+ /* Pack the kernels back to back; modules without a binary still get an
+ * offset recorded but consume no space */
+ for (i = 0; i < NUM_PP_MODULES; i++) {
+ pp_module = &pp_context->pp_modules[i];
+
+ kernel_offset = ALIGN(end_offset, 64);
+ pp_module->kernel.kernel_offset = kernel_offset;
+
+ if (pp_module->kernel.bin && pp_module->kernel.size) {
+
+ memcpy(kernel_ptr + kernel_offset, pp_module->kernel.bin, pp_module->kernel.size);
+ end_offset = kernel_offset + pp_module->kernel.size;
+ }
+ }
+
+ pp_context->instruction_state.end_offset = ALIGN(end_offset, 64);
+
+ dri_bo_unmap(pp_context->instruction_state.bo);
+
+ /* static & inline parameters */
+ pp_context->pp_static_parameter = calloc(sizeof(struct gen7_pp_static_parameter), 1);
+ pp_context->pp_inline_parameter = calloc(sizeof(struct gen7_pp_inline_parameter), 1);
+
+ pp_context->pp_dndi_context.current_out_surface = VA_INVALID_SURFACE;
+ pp_context->pp_dndi_context.current_out_obj_surface = NULL;
+ pp_context->pp_dndi_context.frame_order = -1;
+ pp_context->batch = batch;
+
+ /* Region sizes later used to lay out the dynamic state buffer */
+ pp_context->idrt_size = 5 * sizeof(struct gen8_interface_descriptor_data);
+ pp_context->curbe_size = 256;
+}
diff --git a/src/gen8_render.c b/src/gen8_render.c
new file mode 100644
index 0000000..9c49cbc
--- /dev/null
+++ b/src/gen8_render.c
@@ -0,0 +1,1824 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Eric Anholt <eric@anholt.net>
+ * Keith Packard <keithp@keithp.com>
+ * Xiang Haihao <haihao.xiang@intel.com>
+ * Zhao Yakui <yakui.zhao@intel.com>
+ *
+ */
+
+/*
+ * Most of the rendering code is ported from xf86-video-intel/src/i965_video.c
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include <math.h>
+
+#include <va/va_drmcommon.h>
+
+#include "intel_batchbuffer.h"
+#include "intel_driver.h"
+#include "i965_defines.h"
+#include "i965_drv_video.h"
+#include "i965_structs.h"
+
+#include "i965_render.h"
+
+/* Register-file and thread limits; presumably for the SF and PS stages - not referenced in this part of the file */
+#define SF_KERNEL_NUM_GRF 16
+#define SF_MAX_THREADS 1
+
+#define PS_KERNEL_NUM_GRF 48
+#define PS_MAX_THREADS 32
+
+/* Programs for Gen8 */
+/* The SF program is intentionally empty: no instructions are provided */
+static const uint32_t sf_kernel_static_gen8[][4] ={
+
+};
+/* Pixel shader for video surfaces, assembled by concatenating the included
+ * pre-built fragments in order; each row is four 32-bit words */
+static const uint32_t ps_kernel_static_gen8[][4] = {
+#include "shaders/render/exa_wm_src_affine.g8b"
+#include "shaders/render/exa_wm_src_sample_planar.g8b"
+#include "shaders/render/exa_wm_yuv_color_balance.g8b"
+#include "shaders/render/exa_wm_yuv_rgb.g8b"
+#include "shaders/render/exa_wm_write.g8b"
+};
+
+/* Pixel shader for subpictures: samples ARGB instead of planar YUV */
+static const uint32_t ps_subpic_kernel_static_gen8[][4] = {
+#include "shaders/render/exa_wm_src_affine.g8b"
+#include "shaders/render/exa_wm_src_sample_argb.g8b"
+#include "shaders/render/exa_wm_write.g8b"
+};
+
+
+/* Size in bytes of one padded surface state slot on Gen8 */
+#define SURFACE_STATE_PADDED_SIZE SURFACE_STATE_PADDED_SIZE_GEN8
+
+/* Byte offset of surface state slot `index`; the argument is parenthesized so
+ * that expressions such as SURFACE_STATE_OFFSET(i + 1) expand correctly */
+#define SURFACE_STATE_OFFSET(index) (SURFACE_STATE_PADDED_SIZE * (index))
+/* The binding table starts right after the last surface state slot */
+#define BINDING_TABLE_OFFSET SURFACE_STATE_OFFSET(MAX_RENDER_SURFACES)
+
+/* Identifiers of the render kernels, in the order of render_kernels_gen8[] */
+enum {
+    SF_KERNEL = 0,
+    PS_KERNEL,
+    PS_SUBPIC_KERNEL
+};
+
+/*
+ * Table of the Gen8 render kernels. Each entry gives a name, the kernel
+ * identifier, the program binary and its size in bytes; the trailing NULL is
+ * presumably the not-yet-allocated buffer object for the kernel (confirm
+ * against struct i965_kernel).
+ */
+static struct i965_kernel render_kernels_gen8[] = {
+ {
+ "SF",
+ SF_KERNEL,
+ sf_kernel_static_gen8,
+ sizeof(sf_kernel_static_gen8),
+ NULL
+ },
+ {
+ "PS",
+ PS_KERNEL,
+ ps_kernel_static_gen8,
+ sizeof(ps_kernel_static_gen8),
+ NULL
+ },
+
+ {
+ "PS_SUBPIC",
+ PS_SUBPIC_KERNEL,
+ ps_subpic_kernel_static_gen8,
+ sizeof(ps_subpic_kernel_static_gen8),
+ NULL
+ }
+};
+
+/*
+ * Entry counts and entry sizes, presumably the URB partitioning per
+ * fixed-function stage (VS, GS, CLIP, SF, CS) - they are not referenced in
+ * this part of the file, so confirm units and usage at the point of use.
+ */
+#define URB_VS_ENTRIES 8
+#define URB_VS_ENTRY_SIZE 1
+
+#define URB_GS_ENTRIES 0
+#define URB_GS_ENTRY_SIZE 0
+
+#define URB_CLIP_ENTRIES 0
+#define URB_CLIP_ENTRY_SIZE 0
+
+#define URB_SF_ENTRIES 1
+#define URB_SF_ENTRY_SIZE 2
+
+#define URB_CS_ENTRIES 4
+#define URB_CS_ENTRY_SIZE 4
+
+/*
+ * YUV to RGB conversion tables, one per colour standard. Each has three rows
+ * of four floats. The last column holds -0.06275 (about -16/255) in the
+ * first row and -0.50196 (about -128/255) in the others.
+ * NOTE(review): the rows presumably map to R, G and B with columns
+ * (Y, U, V coefficient, offset) - confirm against the consuming shader setup.
+ */
+static float yuv_to_rgb_bt601[3][4] = {
+{1.164, 0, 1.596, -0.06275,},
+{1.164, -0.392, -0.813, -0.50196,},
+{1.164, 2.017, 0, -0.50196,},
+};
+
+static float yuv_to_rgb_bt709[3][4] = {
+{1.164, 0, 1.793, -0.06275,},
+{1.164, -0.213, -0.533, -0.50196,},
+{1.164, 2.112, 0, -0.50196,},
+};
+
+static float yuv_to_rgb_smpte_240[3][4] = {
+{1.164, 0, 1.794, -0.06275,},
+{1.164, -0.258, -0.5425, -0.50196,},
+{1.164, 2.078, 0, -0.50196,},
+};
+
+
+/*
+ * Translate an I915_TILING_* value into the tiled_surface / tile_walk fields
+ * of a Gen8 surface state. Any other tiling value leaves both fields as they
+ * are.
+ */
+static void
+gen8_render_set_surface_tiling(struct gen8_surface_state *ss, uint32_t tiling)
+{
+    if (tiling == I915_TILING_NONE) {
+        /* linear surface */
+        ss->ss0.tiled_surface = 0;
+        ss->ss0.tile_walk = 0;
+    } else if (tiling == I915_TILING_X) {
+        ss->ss0.tiled_surface = 1;
+        ss->ss0.tile_walk = I965_TILEWALK_XMAJOR;
+    } else if (tiling == I915_TILING_Y) {
+        ss->ss0.tiled_surface = 1;
+        ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
+    }
+}
+
+/*
+ * Set "Shader Channel Select" for GEN8+
+ *
+ * Programs an identity mapping: each of the R, G, B and A selects is set to
+ * its own channel.
+ */
+void
+gen8_render_set_surface_scs(struct gen8_surface_state *ss)
+{
+ ss->ss7.shader_chanel_select_r = HSW_SCS_RED;
+ ss->ss7.shader_chanel_select_g = HSW_SCS_GREEN;
+ ss->ss7.shader_chanel_select_b = HSW_SCS_BLUE;
+ ss->ss7.shader_chanel_select_a = HSW_SCS_ALPHA;
+}
+
+/*
+ * Fill a Gen8 2D surface state from scratch.
+ *
+ * ss is zeroed first. bo/offset give the surface memory, width/height/pitch
+ * its geometry and format the surface format. When flags selects exactly one
+ * field (top or bottom), the surface is set up for field access: the vertical
+ * line stride is enabled, the height is halved, and the bottom field also
+ * gets a line stride offset of one. Tiling is taken from the buffer object.
+ */
+static void
+gen8_render_set_surface_state(
+    struct gen8_surface_state *ss,
+    dri_bo *bo,
+    unsigned long offset,
+    int width,
+    int height,
+    int pitch,
+    int format,
+    unsigned int flags
+)
+{
+    const unsigned int field_sel =
+        flags & (I965_PP_FLAG_TOP_FIELD|I965_PP_FLAG_BOTTOM_FIELD);
+    unsigned int bo_tiling;
+    unsigned int bo_swizzle;
+
+    memset(ss, 0, sizeof(*ss));
+
+    /* Only the bottom field starts one line down */
+    if (field_sel == I965_PP_FLAG_BOTTOM_FIELD)
+        ss->ss0.vert_line_stride_ofs = 1;
+
+    /* Either single field: skip every other line and halve the height */
+    if (field_sel == I965_PP_FLAG_BOTTOM_FIELD ||
+        field_sel == I965_PP_FLAG_TOP_FIELD) {
+        ss->ss0.vert_line_stride = 1;
+        height /= 2;
+    }
+
+    ss->ss0.surface_type = I965_SURFACE_2D;
+    ss->ss0.surface_format = format;
+
+    /* Always set 1(align 4 mode) per B-spec */
+    ss->ss0.vertical_alignment = 1;
+    ss->ss0.horizontal_alignment = 1;
+
+    /* Dimensions and pitch are stored minus one */
+    ss->ss2.width = width - 1;
+    ss->ss2.height = height - 1;
+    ss->ss3.pitch = pitch - 1;
+
+    ss->ss8.base_addr = bo->offset + offset;
+
+    dri_bo_get_tiling(bo, &bo_tiling, &bo_swizzle);
+    gen8_render_set_surface_tiling(ss, bo_tiling);
+}
+
+/*
+ * Write one source (sampled) surface state into slot `index` of the render
+ * surface-state/binding-table buffer.
+ *
+ * Besides filling the state, this emits a relocation for its base address
+ * (field ss8) against `region`, points binding table entry `index` at the
+ * slot, and increments the WM sampler count.
+ */
+static void
+gen8_render_src_surface_state(
+ VADriverContextP ctx,
+ int index,
+ dri_bo *region,
+ unsigned long offset,
+ int w,
+ int h,
+ int pitch,
+ int format,
+ unsigned int flags
+)
+{
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
+ struct i965_render_state *render_state = &i965->render_state;
+ void *ss;
+ dri_bo *ss_bo = render_state->wm.surface_state_binding_table_bo;
+
+ assert(index < MAX_RENDER_SURFACES);
+
+ dri_bo_map(ss_bo, 1);
+ assert(ss_bo->virtual);
+ ss = (char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index);
+
+ gen8_render_set_surface_state(ss,
+ region, offset,
+ w, h,
+ pitch, format, flags);
+ gen8_render_set_surface_scs(ss);
+ /* Read-only sampler access: no write domain is requested */
+ dri_bo_emit_reloc(ss_bo,
+ I915_GEM_DOMAIN_SAMPLER, 0,
+ offset,
+ SURFACE_STATE_OFFSET(index) + offsetof(struct gen8_surface_state, ss8),
+ region);
+
+ /* Binding table entry `index` holds the byte offset of the slot */
+ ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
+ dri_bo_unmap(ss_bo);
+ render_state->wm.sampler_count++;
+}
+
+/*
+ * Bind the planes of a video surface as source surfaces for rendering.
+ *
+ * Every plane is bound twice, to two consecutive slots: luma to slots 1-2,
+ * then either the interleaved UV plane (NV12, R8G8) or the U plane (R8) to
+ * slots 3-4, and for non-NV12 formats the V plane to slots 5-6. Y800
+ * surfaces stop after the luma plane.
+ */
+static void
+gen8_render_src_surfaces_state(
+    VADriverContextP ctx,
+    struct object_surface *obj_surface,
+    unsigned int flags
+)
+{
+    const int region_pitch = obj_surface->width;
+    const int rw = obj_surface->orig_width;
+    const int rh = obj_surface->orig_height;
+    dri_bo * const region = obj_surface->bo;
+    const int is_nv12 = (obj_surface->fourcc == VA_FOURCC_NV12);
+    int chroma_format;
+    int slot;
+
+    /* Y */
+    for (slot = 1; slot <= 2; slot++)
+        gen8_render_src_surface_state(ctx, slot, region, 0, rw, rh, region_pitch, I965_SURFACEFORMAT_R8_UNORM, flags);
+
+    if (obj_surface->fourcc == VA_FOURCC_Y800) /* single plane for grayscale */
+        return;
+
+    /* UV (interleaved, two channels) for NV12, otherwise U (one channel) */
+    chroma_format = is_nv12 ? I965_SURFACEFORMAT_R8G8_UNORM : I965_SURFACEFORMAT_R8_UNORM;
+
+    for (slot = 3; slot <= 4; slot++)
+        gen8_render_src_surface_state(ctx, slot, region,
+                                      region_pitch * obj_surface->y_cb_offset,
+                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
+                                      chroma_format, flags);
+
+    if (is_nv12)
+        return;
+
+    /* V */
+    for (slot = 5; slot <= 6; slot++)
+        gen8_render_src_surface_state(ctx, slot, region,
+                                      region_pitch * obj_surface->y_cr_offset,
+                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
+                                      I965_SURFACEFORMAT_R8_UNORM, flags);
+}
+
+/*
+ * Bind the image of the subpicture currently selected on obj_surface
+ * (obj_surface->subpic_render_idx) as source surface, in slots 1 and 2.
+ */
+static void
+gen8_subpic_render_src_surfaces_state(VADriverContextP ctx,
+                                      struct object_surface *obj_surface)
+{
+    dri_bo *subpic_region;
+    unsigned int index;
+    struct object_subpic *obj_subpic;
+    struct object_image *obj_image;
+
+    /* Validate the surface before anything is read from it */
+    assert(obj_surface);
+    assert(obj_surface->bo);
+
+    index = obj_surface->subpic_render_idx;
+    obj_subpic = obj_surface->obj_subpic[index];
+    assert(obj_subpic);
+
+    obj_image = obj_subpic->obj_image;
+    assert(obj_image);
+
+    subpic_region = obj_image->bo;
+    /*subpicture surface*/
+    gen8_render_src_surface_state(ctx, 1, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->pitch, obj_subpic->format, 0);
+    gen8_render_src_surface_state(ctx, 2, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->pitch, obj_subpic->format, 0);
+}
+
+/*
+ * Write the render target surface state for the current draw region into
+ * slot `index` and point binding table entry `index` at it.
+ *
+ * The format is B5G6R5 when the region uses 2 bytes per pixel and B8G8R8A8
+ * otherwise.
+ */
+static void
+gen8_render_dest_surface_state(VADriverContextP ctx, int index)
+{
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
+ struct i965_render_state *render_state = &i965->render_state;
+ struct intel_region *dest_region = render_state->draw_region;
+ void *ss;
+ dri_bo *ss_bo = render_state->wm.surface_state_binding_table_bo;
+ int format;
+ assert(index < MAX_RENDER_SURFACES);
+
+ if (dest_region->cpp == 2) {
+ format = I965_SURFACEFORMAT_B5G6R5_UNORM;
+ } else {
+ format = I965_SURFACEFORMAT_B8G8R8A8_UNORM;
+ }
+
+ dri_bo_map(ss_bo, 1);
+ assert(ss_bo->virtual);
+ ss = (char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index);
+
+ gen8_render_set_surface_state(ss,
+ dest_region->bo, 0,
+ dest_region->width, dest_region->height,
+ dest_region->pitch, format, 0);
+ gen8_render_set_surface_scs(ss);
+ /* The target is written by the render engine: request read and write domains */
+ dri_bo_emit_reloc(ss_bo,
+ I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+ 0,
+ SURFACE_STATE_OFFSET(index) + offsetof(struct gen8_surface_state, ss8),
+ dest_region->bo);
+
+ ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
+ dri_bo_unmap(ss_bo);
+}
+
+/*
+ * Upload three vertices (bottom-right, bottom-left, top-left) to the render
+ * vertex buffer. Each vertex is four floats: texture (u, v) followed by
+ * screen (x, y).
+ *
+ * Screen positions always come straight from vid_coords; the texture
+ * coordinate attached to each corner is picked through a per-rotation index
+ * table, which is how the rotation attribute is applied.
+ *
+ * NOTE(review): i965->rotation_attrib->value indexes the table without a
+ * range check - confirm it is validated by the caller.
+ */
+static void
+i965_fill_vertex_buffer(
+ VADriverContextP ctx,
+ float tex_coords[4], /* [(u1,v1);(u2,v2)] */
+ float vid_coords[4] /* [(x1,y1);(x2,y2)] */
+)
+{
+ struct i965_driver_data * const i965 = i965_driver_data(ctx);
+ float vb[12];
+
+ enum { X1, Y1, X2, Y2 };
+
+ /* For each rotation: (u, v) source indices for the three vertices in turn */
+ static const unsigned int g_rotation_indices[][6] = {
+ [VA_ROTATION_NONE] = { X2, Y2, X1, Y2, X1, Y1 },
+ [VA_ROTATION_90] = { X2, Y1, X2, Y2, X1, Y2 },
+ [VA_ROTATION_180] = { X1, Y1, X2, Y1, X2, Y2 },
+ [VA_ROTATION_270] = { X1, Y2, X1, Y1, X2, Y1 },
+ };
+
+ const unsigned int * const rotation_indices =
+ g_rotation_indices[i965->rotation_attrib->value];
+
+ vb[0] = tex_coords[rotation_indices[0]]; /* bottom-right corner */
+ vb[1] = tex_coords[rotation_indices[1]];
+ vb[2] = vid_coords[X2];
+ vb[3] = vid_coords[Y2];
+
+ vb[4] = tex_coords[rotation_indices[2]]; /* bottom-left corner */
+ vb[5] = tex_coords[rotation_indices[3]];
+ vb[6] = vid_coords[X1];
+ vb[7] = vid_coords[Y2];
+
+ vb[8] = tex_coords[rotation_indices[4]]; /* top-left corner */
+ vb[9] = tex_coords[rotation_indices[5]];
+ vb[10] = vid_coords[X1];
+ vb[11] = vid_coords[Y1];
+
+ dri_bo_subdata(i965->render_state.vb.vertex_buffer, 0, sizeof(vb), vb);
+}
+
+/*
+ * Compute and upload the vertices for drawing the currently selected
+ * subpicture of obj_surface.
+ *
+ * The destination rectangle is used as-is when the subpicture is flagged as
+ * being in screen coordinates; otherwise it is scaled by the ratio between
+ * output_rect and the surface's original size and shifted to output_rect's
+ * origin. Texture coordinates are the subpicture source rectangle normalized
+ * by the subpicture dimensions.
+ */
+static void
+i965_subpic_render_upload_vertex(VADriverContextP ctx,
+ struct object_surface *obj_surface,
+ const VARectangle *output_rect)
+{
+ unsigned int index = obj_surface->subpic_render_idx;
+ struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];
+ float tex_coords[4], vid_coords[4];
+ VARectangle dst_rect;
+
+ if (obj_subpic->flags & VA_SUBPICTURE_DESTINATION_IS_SCREEN_COORD)
+ dst_rect = obj_subpic->dst_rect;
+ else {
+ /* Scale factors from surface coordinates to output coordinates */
+ const float sx = (float)output_rect->width / obj_surface->orig_width;
+ const float sy = (float)output_rect->height / obj_surface->orig_height;
+ dst_rect.x = output_rect->x + sx * obj_subpic->dst_rect.x;
+ dst_rect.y = output_rect->y + sy * obj_subpic->dst_rect.y;
+ dst_rect.width = sx * obj_subpic->dst_rect.width;
+ dst_rect.height = sy * obj_subpic->dst_rect.height;
+ }
+
+ tex_coords[0] = (float)obj_subpic->src_rect.x / obj_subpic->width;
+ tex_coords[1] = (float)obj_subpic->src_rect.y / obj_subpic->height;
+ tex_coords[2] = (float)(obj_subpic->src_rect.x + obj_subpic->src_rect.width) / obj_subpic->width;
+ tex_coords[3] = (float)(obj_subpic->src_rect.y + obj_subpic->src_rect.height) / obj_subpic->height;
+
+ vid_coords[0] = dst_rect.x;
+ vid_coords[1] = dst_rect.y;
+ vid_coords[2] = (float)(dst_rect.x + dst_rect.width);
+ vid_coords[3] = (float)(dst_rect.y + dst_rect.height);
+
+ i965_fill_vertex_buffer(ctx, tex_coords, vid_coords);
+}
+
+/*
+ * Compute and upload the vertices for drawing a video surface.
+ *
+ * Texture coordinates are src_rect normalized by the surface's original
+ * width and height; screen coordinates are dst_rect offset by the origin of
+ * the current draw region.
+ */
+static void
+i965_render_upload_vertex(
+ VADriverContextP ctx,
+ struct object_surface *obj_surface,
+ const VARectangle *src_rect,
+ const VARectangle *dst_rect
+)
+{
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
+ struct i965_render_state *render_state = &i965->render_state;
+ struct intel_region *dest_region = render_state->draw_region;
+ float tex_coords[4], vid_coords[4];
+ int width, height;
+
+ width = obj_surface->orig_width;
+ height = obj_surface->orig_height;
+
+ tex_coords[0] = (float)src_rect->x / width;
+ tex_coords[1] = (float)src_rect->y / height;
+ tex_coords[2] = (float)(src_rect->x + src_rect->width) / width;
+ tex_coords[3] = (float)(src_rect->y + src_rect->height) / height;
+
+ vid_coords[0] = dest_region->x + dst_rect->x;
+ vid_coords[1] = dest_region->y + dst_rect->y;
+ vid_coords[2] = vid_coords[0] + dst_rect->width;
+ vid_coords[3] = vid_coords[1] + dst_rect->height;
+
+ i965_fill_vertex_buffer(ctx, tex_coords, vid_coords);
+}
+
+/*
+ * Emit a 4-DWord DRAWING_RECTANGLE command covering the whole draw region:
+ * the second and fourth DWords are zero and the third packs
+ * (width - 1) in the low half and (height - 1) in the high half.
+ */
+static void
+i965_render_drawing_rectangle(VADriverContextP ctx)
+{
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
+ struct intel_batchbuffer *batch = i965->batch;
+ struct i965_render_state *render_state = &i965->render_state;
+ struct intel_region *dest_region = render_state->draw_region;
+
+ BEGIN_BATCH(batch, 4);
+ OUT_BATCH(batch, CMD_DRAWING_RECTANGLE | 2);
+ OUT_BATCH(batch, 0x00000000);
+ OUT_BATCH(batch, (dest_region->width - 1) | (dest_region->height - 1) << 16);
+ OUT_BATCH(batch, 0x00000000);
+ ADVANCE_BATCH(batch);
+}
+
+/*
+ * Load the palette of obj_image into the sampler with
+ * CMD_SAMPLER_PALETTE_LOAD.
+ *
+ * Each emitted entry is the stored palette value OR-ed with `alpha` placed
+ * in bits 24-31.  Nothing is emitted when obj_image is NULL or has no
+ * palette entries.
+ */
+static void
+i965_render_upload_image_palette(
+    VADriverContextP ctx,
+    struct object_image *obj_image,
+    unsigned int alpha
+)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct intel_batchbuffer *batch = i965->batch;
+    const unsigned int alpha_bits = alpha << 24;
+    unsigned int entry;
+
+    assert(obj_image);
+
+    /* Still bail out quietly when asserts are compiled out. */
+    if (!obj_image || obj_image->image.num_palette_entries == 0)
+        return;
+
+    BEGIN_BATCH(batch, 1 + obj_image->image.num_palette_entries);
+    OUT_BATCH(batch, CMD_SAMPLER_PALETTE_LOAD | (obj_image->image.num_palette_entries - 1));
+
+    /* One dword per entry: bits 0-23 colour, bits 24-31 alpha. */
+    for (entry = 0; entry < obj_image->image.num_palette_entries; entry++) {
+        OUT_BATCH(batch, alpha_bits | obj_image->palette[entry]);
+    }
+
+    ADVANCE_BATCH(batch);
+}
+
+/*
+ * Clear the draw region with a GEN8_XY_COLOR_BLT_CMD blit.
+ *
+ * The blit rectangle runs from (x, y) to (x + width, y + height) of the
+ * draw region.  32bpp regions additionally enable the RGB and alpha write
+ * bits; the only other supported depth is 16bpp (565).
+ */
+static void
+gen8_clear_dest_region(VADriverContextP ctx)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct intel_batchbuffer *batch = i965->batch;
+    struct i965_render_state *render_state = &i965->render_state;
+    struct intel_region *dest_region = render_state->draw_region;
+    unsigned int blt_cmd, br13;
+    int pitch;
+
+    blt_cmd = GEN8_XY_COLOR_BLT_CMD;
+    br13 = 0xf0 << 16;
+    pitch = dest_region->pitch;
+
+    if (dest_region->cpp == 4) {
+        br13 |= BR13_8888;
+        blt_cmd |= (XY_COLOR_BLT_WRITE_RGB | XY_COLOR_BLT_WRITE_ALPHA);
+    } else {
+        assert(dest_region->cpp == 2);
+        br13 |= BR13_565;
+    }
+
+    /* Tiled destinations take the pitch divided by 4. */
+    if (dest_region->tiling != I915_TILING_NONE) {
+        blt_cmd |= XY_COLOR_BLT_DST_TILED;
+        pitch /= 4;
+    }
+
+    br13 |= pitch;
+
+    /*
+     * Seven dwords are emitted below, so reserve 7 * 4 for the atomic
+     * section (the previous value of 24 only covered six).
+     * NOTE(review): the reservation is assumed to be in bytes, matching the
+     * 0x1000 used for the render atomic section - confirm.
+     */
+    intel_batchbuffer_start_atomic_blt(batch, 7 * 4);
+    BEGIN_BLT_BATCH(batch, 7);
+
+    OUT_BATCH(batch, blt_cmd);
+    OUT_BATCH(batch, br13);
+    OUT_BATCH(batch, (dest_region->y << 16) | (dest_region->x));
+    OUT_BATCH(batch, ((dest_region->y + dest_region->height) << 16) |
+              (dest_region->x + dest_region->width));
+    OUT_RELOC(batch, dest_region->bo,
+              I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+              0);
+    /* Presumably the upper address dword followed by the fill colour. */
+    OUT_BATCH(batch, 0x0);
+    OUT_BATCH(batch, 0x0);
+    ADVANCE_BATCH(batch);
+    intel_batchbuffer_end_atomic(batch);
+}
+
+
+/*
+ * for GEN8
+ */
+#define ALIGNMENT 64
+
+/*
+ * (Re)allocate the buffer objects used by the GEN8 render path and lay out
+ * the dynamic state buffer.
+ *
+ * Three buffers are replaced (the previous ones are unreferenced first):
+ * the vertex buffer, the surface state / binding table buffer and the
+ * dynamic state buffer.  The dynamic state buffer is split into
+ * ALIGNMENT-aligned sub-regions (CURBE, samplers, CC viewport, colour calc
+ * state, blend state, SF/CLIP, scissor) whose sizes and offsets are stored
+ * in render_state.
+ */
+static void
+gen8_render_initialize(VADriverContextP ctx)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct i965_render_state *render_state = &i965->render_state;
+    dri_bo *bo;
+    unsigned int end_offset;
+
+    /* VERTEX BUFFER */
+    dri_bo_unreference(render_state->vb.vertex_buffer);
+    bo = dri_bo_alloc(i965->intel.bufmgr,
+                      "vertex buffer",
+                      4096,
+                      4096);
+    assert(bo);
+    render_state->vb.vertex_buffer = bo;
+
+    /* WM */
+    dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
+    bo = dri_bo_alloc(i965->intel.bufmgr,
+                      "surface state & binding table",
+                      (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES,
+                      4096);
+    assert(bo);
+    render_state->wm.surface_state_binding_table_bo = bo;
+
+    /* Sizes of the individual pieces of dynamic state. */
+    render_state->curbe_size = 256;
+
+    render_state->wm.sampler_count = 0;
+
+    render_state->sampler_size = MAX_SAMPLERS * sizeof(struct gen8_sampler_state);
+
+    render_state->cc_state_size = sizeof(struct gen6_color_calc_state);
+
+    render_state->cc_viewport_size = sizeof(struct i965_cc_viewport);
+
+    render_state->blend_state_size = sizeof(struct gen8_global_blend_state) +
+                                     16 * sizeof(struct gen8_blend_state_rt);
+
+    render_state->sf_clip_size = 1024;
+
+    render_state->scissor_size = 1024;
+
+    /*
+     * Assign the sub-region offsets in a single pass.  The running total
+     * is then used as the allocation size, so the buffer size and the
+     * offsets cannot drift apart.
+     */
+    end_offset = 0;
+
+    /* Constant buffer offset */
+    render_state->curbe_offset = end_offset;
+    end_offset += ALIGN(render_state->curbe_size, ALIGNMENT);
+
+    /* Sampler_state */
+    render_state->sampler_offset = end_offset;
+    end_offset += ALIGN(render_state->sampler_size, ALIGNMENT);
+
+    /* CC_VIEWPORT_state */
+    render_state->cc_viewport_offset = end_offset;
+    end_offset += ALIGN(render_state->cc_viewport_size, ALIGNMENT);
+
+    /* CC_STATE_state */
+    render_state->cc_state_offset = end_offset;
+    end_offset += ALIGN(render_state->cc_state_size, ALIGNMENT);
+
+    /* Blend_state */
+    render_state->blend_state_offset = end_offset;
+    end_offset += ALIGN(render_state->blend_state_size, ALIGNMENT);
+
+    /* SF_CLIP_state */
+    render_state->sf_clip_offset = end_offset;
+    end_offset += ALIGN(render_state->sf_clip_size, ALIGNMENT);
+
+    /* SCISSOR_state */
+    render_state->scissor_offset = end_offset;
+    end_offset += ALIGN(render_state->scissor_size, ALIGNMENT);
+
+    /* DYNAMIC STATE: one buffer holding every sub-region laid out above. */
+    dri_bo_unreference(render_state->dynamic_state.bo);
+    bo = dri_bo_alloc(i965->intel.bufmgr,
+                      "dynamic_state",
+                      end_offset,
+                      4096);
+    /* Checked like the other allocations: the state-setup functions map it. */
+    assert(bo);
+    render_state->dynamic_state.bo = bo;
+
+    /* update the end offset of dynamic_state */
+    render_state->dynamic_state.end_offset = end_offset;
+}
+
+/*
+ * Write wm.sampler_count sampler states into the sampler area of the
+ * dynamic state buffer.  Every sampler uses linear min/mag filtering and
+ * clamps the r, s and t coordinates.
+ */
+static void
+gen8_render_sampler(VADriverContextP ctx)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct i965_render_state *render_state = &i965->render_state;
+    struct gen8_sampler_state *samplers;
+    int idx;
+
+    assert(render_state->wm.sampler_count > 0);
+    assert(render_state->wm.sampler_count <= MAX_SAMPLERS);
+
+    dri_bo_map(render_state->dynamic_state.bo, 1);
+    assert(render_state->dynamic_state.bo->virtual);
+
+    samplers = (struct gen8_sampler_state *)
+               ((unsigned char *) render_state->dynamic_state.bo->virtual +
+                render_state->sampler_offset);
+
+    for (idx = 0; idx < render_state->wm.sampler_count; idx++) {
+        struct gen8_sampler_state *sampler = &samplers[idx];
+
+        memset(sampler, 0, sizeof(*sampler));
+        sampler->ss0.min_filter = I965_MAPFILTER_LINEAR;
+        sampler->ss0.mag_filter = I965_MAPFILTER_LINEAR;
+        sampler->ss3.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
+        sampler->ss3.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
+        sampler->ss3.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
+    }
+
+    dri_bo_unmap(render_state->dynamic_state.bo);
+}
+
+/*
+ * Initialise the blend state area of the dynamic state buffer: the whole
+ * area is zeroed, then the first render-target entry (located right after
+ * the global blend state) enables the logic op with function 0xc and the
+ * pre-blend clamp.
+ */
+static void
+gen8_render_blend_state(VADriverContextP ctx)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct i965_render_state *render_state = &i965->render_state;
+    struct gen8_global_blend_state *global_state;
+    struct gen8_blend_state_rt *rt0;
+
+    dri_bo_map(render_state->dynamic_state.bo, 1);
+    assert(render_state->dynamic_state.bo->virtual);
+
+    global_state = (struct gen8_global_blend_state *)
+                   ((unsigned char *) render_state->dynamic_state.bo->virtual +
+                    render_state->blend_state_offset);
+
+    memset(global_state, 0, render_state->blend_state_size);
+
+    /* Global blend state + blend_state for Render Target */
+    rt0 = (struct gen8_blend_state_rt *) &global_state[1];
+    rt0->blend1.logic_op_enable = 1;
+    rt0->blend1.logic_op_func = 0xc;
+    rt0->blend1.pre_blend_clamp_enable = 1;
+
+    dri_bo_unmap(render_state->dynamic_state.bo);
+}
+
+
+/*
+ * Write the CC viewport into the dynamic state buffer: zeroed, with the
+ * depth range set to [-1e35, 1e35].
+ */
+static void
+gen8_render_cc_viewport(VADriverContextP ctx)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct i965_render_state *render_state = &i965->render_state;
+    struct i965_cc_viewport *viewport;
+
+    dri_bo_map(render_state->dynamic_state.bo, 1);
+    assert(render_state->dynamic_state.bo->virtual);
+
+    viewport = (struct i965_cc_viewport *)
+               ((unsigned char *) render_state->dynamic_state.bo->virtual +
+                render_state->cc_viewport_offset);
+
+    memset(viewport, 0, sizeof(*viewport));
+    viewport->min_depth = -1.e35;
+    viewport->max_depth = 1.e35;
+
+    dri_bo_unmap(render_state->dynamic_state.bo);
+}
+
+/*
+ * Write the colour calculator state into the dynamic state buffer: zeroed,
+ * with the constant colour set to (r, g, b, a) = (1, 0, 1, 1).
+ */
+static void
+gen8_render_color_calc_state(VADriverContextP ctx)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct i965_render_state *render_state = &i965->render_state;
+    struct gen6_color_calc_state *cc_state;
+
+    dri_bo_map(render_state->dynamic_state.bo, 1);
+    assert(render_state->dynamic_state.bo->virtual);
+
+    cc_state = (struct gen6_color_calc_state *)
+               ((unsigned char *) render_state->dynamic_state.bo->virtual +
+                render_state->cc_state_offset);
+
+    memset(cc_state, 0, sizeof(*cc_state));
+    cc_state->constant_r = 1.0;
+    cc_state->constant_g = 0.0;
+    cc_state->constant_b = 1.0;
+    cc_state->constant_a = 1.0;
+
+    dri_bo_unmap(render_state->dynamic_state.bo);
+}
+
+#define PI 3.1415926
+
+/*
+ * Fill the pixel-shader constant (CURBE) area of the dynamic state buffer.
+ *
+ * Layout written here:
+ *   unsigned short [0] : source format selector
+ *                        (2 = YUV400/Y800, 1 = NV12, 0 = anything else)
+ *   unsigned short [1] : 1 when all colour-balance attributes are at their
+ *                        defaults (shader skips the transform), else 0
+ *   float [4..7]       : contrast, brightness,
+ *                        cos(hue) * contrast * saturation,
+ *                        sin(hue) * contrast * saturation
+ *   float [8..]        : YUV->RGB matrix chosen by flags & VA_SRC_COLOR_MASK
+ *                        (BT.709, SMPTE 240, otherwise BT.601)
+ */
+static void
+gen8_render_upload_constants(VADriverContextP ctx,
+                             struct object_surface *obj_surface,
+                             unsigned int flags)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct i965_render_state *render_state = &i965->render_state;
+    const float contrast = (float)i965->contrast_attrib->value / DEFAULT_CONTRAST;
+    const float brightness = (float)i965->brightness_attrib->value / 255; /* YUV is float in the shader */
+    const float hue = (float)i965->hue_attrib->value / 180 * PI;
+    const float saturation = (float)i965->saturation_attrib->value / DEFAULT_SATURATION;
+    const unsigned int color_flag = flags & VA_SRC_COLOR_MASK;
+    unsigned short *constant_buffer;
+    float *curbe_floats;
+    const void *matrix;
+    size_t matrix_size;
+
+    dri_bo_map(render_state->dynamic_state.bo, 1);
+    assert(render_state->dynamic_state.bo->virtual);
+
+    constant_buffer = (unsigned short *)
+                      ((unsigned char *) render_state->dynamic_state.bo->virtual +
+                       render_state->curbe_offset);
+
+    /* [0]: which source layout the shader has to sample. */
+    if (obj_surface->subsampling == SUBSAMPLE_YUV400) {
+        assert(obj_surface->fourcc == VA_FOURCC_Y800);
+        constant_buffer[0] = 2;
+    } else {
+        constant_buffer[0] = (obj_surface->fourcc == VA_FOURCC_NV12) ? 1 : 0;
+    }
+
+    /* [1]: skip the colour balance transformation when nothing was changed. */
+    constant_buffer[1] =
+        (i965->contrast_attrib->value == DEFAULT_CONTRAST &&
+         i965->brightness_attrib->value == DEFAULT_BRIGHTNESS &&
+         i965->hue_attrib->value == DEFAULT_HUE &&
+         i965->saturation_attrib->value == DEFAULT_SATURATION) ? 1 : 0;
+
+    /* Colour balance coefficients, indexed in floats from the CURBE start. */
+    curbe_floats = (float *)constant_buffer;
+    curbe_floats[4] = contrast;
+    curbe_floats[5] = brightness;
+    curbe_floats[6] = cos(hue) * contrast * saturation;
+    curbe_floats[7] = sin(hue) * contrast * saturation;
+
+    /* Colour space conversion matrix. */
+    if (color_flag == VA_SRC_BT709) {
+        matrix = yuv_to_rgb_bt709;
+        matrix_size = sizeof(yuv_to_rgb_bt709);
+    } else if (color_flag == VA_SRC_SMPTE_240) {
+        matrix = yuv_to_rgb_smpte_240;
+        matrix_size = sizeof(yuv_to_rgb_smpte_240);
+    } else {
+        matrix = yuv_to_rgb_bt601;
+        matrix_size = sizeof(yuv_to_rgb_bt601);
+    }
+    memcpy(curbe_floats + 8, matrix, matrix_size);
+
+    dri_bo_unmap(render_state->dynamic_state.bo);
+}
+
+static void
+gen8_render_setup_states(
+ VADriverContextP ctx,
+ struct object_surface *obj_surface,
+ const VARectangle *src_rect,
+ const VARectangle *dst_rect,
+ unsigned int flags
+)
+{ /* Prepare all indirect state needed to render obj_surface from src_rect to dst_rect. */
+ gen8_render_dest_surface_state(ctx, 0);
+ gen8_render_src_surfaces_state(ctx, obj_surface, flags);
+ gen8_render_sampler(ctx); /* linear filtering, clamped coordinates */
+ gen8_render_cc_viewport(ctx); /* depth range of the CC viewport */
+ gen8_render_color_calc_state(ctx); /* constant colour */
+ gen8_render_blend_state(ctx); /* logic op on the first render target */
+ gen8_render_upload_constants(ctx, obj_surface, flags); /* shader constants: format, colour balance, YUV->RGB matrix */
+ i965_render_upload_vertex(ctx, obj_surface, src_rect, dst_rect); /* texture and screen coordinates */
+}
+
+/*
+ * Emit the 16-dword CMD_STATE_BASE_ADDRESS.
+ *
+ * Surface state, dynamic state and instruction base addresses are
+ * relocations to the corresponding render_state buffers; the general state
+ * and indirect object bases carry only BASE_ADDRESS_MODIFY.
+ */
+static void
+gen8_emit_state_base_address(VADriverContextP ctx)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct intel_batchbuffer *batch = i965->batch;
+    struct i965_render_state *render_state = &i965->render_state;
+    int i;
+
+    BEGIN_BATCH(batch, 16);
+    OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (16 - 2));
+
+    /* DW1-3: General state base address */
+    OUT_BATCH(batch, BASE_ADDRESS_MODIFY);
+    OUT_BATCH(batch, 0);
+    OUT_BATCH(batch, 0);
+
+    /* DW4-5: Surface state base address */
+    OUT_RELOC(batch,
+              render_state->wm.surface_state_binding_table_bo,
+              I915_GEM_DOMAIN_INSTRUCTION, 0,
+              BASE_ADDRESS_MODIFY);
+    OUT_BATCH(batch, 0);
+
+    /* DW6-7: Dynamic state base address */
+    OUT_RELOC(batch,
+              render_state->dynamic_state.bo,
+              I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_SAMPLER, 0,
+              BASE_ADDRESS_MODIFY);
+    OUT_BATCH(batch, 0);
+
+    /* DW8-9: Indirect object base address */
+    OUT_BATCH(batch, BASE_ADDRESS_MODIFY);
+    OUT_BATCH(batch, 0);
+
+    /* DW10-11: Instruction base address */
+    OUT_RELOC(batch,
+              render_state->instruction_state.bo,
+              I915_GEM_DOMAIN_INSTRUCTION, 0,
+              BASE_ADDRESS_MODIFY);
+    OUT_BATCH(batch, 0);
+
+    /*
+     * DW12-15: upper bounds for general state, dynamic state, indirect
+     * object and instruction access, in that order; all use the same value.
+     */
+    for (i = 0; i < 4; i++) {
+        OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY);
+    }
+    ADVANCE_BATCH(batch);
+}
+
+/*
+ * Point the hardware at the colour calc state and the blend state inside
+ * the dynamic state buffer.  Both offsets are emitted with 1 added
+ * (presumably the "pointer valid" bit in bit 0 - confirm against the PRM).
+ */
+static void
+gen8_emit_cc_state_pointers(VADriverContextP ctx)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct intel_batchbuffer *batch = i965->batch;
+
+    /* Colour calculator state */
+    BEGIN_BATCH(batch, 2);
+    OUT_BATCH(batch, GEN6_3DSTATE_CC_STATE_POINTERS | (2 - 2));
+    OUT_BATCH(batch, i965->render_state.cc_state_offset + 1);
+    ADVANCE_BATCH(batch);
+
+    /* Blend state */
+    BEGIN_BATCH(batch, 2);
+    OUT_BATCH(batch, GEN7_3DSTATE_BLEND_STATE_POINTERS | (2 - 2));
+    OUT_BATCH(batch, i965->render_state.blend_state_offset + 1);
+    ADVANCE_BATCH(batch);
+}
+
+/*
+ * Bind the vertex buffer and issue the draw: CMD_VERTEX_BUFFERS for buffer
+ * index 0 (pitch 4 * 4, size 12 * 4), a RECTLIST topology, then a
+ * sequential CMD_3DPRIMITIVE with 3 vertices and a single instance.
+ */
+static void
+gen8_emit_vertices(VADriverContextP ctx)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct intel_batchbuffer *batch = i965->batch;
+
+    BEGIN_BATCH(batch, 5);
+    OUT_BATCH(batch, CMD_VERTEX_BUFFERS | (5 - 2));
+    OUT_BATCH(batch,
+              (0 << GEN8_VB0_BUFFER_INDEX_SHIFT) |
+              (0 << GEN8_VB0_MOCS_SHIFT) |
+              GEN7_VB0_ADDRESS_MODIFYENABLE |
+              ((4 * 4) << VB0_BUFFER_PITCH_SHIFT));
+    OUT_RELOC(batch,
+              i965->render_state.vb.vertex_buffer,
+              I915_GEM_DOMAIN_VERTEX, 0,
+              0);
+    OUT_BATCH(batch, 0);
+    OUT_BATCH(batch, 12 * 4);
+    ADVANCE_BATCH(batch);
+
+    /* Topology in 3D primitive is overridden by the VF_TOPOLOGY command */
+    BEGIN_BATCH(batch, 2);
+    OUT_BATCH(batch, GEN8_3DSTATE_VF_TOPOLOGY | (2 - 2));
+    OUT_BATCH(batch, _3DPRIM_RECTLIST);
+    ADVANCE_BATCH(batch);
+
+    BEGIN_BATCH(batch, 7);
+    OUT_BATCH(batch, CMD_3DPRIMITIVE | (7 - 2));
+    OUT_BATCH(batch, GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL);
+    OUT_BATCH(batch, 3); /* vertex count per instance */
+    OUT_BATCH(batch, 0); /* start vertex offset */
+    OUT_BATCH(batch, 1); /* single instance */
+    OUT_BATCH(batch, 0); /* start instance location */
+    OUT_BATCH(batch, 0);
+    ADVANCE_BATCH(batch);
+}
+
+/*
+ * Emit CMD_VERTEX_ELEMENTS describing three elements, all sourced from
+ * vertex buffer 0.
+ *
+ * The VUE layout
+ * dword 0-3: pad (0, 0, 0, 0)
+ * dword 4-7: position (x, y, 1.0, 1.0),
+ * dword 8-11: texture coordinate 0 (u0, v0, 1.0, 1.0)
+ */
+static void
+gen8_emit_vertex_element_state(VADriverContextP ctx)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct intel_batchbuffer *batch = i965->batch;
+
+    /*
+     * One header dword plus three two-dword elements.  Bracket them with
+     * BEGIN_BATCH/ADVANCE_BATCH like every other emitter in this file so
+     * the dword count is declared up front.
+     */
+    BEGIN_BATCH(batch, 7);
+
+    /* Set up our vertex elements, sourced from the single vertex buffer. */
+    OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | (7 - 2));
+
+    /* Element state 0. These are 4 dwords of 0 required for the VUE format.
+     * We don't really know or care what they do.
+     */
+    OUT_BATCH(batch, (0 << GEN8_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
+              GEN8_VE0_VALID |
+              (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
+              (0 << VE0_OFFSET_SHIFT));
+    OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_0_SHIFT) |
+              (I965_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT) |
+              (I965_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT) |
+              (I965_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_3_SHIFT));
+
+    /* offset 8: X, Y -> {x, y, 1.0, 1.0} */
+    OUT_BATCH(batch, (0 << GEN8_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
+              GEN8_VE0_VALID |
+              (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
+              (8 << VE0_OFFSET_SHIFT));
+    OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
+              (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
+              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
+              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
+
+    /* offset 0: u,v -> {U, V, 1.0, 1.0} */
+    OUT_BATCH(batch, (0 << GEN8_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
+              GEN8_VE0_VALID |
+              (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
+              (0 << VE0_OFFSET_SHIFT));
+    OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
+              (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
+              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
+              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
+
+    ADVANCE_BATCH(batch);
+}
+
+/*
+ * Emit an all-zero vertex shader setup: VS constant buffers, the 3DSTATE_VS
+ * command itself (no kernel), and zero binding table / sampler state
+ * pointers for the VS stage.
+ */
+static void
+gen8_emit_vs_state(VADriverContextP ctx)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct intel_batchbuffer *batch = i965->batch;
+    int i;
+
+    /* disable VS constant buffer: two leading dwords + buffers 0-3, all 0 */
+    BEGIN_BATCH(batch, 11);
+    OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_VS | (11 - 2));
+    for (i = 0; i < 10; i++) {
+        OUT_BATCH(batch, 0);
+    }
+    ADVANCE_BATCH(batch);
+
+    /*
+     * without VS kernel: kernel pointer, dispatch flags, GRF/URB
+     * definition and pass-through dwords are all 0
+     */
+    BEGIN_BATCH(batch, 9);
+    OUT_BATCH(batch, GEN6_3DSTATE_VS | (9 - 2));
+    for (i = 0; i < 8; i++) {
+        OUT_BATCH(batch, 0);
+    }
+    ADVANCE_BATCH(batch);
+
+    BEGIN_BATCH(batch, 2);
+    OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_VS | (2 - 2));
+    OUT_BATCH(batch, 0);
+    ADVANCE_BATCH(batch);
+
+    BEGIN_BATCH(batch, 2);
+    OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_VS | (2 - 2));
+    OUT_BATCH(batch, 0);
+    ADVANCE_BATCH(batch);
+}
+
+/*
+ * URB layout on GEN8
+ * ----------------------------------------
+ * | PS Push Constants (8KB) | VS entries |
+ * ----------------------------------------
+ *
+ * Emits the push constant allocation for every stage (zero for VS, DS, HS
+ * and GS; size 8 at offset 0 for PS) followed by the URB configuration:
+ * 64 VS entries of size (4 - 1) starting at address 4, and empty GS, HS
+ * and DS allocations starting at 5, 6 and 7.
+ */
+static void
+gen8_emit_urb(VADriverContextP ctx)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct intel_batchbuffer *batch = i965->batch;
+    /* The minimum urb entries is 64 */
+    const unsigned int num_urb_entries = 64;
+    /* Stages that receive no push constant space, in emission order. */
+    const unsigned int no_push_constant_cmds[] = {
+        GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_VS,
+        GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_DS,
+        GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_HS,
+        GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_GS,
+    };
+    unsigned int i;
+
+    for (i = 0; i < sizeof(no_push_constant_cmds) / sizeof(no_push_constant_cmds[0]); i++) {
+        BEGIN_BATCH(batch, 2);
+        OUT_BATCH(batch, no_push_constant_cmds[i] | (2 - 2));
+        OUT_BATCH(batch, 0);
+        ADVANCE_BATCH(batch);
+    }
+
+    /* Size is 8Kbs and base address is 0Kb */
+    BEGIN_BATCH(batch, 2);
+    OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS | (2 - 2));
+    OUT_BATCH(batch,
+              (0 << GEN8_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT) |
+              (8 << GEN8_PUSH_CONSTANT_BUFFER_SIZE_SHIFT));
+    ADVANCE_BATCH(batch);
+
+    /* VS owns all the URB entries. */
+    BEGIN_BATCH(batch, 2);
+    OUT_BATCH(batch, GEN7_3DSTATE_URB_VS | (2 - 2));
+    OUT_BATCH(batch,
+              (num_urb_entries << GEN7_URB_ENTRY_NUMBER_SHIFT) |
+              ((4 - 1) << GEN7_URB_ENTRY_SIZE_SHIFT) |
+              (4 << GEN7_URB_STARTING_ADDRESS_SHIFT));
+    ADVANCE_BATCH(batch);
+
+    /* GS, HS and DS get no entries, only consecutive starting addresses. */
+    BEGIN_BATCH(batch, 2);
+    OUT_BATCH(batch, GEN7_3DSTATE_URB_GS | (2 - 2));
+    OUT_BATCH(batch,
+              (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
+              (5 << GEN7_URB_STARTING_ADDRESS_SHIFT));
+    ADVANCE_BATCH(batch);
+
+    BEGIN_BATCH(batch, 2);
+    OUT_BATCH(batch, GEN7_3DSTATE_URB_HS | (2 - 2));
+    OUT_BATCH(batch,
+              (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
+              (6 << GEN7_URB_STARTING_ADDRESS_SHIFT));
+    ADVANCE_BATCH(batch);
+
+    BEGIN_BATCH(batch, 2);
+    OUT_BATCH(batch, GEN7_3DSTATE_URB_DS | (2 - 2));
+    OUT_BATCH(batch,
+              (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
+              (7 << GEN7_URB_STARTING_ADDRESS_SHIFT));
+    ADVANCE_BATCH(batch);
+}
+
+/*
+ * Emit all-zero state for the pipeline stages the video renderer does not
+ * use: GS, HS, TE, DS and STREAMOUT.  For each shader stage that means the
+ * constant buffers, the stage command itself (no kernel), and zero binding
+ * table / sampler state pointers.  Every payload dword is 0.
+ */
+static void
+gen8_emit_bypass_state(VADriverContextP ctx)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct intel_batchbuffer *batch = i965->batch;
+    int i;
+
+    /* bypass GS */
+    BEGIN_BATCH(batch, 11);
+    OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_GS | (11 - 2));
+    for (i = 0; i < 10; i++) {
+        OUT_BATCH(batch, 0);
+    }
+    ADVANCE_BATCH(batch);
+
+    /*
+     * without GS kernel: shader address, dispatch flags (DW3), GRF and
+     * URB offset/length (DW6) and pass-through dwords are all 0
+     */
+    BEGIN_BATCH(batch, 10);
+    OUT_BATCH(batch, GEN6_3DSTATE_GS | (10 - 2));
+    for (i = 0; i < 9; i++) {
+        OUT_BATCH(batch, 0);
+    }
+    ADVANCE_BATCH(batch);
+
+    BEGIN_BATCH(batch, 2);
+    OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS | (2 - 2));
+    OUT_BATCH(batch, 0);
+    ADVANCE_BATCH(batch);
+
+    BEGIN_BATCH(batch, 2);
+    OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_GS | (2 - 2));
+    OUT_BATCH(batch, 0);
+    ADVANCE_BATCH(batch);
+
+    /* disable HS */
+    BEGIN_BATCH(batch, 11);
+    OUT_BATCH(batch, GEN7_3DSTATE_CONSTANT_HS | (11 - 2));
+    for (i = 0; i < 10; i++) {
+        OUT_BATCH(batch, 0);
+    }
+    ADVANCE_BATCH(batch);
+
+    /*
+     * HS pass-through (DW2), shader address (DW3) and shader flags with
+     * URB offset/length (DW5 onwards) are all 0
+     */
+    BEGIN_BATCH(batch, 9);
+    OUT_BATCH(batch, GEN7_3DSTATE_HS | (9 - 2));
+    for (i = 0; i < 8; i++) {
+        OUT_BATCH(batch, 0);
+    }
+    ADVANCE_BATCH(batch);
+
+    BEGIN_BATCH(batch, 2);
+    OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS | (2 - 2));
+    OUT_BATCH(batch, 0);
+    ADVANCE_BATCH(batch);
+
+    BEGIN_BATCH(batch, 2);
+    OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_HS | (2 - 2));
+    OUT_BATCH(batch, 0);
+    ADVANCE_BATCH(batch);
+
+    /* Disable TE */
+    BEGIN_BATCH(batch, 4);
+    OUT_BATCH(batch, GEN7_3DSTATE_TE | (4 - 2));
+    for (i = 0; i < 3; i++) {
+        OUT_BATCH(batch, 0);
+    }
+    ADVANCE_BATCH(batch);
+
+    /* Disable DS */
+    BEGIN_BATCH(batch, 11);
+    OUT_BATCH(batch, GEN7_3DSTATE_CONSTANT_DS | (11 - 2));
+    for (i = 0; i < 10; i++) {
+        OUT_BATCH(batch, 0);
+    }
+    ADVANCE_BATCH(batch);
+
+    /*
+     * DS shader pointer (DW1), dispatch flags (DW3-5), pass-through,
+     * GRF, URB offset/length and thread number (DW6-7) and output URB
+     * (DW8) are all 0
+     */
+    BEGIN_BATCH(batch, 9);
+    OUT_BATCH(batch, GEN7_3DSTATE_DS | (9 - 2));
+    for (i = 0; i < 8; i++) {
+        OUT_BATCH(batch, 0);
+    }
+    ADVANCE_BATCH(batch);
+
+    BEGIN_BATCH(batch, 2);
+    OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS | (2 - 2));
+    OUT_BATCH(batch, 0);
+    ADVANCE_BATCH(batch);
+
+    BEGIN_BATCH(batch, 2);
+    OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_DS | (2 - 2));
+    OUT_BATCH(batch, 0);
+    ADVANCE_BATCH(batch);
+
+    /* Disable STREAMOUT */
+    BEGIN_BATCH(batch, 5);
+    OUT_BATCH(batch, GEN7_3DSTATE_STREAMOUT | (5 - 2));
+    for (i = 0; i < 4; i++) {
+        OUT_BATCH(batch, 0);
+    }
+    ADVANCE_BATCH(batch);
+}
+
+/*
+ * Emit the state that does not depend on the surface being rendered:
+ * select the 3D pipeline, configure single-sample rendering (multisample
+ * state, an all-zero sample pattern, sample mask 1) and set the system
+ * instruction pointer to 0.
+ */
+static void
+gen8_emit_invarient_states(VADriverContextP ctx)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct intel_batchbuffer *batch = i965->batch;
+    int i;
+
+    BEGIN_BATCH(batch, 1);
+    OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D);
+    ADVANCE_BATCH(batch);
+
+    BEGIN_BATCH(batch, 2);
+    OUT_BATCH(batch, GEN8_3DSTATE_MULTISAMPLE | (2 - 2));
+    OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
+              GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
+    ADVANCE_BATCH(batch);
+
+    /* Update 3D Multisample pattern */
+    BEGIN_BATCH(batch, 9);
+    OUT_BATCH(batch, GEN8_3DSTATE_SAMPLE_PATTERN | (9 - 2));
+    for (i = 0; i < 8; i++) {
+        OUT_BATCH(batch, 0);
+    }
+    ADVANCE_BATCH(batch);
+
+    BEGIN_BATCH(batch, 2);
+    OUT_BATCH(batch, GEN6_3DSTATE_SAMPLE_MASK | (2 - 2));
+    OUT_BATCH(batch, 1);
+    ADVANCE_BATCH(batch);
+
+    /*
+     * Set system instruction pointer.  The command is three dwords long
+     * here, so its length field is (3 - 2) like every other command in
+     * this file; it used to be 0, which left the last dword outside the
+     * declared command length.
+     */
+    BEGIN_BATCH(batch, 3);
+    OUT_BATCH(batch, CMD_STATE_SIP | (3 - 2));
+    OUT_BATCH(batch, 0);
+    OUT_BATCH(batch, 0);
+    ADVANCE_BATCH(batch);
+}
+
+/*
+ * Emit an all-zero GEN6_3DSTATE_CLIP (clipper in pass-through).
+ */
+static void
+gen8_emit_clip_state(VADriverContextP ctx)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct intel_batchbuffer *batch = i965->batch;
+
+    /*
+     * Bracket the four dwords with BEGIN_BATCH/ADVANCE_BATCH like the other
+     * emitters in this file so the dword count is declared up front.
+     */
+    BEGIN_BATCH(batch, 4);
+    OUT_BATCH(batch, GEN6_3DSTATE_CLIP | (4 - 2));
+    OUT_BATCH(batch, 0);
+    OUT_BATCH(batch, 0); /* pass-through */
+    OUT_BATCH(batch, 0);
+    ADVANCE_BATCH(batch);
+}
+
+/*
+ * Emit the rasteriser / setup state: 3DSTATE_RASTER with culling disabled,
+ * 3DSTATE_SBE forcing the URB entry read length and offset (1 output,
+ * read length 1, read offset 1), an all-zero 3DSTATE_SBE_SWIZ, and
+ * 3DSTATE_SF with the trifan provoking vertex set to 2.
+ */
+static void
+gen8_emit_sf_state(VADriverContextP ctx)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct intel_batchbuffer *batch = i965->batch;
+    int i;
+
+    BEGIN_BATCH(batch, 5);
+    OUT_BATCH(batch, GEN8_3DSTATE_RASTER | (5 - 2));
+    OUT_BATCH(batch, GEN8_3DSTATE_RASTER_CULL_NONE);
+    for (i = 0; i < 3; i++) {
+        OUT_BATCH(batch, 0);
+    }
+    ADVANCE_BATCH(batch);
+
+    BEGIN_BATCH(batch, 4);
+    OUT_BATCH(batch, GEN7_3DSTATE_SBE | (4 - 2));
+    OUT_BATCH(batch,
+              GEN8_SBE_FORCE_URB_ENTRY_READ_LENGTH |
+              GEN8_SBE_FORCE_URB_ENTRY_READ_OFFSET |
+              (1 << GEN7_SBE_NUM_OUTPUTS_SHIFT) |
+              (1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT) |
+              (1 << GEN8_SBE_URB_ENTRY_READ_OFFSET_SHIFT));
+    OUT_BATCH(batch, 0);
+    OUT_BATCH(batch, 0);
+    ADVANCE_BATCH(batch);
+
+    /* SBE for backend setup */
+    BEGIN_BATCH(batch, 11);
+    OUT_BATCH(batch, GEN8_3DSTATE_SBE_SWIZ | (11 - 2));
+    for (i = 0; i < 10; i++) {
+        OUT_BATCH(batch, 0);
+    }
+    ADVANCE_BATCH(batch);
+
+    BEGIN_BATCH(batch, 4);
+    OUT_BATCH(batch, GEN6_3DSTATE_SF | (4 - 2));
+    OUT_BATCH(batch, 0);
+    OUT_BATCH(batch, 0);
+    OUT_BATCH(batch, 2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT);
+    ADVANCE_BATCH(batch);
+}
+
+/*
+ * Emit the pixel shader (WM/PS) state for the given render kernel.
+ *
+ * PS_KERNEL gets a PSBLEND with only the writeable-RT bit; PS_SUBPIC_KERNEL
+ * additionally enables colour buffer blending with SRC_ALPHA /
+ * INV_SRC_ALPHA factors.  Any other kernel value emits no PSBLEND at all.
+ * The PS constant buffer 0 points at the CURBE offset and the kernel is
+ * taken from render_state->render_kernels[kernel].
+ */
+static void
+gen8_emit_wm_state(VADriverContextP ctx, int kernel)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct intel_batchbuffer *batch = i965->batch;
+    struct i965_render_state *render_state = &i965->render_state;
+    const unsigned int num_samples = 0;
+    const unsigned int max_threads = i965->intel.device_info->max_wm_threads - 2;
+    int i;
+
+    BEGIN_BATCH(batch, 2);
+    OUT_BATCH(batch, GEN8_3DSTATE_PSEXTRA | (2 - 2));
+    OUT_BATCH(batch,
+              (GEN8_PSX_PIXEL_SHADER_VALID | GEN8_PSX_ATTRIBUTE_ENABLE));
+    ADVANCE_BATCH(batch);
+
+    if (kernel == PS_KERNEL || kernel == PS_SUBPIC_KERNEL) {
+        BEGIN_BATCH(batch, 2);
+        OUT_BATCH(batch, GEN8_3DSTATE_PSBLEND | (2 - 2));
+        if (kernel == PS_KERNEL) {
+            /* Plain video: no blending. */
+            OUT_BATCH(batch,
+                      GEN8_PS_BLEND_HAS_WRITEABLE_RT);
+        } else {
+            /* Subpicture: alpha-blend over the already rendered frame. */
+            OUT_BATCH(batch,
+                      (GEN8_PS_BLEND_HAS_WRITEABLE_RT |
+                       GEN8_PS_BLEND_COLOR_BUFFER_BLEND_ENABLE |
+                       (I965_BLENDFACTOR_SRC_ALPHA << GEN8_PS_BLEND_SRC_ALPHA_BLEND_FACTOR_SHIFT) |
+                       (I965_BLENDFACTOR_INV_SRC_ALPHA << GEN8_PS_BLEND_DST_ALPHA_BLEND_FACTOR_SHIFT) |
+                       (I965_BLENDFACTOR_SRC_ALPHA << GEN8_PS_BLEND_SRC_BLEND_FACTOR_SHIFT) |
+                       (I965_BLENDFACTOR_INV_SRC_ALPHA << GEN8_PS_BLEND_DST_BLEND_FACTOR_SHIFT)));
+        }
+        ADVANCE_BATCH(batch);
+    }
+
+    BEGIN_BATCH(batch, 2);
+    OUT_BATCH(batch, GEN6_3DSTATE_WM | (2 - 2));
+    OUT_BATCH(batch,
+              GEN7_WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
+    ADVANCE_BATCH(batch);
+
+    BEGIN_BATCH(batch, 11);
+    OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_PS | (11 - 2));
+    OUT_BATCH(batch, URB_CS_ENTRY_SIZE);
+    OUT_BATCH(batch, 0);
+    /* DW3-4. Constant buffer 0 */
+    OUT_BATCH(batch, render_state->curbe_offset);
+    OUT_BATCH(batch, 0);
+    /* DW5-10. Constant buffer 1-3 */
+    for (i = 0; i < 6; i++) {
+        OUT_BATCH(batch, 0);
+    }
+    ADVANCE_BATCH(batch);
+
+    BEGIN_BATCH(batch, 12);
+    OUT_BATCH(batch, GEN7_3DSTATE_PS | (12 - 2));
+    /* PS shader address */
+    OUT_BATCH(batch, render_state->render_kernels[kernel].kernel_offset);
+    OUT_BATCH(batch, 0);
+    /* DW3. PS shader flag. Binding table cnt/sample cnt */
+    OUT_BATCH(batch,
+              (1 << GEN7_PS_SAMPLER_COUNT_SHIFT) |
+              (5 << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
+    /* DW4-5. Scratch space */
+    OUT_BATCH(batch, 0); /* scratch space base offset */
+    OUT_BATCH(batch, 0);
+    /* DW6. PS shader threads. */
+    OUT_BATCH(batch,
+              ((max_threads - 1) << GEN8_PS_MAX_THREADS_SHIFT) | num_samples |
+              GEN7_PS_PUSH_CONSTANT_ENABLE |
+              GEN7_PS_16_DISPATCH_ENABLE);
+    /* DW7. PS shader GRF */
+    OUT_BATCH(batch,
+              (6 << GEN7_PS_DISPATCH_START_GRF_SHIFT_0));
+    /* DW8-11. kernel 1 and kernel 2 pointers, unused */
+    for (i = 0; i < 4; i++) {
+        OUT_BATCH(batch, 0);
+    }
+    ADVANCE_BATCH(batch);
+
+    BEGIN_BATCH(batch, 2);
+    OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS | (2 - 2));
+    OUT_BATCH(batch, BINDING_TABLE_OFFSET);
+    ADVANCE_BATCH(batch);
+}
+
+/*
+ * Emit a NULL depth buffer (D32_FLOAT format, I965_SURFACE_NULL type) and
+ * all-zero hierarchical depth, stencil buffer and clear params commands.
+ */
+static void
+gen8_emit_depth_buffer_state(VADriverContextP ctx)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct intel_batchbuffer *batch = i965->batch;
+    int i;
+
+    BEGIN_BATCH(batch, 8);
+    OUT_BATCH(batch, GEN7_3DSTATE_DEPTH_BUFFER | (8 - 2));
+    OUT_BATCH(batch,
+              (I965_DEPTHFORMAT_D32_FLOAT << 18) |
+              (I965_SURFACE_NULL << 29));
+    /* DW2-3. Depth Buffer Address, DW4-7. Surface structure */
+    for (i = 0; i < 6; i++) {
+        OUT_BATCH(batch, 0);
+    }
+    ADVANCE_BATCH(batch);
+
+    /* Update the Hier Depth buffer */
+    BEGIN_BATCH(batch, 5);
+    OUT_BATCH(batch, GEN7_3DSTATE_HIER_DEPTH_BUFFER | (5 - 2));
+    for (i = 0; i < 4; i++) {
+        OUT_BATCH(batch, 0);
+    }
+    ADVANCE_BATCH(batch);
+
+    /* Update the stencil buffer */
+    BEGIN_BATCH(batch, 5);
+    OUT_BATCH(batch, GEN7_3DSTATE_STENCIL_BUFFER | (5 - 2));
+    for (i = 0; i < 4; i++) {
+        OUT_BATCH(batch, 0);
+    }
+    ADVANCE_BATCH(batch);
+
+    BEGIN_BATCH(batch, 3);
+    OUT_BATCH(batch, GEN7_3DSTATE_CLEAR_PARAMS | (3 - 2));
+    for (i = 0; i < 2; i++) {
+        OUT_BATCH(batch, 0);
+    }
+    ADVANCE_BATCH(batch);
+}
+
+/*
+ * Emit an all-zero GEN8_3DSTATE_WM_DEPTH_STENCIL command.
+ */
+static void
+gen8_emit_depth_stencil_state(VADriverContextP ctx)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct intel_batchbuffer *batch = i965->batch;
+    int i;
+
+    BEGIN_BATCH(batch, 3);
+    OUT_BATCH(batch, GEN8_3DSTATE_WM_DEPTH_STENCIL | (3 - 2));
+    for (i = 0; i < 2; i++) {
+        OUT_BATCH(batch, 0);
+    }
+    ADVANCE_BATCH(batch);
+}
+
+/*
+ * Emit an all-zero GEN8_3DSTATE_WM_HZ_OP command.
+ */
+static void
+gen8_emit_wm_hz_op(VADriverContextP ctx)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct intel_batchbuffer *batch = i965->batch;
+    int i;
+
+    BEGIN_BATCH(batch, 5);
+    OUT_BATCH(batch, GEN8_3DSTATE_WM_HZ_OP | (5 - 2));
+    for (i = 0; i < 4; i++) {
+        OUT_BATCH(batch, 0);
+    }
+    ADVANCE_BATCH(batch);
+}
+
+/*
+ * Emit the viewport state pointers: the CC viewport points at its offset
+ * in the dynamic state buffer, the SF/CLIP viewport pointer is 0.
+ */
+static void
+gen8_emit_viewport_state_pointers(VADriverContextP ctx)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct intel_batchbuffer *batch = i965->batch;
+
+    /* CC viewport */
+    BEGIN_BATCH(batch, 2);
+    OUT_BATCH(batch, GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC | (2 - 2));
+    OUT_BATCH(batch, i965->render_state.cc_viewport_offset);
+    ADVANCE_BATCH(batch);
+
+    /* SF/CLIP viewport */
+    BEGIN_BATCH(batch, 2);
+    OUT_BATCH(batch, GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CL | (2 - 2));
+    OUT_BATCH(batch, 0);
+    ADVANCE_BATCH(batch);
+}
+
+/*
+ * Emit the pixel-shader sampler state pointer packet, using
+ * render_state->sampler_offset as the pointer value.
+ */
+static void
+gen8_emit_sampler_state_pointers(VADriverContextP ctx)
+{
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
+ struct intel_batchbuffer *batch = i965->batch;
+ struct i965_render_state *render_state = &i965->render_state;
+
+ BEGIN_BATCH(batch, 2);
+ OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS | (2 - 2));
+ OUT_BATCH(batch, render_state->sampler_offset);
+ ADVANCE_BATCH(batch);
+}
+
+
+/* Thin wrapper: delegates to the shared i965_render_drawing_rectangle(). */
+static void
+gen7_emit_drawing_rectangle(VADriverContextP ctx)
+{
+ i965_render_drawing_rectangle(ctx);
+}
+
+/*
+ * Emit the complete gen8 3D pipeline state plus the vertices for one draw
+ * into i965->batch, bracketed by intel_batchbuffer_start_atomic() /
+ * intel_batchbuffer_end_atomic().
+ *
+ * kernel: forwarded unchanged to gen8_emit_wm_state(); callers in this file
+ * pass PS_KERNEL or PS_SUBPIC_KERNEL.
+ *
+ * The batch is not flushed here; callers flush it themselves.
+ */
+static void
+gen8_render_emit_states(VADriverContextP ctx, int kernel)
+{
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
+ struct intel_batchbuffer *batch = i965->batch;
+
+ /* NOTE(review): 0x1000 is presumably the batch space to reserve - confirm. */
+ intel_batchbuffer_start_atomic(batch, 0x1000);
+ intel_batchbuffer_emit_mi_flush(batch);
+ gen8_emit_invarient_states(ctx);
+ gen8_emit_state_base_address(ctx);
+ gen8_emit_viewport_state_pointers(ctx);
+ gen8_emit_urb(ctx);
+ gen8_emit_cc_state_pointers(ctx);
+ gen8_emit_sampler_state_pointers(ctx);
+ gen8_emit_wm_hz_op(ctx);
+ gen8_emit_bypass_state(ctx);
+ gen8_emit_vs_state(ctx);
+ gen8_emit_clip_state(ctx);
+ gen8_emit_sf_state(ctx);
+ gen8_emit_depth_stencil_state(ctx);
+ gen8_emit_wm_state(ctx, kernel);
+ gen8_emit_depth_buffer_state(ctx);
+ gen7_emit_drawing_rectangle(ctx);
+ gen8_emit_vertex_element_state(ctx);
+ gen8_emit_vertices(ctx);
+ intel_batchbuffer_end_atomic(batch);
+}
+
+/*
+ * render_put_surface hook for gen8 (installed by gen8_render_init()).
+ *
+ * Re-initializes the render state, sets up the states for drawing
+ * obj_surface from src_rect to dst_rect, clears the destination region,
+ * emits the pipeline with PS_KERNEL and flushes the batch.
+ *
+ * flags is forwarded unchanged to gen8_render_setup_states().
+ */
+static void
+gen8_render_put_surface(
+ VADriverContextP ctx,
+ struct object_surface *obj_surface,
+ const VARectangle *src_rect,
+ const VARectangle *dst_rect,
+ unsigned int flags
+)
+{
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
+ struct intel_batchbuffer *batch = i965->batch;
+
+ gen8_render_initialize(ctx);
+ gen8_render_setup_states(ctx, obj_surface, src_rect, dst_rect, flags);
+ gen8_clear_dest_region(ctx);
+ gen8_render_emit_states(ctx, PS_KERNEL);
+ intel_batchbuffer_flush(batch);
+}
+
+/*
+ * Write the blend state used for subpicture rendering into the dynamic
+ * state buffer at render_state->blend_state_offset.
+ *
+ * The region (blend_state_size bytes) is zeroed first; it holds a
+ * gen8_global_blend_state immediately followed by one gen8_blend_state_rt.
+ * Colour and alpha both use ADD with SRC_ALPHA / INV_SRC_ALPHA factors, and
+ * pre- and post-blend clamping are enabled.
+ */
+static void
+gen8_subpicture_render_blend_state(VADriverContextP ctx)
+{
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
+ struct i965_render_state *render_state = &i965->render_state;
+ struct gen8_global_blend_state *global_blend_state;
+ struct gen8_blend_state_rt *blend_state;
+ unsigned char *cc_ptr;
+
+ dri_bo_map(render_state->dynamic_state.bo, 1);
+ assert(render_state->dynamic_state.bo->virtual);
+
+ cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual +
+ render_state->blend_state_offset;
+
+ global_blend_state = (struct gen8_global_blend_state*) cc_ptr;
+
+ memset(global_blend_state, 0, render_state->blend_state_size);
+ /* Global blend state + blend_state for Render Target */
+ /* The per-render-target entry starts right after the global header. */
+ blend_state = (struct gen8_blend_state_rt *)(global_blend_state + 1);
+ blend_state->blend0.color_blend_func = I965_BLENDFUNCTION_ADD;
+ blend_state->blend0.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;
+ blend_state->blend0.src_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
+ blend_state->blend0.alpha_blend_func = I965_BLENDFUNCTION_ADD;
+ blend_state->blend0.ia_dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;
+ blend_state->blend0.ia_src_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
+ blend_state->blend0.colorbuf_blend = 1;
+ blend_state->blend1.post_blend_clamp_enable = 1;
+ blend_state->blend1.pre_blend_clamp_enable = 1;
+ blend_state->blend1.clamp_range = 0; /* clamp range [0, 1] */
+
+ dri_bo_unmap(render_state->dynamic_state.bo);
+}
+
+/*
+ * Store the subpicture's global alpha as the first float of the constant
+ * (CURBE) area in the dynamic state buffer.
+ *
+ * The alpha is 1.0 unless the subpicture selected by
+ * obj_surface->subpic_render_idx has VA_SUBPICTURE_GLOBAL_ALPHA set, in
+ * which case its global_alpha is used.
+ */
+static void
+gen8_subpic_render_upload_constants(VADriverContextP ctx,
+ struct object_surface *obj_surface)
+{
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
+ struct i965_render_state *render_state = &i965->render_state;
+ unsigned int slot = obj_surface->subpic_render_idx;
+ struct object_subpic *subpic = obj_surface->obj_subpic[slot];
+ float alpha = 1.0;
+ float *curbe;
+
+ if (subpic->flags & VA_SUBPICTURE_GLOBAL_ALPHA)
+ alpha = subpic->global_alpha;
+
+ dri_bo_map(render_state->dynamic_state.bo, 1);
+ assert(render_state->dynamic_state.bo->virtual);
+
+ /* The constant lives curbe_offset bytes into the mapped buffer. */
+ curbe = (float *)((unsigned char *) render_state->dynamic_state.bo->virtual +
+ render_state->curbe_offset);
+ curbe[0] = alpha;
+
+ dri_bo_unmap(render_state->dynamic_state.bo);
+}
+
+/*
+ * Prepare all indirect state needed to draw the current subpicture of
+ * obj_surface: destination and source surface states, sampler, CC viewport,
+ * colour-calc state, blend state, constants and the vertex buffer.
+ *
+ * src_rect is accepted for signature symmetry but is not used here; only
+ * dst_rect is forwarded (to the vertex upload).
+ */
+static void
+gen8_subpicture_render_setup_states(
+ VADriverContextP ctx,
+ struct object_surface *obj_surface,
+ const VARectangle *src_rect,
+ const VARectangle *dst_rect
+)
+{
+ gen8_render_dest_surface_state(ctx, 0);
+ gen8_subpic_render_src_surfaces_state(ctx, obj_surface);
+ gen8_render_sampler(ctx);
+ gen8_render_cc_viewport(ctx);
+ gen8_render_color_calc_state(ctx);
+ gen8_subpicture_render_blend_state(ctx);
+ gen8_subpic_render_upload_constants(ctx, obj_surface);
+ i965_subpic_render_upload_vertex(ctx, obj_surface, dst_rect);
+}
+
+/*
+ * render_put_subpicture hook for gen8 (installed by gen8_render_init()).
+ *
+ * Draws the subpicture selected by obj_surface->subpic_render_idx: sets up
+ * the subpicture states, emits the pipeline with PS_SUBPIC_KERNEL, uploads
+ * the image palette and flushes the batch. The selected subpicture must be
+ * non-NULL (asserted).
+ */
+static void
+gen8_render_put_subpicture(
+ VADriverContextP ctx,
+ struct object_surface *obj_surface,
+ const VARectangle *src_rect,
+ const VARectangle *dst_rect
+)
+{
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
+ struct intel_batchbuffer *batch = i965->batch;
+ unsigned int index = obj_surface->subpic_render_idx;
+ struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];
+
+ assert(obj_subpic);
+ gen8_render_initialize(ctx);
+ gen8_subpicture_render_setup_states(ctx, obj_surface, src_rect, dst_rect);
+ gen8_render_emit_states(ctx, PS_SUBPIC_KERNEL);
+ /* NOTE(review): 0xff is presumably the palette alpha - confirm against
+ * i965_render_upload_image_palette(). */
+ i965_render_upload_image_palette(ctx, obj_subpic->obj_image, 0xff);
+ intel_batchbuffer_flush(batch);
+}
+
+/*
+ * render_terminate hook for gen8: drop every buffer reference held by the
+ * render state and reset the corresponding pointers to NULL.
+ */
+static void
+gen8_render_terminate(VADriverContextP ctx)
+{
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
+ struct i965_render_state *rs = &i965->render_state;
+
+ /* These two are released unconditionally. */
+ dri_bo_unreference(rs->vb.vertex_buffer);
+ rs->vb.vertex_buffer = NULL;
+
+ dri_bo_unreference(rs->wm.surface_state_binding_table_bo);
+ rs->wm.surface_state_binding_table_bo = NULL;
+
+ /* The gen8 state heaps are released only when present. */
+ if (rs->instruction_state.bo) {
+ dri_bo_unreference(rs->instruction_state.bo);
+ rs->instruction_state.bo = NULL;
+ }
+
+ if (rs->dynamic_state.bo) {
+ dri_bo_unreference(rs->dynamic_state.bo);
+ rs->dynamic_state.bo = NULL;
+ }
+
+ if (rs->indirect_state.bo) {
+ dri_bo_unreference(rs->indirect_state.bo);
+ rs->indirect_state.bo = NULL;
+ }
+
+ /* The draw region owns a bo reference and is itself heap-allocated. */
+ if (rs->draw_region) {
+ dri_bo_unreference(rs->draw_region->bo);
+ free(rs->draw_region);
+ rs->draw_region = NULL;
+ }
+}
+
+/*
+ * Install the gen8 render hooks and upload the render kernels into a single
+ * "kernel shader" buffer object.
+ *
+ * The kernel table is copied from render_kernels_gen8. Each kernel is then
+ * copied into the buffer at its kernel_offset; offsets advance by the
+ * kernel size rounded up to ALIGNMENT. The buffer is sized as the sum of
+ * the kernel sizes plus 4096 bytes of slack, which covers that padding.
+ *
+ * Returns true on success, false if the kernel buffer cannot be allocated.
+ */
+bool
+gen8_render_init(VADriverContextP ctx)
+{
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
+ struct i965_render_state *render_state = &i965->render_state;
+ int i, kernel_size;
+ unsigned int kernel_offset, end_offset;
+ unsigned char *kernel_ptr;
+ struct i965_kernel *kernel;
+
+ render_state->render_put_surface = gen8_render_put_surface;
+ render_state->render_put_subpicture = gen8_render_put_subpicture;
+ render_state->render_terminate = gen8_render_terminate;
+
+ memcpy(render_state->render_kernels, render_kernels_gen8,
+ sizeof(render_state->render_kernels));
+
+ kernel_size = 4096;
+
+ for (i = 0; i < NUM_RENDER_KERNEL; i++) {
+ kernel = &render_state->render_kernels[i];
+
+ if (!kernel->size)
+ continue;
+
+ kernel_size += kernel->size;
+ }
+
+ render_state->instruction_state.bo = dri_bo_alloc(i965->intel.bufmgr,
+ "kernel shader",
+ kernel_size,
+ 0x1000);
+ if (render_state->instruction_state.bo == NULL) {
+ WARN_ONCE("failure to allocate the buffer space for kernel shader\n");
+ return false;
+ }
+
+ render_state->instruction_state.bo_size = kernel_size;
+ render_state->instruction_state.end_offset = 0;
+ end_offset = 0;
+
+ dri_bo_map(render_state->instruction_state.bo, 1);
+ /* Make sure the mapping produced a CPU pointer before copying into it,
+ * as the other map sites in this file do. */
+ assert(render_state->instruction_state.bo->virtual);
+ kernel_ptr = (unsigned char *)(render_state->instruction_state.bo->virtual);
+ for (i = 0; i < NUM_RENDER_KERNEL; i++) {
+ kernel = &render_state->render_kernels[i];
+ kernel_offset = end_offset;
+ /* Empty kernels still get an offset; they just occupy no space. */
+ kernel->kernel_offset = kernel_offset;
+
+ if (!kernel->size)
+ continue;
+
+ memcpy(kernel_ptr + kernel_offset, kernel->bin, kernel->size);
+
+ end_offset += ALIGN(kernel->size, ALIGNMENT);
+ }
+
+ render_state->instruction_state.end_offset = end_offset;
+
+ dri_bo_unmap(render_state->instruction_state.bo);
+
+ return true;
+}
diff --git a/src/gen8_vme.c b/src/gen8_vme.c
new file mode 100644
index 0000000..8cae2a0
--- /dev/null
+++ b/src/gen8_vme.c
@@ -0,0 +1,1214 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Zhao Yakui <yakui.zhao@intel.com>
+ * Xiang Haihao <haihao.xiang@intel.com>
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <string.h>
+#include <assert.h>
+
+#include "intel_batchbuffer.h"
+#include "intel_driver.h"
+
+#include "i965_defines.h"
+#include "i965_drv_video.h"
+#include "i965_encoder.h"
+#include "gen6_vme.h"
+#include "gen6_mfc.h"
+
+/*
+ * This file uses the gen8 surface-state size, so any earlier definition of
+ * SURFACE_STATE_PADDED_SIZE is replaced.
+ */
+#ifdef SURFACE_STATE_PADDED_SIZE
+#undef SURFACE_STATE_PADDED_SIZE
+#endif
+
+#define SURFACE_STATE_PADDED_SIZE SURFACE_STATE_PADDED_SIZE_GEN8
+/* Byte offset of surface state number `index`. */
+#define SURFACE_STATE_OFFSET(index) (SURFACE_STATE_PADDED_SIZE * (index))
+/*
+ * Byte offset of binding-table entry `index`; the table starts right after
+ * MAX_MEDIA_SURFACES_GEN6 surface states and holds one unsigned int per entry.
+ */
+#define BINDING_TABLE_OFFSET(index) (SURFACE_STATE_OFFSET(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * (index))
+
+/* Indices of the kernels in the VME kernel tables below. */
+#define VME_INTRA_SHADER 0
+#define VME_INTER_SHADER 1
+#define VME_BINTER_SHADER 2
+
+#define CURBE_ALLOCATION_SIZE 37 /* in 256-bit units */
+#define CURBE_TOTAL_DATA_LENGTH (4 * 32) /* in bytes; must not exceed CURBE_ALLOCATION_SIZE * 32 */
+#define CURBE_URB_ENTRY_LENGTH 4 /* in 256-bit units; must not exceed CURBE_TOTAL_DATA_LENGTH / 32 */
+
+#define VME_MSG_LENGTH 32
+
+/*
+ * Pre-assembled gen8 VME kernel binaries for H.264, pulled in from the
+ * generated .g8b files as rows of four 32-bit words.
+ */
+static const uint32_t gen8_vme_intra_frame[][4] = {
+#include "shaders/vme/intra_frame_gen8.g8b"
+};
+
+static const uint32_t gen8_vme_inter_frame[][4] = {
+#include "shaders/vme/inter_frame_gen8.g8b"
+};
+
+static const uint32_t gen8_vme_inter_bframe[][4] = {
+#include "shaders/vme/inter_bframe_gen8.g8b"
+};
+
+/*
+ * H.264 VME kernel table, ordered by the VME_*_SHADER indices. Each entry
+ * gives a name, the kernel index, the binary and its size in bytes.
+ * NOTE(review): the trailing NULL presumably initializes a buffer-object
+ * pointer in struct i965_kernel - confirm against its declaration.
+ */
+static struct i965_kernel gen8_vme_kernels[] = {
+ {
+ "VME Intra Frame",
+ VME_INTRA_SHADER, /*index*/
+ gen8_vme_intra_frame,
+ sizeof(gen8_vme_intra_frame),
+ NULL
+ },
+ {
+ "VME inter Frame",
+ VME_INTER_SHADER,
+ gen8_vme_inter_frame,
+ sizeof(gen8_vme_inter_frame),
+ NULL
+ },
+ {
+ "VME inter BFrame",
+ VME_BINTER_SHADER,
+ gen8_vme_inter_bframe,
+ sizeof(gen8_vme_inter_bframe),
+ NULL
+ }
+};
+
+/*
+ * Pre-assembled gen8 VME kernel binaries for MPEG-2. The intra kernel is
+ * built from the same intra_frame_gen8.g8b file as the H.264 intra kernel.
+ */
+static const uint32_t gen8_vme_mpeg2_intra_frame[][4] = {
+#include "shaders/vme/intra_frame_gen8.g8b"
+};
+
+static const uint32_t gen8_vme_mpeg2_inter_frame[][4] = {
+#include "shaders/vme/mpeg2_inter_gen8.g8b"
+};
+
+/*
+ * MPEG-2 VME kernel table: only the intra and inter kernels exist, so there
+ * is no VME_BINTER_SHADER entry.
+ */
+static struct i965_kernel gen8_vme_mpeg2_kernels[] = {
+ {
+ "VME Intra Frame",
+ VME_INTRA_SHADER, /*index*/
+ gen8_vme_mpeg2_intra_frame,
+ sizeof(gen8_vme_mpeg2_intra_frame),
+ NULL
+ },
+ {
+ "VME inter Frame",
+ VME_INTER_SHADER,
+ gen8_vme_mpeg2_inter_frame,
+ sizeof(gen8_vme_mpeg2_inter_frame),
+ NULL
+ },
+};
+
+/*
+ * Bind obj_surface at surface/binding-table slot `index` through the
+ * context's vme_surface2_setup hook. Only used for VME source surfaces.
+ */
+static void
+gen8_vme_source_surface_state(VADriverContextP ctx,
+ int index,
+ struct object_surface *obj_surface,
+ struct intel_encoder_context *encoder_context)
+{
+ struct gen6_vme_context *vme = encoder_context->vme_context;
+
+ vme->vme_surface2_setup(ctx,
+ &vme->gpe_context,
+ obj_surface,
+ BINDING_TABLE_OFFSET(index),
+ SURFACE_STATE_OFFSET(index));
+}
+
+/*
+ * Bind obj_surface at surface/binding-table slot `index` through the
+ * context's vme_media_rw_surface_setup hook.
+ */
+static void
+gen8_vme_media_source_surface_state(VADriverContextP ctx,
+ int index,
+ struct object_surface *obj_surface,
+ struct intel_encoder_context *encoder_context)
+{
+ struct gen6_vme_context *vme = encoder_context->vme_context;
+
+ vme->vme_media_rw_surface_setup(ctx,
+ &vme->gpe_context,
+ obj_surface,
+ BINDING_TABLE_OFFSET(index),
+ SURFACE_STATE_OFFSET(index));
+}
+
+/*
+ * Bind obj_surface at surface/binding-table slot `index` through the
+ * context's vme_media_chroma_surface_setup hook.
+ */
+static void
+gen8_vme_media_chroma_source_surface_state(VADriverContextP ctx,
+ int index,
+ struct object_surface *obj_surface,
+ struct intel_encoder_context *encoder_context)
+{
+ struct gen6_vme_context *vme = encoder_context->vme_context;
+
+ vme->vme_media_chroma_surface_setup(ctx,
+ &vme->gpe_context,
+ obj_surface,
+ BINDING_TABLE_OFFSET(index),
+ SURFACE_STATE_OFFSET(index));
+}
+
+/*
+ * Allocate the H.264 VME output buffer (one block per macroblock) and bind
+ * it at surface/binding-table slot `index`.
+ *
+ * The block size depends on whether the first slice is an I slice:
+ * intra: INTRA_VME_OUTPUT_IN_BYTES * 2
+ * inter: INTRA_VME_OUTPUT_IN_BYTES * 24, i.e.
+ * 32-byte Intra search + 16 IME info + 128 IME MV + 32 IME Ref
+ * + 16 FBR Info + 128 FBR MV + 32 FBR Ref
+ * = 16 * (2 + 2 * (1 + 8 + 2)) = 16 * 24.
+ */
+static void
+gen8_vme_output_buffer_setup(VADriverContextP ctx,
+ struct encode_state *encode_state,
+ int index,
+ struct intel_encoder_context *encoder_context)
+
+{
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
+ struct gen6_vme_context *vme = encoder_context->vme_context;
+ VAEncSequenceParameterBufferH264 *seq = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
+ VAEncSliceParameterBufferH264 *slice0 = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
+ int intra_only = slice0->slice_type == SLICE_TYPE_I;
+ int mbs_wide = seq->picture_width_in_mbs;
+ int mbs_high = seq->picture_height_in_mbs;
+
+ vme->vme_output.num_blocks = mbs_wide * mbs_high;
+ vme->vme_output.pitch = 16; /* in bytes, always 16 */
+ vme->vme_output.size_block = intra_only ?
+ INTRA_VME_OUTPUT_IN_BYTES * 2 :
+ INTRA_VME_OUTPUT_IN_BYTES * 24;
+
+ vme->vme_output.bo = dri_bo_alloc(i965->intel.bufmgr,
+ "VME output buffer",
+ vme->vme_output.num_blocks * vme->vme_output.size_block,
+ 0x1000);
+ assert(vme->vme_output.bo);
+ vme->vme_buffer_suface_setup(ctx,
+ &vme->gpe_context,
+ &vme->vme_output,
+ BINDING_TABLE_OFFSET(index),
+ SURFACE_STATE_OFFSET(index));
+}
+
+/*
+ * Allocate the second-level batch buffer that the fill routines later
+ * populate with per-macroblock MEDIA_OBJECT commands (H.264 path).
+ *
+ * The buffer holds one 64-byte block per macroblock plus one extra block.
+ * `index` is currently unused because the surface binding below is
+ * commented out.
+ */
+static void
+gen8_vme_output_vme_batchbuffer_setup(VADriverContextP ctx,
+ struct encode_state *encode_state,
+ int index,
+ struct intel_encoder_context *encoder_context)
+
+{
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
+ struct gen6_vme_context *vme_context = encoder_context->vme_context;
+ VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
+ int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
+ int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
+
+ vme_context->vme_batchbuffer.num_blocks = width_in_mbs * height_in_mbs + 1;
+ vme_context->vme_batchbuffer.size_block = 64; /* 4 OWORDs */
+ vme_context->vme_batchbuffer.pitch = 16;
+ vme_context->vme_batchbuffer.bo = dri_bo_alloc(i965->intel.bufmgr,
+ "VME batchbuffer",
+ vme_context->vme_batchbuffer.num_blocks * vme_context->vme_batchbuffer.size_block,
+ 0x1000);
+ /* The fill routines map and write this bo unconditionally, so catch an
+ * allocation failure here, as the VME output buffer setup does. */
+ assert(vme_context->vme_batchbuffer.bo);
+ /*
+ vme_context->vme_buffer_suface_setup(ctx,
+ &vme_context->gpe_context,
+ &vme_context->vme_batchbuffer,
+ BINDING_TABLE_OFFSET(index),
+ SURFACE_STATE_OFFSET(index));
+ */
+}
+
+/*
+ * Set up every surface the H.264 VME kernels use.
+ *
+ * Slot 0 / 4 / 6: the input picture as VME source, media read/write and
+ * chroma surface respectively.
+ * Slot 3: the VME output buffer. Slot 5 is passed to the VME batch buffer
+ * setup.
+ * For non-intra pictures the list-0 reference is bound as well, and the
+ * list-1 reference too when the first slice is a B slice.
+ *
+ * Always returns VA_STATUS_SUCCESS.
+ */
+static VAStatus
+gen8_vme_surface_setup(VADriverContextP ctx,
+ struct encode_state *encode_state,
+ int is_intra,
+ struct intel_encoder_context *encoder_context)
+{
+ /* current picture for encoding */
+ struct object_surface *input = encode_state->input_yuv_object;
+
+ gen8_vme_source_surface_state(ctx, 0, input, encoder_context);
+ gen8_vme_media_source_surface_state(ctx, 4, input, encoder_context);
+ gen8_vme_media_chroma_source_surface_state(ctx, 6, input, encoder_context);
+
+ if (!is_intra) {
+ VAEncSliceParameterBufferH264 *slice0 = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
+ int type = intel_avc_enc_slice_type_fixup(slice0->slice_type);
+
+ assert(type != SLICE_TYPE_I && type != SLICE_TYPE_SI);
+
+ intel_avc_vme_reference_state(ctx, encode_state, encoder_context, 0, 1, gen8_vme_source_surface_state);
+
+ if (type == SLICE_TYPE_B)
+ intel_avc_vme_reference_state(ctx, encode_state, encoder_context, 1, 2, gen8_vme_source_surface_state);
+ }
+
+ /* VME output */
+ gen8_vme_output_buffer_setup(ctx, encode_state, 3, encoder_context);
+ gen8_vme_output_vme_batchbuffer_setup(ctx, encode_state, 5, encoder_context);
+
+ return VA_STATUS_SUCCESS;
+}
+
+/*
+ * Fill the interface descriptor table (at gpe_context.idrt_offset in the
+ * dynamic state buffer) with one 32-byte descriptor per VME kernel.
+ *
+ * Every descriptor points at its kernel's start offset, uses no samplers,
+ * references the binding table at BINDING_TABLE_OFFSET(0) and reads
+ * CURBE_URB_ENTRY_LENGTH constant URB entries from offset 0.
+ *
+ * Always returns VA_STATUS_SUCCESS.
+ */
+static VAStatus gen8_vme_interface_setup(VADriverContextP ctx,
+ struct encode_state *encode_state,
+ struct intel_encoder_context *encoder_context)
+{
+ struct gen6_vme_context *vme_context = encoder_context->vme_context;
+ struct gen8_interface_descriptor_data *desc;
+ int i;
+ dri_bo *bo;
+ unsigned char *desc_ptr;
+
+ bo = vme_context->gpe_context.dynamic_state.bo;
+ dri_bo_map(bo, 1);
+ assert(bo->virtual);
+ desc_ptr = (unsigned char *)bo->virtual + vme_context->gpe_context.idrt_offset;
+
+ desc = (struct gen8_interface_descriptor_data *)desc_ptr;
+
+ for (i = 0; i < vme_context->vme_kernel_sum; i++) {
+ struct i965_kernel *kernel;
+ kernel = &vme_context->gpe_context.kernels[i];
+ assert(sizeof(*desc) == 32);
+ /* Set up the descriptor table */
+ memset(desc, 0, sizeof(*desc));
+ /* The start pointer field is stored in 64-byte units (>> 6). */
+ desc->desc0.kernel_start_pointer = kernel->kernel_offset >> 6;
+ desc->desc3.sampler_count = 0; /* FIXME: */
+ desc->desc3.sampler_state_pointer = 0;
+ desc->desc4.binding_table_entry_count = 1; /* FIXME: */
+ /* The binding table pointer field is stored in 32-byte units (>> 5). */
+ desc->desc4.binding_table_pointer = (BINDING_TABLE_OFFSET(0) >> 5);
+ desc->desc5.constant_urb_entry_read_offset = 0;
+ desc->desc5.constant_urb_entry_read_length = CURBE_URB_ENTRY_LENGTH;
+
+
+ desc++;
+ }
+
+ dri_bo_unmap(bo);
+
+ return VA_STATUS_SUCCESS;
+}
+
+/*
+ * Upload the VME constant (CURBE) data.
+ *
+ * Dword 31 of vme_state_message is first set to the motion-vector count
+ * for the codec:
+ * H.264 / H.264 MVC: 32 below level 30, 16 at level 30, 8 from level 31
+ * MPEG-2: 2
+ * anything else: 32
+ * Then the first 128 bytes of vme_state_message are copied to
+ * gpe_context.curbe_offset in the dynamic state buffer.
+ *
+ * Always returns VA_STATUS_SUCCESS.
+ */
+static VAStatus gen8_vme_constant_setup(VADriverContextP ctx,
+ struct encode_state *encode_state,
+ struct intel_encoder_context *encoder_context)
+{
+ struct gen6_vme_context *vme = encoder_context->vme_context;
+ unsigned int *state_msg = (unsigned int *)vme->vme_state_message;
+ unsigned char *curbe;
+ int mv_limit;
+
+ if (encoder_context->codec == CODEC_H264 ||
+ encoder_context->codec == CODEC_H264_MVC) {
+ if (vme->h264_level >= 31)
+ mv_limit = 8;
+ else if (vme->h264_level >= 30)
+ mv_limit = 16;
+ else
+ mv_limit = 32;
+ } else if (encoder_context->codec == CODEC_MPEG2) {
+ mv_limit = 2;
+ } else {
+ mv_limit = 32;
+ }
+
+ state_msg[31] = mv_limit;
+
+ dri_bo_map(vme->gpe_context.dynamic_state.bo, 1);
+ assert(vme->gpe_context.dynamic_state.bo->virtual);
+ curbe = (unsigned char *)vme->gpe_context.dynamic_state.bo->virtual +
+ vme->gpe_context.curbe_offset;
+
+ /* VME MV/Mb cost table is passed by using const buffer */
+ /* The search path is fixed, so it is constructed directly in the GPU
+ * shader rather than being uploaded here.
+ */
+ memcpy(curbe, (char *)vme->vme_state_message, 128);
+
+ dri_bo_unmap(vme->gpe_context.dynamic_state.bo);
+
+ return VA_STATUS_SUCCESS;
+}
+
+/*
+ * Dependency bits OR-ed into the scoreboard dword of a MEDIA_OBJECT command:
+ * A is set when the macroblock is not in column 0, B when it is not in the
+ * slice's first row, and C when it is additionally not in the last column.
+ */
+#define MB_SCOREBOARD_A (1 << 0)
+#define MB_SCOREBOARD_B (1 << 1)
+#define MB_SCOREBOARD_C (1 << 2)
+
+/*
+ * Check whether macroblock (x_index, y_index) lies inside the picture
+ * (mb_width x mb_height) and inside the slice covering macroblocks
+ * [first_mb, first_mb + num_mb) in raster order.
+ *
+ * Returns 0 when the macroblock is in bounds, -1 otherwise.
+ */
+static inline int loop_in_bounds(int x_index, int y_index, int first_mb, int num_mb, int mb_width, int mb_height)
+{
+ int mb_index;
+ if (x_index < 0 || x_index >= mb_width)
+ return -1;
+ if (y_index < 0 || y_index >= mb_height)
+ return -1;
+
+ mb_index = y_index * mb_width + x_index;
+ /* first_mb + num_mb is the first macroblock AFTER the slice, so the
+ * upper bound is exclusive. */
+ if (mb_index < first_mb || mb_index >= (first_mb + num_mb))
+ return -1;
+ return 0;
+}
+
+/*
+ * Fill the VME second-level batch buffer with scoreboard-enabled
+ * MEDIA_OBJECT commands, walking each slice along anti-diagonals
+ * (each inner step moves two columns left and one row down).
+ *
+ * For every slice the walk has two phases:
+ * 1. start points move right along the slice's first row while
+ * x_outer < mb_width - 2;
+ * 2. start points begin at column max(mb_width - 2, 0) of the first row,
+ * and after reaching the last column they wrap to the same column of
+ * the next row.
+ * loop_in_bounds() terminates both the inner and the outer loops.
+ *
+ * Each macroblock produces an 8-dword MEDIA_OBJECT followed by a 2-dword
+ * MEDIA_STATE_FLUSH; the buffer is closed with MI_BATCH_BUFFER_END.
+ *
+ * kernel: value written to the second dword of MEDIA_OBJECT.
+ * transform_8x8_mode_flag: OR-ed into the second inline-data dword.
+ */
+static void
+gen8wa_vme_walker_fill_vme_batchbuffer(VADriverContextP ctx,
+ struct encode_state *encode_state,
+ int mb_width, int mb_height,
+ int kernel,
+ int transform_8x8_mode_flag,
+ struct intel_encoder_context *encoder_context)
+{
+ struct gen6_vme_context *vme_context = encoder_context->vme_context;
+ int mb_row;
+ int s;
+ unsigned int *command_ptr;
+
+#define USE_SCOREBOARD (1 << 21)
+
+ dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
+ command_ptr = vme_context->vme_batchbuffer.bo->virtual;
+
+ for (s = 0; s < encode_state->num_slice_params_ext; s++) {
+ VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer;
+ int first_mb = pSliceParameter->macroblock_address;
+ int num_mb = pSliceParameter->num_macroblocks;
+ unsigned int mb_intra_ub, score_dep;
+ int x_outer, y_outer, x_inner, y_inner;
+ int xtemp_outer = 0;
+
+ x_outer = first_mb % mb_width;
+ y_outer = first_mb / mb_width;
+ /* First row of the slice: macroblocks in it have no usable row above. */
+ mb_row = y_outer;
+
+ /* Phase 1: diagonals starting on the slice's first row. */
+ for (; x_outer < (mb_width -2 ) && !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
+ x_inner = x_outer;
+ y_inner = y_outer;
+ for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
+ mb_intra_ub = 0;
+ score_dep = 0;
+ /* Left neighbour exists unless we are in column 0. */
+ if (x_inner != 0) {
+ mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
+ score_dep |= MB_SCOREBOARD_A;
+ }
+ /* Above neighbours exist only below the slice's first row. */
+ if (y_inner != mb_row) {
+ mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
+ score_dep |= MB_SCOREBOARD_B;
+ if (x_inner != 0)
+ mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
+ if (x_inner != (mb_width -1)) {
+ mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
+ score_dep |= MB_SCOREBOARD_C;
+ }
+ }
+
+ *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
+ *command_ptr++ = kernel;
+ *command_ptr++ = USE_SCOREBOARD;
+ /* Indirect data */
+ *command_ptr++ = 0;
+ /* the (X, Y) term of scoreboard */
+ *command_ptr++ = ((y_inner << 16) | x_inner);
+ *command_ptr++ = score_dep;
+ /*inline data */
+ *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
+ *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
+ *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
+ *command_ptr++ = 0;
+
+ /* Next macroblock on the same anti-diagonal. */
+ x_inner -= 2;
+ y_inner += 1;
+ }
+ x_outer += 1;
+ }
+
+ /* Phase 2: diagonals starting in the last two columns, row by row. */
+ xtemp_outer = mb_width - 2;
+ if (xtemp_outer < 0)
+ xtemp_outer = 0;
+ x_outer = xtemp_outer;
+ y_outer = first_mb / mb_width;
+ for (;!loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
+ y_inner = y_outer;
+ x_inner = x_outer;
+ for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
+ mb_intra_ub = 0;
+ score_dep = 0;
+ if (x_inner != 0) {
+ mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
+ score_dep |= MB_SCOREBOARD_A;
+ }
+ if (y_inner != mb_row) {
+ mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
+ score_dep |= MB_SCOREBOARD_B;
+ if (x_inner != 0)
+ mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
+
+ if (x_inner != (mb_width -1)) {
+ mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
+ score_dep |= MB_SCOREBOARD_C;
+ }
+ }
+
+ *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
+ *command_ptr++ = kernel;
+ *command_ptr++ = USE_SCOREBOARD;
+ /* Indirect data */
+ *command_ptr++ = 0;
+ /* the (X, Y) term of scoreboard */
+ *command_ptr++ = ((y_inner << 16) | x_inner);
+ *command_ptr++ = score_dep;
+ /*inline data */
+ *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
+ *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
+
+ *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
+ *command_ptr++ = 0;
+ x_inner -= 2;
+ y_inner += 1;
+ }
+ x_outer++;
+ /* Past the last column: continue from the next row. */
+ if (x_outer >= mb_width) {
+ y_outer += 1;
+ x_outer = xtemp_outer;
+ }
+ }
+ }
+
+ *command_ptr++ = MI_BATCH_BUFFER_END;
+ *command_ptr++ = 0;
+
+ dri_bo_unmap(vme_context->vme_batchbuffer.bo);
+}
+
+/*
+ * Fill the VME second-level batch buffer in plain raster order, without
+ * hardware scoreboard dependencies (dwords 2-5 of MEDIA_OBJECT are zero).
+ *
+ * For each macroblock of each slice an 8-dword MEDIA_OBJECT followed by a
+ * 2-dword MEDIA_STATE_FLUSH is written; the buffer is closed with
+ * MI_BATCH_BUFFER_END. The inline data carries the macroblock position,
+ * the intra-prediction neighbour availability flags, the quality level
+ * and transform_8x8_mode_flag.
+ *
+ * mb_height is not used by this function.
+ */
+static void
+gen8_vme_fill_vme_batchbuffer(VADriverContextP ctx,
+ struct encode_state *encode_state,
+ int mb_width, int mb_height,
+ int kernel,
+ int transform_8x8_mode_flag,
+ struct intel_encoder_context *encoder_context)
+{
+ struct gen6_vme_context *vme_context = encoder_context->vme_context;
+ int mb_x = 0, mb_y = 0;
+ int i, s;
+ unsigned int *command_ptr;
+
+ dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
+ command_ptr = vme_context->vme_batchbuffer.bo->virtual;
+
+ for (s = 0; s < encode_state->num_slice_params_ext; s++) {
+ VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer;
+ int slice_mb_begin = pSliceParameter->macroblock_address;
+ int slice_mb_number = pSliceParameter->num_macroblocks;
+ unsigned int mb_intra_ub;
+ /* Column of the slice's first macroblock; non-zero means the slice
+ * starts in the middle of a row. */
+ int slice_mb_x = pSliceParameter->macroblock_address % mb_width;
+ for (i = 0; i < slice_mb_number; ) {
+ int mb_count = i + slice_mb_begin;
+ mb_x = mb_count % mb_width;
+ mb_y = mb_count / mb_width;
+ /* Start from picture-level neighbour availability ... */
+ mb_intra_ub = 0;
+ if (mb_x != 0) {
+ mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
+ }
+ if (mb_y != 0) {
+ mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
+ if (mb_x != 0)
+ mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
+ if (mb_x != (mb_width -1))
+ mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
+ }
+ /* ... then drop neighbours that fall outside the slice. For the
+ * first mb_width macroblocks the row above is not in this slice. */
+ if (i < mb_width) {
+ if (i == 0)
+ mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_AE);
+ mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_BCD_MASK);
+ /* Exception: when the slice starts mid-row, the above-right
+ * neighbour of macroblock mb_width - 1 is the slice's first
+ * macroblock. */
+ if ((i == (mb_width - 1)) && slice_mb_x) {
+ mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
+ }
+ }
+
+ /* When the slice starts mid-row, the above-left neighbour of
+ * macroblock mb_width precedes the slice. */
+ if ((i == mb_width) && slice_mb_x) {
+ mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_D);
+ }
+ *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
+ *command_ptr++ = kernel;
+ *command_ptr++ = 0;
+ *command_ptr++ = 0;
+ *command_ptr++ = 0;
+ *command_ptr++ = 0;
+
+ /*inline data */
+ *command_ptr++ = (mb_width << 16 | mb_y << 8 | mb_x);
+ *command_ptr++ = ((encoder_context->quality_level << 24) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
+
+ *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
+ *command_ptr++ = 0;
+ i += 1;
+ }
+ }
+
+ *command_ptr++ = MI_BATCH_BUFFER_END;
+ *command_ptr++ = 0;
+
+ dri_bo_unmap(vme_context->vme_batchbuffer.bo);
+}
+
+/*
+ * Per-frame reset: re-initialize the GPE context and release the buffers
+ * left over from the previous frame (VME output, VME batch buffer and VME
+ * state), clearing each pointer afterwards.
+ */
+static void gen8_vme_media_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
+{
+ struct gen6_vme_context *vme = encoder_context->vme_context;
+
+ gen8_gpe_context_init(ctx, &vme->gpe_context);
+
+ /* VME output buffer */
+ dri_bo_unreference(vme->vme_output.bo);
+ vme->vme_output.bo = NULL;
+
+ /* VME second-level batch buffer */
+ dri_bo_unreference(vme->vme_batchbuffer.bo);
+ vme->vme_batchbuffer.bo = NULL;
+
+ /* VME state */
+ dri_bo_unreference(vme->vme_state.bo);
+ vme->vme_state.bo = NULL;
+}
+
+/*
+ * Choose the VME kernel, fill the second-level batch buffer and emit the
+ * media pipeline setup plus an MI_BATCH_BUFFER_START that jumps to it.
+ *
+ * The scoreboard walker is used unless the encoder runs in low-quality mode
+ * or some slice does not start at the beginning of a macroblock row.
+ *
+ * Kernel selection by slice type:
+ * I / SI -> VME_INTRA_SHADER
+ * P / SP -> VME_INTER_SHADER
+ * otherwise -> VME_BINTER_SHADER, or VME_INTER_SHADER when the walker is
+ * not allowed
+ *
+ * NOTE(review): after the slice scan below, pSliceParameter refers to the
+ * last slice examined rather than slice 0, so the slice type is read from
+ * that slice - confirm this is intended.
+ */
+static void gen8_vme_pipeline_programing(VADriverContextP ctx,
+ struct encode_state *encode_state,
+ struct intel_encoder_context *encoder_context)
+{
+ struct gen6_vme_context *vme_context = encoder_context->vme_context;
+ struct intel_batchbuffer *batch = encoder_context->base.batch;
+ VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
+ VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
+ VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
+ int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
+ int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
+ int kernel_shader;
+ bool allow_hwscore = true;
+ int s;
+ unsigned int is_low_quality = (encoder_context->quality_level == ENCODER_LOW_QUALITY);
+
+ if (is_low_quality)
+ allow_hwscore = false;
+ else {
+ /* The walker requires every slice to start at column 0. */
+ for (s = 0; s < encode_state->num_slice_params_ext; s++) {
+ pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer;
+ if ((pSliceParameter->macroblock_address % width_in_mbs)) {
+ allow_hwscore = false;
+ break;
+ }
+ }
+ }
+
+ /* The second test used to repeat SLICE_TYPE_I; SI slices are
+ * intra-only as well and belong with the intra kernel. */
+ if ((pSliceParameter->slice_type == SLICE_TYPE_I) ||
+ (pSliceParameter->slice_type == SLICE_TYPE_SI)) {
+ kernel_shader = VME_INTRA_SHADER;
+ } else if ((pSliceParameter->slice_type == SLICE_TYPE_P) ||
+ (pSliceParameter->slice_type == SLICE_TYPE_SP)) {
+ kernel_shader = VME_INTER_SHADER;
+ } else {
+ kernel_shader = VME_BINTER_SHADER;
+ if (!allow_hwscore)
+ kernel_shader = VME_INTER_SHADER;
+ }
+ if (allow_hwscore)
+ gen8wa_vme_walker_fill_vme_batchbuffer(ctx,
+ encode_state,
+ width_in_mbs, height_in_mbs,
+ kernel_shader,
+ pPicParameter->pic_fields.bits.transform_8x8_mode_flag,
+ encoder_context);
+ else
+ gen8_vme_fill_vme_batchbuffer(ctx,
+ encode_state,
+ width_in_mbs, height_in_mbs,
+ kernel_shader,
+ pPicParameter->pic_fields.bits.transform_8x8_mode_flag,
+ encoder_context);
+
+ intel_batchbuffer_start_atomic(batch, 0x1000);
+ gen8_gpe_pipeline_setup(ctx, &vme_context->gpe_context, batch);
+ /* Chain to the second-level batch buffer filled above. */
+ BEGIN_BATCH(batch, 3);
+ OUT_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
+ OUT_RELOC(batch,
+ vme_context->vme_batchbuffer.bo,
+ I915_GEM_DOMAIN_COMMAND, 0,
+ 0);
+ OUT_BATCH(batch, 0);
+ ADVANCE_BATCH(batch);
+
+ intel_batchbuffer_end_atomic(batch);
+}
+
+/*
+ * Prepare one H.264 frame for VME: record the stream level, refresh the
+ * MB/MV cost table, set up surfaces, interface descriptors and constants,
+ * then program the media pipeline.
+ *
+ * Whether the frame is intra is decided from the first slice only.
+ * Always returns VA_STATUS_SUCCESS.
+ */
+static VAStatus gen8_vme_prepare(VADriverContextP ctx,
+ struct encode_state *encode_state,
+ struct intel_encoder_context *encoder_context)
+{
+ struct gen6_vme_context *vme = encoder_context->vme_context;
+ VAEncSequenceParameterBufferH264 *seq = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
+ VAEncSliceParameterBufferH264 *slice0 = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
+ int is_intra = slice0->slice_type == SLICE_TYPE_I;
+
+ /* Track the level signalled by the sequence parameters; storing it
+ * unconditionally yields the same value as updating only on change. */
+ vme->h264_level = seq->level_idc;
+
+ intel_vme_update_mbmv_cost(ctx, encode_state, encoder_context);
+
+ /* Set up all the memory objects (the separate VME state setup step is
+ * not used on this path). */
+ gen8_vme_surface_setup(ctx, encode_state, is_intra, encoder_context);
+ gen8_vme_interface_setup(ctx, encode_state, encoder_context);
+ gen8_vme_constant_setup(ctx, encode_state, encoder_context);
+
+ /* Program the media pipeline */
+ gen8_vme_pipeline_programing(ctx, encode_state, encoder_context);
+
+ return VA_STATUS_SUCCESS;
+}
+
+/*
+ * Submit the prepared VME work by flushing the encoder's batch buffer.
+ * Always returns VA_STATUS_SUCCESS.
+ */
+static VAStatus gen8_vme_run(VADriverContextP ctx,
+ struct encode_state *encode_state,
+ struct intel_encoder_context *encoder_context)
+{
+ struct intel_batchbuffer *batch = encoder_context->base.batch;
+
+ intel_batchbuffer_flush(batch);
+
+ return VA_STATUS_SUCCESS;
+}
+
+/* Placeholder for the stop stage: does nothing and returns VA_STATUS_SUCCESS. */
+static VAStatus gen8_vme_stop(VADriverContextP ctx,
+ struct encode_state *encode_state,
+ struct intel_encoder_context *encoder_context)
+{
+ return VA_STATUS_SUCCESS;
+}
+
+/*
+ * Run the full H.264 VME stage for one frame: init, prepare, run, stop.
+ *
+ * profile is not used here. The statuses returned by the individual steps
+ * are ignored; this function always returns VA_STATUS_SUCCESS.
+ */
+static VAStatus
+gen8_vme_pipeline(VADriverContextP ctx,
+ VAProfile profile,
+ struct encode_state *encode_state,
+ struct intel_encoder_context *encoder_context)
+{
+ gen8_vme_media_init(ctx, encoder_context);
+ gen8_vme_prepare(ctx, encode_state, encoder_context);
+ gen8_vme_run(ctx, encode_state, encoder_context);
+ gen8_vme_stop(ctx, encode_state, encoder_context);
+
+ return VA_STATUS_SUCCESS;
+}
+
+/*
+ * Allocate the MPEG-2 VME output buffer (one block per macroblock) and
+ * bind it at surface/binding-table slot `index`.
+ *
+ * The picture size in macroblocks is the pixel size rounded up to a
+ * multiple of 16, divided by 16. The block size depends on is_intra:
+ * intra: INTRA_VME_OUTPUT_IN_BYTES * 2
+ * inter: INTRA_VME_OUTPUT_IN_BYTES * 24, i.e.
+ * 32-byte Intra search + 16 IME info + 128 IME MV + 32 IME Ref
+ * + 16 FBR Info + 128 FBR MV + 32 FBR Ref
+ * = 16 * (2 + 2 * (1 + 8 + 2)) = 16 * 24.
+ */
+static void
+gen8_vme_mpeg2_output_buffer_setup(VADriverContextP ctx,
+ struct encode_state *encode_state,
+ int index,
+ int is_intra,
+ struct intel_encoder_context *encoder_context)
+
+{
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
+ struct gen6_vme_context *vme = encoder_context->vme_context;
+ VAEncSequenceParameterBufferMPEG2 *seq = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
+ int mbs_wide = ALIGN(seq->picture_width, 16) / 16;
+ int mbs_high = ALIGN(seq->picture_height, 16) / 16;
+
+ vme->vme_output.num_blocks = mbs_wide * mbs_high;
+ vme->vme_output.pitch = 16; /* in bytes, always 16 */
+ vme->vme_output.size_block = is_intra ?
+ INTRA_VME_OUTPUT_IN_BYTES * 2 :
+ INTRA_VME_OUTPUT_IN_BYTES * 24;
+
+ vme->vme_output.bo = dri_bo_alloc(i965->intel.bufmgr,
+ "VME output buffer",
+ vme->vme_output.num_blocks * vme->vme_output.size_block,
+ 0x1000);
+ assert(vme->vme_output.bo);
+ vme->vme_buffer_suface_setup(ctx,
+ &vme->gpe_context,
+ &vme->vme_output,
+ BINDING_TABLE_OFFSET(index),
+ SURFACE_STATE_OFFSET(index));
+}
+
+/*
+ * Allocate the second-level VME batch buffer for an MPEG-2 picture and
+ * bind it at binding-table slot `index`.
+ *
+ * The buffer holds one 64-byte block per macroblock plus one extra
+ * block (the fill routines append a terminating command after the
+ * per-macroblock commands).  The allocation is asserted, matching the
+ * handling of the VME output buffer, so a failed allocation is caught
+ * here rather than when the buffer is mapped later.
+ */
+static void
+gen8_vme_mpeg2_output_vme_batchbuffer_setup(VADriverContextP ctx,
+                                            struct encode_state *encode_state,
+                                            int index,
+                                            struct intel_encoder_context *encoder_context)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct gen6_vme_context *vme_context = encoder_context->vme_context;
+    VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
+    int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
+    int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
+
+    vme_context->vme_batchbuffer.num_blocks = width_in_mbs * height_in_mbs + 1;
+    vme_context->vme_batchbuffer.size_block = 64; /* 4 OWORDs */
+    vme_context->vme_batchbuffer.pitch = 16;
+    vme_context->vme_batchbuffer.bo = dri_bo_alloc(i965->intel.bufmgr,
+                                                   "VME batchbuffer",
+                                                   vme_context->vme_batchbuffer.num_blocks * vme_context->vme_batchbuffer.size_block,
+                                                   0x1000);
+    assert(vme_context->vme_batchbuffer.bo);
+    vme_context->vme_buffer_suface_setup(ctx,
+                                         &vme_context->gpe_context,
+                                         &vme_context->vme_batchbuffer,
+                                         BINDING_TABLE_OFFSET(index),
+                                         SURFACE_STATE_OFFSET(index));
+}
+
+/*
+ * Set up every surface state used by the MPEG-2 VME kernels.
+ *
+ * Binding-table slots used:
+ *   0, 4, 6 - the input picture (VME source, media source, chroma source)
+ *   1, 2    - reference pictures 0 and 1 (inter pictures only)
+ *   3       - VME output buffer
+ *   5       - second-level VME batch buffer
+ *
+ * A reference slot is only programmed when the reference object exists
+ * and has backing storage; both references are checked the same way so
+ * a missing reference 0 no longer causes a NULL dereference.
+ *
+ * Always returns VA_STATUS_SUCCESS.
+ */
+static VAStatus
+gen8_vme_mpeg2_surface_setup(VADriverContextP ctx,
+                             struct encode_state *encode_state,
+                             int is_intra,
+                             struct intel_encoder_context *encoder_context)
+{
+    struct object_surface *obj_surface;
+
+    /* Setup surfaces state */
+    /* current picture for encoding */
+    obj_surface = encode_state->input_yuv_object;
+    gen8_vme_source_surface_state(ctx, 0, obj_surface, encoder_context);
+    gen8_vme_media_source_surface_state(ctx, 4, obj_surface, encoder_context);
+    gen8_vme_media_chroma_source_surface_state(ctx, 6, obj_surface, encoder_context);
+
+    if (!is_intra) {
+        /* reference 0 */
+        obj_surface = encode_state->reference_objects[0];
+
+        if (obj_surface && obj_surface->bo != NULL)
+            gen8_vme_source_surface_state(ctx, 1, obj_surface, encoder_context);
+
+        /* reference 1 */
+        obj_surface = encode_state->reference_objects[1];
+
+        if (obj_surface && obj_surface->bo != NULL)
+            gen8_vme_source_surface_state(ctx, 2, obj_surface, encoder_context);
+    }
+
+    /* VME output */
+    gen8_vme_mpeg2_output_buffer_setup(ctx, encode_state, 3, is_intra, encoder_context);
+    gen8_vme_mpeg2_output_vme_batchbuffer_setup(ctx, encode_state, 5, encoder_context);
+
+    return VA_STATUS_SUCCESS;
+}
+
+/*
+ * Fill the second-level VME batch buffer for the scoreboard ("walker")
+ * path.
+ *
+ * The buffer object is mapped for writing and, for every macroblock
+ * position visited, 10 dwords are written: an 8-dword CMD_MEDIA_OBJECT
+ * followed by CMD_MEDIA_STATE_FLUSH and a zero dword.  The buffer is
+ * finished with MI_BATCH_BUFFER_END and unmapped again.
+ *
+ * Macroblocks are visited along diagonals: each inner step moves two
+ * columns to the left and one row down.  For every position the code
+ * derives the intra neighbour availability flags (mb_intra_ub) and the
+ * scoreboard dependency mask (score_dep) from the position alone.
+ *
+ * NOTE(review): loop_in_bounds() is defined elsewhere; the loops keep
+ * running while it returns zero, so it presumably reports positions that
+ * fall outside the picture / macroblock range -- confirm.
+ *
+ * encode_state is not used by this function.
+ */
+static void
+gen8wa_vme_mpeg2_walker_fill_vme_batchbuffer(VADriverContextP ctx,
+ struct encode_state *encode_state,
+ int mb_width, int mb_height,
+ int kernel,
+ struct intel_encoder_context *encoder_context)
+{
+ struct gen6_vme_context *vme_context = encoder_context->vme_context;
+ unsigned int *command_ptr;
+
+ /* Value written to the third dword of every MEDIA_OBJECT below */
+#define MPEG2_SCOREBOARD (1 << 21)
+
+ dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
+ command_ptr = vme_context->vme_batchbuffer.bo->virtual;
+
+ {
+ unsigned int mb_intra_ub, score_dep;
+ int x_outer, y_outer, x_inner, y_inner;
+ int xtemp_outer = 0;
+ int first_mb = 0;
+ int num_mb = mb_width * mb_height;
+
+ x_outer = 0;
+ y_outer = 0;
+
+
+ /* Phase 1: diagonals starting on the top row, columns 0 .. mb_width - 3 */
+ for (; x_outer < (mb_width -2 ) && !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
+ x_inner = x_outer;
+ y_inner = y_outer;
+ for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
+ mb_intra_ub = 0;
+ score_dep = 0;
+ /* not in the first column: AE flag and dependency A */
+ if (x_inner != 0) {
+ mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
+ score_dep |= MB_SCOREBOARD_A;
+ }
+ /* not in the first row: B always, D/C depending on the column */
+ if (y_inner != 0) {
+ mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
+ score_dep |= MB_SCOREBOARD_B;
+
+ if (x_inner != 0)
+ mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
+
+ if (x_inner != (mb_width -1)) {
+ mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
+ score_dep |= MB_SCOREBOARD_C;
+ }
+ }
+
+ *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
+ *command_ptr++ = kernel;
+ *command_ptr++ = MPEG2_SCOREBOARD;
+ /* Indirect data */
+ *command_ptr++ = 0;
+ /* the (X, Y) term of scoreboard */
+ *command_ptr++ = ((y_inner << 16) | x_inner);
+ *command_ptr++ = score_dep;
+ /*inline data */
+ *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
+ *command_ptr++ = ((1 << 18) | (1 << 16) | (mb_intra_ub << 8));
+ *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
+ *command_ptr++ = 0;
+
+ /* next position on the same diagonal */
+ x_inner -= 2;
+ y_inner += 1;
+ }
+ x_outer += 1;
+ }
+
+ /*
+ * Phase 2: diagonals starting at column max(mb_width - 2, 0). Once
+ * x_outer runs past the last column the start point moves down one
+ * row and restarts from that same column.
+ */
+ xtemp_outer = mb_width - 2;
+ if (xtemp_outer < 0)
+ xtemp_outer = 0;
+ x_outer = xtemp_outer;
+ y_outer = 0;
+ for (;!loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
+ y_inner = y_outer;
+ x_inner = x_outer;
+ for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
+ /* same flag derivation and command layout as in phase 1 */
+ mb_intra_ub = 0;
+ score_dep = 0;
+ if (x_inner != 0) {
+ mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
+ score_dep |= MB_SCOREBOARD_A;
+ }
+ if (y_inner != 0) {
+ mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
+ score_dep |= MB_SCOREBOARD_B;
+
+ if (x_inner != 0)
+ mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
+
+ if (x_inner != (mb_width -1)) {
+ mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
+ score_dep |= MB_SCOREBOARD_C;
+ }
+ }
+
+ *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
+ *command_ptr++ = kernel;
+ *command_ptr++ = MPEG2_SCOREBOARD;
+ /* Indirect data */
+ *command_ptr++ = 0;
+ /* the (X, Y) term of scoreboard */
+ *command_ptr++ = ((y_inner << 16) | x_inner);
+ *command_ptr++ = score_dep;
+ /*inline data */
+ *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
+ *command_ptr++ = ((1 << 18) | (1 << 16) | (mb_intra_ub << 8));
+
+ *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
+ *command_ptr++ = 0;
+ x_inner -= 2;
+ y_inner += 1;
+ }
+ x_outer++;
+ if (x_outer >= mb_width) {
+ y_outer += 1;
+ x_outer = xtemp_outer;
+ }
+ }
+ }
+
+ /* Terminate the second-level batch buffer */
+ *command_ptr++ = MI_BATCH_BUFFER_END;
+ *command_ptr++ = 0;
+
+ dri_bo_unmap(vme_context->vme_batchbuffer.bo);
+ return;
+}
+
+/*
+ * Fill the second-level VME batch buffer without scoreboard
+ * dependencies.
+ *
+ * Every slice parameter element is walked in order and, for each of its
+ * macroblocks, 10 dwords are written: an 8-dword CMD_MEDIA_OBJECT whose
+ * inline data carries the macroblock position and the intra neighbour
+ * availability flags, followed by CMD_MEDIA_STATE_FLUSH and a zero
+ * dword.  MI_BATCH_BUFFER_END closes the buffer.
+ *
+ * ctx and mb_height are not used.
+ */
+static void
+gen8_vme_mpeg2_fill_vme_batchbuffer(VADriverContextP ctx,
+                                    struct encode_state *encode_state,
+                                    int mb_width, int mb_height,
+                                    int kernel,
+                                    int transform_8x8_mode_flag,
+                                    struct intel_encoder_context *encoder_context)
+{
+    struct gen6_vme_context *vme_context = encoder_context->vme_context;
+    unsigned int *cmd;
+    int buf_idx, elem_idx, mb_idx;
+
+    dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
+    cmd = vme_context->vme_batchbuffer.bo->virtual;
+
+    for (buf_idx = 0; buf_idx < encode_state->num_slice_params_ext; buf_idx++) {
+        VAEncSliceParameterBufferMPEG2 *slice_param =
+            (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[buf_idx]->buffer;
+
+        for (elem_idx = 0;
+             elem_idx < encode_state->slice_params_ext[buf_idx]->num_elements;
+             elem_idx++, slice_param++) {
+            int slice_first_mb = slice_param->macroblock_address;
+            int slice_mb_total = slice_param->num_macroblocks;
+
+            for (mb_idx = 0; mb_idx < slice_mb_total; mb_idx++) {
+                int mb_addr = slice_first_mb + mb_idx;
+                int mb_x = mb_addr % mb_width;
+                int mb_y = mb_addr / mb_width;
+                unsigned int mb_intra_ub = 0;
+
+                /* left neighbour exists */
+                if (mb_x != 0)
+                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
+
+                /* row above exists: top, top-left and top-right neighbours */
+                if (mb_y != 0) {
+                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
+
+                    if (mb_x != 0)
+                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
+
+                    if (mb_x != (mb_width - 1))
+                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
+                }
+
+                cmd[0] = (CMD_MEDIA_OBJECT | (8 - 2));
+                cmd[1] = kernel;
+                cmd[2] = 0;
+                cmd[3] = 0;
+                cmd[4] = 0;
+                cmd[5] = 0;
+
+                /* inline data */
+                cmd[6] = (mb_width << 16 | mb_y << 8 | mb_x);
+                cmd[7] = ((1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
+
+                cmd[8] = CMD_MEDIA_STATE_FLUSH;
+                cmd[9] = 0;
+                cmd += 10;
+            }
+        }
+    }
+
+    cmd[0] = MI_BATCH_BUFFER_END;
+    cmd[1] = 0;
+
+    dri_bo_unmap(vme_context->vme_batchbuffer.bo);
+}
+
+/*
+ * Build the second-level VME batch buffer for an MPEG-2 picture and
+ * emit the first-level commands that chain to it.
+ *
+ * The scoreboard ("walker") fill routine is only used when the picture
+ * is not an intra picture and every slice starts at the beginning of a
+ * macroblock row; otherwise the plain raster-order fill routine is
+ * used.  Every element of every slice parameter buffer is examined:
+ * the element pointer is advanced together with the element index, so
+ * elements after the first are no longer skipped.
+ *
+ * Afterwards the GPE pipeline state and an MI_BATCH_BUFFER_START that
+ * points at the VME batch buffer are written atomically into the
+ * encoder batch.
+ */
+static void
+gen8_vme_mpeg2_pipeline_programing(VADriverContextP ctx,
+                                   struct encode_state *encode_state,
+                                   int is_intra,
+                                   struct intel_encoder_context *encoder_context)
+{
+    struct gen6_vme_context *vme_context = encoder_context->vme_context;
+    struct intel_batchbuffer *batch = encoder_context->base.batch;
+    VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
+    int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
+    int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
+    bool allow_hwscore = true;
+    int s;
+    int kernel_shader;
+    VAEncPictureParameterBufferMPEG2 *pic_param = NULL;
+
+    /* The walker path requires every slice to start on a row boundary */
+    for (s = 0; s < encode_state->num_slice_params_ext; s++) {
+        int j;
+        VAEncSliceParameterBufferMPEG2 *slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[s]->buffer;
+
+        for (j = 0; j < encode_state->slice_params_ext[s]->num_elements; j++, slice_param++) {
+            if (slice_param->macroblock_address % width_in_mbs) {
+                allow_hwscore = false;
+                break;
+            }
+        }
+    }
+
+    pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
+    if (pic_param->picture_type == VAEncPictureTypeIntra) {
+        allow_hwscore = false;
+        kernel_shader = VME_INTRA_SHADER;
+    } else {
+        kernel_shader = VME_INTER_SHADER;
+    }
+
+    if (allow_hwscore)
+        gen8wa_vme_mpeg2_walker_fill_vme_batchbuffer(ctx,
+                                                     encode_state,
+                                                     width_in_mbs, height_in_mbs,
+                                                     kernel_shader,
+                                                     encoder_context);
+    else
+        gen8_vme_mpeg2_fill_vme_batchbuffer(ctx,
+                                            encode_state,
+                                            width_in_mbs, height_in_mbs,
+                                            is_intra ? VME_INTRA_SHADER : VME_INTER_SHADER,
+                                            0,
+                                            encoder_context);
+
+    intel_batchbuffer_start_atomic(batch, 0x1000);
+    gen8_gpe_pipeline_setup(ctx, &vme_context->gpe_context, batch);
+    BEGIN_BATCH(batch, 4);
+    OUT_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
+    OUT_RELOC(batch,
+              vme_context->vme_batchbuffer.bo,
+              I915_GEM_DOMAIN_COMMAND, 0,
+              0);
+    OUT_BATCH(batch, 0);
+    OUT_BATCH(batch, 0);
+    ADVANCE_BATCH(batch);
+
+    intel_batchbuffer_end_atomic(batch);
+}
+
+/*
+ * Prepare stage of the MPEG-2 VME pipeline.
+ *
+ * Records the MPEG-2 level taken from the sequence extension in the VME
+ * context, sets up the surfaces, the interface descriptors, the MPEG-2
+ * VME state and the constants, and finally programs the media pipeline.
+ * Whether the picture is treated as intra is taken from the first slice
+ * parameter element.
+ *
+ * Always returns VA_STATUS_SUCCESS.
+ */
+static VAStatus
+gen8_vme_mpeg2_prepare(VADriverContextP ctx,
+                       struct encode_state *encode_state,
+                       struct intel_encoder_context *encoder_context)
+{
+    struct gen6_vme_context *vme_context = encoder_context->vme_context;
+    VAEncSequenceParameterBufferMPEG2 *seq_param =
+        (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
+    VAEncSliceParameterBufferMPEG2 *first_slice =
+        (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[0]->buffer;
+    const int level =
+        seq_param->sequence_extension.bits.profile_and_level_indication & MPEG2_LEVEL_MASK;
+    const int is_intra = first_slice->is_intra_slice;
+
+    /* Keep the cached level in sync with the current sequence */
+    if (vme_context->mpeg2_level != level)
+        vme_context->mpeg2_level = level;
+
+    /* Set up all the memory objects */
+    gen8_vme_mpeg2_surface_setup(ctx, encode_state, is_intra, encoder_context);
+    gen8_vme_interface_setup(ctx, encode_state, encoder_context);
+    //gen8_vme_vme_state_setup(ctx, encode_state, slice_param->is_intra_slice, encoder_context);
+    intel_vme_mpeg2_state_setup(ctx, encode_state, encoder_context);
+    gen8_vme_constant_setup(ctx, encode_state, encoder_context);
+
+    /* Program the media pipeline */
+    gen8_vme_mpeg2_pipeline_programing(ctx, encode_state, is_intra, encoder_context);
+
+    return VA_STATUS_SUCCESS;
+}
+
+/*
+ * MPEG-2 VME entry point installed as encoder_context->vme_pipeline.
+ *
+ * Runs the stages in order: media init, prepare, run, stop.  The first
+ * stage that reports something other than VA_STATUS_SUCCESS aborts the
+ * sequence and its status is returned to the caller instead of being
+ * silently dropped.  profile is not used.
+ */
+static VAStatus
+gen8_vme_mpeg2_pipeline(VADriverContextP ctx,
+                        VAProfile profile,
+                        struct encode_state *encode_state,
+                        struct intel_encoder_context *encoder_context)
+{
+    VAStatus va_status;
+
+    gen8_vme_media_init(ctx, encoder_context);
+
+    va_status = gen8_vme_mpeg2_prepare(ctx, encode_state, encoder_context);
+    if (va_status != VA_STATUS_SUCCESS)
+        return va_status;
+
+    va_status = gen8_vme_run(ctx, encode_state, encoder_context);
+    if (va_status != VA_STATUS_SUCCESS)
+        return va_status;
+
+    return gen8_vme_stop(ctx, encode_state, encoder_context);
+}
+
+/*
+ * Destructor installed as encoder_context->vme_context_destroy.
+ *
+ * Tears down the GPE context, drops the references held on the VME
+ * output, state and batch buffer objects, releases the VME state
+ * message array and finally frees the context itself.
+ */
+static void
+gen8_vme_context_destroy(void *context)
+{
+    struct gen6_vme_context *vme = context;
+
+    gen8_gpe_context_destroy(&vme->gpe_context);
+
+    dri_bo_unreference(vme->vme_output.bo);
+    vme->vme_output.bo = NULL;
+
+    dri_bo_unreference(vme->vme_state.bo);
+    vme->vme_state.bo = NULL;
+
+    dri_bo_unreference(vme->vme_batchbuffer.bo);
+    vme->vme_batchbuffer.bo = NULL;
+
+    /* free(NULL) is a no-op, so no guard is needed */
+    free(vme->vme_state_message);
+    vme->vme_state_message = NULL;
+
+    free(vme);
+}
+
+/*
+ * Create and initialise the VME context of an encoder context.
+ *
+ * Selects the kernel list and the pipeline entry point from
+ * encoder_context->codec (H.264 / H.264 MVC or MPEG-2), fills in the
+ * GPE context sizes and VFE state, initialises the scoreboard, loads
+ * the kernels and installs the gen8 surface setup callbacks.
+ *
+ * Returns True on success.  Returns False, leaving encoder_context
+ * untouched, when a memory allocation fails or the codec is not
+ * supported; previously those cases dereferenced a NULL pointer or used
+ * an uninitialised kernel count.
+ */
+Bool gen8_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
+{
+    struct gen6_vme_context *vme_context;
+    struct i965_kernel *vme_kernel_list = NULL;
+    int i965_kernel_num = 0;
+    void *vme_pipeline_func = NULL;
+
+    vme_context = calloc(1, sizeof(struct gen6_vme_context));
+    if (!vme_context)
+        return False;
+
+    /* Allocate up front so that failure needs no further unwinding */
+    vme_context->vme_state_message = malloc(VME_MSG_LENGTH * sizeof(int));
+    if (!vme_context->vme_state_message) {
+        free(vme_context);
+        return False;
+    }
+
+    switch (encoder_context->codec) {
+    case CODEC_H264:
+    case CODEC_H264_MVC:
+        vme_kernel_list = gen8_vme_kernels;
+        vme_pipeline_func = gen8_vme_pipeline;
+        i965_kernel_num = sizeof(gen8_vme_kernels) / sizeof(struct i965_kernel);
+        break;
+
+    case CODEC_MPEG2:
+        vme_kernel_list = gen8_vme_mpeg2_kernels;
+        vme_pipeline_func = gen8_vme_mpeg2_pipeline;
+        i965_kernel_num = sizeof(gen8_vme_mpeg2_kernels) / sizeof(struct i965_kernel);
+        break;
+
+    default:
+        /* never get here */
+        assert(0);
+
+        /* With asserts compiled out, fail cleanly instead of going on */
+        free(vme_context->vme_state_message);
+        free(vme_context);
+        return False;
+    }
+
+    encoder_context->vme_pipeline = vme_pipeline_func;
+
+    vme_context->vme_kernel_sum = i965_kernel_num;
+    vme_context->gpe_context.surface_state_binding_table.length = (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;
+
+    vme_context->gpe_context.idrt_size = sizeof(struct gen8_interface_descriptor_data) * MAX_INTERFACE_DESC_GEN6;
+    vme_context->gpe_context.curbe_size = CURBE_TOTAL_DATA_LENGTH;
+    vme_context->gpe_context.sampler_size = 0;
+
+    vme_context->gpe_context.vfe_state.max_num_threads = 60 - 1;
+    vme_context->gpe_context.vfe_state.num_urb_entries = 64;
+    vme_context->gpe_context.vfe_state.gpgpu_mode = 0;
+    vme_context->gpe_context.vfe_state.urb_entry_size = 16;
+    vme_context->gpe_context.vfe_state.curbe_allocation_size = CURBE_ALLOCATION_SIZE - 1;
+
+    gen7_vme_scoreboard_init(ctx, vme_context);
+
+    gen8_gpe_load_kernels(ctx,
+                          &vme_context->gpe_context,
+                          vme_kernel_list,
+                          i965_kernel_num);
+    vme_context->vme_surface2_setup = gen8_gpe_surface2_setup;
+    vme_context->vme_media_rw_surface_setup = gen8_gpe_media_rw_surface_setup;
+    vme_context->vme_buffer_suface_setup = gen8_gpe_buffer_suface_setup;
+    vme_context->vme_media_chroma_surface_setup = gen8_gpe_media_chroma_surface_setup;
+
+    encoder_context->vme_context = vme_context;
+    encoder_context->vme_context_destroy = gen8_vme_context_destroy;
+
+    return True;
+}
diff --git a/src/i965_avc_bsd.c b/src/i965_avc_bsd.c
index 0a5b89b..944a608 100644
--- a/src/i965_avc_bsd.c
+++ b/src/i965_avc_bsd.c
@@ -51,6 +51,7 @@ i965_avc_bsd_init_avc_bsd_surface(VADriverContextP ctx,
if (!avc_bsd_surface) {
avc_bsd_surface = calloc(sizeof(GenAvcSurface), 1);
+ avc_bsd_surface->frame_store_id = -1;
assert((obj_surface->size & 0x3f) == 0);
obj_surface->private_data = avc_bsd_surface;
}
@@ -388,7 +389,7 @@ i965_avc_bsd_buf_base_state(VADriverContextP ctx,
{
struct intel_batchbuffer *batch = i965_h264_context->batch;
struct i965_avc_bsd_context *i965_avc_bsd_context;
- int i, j;
+ int i;
VAPictureH264 *va_pic;
struct object_surface *obj_surface;
GenAvcSurface *avc_bsd_surface;
@@ -418,24 +419,8 @@ i965_avc_bsd_buf_base_state(VADriverContextP ctx,
OUT_BCS_BATCH(batch, 0);
for (i = 0; i < ARRAY_ELEMS(i965_h264_context->fsid_list); i++) {
- if (i965_h264_context->fsid_list[i].surface_id != VA_INVALID_ID &&
- i965_h264_context->fsid_list[i].obj_surface &&
- i965_h264_context->fsid_list[i].obj_surface->private_data) {
- int found = 0;
- for (j = 0; j < ARRAY_ELEMS(pic_param->ReferenceFrames); j++) {
- va_pic = &pic_param->ReferenceFrames[j];
-
- if (va_pic->flags & VA_PICTURE_H264_INVALID)
- continue;
-
- if (va_pic->picture_id == i965_h264_context->fsid_list[i].surface_id) {
- found = 1;
- break;
- }
- }
-
- assert(found == 1);
- obj_surface = i965_h264_context->fsid_list[i].obj_surface;
+ obj_surface = i965_h264_context->fsid_list[i].obj_surface;
+ if (obj_surface && obj_surface->private_data) {
avc_bsd_surface = obj_surface->private_data;
OUT_BCS_RELOC(batch, avc_bsd_surface->dmv_top,
@@ -458,9 +443,11 @@ i965_avc_bsd_buf_base_state(VADriverContextP ctx,
va_pic = &pic_param->CurrPic;
obj_surface = decode_state->render_object;
- obj_surface->flags &= ~SURFACE_REF_DIS_MASK;
- obj_surface->flags |= (pic_param->pic_fields.bits.reference_pic_flag ? SURFACE_REFERENCED : 0);
- i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
+ if (pic_param->pic_fields.bits.reference_pic_flag)
+ obj_surface->flags |= SURFACE_REFERENCED;
+ else
+ obj_surface->flags &= ~SURFACE_REFERENCED;
+ i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
/* initial uv component for YUV400 case */
if (pic_param->seq_fields.bits.chroma_format_idc == 0) {
@@ -490,26 +477,16 @@ i965_avc_bsd_buf_base_state(VADriverContextP ctx,
/* POC List */
for (i = 0; i < ARRAY_ELEMS(i965_h264_context->fsid_list); i++) {
- if (i965_h264_context->fsid_list[i].surface_id != VA_INVALID_ID) {
- int found = 0;
- for (j = 0; j < ARRAY_ELEMS(pic_param->ReferenceFrames); j++) {
- va_pic = &pic_param->ReferenceFrames[j];
-
- if (va_pic->flags & VA_PICTURE_H264_INVALID)
- continue;
-
- if (va_pic->picture_id == i965_h264_context->fsid_list[i].surface_id) {
- found = 1;
- break;
- }
- }
+ obj_surface = i965_h264_context->fsid_list[i].obj_surface;
- assert(found == 1);
+ if (obj_surface) {
+ const VAPictureH264 * const va_pic = avc_find_picture(
+ obj_surface->base.id, pic_param->ReferenceFrames,
+ ARRAY_ELEMS(pic_param->ReferenceFrames));
- if (!(va_pic->flags & VA_PICTURE_H264_INVALID)) {
- OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
- OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
- }
+ assert(va_pic != NULL);
+ OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
+ OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
} else {
OUT_BCS_BATCH(batch, 0);
OUT_BCS_BATCH(batch, 0);
@@ -803,7 +780,7 @@ i965_avc_bsd_object(VADriverContextP ctx,
{
struct i965_driver_data *i965 = i965_driver_data(ctx);
- if (IS_IRONLAKE(i965->intel.device_id))
+ if (IS_IRONLAKE(i965->intel.device_info))
ironlake_avc_bsd_object(ctx, decode_state, pic_param, slice_param, slice_index, i965_h264_context);
else
g4x_avc_bsd_object(ctx, decode_state, pic_param, slice_param, slice_index, i965_h264_context);
@@ -941,8 +918,8 @@ i965_avc_bsd_pipeline(VADriverContextP ctx, struct decode_state *decode_state, v
assert(decode_state->pic_param && decode_state->pic_param->buffer);
pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
-
- intel_update_avc_frame_store_index(ctx, decode_state, pic_param, i965_h264_context->fsid_list);
+ intel_update_avc_frame_store_index(ctx, decode_state, pic_param,
+ i965_h264_context->fsid_list, &i965_h264_context->fs_ctx);
i965_weight128_workaround(ctx,decode_state, h264_context);
i965_h264_context->enable_avc_ildb = 0;
diff --git a/src/i965_avc_hw_scoreboard.c b/src/i965_avc_hw_scoreboard.c
index b17ea83..f866599 100644
--- a/src/i965_avc_hw_scoreboard.c
+++ b/src/i965_avc_hw_scoreboard.c
@@ -217,7 +217,7 @@ i965_avc_hw_scoreboard_urb_layout(VADriverContextP ctx, struct i965_h264_context
unsigned int vfe_fence, cs_fence;
vfe_fence = avc_hw_scoreboard_context->urb.cs_start;
- cs_fence = URB_SIZE((&i965->intel));
+ cs_fence = i965->intel.device_info->urb_size;
BEGIN_BATCH(batch, 3);
OUT_BATCH(batch, CMD_URB_FENCE | UF0_VFE_REALLOC | UF0_CS_REALLOC | 1);
@@ -429,7 +429,7 @@ i965_avc_hw_scoreboard_decode_init(VADriverContextP ctx, void *h264_context)
avc_hw_scoreboard_context->urb.cs_start = avc_hw_scoreboard_context->urb.vfe_start +
avc_hw_scoreboard_context->urb.num_vfe_entries * avc_hw_scoreboard_context->urb.size_vfe_entry;
assert(avc_hw_scoreboard_context->urb.cs_start +
- avc_hw_scoreboard_context->urb.num_cs_entries * avc_hw_scoreboard_context->urb.size_cs_entry <= URB_SIZE((&i965->intel)));
+ avc_hw_scoreboard_context->urb.num_cs_entries * avc_hw_scoreboard_context->urb.size_cs_entry <= i965->intel.device_info->urb_size);
}
}
diff --git a/src/i965_avc_ildb.c b/src/i965_avc_ildb.c
index 62d599e..10292e0 100644
--- a/src/i965_avc_ildb.c
+++ b/src/i965_avc_ildb.c
@@ -342,7 +342,7 @@ i965_avc_ildb_upload_constants(VADriverContextP ctx,
assert(avc_ildb_context->curbe.bo->virtual);
root_input = avc_ildb_context->curbe.bo->virtual;
- if (IS_IRONLAKE(i965->intel.device_id)) {
+ if (IS_IRONLAKE(i965->intel.device_info)) {
root_input->max_concurrent_threads = 76; /* 72 - 2 + 8 - 2 */
} else {
root_input->max_concurrent_threads = 54; /* 50 - 2 + 8 - 2 */
@@ -410,7 +410,7 @@ i965_avc_ildb_urb_layout(VADriverContextP ctx, struct i965_h264_context *i965_h2
unsigned int vfe_fence, cs_fence;
vfe_fence = avc_ildb_context->urb.cs_start;
- cs_fence = URB_SIZE((&i965->intel));
+ cs_fence = i965->intel.device_info->urb_size;
BEGIN_BATCH(batch, 3);
OUT_BATCH(batch, CMD_URB_FENCE | UF0_VFE_REALLOC | UF0_CS_REALLOC | 1);
@@ -427,7 +427,7 @@ i965_avc_ildb_state_base_address(VADriverContextP ctx, struct i965_h264_context
struct i965_driver_data *i965 = i965_driver_data(ctx);
struct intel_batchbuffer *batch = i965_h264_context->batch;
- if (IS_IRONLAKE(i965->intel.device_id)) {
+ if (IS_IRONLAKE(i965->intel.device_info)) {
BEGIN_BATCH(batch, 8);
OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 6);
OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
@@ -596,8 +596,8 @@ i965_avc_ildb_decode_init(VADriverContextP ctx, void *h264_context)
avc_ildb_context->urb.vfe_start = 0;
avc_ildb_context->urb.cs_start = avc_ildb_context->urb.vfe_start +
avc_ildb_context->urb.num_vfe_entries * avc_ildb_context->urb.size_vfe_entry;
- // assert(avc_ildb_context->urb.cs_start +
- // avc_ildb_context->urb.num_cs_entries * avc_ildb_context->urb.size_cs_entry <= URB_SIZE((&i965->intel)));
+ // assert(avc_ildb_context->urb.cs_start +
+ // avc_ildb_context->urb.num_cs_entries * avc_ildb_context->urb.size_cs_entry <= i965->intel.device_info->urb_size);
for (i = 0; i < NUM_AVC_ILDB_SURFACES; i++) {
dri_bo_unreference(avc_ildb_context->surface[i].s_bo);
@@ -614,7 +614,7 @@ i965_avc_ildb_decode_init(VADriverContextP ctx, void *h264_context)
/* kernel offset */
assert(NUM_AVC_ILDB_INTERFACES == ARRAY_ELEMS(avc_ildb_kernel_offset_gen5));
- if (IS_IRONLAKE(i965->intel.device_id)) {
+ if (IS_IRONLAKE(i965->intel.device_info)) {
avc_ildb_kernel_offset = avc_ildb_kernel_offset_gen5;
} else {
avc_ildb_kernel_offset = avc_ildb_kernel_offset_gen4;
diff --git a/src/i965_decoder.h b/src/i965_decoder.h
index 4f7d2cc..14d4d0c 100644
--- a/src/i965_decoder.h
+++ b/src/i965_decoder.h
@@ -29,6 +29,7 @@
#include <stdlib.h>
#include <va/va.h>
+#include <va/va_dec_vp8.h>
#include <intel_bufmgr.h>
#define MAX_GEN_REFERENCE_FRAMES 16
@@ -38,6 +39,21 @@ struct gen_frame_store {
VASurfaceID surface_id;
int frame_store_id;
struct object_surface *obj_surface;
+
+ /* This represents the time when this frame store was last used to
+ hold a reference frame. This is not connected to a presentation
+ timestamp (PTS), and this is not a common decoding time stamp
+ (DTS) either. It serves the purpose of tracking retired
+ reference frame candidates.
+
+ This is only used for H.264 decoding on platforms before Haswell */
+ uint64_t ref_age;
+};
+
+/* Bookkeeping shared by all frame store entries of one decoder context.
+   NOTE(review): the field meanings below are inferred from the names and
+   from the ref_age comment in struct gen_frame_store -- confirm against
+   intel_update_avc_frame_store_index(). */
+typedef struct gen_frame_store_context GenFrameStoreContext;
+struct gen_frame_store_context {
+ uint64_t age; /* presumably the running counter that ref_age is sampled from */
+ int prev_poc; /* presumably the POC of the previously handled picture */
};
typedef struct gen_buffer GenBuffer;
@@ -49,4 +65,6 @@ struct gen_buffer {
struct hw_context *
gen75_dec_hw_context_init(VADriverContextP ctx, struct object_config *obj_config);
+extern struct hw_context *
+gen8_dec_hw_context_init(VADriverContextP ctx, struct object_config *obj_config);
#endif /* I965_DECODER_H */
diff --git a/src/i965_decoder_utils.c b/src/i965_decoder_utils.c
index 4ef09b5..8b546db 100644
--- a/src/i965_decoder_utils.c
+++ b/src/i965_decoder_utils.c
@@ -22,10 +22,11 @@
*/
#include "sysdeps.h"
-
+#include <limits.h>
#include <alloca.h>
#include "intel_batchbuffer.h"
+#include "intel_media.h"
#include "i965_drv_video.h"
#include "i965_decoder_utils.h"
#include "i965_defines.h"
@@ -139,7 +140,7 @@ mpeg2_set_reference_surfaces(
ref_frames[n++].surface_id = ref_frames[0].surface_id;
}
- if (pic_param->picture_coding_extension.bits.progressive_frame)
+ if (pic_param->picture_coding_extension.bits.frame_pred_frame_dct)
return;
ref_frames[2].surface_id = VA_INVALID_ID;
@@ -174,6 +175,75 @@ mpeg2_set_reference_surfaces(
}
}
+/* Ensure the supplied VA surface has valid storage for decoding the
+   current picture.
+
+   The surface format is derived from chroma_format_idc in the picture
+   parameters: 0 (grayscale) maps to Y800 and 1 (4:2:0) maps to NV12;
+   anything else is rejected. If the VA config does not allow the
+   native format, grayscale falls back to an NV12 surface whose chroma
+   plane is filled with the neutral value 0x80.
+
+   Returns VA_STATUS_SUCCESS, VA_STATUS_ERROR_UNSUPPORTED_RT_FORMAT when
+   no usable surface format exists, the status reported by
+   i965_check_alloc_surface_bo() when (re-)allocation fails, or
+   VA_STATUS_ERROR_OPERATION_FAILED when the surface cannot be mapped to
+   initialise the fake chroma plane. */
+VAStatus
+avc_ensure_surface_bo(
+    VADriverContextP                    ctx,
+    struct decode_state                *decode_state,
+    struct object_surface              *obj_surface,
+    const VAPictureParameterBufferH264 *pic_param
+)
+{
+    VAStatus va_status;
+    uint32_t hw_fourcc, fourcc, subsample, chroma_format;
+
+    /* Validate chroma format */
+    switch (pic_param->seq_fields.bits.chroma_format_idc) {
+    case 0: // Grayscale
+        fourcc = VA_FOURCC_Y800;
+        subsample = SUBSAMPLE_YUV400;
+        chroma_format = VA_RT_FORMAT_YUV400;
+        break;
+    case 1: // YUV 4:2:0
+        fourcc = VA_FOURCC_NV12;
+        subsample = SUBSAMPLE_YUV420;
+        chroma_format = VA_RT_FORMAT_YUV420;
+        break;
+    default:
+        return VA_STATUS_ERROR_UNSUPPORTED_RT_FORMAT;
+    }
+
+    /* Determine the HW surface format, bound to VA config needs */
+    if ((decode_state->base.chroma_formats & chroma_format) == chroma_format)
+        hw_fourcc = fourcc;
+    else {
+        hw_fourcc = 0;
+        switch (fourcc) {
+        case VA_FOURCC_Y800: // Implement with an NV12 surface
+            if (decode_state->base.chroma_formats & VA_RT_FORMAT_YUV420) {
+                hw_fourcc = VA_FOURCC_NV12;
+                subsample = SUBSAMPLE_YUV420;
+            }
+            break;
+        }
+    }
+    if (!hw_fourcc)
+        return VA_STATUS_ERROR_UNSUPPORTED_RT_FORMAT;
+
+    /* (Re-)allocate the underlying surface buffer store, if necessary */
+    if (!obj_surface->bo || obj_surface->fourcc != hw_fourcc) {
+        struct i965_driver_data * const i965 = i965_driver_data(ctx);
+
+        i965_destroy_surface_storage(obj_surface);
+        va_status = i965_check_alloc_surface_bo(ctx, obj_surface,
+            i965->codec_info->has_tiled_surface, hw_fourcc, subsample);
+        if (va_status != VA_STATUS_SUCCESS)
+            return va_status;
+    }
+
+    /* Fake chroma components if grayscale is implemented on top of NV12 */
+    if (fourcc == VA_FOURCC_Y800 && hw_fourcc == VA_FOURCC_NV12) {
+        const uint32_t uv_offset = obj_surface->width * obj_surface->height;
+        const uint32_t uv_size   = obj_surface->width * obj_surface->height / 2;
+
+        /* A failed mapping must not be written through */
+        if (drm_intel_gem_bo_map_gtt(obj_surface->bo) != 0)
+            return VA_STATUS_ERROR_OPERATION_FAILED;
+        memset((uint8_t *)obj_surface->bo->virtual + uv_offset, 0x80, uv_size);
+        drm_intel_gem_bo_unmap_gtt(obj_surface->bo);
+    }
+    return VA_STATUS_SUCCESS;
+}
+
/* Generate flat scaling matrices for H.264 decoding */
void
avc_gen_default_iq_matrix(VAIQMatrixBufferH264 *iq_matrix)
@@ -185,6 +255,56 @@ avc_gen_default_iq_matrix(VAIQMatrixBufferH264 *iq_matrix)
memset(&iq_matrix->ScalingList8x8, 16, sizeof(iq_matrix->ScalingList8x8));
}
+/* Returns the POC of the supplied VA picture: the smaller of the top and
+   bottom field order counts, where a field order count is ignored when
+   the picture is flagged as being only the opposite field */
+static int
+avc_get_picture_poc(const VAPictureH264 *va_pic)
+{
+    const int field_flags = va_pic->flags &
+        (VA_PICTURE_H264_TOP_FIELD | VA_PICTURE_H264_BOTTOM_FIELD);
+    int top_poc = INT_MAX;
+    int bottom_poc = INT_MAX;
+
+    if (field_flags != VA_PICTURE_H264_BOTTOM_FIELD)
+        top_poc = va_pic->TopFieldOrderCnt;
+    if (field_flags != VA_PICTURE_H264_TOP_FIELD)
+        bottom_poc = va_pic->BottomFieldOrderCnt;
+
+    return MIN(top_poc, bottom_poc);
+}
+
+/* Returns a unique picture ID that represents the supplied VA surface
+   object, or -1 if the ID does not fit in 16 bits */
+int
+avc_get_picture_id(struct object_surface *obj_surface)
+{
+    /* This highly depends on how the internal VA objects are organized.
+
+       Theory of operations:
+       The VA objects are maintained in heaps so that any released VA
+       surface will become free again for future allocation. This means
+       that holes in there are filled in for subsequent allocations.
+       So, this ultimately means that we could just use the Heap ID of
+       the VA surface as the resulting picture ID (16 bits) */
+    const int pic_id = 1 + (obj_surface->base.id & OBJECT_HEAP_ID_MASK);
+
+    if (pic_id > 0xffff)
+        return -1;
+    return pic_id;
+}
+
+/* Finds the VA/H264 picture associated with the specified VA surface id.
+   Entries flagged VA_PICTURE_H264_INVALID are skipped. Returns NULL when
+   the id is VA_INVALID_ID or no valid entry matches */
+VAPictureH264 *
+avc_find_picture(VASurfaceID id, VAPictureH264 *pic_list, int pic_list_count)
+{
+    int idx;
+
+    if (id == VA_INVALID_ID)
+        return NULL;
+
+    for (idx = 0; idx < pic_list_count; idx++) {
+        VAPictureH264 * const candidate = pic_list + idx;
+
+        if (candidate->flags & VA_PICTURE_H264_INVALID)
+            continue;
+        if (candidate->picture_id == id)
+            return candidate;
+    }
+    return NULL;
+}
+
/* Get first macroblock bit offset for BSD, minus EPB count (AVC) */
/* XXX: slice_data_bit_offset does not account for EPB */
unsigned int
@@ -245,8 +365,24 @@ avc_get_first_mb_bit_offset_with_epb(
static inline uint8_t
get_ref_idx_state_1(const VAPictureH264 *va_pic, unsigned int frame_store_id)
{
+ /* The H.264 standard, and the VA-API specification, allows for at
+ least 3 states for a picture: "used for short-term reference",
+ "used for long-term reference", or considered as not used for
+ reference.
+
+ The latter is used in the MVC inter prediction and inter-view
+ prediction process (H.8.4). This has an incidence on the
+ colZeroFlag variable, as defined in 8.4.1.2.
+
+ Since it is not possible to directly program that flag, let's
+ make the hardware derive this value by assimilating "considered
+ as not used for reference" to a "not used for short-term
+ reference", and subsequently making it "used for long-term
+ reference" to fit the definition of Bit6 here */
+ const unsigned int ref_flags = VA_PICTURE_H264_SHORT_TERM_REFERENCE |
+ VA_PICTURE_H264_LONG_TERM_REFERENCE;
const unsigned int is_long_term =
- !!(va_pic->flags & VA_PICTURE_H264_LONG_TERM_REFERENCE);
+ ((va_pic->flags & ref_flags) != VA_PICTURE_H264_SHORT_TERM_REFERENCE);
const unsigned int is_top_field =
!!(va_pic->flags & VA_PICTURE_H264_TOP_FIELD);
const unsigned int is_bottom_field =
@@ -267,28 +403,35 @@ gen5_fill_avc_ref_idx_state(
const GenFrameStore frame_store[MAX_GEN_REFERENCE_FRAMES]
)
{
- unsigned int i, n, frame_idx;
+ int i, j;
- for (i = 0, n = 0; i < ref_list_count; i++) {
+ for (i = 0; i < ref_list_count; i++) {
const VAPictureH264 * const va_pic = &ref_list[i];
- if (va_pic->flags & VA_PICTURE_H264_INVALID)
+ if ((va_pic->flags & VA_PICTURE_H264_INVALID) ||
+ va_pic->picture_id == VA_INVALID_ID) {
+ state[i] = 0xff;
continue;
+ }
- for (frame_idx = 0; frame_idx < MAX_GEN_REFERENCE_FRAMES; frame_idx++) {
- const GenFrameStore * const fs = &frame_store[frame_idx];
- if (fs->surface_id != VA_INVALID_ID &&
- fs->surface_id == va_pic->picture_id) {
- assert(frame_idx == fs->frame_store_id);
+ for (j = 0; j < MAX_GEN_REFERENCE_FRAMES; j++) {
+ if (frame_store[j].surface_id == va_pic->picture_id)
break;
- }
}
- assert(frame_idx < MAX_GEN_REFERENCE_FRAMES);
- state[n++] = get_ref_idx_state_1(va_pic, frame_idx);
+
+ if (j != MAX_GEN_REFERENCE_FRAMES) { // Found picture in the Frame Store
+ const GenFrameStore * const fs = &frame_store[j];
+ assert(fs->frame_store_id == j); // Current architecture/assumption
+ state[i] = get_ref_idx_state_1(va_pic, fs->frame_store_id);
+ }
+ else {
+ WARN_ONCE("Invalid RefPicListX[] entry!!! It is not included in DPB\n");
+ state[i] = get_ref_idx_state_1(va_pic, 0) | 0x80;
+ }
}
- for (; n < 32; n++)
- state[n] = 0xff;
+ for (; i < 32; i++)
+ state[i] = 0xff;
}
/* Emit Reference List Entries (Gen6+: SNB, IVB) */
@@ -344,138 +487,249 @@ gen6_send_avc_ref_idx_state(
);
}
-void
-intel_update_avc_frame_store_index(VADriverContextP ctx,
- struct decode_state *decode_state,
- VAPictureParameterBufferH264 *pic_param,
- GenFrameStore frame_store[MAX_GEN_REFERENCE_FRAMES])
+/*
+ * Emit an 11-dword MFX_AVC_SLICE_STATE command describing a "phantom"
+ * slice into the BCS batch buffer.
+ *
+ * pic_param:        supplies the picture size in MBs and the field/MBAFF flags
+ * next_slice_param: the slice following the phantom one, or NULL
+ * batch:            batch buffer the command is written to
+ *
+ * ctx is not referenced directly in this function body.
+ */
+static void
+gen6_mfd_avc_phantom_slice_state(VADriverContextP ctx,
+ VAPictureParameterBufferH264 *pic_param,
+ VASliceParameterBufferH264 *next_slice_param,
+ struct intel_batchbuffer *batch)
 {
- int i, j;
+ int width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
+ int height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
+ int slice_hor_pos, slice_ver_pos, slice_start_mb_num, next_slice_hor_pos, next_slice_ver_pos;
+ int mbaff_picture = (!pic_param->pic_fields.bits.field_pic_flag &&
+ pic_param->seq_fields.bits.mb_adaptive_frame_field_flag);
+
+ /* With a following slice: the phantom slice starts at MB (0, 0) and the
+ next-slice position is derived from first_mb_in_slice, which is
+ doubled (shifted left by one) for MBAFF pictures */
+ if (next_slice_param) {
+ int first_mb_in_next_slice;
+
+ slice_hor_pos = 0;
+ slice_ver_pos = 0;
+ slice_start_mb_num = 0;
+ first_mb_in_next_slice = next_slice_param->first_mb_in_slice << mbaff_picture;
+ next_slice_hor_pos = first_mb_in_next_slice % width_in_mbs;
+ next_slice_ver_pos = first_mb_in_next_slice / width_in_mbs;
+ } else {
+ /* Without a following slice: the phantom slice starts at row
+ height_in_mbs (one row past the last MB row of the frame), its
+ start MB number is the frame MB count, halved for field pictures,
+ and the next-slice position is (0, 0) */
+ slice_hor_pos = 0;
+ slice_ver_pos = height_in_mbs;
+ slice_start_mb_num = width_in_mbs * height_in_mbs / (1 + !!pic_param->pic_fields.bits.field_pic_flag);
+ next_slice_hor_pos = 0;
+ next_slice_ver_pos = 0;
+ }
- assert(MAX_GEN_REFERENCE_FRAMES == ARRAY_ELEMS(pic_param->ReferenceFrames));
+ /* Only the two position dwords carry data; every other payload dword
+ is written as zero */
+ BEGIN_BCS_BATCH(batch, 11);
+ OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch,
+ slice_ver_pos << 24 |
+ slice_hor_pos << 16 |
+ slice_start_mb_num << 0);
+ OUT_BCS_BATCH(batch,
+ next_slice_ver_pos << 16 |
+ next_slice_hor_pos << 0);
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+ ADVANCE_BCS_BATCH(batch);
+}
- for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
- int found = 0;
+/*
+ * Emit a 6-dword MFD_AVC_BSD_OBJECT command whose payload dwords are all
+ * zero into the BCS batch buffer. ctx and pic_param are not referenced
+ * directly in this function body.
+ */
+static void
+gen6_mfd_avc_phantom_slice_bsd_object(VADriverContextP ctx,
+ VAPictureParameterBufferH264 *pic_param,
+ struct intel_batchbuffer *batch)
+{
- if (frame_store[i].surface_id == VA_INVALID_ID ||
- frame_store[i].obj_surface == NULL)
- continue;
+ BEGIN_BCS_BATCH(batch, 6);
+ OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+ ADVANCE_BCS_BATCH(batch);
+}
- assert(frame_store[i].frame_store_id != -1);
+/*
+ * Emit a complete phantom slice: the slice state command followed by the
+ * matching BSD object command. next_slice_param may be NULL; it is only
+ * forwarded to the slice state helper.
+ */
+void
+gen6_mfd_avc_phantom_slice(VADriverContextP ctx,
+ VAPictureParameterBufferH264 *pic_param,
+ VASliceParameterBufferH264 *next_slice_param,
+ struct intel_batchbuffer *batch)
+{
+ gen6_mfd_avc_phantom_slice_state(ctx, pic_param, next_slice_param, batch);
+ gen6_mfd_avc_phantom_slice_bsd_object(ctx, pic_param, batch);
+}
- for (j = 0; j < MAX_GEN_REFERENCE_FRAMES; j++) {
- VAPictureH264 *ref_pic = &pic_param->ReferenceFrames[j];
- if (ref_pic->flags & VA_PICTURE_H264_INVALID)
- continue;
+/*
+ * qsort() comparison callback used to sort the array of free frame store
+ * entries by increasing ref_age.
+ *
+ * p1 and p2 point to elements of an array of GenFrameStore pointers.
+ * Returns a negative value, zero or a positive value when the first entry
+ * is respectively older than, as old as, or more recent than the second.
+ *
+ * The result is built from comparisons instead of subtracting the two
+ * ages: the caller stores a uint64_t age into ref_age, and truncating a
+ * 64-bit difference to int could report the wrong sign.
+ */
+static int
+compare_avc_ref_store_func(const void *p1, const void *p2)
+{
+    const GenFrameStore * const fs1 = *(const GenFrameStore * const *)p1;
+    const GenFrameStore * const fs2 = *(const GenFrameStore * const *)p2;
- if (frame_store[i].surface_id == ref_pic->picture_id) {
- found = 1;
- break;
- }
- }
+
+    return (fs1->ref_age > fs2->ref_age) - (fs1->ref_age < fs2->ref_age);
+}
- /* remove it from the internal DPB */
- if (!found) {
- struct object_surface *obj_surface = frame_store[i].obj_surface;
-
- obj_surface->flags &= ~SURFACE_REFERENCED;
+/*
+ * Update the AVC frame store so that it tracks the surfaces currently
+ * listed in decode_state->reference_objects[].
+ *
+ * Entries whose surface is still referenced keep their slot and get their
+ * ref_age refreshed. All other slots are considered free: they are sorted
+ * by ref_age and handed out, in that order, to the references that do not
+ * own a slot yet. A freed slot only has its obj_surface cleared here; its
+ * surface_id and ref_age are left untouched until the slot is reused.
+ *
+ * fs_ctx carries the age counter and the POC seen by the previous call
+ * across invocations.
+ */
+void
+intel_update_avc_frame_store_index(
+ VADriverContextP ctx,
+ struct decode_state *decode_state,
+ VAPictureParameterBufferH264 *pic_param,
+ GenFrameStore frame_store[MAX_GEN_REFERENCE_FRAMES],
+ GenFrameStoreContext *fs_ctx
+)
+{
+ GenFrameStore *free_refs[MAX_GEN_REFERENCE_FRAMES];
+ uint32_t used_refs = 0, add_refs = 0;
+ uint64_t age;
+ int i, n, num_free_refs;
+
+ /* Detect changes of access unit */
+ /* The age counter advances on the very first call (age == 0) and
+ whenever the POC of the current picture differs from the previous one */
+ const int poc = avc_get_picture_poc(&pic_param->CurrPic);
+ if (fs_ctx->age == 0 || fs_ctx->prev_poc != poc)
+ fs_ctx->age++;
+ fs_ctx->prev_poc = poc;
+ age = fs_ctx->age;
+
+ /* Tag entries that are still available in our Frame Store */
+ /* used_refs is a bitmask of frame store slots still in use; add_refs is
+ a bitmask of reference_objects[] indices that need a slot */
+ for (i = 0; i < ARRAY_ELEMS(decode_state->reference_objects); i++) {
+ struct object_surface * const obj_surface =
+ decode_state->reference_objects[i];
+ if (!obj_surface)
+ continue;
- if ((obj_surface->flags & SURFACE_ALL_MASK) == SURFACE_DISPLAYED) {
- dri_bo_unreference(obj_surface->bo);
- obj_surface->bo = NULL;
- obj_surface->flags &= ~SURFACE_REF_DIS_MASK;
+ GenAvcSurface * const avc_surface = obj_surface->private_data;
+ if (!avc_surface)
+ continue;
+ if (avc_surface->frame_store_id >= 0) {
+ GenFrameStore * const fs =
+ &frame_store[avc_surface->frame_store_id];
+ if (fs->surface_id == obj_surface->base.id) {
+ fs->obj_surface = obj_surface;
+ fs->ref_age = age;
+ used_refs |= 1 << fs->frame_store_id;
+ continue;
 }
-
- if (obj_surface->free_private_data)
- obj_surface->free_private_data(&obj_surface->private_data);
-
- frame_store[i].surface_id = VA_INVALID_ID;
- frame_store[i].frame_store_id = -1;
- frame_store[i].obj_surface = NULL;
 }
+ add_refs |= 1 << i;
 }
- for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
- VAPictureH264 *ref_pic = &pic_param->ReferenceFrames[i];
- int found = 0;
-
- if (ref_pic->flags & VA_PICTURE_H264_INVALID ||
- ref_pic->picture_id == VA_INVALID_SURFACE ||
- decode_state->reference_objects[i] == NULL)
+ /* Build and sort out the list of retired candidates. The resulting
+ list is ordered by increasing age when they were last used */
+ for (i = 0, n = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
+ if (!(used_refs & (1 << i))) {
+ GenFrameStore * const fs = &frame_store[i];
+ fs->obj_surface = NULL;
+ free_refs[n++] = fs;
+ }
+ }
+ num_free_refs = n;
+ qsort(&free_refs[0], n, sizeof(free_refs[0]), compare_avc_ref_store_func);
+
+ /* Append the new reference frames */
+ /* Each new reference takes the next entry of the sorted free list; the
+ slot index is recorded both in the frame store entry and in the
+ surface private data */
+ for (i = 0, n = 0; i < ARRAY_ELEMS(decode_state->reference_objects); i++) {
+ struct object_surface * const obj_surface =
+ decode_state->reference_objects[i];
+ if (!obj_surface || !(add_refs & (1 << i)))
 continue;
- for (j = 0; j < MAX_GEN_REFERENCE_FRAMES; j++) {
- if (frame_store[j].surface_id == ref_pic->picture_id) {
- found = 1;
- break;
- }
+ GenAvcSurface * const avc_surface = obj_surface->private_data;
+ if (!avc_surface)
+ continue;
+ if (n < num_free_refs) {
+ GenFrameStore * const fs = free_refs[n++];
+ fs->surface_id = obj_surface->base.id;
+ fs->obj_surface = obj_surface;
+ fs->frame_store_id = fs - frame_store;
+ fs->ref_age = age;
+ avc_surface->frame_store_id = fs->frame_store_id;
+ continue;
 }
+ WARN_ONCE("No free slot found for DPB reference list!!!\n");
+ }
+}
- /* add the new reference frame into the internal DPB */
- if (!found) {
- int frame_idx;
- struct object_surface *obj_surface = decode_state->reference_objects[i];
+/*
+ * Rebuild the frame store from decode_state->reference_objects[] in
+ * compact form: the non-NULL reference objects are stored in consecutive
+ * slots, in their original order, with frame_store_id equal to the slot
+ * index. Every slot after the last one used is reset to
+ * VA_INVALID_ID / NULL / -1.
+ *
+ * ctx and pic_param are not referenced directly in this function body.
+ */
+void
+gen75_update_avc_frame_store_index(
+ VADriverContextP ctx,
+ struct decode_state *decode_state,
+ VAPictureParameterBufferH264 *pic_param,
+ GenFrameStore frame_store[MAX_GEN_REFERENCE_FRAMES]
+)
+{
+ int i, n;
+
+ /* Construct the Frame Store array, in compact form. i.e. empty or
+ invalid entries are discarded. */
+ for (i = 0, n = 0; i < ARRAY_ELEMS(decode_state->reference_objects); i++) {
+ struct object_surface * const obj_surface =
+ decode_state->reference_objects[i];
+ if (!obj_surface)
+ continue;
- /*
- * Sometimes a dummy frame comes from the upper layer library, call i965_check_alloc_surface_bo()
- * to ake sure the store buffer is allocated for this reference frame
- */
- i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
+ /* NOTE(review): n is not checked against MAX_GEN_REFERENCE_FRAMES;
+ presumably reference_objects[] has no more elements than
+ frame_store[] -- confirm */
+ GenFrameStore * const fs = &frame_store[n];
+ fs->surface_id = obj_surface->base.id;
+ fs->obj_surface = obj_surface;
+ fs->frame_store_id = n++;
+ }
- /* Find a free frame store index */
- for (frame_idx = 0; frame_idx < MAX_GEN_REFERENCE_FRAMES; frame_idx++) {
- for (j = 0; j < MAX_GEN_REFERENCE_FRAMES; j++) {
- if (frame_store[j].surface_id == VA_INVALID_ID ||
- frame_store[j].obj_surface == NULL)
- continue;
+ /* Any remaining entry is marked as invalid */
+ for (; n < MAX_GEN_REFERENCE_FRAMES; n++) {
+ GenFrameStore * const fs = &frame_store[n];
+ fs->surface_id = VA_INVALID_ID;
+ fs->obj_surface = NULL;
+ fs->frame_store_id = -1;
+ }
+}
- if (frame_store[j].frame_store_id == frame_idx) /* the store index is in use */
- break;
- }
+/*
+ * Fill pic_ids[] with the picture ID of every active frame store entry.
+ *
+ * Entries are read in order until the first one without an obj_surface;
+ * all remaining elements of pic_ids[] are set to 0.
+ *
+ * Returns false as soon as avc_get_picture_id() reports a negative value
+ * for an entry (pic_ids[] is then only partially written), true otherwise.
+ */
+bool
+gen75_fill_avc_picid_list(
+ uint16_t pic_ids[16],
+ GenFrameStore frame_store[MAX_GEN_REFERENCE_FRAMES]
+)
+{
+ int i, pic_id;
- if (j == MAX_GEN_REFERENCE_FRAMES)
- break;
- }
+ /* Fill in with known picture IDs. The Frame Store array is in
+ compact form, i.e. empty entries are only to be found at the
+ end of the array: there are no holes in the set of active
+ reference frames */
+ /* NOTE(review): both loops run up to MAX_GEN_REFERENCE_FRAMES while
+ pic_ids[] is declared with 16 elements; presumably the two sizes
+ are equal -- confirm */
+ for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
+ GenFrameStore * const fs = &frame_store[i];
+ if (!fs->obj_surface)
+ break;
+ pic_id = avc_get_picture_id(fs->obj_surface);
+ if (pic_id < 0)
+ return false;
+ pic_ids[i] = pic_id;
+ }
- assert(frame_idx < MAX_GEN_REFERENCE_FRAMES);
+ /* When an element of the list is not relevant the value of the
+ picture ID shall be set to 0 */
+ for (; i < MAX_GEN_REFERENCE_FRAMES; i++)
+ pic_ids[i] = 0;
+ return true;
+}
- for (j = 0; j < MAX_GEN_REFERENCE_FRAMES; j++) {
- if (frame_store[j].surface_id == VA_INVALID_ID ||
- frame_store[j].obj_surface == NULL) {
- frame_store[j].surface_id = ref_pic->picture_id;
- frame_store[j].frame_store_id = frame_idx;
- frame_store[j].obj_surface = obj_surface;
- break;
- }
- }
- }
- }
+/*
+ * Emit an MFD_AVC_PICID_STATE command carrying the picture IDs of the
+ * frame store entries.
+ *
+ * Returns false, without writing anything to the batch buffer, when the
+ * picture ID list cannot be built; true once the command is emitted.
+ */
+bool
+gen75_send_avc_picid_state(
+ struct intel_batchbuffer *batch,
+ GenFrameStore frame_store[MAX_GEN_REFERENCE_FRAMES]
+)
+{
+ uint16_t pic_ids[16];
- /* sort */
- for (i = 0; i < MAX_GEN_REFERENCE_FRAMES - 1; i++) {
- if (frame_store[i].surface_id != VA_INVALID_ID &&
- frame_store[i].obj_surface != NULL &&
- frame_store[i].frame_store_id == i)
- continue;
+ if (!gen75_fill_avc_picid_list(pic_ids, frame_store))
+ return false;
- for (j = i + 1; j < MAX_GEN_REFERENCE_FRAMES; j++) {
- if (frame_store[j].surface_id != VA_INVALID_ID &&
- frame_store[j].obj_surface != NULL &&
- frame_store[j].frame_store_id == i) {
- VASurfaceID id = frame_store[i].surface_id;
- int frame_idx = frame_store[i].frame_store_id;
- struct object_surface *obj_surface = frame_store[i].obj_surface;
-
- frame_store[i].surface_id = frame_store[j].surface_id;
- frame_store[i].frame_store_id = frame_store[j].frame_store_id;
- frame_store[i].obj_surface = frame_store[j].obj_surface;
- frame_store[j].surface_id = id;
- frame_store[j].frame_store_id = frame_idx;
- frame_store[j].obj_surface = obj_surface;
- break;
- }
- }
- }
+ /* Two explicit dwords followed by the 16 x 16-bit picture IDs
+ (32 bytes); presumably the batch length of 10 is counted in
+ 32-bit dwords, i.e. 2 + 8 -- confirm */
+ BEGIN_BCS_BATCH(batch, 10);
+ OUT_BCS_BATCH(batch, MFD_AVC_PICID_STATE | (10 - 2));
+ OUT_BCS_BATCH(batch, 0); // enable Picture ID Remapping
+ intel_batchbuffer_data(batch, pic_ids, sizeof(pic_ids));
+ ADVANCE_BCS_BATCH(batch);
+ return true;
 }
void
@@ -517,14 +771,70 @@ intel_update_vc1_frame_store_index(VADriverContextP ctx,
}
+/*
+ * Populate the frame store used for VP8 decoding.
+ *
+ * Slots 0, 1 and 2 are loaded from the last, golden and alternate
+ * reference frames respectively, pairing the surface ID found in
+ * pic_param with decode_state->reference_objects[0..2]. A reference is
+ * unusable when its ID is VA_INVALID_ID, its surface object is NULL, or
+ * the surface object has no bo. An unusable last frame leaves slot 0
+ * marked invalid; an unusable golden or alternate frame makes its slot a
+ * copy of slot 0.
+ *
+ * Every remaining slot i is a copy of slot (i % 2). Only surface_id and
+ * obj_surface are written.
+ */
+void
+intel_update_vp8_frame_store_index(VADriverContextP ctx,
+                                   struct decode_state *decode_state,
+                                   VAPictureParameterBufferVP8 *pic_param,
+                                   GenFrameStore frame_store[MAX_GEN_REFERENCE_FRAMES])
+{
+    const VASurfaceID ref_ids[3] = {
+        pic_param->last_ref_frame,
+        pic_param->golden_ref_frame,
+        pic_param->alt_ref_frame
+    };
+    int slot;
+
+    for (slot = 0; slot < 3; slot++) {
+        struct object_surface * const ref_obj =
+            decode_state->reference_objects[slot];
+        GenFrameStore * const fs = &frame_store[slot];
+
+        if (ref_ids[slot] != VA_INVALID_ID && ref_obj && ref_obj->bo) {
+            /* Usable reference */
+            fs->surface_id = ref_ids[slot];
+            fs->obj_surface = ref_obj;
+        } else if (slot == 0) {
+            /* No usable last frame */
+            fs->surface_id = VA_INVALID_ID;
+            fs->obj_surface = NULL;
+        } else {
+            /* Golden/alternate frame falls back to the last frame slot */
+            fs->surface_id = frame_store[0].surface_id;
+            fs->obj_surface = frame_store[0].obj_surface;
+        }
+    }
+
+    /* Pad the rest of the frame store by alternating slots 0 and 1 */
+    for (slot = 3; slot < MAX_GEN_REFERENCE_FRAMES; slot++) {
+        const GenFrameStore * const src = &frame_store[slot % 2];
+
+        frame_store[slot].surface_id = src->surface_id;
+        frame_store[slot].obj_surface = src->obj_surface;
+    }
+}
+
static VAStatus
intel_decoder_check_avc_parameter(VADriverContextP ctx,
+ VAProfile h264_profile,
struct decode_state *decode_state)
{
struct i965_driver_data *i965 = i965_driver_data(ctx);
VAPictureParameterBufferH264 *pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
+ VAStatus va_status;
struct object_surface *obj_surface;
int i;
+ VASliceParameterBufferH264 *slice_param, *next_slice_param, *next_slice_group_param;
+ int j;
assert(!(pic_param->CurrPic.flags & VA_PICTURE_H264_INVALID));
assert(pic_param->CurrPic.picture_id != VA_INVALID_SURFACE);
@@ -538,27 +848,71 @@ intel_decoder_check_avc_parameter(VADriverContextP ctx,
if (pic_param->CurrPic.picture_id != decode_state->current_render_target)
goto error;
- for (i = 0; i < 16; i++) {
- if (pic_param->ReferenceFrames[i].flags & VA_PICTURE_H264_INVALID ||
- pic_param->ReferenceFrames[i].picture_id == VA_INVALID_SURFACE)
- break;
- else {
- obj_surface = SURFACE(pic_param->ReferenceFrames[i].picture_id);
- assert(obj_surface);
+ if ((h264_profile != VAProfileH264Baseline)) {
+ if (pic_param->num_slice_groups_minus1 ||
+ pic_param->pic_fields.bits.redundant_pic_cnt_present_flag) {
+ WARN_ONCE("Unsupported the FMO/ASO constraints!!!\n");
+ goto error;
+ }
+ }
- if (!obj_surface)
- goto error;
+ /* Fill in the reference objects array with the actual VA surface
+ objects with 1:1 correspondence with any entry in ReferenceFrames[],
+ i.e. including "holes" for invalid entries, that are expanded
+ to NULL in the reference_objects[] array */
+ for (i = 0; i < ARRAY_ELEMS(pic_param->ReferenceFrames); i++) {
+ const VAPictureH264 * const va_pic = &pic_param->ReferenceFrames[i];
- if (!obj_surface->bo) { /* a reference frame without store buffer */
- WARN_ONCE("Invalid reference frame!!!\n");
- }
+ obj_surface = NULL;
+ if (!(va_pic->flags & VA_PICTURE_H264_INVALID) &&
+ va_pic->picture_id != VA_INVALID_ID) {
+ obj_surface = SURFACE(pic_param->ReferenceFrames[i].picture_id);
+ if (!obj_surface)
+ return VA_STATUS_ERROR_INVALID_SURFACE;
- decode_state->reference_objects[i] = obj_surface;
+ /*
+ * Sometimes a dummy frame comes from the upper layer
+ * library; call avc_ensure_surface_bo() to make sure
+ * the store buffer is allocated for this reference
+ * frame
+ */
+ va_status = avc_ensure_surface_bo(ctx, decode_state, obj_surface,
+ pic_param);
+ if (va_status != VA_STATUS_SUCCESS)
+ return va_status;
}
+ decode_state->reference_objects[i] = obj_surface;
}
- for ( ; i < 16; i++)
- decode_state->reference_objects[i] = NULL;
+ for (j = 0; j < decode_state->num_slice_params; j++) {
+ assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
+ slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
+
+ if (j == decode_state->num_slice_params - 1)
+ next_slice_group_param = NULL;
+ else
+ next_slice_group_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j + 1]->buffer;
+
+ for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
+
+ if (i < decode_state->slice_params[j]->num_elements - 1)
+ next_slice_param = slice_param + 1;
+ else
+ next_slice_param = next_slice_group_param;
+
+ if (next_slice_param != NULL) {
+ /* If the mb position of next_slice is less than or equal to the current slice,
+ * discard the current frame.
+ */
+ if (next_slice_param->first_mb_in_slice <= slice_param->first_mb_in_slice) {
+ next_slice_param = NULL;
+ WARN_ONCE("!!!incorrect slice_param. The first_mb_in_slice of next_slice is less"
+ " than or equal to that in current slice\n");
+ goto error;
+ }
+ }
+ }
+ }
return VA_STATUS_SUCCESS;
@@ -617,7 +971,12 @@ intel_decoder_check_vc1_parameter(VADriverContextP ctx,
VAPictureParameterBufferVC1 *pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
struct object_surface *obj_surface;
int i = 0;
-
+
+ if (pic_param->sequence_fields.bits.interlace == 1 &&
+ pic_param->picture_fields.bits.frame_coding_mode != 0) { /* frame-interlace or field-interlace */
+ return VA_STATUS_ERROR_DECODING_ERROR;
+ }
+
if (pic_param->picture_fields.bits.picture_type == 0 ||
pic_param->picture_fields.bits.picture_type == 3) {
} else if (pic_param->picture_fields.bits.picture_type == 1 ||
@@ -654,6 +1013,48 @@ error:
return VA_STATUS_ERROR_INVALID_PARAMETER;
}
+/*
+ * Resolve the VP8 reference surfaces named in the picture parameters and
+ * store them in decode_state->reference_objects[].
+ *
+ * Slot 0 always describes the last frame, slot 1 the golden frame and
+ * slot 2 the alternate frame: this is the fixed layout read back by
+ * intel_update_vp8_frame_store_index(). Packing only the valid references
+ * into consecutive slots would pair a surface ID with the surface object
+ * of another reference whenever an earlier reference is invalid.
+ *
+ * A slot is set to NULL when the corresponding surface ID is
+ * VA_INVALID_SURFACE, when no surface object exists for that ID, or when
+ * the surface object has no bo. All remaining slots are cleared to NULL.
+ *
+ * Always returns VA_STATUS_SUCCESS.
+ */
+static VAStatus
+intel_decoder_check_vp8_parameter(VADriverContextP ctx,
+                                  struct decode_state *decode_state)
+{
+    /* NOTE(review): i965 is presumably referenced by the SURFACE() lookup
+       macro -- confirm */
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    VAPictureParameterBufferVP8 *pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
+    const VASurfaceID ref_ids[3] = {
+        pic_param->last_ref_frame,
+        pic_param->golden_ref_frame,
+        pic_param->alt_ref_frame
+    };
+    int i;
+
+    for (i = 0; i < 3; i++) {
+        struct object_surface *obj_surface = NULL;
+
+        if (ref_ids[i] != VA_INVALID_SURFACE) {
+            obj_surface = SURFACE(ref_ids[i]);
+
+            /* A surface without backing store cannot be used as a reference */
+            if (obj_surface && !obj_surface->bo)
+                obj_surface = NULL;
+        }
+
+        decode_state->reference_objects[i] = obj_surface;
+    }
+
+    for ( ; i < ARRAY_ELEMS(decode_state->reference_objects); i++)
+        decode_state->reference_objects[i] = NULL;
+
+    return VA_STATUS_SUCCESS;
+}
+
VAStatus
intel_decoder_sanity_check_input(VADriverContextP ctx,
VAProfile profile,
@@ -679,10 +1080,12 @@ intel_decoder_sanity_check_input(VADriverContextP ctx,
vaStatus = intel_decoder_check_mpeg2_parameter(ctx, decode_state);
break;
- case VAProfileH264Baseline:
+ case VAProfileH264ConstrainedBaseline:
case VAProfileH264Main:
case VAProfileH264High:
- vaStatus = intel_decoder_check_avc_parameter(ctx, decode_state);
+ case VAProfileH264StereoHigh:
+ case VAProfileH264MultiviewHigh:
+ vaStatus = intel_decoder_check_avc_parameter(ctx, profile, decode_state);
break;
case VAProfileVC1Simple:
@@ -695,6 +1098,10 @@ intel_decoder_sanity_check_input(VADriverContextP ctx,
vaStatus = VA_STATUS_SUCCESS;
break;
+ case VAProfileVP8Version0_3:
+ vaStatus = intel_decoder_check_vp8_parameter(ctx, decode_state);
+ break;
+
default:
vaStatus = VA_STATUS_ERROR_INVALID_PARAMETER;
break;
@@ -703,3 +1110,68 @@ intel_decoder_sanity_check_input(VADriverContextP ctx,
out:
return vaStatus;
}
+
+/*
+ * Return the next slice parameter
+ *
+ * The search starts right after the current slice and stops at the first
+ * slice whose macroblock address (vertical position * picture width in
+ * MBs + horizontal position) is not lower than that of the current slice.
+ *
+ * Input:
+ * slice_param: the current slice
+ * *group_idx & *element_idx: the current slice position in slice groups
+ * Output:
+ * Return the next slice parameter, or NULL when there is none
+ * *group_idx & *element_idx: the next slice position in slice groups;
+ * they are left unchanged when NULL is returned
+ */
+VASliceParameterBufferMPEG2 *
+intel_mpeg2_find_next_slice(struct decode_state *decode_state,
+                            VAPictureParameterBufferMPEG2 *pic_param,
+                            VASliceParameterBufferMPEG2 *slice_param,
+                            int *group_idx,
+                            int *element_idx)
+{
+    unsigned int mbs_per_row = ALIGN(pic_param->horizontal_size, 16) / 16;
+    int group = *group_idx;
+    int elem = *element_idx + 1;
+
+    while (group < decode_state->num_slice_params) {
+        while (elem < decode_state->slice_params[group]->num_elements) {
+            VASliceParameterBufferMPEG2 * const candidate =
+                ((VASliceParameterBufferMPEG2 *)decode_state->slice_params[group]->buffer) + elem;
+
+            if ((candidate->slice_vertical_position * mbs_per_row + candidate->slice_horizontal_position) >=
+                (slice_param->slice_vertical_position * mbs_per_row + slice_param->slice_horizontal_position)) {
+                *group_idx = group;
+                *element_idx = elem;
+
+                return candidate;
+            }
+
+            elem++;
+        }
+
+        /* Following groups are scanned from their first element */
+        elem = 0;
+        group++;
+    }
+
+    return NULL;
+}
+
+/*
+ * Make sure the VP8 segmentation map buffer can hold mb_width x mb_height
+ * macroblocks, (re-)allocating it when needed.
+ *
+ * A buffer flagged valid whose bo is at least as large as required is
+ * kept as is. A valid buffer that does not qualify has its bo reference
+ * dropped before a new bo is allocated.
+ *
+ * Returns true when buf holds a usable bo on exit, false when the
+ * allocation failed.
+ */
+bool
+intel_ensure_vp8_segmentation_buffer(VADriverContextP ctx, GenBuffer *buf,
+                                     unsigned int mb_width, unsigned int mb_height)
+{
+    struct i965_driver_data * const i965 = i965_driver_data(ctx);
+    /* The segmentation map is a 64-byte aligned linear buffer, with
+       each cache line holding only 8 bits for 4 contiguous MBs */
+    const unsigned int required_size = ((mb_width + 3) / 4) * 64 * mb_height;
+
+    /* Fast path: the current buffer is already large enough */
+    if (buf->valid && buf->bo && buf->bo->size >= required_size)
+        return true;
+
+    /* Release a valid but unsuitable buffer before replacing it */
+    if (buf->valid) {
+        drm_intel_bo_unreference(buf->bo);
+        buf->valid = false;
+    }
+
+    buf->bo = drm_intel_bo_alloc(i965->intel.bufmgr, "segmentation map",
+                                 required_size, 0x1000);
+    buf->valid = (buf->bo != NULL);
+
+    return buf->valid;
+}
diff --git a/src/i965_decoder_utils.h b/src/i965_decoder_utils.h
index 2a71f3e..3e6acdd 100644
--- a/src/i965_decoder_utils.h
+++ b/src/i965_decoder_utils.h
@@ -43,9 +43,23 @@ mpeg2_set_reference_surfaces(
VAPictureParameterBufferMPEG2 *pic_param
);
+VAStatus
+avc_ensure_surface_bo(
+ VADriverContextP ctx,
+ struct decode_state *decode_state,
+ struct object_surface *obj_surface,
+ const VAPictureParameterBufferH264 *pic_param
+);
+
void
avc_gen_default_iq_matrix(VAIQMatrixBufferH264 *iq_matrix);
+int
+avc_get_picture_id(struct object_surface *obj_surface);
+
+VAPictureH264 *
+avc_find_picture(VASurfaceID id, VAPictureH264 *pic_list, int pic_list_count);
+
unsigned int
avc_get_first_mb_bit_offset(
dri_bo *slice_data_bo,
@@ -75,20 +89,69 @@ gen6_send_avc_ref_idx_state(
const GenFrameStore frame_store[MAX_GEN_REFERENCE_FRAMES]
);
+void
+gen6_mfd_avc_phantom_slice(VADriverContextP ctx,
+ VAPictureParameterBufferH264 *pic_param,
+ VASliceParameterBufferH264 *next_slice_param,
+ struct intel_batchbuffer *batch
+);
+
VAStatus
intel_decoder_sanity_check_input(VADriverContextP ctx,
VAProfile profile,
struct decode_state *decode_state);
void
-intel_update_avc_frame_store_index(VADriverContextP ctx,
- struct decode_state *decode_state,
- VAPictureParameterBufferH264 *pic_param,
- GenFrameStore frame_store[MAX_GEN_REFERENCE_FRAMES]);
+intel_update_avc_frame_store_index(
+ VADriverContextP ctx,
+ struct decode_state *decode_state,
+ VAPictureParameterBufferH264 *pic_param,
+ GenFrameStore frame_store[MAX_GEN_REFERENCE_FRAMES],
+ GenFrameStoreContext *fs_ctx
+);
+
+void
+gen75_update_avc_frame_store_index(
+ VADriverContextP ctx,
+ struct decode_state *decode_state,
+ VAPictureParameterBufferH264 *pic_param,
+ GenFrameStore frame_store[MAX_GEN_REFERENCE_FRAMES]
+);
+
+bool
+gen75_fill_avc_picid_list(
+ uint16_t pic_ids[16],
+ GenFrameStore frame_store[MAX_GEN_REFERENCE_FRAMES]
+);
+
+bool
+gen75_send_avc_picid_state(
+ struct intel_batchbuffer *batch,
+ GenFrameStore frame_store[MAX_GEN_REFERENCE_FRAMES]
+);
void
intel_update_vc1_frame_store_index(VADriverContextP ctx,
struct decode_state *decode_state,
VAPictureParameterBufferVC1 *pic_param,
GenFrameStore frame_store[MAX_GEN_REFERENCE_FRAMES]);
+
+VASliceParameterBufferMPEG2 *
+intel_mpeg2_find_next_slice(struct decode_state *decode_state,
+ VAPictureParameterBufferMPEG2 *pic_param,
+ VASliceParameterBufferMPEG2 *slice_param,
+ int *group_idx,
+ int *element_idx);
+
+
+void
+intel_update_vp8_frame_store_index(VADriverContextP ctx,
+ struct decode_state *decode_state,
+ VAPictureParameterBufferVP8 *pic_param,
+ GenFrameStore frame_store[MAX_GEN_REFERENCE_FRAMES]);
+
+bool
+intel_ensure_vp8_segmentation_buffer(VADriverContextP ctx, GenBuffer *buf,
+ unsigned int mb_width, unsigned int mb_height);
+
#endif /* I965_DECODER_UTILS_H */
diff --git a/src/i965_defines.h b/src/i965_defines.h
index 8e4350a..6bf8e0d 100755
--- a/src/i965_defines.h
+++ b/src/i965_defines.h
@@ -107,6 +107,11 @@
# define GEN6_3DSTATE_SF_LINE_PROVOKE_SHIFT 27
# define GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT 25
+#define GEN8_3DSTATE_RASTER CMD(3, 0, 0x50)
+# define GEN8_3DSTATE_RASTER_CULL_BOTH (0 << 16)
+# define GEN8_3DSTATE_RASTER_CULL_NONE (1 << 16)
+# define GEN8_3DSTATE_RASTER_CULL_FRONT (2 << 16)
+# define GEN8_3DSTATE_RASTER_CULL_BACK (3 << 16)
#define GEN6_3DSTATE_WM CMD(3, 0, 0x14)
/* DW2 */
@@ -174,6 +179,10 @@
#define GEN6_3DSTATE_CONSTANT_GS CMD(3, 0, 0x16)
#define GEN6_3DSTATE_CONSTANT_PS CMD(3, 0, 0x17)
+/* Gen8 WM_HZ_OP */
+#define GEN8_3DSTATE_WM_HZ_OP CMD(3, 0, 0x52)
+
+
# define GEN6_3DSTATE_CONSTANT_BUFFER_3_ENABLE (1 << 15)
# define GEN6_3DSTATE_CONSTANT_BUFFER_2_ENABLE (1 << 14)
# define GEN6_3DSTATE_CONSTANT_BUFFER_1_ENABLE (1 << 13)
@@ -189,9 +198,13 @@
# define GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_4 (2 << 1)
# define GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_8 (3 << 1)
+#define GEN8_3DSTATE_MULTISAMPLE CMD(3, 0, 0x0d)
+#define GEN8_3DSTATE_SAMPLE_PATTERN CMD(3, 1, 0x1C)
+
/* GEN7 */
#define GEN7_3DSTATE_CLEAR_PARAMS CMD(3, 0, 0x04)
#define GEN7_3DSTATE_DEPTH_BUFFER CMD(3, 0, 0x05)
+#define GEN7_3DSTATE_HIER_DEPTH_BUFFER CMD(3, 0, 0x07)
#define GEN7_3DSTATE_URB_VS CMD(3, 0, 0x30)
#define GEN7_3DSTATE_URB_HS CMD(3, 0, 0x31)
@@ -204,8 +217,14 @@
#define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_VS CMD(3, 1, 0x12)
#define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS CMD(3, 1, 0x16)
+
+#define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_DS CMD(3, 1, 0x14)
+#define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_HS CMD(3, 1, 0x13)
+#define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_GS CMD(3, 1, 0x15)
/* DW1 */
# define GEN7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT 16
+# define GEN8_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT 16
+# define GEN8_PUSH_CONSTANT_BUFFER_SIZE_SHIFT 0
#define GEN7_3DSTATE_CONSTANT_HS CMD(3, 0, 0x19)
#define GEN7_3DSTATE_CONSTANT_DS CMD(3, 0, 0x1a)
@@ -223,6 +242,11 @@
# define GEN7_SBE_POINT_SPRITE_LOWERLEFT (1 << 20)
# define GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT 11
# define GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT 4
+# define GEN8_SBE_FORCE_URB_ENTRY_READ_LENGTH (1 << 29)
+# define GEN8_SBE_FORCE_URB_ENTRY_READ_OFFSET (1 << 28)
+
+# define GEN8_SBE_URB_ENTRY_READ_OFFSET_SHIFT 5
+#define GEN8_3DSTATE_SBE_SWIZ CMD(3, 0, 0x51)
#define GEN7_3DSTATE_PS CMD(3, 0, 0x20)
/* DW1: kernel pointer */
@@ -255,6 +279,37 @@
/* DW6: kernel 1 pointer */
/* DW7: kernel 2 pointer */
+# define GEN8_PS_MAX_THREADS_SHIFT 23
+
+#define GEN8_3DSTATE_PSEXTRA CMD(3, 0, 0x4f)
+/* DW1 */
+# define GEN8_PSX_PIXEL_SHADER_VALID (1 << 31)
+# define GEN8_PSX_PSCDEPTH_OFF (0 << 26)
+# define GEN8_PSX_PSCDEPTH_ON (1 << 26)
+# define GEN8_PSX_PSCDEPTH_ON_GE (2 << 26)
+# define GEN8_PSX_PSCDEPTH_ON_LE (3 << 26)
+# define GEN8_PSX_ATTRIBUTE_ENABLE (1 << 8)
+
+#define GEN8_3DSTATE_PSBLEND CMD(3, 0, 0x4d)
+/* DW1 */
+# define GEN8_PS_BLEND_ALPHA_TO_COVERAGE_ENABLE (1 << 31)
+# define GEN8_PS_BLEND_HAS_WRITEABLE_RT (1 << 30)
+# define GEN8_PS_BLEND_COLOR_BUFFER_BLEND_ENABLE (1 << 29)
+# define GEN8_PS_BLEND_SRC_ALPHA_BLEND_FACTOR_MASK INTEL_MASK(28, 24)
+# define GEN8_PS_BLEND_SRC_ALPHA_BLEND_FACTOR_SHIFT 24
+# define GEN8_PS_BLEND_DST_ALPHA_BLEND_FACTOR_MASK INTEL_MASK(23, 19)
+# define GEN8_PS_BLEND_DST_ALPHA_BLEND_FACTOR_SHIFT 19
+# define GEN8_PS_BLEND_SRC_BLEND_FACTOR_MASK INTEL_MASK(18, 14)
+# define GEN8_PS_BLEND_SRC_BLEND_FACTOR_SHIFT 14
+# define GEN8_PS_BLEND_DST_BLEND_FACTOR_MASK INTEL_MASK(13, 9)
+# define GEN8_PS_BLEND_DST_BLEND_FACTOR_SHIFT 9
+# define GEN8_PS_BLEND_ALPHA_TEST_ENABLE (1 << 8)
+# define GEN8_PS_BLEND_INDEPENDENT_ALPHA_BLEND_ENABLE (1 << 7)
+
+
+#define GEN7_3DSTATE_STENCIL_BUFFER CMD(3, 0, 0x06)
+#define GEN8_3DSTATE_WM_DEPTH_STENCIL CMD(3, 0, 0x4e)
+
#define GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CL CMD(3, 0, 0x21)
#define GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC CMD(3, 0, 0x23)
@@ -270,6 +325,8 @@
#define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_VS CMD(3, 0, 0x2b)
#define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_GS CMD(3, 0, 0x2e)
#define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS CMD(3, 0, 0x2f)
+#define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_HS CMD(3, 0, 0x2c)
+#define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_DS CMD(3, 0, 0x2d)
#define MFX(pipeline, op, sub_opa, sub_opb) \
(3 << 29 | \
@@ -328,6 +385,11 @@
#define MFD_JPEG_BSD_OBJECT MFX(2, 7, 1, 8)
+#define MFX_VP8_PIC_STATE MFX(2, 4, 0, 0)
+
+#define MFD_VP8_BSD_OBJECT MFX(2, 4, 1, 8)
+
+
#define VEB(pipeline, op, sub_opa, sub_opb) \
(3 << 29 | \
(pipeline) << 27 | \
@@ -610,6 +672,8 @@
#define VE1_VFCOMPONENT_2_SHIFT 20
#define VE1_VFCOMPONENT_3_SHIFT 16
#define VE1_DESTINATION_ELEMENT_OFFSET_SHIFT 0
+#define GEN8_VE0_VERTEX_BUFFER_INDEX_SHIFT 26 /* for GEN8 */
+#define GEN8_VE0_VALID (1 << 25) /* for GEN8 */
#define VB0_BUFFER_INDEX_SHIFT 27
#define GEN6_VB0_BUFFER_INDEX_SHIFT 26
@@ -619,6 +683,8 @@
#define GEN6_VB0_INSTANCEDATA (1 << 20)
#define GEN7_VB0_ADDRESS_MODIFYENABLE (1 << 14)
#define VB0_BUFFER_PITCH_SHIFT 0
+#define GEN8_VB0_BUFFER_INDEX_SHIFT 26
+#define GEN8_VB0_MOCS_SHIFT 16
#define _3DPRIMITIVE_VERTEX_SEQUENTIAL (0 << 15)
#define _3DPRIMITIVE_VERTEX_RANDOM (1 << 15)
@@ -649,6 +715,8 @@
#define _3DPRIM_LINESTRIP_CONT_BF 0x14
#define _3DPRIM_TRIFAN_NOSTIPPLE 0x15
+#define GEN8_3DSTATE_VF_TOPOLOGY CMD(3, 0, 0x4b)
+
#define I965_TILEWALK_XMAJOR 0
#define I965_TILEWALK_YMAJOR 1
@@ -705,6 +773,8 @@
#define MFX_FORMAT_VC1 1
#define MFX_FORMAT_AVC 2
#define MFX_FORMAT_JPEG 3
+#define MFX_FORMAT_SVC 4
+#define MFX_FORMAT_VP8 5
#define MFX_SHORT_MODE 0
#define MFX_LONG_MODE 1
@@ -752,9 +822,4 @@
#define SUBSAMPLE_YUV411 5
#define SUBSAMPLE_RGBX 6
-#define URB_SIZE(intel) (IS_GEN7(intel->device_id) ? 4096 : \
- IS_GEN6(intel->device_id) ? 1024 : \
- IS_IRONLAKE(intel->device_id) ? 1024 : \
- IS_G4X(intel->device_id) ? 384 : 256)
-
#endif /* _I965_DEFINES_H_ */
diff --git a/src/i965_device_info.c b/src/i965_device_info.c
new file mode 100644
index 0000000..ea835da
--- /dev/null
+++ b/src/i965_device_info.c
@@ -0,0 +1,532 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "i965_drv_video.h"
+
+#include <string.h>
+#include <strings.h>
+#include <errno.h>
+#include <cpuid.h>
+
+/* Extra set of chroma formats supported for H.264 decoding (beyond YUV 4:2:0) */
+#define EXTRA_H264_DEC_CHROMA_FORMATS \
+ (VA_RT_FORMAT_YUV400)
+
+/* Extra set of chroma formats supported for JPEG decoding (beyond YUV 4:2:0) */
+#define EXTRA_JPEG_DEC_CHROMA_FORMATS \
+ (VA_RT_FORMAT_YUV400 | VA_RT_FORMAT_YUV411 | VA_RT_FORMAT_YUV422 | \
+ VA_RT_FORMAT_YUV444)
+
+/* Defines VA profile as a 32-bit unsigned integer mask */
+#define VA_PROFILE_MASK(PROFILE) \
+ (1U << VAProfile##PROFILE)
+
+extern struct hw_context *i965_proc_context_init(VADriverContextP, struct object_config *);
+extern struct hw_context *g4x_dec_hw_context_init(VADriverContextP, struct object_config *);
+extern bool genx_render_init(VADriverContextP);
+
+static struct hw_codec_info g4x_hw_codec_info = {
+ .dec_hw_context_init = g4x_dec_hw_context_init,
+ .enc_hw_context_init = NULL,
+ .proc_hw_context_init = NULL,
+ .render_init = genx_render_init,
+ .post_processing_context_init = NULL,
+
+ .max_width = 2048,
+ .max_height = 2048,
+ .min_linear_wpitch = 16,
+ .min_linear_hpitch = 16,
+
+ .has_mpeg2_decoding = 1,
+ .has_h264_decoding = 1,
+
+ .num_filters = 0,
+};
+
+extern struct hw_context *ironlake_dec_hw_context_init(VADriverContextP, struct object_config *);
+extern void i965_post_processing_context_init(VADriverContextP, void *, struct intel_batchbuffer *);
+
+static struct hw_codec_info ilk_hw_codec_info = {
+ .dec_hw_context_init = ironlake_dec_hw_context_init,
+ .enc_hw_context_init = NULL,
+ .proc_hw_context_init = i965_proc_context_init,
+ .render_init = genx_render_init,
+ .post_processing_context_init = i965_post_processing_context_init,
+
+ .max_width = 2048,
+ .max_height = 2048,
+ .min_linear_wpitch = 16,
+ .min_linear_hpitch = 16,
+
+ .has_mpeg2_decoding = 1,
+ .has_h264_decoding = 1,
+ .has_vpp = 1,
+ .has_accelerated_putimage = 1,
+
+ .num_filters = 0,
+};
+
+extern struct hw_context *gen6_dec_hw_context_init(VADriverContextP, struct object_config *);
+extern struct hw_context *gen6_enc_hw_context_init(VADriverContextP, struct object_config *);
+static struct hw_codec_info snb_hw_codec_info = {
+ .dec_hw_context_init = gen6_dec_hw_context_init,
+ .enc_hw_context_init = gen6_enc_hw_context_init,
+ .proc_hw_context_init = i965_proc_context_init,
+ .render_init = genx_render_init,
+ .post_processing_context_init = i965_post_processing_context_init,
+
+ .max_width = 2048,
+ .max_height = 2048,
+ .min_linear_wpitch = 16,
+ .min_linear_hpitch = 16,
+
+ .h264_mvc_dec_profiles = VA_PROFILE_MASK(H264StereoHigh),
+ .h264_dec_chroma_formats = EXTRA_H264_DEC_CHROMA_FORMATS,
+
+ .has_mpeg2_decoding = 1,
+ .has_h264_decoding = 1,
+ .has_h264_encoding = 1,
+ .has_vc1_decoding = 1,
+ .has_vpp = 1,
+ .has_accelerated_getimage = 1,
+ .has_accelerated_putimage = 1,
+ .has_tiled_surface = 1,
+ .has_di_motion_adptive = 1,
+
+ .num_filters = 2,
+ .filters = {
+ { VAProcFilterNoiseReduction, I965_RING_NULL },
+ { VAProcFilterDeinterlacing, I965_RING_NULL },
+ },
+};
+
+extern struct hw_context *gen7_dec_hw_context_init(VADriverContextP, struct object_config *);
+extern struct hw_context *gen7_enc_hw_context_init(VADriverContextP, struct object_config *);
+static struct hw_codec_info ivb_hw_codec_info = {
+ .dec_hw_context_init = gen7_dec_hw_context_init,
+ .enc_hw_context_init = gen7_enc_hw_context_init,
+ .proc_hw_context_init = i965_proc_context_init,
+ .render_init = genx_render_init,
+ .post_processing_context_init = i965_post_processing_context_init,
+
+ .max_width = 4096,
+ .max_height = 4096,
+ .min_linear_wpitch = 64,
+ .min_linear_hpitch = 16,
+
+ .h264_mvc_dec_profiles = VA_PROFILE_MASK(H264StereoHigh),
+ .h264_dec_chroma_formats = EXTRA_H264_DEC_CHROMA_FORMATS,
+ .jpeg_dec_chroma_formats = EXTRA_JPEG_DEC_CHROMA_FORMATS,
+
+ .has_mpeg2_decoding = 1,
+ .has_mpeg2_encoding = 1,
+ .has_h264_decoding = 1,
+ .has_h264_encoding = 1,
+ .has_vc1_decoding = 1,
+ .has_jpeg_decoding = 1,
+ .has_vpp = 1,
+ .has_accelerated_getimage = 1,
+ .has_accelerated_putimage = 1,
+ .has_tiled_surface = 1,
+ .has_di_motion_adptive = 1,
+
+ .num_filters = 2,
+ .filters = {
+ { VAProcFilterNoiseReduction, I965_RING_NULL },
+ { VAProcFilterDeinterlacing, I965_RING_NULL },
+ },
+};
+
+static void hsw_hw_codec_preinit(VADriverContextP ctx, struct hw_codec_info *codec_info);
+
+extern struct hw_context *gen75_dec_hw_context_init(VADriverContextP, struct object_config *);
+extern struct hw_context *gen75_enc_hw_context_init(VADriverContextP, struct object_config *);
+extern struct hw_context *gen75_proc_context_init(VADriverContextP, struct object_config *);
+static struct hw_codec_info hsw_hw_codec_info = {
+ .dec_hw_context_init = gen75_dec_hw_context_init,
+ .enc_hw_context_init = gen75_enc_hw_context_init,
+ .proc_hw_context_init = gen75_proc_context_init,
+ .render_init = genx_render_init,
+ .post_processing_context_init = i965_post_processing_context_init,
+ .preinit_hw_codec = hsw_hw_codec_preinit,
+
+ .max_width = 4096,
+ .max_height = 4096,
+ .min_linear_wpitch = 64,
+ .min_linear_hpitch = 16,
+
+ .h264_mvc_dec_profiles = (VA_PROFILE_MASK(H264StereoHigh) |
+ VA_PROFILE_MASK(H264MultiviewHigh)),
+ .h264_dec_chroma_formats = EXTRA_H264_DEC_CHROMA_FORMATS,
+ .jpeg_dec_chroma_formats = EXTRA_JPEG_DEC_CHROMA_FORMATS,
+
+ .has_mpeg2_decoding = 1,
+ .has_mpeg2_encoding = 1,
+ .has_h264_decoding = 1,
+ .has_h264_encoding = 1,
+ .has_vc1_decoding = 1,
+ .has_jpeg_decoding = 1,
+ .has_vpp = 1,
+ .has_accelerated_getimage = 1,
+ .has_accelerated_putimage = 1,
+ .has_tiled_surface = 1,
+ .has_di_motion_adptive = 1,
+ .has_di_motion_compensated = 1,
+ .has_h264_mvc_encoding = 1,
+
+ .num_filters = 5,
+ .filters = {
+ { VAProcFilterNoiseReduction, I965_RING_VEBOX },
+ { VAProcFilterDeinterlacing, I965_RING_VEBOX },
+ { VAProcFilterSharpening, I965_RING_NULL },
+ { VAProcFilterColorBalance, I965_RING_VEBOX},
+ { VAProcFilterSkinToneEnhancement, I965_RING_VEBOX},
+ },
+};
+
+extern struct hw_context *gen8_dec_hw_context_init(VADriverContextP, struct object_config *);
+extern struct hw_context *gen8_enc_hw_context_init(VADriverContextP, struct object_config *);
+extern void gen8_post_processing_context_init(VADriverContextP, void *, struct intel_batchbuffer *);
+static struct hw_codec_info bdw_hw_codec_info = {
+ .dec_hw_context_init = gen8_dec_hw_context_init,
+ .enc_hw_context_init = gen8_enc_hw_context_init,
+ .proc_hw_context_init = gen75_proc_context_init,
+ .render_init = gen8_render_init,
+ .post_processing_context_init = gen8_post_processing_context_init,
+
+ .max_width = 4096,
+ .max_height = 4096,
+ .min_linear_wpitch = 64,
+ .min_linear_hpitch = 16,
+
+ .h264_mvc_dec_profiles = (VA_PROFILE_MASK(H264StereoHigh) |
+ VA_PROFILE_MASK(H264MultiviewHigh)),
+ .h264_dec_chroma_formats = EXTRA_H264_DEC_CHROMA_FORMATS,
+ .jpeg_dec_chroma_formats = EXTRA_JPEG_DEC_CHROMA_FORMATS,
+
+ .has_mpeg2_decoding = 1,
+ .has_mpeg2_encoding = 1,
+ .has_h264_decoding = 1,
+ .has_h264_encoding = 1,
+ .has_vc1_decoding = 1,
+ .has_jpeg_decoding = 1,
+ .has_vpp = 1,
+ .has_accelerated_getimage = 1,
+ .has_accelerated_putimage = 1,
+ .has_tiled_surface = 1,
+ .has_di_motion_adptive = 1,
+ .has_di_motion_compensated = 1,
+ .has_vp8_decoding = 1,
+ .has_h264_mvc_encoding = 1,
+
+ .num_filters = 5,
+ .filters = {
+ { VAProcFilterNoiseReduction, I965_RING_VEBOX },
+ { VAProcFilterDeinterlacing, I965_RING_VEBOX },
+ { VAProcFilterSharpening, I965_RING_NULL }, /* need to rebuild the shader for BDW */
+ { VAProcFilterColorBalance, I965_RING_VEBOX},
+ { VAProcFilterSkinToneEnhancement, I965_RING_VEBOX},
+ },
+};
+
+static struct hw_codec_info chv_hw_codec_info = {
+ .dec_hw_context_init = gen8_dec_hw_context_init,
+ .enc_hw_context_init = gen8_enc_hw_context_init,
+ .proc_hw_context_init = gen75_proc_context_init,
+ .render_init = gen8_render_init,
+ .post_processing_context_init = gen8_post_processing_context_init,
+
+ .max_width = 4096,
+ .max_height = 4096,
+ .min_linear_wpitch = 64,
+ .min_linear_hpitch = 16,
+
+ .h264_mvc_dec_profiles = (VA_PROFILE_MASK(H264StereoHigh) |
+ VA_PROFILE_MASK(H264MultiviewHigh)),
+ .h264_dec_chroma_formats = EXTRA_H264_DEC_CHROMA_FORMATS,
+ .jpeg_dec_chroma_formats = EXTRA_JPEG_DEC_CHROMA_FORMATS,
+
+ .has_mpeg2_decoding = 1,
+ .has_mpeg2_encoding = 1,
+ .has_h264_decoding = 1,
+ .has_h264_encoding = 1,
+ .has_vc1_decoding = 1,
+ .has_jpeg_decoding = 1,
+ .has_vpp = 1,
+ .has_accelerated_getimage = 1,
+ .has_accelerated_putimage = 1,
+ .has_tiled_surface = 1,
+ .has_di_motion_adptive = 1,
+ .has_di_motion_compensated = 1,
+ .has_vp8_decoding = 1,
+ .has_h264_mvc_encoding = 1,
+
+ .num_filters = 5,
+ .filters = {
+ { VAProcFilterNoiseReduction, I965_RING_VEBOX },
+ { VAProcFilterDeinterlacing, I965_RING_VEBOX },
+ { VAProcFilterSharpening, I965_RING_NULL }, /* need to rebuild the shader for BDW */
+ { VAProcFilterColorBalance, I965_RING_VEBOX},
+ { VAProcFilterSkinToneEnhancement, I965_RING_VEBOX},
+ },
+};
+
+struct hw_codec_info *
+i965_get_codec_info(int devid)
+{
+ switch (devid) { /* pick the <family>_hw_codec_info table that devid belongs to */
+#undef CHIPSET
+#define CHIPSET(id, family, dev, str) case id: return &family##_hw_codec_info;
+#include "i965_pciids.h" /* presumably a list of CHIPSET(...) entries, one per PCI id -- confirm */
+ default:
+ return NULL; /* devid matched none of the generated cases */
+ }
+}
+
+static const struct intel_device_info g4x_device_info = {
+ .gen = 4,
+
+ .urb_size = 384,
+ .max_wm_threads = 50, /* 10 * 5 */
+
+ .is_g4x = 1,
+};
+
+static const struct intel_device_info ilk_device_info = {
+ .gen = 5,
+
+ .urb_size = 1024,
+ .max_wm_threads = 72, /* 12 * 6 */
+};
+
+static const struct intel_device_info snb_gt1_device_info = {
+ .gen = 6,
+ .gt = 1,
+
+ .urb_size = 1024,
+ .max_wm_threads = 40,
+};
+
+static const struct intel_device_info snb_gt2_device_info = {
+ .gen = 6,
+ .gt = 2,
+
+ .urb_size = 1024,
+ .max_wm_threads = 80,
+};
+
+static const struct intel_device_info ivb_gt1_device_info = {
+ .gen = 7,
+ .gt = 1,
+
+ .urb_size = 4096,
+ .max_wm_threads = 48,
+
+ .is_ivybridge = 1,
+};
+
+static const struct intel_device_info ivb_gt2_device_info = {
+ .gen = 7,
+ .gt = 2,
+
+ .urb_size = 4096,
+ .max_wm_threads = 172,
+
+ .is_ivybridge = 1,
+};
+
+static const struct intel_device_info byt_device_info = {
+ .gen = 7,
+ .gt = 1,
+
+ .urb_size = 4096,
+ .max_wm_threads = 48,
+
+ .is_ivybridge = 1,
+ .is_baytrail = 1,
+};
+
+static const struct intel_device_info hsw_gt1_device_info = {
+ .gen = 7,
+ .gt = 1,
+
+ .urb_size = 4096,
+ .max_wm_threads = 102,
+
+ .is_haswell = 1,
+};
+
+static const struct intel_device_info hsw_gt2_device_info = {
+ .gen = 7,
+ .gt = 2,
+
+ .urb_size = 4096,
+ .max_wm_threads = 204,
+
+ .is_haswell = 1,
+};
+
+static const struct intel_device_info hsw_gt3_device_info = {
+ .gen = 7,
+ .gt = 3,
+
+ .urb_size = 4096,
+ .max_wm_threads = 408,
+
+ .is_haswell = 1,
+};
+
+static const struct intel_device_info bdw_device_info = {
+ .gen = 8,
+
+ .urb_size = 4096,
+ .max_wm_threads = 64, /* per PSD */
+};
+
+static const struct intel_device_info chv_device_info = {
+ .gen = 8,
+
+ .urb_size = 4096,
+ .max_wm_threads = 64, /* per PSD */
+
+ .is_cherryview = 1,
+};
+
+const struct intel_device_info *
+i965_get_device_info(int devid)
+{
+ switch (devid) { /* pick the <dev>_device_info record that devid belongs to */
+#undef CHIPSET
+#define CHIPSET(id, family, dev, str) case id: return &dev##_device_info;
+#include "i965_pciids.h" /* presumably a list of CHIPSET(...) entries, one per PCI id -- confirm */
+ default:
+ return NULL; /* devid matched none of the generated cases */
+ }
+}
+
+static void cpuid(unsigned int op,
+ uint32_t *eax, uint32_t *ebx,
+ uint32_t *ecx, uint32_t *edx)
+{
+ __cpuid_count(op, 0, *eax, *ebx, *ecx, *edx); /* leaf op, sub-leaf 0; results go out through the four pointers */
+}
+
+/*
+ * Store the 48-byte CPUID brand string plus a NUL in model_id (>= 49 bytes,
+ * unchecked). Returns 0, or -EINVAL on NULL / missing brand-string leaves.
+ */
+static int intel_driver_detect_cpustring(char *model_id)
+{
+ uint32_t rdata[12]; /* aligned scratch: 3 leaves x 4 registers */
+
+ if (model_id == NULL)
+ return -EINVAL;
+
+ /* obtain the max supported extended CPUID info */
+ cpuid(0x80000000, &rdata[0], &rdata[1], &rdata[2], &rdata[3]);
+
+ /* If the max extended CPUID info is less than 0x80000004, fail */
+ if (rdata[0] < 0x80000004)
+ return -EINVAL;
+
+ /* obtain the CPUID string */
+ cpuid(0x80000002, &rdata[0], &rdata[1], &rdata[2], &rdata[3]);
+ cpuid(0x80000003, &rdata[4], &rdata[5], &rdata[6], &rdata[7]);
+ cpuid(0x80000004, &rdata[8], &rdata[9], &rdata[10], &rdata[11]);
+
+ /* copy out rather than storing uint32_t values through a cast char pointer */
+ memcpy(model_id, rdata, sizeof(rdata));
+ *(model_id + sizeof(rdata)) = '\0';
+ return 0;
+}
+
+/*
+ * Haswell CPU models looked up by hsw_hw_codec_preinit().
+ * Names are as captured from /proc/cpuinfo with the space characters stripped.
+ */
+static const char * const hsw_cpu_hook_list[] = {
+"Intel(R)Pentium(R)3556U",
+"Intel(R)Pentium(R)3560Y",
+"Intel(R)Pentium(R)3550M",
+"Intel(R)Celeron(R)2980U",
+"Intel(R)Celeron(R)2955U",
+"Intel(R)Celeron(R)2950M",
+};
+
+static void hsw_hw_codec_preinit(VADriverContextP ctx, struct hw_codec_info *codec_info)
+{
+ char model_string[64];
+ char *model_ptr, *tmp_ptr;
+ int i, model_len, list_len;
+ bool found;
+ /* Clears the encoding flags in codec_info when the CPU model is in hsw_cpu_hook_list. */
+ memset(model_string, 0, sizeof(model_string));
+
+ /* If it can't detect cpu model_string, leave it alone */
+ if (intel_driver_detect_cpustring(model_string))
+ return;
+
+ /* strip the cpufreq info: cut the string at the first '@' */
+ model_ptr = model_string;
+ tmp_ptr = strstr(model_ptr, "@");
+
+ if (tmp_ptr)
+ *tmp_ptr = '\0';
+
+ /* strip the space characters in place; case is left as-is (the match below ignores case) */
+ model_ptr = model_string;
+ model_len = strlen(model_string);
+ for (i = 0; i < model_len; i++) {
+ if (model_string[i] != ' ') {
+ *model_ptr = model_string[i];
+ model_ptr++;
+ }
+ }
+ *model_ptr = '\0';
+
+ found = false;
+ list_len = sizeof(hsw_cpu_hook_list) / sizeof(char *);
+ model_len = strlen(model_string);
+ for (i = 0; i < list_len; i++) {
+ model_ptr = (char *)hsw_cpu_hook_list[i];
+
+ if (strlen(model_ptr) != model_len) /* equal lengths make the bounded compare below an exact match */
+ continue;
+
+ if (strncasecmp(model_string, model_ptr, model_len) == 0) {
+ found = true;
+ break;
+ }
+ }
+
+ if (found) { /* listed model: clear the H.264, H.264 MVC and MPEG-2 encoding flags */
+ codec_info->has_h264_encoding = 0;
+ codec_info->has_h264_mvc_encoding = 0;
+ codec_info->has_mpeg2_encoding = 0;
+ }
+ return;
+}
diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c
index 992bfa5..aa521e5 100755..100644
--- a/src/i965_drv_video.c
+++ b/src/i965_drv_video.c
@@ -28,6 +28,7 @@
*/
#include "sysdeps.h"
+#include <unistd.h>
#ifdef HAVE_VA_X11
# include "i965_output_dri.h"
@@ -37,6 +38,7 @@
# include "i965_output_wayland.h"
#endif
+#include "intel_version.h"
#include "intel_driver.h"
#include "intel_memman.h"
#include "intel_batchbuffer.h"
@@ -78,6 +80,22 @@
#define HAS_TILED_SURFACE(ctx) ((ctx)->codec_info->has_tiled_surface)
+#define HAS_VP8_DECODING(ctx) ((ctx)->codec_info->has_vp8_decoding && \
+ (ctx)->intel.has_bsd)
+
+#define HAS_VP8_ENCODING(ctx) ((ctx)->codec_info->has_vp8_encoding && \
+ (ctx)->intel.has_bsd)
+
+#define HAS_H264_MVC_DECODING(ctx) \
+ (HAS_H264_DECODING(ctx) && (ctx)->codec_info->h264_mvc_dec_profiles)
+
+#define HAS_H264_MVC_DECODING_PROFILE(ctx, profile) /* tests profile's bit in h264_mvc_dec_profiles */ \
+ (HAS_H264_MVC_DECODING(ctx) && \
+ ((ctx)->codec_info->h264_mvc_dec_profiles & (1U << (profile))))
+
+#define HAS_H264_MVC_ENCODING(ctx) ((ctx)->codec_info->has_h264_mvc_encoding && \
+ (ctx)->intel.has_bsd)
+
static int get_sampling_from_fourcc(unsigned int fourcc);
/* Check whether we are rendering to X11 (VA/X11 or VA/GLX API) */
@@ -88,6 +106,117 @@ static int get_sampling_from_fourcc(unsigned int fourcc);
#define IS_VA_WAYLAND(ctx) \
(((ctx)->display_type & VA_DISPLAY_MAJOR_MASK) == VA_DISPLAY_WAYLAND)
+#define I965_BIT 1
+#define I965_2BITS (I965_BIT << 1)
+#define I965_4BITS (I965_BIT << 2)
+#define I965_8BITS (I965_BIT << 3)
+#define I965_16BITS (I965_BIT << 4)
+#define I965_32BITS (I965_BIT << 5)
+
+#define PLANE_0 0
+#define PLANE_1 1
+#define PLANE_2 2
+
+#define OFFSET_0 0
+#define OFFSET_4 4
+#define OFFSET_8 8
+#define OFFSET_16 16
+#define OFFSET_24 24
+
+/* hfactor, vfactor, num_planes, bpp[], num_components, components[] */
+#define I_NV12 2, 2, 2, {I965_8BITS, I965_4BITS}, 3, { {PLANE_0, OFFSET_0}, {PLANE_1, OFFSET_0}, {PLANE_1, OFFSET_8} }
+#define I_I420 2, 2, 3, {I965_8BITS, I965_2BITS, I965_2BITS}, 3, { {PLANE_0, OFFSET_0}, {PLANE_1, OFFSET_0}, {PLANE_2, OFFSET_0} }
+#define I_IYUV I_I420
+#define I_IMC3 I_I420
+#define I_YV12 2, 2, 3, {I965_8BITS, I965_2BITS, I965_2BITS}, 3, { {PLANE_0, OFFSET_0}, {PLANE_2, OFFSET_0}, {PLANE_1, OFFSET_0} }
+#define I_IMC1 I_YV12
+
+#define I_422H 2, 1, 3, {I965_8BITS, I965_4BITS, I965_4BITS}, 3, { {PLANE_0, OFFSET_0}, {PLANE_1, OFFSET_0}, {PLANE_2, OFFSET_0} }
+#define I_422V 1, 2, 3, {I965_8BITS, I965_4BITS, I965_4BITS}, 3, { {PLANE_0, OFFSET_0}, {PLANE_1, OFFSET_0}, {PLANE_2, OFFSET_0} }
+#define I_YV16 2, 1, 3, {I965_8BITS, I965_4BITS, I965_4BITS}, 3, { {PLANE_0, OFFSET_0}, {PLANE_2, OFFSET_0}, {PLANE_1, OFFSET_0} }
+#define I_YUY2 2, 1, 1, {I965_32BITS}, 3, { {PLANE_0, OFFSET_0}, {PLANE_0, OFFSET_8}, {PLANE_0, OFFSET_24} }
+#define I_UYVY 2, 1, 1, {I965_32BITS}, 3, { {PLANE_0, OFFSET_0}, {PLANE_0, OFFSET_8}, {PLANE_0, OFFSET_16} }
+
+#define I_444P 1, 1, 3, {I965_8BITS, I965_8BITS, I965_8BITS}, 3, { {PLANE_0, OFFSET_0}, {PLANE_1, OFFSET_0}, {PLANE_2, OFFSET_0} }
+
+#define I_411P 4, 1, 3, {I965_8BITS, I965_2BITS, I965_2BITS}, 3, { {PLANE_0, OFFSET_0}, {PLANE_1, OFFSET_0}, {PLANE_2, OFFSET_0} }
+
+#define I_Y800 1, 1, 1, {I965_8BITS}, 1, { {PLANE_0, OFFSET_0} }
+
+#define I_RGBA 1, 1, 1, {I965_32BITS}, 4, { {PLANE_0, OFFSET_0}, {PLANE_0, OFFSET_8}, {PLANE_0, OFFSET_16}, {PLANE_0, OFFSET_24} }
+#define I_RGBX 1, 1, 1, {I965_32BITS}, 3, { {PLANE_0, OFFSET_0}, {PLANE_0, OFFSET_8}, {PLANE_0, OFFSET_16} }
+#define I_BGRA 1, 1, 1, {I965_32BITS}, 4, { {PLANE_0, OFFSET_16}, {PLANE_0, OFFSET_8}, {PLANE_0, OFFSET_0}, {PLANE_0, OFFSET_24} }
+#define I_BGRX 1, 1, 1, {I965_32BITS}, 3, { {PLANE_0, OFFSET_16}, {PLANE_0, OFFSET_8}, {PLANE_0, OFFSET_0} }
+
+#define I_ARGB 1, 1, 1, {I965_32BITS}, 4, { {PLANE_0, OFFSET_8}, {PLANE_0, OFFSET_16}, {PLANE_0, OFFSET_24}, {PLANE_0, OFFSET_0} }
+#define I_ABGR 1, 1, 1, {I965_32BITS}, 4, { {PLANE_0, OFFSET_24}, {PLANE_0, OFFSET_16}, {PLANE_0, OFFSET_8}, {PLANE_0, OFFSET_0} }
+
+#define I_IA88 1, 1, 1, {I965_16BITS}, 2, { {PLANE_0, OFFSET_0}, {PLANE_0, OFFSET_8} }
+#define I_AI88 1, 1, 1, {I965_16BITS}, 2, { {PLANE_0, OFFSET_8}, {PLANE_0, OFFSET_0} }
+
+#define I_IA44 1, 1, 1, {I965_8BITS}, 2, { {PLANE_0, OFFSET_0}, {PLANE_0, OFFSET_4} }
+#define I_AI44 1, 1, 1, {I965_8BITS}, 2, { {PLANE_0, OFFSET_4}, {PLANE_0, OFFSET_0} }
+
+/* flag */
+#define I_S 1
+#define I_I 2
+#define I_SI (I_S | I_I)
+
+#define DEF_FOUCC_INFO(FOURCC, FORMAT, SUB, FLAG) { VA_FOURCC_##FOURCC, I965_COLOR_##FORMAT, SUBSAMPLE_##SUB, FLAG, I_##FOURCC }
+#define DEF_YUV(FOURCC, SUB, FLAG) DEF_FOUCC_INFO(FOURCC, YUV, SUB, FLAG)
+#define DEF_RGB(FOURCC, SUB, FLAG) DEF_FOUCC_INFO(FOURCC, RGB, SUB, FLAG)
+#define DEF_INDEX(FOURCC, SUB, FLAG) DEF_FOUCC_INFO(FOURCC, INDEX, SUB, FLAG)
+
+static const i965_fourcc_info i965_fourcc_infos[] = {
+ DEF_YUV(NV12, YUV420, I_SI),
+ DEF_YUV(I420, YUV420, I_SI),
+ DEF_YUV(IYUV, YUV420, I_S),
+ DEF_YUV(IMC3, YUV420, I_S),
+ DEF_YUV(YV12, YUV420, I_SI),
+ DEF_YUV(IMC1, YUV420, I_S),
+
+ DEF_YUV(422H, YUV422H, I_SI),
+ DEF_YUV(422V, YUV422V, I_S),
+ DEF_YUV(YV16, YUV422H, I_S),
+ DEF_YUV(YUY2, YUV422H, I_SI),
+ DEF_YUV(UYVY, YUV422H, I_SI),
+
+ DEF_YUV(444P, YUV444, I_S),
+
+ DEF_YUV(411P, YUV411, I_S),
+
+ DEF_YUV(Y800, YUV400, I_S),
+
+ DEF_RGB(RGBA, RGBX, I_SI),
+ DEF_RGB(RGBX, RGBX, I_SI),
+ DEF_RGB(BGRA, RGBX, I_SI),
+ DEF_RGB(BGRX, RGBX, I_SI),
+
+ DEF_RGB(ARGB, RGBX, I_I),
+ DEF_RGB(ABGR, RGBX, I_I),
+
+ DEF_INDEX(IA88, RGBX, I_I),
+ DEF_INDEX(AI88, RGBX, I_I),
+
+ DEF_INDEX(IA44, RGBX, I_I),
+ DEF_INDEX(AI44, RGBX, I_I)
+};
+
+const i965_fourcc_info *
+get_fourcc_info(unsigned int fourcc)
+{
+ unsigned int i;
+
+ /* Return the i965_fourcc_infos entry for fourcc, or NULL if it is not listed. */
+ for (i = 0; i < ARRAY_ELEMS(i965_fourcc_infos); i++) {
+ const i965_fourcc_info * const info = &i965_fourcc_infos[i];
+ if (info->fourcc == fourcc)
+ return info;
+ }
+
+ return NULL; /* reached only now that the scan is bounded by the table size */
+}
+
enum {
I965_SURFACETYPE_RGBA = 1,
I965_SURFACETYPE_YUV,
@@ -97,6 +226,30 @@ enum {
/* List of supported display attributes */
static const VADisplayAttribute i965_display_attributes[] = {
{
+ VADisplayAttribBrightness,
+ -100, 100, DEFAULT_BRIGHTNESS,
+ VA_DISPLAY_ATTRIB_GETTABLE | VA_DISPLAY_ATTRIB_SETTABLE
+ },
+
+ {
+ VADisplayAttribContrast,
+ 0, 100, DEFAULT_CONTRAST,
+ VA_DISPLAY_ATTRIB_GETTABLE | VA_DISPLAY_ATTRIB_SETTABLE
+ },
+
+ {
+ VADisplayAttribHue,
+ -180, 180, DEFAULT_HUE,
+ VA_DISPLAY_ATTRIB_GETTABLE | VA_DISPLAY_ATTRIB_SETTABLE
+ },
+
+ {
+ VADisplayAttribSaturation,
+ 0, 100, DEFAULT_SATURATION,
+ VA_DISPLAY_ATTRIB_GETTABLE | VA_DISPLAY_ATTRIB_SETTABLE
+ },
+
+ {
VADisplayAttribRotation,
0, 3, VA_ROTATION_NONE,
VA_DISPLAY_ATTRIB_GETTABLE|VA_DISPLAY_ATTRIB_SETTABLE
@@ -112,19 +265,21 @@ typedef struct {
static const i965_image_format_map_t
i965_image_formats_map[I965_MAX_IMAGE_FORMATS + 1] = {
{ I965_SURFACETYPE_YUV,
- { VA_FOURCC('Y','V','1','2'), VA_LSB_FIRST, 12, } },
+ { VA_FOURCC_YV12, VA_LSB_FIRST, 12, } },
{ I965_SURFACETYPE_YUV,
- { VA_FOURCC('I','4','2','0'), VA_LSB_FIRST, 12, } },
+ { VA_FOURCC_I420, VA_LSB_FIRST, 12, } },
{ I965_SURFACETYPE_YUV,
- { VA_FOURCC('N','V','1','2'), VA_LSB_FIRST, 12, } },
+ { VA_FOURCC_NV12, VA_LSB_FIRST, 12, } },
{ I965_SURFACETYPE_YUV,
- { VA_FOURCC('Y','U','Y','2'), VA_LSB_FIRST, 16, } },
+ { VA_FOURCC_YUY2, VA_LSB_FIRST, 16, } },
{ I965_SURFACETYPE_YUV,
- { VA_FOURCC('U','Y','V','Y'), VA_LSB_FIRST, 16, } },
+ { VA_FOURCC_UYVY, VA_LSB_FIRST, 16, } },
+ { I965_SURFACETYPE_YUV,
+ { VA_FOURCC_422H, VA_LSB_FIRST, 16, } },
{ I965_SURFACETYPE_RGBA,
- { VA_FOURCC('R','G','B','X'), VA_LSB_FIRST, 32, 24, 0x000000ff, 0x0000ff00, 0x00ff0000 } },
+ { VA_FOURCC_RGBX, VA_LSB_FIRST, 32, 24, 0x000000ff, 0x0000ff00, 0x00ff0000 } },
{ I965_SURFACETYPE_RGBA,
- { VA_FOURCC('B','G','R','X'), VA_LSB_FIRST, 32, 24, 0x00ff0000, 0x0000ff00, 0x000000ff } },
+ { VA_FOURCC_BGRX, VA_LSB_FIRST, 32, 24, 0x00ff0000, 0x0000ff00, 0x000000ff } },
};
/* List of supported subpicture formats */
@@ -142,23 +297,23 @@ typedef struct {
static const i965_subpic_format_map_t
i965_subpic_formats_map[I965_MAX_SUBPIC_FORMATS + 1] = {
{ I965_SURFACETYPE_INDEXED, I965_SURFACEFORMAT_P4A4_UNORM,
- { VA_FOURCC('I','A','4','4'), VA_MSB_FIRST, 8, },
+ { VA_FOURCC_IA44, VA_MSB_FIRST, 8, },
COMMON_SUBPICTURE_FLAGS },
{ I965_SURFACETYPE_INDEXED, I965_SURFACEFORMAT_A4P4_UNORM,
- { VA_FOURCC('A','I','4','4'), VA_MSB_FIRST, 8, },
+ { VA_FOURCC_AI44, VA_MSB_FIRST, 8, },
COMMON_SUBPICTURE_FLAGS },
{ I965_SURFACETYPE_INDEXED, I965_SURFACEFORMAT_P8A8_UNORM,
- { VA_FOURCC('I','A','8','8'), VA_MSB_FIRST, 16, },
+ { VA_FOURCC_IA88, VA_MSB_FIRST, 16, },
COMMON_SUBPICTURE_FLAGS },
{ I965_SURFACETYPE_INDEXED, I965_SURFACEFORMAT_A8P8_UNORM,
- { VA_FOURCC('A','I','8','8'), VA_MSB_FIRST, 16, },
+ { VA_FOURCC_AI88, VA_MSB_FIRST, 16, },
COMMON_SUBPICTURE_FLAGS },
{ I965_SURFACETYPE_RGBA, I965_SURFACEFORMAT_B8G8R8A8_UNORM,
- { VA_FOURCC('B','G','R','A'), VA_LSB_FIRST, 32,
+ { VA_FOURCC_BGRA, VA_LSB_FIRST, 32,
32, 0x00ff0000, 0x0000ff00, 0x000000ff, 0xff000000 },
COMMON_SUBPICTURE_FLAGS },
{ I965_SURFACETYPE_RGBA, I965_SURFACEFORMAT_R8G8B8A8_UNORM,
- { VA_FOURCC('R','G','B','A'), VA_LSB_FIRST, 32,
+ { VA_FOURCC_RGBA, VA_LSB_FIRST, 32,
32, 0x000000ff, 0x0000ff00, 0x00ff0000, 0xff000000 },
COMMON_SUBPICTURE_FLAGS },
};
@@ -181,117 +336,36 @@ get_subpic_format(const VAImageFormat *va_format)
return NULL;
}
-extern struct hw_context *i965_proc_context_init(VADriverContextP, struct object_config *);
-extern struct hw_context *g4x_dec_hw_context_init(VADriverContextP, struct object_config *);
-static struct hw_codec_info g4x_hw_codec_info = {
- .dec_hw_context_init = g4x_dec_hw_context_init,
- .enc_hw_context_init = NULL,
- .proc_hw_context_init = NULL,
- .max_width = 2048,
- .max_height = 2048,
-
- .has_mpeg2_decoding = 1,
- .has_h264_decoding = 1,
-
- .num_filters = 0,
-};
-
-extern struct hw_context *ironlake_dec_hw_context_init(VADriverContextP, struct object_config *);
-static struct hw_codec_info ironlake_hw_codec_info = {
- .dec_hw_context_init = ironlake_dec_hw_context_init,
- .enc_hw_context_init = NULL,
- .proc_hw_context_init = i965_proc_context_init,
- .max_width = 2048,
- .max_height = 2048,
+/* Checks whether the surface is in busy state */
+static bool
+is_surface_busy(struct i965_driver_data *i965,
+ struct object_surface *obj_surface)
+{
+ assert(obj_surface != NULL);
- .has_mpeg2_decoding = 1,
- .has_h264_decoding = 1,
- .has_vpp = 1,
- .has_accelerated_putimage = 1,
+ if (obj_surface->locked_image_id != VA_INVALID_ID)
+ return true;
+ if (obj_surface->derived_image_id != VA_INVALID_ID)
+ return true;
+ return false;
+}
- .num_filters = 0,
-};
+/* Checks whether the image is in busy state */
+static bool
+is_image_busy(struct i965_driver_data *i965, struct object_image *obj_image)
+{
+ struct object_buffer *obj_buffer;
-extern struct hw_context *gen6_dec_hw_context_init(VADriverContextP, struct object_config *);
-extern struct hw_context *gen6_enc_hw_context_init(VADriverContextP, struct object_config *);
-static struct hw_codec_info gen6_hw_codec_info = {
- .dec_hw_context_init = gen6_dec_hw_context_init,
- .enc_hw_context_init = gen6_enc_hw_context_init,
- .proc_hw_context_init = i965_proc_context_init,
- .max_width = 2048,
- .max_height = 2048,
-
- .has_mpeg2_decoding = 1,
- .has_mpeg2_encoding = 1,
- .has_h264_decoding = 1,
- .has_h264_encoding = 1,
- .has_vc1_decoding = 1,
- .has_vpp = 1,
- .has_accelerated_getimage = 1,
- .has_accelerated_putimage = 1,
- .has_tiled_surface = 1,
-
- .num_filters = 2,
- .filters = {
- VAProcFilterNoiseReduction,
- VAProcFilterDeinterlacing,
- },
-};
+ assert(obj_image != NULL);
-extern struct hw_context *gen7_dec_hw_context_init(VADriverContextP, struct object_config *);
-extern struct hw_context *gen7_enc_hw_context_init(VADriverContextP, struct object_config *);
-static struct hw_codec_info gen7_hw_codec_info = {
- .dec_hw_context_init = gen7_dec_hw_context_init,
- .enc_hw_context_init = gen7_enc_hw_context_init,
- .proc_hw_context_init = i965_proc_context_init,
- .max_width = 4096,
- .max_height = 4096,
-
- .has_mpeg2_decoding = 1,
- .has_mpeg2_encoding = 1,
- .has_h264_decoding = 1,
- .has_h264_encoding = 1,
- .has_vc1_decoding = 1,
- .has_jpeg_decoding = 1,
- .has_vpp = 1,
- .has_accelerated_getimage = 1,
- .has_accelerated_putimage = 1,
- .has_tiled_surface = 1,
-
- .num_filters = 2,
- .filters = {
- VAProcFilterNoiseReduction,
- VAProcFilterDeinterlacing,
- },
-};
+ if (obj_image->derived_surface != VA_INVALID_ID)
+ return true;
-extern struct hw_context *gen75_proc_context_init(VADriverContextP, struct object_config *);
-static struct hw_codec_info gen75_hw_codec_info = {
- .dec_hw_context_init = gen75_dec_hw_context_init,
- .enc_hw_context_init = gen75_enc_hw_context_init,
- .proc_hw_context_init = gen75_proc_context_init,
- .max_width = 4096,
- .max_height = 4096,
-
- .has_mpeg2_decoding = 1,
- .has_mpeg2_encoding = 1,
- .has_h264_decoding = 1,
- .has_h264_encoding = 1,
- .has_vc1_decoding = 1,
- .has_jpeg_decoding = 1,
- .has_vpp = 1,
- .has_accelerated_getimage = 1,
- .has_accelerated_putimage = 1,
- .has_tiled_surface = 1,
- .has_di_motion_adptive = 1,
- .num_filters = 4,
- .filters = {
- VAProcFilterNoiseReduction,
- VAProcFilterDeinterlacing,
- VAProcFilterSharpening,
- VAProcFilterColorBalance,
- },
-};
+ obj_buffer = BUFFER(obj_image->image.buf);
+ if (obj_buffer && obj_buffer->export_refcount > 0)
+ return true;
+ return false;
+}
#define I965_PACKED_HEADER_BASE 0
#define I965_PACKED_MISC_HEADER_BASE 3
@@ -304,7 +378,7 @@ va_enc_packed_type_to_idx(int packed_type)
if (packed_type & VAEncPackedHeaderMiscMask) {
idx = I965_PACKED_MISC_HEADER_BASE;
packed_type = (~VAEncPackedHeaderMiscMask & packed_type);
- assert(packed_type > 0);
+ ASSERT_RET(packed_type > 0, 0);
idx += (packed_type - 1);
} else {
idx = I965_PACKED_HEADER_BASE;
@@ -324,16 +398,15 @@ va_enc_packed_type_to_idx(int packed_type)
default:
/* Should not get here */
- assert(0);
+ ASSERT_RET(0, 0);
break;
}
}
- assert(idx < 4);
+ ASSERT_RET(idx < 4, 0);
return idx;
}
-
VAStatus
i965_QueryConfigProfiles(VADriverContextP ctx,
VAProfile *profile_list, /* out */
@@ -350,10 +423,14 @@ i965_QueryConfigProfiles(VADriverContextP ctx,
if (HAS_H264_DECODING(i965) ||
HAS_H264_ENCODING(i965)) {
- profile_list[i++] = VAProfileH264Baseline;
+ profile_list[i++] = VAProfileH264ConstrainedBaseline;
profile_list[i++] = VAProfileH264Main;
profile_list[i++] = VAProfileH264High;
}
+ if (HAS_H264_MVC_DECODING_PROFILE(i965, VAProfileH264MultiviewHigh))
+ profile_list[i++] = VAProfileH264MultiviewHigh;
+ if (HAS_H264_MVC_DECODING_PROFILE(i965, VAProfileH264StereoHigh))
+ profile_list[i++] = VAProfileH264StereoHigh;
if (HAS_VC1_DECODING(i965)) {
profile_list[i++] = VAProfileVC1Simple;
@@ -369,8 +446,18 @@ i965_QueryConfigProfiles(VADriverContextP ctx,
profile_list[i++] = VAProfileJPEGBaseline;
}
+ if (HAS_VP8_DECODING(i965) ||
+ HAS_VP8_ENCODING(i965)) {
+ profile_list[i++] = VAProfileVP8Version0_3;
+ }
+
+ if (HAS_H264_MVC_ENCODING(i965)) {
+ profile_list[i++] = VAProfileH264MultiviewHigh;
+ profile_list[i++] = VAProfileH264StereoHigh;
+ }
+
/* If the assert fails then I965_MAX_PROFILES needs to be bigger */
- assert(i <= I965_MAX_PROFILES);
+ ASSERT_RET(i <= I965_MAX_PROFILES, VA_STATUS_ERROR_OPERATION_FAILED);
*num_profiles = i;
return VA_STATUS_SUCCESS;
@@ -396,16 +483,24 @@ i965_QueryConfigEntrypoints(VADriverContextP ctx,
break;
- case VAProfileH264Baseline:
+ case VAProfileH264ConstrainedBaseline:
case VAProfileH264Main:
case VAProfileH264High:
if (HAS_H264_DECODING(i965))
entrypoint_list[n++] = VAEntrypointVLD;
-
+
if (HAS_H264_ENCODING(i965))
entrypoint_list[n++] = VAEntrypointEncSlice;
break;
+ case VAProfileH264MultiviewHigh:
+ case VAProfileH264StereoHigh:
+ if (HAS_H264_MVC_DECODING_PROFILE(i965, profile))
+ entrypoint_list[n++] = VAEntrypointVLD;
+
+ if (HAS_H264_MVC_ENCODING(i965))
+ entrypoint_list[n++] = VAEntrypointEncSlice;
+ break;
case VAProfileVC1Simple:
case VAProfileVC1Main:
@@ -424,16 +519,139 @@ i965_QueryConfigEntrypoints(VADriverContextP ctx,
entrypoint_list[n++] = VAEntrypointVLD;
break;
+ case VAProfileVP8Version0_3:
+ if (HAS_VP8_DECODING(i965))
+ entrypoint_list[n++] = VAEntrypointVLD;
+
+ if (HAS_VP8_ENCODING(i965))
+ entrypoint_list[n++] = VAEntrypointEncSlice;
+
default:
break;
}
/* If the assert fails then I965_MAX_ENTRYPOINTS needs to be bigger */
- assert(n <= I965_MAX_ENTRYPOINTS);
+ ASSERT_RET(n <= I965_MAX_ENTRYPOINTS, VA_STATUS_ERROR_OPERATION_FAILED);
*num_entrypoints = n;
return n > 0 ? VA_STATUS_SUCCESS : VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
}
+static VAStatus
+i965_validate_config(VADriverContextP ctx, VAProfile profile,
+ VAEntrypoint entrypoint)
+{
+ struct i965_driver_data * const i965 = i965_driver_data(ctx);
+ VAStatus va_status;
+
+ /* Validate profile & entrypoint */
+ switch (profile) {
+ case VAProfileMPEG2Simple:
+ case VAProfileMPEG2Main:
+ if ((HAS_MPEG2_DECODING(i965) && entrypoint == VAEntrypointVLD) ||
+ (HAS_MPEG2_ENCODING(i965) && entrypoint == VAEntrypointEncSlice)) {
+ va_status = VA_STATUS_SUCCESS;
+ } else {
+ va_status = VA_STATUS_ERROR_UNSUPPORTED_ENTRYPOINT;
+ }
+ break;
+
+ case VAProfileH264ConstrainedBaseline:
+ case VAProfileH264Main:
+ case VAProfileH264High:
+ if ((HAS_H264_DECODING(i965) && entrypoint == VAEntrypointVLD) ||
+ (HAS_H264_ENCODING(i965) && entrypoint == VAEntrypointEncSlice)) {
+ va_status = VA_STATUS_SUCCESS;
+ } else {
+ va_status = VA_STATUS_ERROR_UNSUPPORTED_ENTRYPOINT;
+ }
+ break;
+
+ case VAProfileVC1Simple:
+ case VAProfileVC1Main:
+ case VAProfileVC1Advanced:
+ if (HAS_VC1_DECODING(i965) && entrypoint == VAEntrypointVLD) {
+ va_status = VA_STATUS_SUCCESS;
+ } else {
+ va_status = VA_STATUS_ERROR_UNSUPPORTED_ENTRYPOINT;
+ }
+ break;
+
+ case VAProfileNone:
+ if (HAS_VPP(i965) && VAEntrypointVideoProc == entrypoint) {
+ va_status = VA_STATUS_SUCCESS;
+ } else {
+ va_status = VA_STATUS_ERROR_UNSUPPORTED_ENTRYPOINT;
+ }
+ break;
+
+ case VAProfileJPEGBaseline:
+ if (HAS_JPEG_DECODING(i965) && entrypoint == VAEntrypointVLD) {
+ va_status = VA_STATUS_SUCCESS;
+ } else {
+ va_status = VA_STATUS_ERROR_UNSUPPORTED_ENTRYPOINT;
+ }
+ break;
+
+ case VAProfileVP8Version0_3:
+ if ((HAS_VP8_DECODING(i965) && entrypoint == VAEntrypointVLD) ||
+ (HAS_VP8_ENCODING(i965) && entrypoint == VAEntrypointEncSlice)) {
+ va_status = VA_STATUS_SUCCESS;
+ } else {
+ va_status = VA_STATUS_ERROR_UNSUPPORTED_ENTRYPOINT;
+ }
+ break;
+
+ case VAProfileH264MultiviewHigh:
+ case VAProfileH264StereoHigh:
+ if ((HAS_H264_MVC_DECODING_PROFILE(i965, profile) &&
+ entrypoint == VAEntrypointVLD) ||
+ (HAS_H264_MVC_ENCODING(i965) && entrypoint == VAEntrypointEncSlice)) {
+ va_status = VA_STATUS_SUCCESS;
+ } else {
+ va_status = VA_STATUS_ERROR_UNSUPPORTED_ENTRYPOINT;
+ }
+
+ break;
+
+ default:
+ va_status = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
+ break;
+ }
+ return va_status;
+}
+
+static uint32_t
+i965_get_default_chroma_formats(VADriverContextP ctx, VAProfile profile,
+ VAEntrypoint entrypoint)
+{
+ struct i965_driver_data * const i965 = i965_driver_data(ctx);
+ uint32_t chroma_formats = VA_RT_FORMAT_YUV420;
+
+ switch (profile) {
+ case VAProfileH264ConstrainedBaseline:
+ case VAProfileH264Main:
+ case VAProfileH264High:
+ if (HAS_H264_DECODING(i965) && entrypoint == VAEntrypointVLD)
+ chroma_formats |= i965->codec_info->h264_dec_chroma_formats;
+ break;
+
+ case VAProfileH264MultiviewHigh:
+ case VAProfileH264StereoHigh:
+ if (HAS_H264_MVC_DECODING(i965) && entrypoint == VAEntrypointVLD)
+ chroma_formats |= i965->codec_info->h264_dec_chroma_formats;
+ break;
+
+ case VAProfileJPEGBaseline:
+ if (HAS_JPEG_DECODING(i965) && entrypoint == VAEntrypointVLD)
+ chroma_formats |= i965->codec_info->jpeg_dec_chroma_formats;
+ break;
+
+ default:
+ break;
+ }
+ return chroma_formats;
+}
+
VAStatus
i965_GetConfigAttributes(VADriverContextP ctx,
VAProfile profile,
@@ -441,25 +659,44 @@ i965_GetConfigAttributes(VADriverContextP ctx,
VAConfigAttrib *attrib_list, /* in/out */
int num_attribs)
{
+ VAStatus va_status;
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
int i;
+ va_status = i965_validate_config(ctx, profile, entrypoint);
+ if (va_status != VA_STATUS_SUCCESS)
+ return va_status;
+
/* Other attributes don't seem to be defined */
/* What to do if we don't know the attribute? */
for (i = 0; i < num_attribs; i++) {
switch (attrib_list[i].type) {
case VAConfigAttribRTFormat:
- attrib_list[i].value = VA_RT_FORMAT_YUV420;
+ attrib_list[i].value = i965_get_default_chroma_formats(ctx,
+ profile, entrypoint);
break;
case VAConfigAttribRateControl:
if (entrypoint == VAEntrypointEncSlice) {
- attrib_list[i].value = VA_RC_CBR | VA_RC_CQP;
+ attrib_list[i].value = VA_RC_CQP;
+
+ if (profile != VAProfileMPEG2Main &&
+ profile != VAProfileMPEG2Simple)
+ attrib_list[i].value |= VA_RC_CBR;
break;
}
case VAConfigAttribEncPackedHeaders:
if (entrypoint == VAEntrypointEncSlice) {
attrib_list[i].value = VA_ENC_PACKED_HEADER_SEQUENCE | VA_ENC_PACKED_HEADER_PICTURE | VA_ENC_PACKED_HEADER_MISC;
+ if (profile == VAProfileH264ConstrainedBaseline ||
+ profile == VAProfileH264Main ||
+ profile == VAProfileH264High ||
+ profile == VAProfileH264StereoHigh ||
+ profile == VAProfileH264MultiviewHigh) {
+ attrib_list[i].value |= (VA_ENC_PACKED_HEADER_RAW_DATA |
+ VA_ENC_PACKED_HEADER_SLICE);
+ }
break;
}
@@ -469,6 +706,16 @@ i965_GetConfigAttributes(VADriverContextP ctx,
break;
}
+ case VAConfigAttribEncQualityRange:
+ if (entrypoint == VAEntrypointEncSlice) {
+ attrib_list[i].value = 1;
+ if (profile == VAProfileH264ConstrainedBaseline ||
+ profile == VAProfileH264Main ||
+ profile == VAProfileH264High )
+ attrib_list[i].value = ENCODER_QUALITY_RANGE;
+ break;
+ }
+
default:
/* Do nothing */
attrib_list[i].value = VA_ATTRIB_NOT_SUPPORTED;
@@ -485,29 +732,49 @@ i965_destroy_config(struct object_heap *heap, struct object_base *obj)
object_heap_free(heap, obj);
}
-static VAStatus
-i965_update_attribute(struct object_config *obj_config, VAConfigAttrib *attrib)
+static VAConfigAttrib *
+i965_lookup_config_attribute(struct object_config *obj_config,
+ VAConfigAttribType type)
{
int i;
- /* Check existing attrbiutes */
for (i = 0; i < obj_config->num_attribs; i++) {
- if (obj_config->attrib_list[i].type == attrib->type) {
- /* Update existing attribute */
- obj_config->attrib_list[i].value = attrib->value;
- return VA_STATUS_SUCCESS;
- }
+ VAConfigAttrib * const attrib = &obj_config->attrib_list[i];
+ if (attrib->type == type)
+ return attrib;
}
+ return NULL;
+}
+
+static VAStatus
+i965_append_config_attribute(struct object_config *obj_config,
+ const VAConfigAttrib *new_attrib)
+{
+ VAConfigAttrib *attrib;
+
+ if (obj_config->num_attribs >= I965_MAX_CONFIG_ATTRIBUTES)
+ return VA_STATUS_ERROR_MAX_NUM_EXCEEDED;
+
+ attrib = &obj_config->attrib_list[obj_config->num_attribs++];
+ attrib->type = new_attrib->type;
+ attrib->value = new_attrib->value;
+ return VA_STATUS_SUCCESS;
+}
+
+static VAStatus
+i965_ensure_config_attribute(struct object_config *obj_config,
+ const VAConfigAttrib *new_attrib)
+{
+ VAConfigAttrib *attrib;
- if (obj_config->num_attribs < I965_MAX_CONFIG_ATTRIBUTES) {
- i = obj_config->num_attribs;
- obj_config->attrib_list[i].type = attrib->type;
- obj_config->attrib_list[i].value = attrib->value;
- obj_config->num_attribs++;
+ /* Check for existing attributes */
+ attrib = i965_lookup_config_attribute(obj_config, new_attrib->type);
+ if (attrib) {
+ /* Update existing attribute */
+ attrib->value = new_attrib->value;
return VA_STATUS_SUCCESS;
}
-
- return VA_STATUS_ERROR_MAX_NUM_EXCEEDED;
+ return i965_append_config_attribute(obj_config, new_attrib);
}
VAStatus
@@ -524,63 +791,7 @@ i965_CreateConfig(VADriverContextP ctx,
int i;
VAStatus vaStatus;
- /* Validate profile & entrypoint */
- switch (profile) {
- case VAProfileMPEG2Simple:
- case VAProfileMPEG2Main:
- if ((HAS_MPEG2_DECODING(i965) && VAEntrypointVLD == entrypoint) ||
- (HAS_MPEG2_ENCODING(i965) && VAEntrypointEncSlice == entrypoint)) {
- vaStatus = VA_STATUS_SUCCESS;
- } else {
- vaStatus = VA_STATUS_ERROR_UNSUPPORTED_ENTRYPOINT;
- }
- break;
-
- case VAProfileH264Baseline:
- case VAProfileH264Main:
- case VAProfileH264High:
- if ((HAS_H264_DECODING(i965) && VAEntrypointVLD == entrypoint) ||
- (HAS_H264_ENCODING(i965) && VAEntrypointEncSlice == entrypoint)) {
- vaStatus = VA_STATUS_SUCCESS;
- } else {
- vaStatus = VA_STATUS_ERROR_UNSUPPORTED_ENTRYPOINT;
- }
-
- break;
-
- case VAProfileVC1Simple:
- case VAProfileVC1Main:
- case VAProfileVC1Advanced:
- if (HAS_VC1_DECODING(i965) && VAEntrypointVLD == entrypoint) {
- vaStatus = VA_STATUS_SUCCESS;
- } else {
- vaStatus = VA_STATUS_ERROR_UNSUPPORTED_ENTRYPOINT;
- }
-
- break;
-
- case VAProfileNone:
- if (HAS_VPP(i965) && VAEntrypointVideoProc == entrypoint) {
- vaStatus = VA_STATUS_SUCCESS;
- } else {
- vaStatus = VA_STATUS_ERROR_UNSUPPORTED_ENTRYPOINT;
- }
-
- break;
-
- case VAProfileJPEGBaseline:
- if (HAS_JPEG_DECODING(i965) && VAEntrypointVLD == entrypoint) {
- vaStatus = VA_STATUS_SUCCESS;
- } else {
- vaStatus = VA_STATUS_ERROR_UNSUPPORTED_ENTRYPOINT;
- }
-
- break;
-
- default:
- vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
- break;
- }
+ vaStatus = i965_validate_config(ctx, profile, entrypoint);
if (VA_STATUS_SUCCESS != vaStatus) {
return vaStatus;
@@ -596,16 +807,23 @@ i965_CreateConfig(VADriverContextP ctx,
obj_config->profile = profile;
obj_config->entrypoint = entrypoint;
- obj_config->attrib_list[0].type = VAConfigAttribRTFormat;
- obj_config->attrib_list[0].value = VA_RT_FORMAT_YUV420;
- obj_config->num_attribs = 1;
-
- for(i = 0; i < num_attribs; i++) {
- vaStatus = i965_update_attribute(obj_config, &(attrib_list[i]));
+ obj_config->num_attribs = 0;
- if (VA_STATUS_SUCCESS != vaStatus) {
+ for (i = 0; i < num_attribs; i++) {
+ vaStatus = i965_ensure_config_attribute(obj_config, &attrib_list[i]);
+ if (vaStatus != VA_STATUS_SUCCESS)
break;
- }
+ }
+
+ if (vaStatus == VA_STATUS_SUCCESS) {
+ VAConfigAttrib attrib, *attrib_found;
+ attrib.type = VAConfigAttribRTFormat;
+ attrib.value = i965_get_default_chroma_formats(ctx, profile, entrypoint);
+ attrib_found = i965_lookup_config_attribute(obj_config, attrib.type);
+ if (!attrib_found || !attrib_found->value)
+ vaStatus = i965_append_config_attribute(obj_config, &attrib);
+ else if (!(attrib_found->value & attrib.value))
+ vaStatus = VA_STATUS_ERROR_UNSUPPORTED_RT_FORMAT;
}
/* Error recovery */
@@ -646,7 +864,7 @@ VAStatus i965_QueryConfigAttributes(VADriverContextP ctx,
VAStatus vaStatus = VA_STATUS_SUCCESS;
int i;
- assert(obj_config);
+ ASSERT_RET(obj_config, VA_STATUS_ERROR_INVALID_CONFIG);
*profile = obj_config->profile;
*entrypoint = obj_config->entrypoint;
*num_attribs = obj_config->num_attribs;
@@ -658,10 +876,11 @@ VAStatus i965_QueryConfigAttributes(VADriverContextP ctx,
return vaStatus;
}
-static void
-i965_destroy_surface(struct object_heap *heap, struct object_base *obj)
+void
+i965_destroy_surface_storage(struct object_surface *obj_surface)
{
- struct object_surface *obj_surface = (struct object_surface *)obj;
+ if (!obj_surface)
+ return;
dri_bo_unreference(obj_surface->bo);
obj_surface->bo = NULL;
@@ -670,7 +889,14 @@ i965_destroy_surface(struct object_heap *heap, struct object_base *obj)
obj_surface->free_private_data(&obj_surface->private_data);
obj_surface->private_data = NULL;
}
+}
+
+static void
+i965_destroy_surface(struct object_heap *heap, struct object_base *obj)
+{
+ struct object_surface *obj_surface = (struct object_surface *)obj;
+ i965_destroy_surface_storage(obj_surface);
object_heap_free(heap, obj);
}
@@ -687,9 +913,10 @@ i965_surface_native_memory(VADriverContextP ctx,
return VA_STATUS_SUCCESS;
// todo, should we disable tiling for 422 format?
- if (expected_fourcc == VA_FOURCC('I', '4', '2', '0') ||
- expected_fourcc == VA_FOURCC('I', 'Y', 'U', 'V') ||
- expected_fourcc == VA_FOURCC('Y', 'V', '1', '2'))
+ if (expected_fourcc == VA_FOURCC_I420 ||
+ expected_fourcc == VA_FOURCC_IYUV ||
+ expected_fourcc == VA_FOURCC_YV12 ||
+ expected_fourcc == VA_FOURCC_YV16)
tiling = 0;
i965_check_alloc_surface_bo(ctx, obj_surface, tiling, expected_fourcc, get_sampling_from_fourcc(expected_fourcc));
@@ -711,9 +938,9 @@ i965_suface_external_memory(VADriverContextP ctx,
index > memory_attibute->num_buffers)
return VA_STATUS_ERROR_INVALID_PARAMETER;
- assert(obj_surface->orig_width == memory_attibute->width);
- assert(obj_surface->orig_height == memory_attibute->height);
- assert(memory_attibute->num_planes >= 1);
+ ASSERT_RET(obj_surface->orig_width == memory_attibute->width, VA_STATUS_ERROR_INVALID_PARAMETER);
+ ASSERT_RET(obj_surface->orig_height == memory_attibute->height, VA_STATUS_ERROR_INVALID_PARAMETER);
+ ASSERT_RET(memory_attibute->num_planes >= 1, VA_STATUS_ERROR_INVALID_PARAMETER);
obj_surface->fourcc = memory_attibute->pixel_format;
obj_surface->width = memory_attibute->pitches[0];
@@ -728,9 +955,9 @@ i965_suface_external_memory(VADriverContextP ctx,
obj_surface->x_cr_offset = 0;
switch (obj_surface->fourcc) {
- case VA_FOURCC('N', 'V', '1', '2'):
- assert(memory_attibute->num_planes == 2);
- assert(memory_attibute->pitches[0] == memory_attibute->pitches[1]);
+ case VA_FOURCC_NV12:
+ ASSERT_RET(memory_attibute->num_planes == 2, VA_STATUS_ERROR_INVALID_PARAMETER);
+ ASSERT_RET(memory_attibute->pitches[0] == memory_attibute->pitches[1], VA_STATUS_ERROR_INVALID_PARAMETER);
obj_surface->subsampling = SUBSAMPLE_YUV420;
obj_surface->y_cb_offset = obj_surface->height;
@@ -741,10 +968,10 @@ i965_suface_external_memory(VADriverContextP ctx,
break;
- case VA_FOURCC('Y', 'V', '1', '2'):
- case VA_FOURCC('I', 'M', 'C', '1'):
- assert(memory_attibute->num_planes == 3);
- assert(memory_attibute->pitches[1] == memory_attibute->pitches[2]);
+ case VA_FOURCC_YV12:
+ case VA_FOURCC_IMC1:
+ ASSERT_RET(memory_attibute->num_planes == 3, VA_STATUS_ERROR_INVALID_PARAMETER);
+ ASSERT_RET(memory_attibute->pitches[1] == memory_attibute->pitches[2], VA_STATUS_ERROR_INVALID_PARAMETER);
obj_surface->subsampling = SUBSAMPLE_YUV420;
obj_surface->y_cr_offset = obj_surface->height;
@@ -755,11 +982,11 @@ i965_suface_external_memory(VADriverContextP ctx,
break;
- case VA_FOURCC('I', '4', '2', '0'):
- case VA_FOURCC('I', 'Y', 'U', 'V'):
- case VA_FOURCC('I', 'M', 'C', '3'):
- assert(memory_attibute->num_planes == 3);
- assert(memory_attibute->pitches[1] == memory_attibute->pitches[2]);
+ case VA_FOURCC_I420:
+ case VA_FOURCC_IYUV:
+ case VA_FOURCC_IMC3:
+ ASSERT_RET(memory_attibute->num_planes == 3, VA_STATUS_ERROR_INVALID_PARAMETER);
+ ASSERT_RET(memory_attibute->pitches[1] == memory_attibute->pitches[2], VA_STATUS_ERROR_INVALID_PARAMETER);
obj_surface->subsampling = SUBSAMPLE_YUV420;
obj_surface->y_cb_offset = obj_surface->height;
@@ -770,9 +997,9 @@ i965_suface_external_memory(VADriverContextP ctx,
break;
- case VA_FOURCC('Y', 'U', 'Y', '2'):
- case VA_FOURCC('U', 'Y', 'V', 'Y'):
- assert(memory_attibute->num_planes == 1);
+ case VA_FOURCC_YUY2:
+ case VA_FOURCC_UYVY:
+ ASSERT_RET(memory_attibute->num_planes == 1, VA_STATUS_ERROR_INVALID_PARAMETER);
obj_surface->subsampling = SUBSAMPLE_YUV422H;
obj_surface->y_cb_offset = 0;
@@ -783,11 +1010,11 @@ i965_suface_external_memory(VADriverContextP ctx,
break;
- case VA_FOURCC('R', 'G', 'B', 'A'):
- case VA_FOURCC('R', 'G', 'B', 'X'):
- case VA_FOURCC('B', 'G', 'R', 'A'):
- case VA_FOURCC('B', 'G', 'R', 'X'):
- assert(memory_attibute->num_planes == 1);
+ case VA_FOURCC_RGBA:
+ case VA_FOURCC_RGBX:
+ case VA_FOURCC_BGRA:
+ case VA_FOURCC_BGRX:
+ ASSERT_RET(memory_attibute->num_planes == 1, VA_STATUS_ERROR_INVALID_PARAMETER);
obj_surface->subsampling = SUBSAMPLE_RGBX;
obj_surface->y_cb_offset = 0;
@@ -798,8 +1025,8 @@ i965_suface_external_memory(VADriverContextP ctx,
break;
- case VA_FOURCC('Y', '8', '0', '0'): /* monochrome surface */
- assert(memory_attibute->num_planes == 1);
+ case VA_FOURCC_Y800: /* monochrome surface */
+ ASSERT_RET(memory_attibute->num_planes == 1, VA_STATUS_ERROR_INVALID_PARAMETER);
obj_surface->subsampling = SUBSAMPLE_YUV400;
obj_surface->y_cb_offset = 0;
@@ -810,9 +1037,9 @@ i965_suface_external_memory(VADriverContextP ctx,
break;
- case VA_FOURCC('4', '1', '1', 'P'):
- assert(memory_attibute->num_planes == 3);
- assert(memory_attibute->pitches[1] == memory_attibute->pitches[2]);
+ case VA_FOURCC_411P:
+ ASSERT_RET(memory_attibute->num_planes == 3, VA_STATUS_ERROR_INVALID_PARAMETER);
+ ASSERT_RET(memory_attibute->pitches[1] == memory_attibute->pitches[2], VA_STATUS_ERROR_INVALID_PARAMETER);
obj_surface->subsampling = SUBSAMPLE_YUV411;
obj_surface->y_cb_offset = 0;
@@ -823,9 +1050,9 @@ i965_suface_external_memory(VADriverContextP ctx,
break;
- case VA_FOURCC('4', '2', '2', 'H'):
- assert(memory_attibute->num_planes == 3);
- assert(memory_attibute->pitches[1] == memory_attibute->pitches[2]);
+ case VA_FOURCC_422H:
+ ASSERT_RET(memory_attibute->num_planes == 3, VA_STATUS_ERROR_INVALID_PARAMETER);
+ ASSERT_RET(memory_attibute->pitches[1] == memory_attibute->pitches[2], VA_STATUS_ERROR_INVALID_PARAMETER);
obj_surface->subsampling = SUBSAMPLE_YUV422H;
obj_surface->y_cb_offset = obj_surface->height;
@@ -836,11 +1063,24 @@ i965_suface_external_memory(VADriverContextP ctx,
break;
- case VA_FOURCC('4', '2', '2', 'V'):
+ case VA_FOURCC_YV16:
assert(memory_attibute->num_planes == 3);
assert(memory_attibute->pitches[1] == memory_attibute->pitches[2]);
obj_surface->subsampling = SUBSAMPLE_YUV422H;
+ obj_surface->y_cr_offset = memory_attibute->offsets[1] / obj_surface->width;
+ obj_surface->y_cb_offset = memory_attibute->offsets[2] / obj_surface->width;
+ obj_surface->cb_cr_width = obj_surface->orig_width / 2;
+ obj_surface->cb_cr_height = obj_surface->orig_height;
+ obj_surface->cb_cr_pitch = memory_attibute->pitches[1];
+
+ break;
+
+ case VA_FOURCC_422V:
+ ASSERT_RET(memory_attibute->num_planes == 3, VA_STATUS_ERROR_INVALID_PARAMETER);
+ ASSERT_RET(memory_attibute->pitches[1] == memory_attibute->pitches[2], VA_STATUS_ERROR_INVALID_PARAMETER);
+
+ obj_surface->subsampling = SUBSAMPLE_YUV422H;
obj_surface->y_cb_offset = obj_surface->height;
obj_surface->y_cr_offset = memory_attibute->offsets[2] / obj_surface->width;
obj_surface->cb_cr_width = obj_surface->orig_width;
@@ -849,9 +1089,9 @@ i965_suface_external_memory(VADriverContextP ctx,
break;
- case VA_FOURCC('4', '4', '4', 'P'):
- assert(memory_attibute->num_planes == 3);
- assert(memory_attibute->pitches[1] == memory_attibute->pitches[2]);
+ case VA_FOURCC_444P:
+ ASSERT_RET(memory_attibute->num_planes == 3, VA_STATUS_ERROR_INVALID_PARAMETER);
+ ASSERT_RET(memory_attibute->pitches[1] == memory_attibute->pitches[2], VA_STATUS_ERROR_INVALID_PARAMETER);
obj_surface->subsampling = SUBSAMPLE_YUV444;
obj_surface->y_cb_offset = obj_surface->height;
@@ -882,6 +1122,18 @@ i965_suface_external_memory(VADriverContextP ctx,
return VA_STATUS_SUCCESS;
}
+/* byte-per-pixel of the first plane */
+static int
+bpp_1stplane_by_fourcc(unsigned int fourcc)
+{
+ const i965_fourcc_info *info = get_fourcc_info(fourcc);
+
+ if (info && (info->flag & I_S))
+ return info->bpp[0] / 8;
+ else
+ return 0;
+}
+
static VAStatus
i965_CreateSurfaces2(
VADriverContextP ctx,
@@ -904,24 +1156,26 @@ i965_CreateSurfaces2(
for (i = 0; i < num_attribs && attrib_list; i++) {
if ((attrib_list[i].type == VASurfaceAttribPixelFormat) &&
(attrib_list[i].flags & VA_SURFACE_ATTRIB_SETTABLE)) {
- assert(attrib_list[i].value.type == VAGenericValueTypeInteger);
+ ASSERT_RET(attrib_list[i].value.type == VAGenericValueTypeInteger, VA_STATUS_ERROR_INVALID_PARAMETER);
expected_fourcc = attrib_list[i].value.value.i;
}
if ((attrib_list[i].type == VASurfaceAttribMemoryType) &&
(attrib_list[i].flags & VA_SURFACE_ATTRIB_SETTABLE)) {
- assert(attrib_list[i].value.type == VAGenericValueTypeInteger);
+ ASSERT_RET(attrib_list[i].value.type == VAGenericValueTypeInteger, VA_STATUS_ERROR_INVALID_PARAMETER);
if (attrib_list[i].value.value.i == VA_SURFACE_ATTRIB_MEM_TYPE_KERNEL_DRM)
memory_type = I965_SURFACE_MEM_GEM_FLINK; /* flinked GEM handle */
else if (attrib_list[i].value.value.i == VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME)
memory_type = I965_SURFACE_MEM_DRM_PRIME; /* drm prime fd */
+ else if (attrib_list[i].value.value.i == VA_SURFACE_ATTRIB_MEM_TYPE_VA)
+ memory_type = I965_SURFACE_MEM_NATIVE; /* va native memory, to be allocated */
}
if ((attrib_list[i].type == VASurfaceAttribExternalBufferDescriptor) &&
(attrib_list[i].flags == VA_SURFACE_ATTRIB_SETTABLE)) {
- assert(attrib_list[i].value.type == VAGenericValueTypePointer);
+ ASSERT_RET(attrib_list[i].value.type == VAGenericValueTypePointer, VA_STATUS_ERROR_INVALID_PARAMETER);
memory_attibute = (VASurfaceAttribExternalBuffers *)attrib_list[i].value.value.p;
}
}
@@ -950,6 +1204,9 @@ i965_CreateSurfaces2(
obj_surface->status = VASurfaceReady;
obj_surface->orig_width = width;
obj_surface->orig_height = height;
+ obj_surface->user_disable_tiling = false;
+ obj_surface->user_h_stride_set = false;
+ obj_surface->user_v_stride_set = false;
obj_surface->subpic_render_idx = 0;
for(j = 0; j < I965_MAX_SUBPIC_SUM; j++){
@@ -957,18 +1214,49 @@ i965_CreateSurfaces2(
obj_surface->obj_subpic[j] = NULL;
}
- obj_surface->width = ALIGN(width, 16);
- obj_surface->height = ALIGN(height, 16);
+ assert(i965->codec_info->min_linear_wpitch);
+ assert(i965->codec_info->min_linear_hpitch);
+ obj_surface->width = ALIGN(width, i965->codec_info->min_linear_wpitch);
+ obj_surface->height = ALIGN(height, i965->codec_info->min_linear_hpitch);
obj_surface->flags = SURFACE_REFERENCED;
obj_surface->fourcc = 0;
obj_surface->bo = NULL;
obj_surface->locked_image_id = VA_INVALID_ID;
+ obj_surface->derived_image_id = VA_INVALID_ID;
obj_surface->private_data = NULL;
obj_surface->free_private_data = NULL;
obj_surface->subsampling = SUBSAMPLE_YUV420;
switch (memory_type) {
case I965_SURFACE_MEM_NATIVE:
+ if (memory_attibute) {
+ if (!(memory_attibute->flags & VA_SURFACE_EXTBUF_DESC_ENABLE_TILING))
+ obj_surface->user_disable_tiling = true;
+
+ if (memory_attibute->pixel_format) {
+ if (expected_fourcc)
+ ASSERT_RET(memory_attibute->pixel_format == expected_fourcc, VA_STATUS_ERROR_INVALID_PARAMETER);
+ else
+ expected_fourcc = memory_attibute->pixel_format;
+ }
+ ASSERT_RET(expected_fourcc, VA_STATUS_ERROR_INVALID_PARAMETER);
+ if (memory_attibute->pitches[0]) {
+ int bpp_1stplane = bpp_1stplane_by_fourcc(expected_fourcc);
+ ASSERT_RET(bpp_1stplane, VA_STATUS_ERROR_INVALID_PARAMETER);
+ obj_surface->width = memory_attibute->pitches[0]/bpp_1stplane;
+ obj_surface->user_h_stride_set = true;
+ ASSERT_RET(IS_ALIGNED(obj_surface->width, 16), VA_STATUS_ERROR_INVALID_PARAMETER);
+ ASSERT_RET(obj_surface->width >= width, VA_STATUS_ERROR_INVALID_PARAMETER);
+
+ if (memory_attibute->offsets[1]) {
+ ASSERT_RET(!memory_attibute->offsets[0], VA_STATUS_ERROR_INVALID_PARAMETER);
+ obj_surface->height = memory_attibute->offsets[1]/memory_attibute->pitches[0];
+ obj_surface->user_v_stride_set = true;
+ ASSERT_RET(IS_ALIGNED(obj_surface->height, 16), VA_STATUS_ERROR_INVALID_PARAMETER);
+ ASSERT_RET(obj_surface->height >= height, VA_STATUS_ERROR_INVALID_PARAMETER);
+ }
+ }
+ }
i965_surface_native_memory(ctx,
obj_surface,
format,
@@ -1030,7 +1318,7 @@ i965_DestroySurfaces(VADriverContextP ctx,
for (i = num_surfaces; i--; ) {
struct object_surface *obj_surface = SURFACE(surface_list[i]);
- assert(obj_surface);
+ ASSERT_RET(obj_surface, VA_STATUS_ERROR_INVALID_SURFACE);
i965_destroy_surface(&i965->surface_heap, (struct object_base *)obj_surface);
}
@@ -1073,7 +1361,7 @@ i965_guess_surface_format(VADriverContextP ctx,
struct object_context *obj_context = NULL;
struct object_config *obj_config = NULL;
- *fourcc = VA_FOURCC('Y', 'V', '1', '2');
+ *fourcc = VA_FOURCC_YV12;
*is_tiled = 0;
if (i965->current_context_id == VA_INVALID_ID)
@@ -1090,8 +1378,10 @@ i965_guess_surface_format(VADriverContextP ctx,
if (!obj_config)
return;
- if (IS_GEN6(i965->intel.device_id) || IS_GEN7(i965->intel.device_id)) {
- *fourcc = VA_FOURCC('N', 'V', '1', '2');
+ if (IS_GEN6(i965->intel.device_info) ||
+ IS_GEN7(i965->intel.device_info) ||
+ IS_GEN8(i965->intel.device_info)) {
+ *fourcc = VA_FOURCC_NV12;
*is_tiled = 1;
return;
}
@@ -1099,12 +1389,12 @@ i965_guess_surface_format(VADriverContextP ctx,
switch (obj_config->profile) {
case VAProfileMPEG2Simple:
case VAProfileMPEG2Main:
- *fourcc = VA_FOURCC('I', '4', '2', '0');
+ *fourcc = VA_FOURCC_I420;
*is_tiled = 0;
break;
default:
- *fourcc = VA_FOURCC('N', 'V', '1', '2');
+ *fourcc = VA_FOURCC_NV12;
*is_tiled = 0;
break;
}
@@ -1183,7 +1473,7 @@ i965_DestroySubpicture(VADriverContextP ctx,
if (!obj_subpic)
return VA_STATUS_ERROR_INVALID_SUBPICTURE;
- assert(obj_subpic->obj_image);
+ ASSERT_RET(obj_subpic->obj_image, VA_STATUS_ERROR_INVALID_SUBPICTURE);
i965_destroy_subpic(&i965->subpic_heap, (struct object_base *)obj_subpic);
return VA_STATUS_SUCCESS;
}
@@ -1254,7 +1544,7 @@ i965_AssociateSubpicture(VADriverContextP ctx,
if (!obj_subpic)
return VA_STATUS_ERROR_INVALID_SUBPICTURE;
- assert(obj_subpic->obj_image);
+ ASSERT_RET(obj_subpic->obj_image, VA_STATUS_ERROR_INVALID_SUBPICTURE);
obj_subpic->src_rect.x = src_x;
obj_subpic->src_rect.y = src_y;
@@ -1399,6 +1689,28 @@ i965_destroy_context(struct object_heap *heap, struct object_base *obj)
i965_release_buffer_store(&obj_context->codec_state.encode.slice_params_ext[i]);
free(obj_context->codec_state.encode.slice_params_ext);
+ if (obj_context->codec_state.encode.slice_rawdata_index) {
+ free(obj_context->codec_state.encode.slice_rawdata_index);
+ obj_context->codec_state.encode.slice_rawdata_index = NULL;
+ }
+ if (obj_context->codec_state.encode.slice_rawdata_count) {
+ free(obj_context->codec_state.encode.slice_rawdata_count);
+ obj_context->codec_state.encode.slice_rawdata_count = NULL;
+ }
+
+ if (obj_context->codec_state.encode.slice_header_index) {
+ free(obj_context->codec_state.encode.slice_header_index);
+ obj_context->codec_state.encode.slice_header_index = NULL;
+ }
+
+ for (i = 0; i < obj_context->codec_state.encode.num_packed_header_params_ext; i++)
+ i965_release_buffer_store(&obj_context->codec_state.encode.packed_header_params_ext[i]);
+ free(obj_context->codec_state.encode.packed_header_params_ext);
+
+ for (i = 0; i < obj_context->codec_state.encode.num_packed_header_data_ext; i++)
+ i965_release_buffer_store(&obj_context->codec_state.encode.packed_header_data_ext[i]);
+ free(obj_context->codec_state.encode.packed_header_data_ext);
+
} else {
assert(obj_context->codec_state.decode.num_slice_params <= obj_context->codec_state.decode.max_slice_params);
assert(obj_context->codec_state.decode.num_slice_datas <= obj_context->codec_state.decode.max_slice_datas);
@@ -1435,6 +1747,7 @@ i965_CreateContext(VADriverContextP ctx,
struct i965_render_state *render_state = &i965->render_state;
struct object_config *obj_config = CONFIG(config_id);
struct object_context *obj_context = NULL;
+ VAConfigAttrib *attrib;
VAStatus vaStatus = VA_STATUS_SUCCESS;
int contextID;
int i;
@@ -1463,7 +1776,7 @@ i965_CreateContext(VADriverContextP ctx,
render_state->inited = 1;
switch (obj_config->profile) {
- case VAProfileH264Baseline:
+ case VAProfileH264ConstrainedBaseline:
case VAProfileH264Main:
case VAProfileH264High:
if (!HAS_H264_DECODING(i965) &&
@@ -1471,8 +1784,14 @@ i965_CreateContext(VADriverContextP ctx,
return VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
render_state->interleaved_uv = 1;
break;
+ case VAProfileH264MultiviewHigh:
+ case VAProfileH264StereoHigh:
+ if (!HAS_H264_MVC_DECODING(i965))
+ return VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
+ render_state->interleaved_uv = 1;
+ break;
default:
- render_state->interleaved_uv = !!(IS_GEN6(i965->intel.device_id) || IS_GEN7(i965->intel.device_id));
+ render_state->interleaved_uv = !!(IS_GEN6(i965->intel.device_info) || IS_GEN7(i965->intel.device_info) || IS_GEN8(i965->intel.device_info));
break;
}
@@ -1504,12 +1823,45 @@ i965_CreateContext(VADriverContextP ctx,
assert(i965->codec_info->proc_hw_context_init);
obj_context->hw_context = i965->codec_info->proc_hw_context_init(ctx, obj_config);
} else if (VAEntrypointEncSlice == obj_config->entrypoint) { /*encode routin only*/
+ VAConfigAttrib *packed_attrib;
obj_context->codec_type = CODEC_ENC;
memset(&obj_context->codec_state.encode, 0, sizeof(obj_context->codec_state.encode));
obj_context->codec_state.encode.current_render_target = VA_INVALID_ID;
obj_context->codec_state.encode.max_slice_params = NUM_SLICES;
obj_context->codec_state.encode.slice_params = calloc(obj_context->codec_state.encode.max_slice_params,
sizeof(*obj_context->codec_state.encode.slice_params));
+ obj_context->codec_state.encode.max_packed_header_params_ext = NUM_SLICES;
+ obj_context->codec_state.encode.packed_header_params_ext =
+ calloc(obj_context->codec_state.encode.max_packed_header_params_ext,
+ sizeof(struct buffer_store *));
+
+ obj_context->codec_state.encode.max_packed_header_data_ext = NUM_SLICES;
+ obj_context->codec_state.encode.packed_header_data_ext =
+ calloc(obj_context->codec_state.encode.max_packed_header_data_ext,
+ sizeof(struct buffer_store *));
+
+ obj_context->codec_state.encode.max_slice_num = NUM_SLICES;
+ obj_context->codec_state.encode.slice_rawdata_index =
+ calloc(obj_context->codec_state.encode.max_slice_num, sizeof(int));
+ obj_context->codec_state.encode.slice_rawdata_count =
+ calloc(obj_context->codec_state.encode.max_slice_num, sizeof(int));
+
+ obj_context->codec_state.encode.slice_header_index =
+ calloc(obj_context->codec_state.encode.max_slice_num, sizeof(int));
+
+ obj_context->codec_state.encode.slice_index = 0;
+ packed_attrib = i965_lookup_config_attribute(obj_config, VAConfigAttribEncPackedHeaders);
+ if (packed_attrib)
+ obj_context->codec_state.encode.packed_header_flag = packed_attrib->value;
+ else {
+ /* use the default value. SPS/PPS/RAWDATA is passed from user
+ * while Slice_header data is generated by driver.
+ */
+ obj_context->codec_state.encode.packed_header_flag =
+ VA_ENC_PACKED_HEADER_SEQUENCE |
+ VA_ENC_PACKED_HEADER_PICTURE |
+ VA_ENC_PACKED_HEADER_RAW_DATA;
+ }
assert(i965->codec_info->enc_hw_context_init);
obj_context->hw_context = i965->codec_info->enc_hw_context_init(ctx, obj_config);
} else {
@@ -1528,6 +1880,11 @@ i965_CreateContext(VADriverContextP ctx,
}
}
+ attrib = i965_lookup_config_attribute(obj_config, VAConfigAttribRTFormat);
+ if (!attrib)
+ return VA_STATUS_ERROR_INVALID_CONFIG;
+ obj_context->codec_state.base.chroma_formats = attrib->value;
+
/* Error recovery */
if (VA_STATUS_SUCCESS != vaStatus) {
i965_destroy_context(&i965->context_heap, (struct object_base *)obj_context);
@@ -1544,7 +1901,7 @@ i965_DestroyContext(VADriverContextP ctx, VAContextID context)
struct i965_driver_data *i965 = i965_driver_data(ctx);
struct object_context *obj_context = CONTEXT(context);
- assert(obj_context);
+ ASSERT_RET(obj_context, VA_STATUS_ERROR_INVALID_CONTEXT);
if (i965->current_context_id == context)
i965->current_context_id = VA_INVALID_ID;
@@ -1602,6 +1959,7 @@ i965_create_buffer_internal(VADriverContextP ctx,
case VAProcPipelineParameterBufferType:
case VAProcFilterParameterBufferType:
case VAHuffmanTableBufferType:
+ case VAProbabilityBufferType:
/* Ok */
break;
@@ -1625,6 +1983,7 @@ i965_create_buffer_internal(VADriverContextP ctx,
obj_buffer->num_elements = num_elements;
obj_buffer->size_element = size;
obj_buffer->type = type;
+ obj_buffer->export_refcount = 0;
obj_buffer->buffer_store = NULL;
buffer_store = calloc(1, sizeof(struct buffer_store));
assert(buffer_store);
@@ -1638,7 +1997,8 @@ i965_create_buffer_internal(VADriverContextP ctx,
dri_bo_subdata(buffer_store->bo, 0, size * num_elements, data);
} else if (type == VASliceDataBufferType ||
type == VAImageBufferType ||
- type == VAEncCodedBufferType) {
+ type == VAEncCodedBufferType ||
+ type == VAProbabilityBufferType) {
buffer_store->bo = dri_bo_alloc(i965->intel.bufmgr,
"Buffer",
size * num_elements, 64);
@@ -1705,10 +2065,7 @@ i965_BufferSetNumElements(VADriverContextP ctx,
struct object_buffer *obj_buffer = BUFFER(buf_id);
VAStatus vaStatus = VA_STATUS_SUCCESS;
- assert(obj_buffer);
-
- if (!obj_buffer)
- return VA_STATUS_ERROR_INVALID_BUFFER;
+ ASSERT_RET(obj_buffer, VA_STATUS_ERROR_INVALID_BUFFER);
if ((num_elements < 0) ||
(num_elements > obj_buffer->max_num_elements)) {
@@ -1732,11 +2089,11 @@ i965_MapBuffer(VADriverContextP ctx,
struct object_buffer *obj_buffer = BUFFER(buf_id);
VAStatus vaStatus = VA_STATUS_ERROR_UNKNOWN;
- assert(obj_buffer && obj_buffer->buffer_store);
- assert(obj_buffer->buffer_store->bo || obj_buffer->buffer_store->buffer);
- assert(!(obj_buffer->buffer_store->bo && obj_buffer->buffer_store->buffer));
+ ASSERT_RET(obj_buffer && obj_buffer->buffer_store, VA_STATUS_ERROR_INVALID_BUFFER);
+ ASSERT_RET(obj_buffer->buffer_store->bo || obj_buffer->buffer_store->buffer, VA_STATUS_ERROR_INVALID_BUFFER);
+ ASSERT_RET(!(obj_buffer->buffer_store->bo && obj_buffer->buffer_store->buffer), VA_STATUS_ERROR_INVALID_BUFFER);
- if (!obj_buffer || !obj_buffer->buffer_store)
+ if (obj_buffer->export_refcount > 0)
return VA_STATUS_ERROR_INVALID_BUFFER;
if (NULL != obj_buffer->buffer_store->bo) {
@@ -1749,7 +2106,7 @@ i965_MapBuffer(VADriverContextP ctx,
else
dri_bo_map(obj_buffer->buffer_store->bo, 1);
- assert(obj_buffer->buffer_store->bo->virtual);
+ ASSERT_RET(obj_buffer->buffer_store->bo->virtual, VA_STATUS_ERROR_OPERATION_FAILED);
*pbuf = obj_buffer->buffer_store->bo->virtual;
if (obj_buffer->type == VAEncCodedBufferType) {
@@ -1762,20 +2119,21 @@ i965_MapBuffer(VADriverContextP ctx,
coded_buffer_segment->base.buf = buffer = (unsigned char *)(obj_buffer->buffer_store->bo->virtual) + I965_CODEDBUFFER_HEADER_SIZE;
- if (coded_buffer_segment->codec == CODED_H264) {
+ if (coded_buffer_segment->codec == CODEC_H264 ||
+ coded_buffer_segment->codec == CODEC_H264_MVC) {
delimiter0 = H264_DELIMITER0;
delimiter1 = H264_DELIMITER1;
delimiter2 = H264_DELIMITER2;
delimiter3 = H264_DELIMITER3;
delimiter4 = H264_DELIMITER4;
- } else if (coded_buffer_segment->codec == CODED_MPEG2) {
+ } else if (coded_buffer_segment->codec == CODEC_MPEG2) {
delimiter0 = MPEG2_DELIMITER0;
delimiter1 = MPEG2_DELIMITER1;
delimiter2 = MPEG2_DELIMITER2;
delimiter3 = MPEG2_DELIMITER3;
delimiter4 = MPEG2_DELIMITER4;
} else {
- assert(0);
+ ASSERT_RET(0, VA_STATUS_ERROR_UNSUPPORTED_PROFILE);
}
for (i = 0; i < obj_buffer->size_element - I965_CODEDBUFFER_HEADER_SIZE - 3 - 0x1000; i++) {
@@ -1814,13 +2172,13 @@ i965_UnmapBuffer(VADriverContextP ctx, VABufferID buf_id)
struct object_buffer *obj_buffer = BUFFER(buf_id);
VAStatus vaStatus = VA_STATUS_ERROR_UNKNOWN;
- assert(obj_buffer && obj_buffer->buffer_store);
- assert(obj_buffer->buffer_store->bo || obj_buffer->buffer_store->buffer);
- assert(!(obj_buffer->buffer_store->bo && obj_buffer->buffer_store->buffer));
-
- if (!obj_buffer || !obj_buffer->buffer_store)
+ if ((buf_id & OBJECT_HEAP_OFFSET_MASK) != BUFFER_ID_OFFSET)
return VA_STATUS_ERROR_INVALID_BUFFER;
+ ASSERT_RET(obj_buffer && obj_buffer->buffer_store, VA_STATUS_ERROR_INVALID_BUFFER);
+ ASSERT_RET(obj_buffer->buffer_store->bo || obj_buffer->buffer_store->buffer, VA_STATUS_ERROR_OPERATION_FAILED);
+ ASSERT_RET(!(obj_buffer->buffer_store->bo && obj_buffer->buffer_store->buffer), VA_STATUS_ERROR_OPERATION_FAILED);
+
if (NULL != obj_buffer->buffer_store->bo) {
unsigned int tiling, swizzle;
@@ -1846,10 +2204,7 @@ i965_DestroyBuffer(VADriverContextP ctx, VABufferID buffer_id)
struct i965_driver_data *i965 = i965_driver_data(ctx);
struct object_buffer *obj_buffer = BUFFER(buffer_id);
- assert(obj_buffer);
-
- if (!obj_buffer)
- return VA_STATUS_ERROR_INVALID_BUFFER;
+ ASSERT_RET(obj_buffer, VA_STATUS_ERROR_INVALID_BUFFER);
i965_destroy_buffer(&i965->buffer_heap, (struct object_base *)obj_buffer);
@@ -1868,18 +2223,13 @@ i965_BeginPicture(VADriverContextP ctx,
VAStatus vaStatus;
int i;
- assert(obj_context);
-
- if (!obj_context)
- return VA_STATUS_ERROR_INVALID_CONTEXT;
-
- assert(obj_surface);
-
- if (!obj_surface)
- return VA_STATUS_ERROR_INVALID_SURFACE;
-
+ ASSERT_RET(obj_context, VA_STATUS_ERROR_INVALID_CONTEXT);
+ ASSERT_RET(obj_surface, VA_STATUS_ERROR_INVALID_SURFACE);
obj_config = obj_context->obj_config;
- assert(obj_config);
+ ASSERT_RET(obj_config, VA_STATUS_ERROR_INVALID_CONFIG);
+
+ if (is_surface_busy(i965, obj_surface))
+ return VA_STATUS_ERROR_SURFACE_BUSY;
switch (obj_config->profile) {
case VAProfileMPEG2Simple:
@@ -1887,12 +2237,22 @@ i965_BeginPicture(VADriverContextP ctx,
vaStatus = VA_STATUS_SUCCESS;
break;
- case VAProfileH264Baseline:
+ case VAProfileH264ConstrainedBaseline:
case VAProfileH264Main:
case VAProfileH264High:
vaStatus = VA_STATUS_SUCCESS;
break;
+ case VAProfileH264MultiviewHigh:
+ case VAProfileH264StereoHigh:
+ if (HAS_H264_MVC_DECODING_PROFILE(i965, obj_config->profile) ||
+ HAS_H264_MVC_ENCODING(i965)) {
+ vaStatus = VA_STATUS_SUCCESS;
+ } else {
+ ASSERT_RET(0, VA_STATUS_ERROR_UNSUPPORTED_PROFILE);
+ }
+ break;
+
case VAProfileVC1Simple:
case VAProfileVC1Main:
case VAProfileVC1Advanced:
@@ -1907,9 +2267,12 @@ i965_BeginPicture(VADriverContextP ctx,
vaStatus = VA_STATUS_SUCCESS;
break;
+ case VAProfileVP8Version0_3:
+ vaStatus = VA_STATUS_SUCCESS;
+ break;
+
default:
- assert(0);
- vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
+ ASSERT_RET(0, VA_STATUS_ERROR_UNSUPPORTED_PROFILE);
break;
}
@@ -1939,6 +2302,20 @@ i965_BeginPicture(VADriverContextP ctx,
obj_context->codec_state.encode.num_slice_params_ext = 0;
obj_context->codec_state.encode.current_render_target = render_target; /*This is input new frame*/
obj_context->codec_state.encode.last_packed_header_type = 0;
+ memset(obj_context->codec_state.encode.slice_rawdata_index, 0,
+ sizeof(int) * obj_context->codec_state.encode.max_slice_num);
+ memset(obj_context->codec_state.encode.slice_rawdata_count, 0,
+ sizeof(int) * obj_context->codec_state.encode.max_slice_num);
+ memset(obj_context->codec_state.encode.slice_header_index, 0,
+ sizeof(int) * obj_context->codec_state.encode.max_slice_num);
+
+ for (i = 0; i < obj_context->codec_state.encode.num_packed_header_params_ext; i++)
+ i965_release_buffer_store(&obj_context->codec_state.encode.packed_header_params_ext[i]);
+ for (i = 0; i < obj_context->codec_state.encode.num_packed_header_data_ext; i++)
+ i965_release_buffer_store(&obj_context->codec_state.encode.packed_header_data_ext[i]);
+ obj_context->codec_state.encode.num_packed_header_params_ext = 0;
+ obj_context->codec_state.encode.num_packed_header_data_ext = 0;
+ obj_context->codec_state.encode.slice_index = 0;
} else {
obj_context->codec_state.decode.current_render_target = render_target;
i965_release_buffer_store(&obj_context->codec_state.decode.pic_param);
@@ -1967,8 +2344,6 @@ i965_BeginPicture(VADriverContextP ctx,
struct object_buffer *obj_buffer) \
{ \
struct category##_state *category = &obj_context->codec_state.category; \
- assert(obj_buffer->buffer_store->bo == NULL); \
- assert(obj_buffer->buffer_store->buffer); \
i965_release_buffer_store(&category->member); \
i965_reference_buffer_store(&category->member, obj_buffer->buffer_store); \
return VA_STATUS_SUCCESS; \
@@ -1999,6 +2374,7 @@ DEF_RENDER_DECODE_SINGLE_BUFFER_FUNC(picture_parameter, pic_param)
DEF_RENDER_DECODE_SINGLE_BUFFER_FUNC(iq_matrix, iq_matrix)
DEF_RENDER_DECODE_SINGLE_BUFFER_FUNC(bit_plane, bit_plane)
DEF_RENDER_DECODE_SINGLE_BUFFER_FUNC(huffman_table, huffman_table)
+DEF_RENDER_DECODE_SINGLE_BUFFER_FUNC(probability_data, probability_data)
#define DEF_RENDER_DECODE_MULTI_BUFFER_FUNC(name, member) DEF_RENDER_MULTI_BUFFER_FUNC(decode, name, member)
DEF_RENDER_DECODE_MULTI_BUFFER_FUNC(slice_parameter, slice_params)
@@ -2015,14 +2391,10 @@ i965_decoder_render_picture(VADriverContextP ctx,
VAStatus vaStatus = VA_STATUS_SUCCESS;
int i;
- assert(obj_context);
-
- if (!obj_context)
- return VA_STATUS_ERROR_INVALID_CONTEXT;
+ ASSERT_RET(obj_context, VA_STATUS_ERROR_INVALID_CONTEXT);
for (i = 0; i < num_buffers && vaStatus == VA_STATUS_SUCCESS; i++) {
struct object_buffer *obj_buffer = BUFFER(buffers[i]);
- assert(obj_buffer);
if (!obj_buffer)
return VA_STATUS_ERROR_INVALID_BUFFER;
@@ -2052,6 +2424,10 @@ i965_decoder_render_picture(VADriverContextP ctx,
vaStatus = I965_RENDER_DECODE_BUFFER(huffman_table);
break;
+ case VAProbabilityBufferType:
+ vaStatus = I965_RENDER_DECODE_BUFFER(probability_data);
+ break;
+
default:
vaStatus = VA_STATUS_ERROR_UNSUPPORTED_BUFFERTYPE;
break;
@@ -2077,6 +2453,9 @@ DEF_RENDER_ENCODE_SINGLE_BUFFER_FUNC(picture_parameter_ext, pic_param_ext)
// DEF_RENDER_ENCODE_MULTI_BUFFER_FUNC(slice_parameter, slice_params)
DEF_RENDER_ENCODE_MULTI_BUFFER_FUNC(slice_parameter_ext, slice_params_ext)
+DEF_RENDER_ENCODE_MULTI_BUFFER_FUNC(packed_header_params_ext, packed_header_params_ext)
+DEF_RENDER_ENCODE_MULTI_BUFFER_FUNC(packed_header_data_ext, packed_header_data_ext)
+
static VAStatus
i965_encoder_render_packed_header_parameter_buffer(VADriverContextP ctx,
struct object_context *obj_context,
@@ -2085,8 +2464,8 @@ i965_encoder_render_packed_header_parameter_buffer(VADriverContextP ctx,
{
struct encode_state *encode = &obj_context->codec_state.encode;
- assert(obj_buffer->buffer_store->bo == NULL);
- assert(obj_buffer->buffer_store->buffer);
+ ASSERT_RET(obj_buffer->buffer_store->bo == NULL, VA_STATUS_ERROR_INVALID_BUFFER);
+ ASSERT_RET(obj_buffer->buffer_store->buffer, VA_STATUS_ERROR_INVALID_BUFFER);
i965_release_buffer_store(&encode->packed_header_param[type_index]);
i965_reference_buffer_store(&encode->packed_header_param[type_index], obj_buffer->buffer_store);
@@ -2101,8 +2480,8 @@ i965_encoder_render_packed_header_data_buffer(VADriverContextP ctx,
{
struct encode_state *encode = &obj_context->codec_state.encode;
- assert(obj_buffer->buffer_store->bo == NULL);
- assert(obj_buffer->buffer_store->buffer);
+ ASSERT_RET(obj_buffer->buffer_store->bo == NULL, VA_STATUS_ERROR_INVALID_BUFFER);
+ ASSERT_RET(obj_buffer->buffer_store->buffer, VA_STATUS_ERROR_INVALID_BUFFER);
i965_release_buffer_store(&encode->packed_header_data[type_index]);
i965_reference_buffer_store(&encode->packed_header_data[type_index], obj_buffer->buffer_store);
@@ -2117,10 +2496,14 @@ i965_encoder_render_misc_parameter_buffer(VADriverContextP ctx,
struct encode_state *encode = &obj_context->codec_state.encode;
VAEncMiscParameterBuffer *param = NULL;
- assert(obj_buffer->buffer_store->bo == NULL);
- assert(obj_buffer->buffer_store->buffer);
+ ASSERT_RET(obj_buffer->buffer_store->bo == NULL, VA_STATUS_ERROR_INVALID_BUFFER);
+ ASSERT_RET(obj_buffer->buffer_store->buffer, VA_STATUS_ERROR_INVALID_BUFFER);
param = (VAEncMiscParameterBuffer *)obj_buffer->buffer_store->buffer;
+
+ if (param->type >= ARRAY_ELEMS(encode->misc_param))
+ return VA_STATUS_ERROR_INVALID_PARAMETER;
+
i965_release_buffer_store(&encode->misc_param[param->type]);
i965_reference_buffer_store(&encode->misc_param[param->type], obj_buffer->buffer_store);
@@ -2136,16 +2519,14 @@ i965_encoder_render_picture(VADriverContextP ctx,
struct i965_driver_data *i965 = i965_driver_data(ctx);
struct object_context *obj_context = CONTEXT(context);
VAStatus vaStatus = VA_STATUS_ERROR_UNKNOWN;
+ struct encode_state *encode;
int i;
- assert(obj_context);
-
- if (!obj_context)
- return VA_STATUS_ERROR_INVALID_CONTEXT;
+ ASSERT_RET(obj_context, VA_STATUS_ERROR_INVALID_CONTEXT);
+ encode = &obj_context->codec_state.encode;
for (i = 0; i < num_buffers; i++) {
struct object_buffer *obj_buffer = BUFFER(buffers[i]);
- assert(obj_buffer);
if (!obj_buffer)
return VA_STATUS_ERROR_INVALID_BUFFER;
@@ -2169,34 +2550,141 @@ i965_encoder_render_picture(VADriverContextP ctx,
case VAEncSliceParameterBufferType:
vaStatus = I965_RENDER_ENCODE_BUFFER(slice_parameter_ext);
+ if (vaStatus == VA_STATUS_SUCCESS) {
+ /* When the max number of slices is updated, the arrays
+ * that are used to store the packed data index/count
+ * for the slices also need to be reallocated.
+ */
+ if (!(encode->packed_header_flag & VA_ENC_PACKED_HEADER_SLICE)) {
+ encode->slice_index++;
+ }
+ if (encode->slice_index == encode->max_slice_num) {
+ int slice_num = encode->max_slice_num;
+ encode->slice_rawdata_index = realloc(encode->slice_rawdata_index,
+ (slice_num + NUM_SLICES) * sizeof(int));
+ encode->slice_rawdata_count = realloc(encode->slice_rawdata_count,
+ (slice_num + NUM_SLICES) * sizeof(int));
+ encode->slice_header_index = realloc(encode->slice_header_index,
+ (slice_num + NUM_SLICES) * sizeof(int));
+ memset(encode->slice_rawdata_index + slice_num, 0,
+ sizeof(int) * NUM_SLICES);
+ memset(encode->slice_rawdata_count + slice_num, 0,
+ sizeof(int) * NUM_SLICES);
+ memset(encode->slice_header_index + slice_num, 0,
+ sizeof(int) * NUM_SLICES);
+
+ encode->max_slice_num += NUM_SLICES;
+ if ((encode->slice_rawdata_index == NULL) ||
+ (encode->slice_header_index == NULL) ||
+ (encode->slice_rawdata_count == NULL)) {
+ vaStatus = VA_STATUS_ERROR_ALLOCATION_FAILED;
+ return vaStatus;
+ }
+ }
+ }
break;
case VAEncPackedHeaderParameterBufferType:
{
- struct encode_state *encode = &obj_context->codec_state.encode;
VAEncPackedHeaderParameterBuffer *param = (VAEncPackedHeaderParameterBuffer *)obj_buffer->buffer_store->buffer;
encode->last_packed_header_type = param->type;
- vaStatus = i965_encoder_render_packed_header_parameter_buffer(ctx,
+ if ((param->type == VAEncPackedHeaderRawData) ||
+ (param->type == VAEncPackedHeaderSlice)) {
+ vaStatus = I965_RENDER_ENCODE_BUFFER(packed_header_params_ext);
+ } else {
+ vaStatus = i965_encoder_render_packed_header_parameter_buffer(ctx,
obj_context,
obj_buffer,
va_enc_packed_type_to_idx(encode->last_packed_header_type));
+ }
break;
}
case VAEncPackedHeaderDataBufferType:
{
- struct encode_state *encode = &obj_context->codec_state.encode;
-
- assert(encode->last_packed_header_type == VAEncPackedHeaderSequence ||
- encode->last_packed_header_type == VAEncPackedHeaderPicture ||
- encode->last_packed_header_type == VAEncPackedHeaderSlice ||
+ if (encode->last_packed_header_type == 0) {
+ WARN_ONCE("the packed header data is passed without type!\n");
+ vaStatus = VA_STATUS_ERROR_INVALID_PARAMETER;
+ return vaStatus;
+ }
+ if (encode->last_packed_header_type == VAEncPackedHeaderRawData ||
+ encode->last_packed_header_type == VAEncPackedHeaderSlice) {
+ vaStatus = I965_RENDER_ENCODE_BUFFER(packed_header_data_ext);
+
+ /* When the PACKED_SLICE_HEADER flag is passed, it will use
+ * the packed_slice_header as the delimiter to decide how
+ * the packed rawdata is inserted for the given slice.
+ * Otherwise it will use the VAEncSliceParameterBuffer
+ * as the delimiter.
+ */
+ if (encode->packed_header_flag & VA_ENC_PACKED_HEADER_SLICE) {
+ /* store the first index of the packed header data for current slice */
+ if (encode->slice_rawdata_index[encode->slice_index] == 0) {
+ encode->slice_rawdata_index[encode->slice_index] =
+ SLICE_PACKED_DATA_INDEX_TYPE | (encode->num_packed_header_data_ext - 1);
+ }
+ encode->slice_rawdata_count[encode->slice_index]++;
+ if (encode->last_packed_header_type == VAEncPackedHeaderSlice) {
+ /* Found a packed slice_header delimiter; the packed data
+ * that follows belongs to the next slice.
+ */
+ encode->slice_header_index[encode->slice_index] =
+ SLICE_PACKED_DATA_INDEX_TYPE | (encode->num_packed_header_data_ext - 1);
+ encode->slice_index++;
+ /* Reallocate the buffer to record the index/count of
+ * packed_data for one slice.
+ */
+ if (encode->slice_index == encode->max_slice_num) {
+ int slice_num = encode->max_slice_num;
+
+ encode->slice_rawdata_index = realloc(encode->slice_rawdata_index,
+ (slice_num + NUM_SLICES) * sizeof(int));
+ encode->slice_rawdata_count = realloc(encode->slice_rawdata_count,
+ (slice_num + NUM_SLICES) * sizeof(int));
+ encode->slice_header_index = realloc(encode->slice_header_index,
+ (slice_num + NUM_SLICES) * sizeof(int));
+ memset(encode->slice_rawdata_index + slice_num, 0,
+ sizeof(int) * NUM_SLICES);
+ memset(encode->slice_rawdata_count + slice_num, 0,
+ sizeof(int) * NUM_SLICES);
+ memset(encode->slice_header_index + slice_num, 0,
+ sizeof(int) * NUM_SLICES);
+ encode->max_slice_num += NUM_SLICES;
+ }
+ }
+ } else {
+ if (vaStatus == VA_STATUS_SUCCESS) {
+ /* store the first index of the packed header data for current slice */
+ if (encode->slice_rawdata_index[encode->slice_index] == 0) {
+ encode->slice_rawdata_index[encode->slice_index] =
+ SLICE_PACKED_DATA_INDEX_TYPE | (encode->num_packed_header_data_ext - 1);
+ }
+ encode->slice_rawdata_count[encode->slice_index]++;
+ if (encode->last_packed_header_type == VAEncPackedHeaderSlice) {
+ if (encode->slice_header_index[encode->slice_index] == 0) {
+ encode->slice_header_index[encode->slice_index] =
+ SLICE_PACKED_DATA_INDEX_TYPE | (encode->num_packed_header_data_ext - 1);
+ } else {
+ WARN_ONCE("Multi slice header data is passed for"
+ " slice %d!\n", encode->slice_index);
+ }
+ }
+ }
+ }
+ } else {
+ ASSERT_RET(encode->last_packed_header_type == VAEncPackedHeaderSequence ||
+ encode->last_packed_header_type == VAEncPackedHeaderPicture ||
+ encode->last_packed_header_type == VAEncPackedHeaderSlice ||
(((encode->last_packed_header_type & VAEncPackedHeaderMiscMask) == VAEncPackedHeaderMiscMask) &&
- ((encode->last_packed_header_type & (~VAEncPackedHeaderMiscMask)) != 0)));
- vaStatus = i965_encoder_render_packed_header_data_buffer(ctx,
+ ((encode->last_packed_header_type & (~VAEncPackedHeaderMiscMask)) != 0)),
+ VA_STATUS_ERROR_ENCODING_ERROR);
+ vaStatus = i965_encoder_render_packed_header_data_buffer(ctx,
obj_context,
obj_buffer,
va_enc_packed_type_to_idx(encode->last_packed_header_type));
+ }
+ encode->last_packed_header_type = 0;
break;
}
@@ -2231,14 +2719,10 @@ i965_proc_render_picture(VADriverContextP ctx,
VAStatus vaStatus = VA_STATUS_SUCCESS;
int i;
- assert(obj_context);
-
- if (!obj_context)
- return VA_STATUS_ERROR_INVALID_CONTEXT;
+ ASSERT_RET(obj_context, VA_STATUS_ERROR_INVALID_CONTEXT);
for (i = 0; i < num_buffers && vaStatus == VA_STATUS_SUCCESS; i++) {
struct object_buffer *obj_buffer = BUFFER(buffers[i]);
- assert(obj_buffer);
if (!obj_buffer)
return VA_STATUS_ERROR_INVALID_BUFFER;
@@ -2269,13 +2753,13 @@ i965_RenderPicture(VADriverContextP ctx,
VAStatus vaStatus = VA_STATUS_ERROR_UNKNOWN;
obj_context = CONTEXT(context);
- assert(obj_context);
+ ASSERT_RET(obj_context, VA_STATUS_ERROR_INVALID_CONTEXT);
+
+ if (num_buffers <= 0)
+ return VA_STATUS_ERROR_INVALID_PARAMETER;
- if (!obj_context)
- return VA_STATUS_ERROR_INVALID_CONTEXT;
-
obj_config = obj_context->obj_config;
- assert(obj_config);
+ ASSERT_RET(obj_config, VA_STATUS_ERROR_INVALID_CONFIG);
if (VAEntrypointVideoProc == obj_config->entrypoint) {
vaStatus = i965_proc_render_picture(ctx, context, buffers, num_buffers);
@@ -2295,33 +2779,58 @@ i965_EndPicture(VADriverContextP ctx, VAContextID context)
struct object_context *obj_context = CONTEXT(context);
struct object_config *obj_config;
- assert(obj_context);
-
- if (!obj_context)
- return VA_STATUS_ERROR_INVALID_CONTEXT;
-
+ ASSERT_RET(obj_context, VA_STATUS_ERROR_INVALID_CONTEXT);
obj_config = obj_context->obj_config;
- assert(obj_config);
+ ASSERT_RET(obj_config, VA_STATUS_ERROR_INVALID_CONFIG);
if (obj_context->codec_type == CODEC_PROC) {
- assert(VAEntrypointVideoProc == obj_config->entrypoint);
+ ASSERT_RET(VAEntrypointVideoProc == obj_config->entrypoint, VA_STATUS_ERROR_UNSUPPORTED_ENTRYPOINT);
} else if (obj_context->codec_type == CODEC_ENC) {
- assert(VAEntrypointEncSlice == obj_config->entrypoint);
-
- assert(obj_context->codec_state.encode.pic_param ||
- obj_context->codec_state.encode.pic_param_ext);
- assert(obj_context->codec_state.encode.seq_param ||
- obj_context->codec_state.encode.seq_param_ext);
- assert(obj_context->codec_state.encode.num_slice_params >= 1 ||
- obj_context->codec_state.encode.num_slice_params_ext >= 1);
+ ASSERT_RET(VAEntrypointEncSlice == obj_config->entrypoint, VA_STATUS_ERROR_UNSUPPORTED_ENTRYPOINT);
+
+ if (obj_context->codec_state.encode.num_packed_header_params_ext !=
+ obj_context->codec_state.encode.num_packed_header_data_ext) {
+ WARN_ONCE("the packed header/data is not paired for encoding!\n");
+ return VA_STATUS_ERROR_INVALID_PARAMETER;
+ }
+ if (!(obj_context->codec_state.encode.pic_param ||
+ obj_context->codec_state.encode.pic_param_ext)) {
+ return VA_STATUS_ERROR_INVALID_PARAMETER;
+ }
+ if (!(obj_context->codec_state.encode.seq_param ||
+ obj_context->codec_state.encode.seq_param_ext)) {
+ return VA_STATUS_ERROR_INVALID_PARAMETER;
+ }
+ if ((obj_context->codec_state.encode.num_slice_params <=0) &&
+ (obj_context->codec_state.encode.num_slice_params_ext <=0)) {
+ return VA_STATUS_ERROR_INVALID_PARAMETER;
+ }
+
+ if ((obj_context->codec_state.encode.packed_header_flag & VA_ENC_PACKED_HEADER_SLICE) &&
+ (obj_context->codec_state.encode.num_slice_params_ext !=
+ obj_context->codec_state.encode.slice_index)) {
+ WARN_ONCE("packed slice_header data is missing for some slice"
+ " under packed SLICE_HEADER mode\n");
+ return VA_STATUS_ERROR_INVALID_PARAMETER;
+ }
} else {
- assert(obj_context->codec_state.decode.pic_param);
- assert(obj_context->codec_state.decode.num_slice_params >= 1);
- assert(obj_context->codec_state.decode.num_slice_datas >= 1);
- assert(obj_context->codec_state.decode.num_slice_params == obj_context->codec_state.decode.num_slice_datas);
+ if (obj_context->codec_state.decode.pic_param == NULL) {
+ return VA_STATUS_ERROR_INVALID_PARAMETER;
+ }
+ if (obj_context->codec_state.decode.num_slice_params <=0) {
+ return VA_STATUS_ERROR_INVALID_PARAMETER;
+ }
+ if (obj_context->codec_state.decode.num_slice_datas <=0) {
+ return VA_STATUS_ERROR_INVALID_PARAMETER;
+ }
+
+ if (obj_context->codec_state.decode.num_slice_params !=
+ obj_context->codec_state.decode.num_slice_datas) {
+ return VA_STATUS_ERROR_INVALID_PARAMETER;
+ }
}
- assert(obj_context->hw_context->run);
+ ASSERT_RET(obj_context->hw_context->run, VA_STATUS_ERROR_OPERATION_FAILED);
return obj_context->hw_context->run(ctx, obj_config->profile, &obj_context->codec_state, obj_context->hw_context);
}
@@ -2332,7 +2841,7 @@ i965_SyncSurface(VADriverContextP ctx,
struct i965_driver_data *i965 = i965_driver_data(ctx);
struct object_surface *obj_surface = SURFACE(render_target);
- assert(obj_surface);
+ ASSERT_RET(obj_surface, VA_STATUS_ERROR_INVALID_SURFACE);
if(obj_surface->bo)
drm_intel_bo_wait_rendering(obj_surface->bo);
@@ -2348,7 +2857,7 @@ i965_QuerySurfaceStatus(VADriverContextP ctx,
struct i965_driver_data *i965 = i965_driver_data(ctx);
struct object_surface *obj_surface = SURFACE(render_target);
- assert(obj_surface);
+ ASSERT_RET(obj_surface, VA_STATUS_ERROR_INVALID_SURFACE);
if (obj_surface->bo) {
if (drm_intel_bo_busy(obj_surface->bo)){
@@ -2410,7 +2919,16 @@ i965_display_attributes_init(VADriverContextP ctx)
);
i965->rotation_attrib = get_display_attribute(ctx, VADisplayAttribRotation);
- if (!i965->rotation_attrib) {
+ i965->brightness_attrib = get_display_attribute(ctx, VADisplayAttribBrightness);
+ i965->contrast_attrib = get_display_attribute(ctx, VADisplayAttribContrast);
+ i965->hue_attrib = get_display_attribute(ctx, VADisplayAttribHue);
+ i965->saturation_attrib = get_display_attribute(ctx, VADisplayAttribSaturation);
+
+ if (!i965->rotation_attrib ||
+ !i965->brightness_attrib ||
+ !i965->contrast_attrib ||
+ !i965->hue_attrib ||
+ !i965->saturation_attrib) {
goto error;
}
return true;
@@ -2553,7 +3071,7 @@ i965_CreateImage(VADriverContextP ctx,
struct object_image *obj_image;
VAStatus va_status = VA_STATUS_ERROR_OPERATION_FAILED;
VAImageID image_id;
- unsigned int width2, height2, size2, size;
+ unsigned int size2, size, awidth, aheight;
out_image->image_id = VA_INVALID_ID;
out_image->buf = VA_INVALID_ID;
@@ -2573,83 +3091,101 @@ i965_CreateImage(VADriverContextP ctx,
image->image_id = image_id;
image->buf = VA_INVALID_ID;
- size = width * height;
- width2 = (width + 1) / 2;
- height2 = (height + 1) / 2;
- size2 = width2 * height2;
+ awidth = ALIGN(width, i965->codec_info->min_linear_wpitch);
+
+ if ((format->fourcc == VA_FOURCC_YV12) ||
+ (format->fourcc == VA_FOURCC_I420)) {
+ if (awidth % 128 != 0) {
+ awidth = ALIGN(width, 128);
+ }
+ }
+
+ aheight = ALIGN(height, i965->codec_info->min_linear_hpitch);
+ size = awidth * aheight;
+ size2 = (awidth / 2) * (aheight / 2);
image->num_palette_entries = 0;
image->entry_bytes = 0;
memset(image->component_order, 0, sizeof(image->component_order));
switch (format->fourcc) {
- case VA_FOURCC('I','A','4','4'):
- case VA_FOURCC('A','I','4','4'):
+ case VA_FOURCC_IA44:
+ case VA_FOURCC_AI44:
image->num_planes = 1;
- image->pitches[0] = width;
+ image->pitches[0] = awidth;
image->offsets[0] = 0;
- image->data_size = image->offsets[0] + image->pitches[0] * height;
+ image->data_size = image->offsets[0] + image->pitches[0] * aheight;
image->num_palette_entries = 16;
image->entry_bytes = 3;
image->component_order[0] = 'R';
image->component_order[1] = 'G';
image->component_order[2] = 'B';
break;
- case VA_FOURCC('I','A','8','8'):
- case VA_FOURCC('A','I','8','8'):
+ case VA_FOURCC_IA88:
+ case VA_FOURCC_AI88:
image->num_planes = 1;
- image->pitches[0] = width * 2;
+ image->pitches[0] = awidth * 2;
image->offsets[0] = 0;
- image->data_size = image->offsets[0] + image->pitches[0] * height;
+ image->data_size = image->offsets[0] + image->pitches[0] * aheight;
image->num_palette_entries = 256;
image->entry_bytes = 3;
image->component_order[0] = 'R';
image->component_order[1] = 'G';
image->component_order[2] = 'B';
break;
- case VA_FOURCC('A','R','G','B'):
- case VA_FOURCC('A','B','G','R'):
- case VA_FOURCC('B','G','R','A'):
- case VA_FOURCC('R','G','B','A'):
- case VA_FOURCC('B','G','R','X'):
- case VA_FOURCC('R','G','B','X'):
+ case VA_FOURCC_ARGB:
+ case VA_FOURCC_ABGR:
+ case VA_FOURCC_BGRA:
+ case VA_FOURCC_RGBA:
+ case VA_FOURCC_BGRX:
+ case VA_FOURCC_RGBX:
image->num_planes = 1;
- image->pitches[0] = width * 4;
+ image->pitches[0] = awidth * 4;
image->offsets[0] = 0;
- image->data_size = image->offsets[0] + image->pitches[0] * height;
+ image->data_size = image->offsets[0] + image->pitches[0] * aheight;
break;
- case VA_FOURCC('Y','V','1','2'):
+ case VA_FOURCC_YV12:
image->num_planes = 3;
- image->pitches[0] = width;
+ image->pitches[0] = awidth;
image->offsets[0] = 0;
- image->pitches[1] = width2;
- image->offsets[1] = size + size2;
- image->pitches[2] = width2;
- image->offsets[2] = size;
+ image->pitches[1] = awidth / 2;
+ image->offsets[1] = size;
+ image->pitches[2] = awidth / 2;
+ image->offsets[2] = size + size2;
image->data_size = size + 2 * size2;
break;
- case VA_FOURCC('I','4','2','0'):
+ case VA_FOURCC_I420:
image->num_planes = 3;
- image->pitches[0] = width;
+ image->pitches[0] = awidth;
image->offsets[0] = 0;
- image->pitches[1] = width2;
+ image->pitches[1] = awidth / 2;
image->offsets[1] = size;
- image->pitches[2] = width2;
+ image->pitches[2] = awidth / 2;
image->offsets[2] = size + size2;
image->data_size = size + 2 * size2;
break;
- case VA_FOURCC('N','V','1','2'):
+ case VA_FOURCC_422H:
+ image->num_planes = 3;
+ image->pitches[0] = awidth;
+ image->offsets[0] = 0;
+ image->pitches[1] = awidth / 2;
+ image->offsets[1] = size;
+ image->pitches[2] = awidth / 2;
+ image->offsets[2] = size + (awidth / 2) * aheight;
+ image->data_size = size + 2 * ((awidth / 2) * aheight);
+ break;
+ case VA_FOURCC_NV12:
image->num_planes = 2;
- image->pitches[0] = width;
+ image->pitches[0] = awidth;
image->offsets[0] = 0;
- image->pitches[1] = width;
+ image->pitches[1] = awidth;
image->offsets[1] = size;
image->data_size = size + 2 * size2;
break;
- case VA_FOURCC('Y','U','Y','2'):
- case VA_FOURCC('U','Y','V','Y'):
+ case VA_FOURCC_YUY2:
+ case VA_FOURCC_UYVY:
image->num_planes = 1;
- image->pitches[0] = width * 2;
+ image->pitches[0] = awidth * 2;
image->offsets[0] = 0;
image->data_size = size * 2;
break;
@@ -2691,7 +3227,7 @@ i965_CreateImage(VADriverContextP ctx,
return va_status;
}
-void
+VAStatus
i965_check_alloc_surface_bo(VADriverContextP ctx,
struct object_surface *obj_surface,
int tiled,
@@ -2702,26 +3238,34 @@ i965_check_alloc_surface_bo(VADriverContextP ctx,
int region_width, region_height;
if (obj_surface->bo) {
- assert(obj_surface->fourcc);
- assert(obj_surface->fourcc == fourcc);
- assert(obj_surface->subsampling == subsampling);
- return;
+ ASSERT_RET(obj_surface->fourcc, VA_STATUS_ERROR_INVALID_SURFACE);
+ ASSERT_RET(obj_surface->fourcc == fourcc, VA_STATUS_ERROR_INVALID_SURFACE);
+ ASSERT_RET(obj_surface->subsampling == subsampling, VA_STATUS_ERROR_INVALID_SURFACE);
+ return VA_STATUS_SUCCESS;
}
obj_surface->x_cb_offset = 0; /* X offset is always 0 */
obj_surface->x_cr_offset = 0;
- if (tiled) {
- assert(fourcc != VA_FOURCC('I', '4', '2', '0') &&
- fourcc != VA_FOURCC('I', 'Y', 'U', 'V') &&
- fourcc != VA_FOURCC('Y', 'V', '1', '2'));
+ if ((tiled && !obj_surface->user_disable_tiling)) {
+ ASSERT_RET(fourcc != VA_FOURCC_I420 &&
+ fourcc != VA_FOURCC_IYUV &&
+ fourcc != VA_FOURCC_YV12,
+ VA_STATUS_ERROR_UNSUPPORTED_RT_FORMAT);
+ if (obj_surface->user_h_stride_set) {
+ ASSERT_RET(IS_ALIGNED(obj_surface->width, 128), VA_STATUS_ERROR_INVALID_PARAMETER);
+ } else
+ obj_surface->width = ALIGN(obj_surface->orig_width, 128);
+
+ if (obj_surface->user_v_stride_set) {
+ ASSERT_RET(IS_ALIGNED(obj_surface->height, 32), VA_STATUS_ERROR_INVALID_PARAMETER);
+ } else
+ obj_surface->height = ALIGN(obj_surface->orig_height, 32);
- obj_surface->width = ALIGN(obj_surface->orig_width, 128);
- obj_surface->height = ALIGN(obj_surface->orig_height, 32);
region_height = obj_surface->height;
switch (fourcc) {
- case VA_FOURCC('N', 'V', '1', '2'):
+ case VA_FOURCC_NV12:
assert(subsampling == SUBSAMPLE_YUV420);
obj_surface->cb_cr_pitch = obj_surface->width;
obj_surface->cb_cr_width = obj_surface->orig_width / 2;
@@ -2733,7 +3277,7 @@ i965_check_alloc_surface_bo(VADriverContextP ctx,
break;
- case VA_FOURCC('I', 'M', 'C', '1'):
+ case VA_FOURCC_IMC1:
assert(subsampling == SUBSAMPLE_YUV420);
obj_surface->cb_cr_pitch = obj_surface->width;
obj_surface->cb_cr_width = obj_surface->orig_width / 2;
@@ -2745,7 +3289,7 @@ i965_check_alloc_surface_bo(VADriverContextP ctx,
break;
- case VA_FOURCC('I', 'M', 'C', '3'):
+ case VA_FOURCC_IMC3:
assert(subsampling == SUBSAMPLE_YUV420);
obj_surface->cb_cr_pitch = obj_surface->width;
obj_surface->cb_cr_width = obj_surface->orig_width / 2;
@@ -2757,7 +3301,7 @@ i965_check_alloc_surface_bo(VADriverContextP ctx,
break;
- case VA_FOURCC('4', '2', '2', 'H'):
+ case VA_FOURCC_422H:
assert(subsampling == SUBSAMPLE_YUV422H);
obj_surface->cb_cr_pitch = obj_surface->width;
obj_surface->cb_cr_width = obj_surface->orig_width / 2;
@@ -2769,7 +3313,7 @@ i965_check_alloc_surface_bo(VADriverContextP ctx,
break;
- case VA_FOURCC('4', '2', '2', 'V'):
+ case VA_FOURCC_422V:
assert(subsampling == SUBSAMPLE_YUV422V);
obj_surface->cb_cr_pitch = obj_surface->width;
obj_surface->cb_cr_width = obj_surface->orig_width;
@@ -2781,7 +3325,7 @@ i965_check_alloc_surface_bo(VADriverContextP ctx,
break;
- case VA_FOURCC('4', '1', '1', 'P'):
+ case VA_FOURCC_411P:
assert(subsampling == SUBSAMPLE_YUV411);
obj_surface->cb_cr_pitch = obj_surface->width;
obj_surface->cb_cr_width = obj_surface->orig_width / 4;
@@ -2793,7 +3337,7 @@ i965_check_alloc_surface_bo(VADriverContextP ctx,
break;
- case VA_FOURCC('4', '4', '4', 'P'):
+ case VA_FOURCC_444P:
assert(subsampling == SUBSAMPLE_YUV444);
obj_surface->cb_cr_pitch = obj_surface->width;
obj_surface->cb_cr_width = obj_surface->orig_width;
@@ -2805,44 +3349,46 @@ i965_check_alloc_surface_bo(VADriverContextP ctx,
break;
- case VA_FOURCC('Y', '8', '0', '0'):
+ case VA_FOURCC_Y800:
assert(subsampling == SUBSAMPLE_YUV400);
- obj_surface->cb_cr_pitch = obj_surface->width;
+ obj_surface->cb_cr_pitch = 0;
obj_surface->cb_cr_width = 0;
obj_surface->cb_cr_height = 0;
- obj_surface->y_cb_offset = obj_surface->height;
- obj_surface->y_cr_offset = obj_surface->y_cb_offset + ALIGN(obj_surface->cb_cr_height, 32);
+ obj_surface->y_cb_offset = 0;
+ obj_surface->y_cr_offset = 0;
region_width = obj_surface->width;
- region_height = obj_surface->height + ALIGN(obj_surface->cb_cr_height, 32) * 2;
+ region_height = obj_surface->height;
break;
- case VA_FOURCC('Y', 'U', 'Y', '2'):
- case VA_FOURCC('U', 'Y', 'V', 'Y'):
+ case VA_FOURCC_YUY2:
+ case VA_FOURCC_UYVY:
assert(subsampling == SUBSAMPLE_YUV422H);
- obj_surface->cb_cr_pitch = obj_surface->width * 2;
+ obj_surface->width = ALIGN(obj_surface->orig_width * 2, 128);
+ obj_surface->cb_cr_pitch = obj_surface->width;
obj_surface->y_cb_offset = 0;
obj_surface->y_cr_offset = 0;
obj_surface->cb_cr_width = obj_surface->orig_width / 2;
obj_surface->cb_cr_height = obj_surface->orig_height / 2;
- region_width = obj_surface->width * 2;
+ region_width = obj_surface->width;
region_height = obj_surface->height;
break;
- case VA_FOURCC('R', 'G', 'B', 'A'):
- case VA_FOURCC('R', 'G', 'B', 'X'):
- case VA_FOURCC('B', 'G', 'R', 'A'):
- case VA_FOURCC('B', 'G', 'R', 'X'):
+ case VA_FOURCC_RGBA:
+ case VA_FOURCC_RGBX:
+ case VA_FOURCC_BGRA:
+ case VA_FOURCC_BGRX:
assert(subsampling == SUBSAMPLE_RGBX);
- region_width = obj_surface->width * 4;
+ obj_surface->width = ALIGN(obj_surface->orig_width * 4, 128);
+ region_width = obj_surface->width;
region_height = obj_surface->height;
break;
default:
/* Never get here */
- assert(0);
+ ASSERT_RET(0, VA_STATUS_ERROR_UNSUPPORTED_RT_FORMAT);
break;
}
} else {
@@ -2855,7 +3401,7 @@ i965_check_alloc_surface_bo(VADriverContextP ctx,
region_height = obj_surface->height;
switch (fourcc) {
- case VA_FOURCC('N', 'V', '1', '2'):
+ case VA_FOURCC_NV12:
obj_surface->y_cb_offset = obj_surface->height;
obj_surface->y_cr_offset = obj_surface->height;
obj_surface->cb_cr_width = obj_surface->orig_width / 2;
@@ -2864,9 +3410,18 @@ i965_check_alloc_surface_bo(VADriverContextP ctx,
region_height = obj_surface->height + obj_surface->height / 2;
break;
- case VA_FOURCC('Y', 'V', '1', '2'):
- case VA_FOURCC('I', '4', '2', '0'):
- if (fourcc == VA_FOURCC('Y', 'V', '1', '2')) {
+ case VA_FOURCC_YV16:
+ obj_surface->cb_cr_width = obj_surface->orig_width / 2;
+ obj_surface->cb_cr_height = obj_surface->orig_height;
+ obj_surface->y_cr_offset = obj_surface->height;
+ obj_surface->y_cb_offset = obj_surface->y_cr_offset + ALIGN(obj_surface->cb_cr_height, 32) / 2;
+ obj_surface->cb_cr_pitch = obj_surface->width / 2;
+ region_height = obj_surface->height + ALIGN(obj_surface->cb_cr_height, 32);
+ break;
+
+ case VA_FOURCC_YV12:
+ case VA_FOURCC_I420:
+ if (fourcc == VA_FOURCC_YV12) {
obj_surface->y_cr_offset = obj_surface->height;
obj_surface->y_cb_offset = obj_surface->height + obj_surface->height / 4;
} else {
@@ -2880,34 +3435,36 @@ i965_check_alloc_surface_bo(VADriverContextP ctx,
region_height = obj_surface->height + obj_surface->height / 2;
break;
- case VA_FOURCC('Y', 'U', 'Y', '2'):
- case VA_FOURCC('U', 'Y', 'V', 'Y'):
+ case VA_FOURCC_YUY2:
+ case VA_FOURCC_UYVY:
+ obj_surface->width = ALIGN(obj_surface->orig_width * 2, i965->codec_info->min_linear_wpitch);
obj_surface->y_cb_offset = 0;
obj_surface->y_cr_offset = 0;
obj_surface->cb_cr_width = obj_surface->orig_width / 2;
obj_surface->cb_cr_height = obj_surface->orig_height;
- obj_surface->cb_cr_pitch = obj_surface->width * 2;
- region_width = obj_surface->width * 2;
+ obj_surface->cb_cr_pitch = obj_surface->width;
+ region_width = obj_surface->width;
region_height = obj_surface->height;
break;
- case VA_FOURCC('R', 'G', 'B', 'A'):
- case VA_FOURCC('R', 'G', 'B', 'X'):
- case VA_FOURCC('B', 'G', 'R', 'A'):
- case VA_FOURCC('B', 'G', 'R', 'X'):
- region_width = obj_surface->width * 4;
+ case VA_FOURCC_RGBA:
+ case VA_FOURCC_RGBX:
+ case VA_FOURCC_BGRA:
+ case VA_FOURCC_BGRX:
+ obj_surface->width = ALIGN(obj_surface->orig_width * 4, i965->codec_info->min_linear_wpitch);
+ region_width = obj_surface->width;
region_height = obj_surface->height;
break;
default:
/* Never get here */
- assert(0);
+ ASSERT_RET(0, VA_STATUS_ERROR_UNSUPPORTED_RT_FORMAT);
break;
}
}
obj_surface->size = ALIGN(region_width * region_height, 0x1000);
- if (tiled) {
+ if ((tiled && !obj_surface->user_disable_tiling)) {
uint32_t tiling_mode = I915_TILING_Y; /* always uses Y-tiled format */
unsigned long pitch;
@@ -2920,9 +3477,7 @@ i965_check_alloc_surface_bo(VADriverContextP ctx,
&pitch,
0);
assert(tiling_mode == I915_TILING_Y);
- assert(pitch == obj_surface->width ||
- pitch == obj_surface->width * 2 ||
- pitch == obj_surface->width * 4) ;
+ assert(pitch == obj_surface->width);
} else {
obj_surface->bo = dri_bo_alloc(i965->intel.bufmgr,
"vaapi surface",
@@ -2933,6 +3488,7 @@ i965_check_alloc_surface_bo(VADriverContextP ctx,
obj_surface->fourcc = fourcc;
obj_surface->subsampling = subsampling;
assert(obj_surface->bo);
+ return VA_STATUS_SUCCESS;
}
VAStatus i965_DeriveImage(VADriverContextP ctx,
@@ -2954,13 +3510,15 @@ VAStatus i965_DeriveImage(VADriverContextP ctx,
if (!obj_surface->bo) {
unsigned int is_tiled = 0;
- unsigned int fourcc = VA_FOURCC('Y', 'V', '1', '2');
+ unsigned int fourcc = VA_FOURCC_YV12;
i965_guess_surface_format(ctx, surface, &fourcc, &is_tiled);
int sampling = get_sampling_from_fourcc(fourcc);
- i965_check_alloc_surface_bo(ctx, obj_surface, is_tiled, fourcc, sampling);
+ va_status = i965_check_alloc_surface_bo(ctx, obj_surface, is_tiled, fourcc, sampling);
+ if (va_status != VA_STATUS_SUCCESS)
+ return va_status;
}
- assert(obj_surface->fourcc);
+ ASSERT_RET(obj_surface->fourcc, VA_STATUS_ERROR_INVALID_SURFACE);
w_pitch = obj_surface->width;
@@ -2994,7 +3552,17 @@ VAStatus i965_DeriveImage(VADriverContextP ctx,
image->format.bits_per_pixel = 12;
switch (image->format.fourcc) {
- case VA_FOURCC('Y', 'V', '1', '2'):
+ case VA_FOURCC_YV12:
+ image->num_planes = 3;
+ image->pitches[0] = w_pitch; /* Y */
+ image->offsets[0] = 0;
+ image->pitches[1] = obj_surface->cb_cr_pitch; /* V */
+ image->offsets[1] = w_pitch * obj_surface->y_cr_offset;
+ image->pitches[2] = obj_surface->cb_cr_pitch; /* U */
+ image->offsets[2] = w_pitch * obj_surface->y_cb_offset;
+ break;
+
+ case VA_FOURCC_YV16:
image->num_planes = 3;
image->pitches[0] = w_pitch; /* Y */
image->offsets[0] = 0;
@@ -3004,7 +3572,7 @@ VAStatus i965_DeriveImage(VADriverContextP ctx,
image->offsets[2] = w_pitch * obj_surface->y_cb_offset;
break;
- case VA_FOURCC('N', 'V', '1', '2'):
+ case VA_FOURCC_NV12:
image->num_planes = 2;
image->pitches[0] = w_pitch; /* Y */
image->offsets[0] = 0;
@@ -3012,7 +3580,12 @@ VAStatus i965_DeriveImage(VADriverContextP ctx,
image->offsets[1] = w_pitch * obj_surface->y_cb_offset;
break;
- case VA_FOURCC('I', '4', '2', '0'):
+ case VA_FOURCC_I420:
+ case VA_FOURCC_422H:
+ case VA_FOURCC_IMC3:
+ case VA_FOURCC_444P:
+ case VA_FOURCC_422V:
+ case VA_FOURCC_411P:
image->num_planes = 3;
image->pitches[0] = w_pitch; /* Y */
image->offsets[0] = 0;
@@ -3021,21 +3594,20 @@ VAStatus i965_DeriveImage(VADriverContextP ctx,
image->pitches[2] = obj_surface->cb_cr_pitch; /* V */
image->offsets[2] = w_pitch * obj_surface->y_cr_offset;
break;
- case VA_FOURCC('Y', 'U', 'Y', '2'):
+
+ case VA_FOURCC_YUY2:
+ case VA_FOURCC_UYVY:
+ case VA_FOURCC_Y800:
image->num_planes = 1;
- image->pitches[0] = obj_surface->width * 2; /* Y, width is aligned already */
+ image->pitches[0] = obj_surface->width; /* Y, width is aligned already */
image->offsets[0] = 0;
- image->pitches[1] = obj_surface->width * 2; /* U */
- image->offsets[1] = 0;
- image->pitches[2] = obj_surface->width * 2; /* V */
- image->offsets[2] = 0;
break;
- case VA_FOURCC('R', 'G', 'B', 'A'):
- case VA_FOURCC('R', 'G', 'B', 'X'):
- case VA_FOURCC('B', 'G', 'R', 'A'):
- case VA_FOURCC('B', 'G', 'R', 'X'):
+ case VA_FOURCC_RGBA:
+ case VA_FOURCC_RGBX:
+ case VA_FOURCC_BGRA:
+ case VA_FOURCC_BGRX:
image->num_planes = 1;
- image->pitches[0] = obj_surface->width * 4;
+ image->pitches[0] = obj_surface->width;
break;
default:
goto error;
@@ -3066,6 +3638,7 @@ VAStatus i965_DeriveImage(VADriverContextP ctx,
*out_image = *image;
obj_surface->flags |= SURFACE_DERIVED;
+ obj_surface->derived_image_id = image_id;
obj_image->derived_surface = surface;
return VA_STATUS_SUCCESS;
@@ -3109,6 +3682,7 @@ i965_DestroyImage(VADriverContextP ctx, VAImageID image)
if (obj_surface) {
obj_surface->flags &= ~SURFACE_DERIVED;
+ obj_surface->derived_image_id = VA_INVALID_ID;
}
i965_destroy_image(&i965->image_heap, (struct object_base *)obj_image);
@@ -3146,51 +3720,12 @@ i965_SetImagePalette(VADriverContextP ctx,
static int
get_sampling_from_fourcc(unsigned int fourcc)
{
- int surface_sampling = -1;
-
- switch (fourcc) {
- case VA_FOURCC('N', 'V', '1', '2'):
- case VA_FOURCC('Y', 'V', '1', '2'):
- case VA_FOURCC('I', '4', '2', '0'):
- case VA_FOURCC('I', 'Y', 'U', 'V'):
- case VA_FOURCC('I', 'M', 'C', '1'):
- case VA_FOURCC('I', 'M', 'C', '3'):
- surface_sampling = SUBSAMPLE_YUV420;
- break;
- case VA_FOURCC('Y', 'U', 'Y', '2'):
- case VA_FOURCC('U', 'Y', 'V', 'Y'):
- case VA_FOURCC('4', '2', '2', 'H'):
- surface_sampling = SUBSAMPLE_YUV422H;
- break;
- case VA_FOURCC('4', '2', '2', 'V'):
- surface_sampling = SUBSAMPLE_YUV422V;
- break;
-
- case VA_FOURCC('4', '4', '4', 'P'):
- surface_sampling = SUBSAMPLE_YUV444;
- break;
-
- case VA_FOURCC('4', '1', '1', 'P'):
- surface_sampling = SUBSAMPLE_YUV411;
- break;
-
- case VA_FOURCC('Y', '8', '0', '0'):
- surface_sampling = SUBSAMPLE_YUV400;
- break;
- case VA_FOURCC('R','G','B','A'):
- case VA_FOURCC('R','G','B','X'):
- case VA_FOURCC('B','G','R','A'):
- case VA_FOURCC('B','G','R','X'):
- surface_sampling = SUBSAMPLE_RGBX;
- break;
- default:
- /* Never get here */
- assert(0);
- break;
-
- }
+ const i965_fourcc_info *info = get_fourcc_info(fourcc);
- return surface_sampling;
+ if (info && (info->flag & I_S))
+ return info->subsampling;
+ else
+ return -1;
}
static inline void
@@ -3207,7 +3742,7 @@ memcpy_pic(uint8_t *dst, unsigned int dst_stride,
}
}
-static void
+static VAStatus
get_image_i420(struct object_image *obj_image, uint8_t *image_data,
struct object_surface *obj_surface,
const VARectangle *rect)
@@ -3217,11 +3752,12 @@ get_image_i420(struct object_image *obj_image, uint8_t *image_data,
const int U = obj_image->image.format.fourcc == obj_surface->fourcc ? 1 : 2;
const int V = obj_image->image.format.fourcc == obj_surface->fourcc ? 2 : 1;
unsigned int tiling, swizzle;
+ VAStatus va_status = VA_STATUS_SUCCESS;
if (!obj_surface->bo)
- return;
+ return VA_STATUS_ERROR_INVALID_SURFACE;
- assert(obj_surface->fourcc);
+ ASSERT_RET(obj_surface->fourcc, VA_STATUS_ERROR_INVALID_SURFACE);
dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
if (tiling != I915_TILING_NONE)
@@ -3230,7 +3766,7 @@ get_image_i420(struct object_image *obj_image, uint8_t *image_data,
dri_bo_map(obj_surface->bo, 0);
if (!obj_surface->bo->virtual)
- return;
+ return VA_STATUS_ERROR_INVALID_SURFACE;
/* Dest VA image has either I420 or YV12 format.
Source VA surface alway has I420 format */
@@ -3266,18 +3802,21 @@ get_image_i420(struct object_image *obj_image, uint8_t *image_data,
drm_intel_gem_bo_unmap_gtt(obj_surface->bo);
else
dri_bo_unmap(obj_surface->bo);
+
+ return va_status;
}
-static void
+static VAStatus
get_image_nv12(struct object_image *obj_image, uint8_t *image_data,
struct object_surface *obj_surface,
const VARectangle *rect)
{
uint8_t *dst[2], *src[2];
unsigned int tiling, swizzle;
+ VAStatus va_status = VA_STATUS_SUCCESS;
if (!obj_surface->bo)
- return;
+ return VA_STATUS_ERROR_INVALID_SURFACE;
assert(obj_surface->fourcc);
dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
@@ -3288,7 +3827,7 @@ get_image_nv12(struct object_image *obj_image, uint8_t *image_data,
dri_bo_map(obj_surface->bo, 0);
if (!obj_surface->bo->virtual)
- return;
+ return VA_STATUS_ERROR_INVALID_SURFACE;
/* Both dest VA image and source surface have NV12 format */
dst[0] = image_data + obj_image->image.offsets[0];
@@ -3314,18 +3853,21 @@ get_image_nv12(struct object_image *obj_image, uint8_t *image_data,
drm_intel_gem_bo_unmap_gtt(obj_surface->bo);
else
dri_bo_unmap(obj_surface->bo);
+
+ return va_status;
}
-static void
+static VAStatus
get_image_yuy2(struct object_image *obj_image, uint8_t *image_data,
struct object_surface *obj_surface,
const VARectangle *rect)
{
uint8_t *dst, *src;
unsigned int tiling, swizzle;
+ VAStatus va_status = VA_STATUS_SUCCESS;
if (!obj_surface->bo)
- return;
+ return VA_STATUS_ERROR_INVALID_SURFACE;
assert(obj_surface->fourcc);
dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
@@ -3336,7 +3878,7 @@ get_image_yuy2(struct object_image *obj_image, uint8_t *image_data,
dri_bo_map(obj_surface->bo, 0);
if (!obj_surface->bo->virtual)
- return;
+ return VA_STATUS_ERROR_INVALID_SURFACE;
/* Both dest VA image and source surface have YUYV format */
dst = image_data + obj_image->image.offsets[0];
@@ -3353,121 +3895,64 @@ get_image_yuy2(struct object_image *obj_image, uint8_t *image_data,
drm_intel_gem_bo_unmap_gtt(obj_surface->bo);
else
dri_bo_unmap(obj_surface->bo);
+
+ return va_status;
}
static VAStatus
i965_sw_getimage(VADriverContextP ctx,
- VASurfaceID surface,
- int x, /* coordinates of the upper left source pixel */
- int y,
- unsigned int width, /* width and height of the region */
- unsigned int height,
- VAImageID image)
+ struct object_surface *obj_surface, struct object_image *obj_image,
+ const VARectangle *rect)
{
- struct i965_driver_data *i965 = i965_driver_data(ctx);
+ struct i965_driver_data * const i965 = i965_driver_data(ctx);
struct i965_render_state *render_state = &i965->render_state;
-
- struct object_surface *obj_surface = SURFACE(surface);
- if (!obj_surface)
- return VA_STATUS_ERROR_INVALID_SURFACE;
-
- struct object_image *obj_image = IMAGE(image);
- if (!obj_image)
- return VA_STATUS_ERROR_INVALID_IMAGE;
-
- if (x < 0 || y < 0)
- return VA_STATUS_ERROR_INVALID_PARAMETER;
- if (x + width > obj_surface->orig_width ||
- y + height > obj_surface->orig_height)
- return VA_STATUS_ERROR_INVALID_PARAMETER;
- if (x + width > obj_image->image.width ||
- y + height > obj_image->image.height)
- return VA_STATUS_ERROR_INVALID_PARAMETER;
+ void *image_data = NULL;
+ VAStatus va_status;
if (obj_surface->fourcc != obj_image->image.format.fourcc)
return VA_STATUS_ERROR_INVALID_IMAGE_FORMAT;
- VAStatus va_status;
- void *image_data = NULL;
-
va_status = i965_MapBuffer(ctx, obj_image->image.buf, &image_data);
if (va_status != VA_STATUS_SUCCESS)
return va_status;
- VARectangle rect;
- rect.x = x;
- rect.y = y;
- rect.width = width;
- rect.height = height;
-
switch (obj_image->image.format.fourcc) {
- case VA_FOURCC('Y','V','1','2'):
- case VA_FOURCC('I','4','2','0'):
+ case VA_FOURCC_YV12:
+ case VA_FOURCC_I420:
/* I420 is native format for MPEG-2 decoded surfaces */
if (render_state->interleaved_uv)
goto operation_failed;
- get_image_i420(obj_image, image_data, obj_surface, &rect);
+ get_image_i420(obj_image, image_data, obj_surface, rect);
break;
- case VA_FOURCC('N','V','1','2'):
+ case VA_FOURCC_NV12:
/* NV12 is native format for H.264 decoded surfaces */
if (!render_state->interleaved_uv)
goto operation_failed;
- get_image_nv12(obj_image, image_data, obj_surface, &rect);
+ get_image_nv12(obj_image, image_data, obj_surface, rect);
break;
- case VA_FOURCC('Y','U','Y','2'):
+ case VA_FOURCC_YUY2:
/* YUY2 is the format supported by overlay plane */
- get_image_yuy2(obj_image, image_data, obj_surface, &rect);
+ get_image_yuy2(obj_image, image_data, obj_surface, rect);
break;
default:
operation_failed:
va_status = VA_STATUS_ERROR_OPERATION_FAILED;
break;
}
+ if (va_status != VA_STATUS_SUCCESS)
+ return va_status;
- i965_UnmapBuffer(ctx, obj_image->image.buf);
+ va_status = i965_UnmapBuffer(ctx, obj_image->image.buf);
return va_status;
}
static VAStatus
i965_hw_getimage(VADriverContextP ctx,
- VASurfaceID surface,
- int x, /* coordinates of the upper left source pixel */
- int y,
- unsigned int width, /* width and height of the region */
- unsigned int height,
- VAImageID image)
+ struct object_surface *obj_surface, struct object_image *obj_image,
+ const VARectangle *rect)
{
- struct i965_driver_data *i965 = i965_driver_data(ctx);
struct i965_surface src_surface;
struct i965_surface dst_surface;
- VAStatus va_status;
- VARectangle rect;
- struct object_surface *obj_surface = SURFACE(surface);
- struct object_image *obj_image = IMAGE(image);
-
- if (!obj_surface)
- return VA_STATUS_ERROR_INVALID_SURFACE;
-
- if (!obj_image)
- return VA_STATUS_ERROR_INVALID_IMAGE;
-
- if (x < 0 || y < 0)
- return VA_STATUS_ERROR_INVALID_PARAMETER;
- if (x + width > obj_surface->orig_width ||
- y + height > obj_surface->orig_height)
- return VA_STATUS_ERROR_INVALID_PARAMETER;
- if (x + width > obj_image->image.width ||
- y + height > obj_image->image.height)
- return VA_STATUS_ERROR_INVALID_PARAMETER;
-
- if (!obj_surface->bo)
- return VA_STATUS_SUCCESS;
- assert(obj_image->bo); // image bo is always created, see i965_CreateImage()
-
- rect.x = x;
- rect.y = y;
- rect.width = width;
- rect.height = height;
src_surface.base = (struct object_base *)obj_surface;
src_surface.type = I965_SURFACE_TYPE_SURFACE;
@@ -3477,14 +3962,7 @@ i965_hw_getimage(VADriverContextP ctx,
dst_surface.type = I965_SURFACE_TYPE_IMAGE;
dst_surface.flags = I965_SURFACE_FLAG_FRAME;
- va_status = i965_image_processing(ctx,
- &src_surface,
- &rect,
- &dst_surface,
- &rect);
-
-
- return va_status;
+ return i965_image_processing(ctx, &src_surface, rect, &dst_surface, rect);
}
VAStatus
@@ -3497,25 +3975,46 @@ i965_GetImage(VADriverContextP ctx,
VAImageID image)
{
struct i965_driver_data * const i965 = i965_driver_data(ctx);
+ struct object_surface * const obj_surface = SURFACE(surface);
+ struct object_image * const obj_image = IMAGE(image);
+ VARectangle rect;
VAStatus va_status;
+ if (!obj_surface)
+ return VA_STATUS_ERROR_INVALID_SURFACE;
+ if (!obj_surface->bo) /* don't get anything, keep previous data */
+ return VA_STATUS_SUCCESS;
+ if (is_surface_busy(i965, obj_surface))
+ return VA_STATUS_ERROR_SURFACE_BUSY;
+
+ if (!obj_image || !obj_image->bo)
+ return VA_STATUS_ERROR_INVALID_IMAGE;
+ if (is_image_busy(i965, obj_image))
+ return VA_STATUS_ERROR_SURFACE_BUSY;
+
+ if (x < 0 || y < 0)
+ return VA_STATUS_ERROR_INVALID_PARAMETER;
+ if (x + width > obj_surface->orig_width ||
+ y + height > obj_surface->orig_height)
+ return VA_STATUS_ERROR_INVALID_PARAMETER;
+ if (x + width > obj_image->image.width ||
+ y + height > obj_image->image.height)
+ return VA_STATUS_ERROR_INVALID_PARAMETER;
+
+ rect.x = x;
+ rect.y = y;
+ rect.width = width;
+ rect.height = height;
+
if (HAS_ACCELERATED_GETIMAGE(i965))
- va_status = i965_hw_getimage(ctx,
- surface,
- x, y,
- width, height,
- image);
+ va_status = i965_hw_getimage(ctx, obj_surface, obj_image, &rect);
else
- va_status = i965_sw_getimage(ctx,
- surface,
- x, y,
- width, height,
- image);
+ va_status = i965_sw_getimage(ctx, obj_surface, obj_image, &rect);
return va_status;
}
-static void
+static VAStatus
put_image_i420(struct object_surface *obj_surface,
const VARectangle *dst_rect,
struct object_image *obj_image, uint8_t *image_data,
@@ -3526,13 +4025,13 @@ put_image_i420(struct object_surface *obj_surface,
const int U = obj_image->image.format.fourcc == obj_surface->fourcc ? 1 : 2;
const int V = obj_image->image.format.fourcc == obj_surface->fourcc ? 2 : 1;
unsigned int tiling, swizzle;
+ VAStatus va_status = VA_STATUS_SUCCESS;
- if (!obj_surface->bo)
- return;
+ ASSERT_RET(obj_surface->bo, VA_STATUS_ERROR_INVALID_SURFACE);
- assert(obj_surface->fourcc);
- assert(dst_rect->width == src_rect->width);
- assert(dst_rect->height == src_rect->height);
+ ASSERT_RET(obj_surface->fourcc, VA_STATUS_ERROR_INVALID_SURFACE);
+ ASSERT_RET(dst_rect->width == src_rect->width, VA_STATUS_ERROR_UNIMPLEMENTED);
+ ASSERT_RET(dst_rect->height == src_rect->height, VA_STATUS_ERROR_UNIMPLEMENTED);
dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
if (tiling != I915_TILING_NONE)
@@ -3541,7 +4040,7 @@ put_image_i420(struct object_surface *obj_surface,
dri_bo_map(obj_surface->bo, 0);
if (!obj_surface->bo->virtual)
- return;
+ return VA_STATUS_ERROR_INVALID_SURFACE;
/* Dest VA image has either I420 or YV12 format.
Source VA surface alway has I420 format */
@@ -3577,9 +4076,11 @@ put_image_i420(struct object_surface *obj_surface,
drm_intel_gem_bo_unmap_gtt(obj_surface->bo);
else
dri_bo_unmap(obj_surface->bo);
+
+ return va_status;
}
-static void
+static VAStatus
put_image_nv12(struct object_surface *obj_surface,
const VARectangle *dst_rect,
struct object_image *obj_image, uint8_t *image_data,
@@ -3587,13 +4088,14 @@ put_image_nv12(struct object_surface *obj_surface,
{
uint8_t *dst[2], *src[2];
unsigned int tiling, swizzle;
+ VAStatus va_status = VA_STATUS_SUCCESS;
if (!obj_surface->bo)
- return;
+ return VA_STATUS_ERROR_INVALID_SURFACE;
- assert(obj_surface->fourcc);
- assert(dst_rect->width == src_rect->width);
- assert(dst_rect->height == src_rect->height);
+ ASSERT_RET(obj_surface->fourcc, VA_STATUS_ERROR_INVALID_SURFACE);
+ ASSERT_RET(dst_rect->width == src_rect->width, VA_STATUS_ERROR_UNIMPLEMENTED);
+ ASSERT_RET(dst_rect->height == src_rect->height, VA_STATUS_ERROR_UNIMPLEMENTED);
dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
if (tiling != I915_TILING_NONE)
@@ -3602,7 +4104,7 @@ put_image_nv12(struct object_surface *obj_surface,
dri_bo_map(obj_surface->bo, 0);
if (!obj_surface->bo->virtual)
- return;
+ return VA_STATUS_ERROR_INVALID_SURFACE;
/* Both dest VA image and source surface have NV12 format */
dst[0] = (uint8_t *)obj_surface->bo->virtual;
@@ -3628,9 +4130,11 @@ put_image_nv12(struct object_surface *obj_surface,
drm_intel_gem_bo_unmap_gtt(obj_surface->bo);
else
dri_bo_unmap(obj_surface->bo);
+
+ return va_status;
}
-static void
+static VAStatus
put_image_yuy2(struct object_surface *obj_surface,
const VARectangle *dst_rect,
struct object_image *obj_image, uint8_t *image_data,
@@ -3638,13 +4142,12 @@ put_image_yuy2(struct object_surface *obj_surface,
{
uint8_t *dst, *src;
unsigned int tiling, swizzle;
+ VAStatus va_status = VA_STATUS_SUCCESS;
- if (!obj_surface->bo)
- return;
-
- assert(obj_surface->fourcc);
- assert(dst_rect->width == src_rect->width);
- assert(dst_rect->height == src_rect->height);
+ ASSERT_RET(obj_surface->bo, VA_STATUS_ERROR_INVALID_SURFACE);
+ ASSERT_RET(obj_surface->fourcc, VA_STATUS_ERROR_INVALID_SURFACE);
+ ASSERT_RET(dst_rect->width == src_rect->width, VA_STATUS_ERROR_UNIMPLEMENTED);
+ ASSERT_RET(dst_rect->height == src_rect->height, VA_STATUS_ERROR_UNIMPLEMENTED);
dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
if (tiling != I915_TILING_NONE)
@@ -3653,7 +4156,7 @@ put_image_yuy2(struct object_surface *obj_surface,
dri_bo_map(obj_surface->bo, 0);
if (!obj_surface->bo->virtual)
- return;
+ return VA_STATUS_ERROR_INVALID_SURFACE;
/* Both dest VA image and source surface have YUY2 format */
dst = (uint8_t *)obj_surface->bo->virtual;
@@ -3670,45 +4173,21 @@ put_image_yuy2(struct object_surface *obj_surface,
drm_intel_gem_bo_unmap_gtt(obj_surface->bo);
else
dri_bo_unmap(obj_surface->bo);
-}
+ return va_status;
+}
static VAStatus
i965_sw_putimage(VADriverContextP ctx,
- VASurfaceID surface,
- VAImageID image,
- int src_x,
- int src_y,
- unsigned int src_width,
- unsigned int src_height,
- int dest_x,
- int dest_y,
- unsigned int dest_width,
- unsigned int dest_height)
+ struct object_surface *obj_surface, struct object_image *obj_image,
+ const VARectangle *src_rect, const VARectangle *dst_rect)
{
- struct i965_driver_data *i965 = i965_driver_data(ctx);
- struct object_surface *obj_surface = SURFACE(surface);
-
- if (!obj_surface)
- return VA_STATUS_ERROR_INVALID_SURFACE;
-
- struct object_image *obj_image = IMAGE(image);
- if (!obj_image)
- return VA_STATUS_ERROR_INVALID_IMAGE;
-
- if (src_x < 0 || src_y < 0)
- return VA_STATUS_ERROR_INVALID_PARAMETER;
- if (src_x + src_width > obj_image->image.width ||
- src_y + src_height > obj_image->image.height)
- return VA_STATUS_ERROR_INVALID_PARAMETER;
- if (dest_x < 0 || dest_y < 0)
- return VA_STATUS_ERROR_INVALID_PARAMETER;
- if (dest_x + dest_width > obj_surface->orig_width ||
- dest_y + dest_height > obj_surface->orig_height)
- return VA_STATUS_ERROR_INVALID_PARAMETER;
+ VAStatus va_status = VA_STATUS_SUCCESS;
+ void *image_data = NULL;
/* XXX: don't allow scaling */
- if (src_width != dest_width || src_height != dest_height)
+ if (src_rect->width != dst_rect->width ||
+ src_rect->height != dst_rect->height)
return VA_STATUS_ERROR_INVALID_PARAMETER;
if (obj_surface->fourcc) {
@@ -3719,7 +4198,7 @@ i965_sw_putimage(VADriverContextP ctx,
else {
/* VA is surface not used for decoding, use same VA image format */
- i965_check_alloc_surface_bo(
+ va_status = i965_check_alloc_surface_bo(
ctx,
obj_surface,
0, /* XXX: don't use tiled surface */
@@ -3727,80 +4206,42 @@ i965_sw_putimage(VADriverContextP ctx,
get_sampling_from_fourcc (obj_image->image.format.fourcc));
}
- VAStatus va_status;
- void *image_data = NULL;
+ if (va_status != VA_STATUS_SUCCESS)
+ return va_status;
va_status = i965_MapBuffer(ctx, obj_image->image.buf, &image_data);
if (va_status != VA_STATUS_SUCCESS)
return va_status;
-
- VARectangle src_rect, dest_rect;
- src_rect.x = src_x;
- src_rect.y = src_y;
- src_rect.width = src_width;
- src_rect.height = src_height;
- dest_rect.x = dest_x;
- dest_rect.y = dest_y;
- dest_rect.width = dest_width;
- dest_rect.height = dest_height;
switch (obj_image->image.format.fourcc) {
- case VA_FOURCC('Y','V','1','2'):
- case VA_FOURCC('I','4','2','0'):
- put_image_i420(obj_surface, &dest_rect, obj_image, image_data, &src_rect);
+ case VA_FOURCC_YV12:
+ case VA_FOURCC_I420:
+ va_status = put_image_i420(obj_surface, dst_rect, obj_image, image_data, src_rect);
break;
- case VA_FOURCC('N','V','1','2'):
- put_image_nv12(obj_surface, &dest_rect, obj_image, image_data, &src_rect);
+ case VA_FOURCC_NV12:
+ va_status = put_image_nv12(obj_surface, dst_rect, obj_image, image_data, src_rect);
break;
- case VA_FOURCC('Y','U','Y','2'):
- put_image_yuy2(obj_surface, &dest_rect, obj_image, image_data, &src_rect);
+ case VA_FOURCC_YUY2:
+ va_status = put_image_yuy2(obj_surface, dst_rect, obj_image, image_data, src_rect);
break;
default:
va_status = VA_STATUS_ERROR_OPERATION_FAILED;
break;
}
+ if (va_status != VA_STATUS_SUCCESS)
+ return va_status;
- i965_UnmapBuffer(ctx, obj_image->image.buf);
+ va_status = i965_UnmapBuffer(ctx, obj_image->image.buf);
return va_status;
}
static VAStatus
i965_hw_putimage(VADriverContextP ctx,
- VASurfaceID surface,
- VAImageID image,
- int src_x,
- int src_y,
- unsigned int src_width,
- unsigned int src_height,
- int dest_x,
- int dest_y,
- unsigned int dest_width,
- unsigned int dest_height)
+ struct object_surface *obj_surface, struct object_image *obj_image,
+ const VARectangle *src_rect, const VARectangle *dst_rect)
{
- struct i965_driver_data *i965 = i965_driver_data(ctx);
- struct object_surface *obj_surface = SURFACE(surface);
- struct object_image *obj_image = IMAGE(image);
struct i965_surface src_surface, dst_surface;
VAStatus va_status = VA_STATUS_SUCCESS;
- VARectangle src_rect, dst_rect;
-
- if (!obj_surface)
- return VA_STATUS_ERROR_INVALID_SURFACE;
-
- if (!obj_image || !obj_image->bo)
- return VA_STATUS_ERROR_INVALID_IMAGE;
-
- if (src_x < 0 ||
- src_y < 0 ||
- src_x + src_width > obj_image->image.width ||
- src_y + src_height > obj_image->image.height)
- return VA_STATUS_ERROR_INVALID_PARAMETER;
-
- if (dest_x < 0 ||
- dest_y < 0 ||
- dest_x + dest_width > obj_surface->orig_width ||
- dest_y + dest_height > obj_surface->orig_height)
- return VA_STATUS_ERROR_INVALID_PARAMETER;
if (!obj_surface->bo) {
unsigned int tiling, swizzle;
@@ -3814,29 +4255,21 @@ i965_hw_putimage(VADriverContextP ctx,
surface_sampling);
}
- assert(obj_surface->fourcc);
+ ASSERT_RET(obj_surface->fourcc, VA_STATUS_ERROR_INVALID_SURFACE);
src_surface.base = (struct object_base *)obj_image;
src_surface.type = I965_SURFACE_TYPE_IMAGE;
src_surface.flags = I965_SURFACE_FLAG_FRAME;
- src_rect.x = src_x;
- src_rect.y = src_y;
- src_rect.width = src_width;
- src_rect.height = src_height;
dst_surface.base = (struct object_base *)obj_surface;
dst_surface.type = I965_SURFACE_TYPE_SURFACE;
dst_surface.flags = I965_SURFACE_FLAG_FRAME;
- dst_rect.x = dest_x;
- dst_rect.y = dest_y;
- dst_rect.width = dest_width;
- dst_rect.height = dest_height;
va_status = i965_image_processing(ctx,
&src_surface,
- &src_rect,
+ src_rect,
&dst_surface,
- &dst_rect);
+ dst_rect);
return va_status;
}
@@ -3854,33 +4287,50 @@ i965_PutImage(VADriverContextP ctx,
unsigned int dest_width,
unsigned int dest_height)
{
- struct i965_driver_data *i965 = i965_driver_data(ctx);
- VAStatus va_status = VA_STATUS_SUCCESS;
+ struct i965_driver_data * const i965 = i965_driver_data(ctx);
+ struct object_surface * const obj_surface = SURFACE(surface);
+ struct object_image * const obj_image = IMAGE(image);
+ VARectangle src_rect, dst_rect;
+ VAStatus va_status;
+
+ if (!obj_surface)
+ return VA_STATUS_ERROR_INVALID_SURFACE;
+ if (is_surface_busy(i965, obj_surface))
+ return VA_STATUS_ERROR_SURFACE_BUSY;
+
+ if (!obj_image || !obj_image->bo)
+ return VA_STATUS_ERROR_INVALID_IMAGE;
+ if (is_image_busy(i965, obj_image))
+ return VA_STATUS_ERROR_SURFACE_BUSY;
+
+ if (src_x < 0 ||
+ src_y < 0 ||
+ src_x + src_width > obj_image->image.width ||
+ src_y + src_height > obj_image->image.height)
+ return VA_STATUS_ERROR_INVALID_PARAMETER;
+
+ src_rect.x = src_x;
+ src_rect.y = src_y;
+ src_rect.width = src_width;
+ src_rect.height = src_height;
+
+ if (dest_x < 0 ||
+ dest_y < 0 ||
+ dest_x + dest_width > obj_surface->orig_width ||
+ dest_y + dest_height > obj_surface->orig_height)
+ return VA_STATUS_ERROR_INVALID_PARAMETER;
+
+ dst_rect.x = dest_x;
+ dst_rect.y = dest_y;
+ dst_rect.width = dest_width;
+ dst_rect.height = dest_height;
if (HAS_ACCELERATED_PUTIMAGE(i965))
- va_status = i965_hw_putimage(ctx,
- surface,
- image,
- src_x,
- src_y,
- src_width,
- src_height,
- dest_x,
- dest_y,
- dest_width,
- dest_height);
+ va_status = i965_hw_putimage(ctx, obj_surface, obj_image,
+ &src_rect, &dst_rect);
else
- va_status = i965_sw_putimage(ctx,
- surface,
- image,
- src_x,
- src_y,
- src_width,
- src_height,
- dest_x,
- dest_y,
- dest_width,
- dest_height);
+ va_status = i965_sw_putimage(ctx, obj_surface, obj_image,
+ &src_rect, &dst_rect);
return va_status;
}
@@ -3937,10 +4387,7 @@ i965_BufferInfo(
i965 = i965_driver_data(ctx);
obj_buffer = BUFFER(buf_id);
- assert(obj_buffer);
-
- if (!obj_buffer)
- return VA_STATUS_ERROR_INVALID_BUFFER;
+ ASSERT_RET(obj_buffer, VA_STATUS_ERROR_INVALID_BUFFER);
*type = obj_buffer->type;
*size = obj_buffer->size_element;
@@ -3969,15 +4416,15 @@ i965_LockSurface(
struct object_surface *obj_surface = NULL;
VAImage tmpImage;
- assert(fourcc);
- assert(luma_stride);
- assert(chroma_u_stride);
- assert(chroma_v_stride);
- assert(luma_offset);
- assert(chroma_u_offset);
- assert(chroma_v_offset);
- assert(buffer_name);
- assert(buffer);
+ ASSERT_RET(fourcc, VA_STATUS_ERROR_INVALID_PARAMETER);
+ ASSERT_RET(luma_stride, VA_STATUS_ERROR_INVALID_PARAMETER);
+ ASSERT_RET(chroma_u_stride, VA_STATUS_ERROR_INVALID_PARAMETER);
+ ASSERT_RET(chroma_v_stride, VA_STATUS_ERROR_INVALID_PARAMETER);
+ ASSERT_RET(luma_offset, VA_STATUS_ERROR_INVALID_PARAMETER);
+ ASSERT_RET(chroma_u_offset, VA_STATUS_ERROR_INVALID_PARAMETER);
+ ASSERT_RET(chroma_v_offset, VA_STATUS_ERROR_INVALID_PARAMETER);
+ ASSERT_RET(buffer_name, VA_STATUS_ERROR_INVALID_PARAMETER);
+ ASSERT_RET(buffer, VA_STATUS_ERROR_INVALID_PARAMETER);
tmpImage.image_id = VA_INVALID_ID;
@@ -4112,41 +4559,42 @@ i965_GetSurfaceAttributes(
attrib_list[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE;
if (attrib_list[i].value.value.i == 0) {
- if (IS_G4X(i965->intel.device_id)) {
+ if (IS_G4X(i965->intel.device_info)) {
if (obj_config->profile == VAProfileMPEG2Simple ||
obj_config->profile == VAProfileMPEG2Main) {
- attrib_list[i].value.value.i = VA_FOURCC('I', '4', '2', '0');
+ attrib_list[i].value.value.i = VA_FOURCC_I420;
} else {
assert(0);
attrib_list[i].flags = VA_SURFACE_ATTRIB_NOT_SUPPORTED;
}
- } else if (IS_IRONLAKE(i965->intel.device_id)) {
+ } else if (IS_IRONLAKE(i965->intel.device_info)) {
if (obj_config->profile == VAProfileMPEG2Simple ||
obj_config->profile == VAProfileMPEG2Main) {
- attrib_list[i].value.value.i = VA_FOURCC('I', '4', '2', '0');
- } else if (obj_config->profile == VAProfileH264Baseline ||
+ attrib_list[i].value.value.i = VA_FOURCC_I420;
+ } else if (obj_config->profile == VAProfileH264ConstrainedBaseline ||
obj_config->profile == VAProfileH264Main ||
obj_config->profile == VAProfileH264High) {
- attrib_list[i].value.value.i = VA_FOURCC('N', 'V', '1', '2');
+ attrib_list[i].value.value.i = VA_FOURCC_NV12;
} else if (obj_config->profile == VAProfileNone) {
- attrib_list[i].value.value.i = VA_FOURCC('N', 'V', '1', '2');
+ attrib_list[i].value.value.i = VA_FOURCC_NV12;
} else {
assert(0);
attrib_list[i].flags = VA_SURFACE_ATTRIB_NOT_SUPPORTED;
}
- } else if (IS_GEN6(i965->intel.device_id)) {
- attrib_list[i].value.value.i = VA_FOURCC('N', 'V', '1', '2');
- } else if (IS_GEN7(i965->intel.device_id)) {
+ } else if (IS_GEN6(i965->intel.device_info)) {
+ attrib_list[i].value.value.i = VA_FOURCC_NV12;
+ } else if (IS_GEN7(i965->intel.device_info) ||
+ IS_GEN8(i965->intel.device_info)) {
if (obj_config->profile == VAProfileJPEGBaseline)
attrib_list[i].value.value.i = 0; /* internal format */
else
- attrib_list[i].value.value.i = VA_FOURCC('N', 'V', '1', '2');
+ attrib_list[i].value.value.i = VA_FOURCC_NV12;
}
} else {
- if (IS_G4X(i965->intel.device_id)) {
+ if (IS_G4X(i965->intel.device_info)) {
if (obj_config->profile == VAProfileMPEG2Simple ||
obj_config->profile == VAProfileMPEG2Main) {
- if (attrib_list[i].value.value.i != VA_FOURCC('I', '4', '2', '0')) {
+ if (attrib_list[i].value.value.i != VA_FOURCC_I420) {
attrib_list[i].value.value.i = 0;
attrib_list[i].flags &= ~VA_SURFACE_ATTRIB_SETTABLE;
}
@@ -4154,30 +4602,30 @@ i965_GetSurfaceAttributes(
assert(0);
attrib_list[i].flags = VA_SURFACE_ATTRIB_NOT_SUPPORTED;
}
- } else if (IS_IRONLAKE(i965->intel.device_id)) {
+ } else if (IS_IRONLAKE(i965->intel.device_info)) {
if (obj_config->profile == VAProfileMPEG2Simple ||
obj_config->profile == VAProfileMPEG2Main) {
- if (attrib_list[i].value.value.i != VA_FOURCC('I', '4', '2', '0')) {
+ if (attrib_list[i].value.value.i != VA_FOURCC_I420) {
attrib_list[i].value.value.i = 0;
attrib_list[i].flags &= ~VA_SURFACE_ATTRIB_SETTABLE;
}
- } else if (obj_config->profile == VAProfileH264Baseline ||
+ } else if (obj_config->profile == VAProfileH264ConstrainedBaseline ||
obj_config->profile == VAProfileH264Main ||
obj_config->profile == VAProfileH264High) {
- if (attrib_list[i].value.value.i != VA_FOURCC('N', 'V', '1', '2')) {
+ if (attrib_list[i].value.value.i != VA_FOURCC_NV12) {
attrib_list[i].value.value.i = 0;
attrib_list[i].flags &= ~VA_SURFACE_ATTRIB_SETTABLE;
}
} else if (obj_config->profile == VAProfileNone) {
switch (attrib_list[i].value.value.i) {
- case VA_FOURCC('N', 'V', '1', '2'):
- case VA_FOURCC('I', '4', '2', '0'):
- case VA_FOURCC('Y', 'V', '1', '2'):
- case VA_FOURCC('Y', 'U', 'Y', '2'):
- case VA_FOURCC('B', 'G', 'R', 'A'):
- case VA_FOURCC('B', 'G', 'R', 'X'):
- case VA_FOURCC('R', 'G', 'B', 'X'):
- case VA_FOURCC('R', 'G', 'B', 'A'):
+ case VA_FOURCC_NV12:
+ case VA_FOURCC_I420:
+ case VA_FOURCC_YV12:
+ case VA_FOURCC_YUY2:
+ case VA_FOURCC_BGRA:
+ case VA_FOURCC_BGRX:
+ case VA_FOURCC_RGBX:
+ case VA_FOURCC_RGBA:
break;
default:
attrib_list[i].value.value.i = 0;
@@ -4188,18 +4636,18 @@ i965_GetSurfaceAttributes(
assert(0);
attrib_list[i].flags = VA_SURFACE_ATTRIB_NOT_SUPPORTED;
}
- } else if (IS_GEN6(i965->intel.device_id)) {
+ } else if (IS_GEN6(i965->intel.device_info)) {
if (obj_config->entrypoint == VAEntrypointEncSlice ||
obj_config->entrypoint == VAEntrypointVideoProc) {
switch (attrib_list[i].value.value.i) {
- case VA_FOURCC('N', 'V', '1', '2'):
- case VA_FOURCC('I', '4', '2', '0'):
- case VA_FOURCC('Y', 'V', '1', '2'):
- case VA_FOURCC('Y', 'U', 'Y', '2'):
- case VA_FOURCC('B', 'G', 'R', 'A'):
- case VA_FOURCC('B', 'G', 'R', 'X'):
- case VA_FOURCC('R', 'G', 'B', 'X'):
- case VA_FOURCC('R', 'G', 'B', 'A'):
+ case VA_FOURCC_NV12:
+ case VA_FOURCC_I420:
+ case VA_FOURCC_YV12:
+ case VA_FOURCC_YUY2:
+ case VA_FOURCC_BGRA:
+ case VA_FOURCC_BGRX:
+ case VA_FOURCC_RGBX:
+ case VA_FOURCC_RGBA:
break;
default:
attrib_list[i].value.value.i = 0;
@@ -4207,18 +4655,19 @@ i965_GetSurfaceAttributes(
break;
}
} else {
- if (attrib_list[i].value.value.i != VA_FOURCC('N', 'V', '1', '2')) {
+ if (attrib_list[i].value.value.i != VA_FOURCC_NV12) {
attrib_list[i].value.value.i = 0;
attrib_list[i].flags &= ~VA_SURFACE_ATTRIB_SETTABLE;
}
}
- } else if (IS_GEN7(i965->intel.device_id)) {
+ } else if (IS_GEN7(i965->intel.device_info) ||
+ IS_GEN8(i965->intel.device_info)) {
if (obj_config->entrypoint == VAEntrypointEncSlice ||
obj_config->entrypoint == VAEntrypointVideoProc) {
switch (attrib_list[i].value.value.i) {
- case VA_FOURCC('N', 'V', '1', '2'):
- case VA_FOURCC('I', '4', '2', '0'):
- case VA_FOURCC('Y', 'V', '1', '2'):
+ case VA_FOURCC_NV12:
+ case VA_FOURCC_I420:
+ case VA_FOURCC_YV12:
break;
default:
attrib_list[i].value.value.i = 0;
@@ -4230,7 +4679,7 @@ i965_GetSurfaceAttributes(
attrib_list[i].value.value.i = 0; /* JPEG decoding always uses an internal format */
attrib_list[i].flags &= ~VA_SURFACE_ATTRIB_SETTABLE;
} else {
- if (attrib_list[i].value.value.i != VA_FOURCC('N', 'V', '1', '2')) {
+ if (attrib_list[i].value.value.i != VA_FOURCC_NV12) {
attrib_list[i].value.value.i = 0;
attrib_list[i].flags &= ~VA_SURFACE_ATTRIB_SETTABLE;
}
@@ -4292,47 +4741,50 @@ i965_QuerySurfaceAttributes(VADriverContextP ctx,
attribs = malloc(I965_MAX_SURFACE_ATTRIBUTES *sizeof(*attribs));
- if (IS_G4X(i965->intel.device_id)) {
+ if (attribs == NULL)
+ return VA_STATUS_ERROR_ALLOCATION_FAILED;
+
+ if (IS_G4X(i965->intel.device_info)) {
if (obj_config->profile == VAProfileMPEG2Simple ||
obj_config->profile == VAProfileMPEG2Main) {
attribs[i].type = VASurfaceAttribPixelFormat;
attribs[i].value.type = VAGenericValueTypeInteger;
attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE;
- attribs[i].value.value.i = VA_FOURCC('I', '4', '2', '0');
+ attribs[i].value.value.i = VA_FOURCC_I420;
i++;
}
- } else if (IS_IRONLAKE(i965->intel.device_id)) {
+ } else if (IS_IRONLAKE(i965->intel.device_info)) {
switch (obj_config->profile) {
case VAProfileMPEG2Simple:
case VAProfileMPEG2Main:
attribs[i].type = VASurfaceAttribPixelFormat;
attribs[i].value.type = VAGenericValueTypeInteger;
attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE;
- attribs[i].value.value.i = VA_FOURCC('I', '4', '2', '0');
+ attribs[i].value.value.i = VA_FOURCC_I420;
i++;
break;
- case VAProfileH264Baseline:
+ case VAProfileH264ConstrainedBaseline:
case VAProfileH264Main:
case VAProfileH264High:
attribs[i].type = VASurfaceAttribPixelFormat;
attribs[i].value.type = VAGenericValueTypeInteger;
attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE;
- attribs[i].value.value.i = VA_FOURCC('N', 'V', '1', '2');
+ attribs[i].value.value.i = VA_FOURCC_NV12;
i++;
case VAProfileNone:
attribs[i].type = VASurfaceAttribPixelFormat;
attribs[i].value.type = VAGenericValueTypeInteger;
attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE;
- attribs[i].value.value.i = VA_FOURCC('N', 'V', '1', '2');
+ attribs[i].value.value.i = VA_FOURCC_NV12;
i++;
attribs[i].type = VASurfaceAttribPixelFormat;
attribs[i].value.type = VAGenericValueTypeInteger;
attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE;
- attribs[i].value.value.i = VA_FOURCC('I', '4', '2', '0');
+ attribs[i].value.value.i = VA_FOURCC_I420;
i++;
break;
@@ -4340,102 +4792,102 @@ i965_QuerySurfaceAttributes(VADriverContextP ctx,
default:
break;
}
- } else if (IS_GEN6(i965->intel.device_id)) {
+ } else if (IS_GEN6(i965->intel.device_info)) {
if (obj_config->entrypoint == VAEntrypointVLD) { /* decode */
attribs[i].type = VASurfaceAttribPixelFormat;
attribs[i].value.type = VAGenericValueTypeInteger;
attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE;
- attribs[i].value.value.i = VA_FOURCC('N', 'V', '1', '2');
+ attribs[i].value.value.i = VA_FOURCC_NV12;
i++;
} else if (obj_config->entrypoint == VAEntrypointEncSlice || /* encode */
obj_config->entrypoint == VAEntrypointVideoProc) { /* vpp */
attribs[i].type = VASurfaceAttribPixelFormat;
attribs[i].value.type = VAGenericValueTypeInteger;
attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE;
- attribs[i].value.value.i = VA_FOURCC('N', 'V', '1', '2');
+ attribs[i].value.value.i = VA_FOURCC_NV12;
i++;
attribs[i].type = VASurfaceAttribPixelFormat;
attribs[i].value.type = VAGenericValueTypeInteger;
attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE;
- attribs[i].value.value.i = VA_FOURCC('I', '4', '2', '0');
+ attribs[i].value.value.i = VA_FOURCC_I420;
i++;
attribs[i].type = VASurfaceAttribPixelFormat;
attribs[i].value.type = VAGenericValueTypeInteger;
attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE;
- attribs[i].value.value.i = VA_FOURCC('Y', 'V', '1', '2');
+ attribs[i].value.value.i = VA_FOURCC_YV12;
i++;
if (obj_config->entrypoint == VAEntrypointVideoProc) {
attribs[i].type = VASurfaceAttribPixelFormat;
attribs[i].value.type = VAGenericValueTypeInteger;
attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE;
- attribs[i].value.value.i = VA_FOURCC('Y', 'U', 'Y', '2');
+ attribs[i].value.value.i = VA_FOURCC_YUY2;
i++;
attribs[i].type = VASurfaceAttribPixelFormat;
attribs[i].value.type = VAGenericValueTypeInteger;
attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE;
- attribs[i].value.value.i = VA_FOURCC('R', 'G', 'B', 'A');
+ attribs[i].value.value.i = VA_FOURCC_RGBA;
i++;
attribs[i].type = VASurfaceAttribPixelFormat;
attribs[i].value.type = VAGenericValueTypeInteger;
attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE;
- attribs[i].value.value.i = VA_FOURCC('R', 'G', 'B', 'X');
+ attribs[i].value.value.i = VA_FOURCC_RGBX;
i++;
}
}
- } else if (IS_GEN7(i965->intel.device_id)) {
+ } else if (IS_GEN7(i965->intel.device_info)) {
if (obj_config->entrypoint == VAEntrypointVLD) { /* decode */
if (obj_config->profile == VAProfileJPEGBaseline) {
attribs[i].type = VASurfaceAttribPixelFormat;
attribs[i].value.type = VAGenericValueTypeInteger;
attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE;
- attribs[i].value.value.i = VA_FOURCC('I', 'M', 'C', '3');
+ attribs[i].value.value.i = VA_FOURCC_IMC3;
i++;
attribs[i].type = VASurfaceAttribPixelFormat;
attribs[i].value.type = VAGenericValueTypeInteger;
attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE;
- attribs[i].value.value.i = VA_FOURCC('I', 'M', 'C', '1');
+ attribs[i].value.value.i = VA_FOURCC_IMC1;
i++;
attribs[i].type = VASurfaceAttribPixelFormat;
attribs[i].value.type = VAGenericValueTypeInteger;
attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE;
- attribs[i].value.value.i = VA_FOURCC('Y', '8', '0', '0');
+ attribs[i].value.value.i = VA_FOURCC_Y800;
i++;
attribs[i].type = VASurfaceAttribPixelFormat;
attribs[i].value.type = VAGenericValueTypeInteger;
attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE;
- attribs[i].value.value.i = VA_FOURCC('4', '1', '1', 'P');
+ attribs[i].value.value.i = VA_FOURCC_411P;
i++;
attribs[i].type = VASurfaceAttribPixelFormat;
attribs[i].value.type = VAGenericValueTypeInteger;
attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE;
- attribs[i].value.value.i = VA_FOURCC('4', '2', '2', 'H');
+ attribs[i].value.value.i = VA_FOURCC_422H;
i++;
attribs[i].type = VASurfaceAttribPixelFormat;
attribs[i].value.type = VAGenericValueTypeInteger;
attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE;
- attribs[i].value.value.i = VA_FOURCC('4', '2', '2', 'V');
+ attribs[i].value.value.i = VA_FOURCC_422V;
i++;
attribs[i].type = VASurfaceAttribPixelFormat;
attribs[i].value.type = VAGenericValueTypeInteger;
attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE;
- attribs[i].value.value.i = VA_FOURCC('4', '4', '4', 'P');
+ attribs[i].value.value.i = VA_FOURCC_444P;
i++;
} else {
attribs[i].type = VASurfaceAttribPixelFormat;
attribs[i].value.type = VAGenericValueTypeInteger;
attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE;
- attribs[i].value.value.i = VA_FOURCC('N', 'V', '1', '2');
+ attribs[i].value.value.i = VA_FOURCC_NV12;
i++;
}
} else if (obj_config->entrypoint == VAEntrypointEncSlice || /* encode */
@@ -4443,44 +4895,178 @@ i965_QuerySurfaceAttributes(VADriverContextP ctx,
attribs[i].type = VASurfaceAttribPixelFormat;
attribs[i].value.type = VAGenericValueTypeInteger;
attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE;
- attribs[i].value.value.i = VA_FOURCC('N', 'V', '1', '2');
+ attribs[i].value.value.i = VA_FOURCC_NV12;
+ i++;
+
+ attribs[i].type = VASurfaceAttribPixelFormat;
+ attribs[i].value.type = VAGenericValueTypeInteger;
+ attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE;
+ attribs[i].value.value.i = VA_FOURCC_I420;
+ i++;
+
+ attribs[i].type = VASurfaceAttribPixelFormat;
+ attribs[i].value.type = VAGenericValueTypeInteger;
+ attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE;
+ attribs[i].value.value.i = VA_FOURCC_YV12;
+ i++;
+
+ attribs[i].type = VASurfaceAttribPixelFormat;
+ attribs[i].value.type = VAGenericValueTypeInteger;
+ attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE;
+ attribs[i].value.value.i = VA_FOURCC_IMC3;
+ i++;
+
+ if (obj_config->entrypoint == VAEntrypointVideoProc) {
+ attribs[i].type = VASurfaceAttribPixelFormat;
+ attribs[i].value.type = VAGenericValueTypeInteger;
+ attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE;
+ attribs[i].value.value.i = VA_FOURCC_YUY2;
+ i++;
+
+ attribs[i].type = VASurfaceAttribPixelFormat;
+ attribs[i].value.type = VAGenericValueTypeInteger;
+ attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE;
+ attribs[i].value.value.i = VA_FOURCC_RGBA;
+ i++;
+
+ attribs[i].type = VASurfaceAttribPixelFormat;
+ attribs[i].value.type = VAGenericValueTypeInteger;
+ attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE;
+ attribs[i].value.value.i = VA_FOURCC_RGBX;
+ i++;
+
+ attribs[i].type = VASurfaceAttribPixelFormat;
+ attribs[i].value.type = VAGenericValueTypeInteger;
+ attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE;
+ attribs[i].value.value.i = VA_FOURCC_BGRA;
+ i++;
+
+ attribs[i].type = VASurfaceAttribPixelFormat;
+ attribs[i].value.type = VAGenericValueTypeInteger;
+ attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE;
+ attribs[i].value.value.i = VA_FOURCC_BGRX;
+ i++;
+
+ attribs[i].type = VASurfaceAttribPixelFormat;
+ attribs[i].value.type = VAGenericValueTypeInteger;
+ attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE;
+ attribs[i].value.value.i = VA_FOURCC_YV16;
+ i++;
+ }
+ }
+ } else if (IS_GEN8(i965->intel.device_info)) {
+ if (obj_config->entrypoint == VAEntrypointVLD) { /* decode */
+ if (obj_config->profile == VAProfileJPEGBaseline) {
+ attribs[i].type = VASurfaceAttribPixelFormat;
+ attribs[i].value.type = VAGenericValueTypeInteger;
+ attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE;
+ attribs[i].value.value.i = VA_FOURCC_IMC3;
+ i++;
+
+ attribs[i].type = VASurfaceAttribPixelFormat;
+ attribs[i].value.type = VAGenericValueTypeInteger;
+ attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE;
+ attribs[i].value.value.i = VA_FOURCC_IMC1;
+ i++;
+
+ attribs[i].type = VASurfaceAttribPixelFormat;
+ attribs[i].value.type = VAGenericValueTypeInteger;
+ attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE;
+ attribs[i].value.value.i = VA_FOURCC_Y800;
+ i++;
+
+ attribs[i].type = VASurfaceAttribPixelFormat;
+ attribs[i].value.type = VAGenericValueTypeInteger;
+ attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE;
+ attribs[i].value.value.i = VA_FOURCC_411P;
+ i++;
+
+ attribs[i].type = VASurfaceAttribPixelFormat;
+ attribs[i].value.type = VAGenericValueTypeInteger;
+ attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE;
+ attribs[i].value.value.i = VA_FOURCC_422H;
+ i++;
+
+ attribs[i].type = VASurfaceAttribPixelFormat;
+ attribs[i].value.type = VAGenericValueTypeInteger;
+ attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE;
+ attribs[i].value.value.i = VA_FOURCC_422V;
+ i++;
+
+ attribs[i].type = VASurfaceAttribPixelFormat;
+ attribs[i].value.type = VAGenericValueTypeInteger;
+ attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE;
+ attribs[i].value.value.i = VA_FOURCC_444P;
+ i++;
+ } else {
+ attribs[i].type = VASurfaceAttribPixelFormat;
+ attribs[i].value.type = VAGenericValueTypeInteger;
+ attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE;
+ attribs[i].value.value.i = VA_FOURCC_NV12;
+ i++;
+ }
+ } else if (obj_config->entrypoint == VAEntrypointEncSlice || /* encode */
+ obj_config->entrypoint == VAEntrypointVideoProc) { /* vpp */
+
+ attribs[i].type = VASurfaceAttribPixelFormat;
+ attribs[i].value.type = VAGenericValueTypeInteger;
+ attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE;
+ attribs[i].value.value.i = VA_FOURCC_NV12;
i++;
attribs[i].type = VASurfaceAttribPixelFormat;
attribs[i].value.type = VAGenericValueTypeInteger;
attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE;
- attribs[i].value.value.i = VA_FOURCC('I', '4', '2', '0');
+ attribs[i].value.value.i = VA_FOURCC_I420;
i++;
attribs[i].type = VASurfaceAttribPixelFormat;
attribs[i].value.type = VAGenericValueTypeInteger;
attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE;
- attribs[i].value.value.i = VA_FOURCC('Y', 'V', '1', '2');
+ attribs[i].value.value.i = VA_FOURCC_YV12;
i++;
attribs[i].type = VASurfaceAttribPixelFormat;
attribs[i].value.type = VAGenericValueTypeInteger;
attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE;
- attribs[i].value.value.i = VA_FOURCC('I', 'M', 'C', '3');
+ attribs[i].value.value.i = VA_FOURCC_IMC3;
i++;
if (obj_config->entrypoint == VAEntrypointVideoProc) {
attribs[i].type = VASurfaceAttribPixelFormat;
attribs[i].value.type = VAGenericValueTypeInteger;
attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE;
- attribs[i].value.value.i = VA_FOURCC('Y', 'U', 'Y', '2');
+ attribs[i].value.value.i = VA_FOURCC_YUY2;
+ i++;
+
+ attribs[i].type = VASurfaceAttribPixelFormat;
+ attribs[i].value.type = VAGenericValueTypeInteger;
+ attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE;
+ attribs[i].value.value.i = VA_FOURCC_RGBA;
+ i++;
+
+ attribs[i].type = VASurfaceAttribPixelFormat;
+ attribs[i].value.type = VAGenericValueTypeInteger;
+ attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE;
+ attribs[i].value.value.i = VA_FOURCC_RGBX;
+ i++;
+
+ attribs[i].type = VASurfaceAttribPixelFormat;
+ attribs[i].value.type = VAGenericValueTypeInteger;
+ attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE;
+ attribs[i].value.value.i = VA_FOURCC_BGRA;
i++;
attribs[i].type = VASurfaceAttribPixelFormat;
attribs[i].value.type = VAGenericValueTypeInteger;
attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE;
- attribs[i].value.value.i = VA_FOURCC('R', 'G', 'B', 'A');
+ attribs[i].value.value.i = VA_FOURCC_BGRX;
i++;
attribs[i].type = VASurfaceAttribPixelFormat;
attribs[i].value.type = VAGenericValueTypeInteger;
attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE;
- attribs[i].value.value.i = VA_FOURCC('R', 'G', 'B', 'X');
+ attribs[i].value.value.i = VA_FOURCC_YV16;
i++;
}
}
@@ -4512,6 +5098,159 @@ i965_QuerySurfaceAttributes(VADriverContextP ctx,
return vaStatus;
}
+
+/*
+ * Acquires buffer handle for external API usage (internal implementation).
+ *
+ * Waits for pending rendering on the buffer's BO, then either re-uses the
+ * export state recorded by a previous acquire (export_refcount > 0) or
+ * creates a new handle for the requested mem_type:
+ *   - VA_SURFACE_ATTRIB_MEM_TYPE_KERNEL_DRM: flink name of the BO
+ *   - VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME:  PRIME file descriptor of the BO
+ * On success the export refcount is incremented and the export state is
+ * copied into *out_buf_info.
+ *
+ * Returns:
+ *   VA_STATUS_ERROR_INVALID_BUFFER          no BO, or the export call failed
+ *   VA_STATUS_ERROR_INVALID_PARAMETER       already exported with another
+ *                                           memory type
+ *   VA_STATUS_ERROR_UNSUPPORTED_MEMORY_TYPE mem_type is not exactly one of
+ *                                           the types handled above
+ */
+static VAStatus
+i965_acquire_buffer_handle(struct object_buffer *obj_buffer,
+    uint32_t mem_type, VABufferInfo *out_buf_info)
+{
+    struct buffer_store *buffer_store;
+
+    buffer_store = obj_buffer->buffer_store;
+    if (!buffer_store || !buffer_store->bo)
+        return VA_STATUS_ERROR_INVALID_BUFFER;
+
+    /* Synchronization point */
+    drm_intel_bo_wait_rendering(buffer_store->bo);
+
+    if (obj_buffer->export_refcount > 0) {
+        /* Already exported: the existing handle can only be shared if the
+         * caller asks for the same memory type. */
+        if (obj_buffer->export_state.mem_type != mem_type)
+            return VA_STATUS_ERROR_INVALID_PARAMETER;
+    }
+    else {
+        VABufferInfo * const buf_info = &obj_buffer->export_state;
+
+        switch (mem_type) {
+        case VA_SURFACE_ATTRIB_MEM_TYPE_KERNEL_DRM: {
+            uint32_t name;
+            if (drm_intel_bo_flink(buffer_store->bo, &name) != 0)
+                return VA_STATUS_ERROR_INVALID_BUFFER;
+            buf_info->handle = name;
+            break;
+        }
+        case VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME: {
+            int fd;
+            if (drm_intel_bo_gem_export_to_prime(buffer_store->bo, &fd) != 0)
+                return VA_STATUS_ERROR_INVALID_BUFFER;
+            buf_info->handle = (intptr_t)fd;
+            break;
+        }
+        default:
+            /* Without this, an unhandled (or multi-bit) mem_type would
+             * report success while leaving the handle unset. */
+            return VA_STATUS_ERROR_UNSUPPORTED_MEMORY_TYPE;
+        }
+
+        buf_info->type = obj_buffer->type;
+        buf_info->mem_type = mem_type;
+        buf_info->mem_size =
+            obj_buffer->num_elements * obj_buffer->size_element;
+    }
+
+    obj_buffer->export_refcount++;
+    *out_buf_info = obj_buffer->export_state;
+    return VA_STATUS_SUCCESS;
+}
+
+/*
+ * Releases buffer handle after usage (internal implementation).
+ *
+ * Drops one export reference. When the last reference goes away, a PRIME
+ * file descriptor (if that is what was exported) is closed and the recorded
+ * memory type is reset to 0. Returns VA_STATUS_ERROR_INVALID_BUFFER when the
+ * buffer is not currently exported.
+ */
+static VAStatus
+i965_release_buffer_handle(struct object_buffer *obj_buffer)
+{
+    VABufferInfo * const export_info = &obj_buffer->export_state;
+
+    if (obj_buffer->export_refcount == 0)
+        return VA_STATUS_ERROR_INVALID_BUFFER;
+
+    obj_buffer->export_refcount--;
+    if (obj_buffer->export_refcount != 0)
+        return VA_STATUS_SUCCESS;   /* still referenced by another user */
+
+    /* Last reference: only the PRIME export needs explicit cleanup here */
+    if (export_info->mem_type == VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME)
+        close((intptr_t)export_info->handle);
+
+    export_info->mem_type = 0;
+    return VA_STATUS_SUCCESS;
+}
+
+/**
+ * Acquires buffer handle for external API usage.
+ *
+ * buf_info->mem_type is read as the caller's request: zero selects the
+ * driver's preferred type, otherwise it is treated as a mask and the first
+ * supported type (in preference order) present in the mask is used.
+ *
+ * Returns VA_STATUS_ERROR_INVALID_BUFFER for an unknown buffer id,
+ * VA_STATUS_ERROR_UNSUPPORTED_BUFFERTYPE for anything but VAImageBufferType,
+ * VA_STATUS_ERROR_INVALID_PARAMETER when buf_info is NULL,
+ * VA_STATUS_ERROR_UNSUPPORTED_MEMORY_TYPE when no requested type is
+ * supported, otherwise the result of i965_acquire_buffer_handle().
+ */
+static VAStatus
+i965_AcquireBufferHandle(VADriverContextP ctx, VABufferID buf_id,
+    VABufferInfo *buf_info)
+{
+    /* 'i965' is referenced by the BUFFER() lookup below */
+    struct i965_driver_data * const i965 = i965_driver_data(ctx);
+    struct object_buffer * const obj_buffer = BUFFER(buf_id);
+    uint32_t i, mem_type;
+
+    /* List of supported memory types, in preferred order */
+    static const uint32_t mem_types[] = {
+        VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME,
+        VA_SURFACE_ATTRIB_MEM_TYPE_KERNEL_DRM,
+        0
+    };
+
+    if (!obj_buffer)
+        return VA_STATUS_ERROR_INVALID_BUFFER;
+    /* XXX: only VA surface|image like buffers are supported for now */
+    if (obj_buffer->type != VAImageBufferType)
+        return VA_STATUS_ERROR_UNSUPPORTED_BUFFERTYPE;
+
+    if (!buf_info)
+        return VA_STATUS_ERROR_INVALID_PARAMETER;
+
+    if (!buf_info->mem_type)
+        mem_type = mem_types[0];
+    else {
+        mem_type = 0;
+        for (i = 0; mem_types[i] != 0; i++) {
+            if (buf_info->mem_type & mem_types[i]) {
+                /* Select the single matching type, not the caller's whole
+                 * mask: the export helper switches on one exact type. */
+                mem_type = mem_types[i];
+                break;
+            }
+        }
+        if (!mem_type)
+            return VA_STATUS_ERROR_UNSUPPORTED_MEMORY_TYPE;
+    }
+    return i965_acquire_buffer_handle(obj_buffer, mem_type, buf_info);
+}
+
+/**
+ * Releases buffer handle after usage from external API.
+ *
+ * Looks the buffer up by id and forwards to i965_release_buffer_handle();
+ * an unknown id yields VA_STATUS_ERROR_INVALID_BUFFER.
+ */
+static VAStatus
+i965_ReleaseBufferHandle(VADriverContextP ctx, VABufferID buf_id)
+{
+    /* 'i965' is referenced by the BUFFER() lookup below */
+    struct i965_driver_data * const i965 = i965_driver_data(ctx);
+    struct object_buffer * const buffer = BUFFER(buf_id);
+
+    return buffer ? i965_release_buffer_handle(buffer)
+                  : VA_STATUS_ERROR_INVALID_BUFFER;
+}
+
+/*
+ * Reports whether the given I965_RING_* is usable on this system.
+ *
+ * BSD, BLT and VEBOX map to the corresponding has_* flags of the intel
+ * driver data; I965_RING_NULL is always reported as supported. Any other
+ * value trips an assert and yields 0.
+ */
+static int
+i965_os_has_ring_support(VADriverContextP ctx,
+    int ring)
+{
+    struct i965_driver_data *const i965 = i965_driver_data(ctx);
+    int supported = 0;
+
+    switch (ring) {
+    case I965_RING_NULL:
+        supported = 1; /* Always support */
+        break;
+
+    case I965_RING_BSD:
+        supported = i965->intel.has_bsd;
+        break;
+
+    case I965_RING_BLT:
+        supported = i965->intel.has_blt;
+        break;
+
+    case I965_RING_VEBOX:
+        supported = i965->intel.has_vebox;
+        break;
+
+    default:
+        /* should never get here */
+        assert(0);
+        break;
+    }
+
+    return supported;
+}
+
/*
* Query video processing pipeline
*/
@@ -4523,18 +5262,24 @@ VAStatus i965_QueryVideoProcFilters(
)
{
struct i965_driver_data *const i965 = i965_driver_data(ctx);
- unsigned int i = 0;
+ unsigned int i = 0, num = 0;
if (!num_filters || !filters)
return VA_STATUS_ERROR_INVALID_PARAMETER;
- for (i = 0; i < *num_filters && i < i965->codec_info->num_filters; i++)
- filters[i] = i965->codec_info->filters[i];
+ for (i = 0; i < i965->codec_info->num_filters; i++) {
+ if (i965_os_has_ring_support(ctx, i965->codec_info->filters[i].ring)) {
+ if (num == *num_filters) {
+ *num_filters = i965->codec_info->num_filters;
- *num_filters = i;
+ return VA_STATUS_ERROR_MAX_NUM_EXCEEDED;
+ }
+
+ filters[num++] = i965->codec_info->filters[i].type;
+ }
+ }
- if (i < i965->codec_info->num_filters)
- return VA_STATUS_ERROR_MAX_NUM_EXCEEDED;
+ *num_filters = num;
return VA_STATUS_SUCCESS;
}
@@ -4553,6 +5298,17 @@ VAStatus i965_QueryVideoProcFilterCaps(
if (!filter_caps || !num_filter_caps)
return VA_STATUS_ERROR_INVALID_PARAMETER;
+ for (i = 0; i < i965->codec_info->num_filters; i++) {
+ if (type == i965->codec_info->filters[i].type &&
+ i965_os_has_ring_support(ctx, i965->codec_info->filters[i].ring))
+ break;
+ }
+
+ if (i == i965->codec_info->num_filters)
+ return VA_STATUS_ERROR_UNSUPPORTED_FILTER;
+
+ i = 0;
+
switch (type) {
case VAProcFilterNoiseReduction:
case VAProcFilterSharpening:
@@ -4592,7 +5348,13 @@ VAStatus i965_QueryVideoProcFilterCaps(
i++;
cap++;
}
- }
+
+ if (i965->codec_info->has_di_motion_compensated) {
+ cap->type = VAProcDeinterlacingMotionCompensated;
+ i++;
+ cap++;
+ }
+ }
break;
@@ -4616,7 +5378,7 @@ VAStatus i965_QueryVideoProcFilterCaps(
cap->type = VAProcColorBalanceSaturation;
cap->range.min_value = 0.0;
cap->range.max_value = 10.0;
- cap->range.default_value = 0.0;
+ cap->range.default_value = 1.0;
cap->range.step = 0.1;
i++;
cap++;
@@ -4632,7 +5394,7 @@ VAStatus i965_QueryVideoProcFilterCaps(
cap->type = VAProcColorBalanceContrast;
cap->range.min_value = 0.0;
cap->range.max_value = 10.0;
- cap->range.default_value = 0.0;
+ cap->range.default_value = 1.0;
cap->range.step = 0.1;
i++;
cap++;
@@ -4694,33 +5456,33 @@ VAStatus i965_QueryVideoProcPipelineCaps(
} else if (base->type == VAProcFilterDeinterlacing) {
VAProcFilterParameterBufferDeinterlacing *deint = (VAProcFilterParameterBufferDeinterlacing *)base;
- assert(deint->algorithm == VAProcDeinterlacingBob ||
- deint->algorithm == VAProcDeinterlacingMotionAdaptive);
+ ASSERT_RET(deint->algorithm == VAProcDeinterlacingBob ||
+ deint->algorithm == VAProcDeinterlacingMotionAdaptive ||
+ deint->algorithm == VAProcDeinterlacingMotionCompensated,
+ VA_STATUS_ERROR_INVALID_PARAMETER);
- if (deint->algorithm == VAProcDeinterlacingMotionAdaptive)
+ /* Only the motion-based algorithms count a forward reference; Bob does not.
+  * The condition must not end in ';' or the increment becomes unconditional. */
+ if (deint->algorithm == VAProcDeinterlacingMotionAdaptive ||
+     deint->algorithm == VAProcDeinterlacingMotionCompensated)
     pipeline_cap->num_forward_references++;
+ } else if (base->type == VAProcFilterSkinToneEnhancement) {
+ VAProcFilterParameterBuffer *stde = (VAProcFilterParameterBuffer *)base;
+ (void)stde;
}
}
return VA_STATUS_SUCCESS;
}
+extern struct hw_codec_info *i965_get_codec_info(int devid);
+
static bool
i965_driver_data_init(VADriverContextP ctx)
{
struct i965_driver_data *i965 = i965_driver_data(ctx);
- if (IS_HASWELL(i965->intel.device_id))
- i965->codec_info = &gen75_hw_codec_info;
- else if (IS_G4X(i965->intel.device_id))
- i965->codec_info = &g4x_hw_codec_info;
- else if (IS_IRONLAKE(i965->intel.device_id))
- i965->codec_info = &ironlake_hw_codec_info;
- else if (IS_GEN6(i965->intel.device_id))
- i965->codec_info = &gen6_hw_codec_info;
- else if (IS_GEN7(i965->intel.device_id))
- i965->codec_info = &gen7_hw_codec_info;
- else
+ i965->codec_info = i965_get_codec_info(i965->intel.device_id);
+
+ if (!i965->codec_info)
return false;
if (object_heap_init(&i965->config_heap,
@@ -4750,6 +5512,7 @@ i965_driver_data_init(VADriverContextP ctx)
goto err_subpic_heap;
i965->batch = intel_batchbuffer_new(&i965->intel, I915_EXEC_RENDER, 0);
+ i965->pp_batch = intel_batchbuffer_new(&i965->intel, I915_EXEC_RENDER, 0);
_i965InitMutex(&i965->render_mutex);
_i965InitMutex(&i965->pp_mutex);
@@ -4781,6 +5544,9 @@ i965_driver_data_terminate(VADriverContextP ctx)
if (i965->batch)
intel_batchbuffer_free(i965->batch);
+ if (i965->pp_batch)
+ intel_batchbuffer_free(i965->pp_batch);
+
i965_destroy_heap(&i965->subpic_heap, i965_destroy_subpic);
i965_destroy_heap(&i965->image_heap, i965_destroy_image);
i965_destroy_heap(&i965->buffer_heap, i965_destroy_buffer);
@@ -4841,11 +5607,51 @@ struct {
#endif
};
+/*
+ * Fills i965->va_vendor with
+ *   "<vendor> <name> driver for <chipset> - <major>.<minor>.<micro>"
+ * followed, for pre-release builds, by ".pre<N> (<git version>)".
+ *
+ * Does nothing and returns true if the string is already set. Returns false
+ * (with the string reset to empty) when formatting fails or the result does
+ * not fit into the va_vendor buffer.
+ */
+static bool
+ensure_vendor_string(struct i965_driver_data *i965, const char *chipset)
+{
+    const size_t size = sizeof(i965->va_vendor);
+    int ret, len;
+
+    if (i965->va_vendor[0] != '\0')
+        return true;
+
+    len = 0;
+    ret = snprintf(i965->va_vendor, size,
+        "%s %s driver for %s - %d.%d.%d",
+        INTEL_STR_DRIVER_VENDOR, INTEL_STR_DRIVER_NAME, chipset,
+        INTEL_DRIVER_MAJOR_VERSION, INTEL_DRIVER_MINOR_VERSION,
+        INTEL_DRIVER_MICRO_VERSION);
+    if (ret < 0 || (size_t)ret >= size)
+        goto error;
+    len = ret;
+
+    if (INTEL_DRIVER_PRE_VERSION > 0) {
+        /* These calls only get the space left after 'len' bytes, so
+         * truncation has to be checked against that remainder. Checking
+         * against the full buffer size would let 'len' run past the end
+         * and make the next 'size - len' underflow. */
+        ret = snprintf(&i965->va_vendor[len], size - len,
+            ".pre%d", INTEL_DRIVER_PRE_VERSION);
+        if (ret < 0 || (size_t)ret >= size - len)
+            goto error;
+        len += ret;
+
+        ret = snprintf(&i965->va_vendor[len], size - len,
+            " (%s)", INTEL_DRIVER_GIT_VERSION);
+        if (ret < 0 || (size_t)ret >= size - len)
+            goto error;
+        len += ret;
+    }
+    return true;
+
+error:
+    /* Never leave a partially formatted vendor string behind */
+    i965->va_vendor[0] = '\0';
+    ASSERT_RET(ret > 0 && len < sizeof(i965->va_vendor), false);
+    return false;
+}
+
static VAStatus
i965_Init(VADriverContextP ctx)
{
struct i965_driver_data *i965 = i965_driver_data(ctx);
int i;
+ const char *chipset;
for (i = 0; i < ARRAY_ELEMS(i965_sub_ops); i++) {
if ((i965_sub_ops[i].display_type == 0 ||
@@ -4854,19 +5660,22 @@ i965_Init(VADriverContextP ctx)
break;
}
+ if (i965->codec_info->preinit_hw_codec)
+ i965->codec_info->preinit_hw_codec(ctx, i965->codec_info);
+
if (i == ARRAY_ELEMS(i965_sub_ops)) {
- sprintf(i965->va_vendor, "%s %s driver - %d.%d.%d",
- INTEL_STR_DRIVER_VENDOR,
- INTEL_STR_DRIVER_NAME,
- INTEL_DRIVER_MAJOR_VERSION,
- INTEL_DRIVER_MINOR_VERSION,
- INTEL_DRIVER_MICRO_VERSION);
-
- if (INTEL_DRIVER_PRE_VERSION > 0) {
- const int len = strlen(i965->va_vendor);
- sprintf(&i965->va_vendor[len], ".pre%d", INTEL_DRIVER_PRE_VERSION);
+ switch (i965->intel.device_id) {
+#undef CHIPSET
+#define CHIPSET(id, family, dev, str) case id: chipset = str; break;
+#include "i965_pciids.h"
+ default:
+ chipset = "Unknown Intel Chipset";
+ break;
}
+ if (!ensure_vendor_string(i965, chipset))
+ return VA_STATUS_ERROR_ALLOCATION_FAILED;
+
i965->current_context_id = VA_INVALID_ID;
return VA_STATUS_SUCCESS;
@@ -4896,6 +5705,9 @@ i965_Terminate(VADriverContextP ctx)
i965_sub_ops[i - 1].display_type == (ctx->display_type & VA_DISPLAY_MAJOR_MASK)) {
i965_sub_ops[i - 1].terminate(ctx);
}
+
+ free(i965);
+ ctx->pDriverData = NULL;
}
return VA_STATUS_SUCCESS;
@@ -4925,7 +5737,6 @@ VA_DRIVER_INIT_FUNC( VADriverContextP ctx )
vtable->vaTerminate = i965_Terminate;
vtable->vaQueryConfigEntrypoints = i965_QueryConfigEntrypoints;
vtable->vaQueryConfigProfiles = i965_QueryConfigProfiles;
- vtable->vaQueryConfigEntrypoints = i965_QueryConfigEntrypoints;
vtable->vaQueryConfigAttributes = i965_QueryConfigAttributes;
vtable->vaCreateConfig = i965_CreateConfig;
vtable->vaDestroyConfig = i965_DestroyConfig;
@@ -4970,6 +5781,10 @@ VA_DRIVER_INIT_FUNC( VADriverContextP ctx )
vtable->vaQuerySurfaceAttributes = i965_QuerySurfaceAttributes;
vtable->vaCreateSurfaces2 = i965_CreateSurfaces2;
+ /* 0.36.0 */
+ vtable->vaAcquireBufferHandle = i965_AcquireBufferHandle;
+ vtable->vaReleaseBufferHandle = i965_ReleaseBufferHandle;
+
vtable_vpp->vaQueryVideoProcFilters = i965_QueryVideoProcFilters;
vtable_vpp->vaQueryVideoProcFilterCaps = i965_QueryVideoProcFilterCaps;
vtable_vpp->vaQueryVideoProcPipelineCaps = i965_QueryVideoProcPipelineCaps;
diff --git a/src/i965_drv_video.h b/src/i965_drv_video.h
index e694d67..629489f 100644
--- a/src/i965_drv_video.h
+++ b/src/i965_drv_video.h
@@ -40,8 +40,9 @@
#include "i965_mutext.h"
#include "object_heap.h"
#include "intel_driver.h"
+#include "i965_fourcc.h"
-#define I965_MAX_PROFILES 11
+#define I965_MAX_PROFILES 20
#define I965_MAX_ENTRYPOINTS 5
#define I965_MAX_CONFIG_ATTRIBUTES 10
#define I965_MAX_IMAGE_FORMATS 10
@@ -59,6 +60,16 @@
#define I965_SURFACE_FLAG_TOP_FIELD_FIRST 0x00000001
#define I965_SURFACE_FLAG_BOTTOME_FIELD_FIRST 0x00000002
+#define DEFAULT_BRIGHTNESS 0
+#define DEFAULT_CONTRAST 50
+#define DEFAULT_HUE 0
+#define DEFAULT_SATURATION 50
+
+#define ENCODER_QUALITY_RANGE 2
+#define ENCODER_DEFAULT_QUALITY 1
+#define ENCODER_HIGH_QUALITY ENCODER_DEFAULT_QUALITY
+#define ENCODER_LOW_QUALITY 2
+
struct i965_surface
{
struct object_base *base;
@@ -73,6 +84,7 @@ struct i965_kernel
const uint32_t (*bin)[4];
int size;
dri_bo *bo;
+ unsigned int kernel_offset;
};
struct buffer_store
@@ -94,14 +106,20 @@ struct object_config
#define NUM_SLICES 10
+/*
+ * State common to every codec state variant. It is embedded as the leading
+ * "base" member of struct decode_state, struct encode_state and
+ * struct proc_state, and is also a direct member of union codec_state.
+ */
+struct codec_state_base {
+ uint32_t chroma_formats; /* NOTE(review): presumably a mask of VA_RT_FORMAT_* bits - confirm */
+};
+
struct decode_state
{
+ struct codec_state_base base;
struct buffer_store *pic_param;
struct buffer_store **slice_params;
struct buffer_store *iq_matrix;
struct buffer_store *bit_plane;
struct buffer_store *huffman_table;
struct buffer_store **slice_datas;
+ struct buffer_store *probability_data;
VASurfaceID current_render_target;
int max_slice_params;
int max_slice_datas;
@@ -112,8 +130,12 @@ struct decode_state
struct object_surface *reference_objects[16]; /* Up to 2 reference surfaces are valid for MPEG-2,*/
};
+#define SLICE_PACKED_DATA_INDEX_TYPE 0x80000000
+#define SLICE_PACKED_DATA_INDEX_MASK 0x00FFFFFF
+
struct encode_state
{
+ struct codec_state_base base;
struct buffer_store *seq_param;
struct buffer_store *pic_param;
struct buffer_store *pic_control;
@@ -131,9 +153,41 @@ struct encode_state
struct buffer_store **slice_params_ext;
int max_slice_params_ext;
int num_slice_params_ext;
+
+ /* Check the user-configurable packed_header attribute.
+ * Currently it is mainly used to check whether the packed slice_header data
+ * is provided by user or the driver.
+ * TBD: It will check for the packed SPS/PPS/MISC/RAWDATA and so on.
+ */
+ unsigned int packed_header_flag;
+ /* For the packed data that needs to be inserted into video clip */
+ /* currently it is mainly to track packed raw data and packed slice_header data. */
+ struct buffer_store **packed_header_params_ext;
+ int max_packed_header_params_ext;
+ int num_packed_header_params_ext;
+ struct buffer_store **packed_header_data_ext;
+ int max_packed_header_data_ext;
+ int num_packed_header_data_ext;
+
+ /* the index of current slice */
+ int slice_index;
+ /* the array is determined by max_slice_params_ext */
+ int max_slice_num;
+ /* This is to store the first index of packed data for one slice */
+ int *slice_rawdata_index;
+ /* This is to store the number of packed data for one slice.
+ * Both packed rawdata and slice_header data are tracked by
+ * this variable. That is to say: When one packed slice_header is parsed,
+ * this variable will also be increased.
+ */
+ int *slice_rawdata_count;
+
+ /* This is to store the index of packed slice header for one slice */
+ int *slice_header_index;
+
int last_packed_header_type;
- struct buffer_store *misc_param[8];
+ struct buffer_store *misc_param[16];
VASurfaceID current_render_target;
struct object_surface *input_yuv_object;
@@ -144,6 +198,7 @@ struct encode_state
struct proc_state
{
+ struct codec_state_base base;
struct buffer_store *pipeline_param;
VASurfaceID current_render_target;
@@ -155,6 +210,7 @@ struct proc_state
union codec_state
{
+ struct codec_state_base base;
struct decode_state decode;
struct encode_state encode;
struct proc_state proc;
@@ -186,12 +242,8 @@ struct object_context
};
#define SURFACE_REFERENCED (1 << 0)
-#define SURFACE_DISPLAYED (1 << 1)
#define SURFACE_DERIVED (1 << 2)
-#define SURFACE_REF_DIS_MASK ((SURFACE_REFERENCED) | \
- (SURFACE_DISPLAYED))
#define SURFACE_ALL_MASK ((SURFACE_REFERENCED) | \
- (SURFACE_DISPLAYED) | \
(SURFACE_DERIVED))
struct object_surface
@@ -202,15 +254,16 @@ struct object_surface
struct object_subpic *obj_subpic[I965_MAX_SUBPIC_SUM];
unsigned int subpic_render_idx;
- int width;
- int height;
+ int width; /* the pitch of plane 0 in bytes in horizontal direction */
+ int height; /* the pitch of plane 0 in bytes in vertical direction */
int size;
- int orig_width;
- int orig_height;
+ int orig_width; /* the width of plane 0 in pixels */
+ int orig_height; /* the height of plane 0 in pixels */
int flags;
unsigned int fourcc;
dri_bo *bo;
VAImageID locked_image_id;
+ VAImageID derived_image_id;
void (*free_private_data)(void **data);
void *private_data;
unsigned int subsampling;
@@ -221,6 +274,10 @@ struct object_surface
int cb_cr_width;
int cb_cr_height;
int cb_cr_pitch;
+ /* user specified attributes see: VASurfaceAttribExternalBuffers/VA_SURFACE_ATTRIB_MEM_TYPE_VA */
+ uint32_t user_disable_tiling : 1;
+ uint32_t user_h_stride_set : 1;
+ uint32_t user_v_stride_set : 1;
};
struct object_buffer
@@ -231,6 +288,10 @@ struct object_buffer
int num_elements;
int size_element;
VABufferType type;
+
+ /* Export state */
+ unsigned int export_refcount;
+ VABufferInfo export_state;
};
struct object_image
@@ -258,13 +319,34 @@ struct object_subpic
unsigned int flags;
};
+#define I965_RING_NULL 0
+#define I965_RING_BSD 1
+#define I965_RING_BLT 2
+#define I965_RING_VEBOX 3
+
+/*
+ * One video-processing filter entry; struct hw_codec_info keeps an array of
+ * these in its filters[] member.
+ */
+struct i965_filter
+{
+ VAProcFilterType type; /* VA-API filter type of this entry */
+ int ring; /* NOTE(review): presumably one of the I965_RING_* values - confirm */
+};
+
struct hw_codec_info
{
struct hw_context *(*dec_hw_context_init)(VADriverContextP, struct object_config *);
struct hw_context *(*enc_hw_context_init)(VADriverContextP, struct object_config *);
struct hw_context *(*proc_hw_context_init)(VADriverContextP, struct object_config *);
+ bool (*render_init)(VADriverContextP);
+ void (*post_processing_context_init)(VADriverContextP, void *, struct intel_batchbuffer *);
+ void (*preinit_hw_codec)(VADriverContextP, struct hw_codec_info *);
+
int max_width;
int max_height;
+ int min_linear_wpitch;
+ int min_linear_hpitch;
+
+ unsigned int h264_mvc_dec_profiles;
+ unsigned int h264_dec_chroma_formats;
+ unsigned int jpeg_dec_chroma_formats;
unsigned int has_mpeg2_decoding:1;
unsigned int has_mpeg2_encoding:1;
@@ -280,9 +362,12 @@ struct hw_codec_info
unsigned int has_tiled_surface:1;
unsigned int has_di_motion_adptive:1;
unsigned int has_di_motion_compensated:1;
+ unsigned int has_vp8_decoding:1;
+ unsigned int has_vp8_encoding:1;
+ unsigned int has_h264_mvc_encoding:1;
unsigned int num_filters;
- VAProcFilterType filters[VAProcFilterCount];
+ struct i965_filter filters[VAProcFilterCount];
};
@@ -302,6 +387,7 @@ struct i965_driver_data
_I965Mutex render_mutex;
_I965Mutex pp_mutex;
struct intel_batchbuffer *batch;
+ struct intel_batchbuffer *pp_batch;
struct i965_render_state render_state;
void *pp_context;
char va_vendor[256];
@@ -309,6 +395,10 @@ struct i965_driver_data
VADisplayAttribute *display_attributes;
unsigned int num_display_attributes;
VADisplayAttribute *rotation_attrib;
+ VADisplayAttribute *brightness_attrib;
+ VADisplayAttribute *contrast_attrib;
+ VADisplayAttribute *hue_attrib;
+ VADisplayAttribute *saturation_attrib;
VAContextID current_context_id;
/* VA/DRI (X11) specific data */
@@ -344,7 +434,7 @@ i965_driver_data(VADriverContextP ctx)
return (struct i965_driver_data *)(ctx->pDriverData);
}
-void
+VAStatus
i965_check_alloc_surface_bo(VADriverContextP ctx,
struct object_surface *obj_surface,
int tiled,
@@ -355,8 +445,9 @@ int
va_enc_packed_type_to_idx(int packed_type);
/* reserve 2 byte for internal using */
-#define CODED_H264 0
-#define CODED_MPEG2 1
+#define CODEC_H264 0
+#define CODEC_MPEG2 1
+#define CODEC_H264_MVC 2
#define H264_DELIMITER0 0x00
#define H264_DELIMITER1 0x00
@@ -385,8 +476,22 @@ extern VAStatus i965_MapBuffer(VADriverContextP ctx,
extern VAStatus i965_UnmapBuffer(VADriverContextP ctx, VABufferID buf_id);
+extern VAStatus i965_DestroySurfaces(VADriverContextP ctx,
+ VASurfaceID *surface_list,
+ int num_surfaces);
+
+extern VAStatus i965_CreateSurfaces(VADriverContextP ctx,
+ int width,
+ int height,
+ int format,
+ int num_surfaces,
+ VASurfaceID *surfaces);
+
#define I965_SURFACE_MEM_NATIVE 0
#define I965_SURFACE_MEM_GEM_FLINK 1
#define I965_SURFACE_MEM_DRM_PRIME 2
+void
+i965_destroy_surface_storage(struct object_surface *obj_surface);
+
#endif /* _I965_DRV_VIDEO_H_ */
diff --git a/src/i965_encoder.c b/src/i965_encoder.c
index 4384619..f66d889 100644
--- a/src/i965_encoder.c
+++ b/src/i965_encoder.c
@@ -44,18 +44,6 @@ extern Bool gen6_mfc_context_init(VADriverContextP ctx, struct intel_encoder_con
extern Bool gen6_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context);
extern Bool gen7_mfc_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context);
-VAStatus
-i965_DestroySurfaces(VADriverContextP ctx,
- VASurfaceID *surface_list,
- int num_surfaces);
-VAStatus
-i965_CreateSurfaces(VADriverContextP ctx,
- int width,
- int height,
- int format,
- int num_surfaces,
- VASurfaceID *surfaces);
-
static VAStatus
intel_encoder_check_yuv_surface(VADriverContextP ctx,
VAProfile profile,
@@ -81,7 +69,7 @@ intel_encoder_check_yuv_surface(VADriverContextP ctx,
if (!obj_surface || !obj_surface->bo)
return VA_STATUS_ERROR_INVALID_PARAMETER;
- if (obj_surface->fourcc == VA_FOURCC('N', 'V', '1', '2')) {
+ if (obj_surface->fourcc == VA_FOURCC_NV12) {
unsigned int tiling = 0, swizzle = 0;
dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
@@ -116,7 +104,7 @@ intel_encoder_check_yuv_surface(VADriverContextP ctx,
obj_surface = SURFACE(encoder_context->input_yuv_surface);
encode_state->input_yuv_object = obj_surface;
assert(obj_surface);
- i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
+ i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
dst_surface.base = (struct object_base *)obj_surface;
dst_surface.type = I965_SURFACE_TYPE_SURFACE;
@@ -135,6 +123,30 @@ intel_encoder_check_yuv_surface(VADriverContextP ctx,
}
static VAStatus
+intel_encoder_check_misc_parameter(VADriverContextP ctx,
+ struct encode_state *encode_state,
+ struct intel_encoder_context *encoder_context)
+{
+
+ if (encode_state->misc_param[VAEncMiscParameterTypeQualityLevel] &&
+ encode_state->misc_param[VAEncMiscParameterTypeQualityLevel]->buffer) {
+ VAEncMiscParameterBuffer* pMiscParam = (VAEncMiscParameterBuffer*)encode_state->misc_param[VAEncMiscParameterTypeQualityLevel]->buffer;
+ VAEncMiscParameterBufferQualityLevel* param_quality_level = (VAEncMiscParameterBufferQualityLevel*)pMiscParam->data;
+ encoder_context->quality_level = param_quality_level->quality_level;
+
+ if (encoder_context->quality_level == 0)
+ encoder_context->quality_level = ENCODER_DEFAULT_QUALITY;
+ else if (encoder_context->quality_level > encoder_context->quality_range)
+ goto error;
+ }
+
+ return VA_STATUS_SUCCESS;
+
+error:
+ return VA_STATUS_ERROR_INVALID_PARAMETER;
+}
+
+static VAStatus
intel_encoder_check_avc_parameter(VADriverContextP ctx,
struct encode_state *encode_state,
struct intel_encoder_context *encoder_context)
@@ -267,9 +279,11 @@ intel_encoder_sanity_check_input(VADriverContextP ctx,
VAStatus vaStatus;
switch (profile) {
- case VAProfileH264Baseline:
+ case VAProfileH264ConstrainedBaseline:
case VAProfileH264Main:
case VAProfileH264High:
+ case VAProfileH264MultiviewHigh:
+ case VAProfileH264StereoHigh:
vaStatus = intel_encoder_check_avc_parameter(ctx, encode_state, encoder_context);
break;
@@ -288,6 +302,9 @@ intel_encoder_sanity_check_input(VADriverContextP ctx,
vaStatus = intel_encoder_check_yuv_surface(ctx, profile, encode_state, encoder_context);
+ if (vaStatus == VA_STATUS_SUCCESS)
+ vaStatus = intel_encoder_check_misc_parameter(ctx, encode_state, encoder_context);
+
out:
return vaStatus;
}
@@ -345,11 +362,43 @@ intel_enc_hw_context_init(VADriverContextP ctx,
encoder_context->input_yuv_surface = VA_INVALID_SURFACE;
encoder_context->is_tmp_id = 0;
encoder_context->rate_control_mode = VA_RC_NONE;
- encoder_context->profile = obj_config->profile;
+ encoder_context->quality_level = ENCODER_DEFAULT_QUALITY;
+ encoder_context->quality_range = 1;
+
+ switch (obj_config->profile) {
+ case VAProfileMPEG2Simple:
+ case VAProfileMPEG2Main:
+ encoder_context->codec = CODEC_MPEG2;
+ break;
+
+ case VAProfileH264ConstrainedBaseline:
+ case VAProfileH264Main:
+ case VAProfileH264High:
+ encoder_context->codec = CODEC_H264;
+ encoder_context->quality_range = ENCODER_QUALITY_RANGE;
+ break;
+
+ case VAProfileH264StereoHigh:
+ case VAProfileH264MultiviewHigh:
+ encoder_context->codec = CODEC_H264_MVC;
+ break;
+
+ default:
+ /* Never get here */
+ assert(0);
+ break;
+ }
for (i = 0; i < obj_config->num_attribs; i++) {
if (obj_config->attrib_list[i].type == VAConfigAttribRateControl) {
encoder_context->rate_control_mode = obj_config->attrib_list[i].value;
+
+ if (encoder_context->codec == CODEC_MPEG2 &&
+ encoder_context->rate_control_mode & VA_RC_CBR) {
+ WARN_ONCE("Don't support CBR for MPEG-2 encoding\n");
+ encoder_context->rate_control_mode &= ~VA_RC_CBR;
+ }
+
break;
}
}
@@ -376,6 +425,7 @@ gen6_enc_hw_context_init(VADriverContextP ctx, struct object_config *obj_config)
struct hw_context *
gen7_enc_hw_context_init(VADriverContextP ctx, struct object_config *obj_config)
{
+
return intel_enc_hw_context_init(ctx, obj_config, gen7_vme_context_init, gen7_mfc_context_init);
}
@@ -384,3 +434,10 @@ gen75_enc_hw_context_init(VADriverContextP ctx, struct object_config *obj_config
{
return intel_enc_hw_context_init(ctx, obj_config, gen75_vme_context_init, gen75_mfc_context_init);
}
+
+/*
+ * Gen8 entry point for creating an encoder hw_context: forwards to the
+ * generic intel_enc_hw_context_init() with the gen8 VME and MFC context
+ * initialisers, and returns whatever that call returns.
+ */
+struct hw_context *
+gen8_enc_hw_context_init(VADriverContextP ctx, struct object_config *obj_config)
+{
+ return intel_enc_hw_context_init(ctx, obj_config, gen8_vme_context_init, gen8_mfc_context_init);
+}
+
diff --git a/src/i965_encoder.h b/src/i965_encoder.h
index d9d6511..20d49fc 100644
--- a/src/i965_encoder.h
+++ b/src/i965_encoder.h
@@ -39,10 +39,12 @@
struct intel_encoder_context
{
struct hw_context base;
- VAProfile profile;
+ int codec;
VASurfaceID input_yuv_surface;
int is_tmp_id;
unsigned int rate_control_mode;
+ unsigned int quality_level;
+ unsigned int quality_range;
void *vme_context;
void *mfc_context;
void (*vme_context_destroy)(void *vme_context);
@@ -62,6 +64,8 @@ struct intel_encoder_context
extern struct hw_context *
gen75_enc_hw_context_init(VADriverContextP ctx, struct object_config *obj_config);
+extern struct hw_context *
+gen8_enc_hw_context_init(VADriverContextP ctx, struct object_config *obj_config);
#endif /* _I965_ENCODER_H_ */
diff --git a/src/i965_encoder_utils.c b/src/i965_encoder_utils.c
index 7f6f768..abd25b4 100644
--- a/src/i965_encoder_utils.c
+++ b/src/i965_encoder_utils.c
@@ -233,13 +233,22 @@ slice_header(avc_bitstream *bs,
/* slice type */
if (IS_P_SLICE(slice_param->slice_type)) {
- avc_bitstream_put_ui(bs, 0, 1); /* num_ref_idx_active_override_flag: 0 */
+ avc_bitstream_put_ui(bs, slice_param->num_ref_idx_active_override_flag, 1); /* num_ref_idx_active_override_flag: */
+
+ if (slice_param->num_ref_idx_active_override_flag)
+ avc_bitstream_put_ue(bs, slice_param->num_ref_idx_l0_active_minus1);
/* ref_pic_list_reordering */
avc_bitstream_put_ui(bs, 0, 1); /* ref_pic_list_reordering_flag_l0: 0 */
} else if (IS_B_SLICE(slice_param->slice_type)) {
avc_bitstream_put_ui(bs, slice_param->direct_spatial_mv_pred_flag, 1); /* direct_spatial_mv_pred: 1 */
- avc_bitstream_put_ui(bs, 0, 1); /* num_ref_idx_active_override_flag: 0 */
+
+ avc_bitstream_put_ui(bs, slice_param->num_ref_idx_active_override_flag, 1); /* num_ref_idx_active_override_flag: */
+
+ if (slice_param->num_ref_idx_active_override_flag) {
+ avc_bitstream_put_ue(bs, slice_param->num_ref_idx_l0_active_minus1);
+ avc_bitstream_put_ue(bs, slice_param->num_ref_idx_l1_active_minus1);
+ }
/* ref_pic_list_reordering */
avc_bitstream_put_ui(bs, 0, 1); /* ref_pic_list_reordering_flag_l0: 0 */
@@ -298,6 +307,7 @@ build_avc_slice_header(VAEncSequenceParameterBufferH264 *sps_param,
{
avc_bitstream bs;
int is_idr = !!pic_param->pic_fields.bits.idr_pic_flag;
+ int is_ref = !!pic_param->pic_fields.bits.reference_pic_flag;
avc_bitstream_start(&bs);
nal_start_code_prefix(&bs);
@@ -305,10 +315,12 @@ build_avc_slice_header(VAEncSequenceParameterBufferH264 *sps_param,
if (IS_I_SLICE(slice_param->slice_type)) {
nal_header(&bs, NAL_REF_IDC_HIGH, is_idr ? NAL_IDR : NAL_NON_IDR);
} else if (IS_P_SLICE(slice_param->slice_type)) {
- nal_header(&bs, NAL_REF_IDC_MEDIUM, is_idr ? NAL_IDR : NAL_NON_IDR);
+ assert(!is_idr);
+ nal_header(&bs, NAL_REF_IDC_MEDIUM, NAL_NON_IDR);
} else {
assert(IS_B_SLICE(slice_param->slice_type));
- nal_header(&bs, NAL_REF_IDC_NONE, is_idr ? NAL_IDR : NAL_NON_IDR);
+ assert(!is_idr);
+ nal_header(&bs, is_ref ? NAL_REF_IDC_LOW : NAL_REF_IDC_NONE, NAL_NON_IDR);
}
slice_header(&bs, sps_param, pic_param, slice_param);
diff --git a/src/i965_fourcc.h b/src/i965_fourcc.h
new file mode 100644
index 0000000..3a9f120
--- /dev/null
+++ b/src/i965_fourcc.h
@@ -0,0 +1,68 @@
+#ifndef _I965_FOURCC_H_
+#define _I965_FOURCC_H_
+
+#ifndef VA_FOURCC_YV16
+#define VA_FOURCC_YV16 VA_FOURCC('Y','V','1','6')
+#endif
+
+#ifndef VA_FOURCC_I420
+#define VA_FOURCC_I420 VA_FOURCC('I','4','2','0')
+#endif
+
+/*
+ * VA_FOURCC_IA44 is an exception because the va.h already
+ * defines the AI44 as VA_FOURCC('I', 'A', '4', '4').
+ */
+#ifndef VA_FOURCC_IA44
+#define VA_FOURCC_IA44 VA_FOURCC('A','I','4','4')
+#endif
+
+#ifndef VA_FOURCC_IA88
+#define VA_FOURCC_IA88 VA_FOURCC('I','A','8','8')
+#endif
+
+#ifndef VA_FOURCC_AI88
+#define VA_FOURCC_AI88 VA_FOURCC('A','I','8','8')
+#endif
+
+#ifndef VA_FOURCC_IMC1
+#define VA_FOURCC_IMC1 VA_FOURCC('I','M','C','1')
+#endif
+
+#ifndef VA_FOURCC_YVY2
+#define VA_FOURCC_YVY2 VA_FOURCC('Y','V','Y','2')
+#endif
+
+#define I965_MAX_PLANES 4
+#define I965_MAX_COMONENTS 4
+
+#define I965_COLOR_YUV 0
+#define I965_COLOR_RGB 1
+#define I965_COLOR_INDEX 2
+
+/* Location of one colour component; used for i965_fourcc_info.components[]. */
+typedef struct {
+ uint8_t plane; /* the plane which the pixel belongs to */
+ uint8_t offset; /* bits offset within a pixel in the plane */
+} i965_component_info;
+
+/*
+ * Static description of one pixel format, keyed by fourcc; a pointer to such
+ * a record is what get_fourcc_info() returns.
+ */
+typedef struct {
+ uint32_t fourcc; /* fourcc */
+ uint32_t format; /* 0: YUV, 1: RGB, 2: Indexed format (the I965_COLOR_* values) */
+ uint32_t subsampling; /* Sub sampling */
+ uint8_t flag; /* 1: only supported by vaCreateSurfaces(), 2: only supported by vaCreateImage(), 3: both */
+ uint8_t hfactor; /* horizontal sampling factor */
+ uint8_t vfactor; /* vertical sampling factor */
+ uint8_t num_planes; /* number of planes */
+ uint8_t bpp[I965_MAX_PLANES]; /* bits per pixel within a plane */
+ uint8_t num_components; /* number of components */
+ /*
+ * Components in the array are ordered in Y, U, V, A (up to 4 components)
+ * for YUV formats, R, G, B, A (up to 4 components) for RGB formats and
+ * I, A (2 components) for indexed formats
+ */
+ i965_component_info components[I965_MAX_COMONENTS];
+} i965_fourcc_info;
+
+extern const i965_fourcc_info *get_fourcc_info(unsigned int);
+
+#endif /* _I965_FOURCC_H_ */
diff --git a/src/i965_gpe_utils.c b/src/i965_gpe_utils.c
index 4c6469b..3386b09 100644
--- a/src/i965_gpe_utils.c
+++ b/src/i965_gpe_utils.c
@@ -296,6 +296,44 @@ gen7_gpe_set_surface2_tiling(struct gen7_surface_state2 *ss, unsigned int tiling
}
+/*
+ * Set the tiled_surface/tile_walk fields in dword ss0 of a gen8 surface
+ * state from an I915_TILING_* mode. Any other tiling value leaves *ss
+ * unmodified.
+ */
static void
+gen8_gpe_set_surface_tiling(struct gen8_surface_state *ss, unsigned int tiling)
+{
+ switch (tiling) {
+ case I915_TILING_NONE:
+ ss->ss0.tiled_surface = 0;
+ ss->ss0.tile_walk = 0;
+ break;
+ case I915_TILING_X:
+ ss->ss0.tiled_surface = 1;
+ ss->ss0.tile_walk = I965_TILEWALK_XMAJOR;
+ break;
+ case I915_TILING_Y:
+ ss->ss0.tiled_surface = 1;
+ ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
+ break;
+ }
+}
+
+/*
+ * Same mapping as gen8_gpe_set_surface_tiling(), but for the
+ * gen8_surface_state2 layout, where the tiling fields live in dword ss2.
+ * Any tiling value other than NONE/X/Y leaves *ss unmodified.
+ */
+static void
+gen8_gpe_set_surface2_tiling(struct gen8_surface_state2 *ss, unsigned int tiling)
+{
+ switch (tiling) {
+ case I915_TILING_NONE:
+ ss->ss2.tiled_surface = 0;
+ ss->ss2.tile_walk = 0;
+ break;
+ case I915_TILING_X:
+ ss->ss2.tiled_surface = 1;
+ ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
+ break;
+ case I915_TILING_Y:
+ ss->ss2.tiled_surface = 1;
+ ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
+ break;
+ }
+}
+
+static void
i965_gpe_set_surface2_state(VADriverContextP ctx,
struct object_surface *obj_surface,
struct i965_surface_state2 *ss)
@@ -304,7 +342,7 @@ i965_gpe_set_surface2_state(VADriverContextP ctx,
unsigned int tiling, swizzle;
assert(obj_surface->bo);
- assert(obj_surface->fourcc == VA_FOURCC('N', 'V', '1', '2'));
+ assert(obj_surface->fourcc == VA_FOURCC_NV12);
dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
w = obj_surface->orig_width;
@@ -467,7 +505,7 @@ gen7_gpe_set_surface2_state(VADriverContextP ctx,
unsigned int tiling, swizzle;
assert(obj_surface->bo);
- assert(obj_surface->fourcc == VA_FOURCC('N', 'V', '1', '2'));
+ assert(obj_surface->fourcc == VA_FOURCC_NV12);
dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
w = obj_surface->orig_width;
@@ -610,7 +648,7 @@ gen75_gpe_media_chroma_surface_setup(VADriverContextP ctx,
dri_bo *bo;
int cbcr_offset;
- assert(obj_surface->fourcc == VA_FOURCC('N', 'V', '1', '2'));
+ assert(obj_surface->fourcc == VA_FOURCC_NV12);
bo = gpe_context->surface_state_binding_table.bo;
dri_bo_map(bo, True);
assert(bo->virtual);
@@ -677,3 +715,495 @@ gen7_gpe_buffer_suface_setup(VADriverContextP ctx,
*((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset;
dri_bo_unmap(bo);
}
+
+/*
+ * Fill a gen8_surface_state2 describing an NV12 surface.
+ *
+ * The surface must already have a buffer object and be NV12 (both are
+ * asserted). *ss is zeroed first, so every field not set here is 0.
+ * ctx is not used.
+ */
+static void
+gen8_gpe_set_surface2_state(VADriverContextP ctx,
+ struct object_surface *obj_surface,
+ struct gen8_surface_state2 *ss)
+{
+ int w, h, w_pitch;
+ unsigned int tiling, swizzle;
+
+ assert(obj_surface->bo);
+ assert(obj_surface->fourcc == VA_FOURCC_NV12);
+
+ dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
+ w = obj_surface->orig_width;
+ h = obj_surface->orig_height;
+ w_pitch = obj_surface->width;
+
+ memset(ss, 0, sizeof(*ss));
+ /* ss6: surface base address (gen8_gpe_surface2_setup() emits the relocation for this dword) */
+ ss->ss6.base_addr = obj_surface->bo->offset;
+ /* ss1 */
+ ss->ss1.cbcr_pixel_offset_v_direction = 2;
+ ss->ss1.width = w - 1;
+ ss->ss1.height = h - 1;
+ /* ss2 */
+ ss->ss2.surface_format = MFX_SURFACE_PLANAR_420_8;
+ ss->ss2.interleave_chroma = 1;
+ ss->ss2.pitch = w_pitch - 1;
+ ss->ss2.half_pitch_for_chroma = 0;
+ gen8_gpe_set_surface2_tiling(ss, tiling);
+ /* ss3: UV offset for interleave mode */
+ ss->ss3.x_offset_for_cb = obj_surface->x_cb_offset;
+ ss->ss3.y_offset_for_cb = obj_surface->y_cb_offset;
+}
+
+/*
+ * Write a gen8_surface_state2 for obj_surface into the GPE context's
+ * "surface state & binding table" buffer object and bind it.
+ *
+ * surface_state_offset: byte offset in that bo where the state is written.
+ * binding_table_offset: byte offset in that bo of the binding-table entry
+ * that receives surface_state_offset.
+ */
+void
+gen8_gpe_surface2_setup(VADriverContextP ctx,
+ struct i965_gpe_context *gpe_context,
+ struct object_surface *obj_surface,
+ unsigned long binding_table_offset,
+ unsigned long surface_state_offset)
+{
+ struct gen8_surface_state2 *ss;
+ dri_bo *bo;
+
+ bo = gpe_context->surface_state_binding_table.bo;
+ dri_bo_map(bo, 1);
+ assert(bo->virtual);
+
+ ss = (struct gen8_surface_state2 *)((char *)bo->virtual + surface_state_offset);
+ gen8_gpe_set_surface2_state(ctx, obj_surface, ss);
+ /* Relocate the ss6 base-address dword against the surface bo (no write domain). */
+ dri_bo_emit_reloc(bo,
+ I915_GEM_DOMAIN_RENDER, 0,
+ 0,
+ surface_state_offset + offsetof(struct gen8_surface_state2, ss6),
+ obj_surface->bo);
+
+ /* The binding-table entry holds the offset of the surface state just written. */
+ *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset;
+ dri_bo_unmap(bo);
+}
+
+/*
+ * Fill a gen8_surface_state that exposes plane 0 of obj_surface as a 2D
+ * R8_UNORM surface for media read/write messages. *ss is zeroed first.
+ * ctx is not used.
+ */
+static void
+gen8_gpe_set_media_rw_surface_state(VADriverContextP ctx,
+ struct object_surface *obj_surface,
+ struct gen8_surface_state *ss)
+{
+ int w, h, w_pitch;
+ unsigned int tiling, swizzle;
+
+ dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
+ w = obj_surface->orig_width;
+ h = obj_surface->orig_height;
+ w_pitch = obj_surface->width;
+
+ memset(ss, 0, sizeof(*ss));
+ /* ss0 */
+ ss->ss0.surface_type = I965_SURFACE_2D;
+ ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
+ /* ss8: surface base address */
+ ss->ss8.base_addr = obj_surface->bo->offset;
+ /* ss2 */
+ ss->ss2.width = w / 4 - 1; /* in DWORDs for media read & write message */
+ ss->ss2.height = h - 1;
+ /* ss3 */
+ ss->ss3.pitch = w_pitch - 1;
+ gen8_gpe_set_surface_tiling(ss, tiling);
+}
+
+/*
+ * Fill a gen8_surface_state that exposes the interleaved chroma plane of
+ * obj_surface as a 2D R8_UNORM surface for media read/write messages.
+ *
+ * The chroma plane is taken to start obj_surface->height * obj_surface->width
+ * bytes into the buffer object and to be obj_surface->height / 2 rows high.
+ * *ss is zeroed first. ctx is not used.
+ */
+static void
+gen8_gpe_set_media_chroma_surface_state(VADriverContextP ctx,
+ struct object_surface *obj_surface,
+ struct gen8_surface_state *ss)
+{
+ int w, w_pitch;
+ unsigned int tiling, swizzle;
+ int cbcr_offset;
+
+ dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
+ w = obj_surface->orig_width;
+ w_pitch = obj_surface->width;
+
+ cbcr_offset = obj_surface->height * obj_surface->width;
+ memset(ss, 0, sizeof(*ss));
+ /* ss0 */
+ ss->ss0.surface_type = I965_SURFACE_2D;
+ ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
+ /* ss8: base address of the chroma plane */
+ ss->ss8.base_addr = obj_surface->bo->offset + cbcr_offset;
+ /* ss2 */
+ ss->ss2.width = w / 4 - 1; /* in DWORDs for media read & write message */
+ ss->ss2.height = (obj_surface->height / 2) -1;
+ /* ss3 */
+ ss->ss3.pitch = w_pitch - 1;
+ gen8_gpe_set_surface_tiling(ss, tiling);
+}
+
+/*
+ * Write a media read/write surface state for plane 0 of obj_surface into
+ * the GPE context's "surface state & binding table" buffer object and bind
+ * it.
+ *
+ * surface_state_offset: byte offset in that bo where the state is written.
+ * binding_table_offset: byte offset in that bo of the binding-table entry
+ * that receives surface_state_offset.
+ */
+void
+gen8_gpe_media_rw_surface_setup(VADriverContextP ctx,
+ struct i965_gpe_context *gpe_context,
+ struct object_surface *obj_surface,
+ unsigned long binding_table_offset,
+ unsigned long surface_state_offset)
+{
+ struct gen8_surface_state *ss;
+ dri_bo *bo;
+
+ bo = gpe_context->surface_state_binding_table.bo;
+ dri_bo_map(bo, True);
+ assert(bo->virtual);
+
+ ss = (struct gen8_surface_state *)((char *)bo->virtual + surface_state_offset);
+ gen8_gpe_set_media_rw_surface_state(ctx, obj_surface, ss);
+ /* Relocate the ss8 base-address dword against the surface bo (no write domain). */
+ dri_bo_emit_reloc(bo,
+ I915_GEM_DOMAIN_RENDER, 0,
+ 0,
+ surface_state_offset + offsetof(struct gen8_surface_state, ss8),
+ obj_surface->bo);
+
+ /* The binding-table entry holds the offset of the surface state just written. */
+ *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset;
+ dri_bo_unmap(bo);
+}
+
+/*
+ * Write a media read/write surface state for the chroma plane of an NV12
+ * surface (asserted) into the GPE context's "surface state & binding table"
+ * buffer object and bind it.
+ *
+ * surface_state_offset: byte offset in that bo where the state is written.
+ * binding_table_offset: byte offset in that bo of the binding-table entry
+ * that receives surface_state_offset.
+ */
+void
+gen8_gpe_media_chroma_surface_setup(VADriverContextP ctx,
+ struct i965_gpe_context *gpe_context,
+ struct object_surface *obj_surface,
+ unsigned long binding_table_offset,
+ unsigned long surface_state_offset)
+{
+ struct gen8_surface_state *ss;
+ dri_bo *bo;
+ int cbcr_offset;
+
+ assert(obj_surface->fourcc == VA_FOURCC_NV12);
+ bo = gpe_context->surface_state_binding_table.bo;
+ dri_bo_map(bo, True);
+ assert(bo->virtual);
+
+ /* Same chroma offset that gen8_gpe_set_media_chroma_surface_state() adds to the base address. */
+ cbcr_offset = obj_surface->height * obj_surface->width;
+ ss = (struct gen8_surface_state *)((char *)bo->virtual + surface_state_offset);
+ gen8_gpe_set_media_chroma_surface_state(ctx, obj_surface, ss);
+ /* Relocate the ss8 dword against the surface bo, with cbcr_offset as the delta. */
+ dri_bo_emit_reloc(bo,
+ I915_GEM_DOMAIN_RENDER, 0,
+ cbcr_offset,
+ surface_state_offset + offsetof(struct gen8_surface_state, ss8),
+ obj_surface->bo);
+
+ /* The binding-table entry holds the offset of the surface state just written. */
+ *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset;
+ dri_bo_unmap(bo);
+}
+
+
+/*
+ * Fill a gen8_surface_state describing buffer_surface as a BUFFER-type
+ * surface. The buffer object must exist (asserted). *ss is zeroed first.
+ * ctx is not used.
+ *
+ * The entry count is num_blocks * size_block / pitch; (count - 1) is split
+ * across width (bits 0-6), height (bits 7-20) and depth (bits 21-26).
+ */
+static void
+gen8_gpe_set_buffer_surface_state(VADriverContextP ctx,
+ struct i965_buffer_surface *buffer_surface,
+ struct gen8_surface_state *ss)
+{
+ int num_entries;
+
+ assert(buffer_surface->bo);
+ num_entries = buffer_surface->num_blocks * buffer_surface->size_block / buffer_surface->pitch;
+
+ memset(ss, 0, sizeof(*ss));
+ /* ss0 */
+ ss->ss0.surface_type = I965_SURFACE_BUFFER;
+ /* ss8: buffer base address */
+ ss->ss8.base_addr = buffer_surface->bo->offset;
+ /* ss2 */
+ ss->ss2.width = ((num_entries - 1) & 0x7f);
+ ss->ss2.height = (((num_entries - 1) >> 7) & 0x3fff);
+ /* ss3 */
+ ss->ss3.depth = (((num_entries - 1) >> 21) & 0x3f);
+ ss->ss3.pitch = buffer_surface->pitch - 1;
+}
+
+/*
+ * Write a buffer surface state for buffer_surface into the GPE context's
+ * "surface state & binding table" buffer object and bind it.
+ *
+ * surface_state_offset: byte offset in that bo where the state is written.
+ * binding_table_offset: byte offset in that bo of the binding-table entry
+ * that receives surface_state_offset.
+ */
+void
+gen8_gpe_buffer_suface_setup(VADriverContextP ctx,
+ struct i965_gpe_context *gpe_context,
+ struct i965_buffer_surface *buffer_surface,
+ unsigned long binding_table_offset,
+ unsigned long surface_state_offset)
+{
+ struct gen8_surface_state *ss;
+ dri_bo *bo;
+
+ bo = gpe_context->surface_state_binding_table.bo;
+ dri_bo_map(bo, 1);
+ assert(bo->virtual);
+
+ ss = (struct gen8_surface_state *)((char *)bo->virtual + surface_state_offset);
+ gen8_gpe_set_buffer_surface_state(ctx, buffer_surface, ss);
+ /* Relocate the ss8 dword; RENDER is passed as both read and write domain. */
+ dri_bo_emit_reloc(bo,
+ I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+ 0,
+ surface_state_offset + offsetof(struct gen8_surface_state, ss8),
+ buffer_surface->bo);
+
+ /* The binding-table entry holds the offset of the surface state just written. */
+ *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset;
+ dri_bo_unmap(bo);
+}
+
+/*
+ * Emit a 16-dword STATE_BASE_ADDRESS command into batch.
+ *
+ * The surface-state base is always relocated against the context's
+ * surface_state_binding_table bo. The dynamic, indirect-object and
+ * instruction bases are relocated against the matching gpe_context bo when
+ * it exists, otherwise written as 0 with the modify bit set. All four
+ * upper bounds are written as 0xFFFFF000. ctx is not used.
+ */
+static void
+gen8_gpe_state_base_address(VADriverContextP ctx,
+ struct i965_gpe_context *gpe_context,
+ struct intel_batchbuffer *batch)
+{
+ BEGIN_BATCH(batch, 16);
+
+ OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 14);
+
+ OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY); //General State Base Address
+ OUT_BATCH(batch, 0);
+ OUT_BATCH(batch, 0);
+
+ /*DW4 Surface state base address */
+ OUT_RELOC(batch, gpe_context->surface_state_binding_table.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
+ OUT_BATCH(batch, 0);
+
+ /*DW6. Dynamic state base address */
+ if (gpe_context->dynamic_state.bo)
+ OUT_RELOC(batch, gpe_context->dynamic_state.bo,
+ I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_SAMPLER,
+ 0, BASE_ADDRESS_MODIFY);
+ else
+ OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
+
+ OUT_BATCH(batch, 0);
+
+ /*DW8. Indirect Object base address */
+ if (gpe_context->indirect_state.bo)
+ OUT_RELOC(batch, gpe_context->indirect_state.bo,
+ I915_GEM_DOMAIN_SAMPLER,
+ 0, BASE_ADDRESS_MODIFY);
+ else
+ OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
+
+ OUT_BATCH(batch, 0);
+
+ /*DW10. Instruction base address */
+ if (gpe_context->instruction_state.bo)
+ OUT_RELOC(batch, gpe_context->instruction_state.bo,
+ I915_GEM_DOMAIN_INSTRUCTION,
+ 0, BASE_ADDRESS_MODIFY);
+ else
+ OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
+
+ OUT_BATCH(batch, 0);
+
+ /* DW12. Size limitation */
+ OUT_BATCH(batch, 0xFFFFF000 | BASE_ADDRESS_MODIFY); //General State Access Upper Bound
+ OUT_BATCH(batch, 0xFFFFF000 | BASE_ADDRESS_MODIFY); //Dynamic State Access Upper Bound
+ OUT_BATCH(batch, 0xFFFFF000 | BASE_ADDRESS_MODIFY); //Indirect Object Access Upper Bound
+ OUT_BATCH(batch, 0xFFFFF000 | BASE_ADDRESS_MODIFY); //Instruction Access Upper Bound
+
+ /*
+ OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY); //LLC Coherent Base Address
+ OUT_BATCH(batch, 0xFFFFF000 | BASE_ADDRESS_MODIFY ); //LLC Coherent Upper Bound
+ */
+
+ ADVANCE_BATCH(batch);
+}
+
+/*
+ * Emit a 9-dword MEDIA_VFE_STATE command into batch, built from
+ * gpe_context->vfe_state and the raw vfe_desc5/6/7 dwords. The two
+ * scratch-space dwords are written as 0. ctx is not used.
+ */
+static void
+gen8_gpe_vfe_state(VADriverContextP ctx,
+ struct i965_gpe_context *gpe_context,
+ struct intel_batchbuffer *batch)
+{
+
+ BEGIN_BATCH(batch, 9);
+
+ OUT_BATCH(batch, CMD_MEDIA_VFE_STATE | (9 - 2));
+ /* Scratch Space Base Pointer and Space */
+ OUT_BATCH(batch, 0);
+ OUT_BATCH(batch, 0);
+
+ OUT_BATCH(batch,
+ gpe_context->vfe_state.max_num_threads << 16 | /* Maximum Number of Threads */
+ gpe_context->vfe_state.num_urb_entries << 8 | /* Number of URB Entries */
+ gpe_context->vfe_state.gpgpu_mode << 2); /* MEDIA Mode */
+ OUT_BATCH(batch, 0); /* Debug: Object ID */
+ OUT_BATCH(batch,
+ gpe_context->vfe_state.urb_entry_size << 16 | /* URB Entry Allocation Size */
+ gpe_context->vfe_state.curbe_allocation_size); /* CURBE Allocation Size */
+
+ /* the vfe_desc5/6/7 will decide whether the scoreboard is used. */
+ OUT_BATCH(batch, gpe_context->vfe_desc5.dword);
+ OUT_BATCH(batch, gpe_context->vfe_desc6.dword);
+ OUT_BATCH(batch, gpe_context->vfe_desc7.dword);
+
+ ADVANCE_BATCH(batch);
+
+}
+
+
+/*
+ * Emit a 4-dword MEDIA_CURBE_LOAD command into batch, passing the
+ * context's curbe_size and curbe_offset. ctx is not used.
+ * NOTE(review): curbe_offset is presumably relative to the dynamic state
+ * base address - confirm against gen8_gpe_context_init().
+ */
+static void
+gen8_gpe_curbe_load(VADriverContextP ctx,
+ struct i965_gpe_context *gpe_context,
+ struct intel_batchbuffer *batch)
+{
+ BEGIN_BATCH(batch, 4);
+
+ OUT_BATCH(batch, CMD_MEDIA_CURBE_LOAD | (4 - 2));
+ OUT_BATCH(batch, 0);
+ OUT_BATCH(batch, gpe_context->curbe_size);
+ OUT_BATCH(batch, gpe_context->curbe_offset);
+
+ ADVANCE_BATCH(batch);
+}
+
+/*
+ * Emit, in one 6-dword batch reservation, a 2-dword MEDIA_STATE_FLUSH
+ * followed by a 4-dword MEDIA_INTERFACE_LOAD that passes the context's
+ * idrt_size and idrt_offset. ctx is not used.
+ */
+static void
+gen8_gpe_idrt(VADriverContextP ctx,
+ struct i965_gpe_context *gpe_context,
+ struct intel_batchbuffer *batch)
+{
+ BEGIN_BATCH(batch, 6);
+
+ OUT_BATCH(batch, CMD_MEDIA_STATE_FLUSH);
+ OUT_BATCH(batch, 0);
+
+ OUT_BATCH(batch, CMD_MEDIA_INTERFACE_LOAD | (4 - 2));
+ OUT_BATCH(batch, 0);
+ OUT_BATCH(batch, gpe_context->idrt_size);
+ OUT_BATCH(batch, gpe_context->idrt_offset);
+
+ ADVANCE_BATCH(batch);
+}
+
+
+/*
+ * Emit the Gen8 GPE pipeline setup sequence into batch.
+ *
+ * An MI flush is emitted first, followed in order by i965_gpe_select(),
+ * the state base address, the VFE state, the CURBE load and the interface
+ * descriptor load. All values come from gpe_context.
+ */
+void
+gen8_gpe_pipeline_setup(VADriverContextP ctx,
+ struct i965_gpe_context *gpe_context,
+ struct intel_batchbuffer *batch)
+{
+ intel_batchbuffer_emit_mi_flush(batch);
+
+ /* The commands below are emitted in exactly this order. */
+ i965_gpe_select(ctx, gpe_context, batch);
+ gen8_gpe_state_base_address(ctx, gpe_context, batch);
+ gen8_gpe_vfe_state(ctx, gpe_context, batch);
+ gen8_gpe_curbe_load(ctx, gpe_context, batch);
+ gen8_gpe_idrt(ctx, gpe_context, batch);
+}
+
+/*
+ * (Re)allocate the buffer objects owned by a Gen8 GPE context and lay out
+ * the dynamic state buffer.
+ *
+ * Any previously held surface-state/binding-table bo and dynamic-state bo
+ * are unreferenced and replaced. The dynamic state buffer is then divided
+ * into three 64-byte aligned regions, in this order: CURBE, interface
+ * descriptors, sampler state. The resulting offsets are stored in
+ * gpe_context->curbe_offset, idrt_offset and sampler_offset, and the end of
+ * the last region in gpe_context->dynamic_state.end_offset.
+ *
+ * The caller must have filled in surface_state_binding_table.length,
+ * idrt_size, curbe_size and sampler_size beforehand.
+ */
+void
+gen8_gpe_context_init(VADriverContextP ctx,
+                      struct i965_gpe_context *gpe_context)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    dri_bo *bo;
+    int bo_size;
+    unsigned int start_offset, end_offset;
+
+    dri_bo_unreference(gpe_context->surface_state_binding_table.bo);
+    bo = dri_bo_alloc(i965->intel.bufmgr,
+                      "surface state & binding table",
+                      gpe_context->surface_state_binding_table.length,
+                      4096);
+    assert(bo);
+    gpe_context->surface_state_binding_table.bo = bo;
+
+    /*
+     * The extra 192 bytes are presumably headroom for the three 64-byte
+     * alignments applied below (3 * 64) -- TODO confirm.
+     */
+    bo_size = gpe_context->idrt_size + gpe_context->curbe_size + gpe_context->sampler_size + 192;
+    dri_bo_unreference(gpe_context->dynamic_state.bo);
+    /* Name this bo for what it is, so it is not confused with the one above. */
+    bo = dri_bo_alloc(i965->intel.bufmgr,
+                      "dynamic state",
+                      bo_size,
+                      4096);
+    assert(bo);
+    gpe_context->dynamic_state.bo = bo;
+    gpe_context->dynamic_state.bo_size = bo_size;
+
+    end_offset = 0;
+    gpe_context->dynamic_state.end_offset = 0;
+
+    /* Constant buffer offset */
+    start_offset = ALIGN(end_offset, 64);
+    gpe_context->curbe_offset = start_offset;
+    end_offset = start_offset + gpe_context->curbe_size;
+
+    /* Interface descriptor offset */
+    start_offset = ALIGN(end_offset, 64);
+    gpe_context->idrt_offset = start_offset;
+    end_offset = start_offset + gpe_context->idrt_size;
+
+    /* Sampler state offset */
+    start_offset = ALIGN(end_offset, 64);
+    gpe_context->sampler_offset = start_offset;
+    end_offset = start_offset + gpe_context->sampler_size;
+
+    /* update the end offset of dynamic_state */
+    gpe_context->dynamic_state.end_offset = end_offset;
+}
+
+
+/*
+ * Drop the references a Gen8 GPE context holds on its buffer objects
+ * (surface state/binding table, instruction state, dynamic state and
+ * indirect state) and clear the pointers so that a later destroy or
+ * re-init does not unreference them a second time.
+ */
+void
+gen8_gpe_context_destroy(struct i965_gpe_context *gpe_context)
+{
+    dri_bo_unreference(gpe_context->surface_state_binding_table.bo);
+    gpe_context->surface_state_binding_table.bo = NULL;
+
+    dri_bo_unreference(gpe_context->instruction_state.bo);
+    gpe_context->instruction_state.bo = NULL;
+
+    dri_bo_unreference(gpe_context->dynamic_state.bo);
+    gpe_context->dynamic_state.bo = NULL;
+
+    dri_bo_unreference(gpe_context->indirect_state.bo);
+    gpe_context->indirect_state.bo = NULL;
+}
+
+
+/*
+ * Copy a list of kernels into the GPE context and upload their binaries
+ * into a freshly allocated instruction state buffer.
+ *
+ * ctx          driver context, used to reach the buffer manager
+ * gpe_context  context that receives the kernel descriptors and the bo
+ * kernel_list  num_kernels descriptors; each bin/size pair is the binary
+ * num_kernels  number of entries, at most MAX_GPE_KERNELS
+ *
+ * Each kernel is placed at a 64-byte aligned offset, recorded in its
+ * kernel_offset field. Zero-sized kernels get an offset but occupy no
+ * space. If the buffer cannot be allocated or mapped, a warning is printed
+ * once and the function returns with instruction_state.bo set to NULL.
+ */
+void
+gen8_gpe_load_kernels(VADriverContextP ctx,
+                      struct i965_gpe_context *gpe_context,
+                      struct i965_kernel *kernel_list,
+                      unsigned int num_kernels)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    unsigned int i;     /* unsigned, to match num_kernels */
+    int kernel_size;
+    unsigned int kernel_offset, end_offset;
+    unsigned char *kernel_ptr;
+    struct i965_kernel *kernel;
+
+    assert(num_kernels <= MAX_GPE_KERNELS);
+    memcpy(gpe_context->kernels, kernel_list, sizeof(*kernel_list) * num_kernels);
+    gpe_context->num_kernels = num_kernels;
+
+    /* Reserve up to 64 bytes of alignment padding per kernel. */
+    kernel_size = num_kernels * 64;
+    for (i = 0; i < num_kernels; i++) {
+        kernel = &gpe_context->kernels[i];
+
+        kernel_size += kernel->size;
+    }
+
+    gpe_context->instruction_state.bo = dri_bo_alloc(i965->intel.bufmgr,
+                                                     "kernel shader",
+                                                     kernel_size,
+                                                     0x1000);
+    if (gpe_context->instruction_state.bo == NULL) {
+        WARN_ONCE("failure to allocate the buffer space for kernel shader\n");
+        return;
+    }
+
+    /* Do not copy through bo->virtual unless the mapping succeeded. */
+    if (dri_bo_map(gpe_context->instruction_state.bo, 1) != 0) {
+        WARN_ONCE("failure to map the buffer space for kernel shader\n");
+        dri_bo_unreference(gpe_context->instruction_state.bo);
+        gpe_context->instruction_state.bo = NULL;
+        return;
+    }
+
+    gpe_context->instruction_state.bo_size = kernel_size;
+    gpe_context->instruction_state.end_offset = 0;
+    end_offset = 0;
+
+    kernel_ptr = (unsigned char *)(gpe_context->instruction_state.bo->virtual);
+    for (i = 0; i < num_kernels; i++) {
+        kernel_offset = ALIGN(end_offset, 64);
+        kernel = &gpe_context->kernels[i];
+        kernel->kernel_offset = kernel_offset;
+
+        if (kernel->size) {
+            memcpy(kernel_ptr + kernel_offset, kernel->bin, kernel->size);
+
+            end_offset = kernel_offset + kernel->size;
+        }
+    }
+
+    gpe_context->instruction_state.end_offset = end_offset;
+
+    dri_bo_unmap(gpe_context->instruction_state.bo);
+}
+
diff --git a/src/i965_gpe_utils.h b/src/i965_gpe_utils.h
index 72d7de8..2331152 100644
--- a/src/i965_gpe_utils.h
+++ b/src/i965_gpe_utils.h
@@ -114,6 +114,29 @@ struct i965_gpe_context
unsigned int num_kernels;
struct i965_kernel kernels[MAX_GPE_KERNELS];
+
+ struct {
+ dri_bo *bo;
+ int bo_size;
+ unsigned int end_offset;
+ } instruction_state;
+
+ struct {
+ dri_bo *bo;
+ } indirect_state;
+
+ struct {
+ dri_bo *bo;
+ int bo_size;
+ unsigned int end_offset;
+ } dynamic_state;
+
+ unsigned int sampler_offset;
+ int sampler_size;
+ unsigned int idrt_offset;
+ int idrt_size;
+ unsigned int curbe_offset;
+ int curbe_size;
};
void i965_gpe_context_destroy(struct i965_gpe_context *gpe_context);
@@ -161,4 +184,39 @@ void gen75_gpe_media_chroma_surface_setup(VADriverContextP ctx,
struct object_surface *obj_surface,
unsigned long binding_table_offset,
unsigned long surface_state_offset);
+
+/*
+ * Gen8 surface setup entry points. Each takes the GPE context, the surface
+ * (or buffer surface) to describe, and the binding table and surface state
+ * offsets to use.
+ */
+extern void gen8_gpe_surface2_setup(VADriverContextP ctx,
+ struct i965_gpe_context *gpe_context,
+ struct object_surface *obj_surface,
+ unsigned long binding_table_offset,
+ unsigned long surface_state_offset);
+extern void gen8_gpe_media_rw_surface_setup(VADriverContextP ctx,
+ struct i965_gpe_context *gpe_context,
+ struct object_surface *obj_surface,
+ unsigned long binding_table_offset,
+ unsigned long surface_state_offset);
+/* NOTE: the "suface" spelling is part of the declared name. */
+extern void gen8_gpe_buffer_suface_setup(VADriverContextP ctx,
+ struct i965_gpe_context *gpe_context,
+ struct i965_buffer_surface *buffer_surface,
+ unsigned long binding_table_offset,
+ unsigned long surface_state_offset);
+extern void gen8_gpe_media_chroma_surface_setup(VADriverContextP ctx,
+ struct i965_gpe_context *gpe_context,
+ struct object_surface *obj_surface,
+ unsigned long binding_table_offset,
+ unsigned long surface_state_offset);
+
+/* Emit the Gen8 GPE pipeline setup commands for gpe_context into batch. */
+void gen8_gpe_pipeline_setup(VADriverContextP ctx,
+ struct i965_gpe_context *gpe_context,
+ struct intel_batchbuffer *batch);
+
+
+/* Release the buffer objects referenced by gpe_context. */
+void gen8_gpe_context_destroy(struct i965_gpe_context *gpe_context);
+/* Allocate the buffer objects of gpe_context and compute its state offsets. */
+void gen8_gpe_context_init(VADriverContextP ctx,
+ struct i965_gpe_context *gpe_context);
+
+/* Copy num_kernels entries of kernel_list into gpe_context and upload them. */
+void gen8_gpe_load_kernels(VADriverContextP ctx,
+ struct i965_gpe_context *gpe_context,
+ struct i965_kernel *kernel_list,
+ unsigned int num_kernels);
#endif /* _I965_GPE_UTILS_H_ */
diff --git a/src/i965_media.c b/src/i965_media.c
index d734a8e..644104e 100644
--- a/src/i965_media.c
+++ b/src/i965_media.c
@@ -60,7 +60,7 @@ i965_media_urb_layout(VADriverContextP ctx, struct i965_media_context *media_con
unsigned int vfe_fence, cs_fence;
vfe_fence = media_context->urb.cs_start;
- cs_fence = URB_SIZE((&i965->intel));
+ cs_fence = i965->intel.device_info->urb_size;
BEGIN_BATCH(batch, 3);
OUT_BATCH(batch, CMD_URB_FENCE | UF0_VFE_REALLOC | UF0_CS_REALLOC | 1);
@@ -77,7 +77,7 @@ i965_media_state_base_address(VADriverContextP ctx, struct i965_media_context *m
struct i965_driver_data *i965 = i965_driver_data(ctx);
struct intel_batchbuffer *batch = media_context->base.batch;
- if (IS_IRONLAKE(i965->intel.device_id)) {
+ if (IS_IRONLAKE(i965->intel.device_info)) {
BEGIN_BATCH(batch, 8);
OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 6);
OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
@@ -257,7 +257,7 @@ i965_media_decode_init(VADriverContextP ctx,
i965_media_mpeg2_decode_init(ctx, decode_state, media_context);
break;
- case VAProfileH264Baseline:
+ case VAProfileH264ConstrainedBaseline:
case VAProfileH264Main:
case VAProfileH264High:
i965_media_h264_decode_init(ctx, decode_state, media_context);
@@ -348,7 +348,7 @@ g4x_dec_hw_context_init(VADriverContextP ctx, struct object_config *obj_config)
i965_media_mpeg2_dec_context_init(ctx, media_context);
break;
- case VAProfileH264Baseline:
+ case VAProfileH264ConstrainedBaseline:
case VAProfileH264Main:
case VAProfileH264High:
i965_media_h264_dec_context_init(ctx, media_context);
@@ -381,7 +381,7 @@ ironlake_dec_hw_context_init(VADriverContextP ctx, struct object_config *obj_con
i965_media_mpeg2_dec_context_init(ctx, media_context);
break;
- case VAProfileH264Baseline:
+ case VAProfileH264ConstrainedBaseline:
case VAProfileH264Main:
case VAProfileH264High:
i965_media_h264_dec_context_init(ctx, media_context);
diff --git a/src/i965_media_h264.c b/src/i965_media_h264.c
index 93e13f6..cf95299 100644
--- a/src/i965_media_h264.c
+++ b/src/i965_media_h264.c
@@ -11,6 +11,7 @@
#include "i965_drv_video.h"
#include "i965_media.h"
#include "i965_media_h264.h"
+#include "i965_decoder_utils.h"
enum {
INTRA_16X16 = 0,
@@ -349,7 +350,7 @@ i965_media_h264_surfaces_setup(VADriverContextP ctx,
struct object_surface *obj_surface;
VAPictureParameterBufferH264 *pic_param;
VAPictureH264 *va_pic;
- int i, j, w, h;
+ int i, w, h;
int field_picture;
assert(media_context->private_context);
@@ -381,24 +382,15 @@ i965_media_h264_surfaces_setup(VADriverContextP ctx,
/* Reference Pictures */
for (i = 0; i < ARRAY_ELEMS(i965_h264_context->fsid_list); i++) {
- if (i965_h264_context->fsid_list[i].surface_id != VA_INVALID_ID &&
- i965_h264_context->fsid_list[i].obj_surface != NULL) {
- int found = 0;
- for (j = 0; j < ARRAY_ELEMS(pic_param->ReferenceFrames); j++) {
- va_pic = &pic_param->ReferenceFrames[j];
-
- if (va_pic->flags & VA_PICTURE_H264_INVALID)
- continue;
-
- if (va_pic->picture_id == i965_h264_context->fsid_list[i].surface_id) {
- found = 1;
- break;
- }
- }
-
- assert(found == 1);
-
- obj_surface = i965_h264_context->fsid_list[i].obj_surface;
+ struct object_surface * const obj_surface =
+ i965_h264_context->fsid_list[i].obj_surface;
+
+ if (obj_surface) {
+ const VAPictureH264 * const va_pic = avc_find_picture(
+ obj_surface->base.id, pic_param->ReferenceFrames,
+ ARRAY_ELEMS(pic_param->ReferenceFrames));
+
+ assert(va_pic != NULL);
w = obj_surface->width;
h = obj_surface->height;
field_picture = !!(va_pic->flags & (VA_PICTURE_H264_TOP_FIELD | VA_PICTURE_H264_BOTTOM_FIELD));
@@ -919,7 +911,7 @@ i965_media_h264_dec_context_init(VADriverContextP ctx, struct i965_media_context
sizeof(h264_avc_kernels_gen5[0])));
assert(NUM_AVC_MC_INTERFACES == (sizeof(avc_mc_kernel_offset_gen5) /
sizeof(avc_mc_kernel_offset_gen5[0])));
- if (IS_IRONLAKE(i965->intel.device_id)) {
+ if (IS_IRONLAKE(i965->intel.device_info)) {
memcpy(i965_h264_context->avc_kernels, h264_avc_kernels_gen5, sizeof(i965_h264_context->avc_kernels));
avc_mc_kernel_offset = avc_mc_kernel_offset_gen5;
intra_kernel_header = &intra_kernel_header_gen5;
@@ -953,7 +945,7 @@ i965_media_h264_dec_context_init(VADriverContextP ctx, struct i965_media_context
media_context->free_private_context = i965_media_h264_free_private_context;
/* URB */
- if (IS_IRONLAKE(i965->intel.device_id)) {
+ if (IS_IRONLAKE(i965->intel.device_info)) {
media_context->urb.num_vfe_entries = 63;
} else {
media_context->urb.num_vfe_entries = 23;
@@ -968,7 +960,7 @@ i965_media_h264_dec_context_init(VADriverContextP ctx, struct i965_media_context
media_context->urb.cs_start = media_context->urb.vfe_start +
media_context->urb.num_vfe_entries * media_context->urb.size_vfe_entry;
assert(media_context->urb.cs_start +
- media_context->urb.num_cs_entries * media_context->urb.size_cs_entry <= URB_SIZE((&i965->intel)));
+ media_context->urb.num_cs_entries * media_context->urb.size_cs_entry <= i965->intel.device_info->urb_size);
/* hook functions */
media_context->media_states_setup = i965_media_h264_states_setup;
diff --git a/src/i965_media_h264.h b/src/i965_media_h264.h
index 490213c..e507e1d 100644
--- a/src/i965_media_h264.h
+++ b/src/i965_media_h264.h
@@ -61,6 +61,7 @@ struct i965_h264_context
struct i965_avc_hw_scoreboard_context avc_hw_scoreboard_context;
struct i965_avc_ildb_context avc_ildb_context;
+ GenFrameStoreContext fs_ctx;
GenFrameStore fsid_list[MAX_GEN_REFERENCE_FRAMES];
struct i965_kernel avc_kernels[NUM_H264_AVC_KERNELS];
diff --git a/src/i965_media_mpeg2.c b/src/i965_media_mpeg2.c
index 1c105b3..245c8e7 100644
--- a/src/i965_media_mpeg2.c
+++ b/src/i965_media_mpeg2.c
@@ -515,7 +515,7 @@ i965_media_mpeg2_surface_setup(VADriverContextP ctx,
int w = obj_surface->width;
int h = obj_surface->height;
- i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC('I','4','2','0'), SUBSAMPLE_YUV420);
+ i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC_I420, SUBSAMPLE_YUV420);
if (picture_structure == MPEG_FRAME) {
i965_media_mpeg2_surface_state(ctx, base_index + 0, obj_surface,
@@ -988,7 +988,7 @@ i965_media_mpeg2_dec_context_init(VADriverContextP ctx, struct i965_media_contex
sizeof(mpeg2_vld_kernels_gen5[0])));
assert(NUM_MPEG2_VLD_KERNELS <= MAX_INTERFACE_DESC);
- if (IS_IRONLAKE(i965->intel.device_id))
+ if (IS_IRONLAKE(i965->intel.device_info))
memcpy(i965_mpeg2_context->vld_kernels, mpeg2_vld_kernels_gen5, sizeof(i965_mpeg2_context->vld_kernels));
else
memcpy(i965_mpeg2_context->vld_kernels, mpeg2_vld_kernels_gen4, sizeof(i965_mpeg2_context->vld_kernels));
@@ -1013,7 +1013,7 @@ i965_media_mpeg2_dec_context_init(VADriverContextP ctx, struct i965_media_contex
media_context->urb.cs_start = media_context->urb.vfe_start +
media_context->urb.num_vfe_entries * media_context->urb.size_vfe_entry;
assert(media_context->urb.cs_start +
- media_context->urb.num_cs_entries * media_context->urb.size_cs_entry <= URB_SIZE((&i965->intel)));
+ media_context->urb.num_cs_entries * media_context->urb.size_cs_entry <= i965->intel.device_info->urb_size);
/* hook functions */
media_context->media_states_setup = i965_media_mpeg2_states_setup;
diff --git a/src/i965_output_dri.c b/src/i965_output_dri.c
index de7be92..6f8ea31 100644
--- a/src/i965_output_dri.c
+++ b/src/i965_output_dri.c
@@ -127,6 +127,7 @@ i965_put_surface_dri(
bool new_region = false;
uint32_t name;
int i, ret;
+ unsigned int color_flag = 0;
/* Currently don't support DRI1 */
if (!VA_CHECK_DRM_AUTH_TYPE(ctx, VA_DRM_AUTH_DRI2))
@@ -136,8 +137,7 @@ i965_put_surface_dri(
* will get here
*/
obj_surface = SURFACE(surface);
- if (!obj_surface || !obj_surface->bo)
- return VA_STATUS_SUCCESS;
+ ASSERT_RET(obj_surface && obj_surface->bo, VA_STATUS_SUCCESS);
_i965LockMutex(&i965->render_mutex);
@@ -179,6 +179,12 @@ i965_put_surface_dri(
assert(ret == 0);
}
+ color_flag = flags & VA_SRC_COLOR_MASK;
+ if (color_flag == 0)
+ color_flag = VA_SRC_BT601;
+
+ pp_flag = color_flag;
+
if ((flags & VA_FILTER_SCALING_MASK) == VA_FILTER_SCALING_NL_ANAMORPHIC)
pp_flag |= I965_PP_FLAG_AVS;
@@ -197,17 +203,8 @@ i965_put_surface_dri(
}
}
- dri_vtable->swap_buffer(ctx, dri_drawable);
- obj_surface->flags |= SURFACE_DISPLAYED;
-
- if ((obj_surface->flags & SURFACE_ALL_MASK) == SURFACE_DISPLAYED) {
- dri_bo_unreference(obj_surface->bo);
- obj_surface->bo = NULL;
- obj_surface->flags &= ~SURFACE_REF_DIS_MASK;
-
- if (obj_surface->free_private_data)
- obj_surface->free_private_data(&obj_surface->private_data);
- }
+ if (!(g_intel_debug_option_flags & VA_INTEL_DEBUG_OPTION_BENCH))
+ dri_vtable->swap_buffer(ctx, dri_drawable);
_i965UnlockMutex(&i965->render_mutex);
diff --git a/src/i965_output_wayland.c b/src/i965_output_wayland.c
index be7f32c..5a75397 100644
--- a/src/i965_output_wayland.c
+++ b/src/i965_output_wayland.c
@@ -237,7 +237,7 @@ va_GetSurfaceBufferWl(
return VA_STATUS_ERROR_INVALID_SURFACE;
switch (obj_surface->fourcc) {
- case VA_FOURCC('N','V','1','2'):
+ case VA_FOURCC_NV12:
drm_format = WL_DRM_FORMAT_NV12;
offsets[0] = 0;
pitches[0] = obj_surface->width;
@@ -246,10 +246,14 @@ va_GetSurfaceBufferWl(
offsets[2] = 0;
pitches[2] = 0;
break;
- case VA_FOURCC('Y','V','1','2'):
- case VA_FOURCC('I','4','2','0'):
- case VA_FOURCC('I','M','C','1'):
- case VA_FOURCC('I','M','C','3'):
+ case VA_FOURCC_YV12:
+ case VA_FOURCC_I420:
+ case VA_FOURCC_IMC1:
+ case VA_FOURCC_IMC3:
+ case VA_FOURCC_422H:
+ case VA_FOURCC_422V:
+ case VA_FOURCC_411P:
+ case VA_FOURCC_444P:
switch (obj_surface->subsampling) {
case SUBSAMPLE_YUV411:
drm_format = WL_DRM_FORMAT_YUV411;
diff --git a/src/i965_pciids.h b/src/i965_pciids.h
new file mode 100644
index 0000000..fc046d1
--- /dev/null
+++ b/src/i965_pciids.h
@@ -0,0 +1,135 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Copied and modified from (mesa) include/pci_ids/i965_pci_ids.h
+ */
+
+CHIPSET(0x2A42, g4x, g4x, "Intel(R) GM45 Express Chipset")
+CHIPSET(0x2E02, g4x, g4x, "Intel(R) Integrated Graphics Device")
+CHIPSET(0x2E12, g4x, g4x, "Intel(R) Q45/Q43")
+CHIPSET(0x2E22, g4x, g4x, "Intel(R) G45/G43")
+CHIPSET(0x2E32, g4x, g4x, "Intel(R) G41")
+CHIPSET(0x2E42, g4x, g4x, "Intel(R) B43")
+CHIPSET(0x2E92, g4x, g4x, "Intel(R) B43")
+CHIPSET(0x0042, ilk, ilk, "Intel(R) Ironlake Desktop")
+CHIPSET(0x0046, ilk, ilk, "Intel(R) Ironlake Mobile")
+CHIPSET(0x0102, snb, snb_gt1, "Intel(R) Sandybridge Desktop")
+CHIPSET(0x0112, snb, snb_gt2, "Intel(R) Sandybridge Desktop")
+CHIPSET(0x0122, snb, snb_gt2, "Intel(R) Sandybridge Desktop")
+CHIPSET(0x0106, snb, snb_gt1, "Intel(R) Sandybridge Mobile")
+CHIPSET(0x0116, snb, snb_gt2, "Intel(R) Sandybridge Mobile")
+CHIPSET(0x0126, snb, snb_gt2, "Intel(R) Sandybridge Mobile")
+CHIPSET(0x010A, snb, snb_gt1, "Intel(R) Sandybridge Server")
+CHIPSET(0x0152, ivb, ivb_gt1, "Intel(R) Ivybridge Desktop")
+CHIPSET(0x0162, ivb, ivb_gt2, "Intel(R) Ivybridge Desktop")
+CHIPSET(0x0156, ivb, ivb_gt1, "Intel(R) Ivybridge Mobile")
+CHIPSET(0x0166, ivb, ivb_gt2, "Intel(R) Ivybridge Mobile")
+CHIPSET(0x015A, ivb, ivb_gt1, "Intel(R) Ivybridge Server")
+CHIPSET(0x016A, ivb, ivb_gt2, "Intel(R) Ivybridge Server")
+CHIPSET(0x0F31, ivb, byt, "Intel(R) Bay Trail")
+CHIPSET(0x0F32, ivb, byt, "Intel(R) Bay Trail")
+CHIPSET(0x0F33, ivb, byt, "Intel(R) Bay Trail")
+CHIPSET(0x0157, ivb, byt, "Intel(R) Bay Trail")
+CHIPSET(0x0155, ivb, byt, "Intel(R) Bay Trail")
+CHIPSET(0x0402, hsw, hsw_gt1, "Intel(R) Haswell Desktop")
+CHIPSET(0x0412, hsw, hsw_gt2, "Intel(R) Haswell Desktop")
+CHIPSET(0x0422, hsw, hsw_gt3, "Intel(R) Haswell Desktop")
+CHIPSET(0x0406, hsw, hsw_gt1, "Intel(R) Haswell Mobile")
+CHIPSET(0x0416, hsw, hsw_gt2, "Intel(R) Haswell Mobile")
+CHIPSET(0x0426, hsw, hsw_gt3, "Intel(R) Haswell Mobile")
+CHIPSET(0x040A, hsw, hsw_gt1, "Intel(R) Haswell Server")
+CHIPSET(0x041A, hsw, hsw_gt2, "Intel(R) Haswell Server")
+CHIPSET(0x042A, hsw, hsw_gt3, "Intel(R) Haswell Server")
+CHIPSET(0x040B, hsw, hsw_gt1, "Intel(R) Haswell")
+CHIPSET(0x041B, hsw, hsw_gt2, "Intel(R) Haswell")
+CHIPSET(0x042B, hsw, hsw_gt3, "Intel(R) Haswell")
+CHIPSET(0x040E, hsw, hsw_gt1, "Intel(R) Haswell")
+CHIPSET(0x041E, hsw, hsw_gt2, "Intel(R) Haswell")
+CHIPSET(0x042E, hsw, hsw_gt3, "Intel(R) Haswell")
+CHIPSET(0x0C02, hsw, hsw_gt1, "Intel(R) Haswell Desktop")
+CHIPSET(0x0C12, hsw, hsw_gt2, "Intel(R) Haswell Desktop")
+CHIPSET(0x0C22, hsw, hsw_gt3, "Intel(R) Haswell Desktop")
+CHIPSET(0x0C06, hsw, hsw_gt1, "Intel(R) Haswell Mobile")
+CHIPSET(0x0C16, hsw, hsw_gt2, "Intel(R) Haswell Mobile")
+CHIPSET(0x0C26, hsw, hsw_gt3, "Intel(R) Haswell Mobile")
+CHIPSET(0x0C0A, hsw, hsw_gt1, "Intel(R) Haswell Server")
+CHIPSET(0x0C1A, hsw, hsw_gt2, "Intel(R) Haswell Server")
+CHIPSET(0x0C2A, hsw, hsw_gt3, "Intel(R) Haswell Server")
+CHIPSET(0x0C0B, hsw, hsw_gt1, "Intel(R) Haswell")
+CHIPSET(0x0C1B, hsw, hsw_gt2, "Intel(R) Haswell")
+CHIPSET(0x0C2B, hsw, hsw_gt3, "Intel(R) Haswell")
+CHIPSET(0x0C0E, hsw, hsw_gt1, "Intel(R) Haswell")
+CHIPSET(0x0C1E, hsw, hsw_gt2, "Intel(R) Haswell")
+CHIPSET(0x0C2E, hsw, hsw_gt3, "Intel(R) Haswell")
+CHIPSET(0x0A02, hsw, hsw_gt1, "Intel(R) Haswell Desktop")
+CHIPSET(0x0A12, hsw, hsw_gt2, "Intel(R) Haswell Desktop")
+CHIPSET(0x0A22, hsw, hsw_gt3, "Intel(R) Haswell Desktop")
+CHIPSET(0x0A06, hsw, hsw_gt1, "Intel(R) Haswell Mobile")
+CHIPSET(0x0A16, hsw, hsw_gt2, "Intel(R) Haswell Mobile")
+CHIPSET(0x0A26, hsw, hsw_gt3, "Intel(R) Haswell Mobile")
+CHIPSET(0x0A0A, hsw, hsw_gt1, "Intel(R) Haswell Server")
+CHIPSET(0x0A1A, hsw, hsw_gt2, "Intel(R) Haswell Server")
+CHIPSET(0x0A2A, hsw, hsw_gt3, "Intel(R) Haswell Server")
+CHIPSET(0x0A0B, hsw, hsw_gt1, "Intel(R) Haswell")
+CHIPSET(0x0A1B, hsw, hsw_gt2, "Intel(R) Haswell")
+CHIPSET(0x0A2B, hsw, hsw_gt3, "Intel(R) Haswell")
+CHIPSET(0x0A0E, hsw, hsw_gt1, "Intel(R) Haswell")
+CHIPSET(0x0A1E, hsw, hsw_gt2, "Intel(R) Haswell")
+CHIPSET(0x0A2E, hsw, hsw_gt3, "Intel(R) Haswell")
+CHIPSET(0x0D02, hsw, hsw_gt1, "Intel(R) Haswell Desktop")
+CHIPSET(0x0D12, hsw, hsw_gt2, "Intel(R) Haswell Desktop")
+CHIPSET(0x0D22, hsw, hsw_gt3, "Intel(R) Haswell Desktop")
+CHIPSET(0x0D06, hsw, hsw_gt1, "Intel(R) Haswell Mobile")
+CHIPSET(0x0D16, hsw, hsw_gt2, "Intel(R) Haswell Mobile")
+CHIPSET(0x0D26, hsw, hsw_gt3, "Intel(R) Haswell Mobile")
+CHIPSET(0x0D0A, hsw, hsw_gt1, "Intel(R) Haswell Server")
+CHIPSET(0x0D1A, hsw, hsw_gt2, "Intel(R) Haswell Server")
+CHIPSET(0x0D2A, hsw, hsw_gt3, "Intel(R) Haswell Server")
+CHIPSET(0x0D0B, hsw, hsw_gt1, "Intel(R) Haswell")
+CHIPSET(0x0D1B, hsw, hsw_gt2, "Intel(R) Haswell")
+CHIPSET(0x0D2B, hsw, hsw_gt3, "Intel(R) Haswell")
+CHIPSET(0x0D0E, hsw, hsw_gt1, "Intel(R) Haswell")
+CHIPSET(0x0D1E, hsw, hsw_gt2, "Intel(R) Haswell")
+CHIPSET(0x0D2E, hsw, hsw_gt3, "Intel(R) Haswell")
+CHIPSET(0x1602, bdw, bdw, "Intel(R) Broadwell")
+CHIPSET(0x1606, bdw, bdw, "Intel(R) Broadwell")
+CHIPSET(0x160A, bdw, bdw, "Intel(R) Broadwell")
+CHIPSET(0x160B, bdw, bdw, "Intel(R) Broadwell")
+CHIPSET(0x160D, bdw, bdw, "Intel(R) Broadwell")
+CHIPSET(0x160E, bdw, bdw, "Intel(R) Broadwell")
+CHIPSET(0x1612, bdw, bdw, "Intel(R) Broadwell")
+CHIPSET(0x1616, bdw, bdw, "Intel(R) Broadwell")
+CHIPSET(0x161A, bdw, bdw, "Intel(R) Broadwell")
+CHIPSET(0x161B, bdw, bdw, "Intel(R) Broadwell")
+CHIPSET(0x161D, bdw, bdw, "Intel(R) Broadwell")
+CHIPSET(0x161E, bdw, bdw, "Intel(R) Broadwell")
+CHIPSET(0x1622, bdw, bdw, "Intel(R) Broadwell")
+CHIPSET(0x1626, bdw, bdw, "Intel(R) Broadwell")
+CHIPSET(0x162A, bdw, bdw, "Intel(R) Broadwell")
+CHIPSET(0x162B, bdw, bdw, "Intel(R) Broadwell")
+CHIPSET(0x162D, bdw, bdw, "Intel(R) Broadwell")
+CHIPSET(0x162E, bdw, bdw, "Intel(R) Broadwell")
+CHIPSET(0x22B0, chv, chv, "Intel(R) CherryView")
+CHIPSET(0x22B1, chv, chv, "Intel(R) CherryView")
+CHIPSET(0x22B2, chv, chv, "Intel(R) CherryView")
+CHIPSET(0x22B3, chv, chv, "Intel(R) CherryView")
diff --git a/src/i965_post_processing.c b/src/i965_post_processing.c
index e91dc03..6d435a8 100755
--- a/src/i965_post_processing.c
+++ b/src/i965_post_processing.c
@@ -40,19 +40,16 @@
#include "i965_render.h"
#include "intel_media.h"
-#define HAS_PP(ctx) (IS_IRONLAKE((ctx)->intel.device_id) || \
- IS_GEN6((ctx)->intel.device_id) || \
- IS_GEN7((ctx)->intel.device_id))
+extern VAStatus
+vpp_surface_convert(VADriverContextP ctx,
+ struct object_surface *src_obj_surf,
+ struct object_surface *dst_obj_surf);
-#define SURFACE_STATE_PADDED_SIZE_0_I965 ALIGN(sizeof(struct i965_surface_state), 32)
-#define SURFACE_STATE_PADDED_SIZE_1_I965 ALIGN(sizeof(struct i965_surface_state2), 32)
-#define SURFACE_STATE_PADDED_SIZE_I965 MAX(SURFACE_STATE_PADDED_SIZE_0_I965, SURFACE_STATE_PADDED_SIZE_1_I965)
+#define HAS_VPP(ctx) ((ctx)->codec_info->has_vpp)
-#define SURFACE_STATE_PADDED_SIZE_0_GEN7 ALIGN(sizeof(struct gen7_surface_state), 32)
-#define SURFACE_STATE_PADDED_SIZE_1_GEN7 ALIGN(sizeof(struct gen7_surface_state2), 32)
-#define SURFACE_STATE_PADDED_SIZE_GEN7 MAX(SURFACE_STATE_PADDED_SIZE_0_GEN7, SURFACE_STATE_PADDED_SIZE_1_GEN7)
+#define SURFACE_STATE_PADDED_SIZE MAX(SURFACE_STATE_PADDED_SIZE_GEN8,\
+ MAX(SURFACE_STATE_PADDED_SIZE_GEN6, SURFACE_STATE_PADDED_SIZE_GEN7))
-#define SURFACE_STATE_PADDED_SIZE MAX(SURFACE_STATE_PADDED_SIZE_I965, SURFACE_STATE_PADDED_SIZE_GEN7)
#define SURFACE_STATE_OFFSET(index) (SURFACE_STATE_PADDED_SIZE * index)
#define BINDING_TABLE_OFFSET SURFACE_STATE_OFFSET(MAX_PP_SURFACES)
@@ -60,6 +57,8 @@
#define GPU_ASM_BLOCK_HEIGHT 8
#define GPU_ASM_X_OFFSET_ALIGNMENT 4
+#define VA_STATUS_SUCCESS_1 0xFFFFFFFE
+
static const uint32_t pp_null_gen5[][4] = {
#include "shaders/post_processing/gen5_6/null.g4b.gen5"
};
@@ -112,6 +111,10 @@ static const uint32_t pp_pa_load_save_pl3_gen5[][4] = {
#include "shaders/post_processing/gen5_6/pa_load_save_pl3.g4b.gen5"
};
+static const uint32_t pp_pa_load_save_pa_gen5[][4] = {
+#include "shaders/post_processing/gen5_6/pa_load_save_pa.g4b.gen5"
+};
+
static const uint32_t pp_rgbx_load_save_nv12_gen5[][4] = {
#include "shaders/post_processing/gen5_6/rgbx_load_save_nv12.g4b.gen5"
};
@@ -322,6 +325,18 @@ static struct pp_module pp_modules_gen5[] = {
{
{
+ "PA_PA module",
+ PP_PA_LOAD_SAVE_PA,
+ pp_pa_load_save_pa_gen5,
+ sizeof(pp_pa_load_save_pa_gen5),
+ NULL,
+ },
+
+ pp_plx_load_save_plx_initialize,
+ },
+
+ {
+ {
"RGBX_NV12 module",
PP_RGBX_LOAD_SAVE_NV12,
pp_rgbx_load_save_nv12_gen5,
@@ -397,6 +412,10 @@ static const uint32_t pp_pa_load_save_pl3_gen6[][4] = {
#include "shaders/post_processing/gen5_6/pa_load_save_pl3.g6b"
};
+static const uint32_t pp_pa_load_save_pa_gen6[][4] = {
+#include "shaders/post_processing/gen5_6/pa_load_save_pa.g6b"
+};
+
static const uint32_t pp_rgbx_load_save_nv12_gen6[][4] = {
#include "shaders/post_processing/gen5_6/rgbx_load_save_nv12.g6b"
};
@@ -560,7 +579,19 @@ static struct pp_module pp_modules_gen6[] = {
pp_plx_load_save_plx_initialize,
},
-
+
+ {
+ {
+ "PA_PA module",
+ PP_PA_LOAD_SAVE_PA,
+ pp_pa_load_save_pa_gen6,
+ sizeof(pp_pa_load_save_pa_gen6),
+ NULL,
+ },
+
+ pp_plx_load_save_plx_initialize,
+ },
+
{
{
"RGBX_NV12 module",
@@ -632,6 +663,9 @@ static const uint32_t pp_pa_load_save_nv12_gen7[][4] = {
static const uint32_t pp_pa_load_save_pl3_gen7[][4] = {
#include "shaders/post_processing/gen7/pa_to_pl3.g7b"
};
+static const uint32_t pp_pa_load_save_pa_gen7[][4] = {
+#include "shaders/post_processing/gen7/pa_to_pa.g7b"
+};
static const uint32_t pp_rgbx_load_save_nv12_gen7[][4] = {
#include "shaders/post_processing/gen7/rgbx_to_nv12.g7b"
};
@@ -658,13 +692,6 @@ static VAStatus gen7_pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_pos
const VARectangle *dst_rect,
void *filter_param);
-static VAStatus gen7_pp_rgbx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
- const struct i965_surface *src_surface,
- const VARectangle *src_rect,
- struct i965_surface *dst_surface,
- const VARectangle *dst_rect,
- void *filter_param);
-
static struct pp_module pp_modules_gen7[] = {
{
{
@@ -820,7 +847,19 @@ static struct pp_module pp_modules_gen7[] = {
gen7_pp_plx_avs_initialize,
},
-
+
+ {
+ {
+ "PA_PA module",
+ PP_PA_LOAD_SAVE_PA,
+ pp_pa_load_save_pa_gen7,
+ sizeof(pp_pa_load_save_pa_gen7),
+ NULL,
+ },
+
+ gen7_pp_plx_avs_initialize,
+ },
+
{
{
"RGBX_NV12 module",
@@ -830,7 +869,7 @@ static struct pp_module pp_modules_gen7[] = {
NULL,
},
- gen7_pp_rgbx_avs_initialize,
+ gen7_pp_plx_avs_initialize,
},
{
@@ -893,6 +932,9 @@ static const uint32_t pp_pa_load_save_nv12_gen75[][4] = {
static const uint32_t pp_pa_load_save_pl3_gen75[][4] = {
#include "shaders/post_processing/gen7/pa_to_pl3.g75b"
};
+static const uint32_t pp_pa_load_save_pa_gen75[][4] = {
+#include "shaders/post_processing/gen7/pa_to_pa.g75b"
+};
static const uint32_t pp_rgbx_load_save_nv12_gen75[][4] = {
#include "shaders/post_processing/gen7/rgbx_to_nv12.g75b"
};
@@ -994,7 +1036,7 @@ static struct pp_module pp_modules_gen75[] = {
NULL,
},
- gen7_pp_nv12_dndi_initialize,
+ gen7_pp_nv12_dn_initialize,
},
{
@@ -1056,7 +1098,19 @@ static struct pp_module pp_modules_gen75[] = {
gen7_pp_plx_avs_initialize,
},
-
+
+ {
+ {
+ "PA_PA module",
+ PP_PA_LOAD_SAVE_PA,
+ pp_pa_load_save_pa_gen75,
+ sizeof(pp_pa_load_save_pa_gen75),
+ NULL,
+ },
+
+ gen7_pp_plx_avs_initialize,
+ },
+
{
{
"RGBX_NV12 module",
@@ -1066,7 +1120,7 @@ static struct pp_module pp_modules_gen75[] = {
NULL,
},
- gen7_pp_rgbx_avs_initialize,
+ gen7_pp_plx_avs_initialize,
},
{
@@ -1100,6 +1154,22 @@ pp_get_surface_fourcc(VADriverContextP ctx, const struct i965_surface *surface)
}
static void
+pp_get_surface_size(VADriverContextP ctx, const struct i965_surface *surface, int *width, int *height)
+{
+ if (surface->type == I965_SURFACE_TYPE_IMAGE) {
+ struct object_image *obj_image = (struct object_image *)surface->base;
+
+ *width = obj_image->image.width;
+ *height = obj_image->image.height;
+ } else {
+ struct object_surface *obj_surface = (struct object_surface *)surface->base;
+
+ *width = obj_surface->orig_width;
+ *height = obj_surface->orig_height;
+ }
+}
+
+static void
pp_set_surface_tiling(struct i965_surface_state *ss, unsigned int tiling)
{
switch (tiling) {
@@ -1358,8 +1428,8 @@ ironlake_pp_object_walker(VADriverContextP ctx,
int x, x_steps, y, y_steps;
struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
- x_steps = pp_context->pp_x_steps(&pp_context->private_context);
- y_steps = pp_context->pp_y_steps(&pp_context->private_context);
+ x_steps = pp_context->pp_x_steps(pp_context->private_context);
+ y_steps = pp_context->pp_y_steps(pp_context->private_context);
for (y = 0; y < y_steps; y++) {
for (x = 0; x < x_steps; x++) {
@@ -1408,20 +1478,20 @@ static void i965_update_src_surface_static_parameter(
int fourcc = pp_get_surface_fourcc(ctx, surface);
switch (fourcc) {
- case VA_FOURCC('Y', 'U', 'Y', '2'):
+ case VA_FOURCC_YUY2:
pp_static_parameter->grf1.source_packed_u_offset = 1;
pp_static_parameter->grf1.source_packed_v_offset = 3;
break;
- case VA_FOURCC('U', 'Y', 'V', 'Y'):
+ case VA_FOURCC_UYVY:
pp_static_parameter->grf1.source_packed_y_offset = 1;
pp_static_parameter->grf1.source_packed_v_offset = 2;
break;
- case VA_FOURCC('B', 'G', 'R', 'X'):
- case VA_FOURCC('B', 'G', 'R', 'A'):
+ case VA_FOURCC_BGRX:
+ case VA_FOURCC_BGRA:
pp_static_parameter->grf1.source_rgb_layout = 0;
break;
- case VA_FOURCC('R', 'G', 'B', 'X'):
- case VA_FOURCC('R', 'G', 'B', 'A'):
+ case VA_FOURCC_RGBX:
+ case VA_FOURCC_RGBA:
pp_static_parameter->grf1.source_rgb_layout = 1;
break;
default:
@@ -1439,20 +1509,20 @@ static void i965_update_dst_surface_static_parameter(
int fourcc = pp_get_surface_fourcc(ctx, surface);
switch (fourcc) {
- case VA_FOURCC('Y', 'U', 'Y', '2'):
+ case VA_FOURCC_YUY2:
pp_static_parameter->grf1.r1_2.load_and_save.destination_packed_u_offset = 1;
pp_static_parameter->grf1.r1_2.load_and_save.destination_packed_v_offset = 3;
break;
- case VA_FOURCC('U', 'Y', 'V', 'Y'):
+ case VA_FOURCC_UYVY:
pp_static_parameter->grf1.r1_2.load_and_save.destination_packed_y_offset = 1;
pp_static_parameter->grf1.r1_2.load_and_save.destination_packed_v_offset = 2;
break;
- case VA_FOURCC('B', 'G', 'R', 'X'):
- case VA_FOURCC('B', 'G', 'R', 'A'):
+ case VA_FOURCC_BGRX:
+ case VA_FOURCC_BGRA:
pp_static_parameter->grf1.r1_2.csc.destination_rgb_layout = 0;
break;
- case VA_FOURCC('R', 'G', 'B', 'X'):
- case VA_FOURCC('R', 'G', 'B', 'A'):
+ case VA_FOURCC_RGBX:
+ case VA_FOURCC_RGBA:
pp_static_parameter->grf1.r1_2.csc.destination_rgb_layout = 1;
break;
default:
@@ -1563,7 +1633,7 @@ gen7_pp_set_surface_state(VADriverContextP ctx, struct i965_post_processing_cont
ss->ss2.height = height - 1;
ss->ss3.pitch = pitch - 1;
gen7_pp_set_surface_tiling(ss, tiling);
- if (IS_HASWELL(i965->intel.device_id))
+ if (IS_HASWELL(i965->intel.device_info))
gen7_render_set_surface_scs(ss);
dri_bo_emit_reloc(ss_bo,
I915_GEM_DOMAIN_RENDER, is_target ? I915_GEM_DOMAIN_RENDER : 0,
@@ -1625,15 +1695,19 @@ pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_processin
dri_bo *bo;
int fourcc = pp_get_surface_fourcc(ctx, surface);
const int Y = 0;
- const int U = fourcc == VA_FOURCC('Y', 'V', '1', '2') ? 2 : 1;
- const int V = fourcc == VA_FOURCC('Y', 'V', '1', '2') ? 1 : 2;
+ const int U = ((fourcc == VA_FOURCC_YV12) ||
+ (fourcc == VA_FOURCC_YV16))
+ ? 2 : 1;
+ const int V = ((fourcc == VA_FOURCC_YV12) ||
+ (fourcc == VA_FOURCC_YV16))
+ ? 1 : 2;
const int UV = 1;
- int interleaved_uv = fourcc == VA_FOURCC('N', 'V', '1', '2');
- int packed_yuv = (fourcc == VA_FOURCC('Y', 'U', 'Y', '2') || fourcc == VA_FOURCC('U', 'Y', 'V', 'Y'));
- int full_packed_format = (fourcc == VA_FOURCC('R', 'G', 'B', 'A') ||
- fourcc == VA_FOURCC('R', 'G', 'B', 'X') ||
- fourcc == VA_FOURCC('B', 'G', 'R', 'A') ||
- fourcc == VA_FOURCC('B', 'G', 'R', 'X'));
+ int interleaved_uv = fourcc == VA_FOURCC_NV12;
+ int packed_yuv = (fourcc == VA_FOURCC_YUY2 || fourcc == VA_FOURCC_UYVY);
+ int full_packed_format = (fourcc == VA_FOURCC_RGBA ||
+ fourcc == VA_FOURCC_RGBX ||
+ fourcc == VA_FOURCC_BGRA ||
+ fourcc == VA_FOURCC_BGRX);
int scale_factor_of_1st_plane_width_in_byte = 1;
if (surface->type == I965_SURFACE_TYPE_SURFACE) {
@@ -1646,11 +1720,9 @@ pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_processin
if (full_packed_format) {
scale_factor_of_1st_plane_width_in_byte = 4;
- pitch[0] = obj_surface->width * 4;
}
else if (packed_yuv ) {
scale_factor_of_1st_plane_width_in_byte = 2;
- pitch[0] = obj_surface->width * 2;
}
else if (interleaved_uv) {
width[1] = obj_surface->orig_width;
@@ -1695,6 +1767,12 @@ pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_processin
height[2] = obj_image->image.height / 2;
pitch[2] = obj_image->image.pitches[2];
offset[2] = obj_image->image.offsets[2];
+ if (fourcc == VA_FOURCC_YV16) {
+ width[1] = obj_image->image.width / 2;
+ height[1] = obj_image->image.height;
+ width[2] = obj_image->image.width / 2;
+ height[2] = obj_image->image.height;
+ }
}
}
@@ -1731,84 +1809,73 @@ static void
gen7_pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
const struct i965_surface *surface,
int base_index, int is_target,
+ const VARectangle *rect,
int *width, int *height, int *pitch, int *offset)
{
struct object_surface *obj_surface;
struct object_image *obj_image;
dri_bo *bo;
int fourcc = pp_get_surface_fourcc(ctx, surface);
- const int U = (fourcc == VA_FOURCC('Y', 'V', '1', '2') ||
- fourcc == VA_FOURCC('I', 'M', 'C', '1')) ? 2 : 1;
- const int V = (fourcc == VA_FOURCC('Y', 'V', '1', '2') ||
- fourcc == VA_FOURCC('I', 'M', 'C', '1')) ? 1 : 2;
- int interleaved_uv = fourcc == VA_FOURCC('N', 'V', '1', '2');
- int packed_yuv = (fourcc == VA_FOURCC('Y', 'U', 'Y', '2') || fourcc == VA_FOURCC('U', 'Y', 'V', 'Y'));
- int rgbx_format = (fourcc == VA_FOURCC('R', 'G', 'B', 'A') ||
- fourcc == VA_FOURCC('R', 'G', 'B', 'X') ||
- fourcc == VA_FOURCC('B', 'G', 'R', 'A') ||
- fourcc == VA_FOURCC('B', 'G', 'R', 'X'));
+ const i965_fourcc_info *fourcc_info = get_fourcc_info(fourcc);
+
+ if (fourcc_info == NULL)
+ return;
if (surface->type == I965_SURFACE_TYPE_SURFACE) {
obj_surface = (struct object_surface *)surface->base;
bo = obj_surface->bo;
- width[0] = obj_surface->orig_width;
- height[0] = obj_surface->orig_height;
+ width[0] = MIN(rect->x + rect->width, obj_surface->orig_width);
+ height[0] = MIN(rect->y + rect->height, obj_surface->orig_height);
pitch[0] = obj_surface->width;
offset[0] = 0;
- if (packed_yuv) {
- if (is_target)
- width[0] = obj_surface->orig_width * 2; /* surface format is R8, so double the width */
- else
- width[0] = obj_surface->orig_width; /* surface foramt is YCBCR, width is specified in units of pixels */
-
- pitch[0] = obj_surface->width * 2;
- } else if (rgbx_format) {
- if (is_target)
- width[0] = obj_surface->orig_width * 4; /* surface format is R8, so quad the width */
- pitch[0] = obj_surface->width * 4;
- }
+ if (fourcc_info->num_planes == 1 && is_target)
+ width[0] = width[0] * (fourcc_info->bpp[0] / 8); /* surface format is R8 */
- width[1] = obj_surface->cb_cr_width;
- height[1] = obj_surface->cb_cr_height;
+ width[1] = MIN(rect->x / fourcc_info->hfactor + rect->width / fourcc_info->hfactor, obj_surface->cb_cr_width);
+ height[1] = MIN(rect->y / fourcc_info->vfactor + rect->height / fourcc_info->vfactor, obj_surface->cb_cr_height);
pitch[1] = obj_surface->cb_cr_pitch;
offset[1] = obj_surface->y_cb_offset * obj_surface->width;
- width[2] = obj_surface->cb_cr_width;
- height[2] = obj_surface->cb_cr_height;
+ width[2] = MIN(rect->x / fourcc_info->hfactor + rect->width / fourcc_info->hfactor, obj_surface->cb_cr_width);
+ height[2] = MIN(rect->y / fourcc_info->vfactor + rect->height / fourcc_info->vfactor, obj_surface->cb_cr_height);
pitch[2] = obj_surface->cb_cr_pitch;
offset[2] = obj_surface->y_cr_offset * obj_surface->width;
} else {
+ int U = 0, V = 0;
+
+ /* FIXME: add support for ARGB/ABGR image */
obj_image = (struct object_image *)surface->base;
bo = obj_image->bo;
- width[0] = obj_image->image.width;
- height[0] = obj_image->image.height;
+ width[0] = MIN(rect->x + rect->width, obj_image->image.width);
+ height[0] = MIN(rect->y + rect->height, obj_image->image.height);
pitch[0] = obj_image->image.pitches[0];
offset[0] = obj_image->image.offsets[0];
- if (rgbx_format) {
- if (is_target)
- width[0] = obj_image->image.width * 4; /* surface format is R8, so quad the width */
- } else if (packed_yuv) {
+ if (fourcc_info->num_planes == 1) {
if (is_target)
- width[0] = obj_image->image.width * 2; /* surface format is R8, so double the width */
- else
- width[0] = obj_image->image.width; /* surface foramt is YCBCR, width is specified in units of pixels */
- } else if (interleaved_uv) {
- width[1] = obj_image->image.width / 2;
- height[1] = obj_image->image.height / 2;
- pitch[1] = obj_image->image.pitches[1];
- offset[1] = obj_image->image.offsets[1];
+ width[0] = width[0] * (fourcc_info->bpp[0] / 8); /* surface format is R8 */
+ } else if (fourcc_info->num_planes == 2) {
+ U = 1, V = 1;
} else {
- width[1] = obj_image->image.width / 2;
- height[1] = obj_image->image.height / 2;
- pitch[1] = obj_image->image.pitches[U];
- offset[1] = obj_image->image.offsets[U];
- width[2] = obj_image->image.width / 2;
- height[2] = obj_image->image.height / 2;
- pitch[2] = obj_image->image.pitches[V];
- offset[2] = obj_image->image.offsets[V];
+ assert(fourcc_info->num_components == 3);
+
+ U = fourcc_info->components[1].plane;
+ V = fourcc_info->components[2].plane;
+ assert((U == 1 && V == 2) ||
+ (U == 2 && V == 1));
}
+
+ /* Always set width/height although they aren't used for fourcc_info->num_planes == 1 */
+ width[1] = MIN(rect->x / fourcc_info->hfactor + rect->width / fourcc_info->hfactor, obj_image->image.width / fourcc_info->hfactor);
+ height[1] = MIN(rect->y / fourcc_info->vfactor + rect->height / fourcc_info->vfactor, obj_image->image.height / fourcc_info->vfactor);
+ pitch[1] = obj_image->image.pitches[U];
+ offset[1] = obj_image->image.offsets[U];
+
+ width[2] = MIN(rect->x / fourcc_info->hfactor + rect->width / fourcc_info->hfactor, obj_image->image.width / fourcc_info->hfactor);
+ height[2] = MIN(rect->y / fourcc_info->vfactor + rect->height / fourcc_info->vfactor, obj_image->image.height / fourcc_info->vfactor);
+ pitch[2] = obj_image->image.pitches[V];
+ offset[2] = obj_image->image.offsets[V];
}
if (is_target) {
@@ -1817,61 +1884,63 @@ gen7_pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_proc
width[0] / 4, height[0], pitch[0],
I965_SURFACEFORMAT_R8_UINT,
base_index, 1);
- if (rgbx_format) {
- struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
- /* the format is MSB: X-B-G-R */
- pp_static_parameter->grf2.save_avs_rgb_swap = 0;
- if ((fourcc == VA_FOURCC('B', 'G', 'R', 'A')) ||
- (fourcc == VA_FOURCC('B', 'G', 'R', 'X'))) {
- /* It is stored as MSB: X-R-G-B */
- pp_static_parameter->grf2.save_avs_rgb_swap = 1;
- }
- }
- if (!packed_yuv && !rgbx_format) {
- if (interleaved_uv) {
- gen7_pp_set_surface_state(ctx, pp_context,
- bo, offset[1],
- width[1] / 2, height[1], pitch[1],
- I965_SURFACEFORMAT_R8G8_SINT,
- base_index + 1, 1);
- } else {
- gen7_pp_set_surface_state(ctx, pp_context,
- bo, offset[1],
- width[1] / 4, height[1], pitch[1],
- I965_SURFACEFORMAT_R8_SINT,
- base_index + 1, 1);
- gen7_pp_set_surface_state(ctx, pp_context,
- bo, offset[2],
- width[2] / 4, height[2], pitch[2],
- I965_SURFACEFORMAT_R8_SINT,
- base_index + 2, 1);
+
+ if (fourcc_info->num_planes == 2) {
+ gen7_pp_set_surface_state(ctx, pp_context,
+ bo, offset[1],
+ width[1] / 2, height[1], pitch[1],
+ I965_SURFACEFORMAT_R8G8_SINT,
+ base_index + 1, 1);
+ } else if (fourcc_info->num_planes == 3) {
+ gen7_pp_set_surface_state(ctx, pp_context,
+ bo, offset[1],
+ width[1] / 4, height[1], pitch[1],
+ I965_SURFACEFORMAT_R8_SINT,
+ base_index + 1, 1);
+ gen7_pp_set_surface_state(ctx, pp_context,
+ bo, offset[2],
+ width[2] / 4, height[2], pitch[2],
+ I965_SURFACEFORMAT_R8_SINT,
+ base_index + 2, 1);
+ }
+
+ if (fourcc_info->format == I965_COLOR_RGB) {
+ struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
+ /* the format is MSB: X-B-G-R */
+ pp_static_parameter->grf2.save_avs_rgb_swap = 0;
+ if ((fourcc == VA_FOURCC_BGRA) ||
+ (fourcc == VA_FOURCC_BGRX)) {
+ /* It is stored as MSB: X-R-G-B */
+ pp_static_parameter->grf2.save_avs_rgb_swap = 1;
}
}
} else {
int format0 = SURFACE_FORMAT_Y8_UNORM;
switch (fourcc) {
- case VA_FOURCC('Y', 'U', 'Y', '2'):
+ case VA_FOURCC_YUY2:
format0 = SURFACE_FORMAT_YCRCB_NORMAL;
break;
- case VA_FOURCC('U', 'Y', 'V', 'Y'):
+ case VA_FOURCC_UYVY:
format0 = SURFACE_FORMAT_YCRCB_SWAPY;
break;
default:
break;
}
- if (rgbx_format) {
+
+ if (fourcc_info->format == I965_COLOR_RGB) {
struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
/* Only R8G8B8A8_UNORM is supported for BGRX or RGBX */
format0 = SURFACE_FORMAT_R8G8B8A8_UNORM;
pp_static_parameter->grf2.src_avs_rgb_swap = 0;
- if ((fourcc == VA_FOURCC('B', 'G', 'R', 'A')) ||
- (fourcc == VA_FOURCC('B', 'G', 'R', 'X'))) {
+ if ((fourcc == VA_FOURCC_BGRA) ||
+ (fourcc == VA_FOURCC_BGRX)) {
pp_static_parameter->grf2.src_avs_rgb_swap = 1;
}
}
+
gen7_pp_set_surface2_state(ctx, pp_context,
bo, offset[0],
width[0], height[0], pitch[0],
@@ -1879,28 +1948,26 @@ gen7_pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_proc
format0, 0,
base_index);
- if (!packed_yuv && !rgbx_format) {
- if (interleaved_uv) {
- gen7_pp_set_surface2_state(ctx, pp_context,
- bo, offset[1],
- width[1], height[1], pitch[1],
- 0, 0,
- SURFACE_FORMAT_R8B8_UNORM, 0,
- base_index + 1);
- } else {
- gen7_pp_set_surface2_state(ctx, pp_context,
- bo, offset[1],
- width[1], height[1], pitch[1],
- 0, 0,
- SURFACE_FORMAT_R8_UNORM, 0,
- base_index + 1);
- gen7_pp_set_surface2_state(ctx, pp_context,
- bo, offset[2],
- width[2], height[2], pitch[2],
- 0, 0,
- SURFACE_FORMAT_R8_UNORM, 0,
- base_index + 2);
- }
+ if (fourcc_info->num_planes == 2) {
+ gen7_pp_set_surface2_state(ctx, pp_context,
+ bo, offset[1],
+ width[1], height[1], pitch[1],
+ 0, 0,
+ SURFACE_FORMAT_R8B8_UNORM, 0,
+ base_index + 1);
+ } else if (fourcc_info->num_planes == 3) {
+ gen7_pp_set_surface2_state(ctx, pp_context,
+ bo, offset[1],
+ width[1], height[1], pitch[1],
+ 0, 0,
+ SURFACE_FORMAT_R8_UNORM, 0,
+ base_index + 1);
+ gen7_pp_set_surface2_state(ctx, pp_context,
+ bo, offset[2],
+ width[2], height[2], pitch[2],
+ 0, 0,
+ SURFACE_FORMAT_R8_UNORM, 0,
+ base_index + 2);
}
}
}
@@ -1934,6 +2001,7 @@ pp_null_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp
/* private function & data */
pp_context->pp_x_steps = pp_null_x_steps;
pp_context->pp_y_steps = pp_null_y_steps;
+ pp_context->private_context = NULL;
pp_context->pp_set_block_parameter = pp_null_set_block_parameter;
dst_surface->flags = src_surface->flags;
@@ -1959,7 +2027,7 @@ static int
pp_load_save_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
{
struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
- struct pp_load_save_context *pp_load_save_context = (struct pp_load_save_context *)&pp_context->private_context;
+ struct pp_load_save_context *pp_load_save_context = (struct pp_load_save_context *)pp_context->private_context;
pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16 + pp_load_save_context->dest_x;
pp_inline_parameter->grf5.destination_block_vertical_origin = y * 8 + pp_load_save_context->dest_y;
@@ -2008,7 +2076,7 @@ pp_plx_load_save_plx_initialize(VADriverContextP ctx, struct i965_post_processin
const VARectangle *dst_rect,
void *filter_param)
{
- struct pp_load_save_context *pp_load_save_context = (struct pp_load_save_context *)&pp_context->private_context;
+ struct pp_load_save_context *pp_load_save_context = (struct pp_load_save_context *)&pp_context->pp_load_save_context;
struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
int width[3], height[3], pitch[3], offset[3];
@@ -2024,6 +2092,7 @@ pp_plx_load_save_plx_initialize(VADriverContextP ctx, struct i965_post_processin
/* private function & data */
pp_context->pp_x_steps = pp_load_save_x_steps;
pp_context->pp_y_steps = pp_load_save_y_steps;
+ pp_context->private_context = &pp_context->pp_load_save_context;
pp_context->pp_set_block_parameter = pp_load_save_set_block_parameter;
int dst_left_edge_extend = dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT;;
@@ -2064,7 +2133,7 @@ pp_scaling_y_steps(void *private_context)
static int
pp_scaling_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
{
- struct pp_scaling_context *pp_scaling_context = (struct pp_scaling_context *)&pp_context->private_context;
+ struct pp_scaling_context *pp_scaling_context = (struct pp_scaling_context *)pp_context->private_context;
struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
float src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
@@ -2086,7 +2155,7 @@ pp_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_con
const VARectangle *dst_rect,
void *filter_param)
{
- struct pp_scaling_context *pp_scaling_context = (struct pp_scaling_context *)&pp_context->private_context;
+ struct pp_scaling_context *pp_scaling_context = (struct pp_scaling_context *)&pp_context->pp_scaling_context;
struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
struct object_surface *obj_surface;
@@ -2156,6 +2225,7 @@ pp_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_con
/* private function & data */
pp_context->pp_x_steps = pp_scaling_x_steps;
pp_context->pp_y_steps = pp_scaling_y_steps;
+ pp_context->private_context = &pp_context->pp_scaling_context;
pp_context->pp_set_block_parameter = pp_scaling_set_block_parameter;
int dst_left_edge_extend = dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT;
@@ -2195,7 +2265,7 @@ pp_avs_y_steps(void *private_context)
static int
pp_avs_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
{
- struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->private_context;
+ struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)pp_context->private_context;
struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
float src_x_steping, src_y_steping, video_step_delta;
@@ -2302,7 +2372,7 @@ pp_nv12_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context
void *filter_param,
int nlas)
{
- struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->private_context;
+ struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->pp_avs_context;
struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
struct object_surface *obj_surface;
@@ -2545,6 +2615,7 @@ pp_nv12_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context
/* private function & data */
pp_context->pp_x_steps = pp_avs_x_steps;
pp_context->pp_y_steps = pp_avs_y_steps;
+ pp_context->private_context = &pp_context->pp_avs_context;
pp_context->pp_set_block_parameter = pp_avs_set_block_parameter;
int dst_left_edge_extend = dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT;
@@ -2624,7 +2695,7 @@ gen7_pp_avs_y_steps(void *private_context)
static int
gen7_pp_avs_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
{
- struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->private_context;
+ struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)pp_context->private_context;
struct gen7_pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
pp_inline_parameter->grf7.destination_block_horizontal_origin = x * 16 + pp_avs_context->dest_x;
@@ -2642,11 +2713,11 @@ static void gen7_update_src_surface_uv_offset(VADriverContextP ctx,
struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
int fourcc = pp_get_surface_fourcc(ctx, surface);
- if (fourcc == VA_FOURCC('Y', 'U', 'Y', '2')) {
+ if (fourcc == VA_FOURCC_YUY2) {
pp_static_parameter->grf2.di_destination_packed_y_component_offset = 0;
pp_static_parameter->grf2.di_destination_packed_u_component_offset = 1;
pp_static_parameter->grf2.di_destination_packed_v_component_offset = 3;
- } else if (fourcc == VA_FOURCC('U', 'Y', 'V', 'Y')) {
+ } else if (fourcc == VA_FOURCC_UYVY) {
pp_static_parameter->grf2.di_destination_packed_y_component_offset = 1;
pp_static_parameter->grf2.di_destination_packed_u_component_offset = 0;
pp_static_parameter->grf2.di_destination_packed_v_component_offset = 2;
@@ -2661,7 +2732,7 @@ gen7_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_con
const VARectangle *dst_rect,
void *filter_param)
{
- struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->private_context;
+ struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->pp_avs_context;
struct i965_driver_data *i965 = i965_driver_data(ctx);
struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
struct gen7_sampler_8x8 *sampler_8x8;
@@ -2672,12 +2743,14 @@ gen7_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_con
/* source surface */
gen7_pp_set_media_rw_message_surface(ctx, pp_context, src_surface, 0, 0,
+ src_rect,
width, height, pitch, offset);
src_width = width[0];
src_height = height[0];
/* destination surface */
gen7_pp_set_media_rw_message_surface(ctx, pp_context, dst_surface, 24, 1,
+ dst_rect,
width, height, pitch, offset);
/* sampler 8x8 state */
@@ -2827,6 +2900,7 @@ gen7_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_con
/* private function & data */
pp_context->pp_x_steps = gen7_pp_avs_x_steps;
pp_context->pp_y_steps = gen7_pp_avs_y_steps;
+ pp_context->private_context = &pp_context->pp_avs_context;
pp_context->pp_set_block_parameter = gen7_pp_avs_set_block_parameter;
pp_avs_context->dest_x = dst_rect->x;
@@ -2838,233 +2912,35 @@ gen7_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_con
pp_avs_context->horiz_range = (float)src_rect->width / src_width;
int dw = (pp_avs_context->src_w - 1) / 16 + 1;
- dw = MAX(dw, pp_avs_context->dest_w);
+ dw = MAX(dw, dst_rect->width);
pp_static_parameter->grf1.pointer_to_inline_parameter = 7;
pp_static_parameter->grf2.avs_wa_enable = 1; /* must be set for GEN7 */
- if (IS_HASWELL(i965->intel.device_id))
+ if (IS_HASWELL(i965->intel.device_info))
pp_static_parameter->grf2.avs_wa_enable = 0; /* HSW don't use the WA */
-
- pp_static_parameter->grf2.avs_wa_width = dw;
- pp_static_parameter->grf2.avs_wa_one_div_256_width = (float) 1.0 / (256 * dw);
- pp_static_parameter->grf2.avs_wa_five_div_256_width = (float) 5.0 / (256 * dw);
-
- pp_static_parameter->grf3.sampler_load_horizontal_scaling_step_ratio = (float) pp_avs_context->src_w / dw;
- pp_static_parameter->grf4.sampler_load_vertical_scaling_step = (float) src_rect->height / src_height / pp_avs_context->dest_h;
- pp_static_parameter->grf5.sampler_load_vertical_frame_origin = (float) src_rect->y / src_height -
- (float) pp_avs_context->dest_y * pp_static_parameter->grf4.sampler_load_vertical_scaling_step;
- pp_static_parameter->grf6.sampler_load_horizontal_frame_origin = (float) src_rect->x / src_width -
- (float) pp_avs_context->dest_x * pp_avs_context->horiz_range / dw;
-
- gen7_update_src_surface_uv_offset(ctx, pp_context, dst_surface);
-
- dst_surface->flags = src_surface->flags;
-
- return VA_STATUS_SUCCESS;
-}
-
-
-static VAStatus
-gen7_pp_rgbx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
- const struct i965_surface *src_surface,
- const VARectangle *src_rect,
- struct i965_surface *dst_surface,
- const VARectangle *dst_rect,
- void *filter_param)
-{
- struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->private_context;
- struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
- struct gen7_sampler_8x8 *sampler_8x8;
- struct i965_sampler_8x8_state *sampler_8x8_state;
- int index, i;
- int width[3], height[3], pitch[3], offset[3];
- int src_width, src_height;
- /* source surface */
- gen7_pp_set_media_rw_message_surface(ctx, pp_context, src_surface, 0, 0,
- width, height, pitch, offset);
- src_width = width[0];
- src_height = height[0];
-
- /* destination surface */
- gen7_pp_set_media_rw_message_surface(ctx, pp_context, dst_surface, 24, 1,
- width, height, pitch, offset);
-
- /* sampler 8x8 state */
- dri_bo_map(pp_context->sampler_state_table.bo_8x8, True);
- assert(pp_context->sampler_state_table.bo_8x8->virtual);
- assert(sizeof(*sampler_8x8_state) == sizeof(int) * 138);
- sampler_8x8_state = pp_context->sampler_state_table.bo_8x8->virtual;
- memset(sampler_8x8_state, 0, sizeof(*sampler_8x8_state));
-
- /* The sampler_state setting of RGBX surface will be different with
- * that for NV12/I420 surface.
- */
- for (i = 0; i < 17; i++) {
- float coff;
- coff = i;
- coff = coff / 16;
- /* for Y channel, currently ignore */
- sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c0 = 0x0;
- sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c1 = 0x0;
- sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c2 = 0x0;
- sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c3 = intel_format_convert(1 - coff, 1, 6, 0);
- sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c4 = intel_format_convert(coff, 1, 6, 0);
- sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c5 = 0x0;
- sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c6 = 0x0;
- sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c7 = 0x0;
- sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c0 = 0x0;
- sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c1 = 0x0;
- sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c2 = 0x0;
- sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c3 = intel_format_convert(1 - coff, 1, 6, 0);
- sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c4 = intel_format_convert(coff, 1, 6, 0);
- sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c5 = 0x0;
- sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c6 = 0x0;
- sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c7 = 0x0;
- /* for U/V channel, 0.25 */
- sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c0 = 0x0;
- sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c1 = 0x0;
- sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c2 = 0x00;
- sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c3 = intel_format_convert(1 - coff, 1, 6, 0);
- sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c4 = intel_format_convert(coff, 1, 6, 0);
- sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c5 = 0x00;
- sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c6 = 0x0;
- sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c7 = 0x0;
- sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c0 = 0x0;
- sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c1 = 0x0;
- sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c2 = 0x00;
- sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c3 = intel_format_convert(1 - coff, 1, 6, 0);
- sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c4 = intel_format_convert(coff, 1, 6, 0);
- sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c5 = 0x00;
- sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c6 = 0x0;
- sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c7 = 0x0;
+ if (pp_static_parameter->grf2.avs_wa_enable) {
+ int src_fourcc = pp_get_surface_fourcc(ctx, src_surface);
+ if ((src_fourcc == VA_FOURCC_RGBA) ||
+ (src_fourcc == VA_FOURCC_RGBX) ||
+ (src_fourcc == VA_FOURCC_BGRA) ||
+ (src_fourcc == VA_FOURCC_BGRX)) {
+ pp_static_parameter->grf2.avs_wa_enable = 0;
+ }
}
-
- sampler_8x8_state->dw136.default_sharpness_level = 0;
- sampler_8x8_state->dw137.adaptive_filter_for_all_channel = 0;
- sampler_8x8_state->dw137.bypass_y_adaptive_filtering = 1;
- sampler_8x8_state->dw137.bypass_x_adaptive_filtering = 1;
- dri_bo_unmap(pp_context->sampler_state_table.bo_8x8);
-
- /* sampler 8x8 */
- dri_bo_map(pp_context->sampler_state_table.bo, True);
- assert(pp_context->sampler_state_table.bo->virtual);
- assert(sizeof(*sampler_8x8) == sizeof(int) * 4);
- sampler_8x8 = pp_context->sampler_state_table.bo->virtual;
-
- /* sample_8x8 Y index 4 */
- index = 4;
- memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
- sampler_8x8[index].dw0.global_noise_estimation = 255;
- sampler_8x8[index].dw0.ief_bypass = 1;
-
- sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
-
- sampler_8x8[index].dw2.weak_edge_threshold = 1;
- sampler_8x8[index].dw2.strong_edge_threshold = 8;
- sampler_8x8[index].dw2.r5x_coefficient = 9;
- sampler_8x8[index].dw2.r5cx_coefficient = 8;
- sampler_8x8[index].dw2.r5c_coefficient = 3;
-
- sampler_8x8[index].dw3.r3x_coefficient = 27;
- sampler_8x8[index].dw3.r3c_coefficient = 5;
- sampler_8x8[index].dw3.gain_factor = 40;
- sampler_8x8[index].dw3.non_edge_weight = 1;
- sampler_8x8[index].dw3.regular_weight = 2;
- sampler_8x8[index].dw3.strong_edge_weight = 7;
- sampler_8x8[index].dw3.ief4_smooth_enable = 0;
-
- dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
- I915_GEM_DOMAIN_RENDER,
- 0,
- 0,
- sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
- pp_context->sampler_state_table.bo_8x8);
-
- /* sample_8x8 UV index 8 */
- index = 8;
- memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
- sampler_8x8[index].dw0.disable_8x8_filter = 0;
- sampler_8x8[index].dw0.global_noise_estimation = 255;
- sampler_8x8[index].dw0.ief_bypass = 1;
- sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
- sampler_8x8[index].dw2.weak_edge_threshold = 1;
- sampler_8x8[index].dw2.strong_edge_threshold = 8;
- sampler_8x8[index].dw2.r5x_coefficient = 9;
- sampler_8x8[index].dw2.r5cx_coefficient = 8;
- sampler_8x8[index].dw2.r5c_coefficient = 3;
- sampler_8x8[index].dw3.r3x_coefficient = 27;
- sampler_8x8[index].dw3.r3c_coefficient = 5;
- sampler_8x8[index].dw3.gain_factor = 40;
- sampler_8x8[index].dw3.non_edge_weight = 1;
- sampler_8x8[index].dw3.regular_weight = 2;
- sampler_8x8[index].dw3.strong_edge_weight = 7;
- sampler_8x8[index].dw3.ief4_smooth_enable = 0;
-
- dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
- I915_GEM_DOMAIN_RENDER,
- 0,
- 0,
- sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
- pp_context->sampler_state_table.bo_8x8);
-
- /* sampler_8x8 V, index 12 */
- index = 12;
- memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
- sampler_8x8[index].dw0.disable_8x8_filter = 0;
- sampler_8x8[index].dw0.global_noise_estimation = 255;
- sampler_8x8[index].dw0.ief_bypass = 1;
- sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
- sampler_8x8[index].dw2.weak_edge_threshold = 1;
- sampler_8x8[index].dw2.strong_edge_threshold = 8;
- sampler_8x8[index].dw2.r5x_coefficient = 9;
- sampler_8x8[index].dw2.r5cx_coefficient = 8;
- sampler_8x8[index].dw2.r5c_coefficient = 3;
- sampler_8x8[index].dw3.r3x_coefficient = 27;
- sampler_8x8[index].dw3.r3c_coefficient = 5;
- sampler_8x8[index].dw3.gain_factor = 40;
- sampler_8x8[index].dw3.non_edge_weight = 1;
- sampler_8x8[index].dw3.regular_weight = 2;
- sampler_8x8[index].dw3.strong_edge_weight = 7;
- sampler_8x8[index].dw3.ief4_smooth_enable = 0;
-
- dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
- I915_GEM_DOMAIN_RENDER,
- 0,
- 0,
- sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
- pp_context->sampler_state_table.bo_8x8);
-
- dri_bo_unmap(pp_context->sampler_state_table.bo);
-
- /* private function & data */
- pp_context->pp_x_steps = gen7_pp_avs_x_steps;
- pp_context->pp_y_steps = gen7_pp_avs_y_steps;
- pp_context->pp_set_block_parameter = gen7_pp_avs_set_block_parameter;
-
- pp_avs_context->dest_x = dst_rect->x;
- pp_avs_context->dest_y = dst_rect->y;
- pp_avs_context->dest_w = ALIGN(dst_rect->width, 16);
- pp_avs_context->dest_h = ALIGN(dst_rect->height, 16);
- pp_avs_context->src_w = src_rect->width;
- pp_avs_context->src_h = src_rect->height;
- pp_avs_context->horiz_range = (float)src_rect->width / src_width;
-
- int dw = (pp_avs_context->src_w - 1) / 16 + 1;
- dw = MAX(dw, pp_avs_context->dest_w);
-
- pp_static_parameter->grf1.pointer_to_inline_parameter = 7;
- pp_static_parameter->grf2.avs_wa_enable = 0; /* It is unnecessary to use WA for RGBX surface */
- pp_static_parameter->grf2.avs_wa_width = dw;
- pp_static_parameter->grf2.avs_wa_one_div_256_width = (float) 1.0 / (256 * dw);
- pp_static_parameter->grf2.avs_wa_five_div_256_width = (float) 5.0 / (256 * dw);
+
+ pp_static_parameter->grf2.avs_wa_width = src_width;
+ pp_static_parameter->grf2.avs_wa_one_div_256_width = (float) 1.0 / (256 * src_width);
+ pp_static_parameter->grf2.avs_wa_five_div_256_width = (float) 5.0 / (256 * src_width);
+ pp_static_parameter->grf2.alpha = 255;
pp_static_parameter->grf3.sampler_load_horizontal_scaling_step_ratio = (float) pp_avs_context->src_w / dw;
- pp_static_parameter->grf4.sampler_load_vertical_scaling_step = (float) src_rect->height / src_height / pp_avs_context->dest_h;
+ pp_static_parameter->grf4.sampler_load_vertical_scaling_step = (float) src_rect->height / src_height / dst_rect->height;
pp_static_parameter->grf5.sampler_load_vertical_frame_origin = (float) src_rect->y / src_height -
- (float) pp_avs_context->dest_y * pp_static_parameter->grf4.sampler_load_vertical_scaling_step;
+ (float) pp_avs_context->dest_y * pp_static_parameter->grf4.sampler_load_vertical_scaling_step;
pp_static_parameter->grf6.sampler_load_horizontal_frame_origin = (float) src_rect->x / src_width -
- (float) pp_avs_context->dest_x * pp_avs_context->horiz_range / dw;
+ (float) pp_avs_context->dest_x * pp_avs_context->horiz_range / dw;
+
gen7_update_src_surface_uv_offset(ctx, pp_context, dst_surface);
dst_surface->flags = src_surface->flags;
@@ -3106,77 +2982,177 @@ pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_contex
void *filter_param)
{
struct i965_driver_data *i965 = i965_driver_data(ctx);
- struct pp_dndi_context *pp_dndi_context = (struct pp_dndi_context *)&pp_context->private_context;
+ struct pp_dndi_context *pp_dndi_context = (struct pp_dndi_context *)&pp_context->pp_dndi_context;
struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
- struct object_surface *obj_surface;
+ struct object_surface *previous_in_obj_surface, *current_in_obj_surface, *previous_out_obj_surface, *current_out_obj_surface;
struct i965_sampler_dndi *sampler_dndi;
int index;
int w, h;
int orig_w, orig_h;
int dndi_top_first = 1;
+ VAProcFilterParameterBufferDeinterlacing *di_filter_param = (VAProcFilterParameterBufferDeinterlacing *)filter_param;
+ int is_first_frame = (pp_dndi_context->frame_order == -1);
- if (src_surface->flags == I965_SURFACE_FLAG_FRAME)
- return VA_STATUS_ERROR_FLAG_NOT_SUPPORTED;
-
- if (src_surface->flags == I965_SURFACE_FLAG_TOP_FIELD_FIRST)
- dndi_top_first = 1;
- else
+ if (di_filter_param->flags & VA_DEINTERLACING_BOTTOM_FIELD)
dndi_top_first = 0;
+ else
+ dndi_top_first = 1;
/* surface */
- obj_surface = (struct object_surface *)src_surface->base;
- orig_w = obj_surface->orig_width;
- orig_h = obj_surface->orig_height;
- w = obj_surface->width;
- h = obj_surface->height;
+ current_in_obj_surface = (struct object_surface *)src_surface->base;
+
+ if (di_filter_param->algorithm == VAProcDeinterlacingBob) {
+ previous_in_obj_surface = current_in_obj_surface;
+ is_first_frame = 1;
+ } else if (di_filter_param->algorithm == VAProcDeinterlacingMotionAdaptive) {
+ if (pp_dndi_context->frame_order == 0) {
+ VAProcPipelineParameterBuffer *pipeline_param = pp_context->pipeline_param;
+ if (!pipeline_param ||
+ !pipeline_param->num_forward_references ||
+ pipeline_param->forward_references[0] == VA_INVALID_ID) {
+ WARN_ONCE("A forward temporal reference is needed for Motion adaptive deinterlacing !!!\n");
+
+ return VA_STATUS_ERROR_INVALID_PARAMETER;
+ } else {
+ previous_in_obj_surface = SURFACE(pipeline_param->forward_references[0]);
+ assert(previous_in_obj_surface && previous_in_obj_surface->bo);
- if (pp_context->stmm.bo == NULL) {
- pp_context->stmm.bo = dri_bo_alloc(i965->intel.bufmgr,
- "STMM surface",
- w * h,
- 4096);
- assert(pp_context->stmm.bo);
+ is_first_frame = 0;
+ }
+ } else if (pp_dndi_context->frame_order == 1) {
+ vpp_surface_convert(ctx,
+ pp_dndi_context->current_out_obj_surface,
+ (struct object_surface *)dst_surface->base);
+ pp_dndi_context->frame_order = (pp_dndi_context->frame_order + 1) % 2;
+ is_first_frame = 0;
+
+ return VA_STATUS_SUCCESS_1;
+ } else {
+ previous_in_obj_surface = current_in_obj_surface;
+ is_first_frame = 1;
+ }
+ } else {
+ return VA_STATUS_ERROR_UNIMPLEMENTED;
}
+ /* source (temporal reference) YUV surface index 5 */
+ orig_w = previous_in_obj_surface->orig_width;
+ orig_h = previous_in_obj_surface->orig_height;
+ w = previous_in_obj_surface->width;
+ h = previous_in_obj_surface->height;
+ i965_pp_set_surface2_state(ctx, pp_context,
+ previous_in_obj_surface->bo, 0,
+ orig_w, orig_h, w,
+ 0, h,
+ SURFACE_FORMAT_PLANAR_420_8, 1,
+ 5);
+
+ /* source surface */
+ orig_w = current_in_obj_surface->orig_width;
+ orig_h = current_in_obj_surface->orig_height;
+ w = current_in_obj_surface->width;
+ h = current_in_obj_surface->height;
+
/* source UV surface index 2 */
i965_pp_set_surface_state(ctx, pp_context,
- obj_surface->bo, w * h,
+ current_in_obj_surface->bo, w * h,
orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
2, 0);
/* source YUV surface index 4 */
i965_pp_set_surface2_state(ctx, pp_context,
- obj_surface->bo, 0,
+ current_in_obj_surface->bo, 0,
orig_w, orig_h, w,
0, h,
SURFACE_FORMAT_PLANAR_420_8, 1,
4);
- /* source STMM surface index 20 */
+ /* source STMM surface index 6 */
+ if (pp_dndi_context->stmm_bo == NULL) {
+ pp_dndi_context->stmm_bo = dri_bo_alloc(i965->intel.bufmgr,
+ "STMM surface",
+ w * h,
+ 4096);
+ assert(pp_dndi_context->stmm_bo);
+ }
+
i965_pp_set_surface_state(ctx, pp_context,
- pp_context->stmm.bo, 0,
+ pp_dndi_context->stmm_bo, 0,
orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
- 20, 1);
+ 6, 0);
- /* destination surface */
- obj_surface = (struct object_surface *)dst_surface->base;
- orig_w = obj_surface->orig_width;
- orig_h = obj_surface->orig_height;
- w = obj_surface->width;
- h = obj_surface->height;
+ /* destination (Previous frame) */
+ previous_out_obj_surface = (struct object_surface *)dst_surface->base;
+ orig_w = previous_out_obj_surface->orig_width;
+ orig_h = previous_out_obj_surface->orig_height;
+ w = previous_out_obj_surface->width;
+ h = previous_out_obj_surface->height;
- /* destination Y surface index 7 */
+ if (is_first_frame) {
+ current_out_obj_surface = previous_out_obj_surface;
+ } else {
+ VAStatus va_status;
+
+ if (pp_dndi_context->current_out_surface == VA_INVALID_SURFACE) {
+ unsigned int tiling = 0, swizzle = 0;
+ dri_bo_get_tiling(previous_out_obj_surface->bo, &tiling, &swizzle);
+
+ va_status = i965_CreateSurfaces(ctx,
+ orig_w,
+ orig_h,
+ VA_RT_FORMAT_YUV420,
+ 1,
+ &pp_dndi_context->current_out_surface);
+ assert(va_status == VA_STATUS_SUCCESS);
+ pp_dndi_context->current_out_obj_surface = SURFACE(pp_dndi_context->current_out_surface);
+ assert(pp_dndi_context->current_out_obj_surface);
+ i965_check_alloc_surface_bo(ctx,
+ pp_dndi_context->current_out_obj_surface,
+ tiling != I915_TILING_NONE,
+ VA_FOURCC_NV12,
+ SUBSAMPLE_YUV420);
+ }
+
+ current_out_obj_surface = pp_dndi_context->current_out_obj_surface;
+ }
+
+ /* destination (Previous frame) Y surface index 7 */
i965_pp_set_surface_state(ctx, pp_context,
- obj_surface->bo, 0,
+ previous_out_obj_surface->bo, 0,
orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
7, 1);
- /* destination UV surface index 8 */
+ /* destination (Previous frame) UV surface index 8 */
i965_pp_set_surface_state(ctx, pp_context,
- obj_surface->bo, w * h,
+ previous_out_obj_surface->bo, w * h,
orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
8, 1);
+
+ /* destination(Current frame) */
+ orig_w = current_out_obj_surface->orig_width;
+ orig_h = current_out_obj_surface->orig_height;
+ w = current_out_obj_surface->width;
+ h = current_out_obj_surface->height;
+
+ /* destination (Current frame) Y surface index 10 */
+ i965_pp_set_surface_state(ctx, pp_context,
+ current_out_obj_surface->bo, 0,
+ orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
+ 10, 1);
+
+ /* destination (Current frame) UV surface index 11 */
+ i965_pp_set_surface_state(ctx, pp_context,
+ current_out_obj_surface->bo, w * h,
+ orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
+ 11, 1);
+
+ /* STMM output surface, index 20 */
+ i965_pp_set_surface_state(ctx, pp_context,
+ pp_dndi_context->stmm_bo, 0,
+ orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
+ 20, 1);
+
/* sampler dndi */
dri_bo_map(pp_context->sampler_state_table.bo, True);
assert(pp_context->sampler_state_table.bo->virtual);
@@ -3185,61 +3161,62 @@ pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_contex
/* sample dndi index 1 */
index = 0;
- sampler_dndi[index].dw0.denoise_asd_threshold = 0;
- sampler_dndi[index].dw0.denoise_history_delta = 8; // 0-15, default is 8
- sampler_dndi[index].dw0.denoise_maximum_history = 128; // 128-240
- sampler_dndi[index].dw0.denoise_stad_threshold = 0;
+ sampler_dndi[index].dw0.denoise_asd_threshold = 38;
+ sampler_dndi[index].dw0.denoise_history_delta = 7; // 0-15, default is 8
+ sampler_dndi[index].dw0.denoise_maximum_history = 192; // 128-240
+ sampler_dndi[index].dw0.denoise_stad_threshold = 140;
- sampler_dndi[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 64;
- sampler_dndi[index].dw1.denoise_moving_pixel_threshold = 4;
+ sampler_dndi[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 38;
+ sampler_dndi[index].dw1.denoise_moving_pixel_threshold = 1;
sampler_dndi[index].dw1.stmm_c2 = 1;
- sampler_dndi[index].dw1.low_temporal_difference_threshold = 8;
- sampler_dndi[index].dw1.temporal_difference_threshold = 16;
+ sampler_dndi[index].dw1.low_temporal_difference_threshold = 0;
+ sampler_dndi[index].dw1.temporal_difference_threshold = 0;
- sampler_dndi[index].dw2.block_noise_estimate_noise_threshold = 15; // 0-31
- sampler_dndi[index].dw2.block_noise_estimate_edge_threshold = 7; // 0-15
+ sampler_dndi[index].dw2.block_noise_estimate_noise_threshold = 20; // 0-31
+ sampler_dndi[index].dw2.block_noise_estimate_edge_threshold = 1; // 0-15
sampler_dndi[index].dw2.denoise_edge_threshold = 7; // 0-15
- sampler_dndi[index].dw2.good_neighbor_threshold = 4; // 0-63
+ sampler_dndi[index].dw2.good_neighbor_threshold = 12; // 0-63
- sampler_dndi[index].dw3.maximum_stmm = 128;
- sampler_dndi[index].dw3.multipler_for_vecm = 2;
- sampler_dndi[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 0;
+ sampler_dndi[index].dw3.maximum_stmm = 150;
+ sampler_dndi[index].dw3.multipler_for_vecm = 30;
+ sampler_dndi[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 125;
sampler_dndi[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
sampler_dndi[index].dw3.stmm_blending_constant_select = 0;
- sampler_dndi[index].dw4.sdi_delta = 8;
- sampler_dndi[index].dw4.sdi_threshold = 128;
- sampler_dndi[index].dw4.stmm_output_shift = 7; // stmm_max - stmm_min = 2 ^ stmm_output_shift
- sampler_dndi[index].dw4.stmm_shift_up = 0;
+ sampler_dndi[index].dw4.sdi_delta = 5;
+ sampler_dndi[index].dw4.sdi_threshold = 100;
+ sampler_dndi[index].dw4.stmm_output_shift = 5; // stmm_max - stmm_min = 2 ^ stmm_output_shift
+ sampler_dndi[index].dw4.stmm_shift_up = 1;
sampler_dndi[index].dw4.stmm_shift_down = 0;
- sampler_dndi[index].dw4.minimum_stmm = 0;
+ sampler_dndi[index].dw4.minimum_stmm = 118;
- sampler_dndi[index].dw5.fmd_temporal_difference_threshold = 8;
- sampler_dndi[index].dw5.sdi_fallback_mode_2_constant = 32;
- sampler_dndi[index].dw5.sdi_fallback_mode_1_t2_constant = 64;
- sampler_dndi[index].dw5.sdi_fallback_mode_1_t1_constant = 32;
+ sampler_dndi[index].dw5.fmd_temporal_difference_threshold = 175;
+ sampler_dndi[index].dw5.sdi_fallback_mode_2_constant = 37;
+ sampler_dndi[index].dw5.sdi_fallback_mode_1_t2_constant = 100;
+ sampler_dndi[index].dw5.sdi_fallback_mode_1_t1_constant = 50;
sampler_dndi[index].dw6.dn_enable = 1;
sampler_dndi[index].dw6.di_enable = 1;
sampler_dndi[index].dw6.di_partial = 0;
sampler_dndi[index].dw6.dndi_top_first = dndi_top_first;
sampler_dndi[index].dw6.dndi_stream_id = 0;
- sampler_dndi[index].dw6.dndi_first_frame = 1;
+ sampler_dndi[index].dw6.dndi_first_frame = is_first_frame;
sampler_dndi[index].dw6.progressive_dn = 0;
- sampler_dndi[index].dw6.fmd_tear_threshold = 63;
- sampler_dndi[index].dw6.fmd2_vertical_difference_threshold = 32;
- sampler_dndi[index].dw6.fmd1_vertical_difference_threshold = 32;
+ sampler_dndi[index].dw6.fmd_tear_threshold = 2;
+ sampler_dndi[index].dw6.fmd2_vertical_difference_threshold = 100;
+ sampler_dndi[index].dw6.fmd1_vertical_difference_threshold = 16;
sampler_dndi[index].dw7.fmd_for_1st_field_of_current_frame = 0;
sampler_dndi[index].dw7.fmd_for_2nd_field_of_previous_frame = 0;
sampler_dndi[index].dw7.vdi_walker_enable = 0;
- sampler_dndi[index].dw7.column_width_minus1 = 0;
+ sampler_dndi[index].dw7.column_width_minus1 = w / 16;
dri_bo_unmap(pp_context->sampler_state_table.bo);
/* private function & data */
pp_context->pp_x_steps = pp_dndi_x_steps;
pp_context->pp_y_steps = pp_dndi_y_steps;
+ pp_context->private_context = &pp_context->pp_dndi_context;
pp_context->pp_set_block_parameter = pp_dndi_set_block_parameter;
pp_static_parameter->grf1.statistics_surface_picth = w / 2;
@@ -3257,6 +3234,8 @@ pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_contex
dst_surface->flags = I965_SURFACE_FLAG_FRAME;
+ pp_dndi_context->frame_order = (pp_dndi_context->frame_order + 1) % 2;
+
return VA_STATUS_SUCCESS;
}
@@ -3294,7 +3273,7 @@ pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context
void *filter_param)
{
struct i965_driver_data *i965 = i965_driver_data(ctx);
- struct pp_dn_context *pp_dn_context = (struct pp_dn_context *)&pp_context->private_context;
+ struct pp_dn_context *pp_dn_context = (struct pp_dn_context *)&pp_context->pp_dn_context;
struct object_surface *obj_surface;
struct i965_sampler_dndi *sampler_dndi;
struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
@@ -3337,12 +3316,12 @@ pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context
w = obj_surface->width;
h = obj_surface->height;
- if (pp_context->stmm.bo == NULL) {
- pp_context->stmm.bo = dri_bo_alloc(i965->intel.bufmgr,
- "STMM surface",
- w * h,
- 4096);
- assert(pp_context->stmm.bo);
+ if (pp_dn_context->stmm_bo == NULL) {
+ pp_dn_context->stmm_bo = dri_bo_alloc(i965->intel.bufmgr,
+ "STMM surface",
+ w * h,
+ 4096);
+ assert(pp_dn_context->stmm_bo);
}
/* source UV surface index 2 */
@@ -3361,7 +3340,7 @@ pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context
/* source STMM surface index 20 */
i965_pp_set_surface_state(ctx, pp_context,
- pp_context->stmm.bo, 0,
+ pp_dn_context->stmm_bo, 0,
orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
20, 1);
@@ -3446,6 +3425,7 @@ pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context
/* private function & data */
pp_context->pp_x_steps = pp_dn_x_steps;
pp_context->pp_y_steps = pp_dn_y_steps;
+ pp_context->private_context = &pp_context->pp_dn_context;
pp_context->pp_set_block_parameter = pp_dn_set_block_parameter;
pp_static_parameter->grf1.statistics_surface_picth = w / 2;
@@ -3502,100 +3482,172 @@ gen7_pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_c
void *filter_param)
{
struct i965_driver_data *i965 = i965_driver_data(ctx);
- struct pp_dndi_context *pp_dndi_context = (struct pp_dndi_context *)&pp_context->private_context;
+ struct pp_dndi_context *pp_dndi_context = (struct pp_dndi_context *)&pp_context->pp_dndi_context;
struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
- struct object_surface *obj_surface;
+ struct object_surface *previous_in_obj_surface, *current_in_obj_surface, *previous_out_obj_surface, *current_out_obj_surface;
struct gen7_sampler_dndi *sampler_dndi;
int index;
int w, h;
int orig_w, orig_h;
int dndi_top_first = 1;
+ VAProcFilterParameterBufferDeinterlacing *di_filter_param = (VAProcFilterParameterBufferDeinterlacing *)filter_param;
+ int is_first_frame = (pp_dndi_context->frame_order == -1);
- if (src_surface->flags == I965_SURFACE_FLAG_FRAME)
- return VA_STATUS_ERROR_FLAG_NOT_SUPPORTED;
-
- if (src_surface->flags == I965_SURFACE_FLAG_TOP_FIELD_FIRST)
- dndi_top_first = 1;
- else
+ if (di_filter_param->flags & VA_DEINTERLACING_BOTTOM_FIELD)
dndi_top_first = 0;
+ else
+ dndi_top_first = 1;
/* surface */
- obj_surface = (struct object_surface *)src_surface->base;
- orig_w = obj_surface->orig_width;
- orig_h = obj_surface->orig_height;
- w = obj_surface->width;
- h = obj_surface->height;
+ current_in_obj_surface = (struct object_surface *)src_surface->base;
+
+ if (di_filter_param->algorithm == VAProcDeinterlacingBob) {
+ previous_in_obj_surface = current_in_obj_surface;
+ is_first_frame = 1;
+ } else if (di_filter_param->algorithm == VAProcDeinterlacingMotionAdaptive) {
+ if (pp_dndi_context->frame_order == 0) {
+ VAProcPipelineParameterBuffer *pipeline_param = pp_context->pipeline_param;
+ if (!pipeline_param ||
+ !pipeline_param->num_forward_references ||
+ pipeline_param->forward_references[0] == VA_INVALID_ID) {
+ WARN_ONCE("A forward temporal reference is needed for Motion adaptive deinterlacing !!!\n");
+
+ return VA_STATUS_ERROR_INVALID_PARAMETER;
+ } else {
+ previous_in_obj_surface = SURFACE(pipeline_param->forward_references[0]);
+ assert(previous_in_obj_surface && previous_in_obj_surface->bo);
- if (pp_context->stmm.bo == NULL) {
- pp_context->stmm.bo = dri_bo_alloc(i965->intel.bufmgr,
- "STMM surface",
- w * h,
- 4096);
- assert(pp_context->stmm.bo);
+ is_first_frame = 0;
+ }
+ } else if (pp_dndi_context->frame_order == 1) {
+ vpp_surface_convert(ctx,
+ pp_dndi_context->current_out_obj_surface,
+ (struct object_surface *)dst_surface->base);
+ pp_dndi_context->frame_order = (pp_dndi_context->frame_order + 1) % 2;
+ is_first_frame = 0;
+
+ return VA_STATUS_SUCCESS_1;
+ } else {
+ previous_in_obj_surface = current_in_obj_surface;
+ is_first_frame = 1;
+ }
+ } else {
+ return VA_STATUS_ERROR_UNIMPLEMENTED;
}
+ /* source (temporal reference) YUV surface index 4 */
+ orig_w = previous_in_obj_surface->orig_width;
+ orig_h = previous_in_obj_surface->orig_height;
+ w = previous_in_obj_surface->width;
+ h = previous_in_obj_surface->height;
+ gen7_pp_set_surface2_state(ctx, pp_context,
+ previous_in_obj_surface->bo, 0,
+ orig_w, orig_h, w,
+ 0, h,
+ SURFACE_FORMAT_PLANAR_420_8, 1,
+ 4);
+
+ /* source surface */
+ orig_w = current_in_obj_surface->orig_width;
+ orig_h = current_in_obj_surface->orig_height;
+ w = current_in_obj_surface->width;
+ h = current_in_obj_surface->height;
+
/* source UV surface index 1 */
gen7_pp_set_surface_state(ctx, pp_context,
- obj_surface->bo, w * h,
+ current_in_obj_surface->bo, w * h,
orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
1, 0);
/* source YUV surface index 3 */
gen7_pp_set_surface2_state(ctx, pp_context,
- obj_surface->bo, 0,
+ current_in_obj_surface->bo, 0,
orig_w, orig_h, w,
0, h,
SURFACE_FORMAT_PLANAR_420_8, 1,
3);
- /* source (temporal reference) YUV surface index 4 */
- gen7_pp_set_surface2_state(ctx, pp_context,
- obj_surface->bo, 0,
- orig_w, orig_h, w,
- 0, h,
- SURFACE_FORMAT_PLANAR_420_8, 1,
- 4);
-
/* STMM / History Statistics input surface, index 5 */
+ if (pp_dndi_context->stmm_bo == NULL) {
+ pp_dndi_context->stmm_bo = dri_bo_alloc(i965->intel.bufmgr,
+ "STMM surface",
+ w * h,
+ 4096);
+ assert(pp_dndi_context->stmm_bo);
+ }
+
gen7_pp_set_surface_state(ctx, pp_context,
- pp_context->stmm.bo, 0,
+ pp_dndi_context->stmm_bo, 0,
orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
5, 1);
/* destination surface */
- obj_surface = (struct object_surface *)dst_surface->base;
- orig_w = obj_surface->orig_width;
- orig_h = obj_surface->orig_height;
- w = obj_surface->width;
- h = obj_surface->height;
+ previous_out_obj_surface = (struct object_surface *)dst_surface->base;
+ orig_w = previous_out_obj_surface->orig_width;
+ orig_h = previous_out_obj_surface->orig_height;
+ w = previous_out_obj_surface->width;
+ h = previous_out_obj_surface->height;
+
+ if (is_first_frame) {
+ current_out_obj_surface = previous_out_obj_surface;
+ } else {
+ VAStatus va_status;
+
+ if (pp_dndi_context->current_out_surface == VA_INVALID_SURFACE) {
+ unsigned int tiling = 0, swizzle = 0;
+ dri_bo_get_tiling(previous_out_obj_surface->bo, &tiling, &swizzle);
+
+ va_status = i965_CreateSurfaces(ctx,
+ orig_w,
+ orig_h,
+ VA_RT_FORMAT_YUV420,
+ 1,
+ &pp_dndi_context->current_out_surface);
+ assert(va_status == VA_STATUS_SUCCESS);
+ pp_dndi_context->current_out_obj_surface = SURFACE(pp_dndi_context->current_out_surface);
+ assert(pp_dndi_context->current_out_obj_surface);
+ i965_check_alloc_surface_bo(ctx,
+ pp_dndi_context->current_out_obj_surface,
+ tiling != I915_TILING_NONE,
+ VA_FOURCC_NV12,
+ SUBSAMPLE_YUV420);
+ }
+
+ current_out_obj_surface = pp_dndi_context->current_out_obj_surface;
+ }
/* destination(Previous frame) Y surface index 27 */
gen7_pp_set_surface_state(ctx, pp_context,
- obj_surface->bo, 0,
+ previous_out_obj_surface->bo, 0,
orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
27, 1);
/* destination(Previous frame) UV surface index 28 */
gen7_pp_set_surface_state(ctx, pp_context,
- obj_surface->bo, w * h,
+ previous_out_obj_surface->bo, w * h,
orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
28, 1);
/* destination(Current frame) Y surface index 30 */
gen7_pp_set_surface_state(ctx, pp_context,
- obj_surface->bo, 0,
+ current_out_obj_surface->bo, 0,
orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
30, 1);
/* destination(Current frame) UV surface index 31 */
+ orig_w = current_out_obj_surface->orig_width;
+ orig_h = current_out_obj_surface->orig_height;
+ w = current_out_obj_surface->width;
+ h = current_out_obj_surface->height;
+
gen7_pp_set_surface_state(ctx, pp_context,
- obj_surface->bo, w * h,
+ current_out_obj_surface->bo, w * h,
orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
31, 1);
/* STMM output surface, index 33 */
gen7_pp_set_surface_state(ctx, pp_context,
- pp_context->stmm.bo, 0,
+ pp_dndi_context->stmm_bo, 0,
orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
33, 1);
@@ -3608,55 +3660,55 @@ gen7_pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_c
/* sample dndi index 0 */
index = 0;
- sampler_dndi[index].dw0.denoise_asd_threshold = 0;
- sampler_dndi[index].dw0.dnmh_delt = 8;
+ sampler_dndi[index].dw0.denoise_asd_threshold = 38;
+ sampler_dndi[index].dw0.dnmh_delt = 7;
sampler_dndi[index].dw0.vdi_walker_y_stride = 0;
sampler_dndi[index].dw0.vdi_walker_frame_sharing_enable = 0;
- sampler_dndi[index].dw0.denoise_maximum_history = 128; // 128-240
- sampler_dndi[index].dw0.denoise_stad_threshold = 0;
+ sampler_dndi[index].dw0.denoise_maximum_history = 192; // 128-240
+ sampler_dndi[index].dw0.denoise_stad_threshold = 140;
- sampler_dndi[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 64;
- sampler_dndi[index].dw1.denoise_moving_pixel_threshold = 0;
- sampler_dndi[index].dw1.stmm_c2 = 0;
- sampler_dndi[index].dw1.low_temporal_difference_threshold = 8;
- sampler_dndi[index].dw1.temporal_difference_threshold = 16;
+ sampler_dndi[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 38;
+ sampler_dndi[index].dw1.denoise_moving_pixel_threshold = 1;
+ sampler_dndi[index].dw1.stmm_c2 = 2;
+ sampler_dndi[index].dw1.low_temporal_difference_threshold = 0;
+ sampler_dndi[index].dw1.temporal_difference_threshold = 0;
- sampler_dndi[index].dw2.block_noise_estimate_noise_threshold = 15; // 0-31
+ sampler_dndi[index].dw2.block_noise_estimate_noise_threshold = 20; // 0-31
sampler_dndi[index].dw2.bne_edge_th = 1;
sampler_dndi[index].dw2.smooth_mv_th = 0;
sampler_dndi[index].dw2.sad_tight_th = 5;
sampler_dndi[index].dw2.cat_slope_minus1 = 9;
- sampler_dndi[index].dw2.good_neighbor_th = 4;
+ sampler_dndi[index].dw2.good_neighbor_th = 12;
- sampler_dndi[index].dw3.maximum_stmm = 128;
- sampler_dndi[index].dw3.multipler_for_vecm = 2;
- sampler_dndi[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 0;
+ sampler_dndi[index].dw3.maximum_stmm = 150;
+ sampler_dndi[index].dw3.multipler_for_vecm = 30;
+ sampler_dndi[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 125;
sampler_dndi[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
sampler_dndi[index].dw3.stmm_blending_constant_select = 0;
- sampler_dndi[index].dw4.sdi_delta = 8;
- sampler_dndi[index].dw4.sdi_threshold = 128;
- sampler_dndi[index].dw4.stmm_output_shift = 7; // stmm_max - stmm_min = 2 ^ stmm_output_shift
- sampler_dndi[index].dw4.stmm_shift_up = 0;
+ sampler_dndi[index].dw4.sdi_delta = 5;
+ sampler_dndi[index].dw4.sdi_threshold = 100;
+ sampler_dndi[index].dw4.stmm_output_shift = 5; // stmm_max - stmm_min = 2 ^ stmm_output_shift
+ sampler_dndi[index].dw4.stmm_shift_up = 1;
sampler_dndi[index].dw4.stmm_shift_down = 0;
- sampler_dndi[index].dw4.minimum_stmm = 0;
+ sampler_dndi[index].dw4.minimum_stmm = 118;
- sampler_dndi[index].dw5.fmd_temporal_difference_threshold = 0;
- sampler_dndi[index].dw5.sdi_fallback_mode_2_constant = 0;
- sampler_dndi[index].dw5.sdi_fallback_mode_1_t2_constant = 0;
- sampler_dndi[index].dw5.sdi_fallback_mode_1_t1_constant = 0;
+ sampler_dndi[index].dw5.fmd_temporal_difference_threshold = 175;
+ sampler_dndi[index].dw5.sdi_fallback_mode_2_constant = 37;
+ sampler_dndi[index].dw5.sdi_fallback_mode_1_t2_constant = 100;
+ sampler_dndi[index].dw5.sdi_fallback_mode_1_t1_constant = 50;
sampler_dndi[index].dw6.dn_enable = 0;
sampler_dndi[index].dw6.di_enable = 1;
sampler_dndi[index].dw6.di_partial = 0;
sampler_dndi[index].dw6.dndi_top_first = dndi_top_first;
sampler_dndi[index].dw6.dndi_stream_id = 1;
- sampler_dndi[index].dw6.dndi_first_frame = 1;
+ sampler_dndi[index].dw6.dndi_first_frame = is_first_frame;
sampler_dndi[index].dw6.progressive_dn = 0;
sampler_dndi[index].dw6.mcdi_enable = 0;
- sampler_dndi[index].dw6.fmd_tear_threshold = 32;
+ sampler_dndi[index].dw6.fmd_tear_threshold = 2;
sampler_dndi[index].dw6.cat_th1 = 0;
- sampler_dndi[index].dw6.fmd2_vertical_difference_threshold = 32;
- sampler_dndi[index].dw6.fmd1_vertical_difference_threshold = 32;
+ sampler_dndi[index].dw6.fmd2_vertical_difference_threshold = 100;
+ sampler_dndi[index].dw6.fmd1_vertical_difference_threshold = 16;
sampler_dndi[index].dw7.sad_tha = 5;
sampler_dndi[index].dw7.sad_thb = 10;
@@ -3672,6 +3724,7 @@ gen7_pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_c
/* private function & data */
pp_context->pp_x_steps = gen7_pp_dndi_x_steps;
pp_context->pp_y_steps = gen7_pp_dndi_y_steps;
+ pp_context->private_context = &pp_context->pp_dndi_context;
pp_context->pp_set_block_parameter = gen7_pp_dndi_set_block_parameter;
pp_static_parameter->grf1.di_statistics_surface_pitch_div2 = w / 2;
@@ -3691,6 +3744,8 @@ gen7_pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_c
dst_surface->flags = I965_SURFACE_FLAG_FRAME;
+ pp_dndi_context->frame_order = (pp_dndi_context->frame_order + 1) % 2;
+
return VA_STATUS_SUCCESS;
}
@@ -3730,7 +3785,7 @@ gen7_pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_con
void *filter_param)
{
struct i965_driver_data *i965 = i965_driver_data(ctx);
- struct pp_dn_context *pp_dn_context = (struct pp_dn_context *)&pp_context->private_context;
+ struct pp_dn_context *pp_dn_context = (struct pp_dn_context *)&pp_context->pp_dn_context;
struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
struct object_surface *obj_surface;
struct gen7_sampler_dndi *sampler_dn;
@@ -3772,12 +3827,12 @@ gen7_pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_con
w = obj_surface->width;
h = obj_surface->height;
- if (pp_context->stmm.bo == NULL) {
- pp_context->stmm.bo = dri_bo_alloc(i965->intel.bufmgr,
- "STMM surface",
- w * h,
- 4096);
- assert(pp_context->stmm.bo);
+ if (pp_dn_context->stmm_bo == NULL) {
+ pp_dn_context->stmm_bo= dri_bo_alloc(i965->intel.bufmgr,
+ "STMM surface",
+ w * h,
+ 4096);
+ assert(pp_dn_context->stmm_bo);
}
/* source UV surface index 1 */
@@ -3804,7 +3859,7 @@ gen7_pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_con
/* STMM / History Statistics input surface, index 5 */
gen7_pp_set_surface_state(ctx, pp_context,
- pp_context->stmm.bo, 0,
+ pp_dn_context->stmm_bo, 0,
orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
33, 1);
@@ -3900,6 +3955,7 @@ gen7_pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_con
/* private function & data */
pp_context->pp_x_steps = gen7_pp_dn_x_steps;
pp_context->pp_y_steps = gen7_pp_dn_y_steps;
+ pp_context->private_context = &pp_context->pp_dn_context;
pp_context->pp_set_block_parameter = gen7_pp_dn_set_block_parameter;
pp_static_parameter->grf1.di_statistics_surface_pitch_div2 = w / 2;
@@ -4131,7 +4187,7 @@ gen6_pp_initialize(
assert(bo);
pp_context->vfe_state.bo = bo;
- if (IS_GEN7(i965->intel.device_id)) {
+ if (IS_GEN7(i965->intel.device_info)) {
static_param_size = sizeof(struct gen7_pp_static_parameter);
inline_param_size = sizeof(struct gen7_pp_inline_parameter);
} else {
@@ -4161,6 +4217,7 @@ gen6_pp_initialize(
return va_status;
}
+
static void
gen6_pp_interface_descriptor_table(VADriverContextP ctx,
struct i965_post_processing_context *pp_context)
@@ -4186,7 +4243,7 @@ gen6_pp_interface_descriptor_table(VADriverContextP ctx,
desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET >> 5);
desc->desc4.constant_urb_entry_read_offset = 0;
- if (IS_GEN7(i965->intel.device_id))
+ if (IS_GEN7(i965->intel.device_info))
desc->desc4.constant_urb_entry_read_length = 6; /* grf 1-6 */
else
desc->desc4.constant_urb_entry_read_length = 4; /* grf 1-4 */
@@ -4218,7 +4275,7 @@ gen6_pp_upload_constants(VADriverContextP ctx,
assert(sizeof(struct pp_static_parameter) == 128);
assert(sizeof(struct gen7_pp_static_parameter) == 192);
- if (IS_GEN7(i965->intel.device_id))
+ if (IS_GEN7(i965->intel.device_info))
param_size = sizeof(struct gen7_pp_static_parameter);
else
param_size = sizeof(struct pp_static_parameter);
@@ -4279,12 +4336,14 @@ gen6_pp_vfe_state(VADriverContextP ctx,
OUT_BATCH(batch, CMD_MEDIA_VFE_STATE | (8 - 2));
OUT_BATCH(batch, 0);
OUT_BATCH(batch,
- (pp_context->urb.num_vfe_entries - 1) << 16 |
- pp_context->urb.num_vfe_entries << 8);
+ (pp_context->vfe_gpu_state.max_num_threads - 1) << 16 |
+ pp_context->vfe_gpu_state.num_urb_entries << 8);
OUT_BATCH(batch, 0);
OUT_BATCH(batch,
- (pp_context->urb.size_vfe_entry * 2) << 16 | /* URB Entry Allocation Size, in 256 bits unit */
- (pp_context->urb.size_cs_entry * pp_context->urb.num_cs_entries * 2)); /* CURBE Allocation Size, in 256 bits unit */
+ (pp_context->vfe_gpu_state.urb_entry_size) << 16 |
+ /* URB Entry Allocation Size, in 256 bits unit */
+ (pp_context->vfe_gpu_state.curbe_allocation_size));
+ /* CURBE Allocation Size, in 256 bits unit */
OUT_BATCH(batch, 0);
OUT_BATCH(batch, 0);
OUT_BATCH(batch, 0);
@@ -4296,14 +4355,19 @@ gen6_pp_curbe_load(VADriverContextP ctx,
struct i965_post_processing_context *pp_context)
{
struct intel_batchbuffer *batch = pp_context->batch;
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
+ int param_size;
- assert(pp_context->urb.size_cs_entry * pp_context->urb.num_cs_entries * 2 * 32 <= pp_context->curbe.bo->size);
+ if (IS_GEN7(i965->intel.device_info))
+ param_size = sizeof(struct gen7_pp_static_parameter);
+ else
+ param_size = sizeof(struct pp_static_parameter);
BEGIN_BATCH(batch, 4);
OUT_BATCH(batch, CMD_MEDIA_CURBE_LOAD | (4 - 2));
OUT_BATCH(batch, 0);
OUT_BATCH(batch,
- pp_context->urb.size_cs_entry * pp_context->urb.num_cs_entries * 2 * 32);
+ param_size);
OUT_RELOC(batch,
pp_context->curbe.bo,
I915_GEM_DOMAIN_INSTRUCTION, 0,
@@ -4380,13 +4444,13 @@ gen6_pp_object_walker(VADriverContextP ctx,
dri_bo *command_buffer;
unsigned int *command_ptr;
- if (IS_GEN7(i965->intel.device_id))
+ if (IS_GEN7(i965->intel.device_info))
param_size = sizeof(struct gen7_pp_inline_parameter);
else
param_size = sizeof(struct pp_inline_parameter);
- x_steps = pp_context->pp_x_steps(&pp_context->private_context);
- y_steps = pp_context->pp_y_steps(&pp_context->private_context);
+ x_steps = pp_context->pp_x_steps(pp_context->private_context);
+ y_steps = pp_context->pp_y_steps(pp_context->private_context);
command_length_in_dws = 6 + (param_size >> 2);
command_buffer = dri_bo_alloc(i965->intel.bufmgr,
"command objects buffer",
@@ -4400,7 +4464,7 @@ gen6_pp_object_walker(VADriverContextP ctx,
for (x = 0; x < x_steps; x++) {
if (!pp_context->pp_set_block_parameter(pp_context, x, y)) {
// some common block parameter update goes here, apply to all pp functions
- if (IS_GEN6(i965->intel.device_id))
+ if (IS_GEN6(i965->intel.device_info))
update_block_mask_parameter (pp_context, x, y, x_steps, y_steps);
*command_ptr++ = (CMD_MEDIA_OBJECT | (command_length_in_dws - 2));
@@ -4423,12 +4487,12 @@ gen6_pp_object_walker(VADriverContextP ctx,
dri_bo_unmap(command_buffer);
BEGIN_BATCH(batch, 2);
- OUT_BATCH(batch, MI_BATCH_BUFFER_START | (2 << 6));
- OUT_RELOC(batch, command_buffer,
- I915_GEM_DOMAIN_COMMAND, 0,
+ OUT_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8));
+ OUT_RELOC(batch, command_buffer,
+ I915_GEM_DOMAIN_COMMAND, 0,
0);
ADVANCE_BATCH(batch);
-
+
dri_bo_unreference(command_buffer);
/* Have to execute the batch buffer here becuase MI_BATCH_BUFFER_END
@@ -4483,6 +4547,9 @@ gen6_post_processing(
gen6_pp_pipeline_setup(ctx, pp_context);
}
+ if (va_status == VA_STATUS_SUCCESS_1)
+ va_status = VA_STATUS_SUCCESS;
+
return va_status;
}
@@ -4501,27 +4568,18 @@ i965_post_processing_internal(
VAStatus va_status;
struct i965_driver_data *i965 = i965_driver_data(ctx);
- if (IS_GEN6(i965->intel.device_id) ||
- IS_GEN7(i965->intel.device_id))
- va_status = gen6_post_processing(ctx, pp_context, src_surface, src_rect, dst_surface, dst_rect, pp_index, filter_param);
- else
- va_status = ironlake_post_processing(ctx, pp_context, src_surface, src_rect, dst_surface, dst_rect, pp_index, filter_param);
-
+ if (pp_context && pp_context->intel_post_processing) {
+ va_status = (pp_context->intel_post_processing)(ctx, pp_context,
+ src_surface, src_rect,
+ dst_surface, dst_rect,
+ pp_index, filter_param);
+ } else {
+ va_status = VA_STATUS_ERROR_UNIMPLEMENTED;
+ }
+
return va_status;
}
-VAStatus
-i965_DestroySurfaces(VADriverContextP ctx,
- VASurfaceID *surface_list,
- int num_surfaces);
-VAStatus
-i965_CreateSurfaces(VADriverContextP ctx,
- int width,
- int height,
- int format,
- int num_surfaces,
- VASurfaceID *surfaces);
-
static void
rgb_to_yuv(unsigned int argb,
unsigned char *y,
@@ -4554,7 +4612,7 @@ i965_vpp_clear_surface(VADriverContextP ctx,
int region_width, region_height;
/* Currently only support NV12 surface */
- if (!obj_surface || obj_surface->fourcc != VA_FOURCC('N', 'V', '1', '2'))
+ if (!obj_surface || obj_surface->fourcc != VA_FOURCC_NV12)
return;
rgb_to_yuv(color, &y, &u, &v, &a);
@@ -4576,13 +4634,13 @@ i965_vpp_clear_surface(VADriverContextP ctx,
br13 |= BR13_8;
br13 |= pitch;
- if (IS_GEN6(i965->intel.device_id) ||
- IS_GEN7(i965->intel.device_id)) {
- intel_batchbuffer_start_atomic_blt(batch, 48);
- BEGIN_BLT_BATCH(batch, 12);
- } else {
+ if (IS_IRONLAKE(i965->intel.device_info)) {
intel_batchbuffer_start_atomic(batch, 48);
BEGIN_BATCH(batch, 12);
+ } else {
+ /* Will double-check the command if the new chipset is added */
+ intel_batchbuffer_start_atomic_blt(batch, 48);
+ BEGIN_BLT_BATCH(batch, 12);
}
region_width = obj_surface->width;
@@ -4641,10 +4699,10 @@ i965_scaling_processing(
VAStatus va_status = VA_STATUS_SUCCESS;
struct i965_driver_data *i965 = i965_driver_data(ctx);
- assert(src_surface_obj->fourcc == VA_FOURCC('N', 'V', '1', '2'));
- assert(dst_surface_obj->fourcc == VA_FOURCC('N', 'V', '1', '2'));
+ assert(src_surface_obj->fourcc == VA_FOURCC_NV12);
+ assert(dst_surface_obj->fourcc == VA_FOURCC_NV12);
- if (HAS_PP(i965) && (flags & I965_PP_FLAG_AVS)) {
+ if (HAS_VPP(i965) && (flags & I965_PP_FLAG_AVS)) {
struct i965_surface src_surface;
struct i965_surface dst_surface;
@@ -4687,13 +4745,13 @@ i965_post_processing(
*has_done_scaling = 0;
- if (HAS_PP(i965)) {
+ if (HAS_VPP(i965)) {
VAStatus status;
struct i965_surface src_surface;
struct i965_surface dst_surface;
/* Currently only support post processing for NV12 surface */
- if (obj_surface->fourcc != VA_FOURCC('N', 'V', '1', '2'))
+ if (obj_surface->fourcc != VA_FOURCC_NV12)
return out_surface_id;
_i965LockMutex(&i965->pp_mutex);
@@ -4713,7 +4771,7 @@ i965_post_processing(
assert(status == VA_STATUS_SUCCESS);
obj_surface = SURFACE(out_surface_id);
assert(obj_surface);
- i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
+ i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
i965_vpp_clear_surface(ctx, i965->pp_context, obj_surface, 0);
dst_surface.base = (struct object_base *)obj_surface;
@@ -4749,7 +4807,7 @@ i965_post_processing(
assert(status == VA_STATUS_SUCCESS);
obj_surface = SURFACE(out_surface_id);
assert(obj_surface);
- i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
+ i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
i965_vpp_clear_surface(ctx, i965->pp_context, obj_surface, 0);
dst_surface.base = (struct object_base *)obj_surface;
@@ -4777,6 +4835,70 @@ i965_post_processing(
}
static VAStatus
+i965_image_pl2_processing(VADriverContextP ctx,
+ const struct i965_surface *src_surface,
+ const VARectangle *src_rect,
+ struct i965_surface *dst_surface,
+ const VARectangle *dst_rect);
+
+/*
+ * Two-stage conversion through a temporary NV12 surface.
+ *
+ * Stage 1 calls i965_image_plx_nv12_processing (supplied by the caller) to
+ * convert src_surface/src_rect into a temporary NV12 surface, writing to
+ * dst_rect inside that surface. Stage 2 calls i965_image_pl2_processing to
+ * convert the temporary surface (dst_rect) into dst_surface (dst_rect).
+ * The temporary surface is sized like dst_surface and is destroyed before
+ * returning.
+ *
+ * Returns the status of the first step that fails, or the status of the
+ * second stage. Failure to create or look up the temporary surface is
+ * reported to the caller instead of being guarded only by assert(), which
+ * disappears in NDEBUG builds and would let a NULL surface be used.
+ */
+static VAStatus
+i965_image_plx_nv12_plx_processing(VADriverContextP ctx,
+                                   VAStatus (*i965_image_plx_nv12_processing)(
+                                       VADriverContextP,
+                                       const struct i965_surface *,
+                                       const VARectangle *,
+                                       struct i965_surface *,
+                                       const VARectangle *),
+                                   const struct i965_surface *src_surface,
+                                   const VARectangle *src_rect,
+                                   struct i965_surface *dst_surface,
+                                   const VARectangle *dst_rect)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    VAStatus status;
+    VASurfaceID tmp_surface_id = VA_INVALID_SURFACE;
+    struct object_surface *obj_surface = NULL;
+    struct i965_surface tmp_surface;
+    int width, height;
+
+    /* The intermediate surface has the same dimensions as the destination */
+    pp_get_surface_size(ctx, dst_surface, &width, &height);
+    status = i965_CreateSurfaces(ctx,
+                                 width,
+                                 height,
+                                 VA_RT_FORMAT_YUV420,
+                                 1,
+                                 &tmp_surface_id);
+    if (status != VA_STATUS_SUCCESS)
+        return status;
+
+    obj_surface = SURFACE(tmp_surface_id);
+    if (!obj_surface)
+        return VA_STATUS_ERROR_ALLOCATION_FAILED; /* nothing to destroy */
+
+    i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
+
+    tmp_surface.base = (struct object_base *)obj_surface;
+    tmp_surface.type = I965_SURFACE_TYPE_SURFACE;
+    tmp_surface.flags = I965_SURFACE_FLAG_FRAME;
+
+    /* Stage 1: source format -> NV12 */
+    status = i965_image_plx_nv12_processing(ctx,
+                                            src_surface,
+                                            src_rect,
+                                            &tmp_surface,
+                                            dst_rect);
+
+    /* Stage 2: NV12 -> destination format, only if stage 1 succeeded */
+    if (status == VA_STATUS_SUCCESS)
+        status = i965_image_pl2_processing(ctx,
+                                           &tmp_surface,
+                                           dst_rect,
+                                           dst_surface,
+                                           dst_rect);
+
+    i965_DestroySurfaces(ctx,
+                         &tmp_surface_id,
+                         1);
+
+    return status;
+}
+
+
+static VAStatus
i965_image_pl1_rgbx_processing(VADriverContextP ctx,
const struct i965_surface *src_surface,
const VARectangle *src_rect,
@@ -4786,23 +4908,31 @@ i965_image_pl1_rgbx_processing(VADriverContextP ctx,
struct i965_driver_data *i965 = i965_driver_data(ctx);
struct i965_post_processing_context *pp_context = i965->pp_context;
int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
+ VAStatus vaStatus;
- if (fourcc == VA_FOURCC('N', 'V', '1', '2')) {
- i965_post_processing_internal(ctx, i965->pp_context,
- src_surface,
- src_rect,
- dst_surface,
- dst_rect,
- PP_RGBX_LOAD_SAVE_NV12,
- NULL);
- } else {
- assert(0);
- return VA_STATUS_ERROR_UNKNOWN;
- }
+ switch (fourcc) {
+ case VA_FOURCC_NV12:
+ vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
+ src_surface,
+ src_rect,
+ dst_surface,
+ dst_rect,
+ PP_RGBX_LOAD_SAVE_NV12,
+ NULL);
+ intel_batchbuffer_flush(pp_context->batch);
+ break;
- intel_batchbuffer_flush(pp_context->batch);
+ default:
+ vaStatus = i965_image_plx_nv12_plx_processing(ctx,
+ i965_image_pl1_rgbx_processing,
+ src_surface,
+ src_rect,
+ dst_surface,
+ dst_rect);
+ break;
+ }
- return VA_STATUS_SUCCESS;
+ return vaStatus;
}
static VAStatus
@@ -4817,7 +4947,8 @@ i965_image_pl3_processing(VADriverContextP ctx,
int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
VAStatus vaStatus = VA_STATUS_ERROR_UNIMPLEMENTED;
- if (fourcc == VA_FOURCC('N', 'V', '1', '2')) {
+ switch (fourcc) {
+ case VA_FOURCC_NV12:
vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
src_surface,
src_rect,
@@ -4825,10 +4956,13 @@ i965_image_pl3_processing(VADriverContextP ctx,
dst_rect,
PP_PL3_LOAD_SAVE_N12,
NULL);
- } else if (fourcc == VA_FOURCC('I', 'M', 'C', '1') ||
- fourcc == VA_FOURCC('I', 'M', 'C', '3') ||
- fourcc == VA_FOURCC('Y', 'V', '1', '2') ||
- fourcc == VA_FOURCC('I', '4', '2', '0')) {
+ intel_batchbuffer_flush(pp_context->batch);
+ break;
+
+ case VA_FOURCC_IMC1:
+ case VA_FOURCC_IMC3:
+ case VA_FOURCC_YV12:
+ case VA_FOURCC_I420:
vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
src_surface,
src_rect,
@@ -4836,8 +4970,11 @@ i965_image_pl3_processing(VADriverContextP ctx,
dst_rect,
PP_PL3_LOAD_SAVE_PL3,
NULL);
- } else if (fourcc == VA_FOURCC('Y', 'U', 'Y', '2') ||
- fourcc == VA_FOURCC('U', 'Y', 'V', 'Y')) {
+ intel_batchbuffer_flush(pp_context->batch);
+ break;
+
+ case VA_FOURCC_YUY2:
+ case VA_FOURCC_UYVY:
vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
src_surface,
src_rect,
@@ -4845,12 +4982,18 @@ i965_image_pl3_processing(VADriverContextP ctx,
dst_rect,
PP_PL3_LOAD_SAVE_PA,
NULL);
- }
- else {
- assert(0);
- }
+ intel_batchbuffer_flush(pp_context->batch);
+ break;
- intel_batchbuffer_flush(pp_context->batch);
+ default:
+ vaStatus = i965_image_plx_nv12_plx_processing(ctx,
+ i965_image_pl3_processing,
+ src_surface,
+ src_rect,
+ dst_surface,
+ dst_rect);
+ break;
+ }
return vaStatus;
}
@@ -4867,7 +5010,8 @@ i965_image_pl2_processing(VADriverContextP ctx,
int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
VAStatus vaStatus = VA_STATUS_ERROR_UNIMPLEMENTED;
- if (fourcc == VA_FOURCC('N', 'V', '1', '2')) {
+ switch (fourcc) {
+ case VA_FOURCC_NV12:
vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
src_surface,
src_rect,
@@ -4875,10 +5019,12 @@ i965_image_pl2_processing(VADriverContextP ctx,
dst_rect,
PP_NV12_LOAD_SAVE_N12,
NULL);
- } else if (fourcc == VA_FOURCC('I', 'M', 'C', '1') ||
- fourcc == VA_FOURCC('I', 'M', 'C', '3') ||
- fourcc == VA_FOURCC('Y', 'V', '1', '2') ||
- fourcc == VA_FOURCC('I', '4', '2', '0') ) {
+ break;
+
+ case VA_FOURCC_IMC1:
+ case VA_FOURCC_IMC3:
+ case VA_FOURCC_YV12:
+ case VA_FOURCC_I420:
vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
src_surface,
src_rect,
@@ -4886,19 +5032,23 @@ i965_image_pl2_processing(VADriverContextP ctx,
dst_rect,
PP_NV12_LOAD_SAVE_PL3,
NULL);
- } else if (fourcc == VA_FOURCC('Y', 'U', 'Y', '2') ||
- fourcc == VA_FOURCC('U', 'Y', 'V', 'Y')) {
+ break;
+
+ case VA_FOURCC_YUY2:
+ case VA_FOURCC_UYVY:
vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
src_surface,
src_rect,
dst_surface,
dst_rect,
PP_NV12_LOAD_SAVE_PA,
- NULL);
- } else if (fourcc == VA_FOURCC('B', 'G', 'R', 'X') ||
- fourcc == VA_FOURCC('B', 'G', 'R', 'A') ||
- fourcc == VA_FOURCC('R', 'G', 'B', 'X') ||
- fourcc == VA_FOURCC('R', 'G', 'B', 'A') ) {
+ NULL);
+ break;
+
+ case VA_FOURCC_BGRX:
+ case VA_FOURCC_BGRA:
+ case VA_FOURCC_RGBX:
+ case VA_FOURCC_RGBA:
vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
src_surface,
src_rect,
@@ -4906,9 +5056,10 @@ i965_image_pl2_processing(VADriverContextP ctx,
dst_rect,
PP_NV12_LOAD_SAVE_RGBX,
NULL);
- } else {
- assert(0);
- return VA_STATUS_ERROR_UNKNOWN;
+ break;
+
+ default:
+ return VA_STATUS_ERROR_UNIMPLEMENTED;
}
intel_batchbuffer_flush(pp_context->batch);
@@ -4926,31 +5077,54 @@ i965_image_pl1_processing(VADriverContextP ctx,
struct i965_driver_data *i965 = i965_driver_data(ctx);
struct i965_post_processing_context *pp_context = i965->pp_context;
int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
+ VAStatus vaStatus;
- if (fourcc == VA_FOURCC('N', 'V', '1', '2')) {
- i965_post_processing_internal(ctx, i965->pp_context,
- src_surface,
- src_rect,
- dst_surface,
- dst_rect,
- PP_PA_LOAD_SAVE_NV12,
- NULL);
- } else if (fourcc == VA_FOURCC_YV12) {
- i965_post_processing_internal(ctx, i965->pp_context,
- src_surface,
- src_rect,
- dst_surface,
- dst_rect,
- PP_PA_LOAD_SAVE_PL3,
- NULL);
+ switch (fourcc) {
+ case VA_FOURCC_NV12:
+ vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
+ src_surface,
+ src_rect,
+ dst_surface,
+ dst_rect,
+ PP_PA_LOAD_SAVE_NV12,
+ NULL);
+ intel_batchbuffer_flush(pp_context->batch);
+ break;
- } else {
- return VA_STATUS_ERROR_UNKNOWN;
- }
+ case VA_FOURCC_YV12:
+ vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
+ src_surface,
+ src_rect,
+ dst_surface,
+ dst_rect,
+ PP_PA_LOAD_SAVE_PL3,
+ NULL);
+ intel_batchbuffer_flush(pp_context->batch);
+ break;
- intel_batchbuffer_flush(pp_context->batch);
+ case VA_FOURCC_YUY2:
+ case VA_FOURCC_UYVY:
+ vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
+ src_surface,
+ src_rect,
+ dst_surface,
+ dst_rect,
+ PP_PA_LOAD_SAVE_PA,
+ NULL);
+ intel_batchbuffer_flush(pp_context->batch);
+ break;
- return VA_STATUS_SUCCESS;
+ default:
+ vaStatus = i965_image_plx_nv12_plx_processing(ctx,
+ i965_image_pl1_processing,
+ src_surface,
+ src_rect,
+ dst_surface,
+ dst_rect);
+ break;
+ }
+
+ return vaStatus;
}
VAStatus
@@ -4963,16 +5137,21 @@ i965_image_processing(VADriverContextP ctx,
struct i965_driver_data *i965 = i965_driver_data(ctx);
VAStatus status = VA_STATUS_ERROR_UNIMPLEMENTED;
- if (HAS_PP(i965)) {
+ if (HAS_VPP(i965)) {
int fourcc = pp_get_surface_fourcc(ctx, src_surface);
_i965LockMutex(&i965->pp_mutex);
switch (fourcc) {
- case VA_FOURCC('Y', 'V', '1', '2'):
- case VA_FOURCC('I', '4', '2', '0'):
- case VA_FOURCC('I', 'M', 'C', '1'):
- case VA_FOURCC('I', 'M', 'C', '3'):
+ case VA_FOURCC_YV12:
+ case VA_FOURCC_I420:
+ case VA_FOURCC_IMC1:
+ case VA_FOURCC_IMC3:
+ case VA_FOURCC_422H:
+ case VA_FOURCC_422V:
+ case VA_FOURCC_411P:
+ case VA_FOURCC_444P:
+ case VA_FOURCC_YV16:
status = i965_image_pl3_processing(ctx,
src_surface,
src_rect,
@@ -4980,25 +5159,25 @@ i965_image_processing(VADriverContextP ctx,
dst_rect);
break;
- case VA_FOURCC('N', 'V', '1', '2'):
+ case VA_FOURCC_NV12:
status = i965_image_pl2_processing(ctx,
src_surface,
src_rect,
dst_surface,
dst_rect);
break;
- case VA_FOURCC('Y', 'U', 'Y', '2'):
- case VA_FOURCC('U', 'Y', 'V', 'Y'):
+ case VA_FOURCC_YUY2:
+ case VA_FOURCC_UYVY:
status = i965_image_pl1_processing(ctx,
src_surface,
src_rect,
dst_surface,
dst_rect);
break;
- case VA_FOURCC('B', 'G', 'R', 'A'):
- case VA_FOURCC('B', 'G', 'R', 'X'):
- case VA_FOURCC('R', 'G', 'B', 'A'):
- case VA_FOURCC('R', 'G', 'B', 'X'):
+ case VA_FOURCC_BGRA:
+ case VA_FOURCC_BGRX:
+ case VA_FOURCC_RGBA:
+ case VA_FOURCC_RGBX:
status = i965_image_pl1_rgbx_processing(ctx,
src_surface,
src_rect,
@@ -5043,8 +5222,11 @@ i965_post_processing_context_finalize(struct i965_post_processing_context *pp_co
dri_bo_unreference(pp_context->vfe_state.bo);
pp_context->vfe_state.bo = NULL;
- dri_bo_unreference(pp_context->stmm.bo);
- pp_context->stmm.bo = NULL;
+ dri_bo_unreference(pp_context->pp_dndi_context.stmm_bo);
+ pp_context->pp_dndi_context.stmm_bo = NULL;
+
+ dri_bo_unreference(pp_context->pp_dn_context.stmm_bo);
+ pp_context->pp_dn_context.stmm_bo = NULL;
for (i = 0; i < NUM_PP_MODULES; i++) {
struct pp_module *pp_module = &pp_context->pp_modules[i];
@@ -5066,49 +5248,59 @@ i965_post_processing_terminate(VADriverContextP ctx)
struct i965_post_processing_context *pp_context = i965->pp_context;
if (pp_context) {
- i965_post_processing_context_finalize(pp_context);
+ pp_context->finalize(pp_context);
free(pp_context);
}
i965->pp_context = NULL;
}
-static void
+#define VPP_CURBE_ALLOCATION_SIZE 32
+
+void
i965_post_processing_context_init(VADriverContextP ctx,
- struct i965_post_processing_context *pp_context,
+ void *data,
struct intel_batchbuffer *batch)
{
struct i965_driver_data *i965 = i965_driver_data(ctx);
int i;
+ struct i965_post_processing_context *pp_context = data;
- pp_context->urb.size = URB_SIZE((&i965->intel));
- pp_context->urb.num_vfe_entries = 32;
- pp_context->urb.size_vfe_entry = 1; /* in 512 bits unit */
- pp_context->urb.num_cs_entries = 1;
-
- if (IS_GEN7(i965->intel.device_id))
- pp_context->urb.size_cs_entry = 4; /* in 512 bits unit */
- else
+ if (IS_IRONLAKE(i965->intel.device_info)) {
+ pp_context->urb.size = i965->intel.device_info->urb_size;
+ pp_context->urb.num_vfe_entries = 32;
+ pp_context->urb.size_vfe_entry = 1; /* in 512 bits unit */
+ pp_context->urb.num_cs_entries = 1;
pp_context->urb.size_cs_entry = 2;
+ pp_context->urb.vfe_start = 0;
+ pp_context->urb.cs_start = pp_context->urb.vfe_start +
+ pp_context->urb.num_vfe_entries * pp_context->urb.size_vfe_entry;
+ assert(pp_context->urb.cs_start +
+ pp_context->urb.num_cs_entries * pp_context->urb.size_cs_entry <= i965->intel.device_info->urb_size);
+ pp_context->intel_post_processing = ironlake_post_processing;
+ } else {
+ pp_context->vfe_gpu_state.max_num_threads = 60;
+ pp_context->vfe_gpu_state.num_urb_entries = 59;
+ pp_context->vfe_gpu_state.gpgpu_mode = 0;
+ pp_context->vfe_gpu_state.urb_entry_size = 16 - 1;
+ pp_context->vfe_gpu_state.curbe_allocation_size = VPP_CURBE_ALLOCATION_SIZE;
+ pp_context->intel_post_processing = gen6_post_processing;
+ }
- pp_context->urb.vfe_start = 0;
- pp_context->urb.cs_start = pp_context->urb.vfe_start +
- pp_context->urb.num_vfe_entries * pp_context->urb.size_vfe_entry;
- assert(pp_context->urb.cs_start +
- pp_context->urb.num_cs_entries * pp_context->urb.size_cs_entry <= URB_SIZE((&i965->intel)));
+ pp_context->finalize = i965_post_processing_context_finalize;
assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen5));
assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen6));
assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen7));
assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen75));
- if (IS_HASWELL(i965->intel.device_id))
+ if (IS_HASWELL(i965->intel.device_info))
memcpy(pp_context->pp_modules, pp_modules_gen75, sizeof(pp_context->pp_modules));
- else if (IS_GEN7(i965->intel.device_id))
+ else if (IS_GEN7(i965->intel.device_info))
memcpy(pp_context->pp_modules, pp_modules_gen7, sizeof(pp_context->pp_modules));
- else if (IS_GEN6(i965->intel.device_id))
+ else if (IS_GEN6(i965->intel.device_info))
memcpy(pp_context->pp_modules, pp_modules_gen6, sizeof(pp_context->pp_modules));
- else if (IS_IRONLAKE(i965->intel.device_id))
+ else if (IS_IRONLAKE(i965->intel.device_info))
memcpy(pp_context->pp_modules, pp_modules_gen5, sizeof(pp_context->pp_modules));
for (i = 0; i < NUM_PP_MODULES; i++) {
@@ -5127,7 +5319,7 @@ i965_post_processing_context_init(VADriverContextP ctx,
}
/* static & inline parameters */
- if (IS_GEN7(i965->intel.device_id)) {
+ if (IS_GEN7(i965->intel.device_info)) {
pp_context->pp_static_parameter = calloc(sizeof(struct gen7_pp_static_parameter), 1);
pp_context->pp_inline_parameter = calloc(sizeof(struct gen7_pp_inline_parameter), 1);
} else {
@@ -5135,6 +5327,9 @@ i965_post_processing_context_init(VADriverContextP ctx,
pp_context->pp_inline_parameter = calloc(sizeof(struct pp_inline_parameter), 1);
}
+ pp_context->pp_dndi_context.current_out_surface = VA_INVALID_SURFACE;
+ pp_context->pp_dndi_context.current_out_obj_surface = NULL;
+ pp_context->pp_dndi_context.frame_order = -1;
pp_context->batch = batch;
}
@@ -5144,10 +5339,10 @@ i965_post_processing_init(VADriverContextP ctx)
struct i965_driver_data *i965 = i965_driver_data(ctx);
struct i965_post_processing_context *pp_context = i965->pp_context;
- if (HAS_PP(i965)) {
+ if (HAS_VPP(i965)) {
if (pp_context == NULL) {
pp_context = calloc(1, sizeof(*pp_context));
- i965_post_processing_context_init(ctx, pp_context, i965->batch);
+ i965->codec_info->post_processing_context_init(ctx, pp_context, i965->pp_batch);
i965->pp_context = pp_context;
}
}
@@ -5189,14 +5384,28 @@ i965_proc_picture(VADriverContextP ctx,
unsigned int tiling = 0, swizzle = 0;
int in_width, in_height;
- assert(pipeline_param->surface != VA_INVALID_ID);
- assert(proc_state->current_render_target != VA_INVALID_ID);
+ if (pipeline_param->surface == VA_INVALID_ID ||
+ proc_state->current_render_target == VA_INVALID_ID) {
+ status = VA_STATUS_ERROR_INVALID_SURFACE;
+ goto error;
+ }
obj_surface = SURFACE(pipeline_param->surface);
- assert(obj_surface && obj_surface->bo);
- if (!obj_surface || !obj_surface->bo)
+ if (!obj_surface) {
+ status = VA_STATUS_ERROR_INVALID_SURFACE;
+ goto error;
+ }
+
+ if (!obj_surface->bo) {
+ status = VA_STATUS_ERROR_INVALID_VALUE; /* The input surface is created without valid content */
+ goto error;
+ }
+
+ if (pipeline_param->num_filters && !pipeline_param->filters) {
+ status = VA_STATUS_ERROR_INVALID_PARAMETER;
goto error;
+ }
in_width = obj_surface->orig_width;
in_height = obj_surface->orig_height;
@@ -5207,7 +5416,7 @@ i965_proc_picture(VADriverContextP ctx,
src_surface.flags = proc_frame_to_pp_frame[pipeline_param->filter_flags & 0x3];
VASurfaceID out_surface_id = VA_INVALID_ID;
- if (obj_surface->fourcc != VA_FOURCC('N', 'V', '1', '2')) {
+ if (obj_surface->fourcc != VA_FOURCC_NV12) {
src_surface.base = (struct object_base *)obj_surface;
src_surface.type = I965_SURFACE_TYPE_SURFACE;
src_surface.flags = I965_SURFACE_FLAG_FRAME;
@@ -5226,7 +5435,7 @@ i965_proc_picture(VADriverContextP ctx,
tmp_surfaces[num_tmp_surfaces++] = out_surface_id;
obj_surface = SURFACE(out_surface_id);
assert(obj_surface);
- i965_check_alloc_surface_bo(ctx, obj_surface, !!tiling, VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
+ i965_check_alloc_surface_bo(ctx, obj_surface, !!tiling, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
dst_surface.base = (struct object_base *)obj_surface;
dst_surface.type = I965_SURFACE_TYPE_SURFACE;
@@ -5272,16 +5481,20 @@ i965_proc_picture(VADriverContextP ctx,
dst_rect.height = in_height;
}
+ proc_context->pp_context.pipeline_param = pipeline_param;
+
for (i = 0; i < pipeline_param->num_filters; i++) {
struct object_buffer *obj_buffer = BUFFER(pipeline_param->filters[i]);
VAProcFilterParameterBufferBase *filter_param = NULL;
VAProcFilterType filter_type;
int kernel_index;
- assert(obj_buffer && obj_buffer->buffer_store);
-
- if (!obj_buffer || !obj_buffer->buffer_store)
+ if (!obj_buffer ||
+ !obj_buffer->buffer_store ||
+ !obj_buffer->buffer_store->buffer) {
+ status = VA_STATUS_ERROR_INVALID_FILTER_CHAIN;
goto error;
+ }
out_surface_id = VA_INVALID_ID;
filter_param = (VAProcFilterParameterBufferBase *)obj_buffer->buffer_store->buffer;
@@ -5300,7 +5513,7 @@ i965_proc_picture(VADriverContextP ctx,
tmp_surfaces[num_tmp_surfaces++] = out_surface_id;
obj_surface = SURFACE(out_surface_id);
assert(obj_surface);
- i965_check_alloc_surface_bo(ctx, obj_surface, !!tiling, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
+ i965_check_alloc_surface_bo(ctx, obj_surface, !!tiling, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
dst_surface.base = (struct object_base *)obj_surface;
dst_surface.type = I965_SURFACE_TYPE_SURFACE;
status = i965_post_processing_internal(ctx, &proc_context->pp_context,
@@ -5319,14 +5532,16 @@ i965_proc_picture(VADriverContextP ctx,
}
}
+ proc_context->pp_context.pipeline_param = NULL;
obj_surface = SURFACE(proc_state->current_render_target);
- assert(obj_surface);
- if (!obj_surface)
+ if (!obj_surface) {
+ status = VA_STATUS_ERROR_INVALID_SURFACE;
goto error;
+ }
int csc_needed = 0;
- if (obj_surface->fourcc && obj_surface->fourcc != VA_FOURCC('N','V','1','2')){
+ if (obj_surface->fourcc && obj_surface->fourcc != VA_FOURCC_NV12){
csc_needed = 1;
out_surface_id = VA_INVALID_ID;
status = i965_CreateSurfaces(ctx,
@@ -5339,10 +5554,10 @@ i965_proc_picture(VADriverContextP ctx,
tmp_surfaces[num_tmp_surfaces++] = out_surface_id;
struct object_surface *csc_surface = SURFACE(out_surface_id);
assert(csc_surface);
- i965_check_alloc_surface_bo(ctx, csc_surface, !!tiling, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
+ i965_check_alloc_surface_bo(ctx, csc_surface, !!tiling, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
dst_surface.base = (struct object_base *)csc_surface;
} else {
- i965_check_alloc_surface_bo(ctx, obj_surface, !!tiling, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
+ i965_check_alloc_surface_bo(ctx, obj_surface, !!tiling, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
dst_surface.base = (struct object_base *)obj_surface;
}
@@ -5397,7 +5612,7 @@ error:
tmp_surfaces,
num_tmp_surfaces);
- return VA_STATUS_ERROR_INVALID_PARAMETER;
+ return status;
}
static void
@@ -5413,13 +5628,14 @@ i965_proc_context_destroy(void *hw_context)
struct hw_context *
i965_proc_context_init(VADriverContextP ctx, struct object_config *obj_config)
{
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
struct intel_driver_data *intel = intel_driver_data(ctx);
struct i965_proc_context *proc_context = calloc(1, sizeof(struct i965_proc_context));
proc_context->base.destroy = i965_proc_context_destroy;
proc_context->base.run = i965_proc_picture;
proc_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER, 0);
- i965_post_processing_context_init(ctx, &proc_context->pp_context, proc_context->base.batch);
+ i965->codec_info->post_processing_context_init(ctx, &proc_context->pp_context, proc_context->base.batch);
return (struct hw_context *)proc_context;
}
diff --git a/src/i965_post_processing.h b/src/i965_post_processing.h
index 66fcdef..f0a277e 100755
--- a/src/i965_post_processing.h
+++ b/src/i965_post_processing.h
@@ -51,6 +51,7 @@ enum
PP_PL3_LOAD_SAVE_PA,
PP_PA_LOAD_SAVE_NV12,
PP_PA_LOAD_SAVE_PL3,
+ PP_PA_LOAD_SAVE_PA,
PP_RGBX_LOAD_SAVE_NV12,
PP_NV12_LOAD_SAVE_RGBX,
NUM_PP_MODULES,
@@ -93,12 +94,17 @@ struct pp_dndi_context
{
int dest_w;
int dest_h;
+ dri_bo *stmm_bo;
+ int frame_order; /* -1 for the first frame */
+ VASurfaceID current_out_surface;
+ struct object_surface *current_out_obj_surface;
};
struct pp_dn_context /* private state for the "dn" post-processing module */
{
int dest_w;
int dest_h;
+ dri_bo *stmm_bo; /* buffer object; unreferenced and reset to NULL by i965_post_processing_context_finalize() */
};
struct i965_post_processing_context;
@@ -374,7 +380,7 @@ struct gen7_pp_static_parameter
unsigned int di_destination_packed_y_component_offset:8;
unsigned int di_destination_packed_u_component_offset:8;
unsigned int di_destination_packed_v_component_offset:8;
- unsigned int pad0:8;
+ unsigned int alpha:8;
} grf2;
struct {
@@ -472,16 +478,21 @@ struct i965_post_processing_context
} urb;
struct {
- dri_bo *bo;
- } stmm;
-
- union {
- struct pp_load_save_context pp_load_save_context;
- struct pp_scaling_context pp_scaling_context;
- struct pp_avs_context pp_avs_context;
- struct pp_dndi_context pp_dndi_context;
- struct pp_dn_context pp_dn_context;
- } private_context;
+ unsigned int gpgpu_mode : 1;
+ unsigned int pad0 : 7;
+ unsigned int max_num_threads : 16;
+ unsigned int num_urb_entries : 8;
+ unsigned int urb_entry_size : 16;
+ unsigned int curbe_allocation_size : 16;
+ } vfe_gpu_state;
+
+ struct pp_load_save_context pp_load_save_context;
+ struct pp_scaling_context pp_scaling_context;
+ struct pp_avs_context pp_avs_context;
+ struct pp_dndi_context pp_dndi_context;
+ struct pp_dn_context pp_dn_context;
+ void *private_context; /* pointer to the current private context */
+ void *pipeline_param; /* pointer to the pipeline parameter */
int (*pp_x_steps)(void *private_context);
int (*pp_y_steps)(void *private_context);
@@ -492,6 +503,39 @@ struct i965_post_processing_context
unsigned int block_horizontal_mask_left:16;
unsigned int block_horizontal_mask_right:16;
unsigned int block_vertical_mask_bottom:8;
+
+ struct {
+ dri_bo *bo;
+ int bo_size;
+ unsigned int end_offset;
+ } instruction_state;
+
+ struct {
+ dri_bo *bo;
+ } indirect_state;
+
+ struct {
+ dri_bo *bo;
+ int bo_size;
+ unsigned int end_offset;
+ } dynamic_state;
+
+ unsigned int sampler_offset;
+ int sampler_size;
+ unsigned int idrt_offset;
+ int idrt_size;
+ unsigned int curbe_offset;
+ int curbe_size;
+
+ VAStatus (*intel_post_processing)(VADriverContextP ctx,
+ struct i965_post_processing_context *pp_context,
+ const struct i965_surface *src_surface,
+ const VARectangle *src_rect,
+ struct i965_surface *dst_surface,
+ const VARectangle *dst_rect,
+ int pp_index,
+ void * filter_param);
+ void (*finalize)(struct i965_post_processing_context *pp_context);
};
struct i965_proc_context
@@ -532,4 +576,11 @@ i965_post_processing_terminate(VADriverContextP ctx);
bool
i965_post_processing_init(VADriverContextP ctx);
+
+extern VAStatus
+i965_proc_picture(VADriverContextP ctx,
+ VAProfile profile,
+ union codec_state *codec_state,
+ struct hw_context *hw_context);
+
#endif /* __I965_POST_PROCESSING_H__ */
diff --git a/src/i965_render.c b/src/i965_render.c
index 21ec844..38f70eb 100644
--- a/src/i965_render.c
+++ b/src/i965_render.c
@@ -35,6 +35,7 @@
#include <stdlib.h>
#include <string.h>
#include <assert.h>
+#include <math.h>
#include <va/va_drmcommon.h>
@@ -54,7 +55,7 @@ static const uint32_t sf_kernel_static[][4] =
#include "shaders/render/exa_sf.g4b"
};
-#define PS_KERNEL_NUM_GRF 32
+#define PS_KERNEL_NUM_GRF 48
#define PS_MAX_THREADS 32
#define I965_GRF_BLOCKS(nreg) ((nreg + 15) / 16 - 1)
@@ -64,6 +65,7 @@ static const uint32_t ps_kernel_static[][4] =
#include "shaders/render/exa_wm_xy.g4b"
#include "shaders/render/exa_wm_src_affine.g4b"
#include "shaders/render/exa_wm_src_sample_planar.g4b"
+#include "shaders/render/exa_wm_yuv_color_balance.g4b"
#include "shaders/render/exa_wm_yuv_rgb.g4b"
#include "shaders/render/exa_wm_write.g4b"
};
@@ -86,6 +88,7 @@ static const uint32_t ps_kernel_static_gen5[][4] =
#include "shaders/render/exa_wm_xy.g4b.gen5"
#include "shaders/render/exa_wm_src_affine.g4b.gen5"
#include "shaders/render/exa_wm_src_sample_planar.g4b.gen5"
+#include "shaders/render/exa_wm_yuv_color_balance.g4b.gen5"
#include "shaders/render/exa_wm_yuv_rgb.g4b.gen5"
#include "shaders/render/exa_wm_write.g4b.gen5"
};
@@ -105,6 +108,7 @@ static const uint32_t sf_kernel_static_gen6[][4] =
static const uint32_t ps_kernel_static_gen6[][4] = {
#include "shaders/render/exa_wm_src_affine.g6b"
#include "shaders/render/exa_wm_src_sample_planar.g6b"
+#include "shaders/render/exa_wm_yuv_color_balance.g6b"
#include "shaders/render/exa_wm_yuv_rgb.g6b"
#include "shaders/render/exa_wm_write.g6b"
};
@@ -123,6 +127,7 @@ static const uint32_t sf_kernel_static_gen7[][4] =
static const uint32_t ps_kernel_static_gen7[][4] = {
#include "shaders/render/exa_wm_src_affine.g7b"
#include "shaders/render/exa_wm_src_sample_planar.g7b"
+#include "shaders/render/exa_wm_yuv_color_balance.g7b"
#include "shaders/render/exa_wm_yuv_rgb.g7b"
#include "shaders/render/exa_wm_write.g7b"
};
@@ -137,13 +142,14 @@ static const uint32_t ps_subpic_kernel_static_gen7[][4] = {
static const uint32_t ps_kernel_static_gen7_haswell[][4] = {
#include "shaders/render/exa_wm_src_affine.g7b"
#include "shaders/render/exa_wm_src_sample_planar.g7b.haswell"
+#include "shaders/render/exa_wm_yuv_color_balance.g7b.haswell"
#include "shaders/render/exa_wm_yuv_rgb.g7b"
#include "shaders/render/exa_wm_write.g7b"
};
-#define SURFACE_STATE_PADDED_SIZE_I965 ALIGN(sizeof(struct i965_surface_state), 32)
-#define SURFACE_STATE_PADDED_SIZE_GEN7 ALIGN(sizeof(struct gen7_surface_state), 32)
-#define SURFACE_STATE_PADDED_SIZE MAX(SURFACE_STATE_PADDED_SIZE_I965, SURFACE_STATE_PADDED_SIZE_GEN7)
+
+#define SURFACE_STATE_PADDED_SIZE MAX(SURFACE_STATE_PADDED_SIZE_GEN6, SURFACE_STATE_PADDED_SIZE_GEN7)
+
#define SURFACE_STATE_OFFSET(index) (SURFACE_STATE_PADDED_SIZE * index)
#define BINDING_TABLE_OFFSET SURFACE_STATE_OFFSET(MAX_RENDER_SURFACES)
@@ -302,8 +308,26 @@ static struct i965_kernel render_kernels_gen7_haswell[] = {
#define URB_SF_ENTRIES 1
#define URB_SF_ENTRY_SIZE 2
-#define URB_CS_ENTRIES 1
-#define URB_CS_ENTRY_SIZE 1
+#define URB_CS_ENTRIES 4
+#define URB_CS_ENTRY_SIZE 4
+
+static float yuv_to_rgb_bt601[3][4] = { /* YUV->RGB coefficients, ITU-R BT.601 (1.164 ~= 255/219, i.e. limited-range luma) */
+{1.164, 0, 1.596, -0.06275,}, /* presumably the R row: Y, U, V gains; last entry ~= -16/255 -- TODO confirm layout against the shader */
+{1.164, -0.392, -0.813, -0.50196,}, /* presumably the G row; last entry ~= -128/255 */
+{1.164, 2.017, 0, -0.50196,}, /* presumably the B row; last entry ~= -128/255 */
+};
+
+static float yuv_to_rgb_bt709[3][4] = { /* same layout as the BT.601 table, ITU-R BT.709 coefficients */
+{1.164, 0, 1.793, -0.06275,},
+{1.164, -0.213, -0.533, -0.50196,},
+{1.164, 2.112, 0, -0.50196,},
+};
+
+static float yuv_to_rgb_smpte_240[3][4] = { /* same layout as the BT.601 table, SMPTE 240M coefficients */
+{1.164, 0, 1.794, -0.06275,},
+{1.164, -0.258, -0.5425, -0.50196,},
+{1.164, 2.078, 0, -0.50196,},
+};
static void
i965_render_vs_unit(VADriverContextP ctx)
@@ -317,7 +341,7 @@ i965_render_vs_unit(VADriverContextP ctx)
vs_state = render_state->vs.state->virtual;
memset(vs_state, 0, sizeof(*vs_state));
- if (IS_IRONLAKE(i965->intel.device_id))
+ if (IS_IRONLAKE(i965->intel.device_info))
vs_state->thread4.nr_urb_entries = URB_VS_ENTRIES >> 2;
else
vs_state->thread4.nr_urb_entries = URB_VS_ENTRIES;
@@ -431,7 +455,7 @@ i965_subpic_render_wm_unit(VADriverContextP ctx)
wm_state->thread1.single_program_flow = 1; /* XXX */
- if (IS_IRONLAKE(i965->intel.device_id))
+ if (IS_IRONLAKE(i965->intel.device_info))
wm_state->thread1.binding_table_entry_count = 0; /* hardware requirement */
else
wm_state->thread1.binding_table_entry_count = 7;
@@ -439,8 +463,8 @@ i965_subpic_render_wm_unit(VADriverContextP ctx)
wm_state->thread2.scratch_space_base_pointer = 0;
wm_state->thread2.per_thread_scratch_space = 0; /* 1024 bytes */
- wm_state->thread3.dispatch_grf_start_reg = 3; /* XXX */
- wm_state->thread3.const_urb_entry_read_length = 0;
+ wm_state->thread3.dispatch_grf_start_reg = 2; /* XXX */
+ wm_state->thread3.const_urb_entry_read_length = 4;
wm_state->thread3.const_urb_entry_read_offset = 0;
wm_state->thread3.urb_entry_read_length = 1; /* XXX */
wm_state->thread3.urb_entry_read_offset = 0; /* XXX */
@@ -448,13 +472,13 @@ i965_subpic_render_wm_unit(VADriverContextP ctx)
wm_state->wm4.stats_enable = 0;
wm_state->wm4.sampler_state_pointer = render_state->wm.sampler->offset >> 5;
- if (IS_IRONLAKE(i965->intel.device_id)) {
+ if (IS_IRONLAKE(i965->intel.device_info)) {
wm_state->wm4.sampler_count = 0; /* hardware requirement */
} else {
wm_state->wm4.sampler_count = (render_state->wm.sampler_count + 3) / 4;
}
- wm_state->wm5.max_threads = render_state->max_wm_threads - 1;
+ wm_state->wm5.max_threads = i965->intel.device_info->max_wm_threads - 1;
wm_state->wm5.thread_dispatch_enable = 1;
wm_state->wm5.enable_16_pix = 1;
wm_state->wm5.enable_8_pix = 0;
@@ -495,7 +519,7 @@ i965_render_wm_unit(VADriverContextP ctx)
wm_state->thread1.single_program_flow = 1; /* XXX */
- if (IS_IRONLAKE(i965->intel.device_id))
+ if (IS_IRONLAKE(i965->intel.device_info))
wm_state->thread1.binding_table_entry_count = 0; /* hardware requirement */
else
wm_state->thread1.binding_table_entry_count = 7;
@@ -504,7 +528,7 @@ i965_render_wm_unit(VADriverContextP ctx)
wm_state->thread2.per_thread_scratch_space = 0; /* 1024 bytes */
wm_state->thread3.dispatch_grf_start_reg = 2; /* XXX */
- wm_state->thread3.const_urb_entry_read_length = 1;
+ wm_state->thread3.const_urb_entry_read_length = 4;
wm_state->thread3.const_urb_entry_read_offset = 0;
wm_state->thread3.urb_entry_read_length = 1; /* XXX */
wm_state->thread3.urb_entry_read_offset = 0; /* XXX */
@@ -512,13 +536,13 @@ i965_render_wm_unit(VADriverContextP ctx)
wm_state->wm4.stats_enable = 0;
wm_state->wm4.sampler_state_pointer = render_state->wm.sampler->offset >> 5;
- if (IS_IRONLAKE(i965->intel.device_id)) {
+ if (IS_IRONLAKE(i965->intel.device_info)) {
wm_state->wm4.sampler_count = 0; /* hardware requirement */
} else {
wm_state->wm4.sampler_count = (render_state->wm.sampler_count + 3) / 4;
}
- wm_state->wm5.max_threads = render_state->max_wm_threads - 1;
+ wm_state->wm5.max_threads = i965->intel.device_info->max_wm_threads - 1;
wm_state->wm5.thread_dispatch_enable = 1;
wm_state->wm5.enable_16_pix = 1;
wm_state->wm5.enable_8_pix = 0;
@@ -779,6 +803,7 @@ gen7_render_set_surface_state(
gen7_render_set_surface_tiling(ss, tiling);
}
+
static void
i965_render_src_surface_state(
VADriverContextP ctx,
@@ -803,12 +828,12 @@ i965_render_src_surface_state(
assert(ss_bo->virtual);
ss = (char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index);
- if (IS_GEN7(i965->intel.device_id)) {
+ if (IS_GEN7(i965->intel.device_info)) {
gen7_render_set_surface_state(ss,
region, offset,
w, h,
pitch, format, flags);
- if (IS_HASWELL(i965->intel.device_id))
+ if (IS_HASWELL(i965->intel.device_info))
gen7_render_set_surface_scs(ss);
dri_bo_emit_reloc(ss_bo,
I915_GEM_DOMAIN_SAMPLER, 0,
@@ -851,7 +876,10 @@ i965_render_src_surfaces_state(
i965_render_src_surface_state(ctx, 1, region, 0, rw, rh, region_pitch, I965_SURFACEFORMAT_R8_UNORM, flags); /* Y */
i965_render_src_surface_state(ctx, 2, region, 0, rw, rh, region_pitch, I965_SURFACEFORMAT_R8_UNORM, flags);
- if (obj_surface->fourcc == VA_FOURCC('N', 'V', '1', '2')) {
+ if (obj_surface->fourcc == VA_FOURCC_Y800) /* single plane for grayscale */
+ return;
+
+ if (obj_surface->fourcc == VA_FOURCC_NV12) {
i965_render_src_surface_state(ctx, 3, region,
region_pitch * obj_surface->y_cb_offset,
obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
@@ -918,12 +946,12 @@ i965_render_dest_surface_state(VADriverContextP ctx, int index)
assert(ss_bo->virtual);
ss = (char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index);
- if (IS_GEN7(i965->intel.device_id)) {
+ if (IS_GEN7(i965->intel.device_info)) {
gen7_render_set_surface_state(ss,
dest_region->bo, 0,
dest_region->width, dest_region->height,
dest_region->pitch, format, 0);
- if (IS_HASWELL(i965->intel.device_id))
+ if (IS_HASWELL(i965->intel.device_info))
gen7_render_set_surface_scs(ss);
dri_bo_emit_reloc(ss_bo,
I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
@@ -1050,29 +1078,62 @@ i965_render_upload_vertex(
i965_fill_vertex_buffer(ctx, tex_coords, vid_coords);
}
+#define PI 3.1415926
+
static void
i965_render_upload_constants(VADriverContextP ctx,
- struct object_surface *obj_surface)
+ struct object_surface *obj_surface,
+ unsigned int flags) /* masked with VA_SRC_COLOR_MASK below to choose the YUV->RGB table */
{
struct i965_driver_data *i965 = i965_driver_data(ctx);
struct i965_render_state *render_state = &i965->render_state;
unsigned short *constant_buffer;
+ float *color_balance_base; /* write cursor for the four color-balance floats */
+ float contrast = (float)i965->contrast_attrib->value / DEFAULT_CONTRAST; /* attribute value relative to its default */
+ float brightness = (float)i965->brightness_attrib->value / 255; /* YUV is float in the shader */
+ float hue = (float)i965->hue_attrib->value / 180 * PI; /* value / 180 * PI: presumably degrees converted to radians -- confirm attribute units */
+ float saturation = (float)i965->saturation_attrib->value / DEFAULT_SATURATION; /* attribute value relative to its default */
+ float *yuv_to_rgb; /* destination of the 3x4 conversion table inside the constant buffer */
+ unsigned int color_flag;
dri_bo_map(render_state->curbe.bo, 1);
assert(render_state->curbe.bo->virtual);
constant_buffer = render_state->curbe.bo->virtual;
if (obj_surface->subsampling == SUBSAMPLE_YUV400) {
- assert(obj_surface->fourcc == VA_FOURCC('Y', '8', '0', '0'));
+ assert(obj_surface->fourcc == VA_FOURCC_Y800);
- *constant_buffer = 2;
+ constant_buffer[0] = 2; /* 16-bit slot 0: value 2 for the YUV400 (Y800) case */
} else {
- if (obj_surface->fourcc == VA_FOURCC('N', 'V', '1', '2'))
- *constant_buffer = 1;
+ if (obj_surface->fourcc == VA_FOURCC_NV12)
+ constant_buffer[0] = 1; /* 16-bit slot 0: value 1 for NV12 */
else
- *constant_buffer = 0;
+ constant_buffer[0] = 0; /* 16-bit slot 0: value 0 for every other fourcc */
}
+ if (i965->contrast_attrib->value == DEFAULT_CONTRAST &&
+ i965->brightness_attrib->value == DEFAULT_BRIGHTNESS &&
+ i965->hue_attrib->value == DEFAULT_HUE &&
+ i965->saturation_attrib->value == DEFAULT_SATURATION)
+ constant_buffer[1] = 1; /* skip color balance transformation */
+ else
+ constant_buffer[1] = 0; /* at least one attribute is non-default: transformation not skipped */
+
+ color_balance_base = (float *)constant_buffer + 4; /* the cast binds before '+': float index 4 of the buffer */
+ *color_balance_base++ = contrast; /* float index 4 */
+ *color_balance_base++ = brightness; /* float index 5 */
+ *color_balance_base++ = cos(hue) * contrast * saturation; /* float index 6 */
+ *color_balance_base++ = sin(hue) * contrast * saturation; /* float index 7 */
+
+ color_flag = flags & VA_SRC_COLOR_MASK;
+ yuv_to_rgb = (float *)constant_buffer + 8; /* float index 8, directly after the four color-balance floats */
+ if (color_flag == VA_SRC_BT709)
+ memcpy(yuv_to_rgb, yuv_to_rgb_bt709, sizeof(yuv_to_rgb_bt709));
+ else if (color_flag == VA_SRC_SMPTE_240)
+ memcpy(yuv_to_rgb, yuv_to_rgb_smpte_240, sizeof(yuv_to_rgb_smpte_240));
+ else
+ memcpy(yuv_to_rgb, yuv_to_rgb_bt601, sizeof(yuv_to_rgb_bt601)); /* any other masked value falls back to the BT.601 table */
+
dri_bo_unmap(render_state->curbe.bo);
}
@@ -1118,7 +1179,7 @@ i965_surface_render_state_setup(
i965_render_cc_viewport(ctx);
i965_render_cc_unit(ctx);
i965_render_upload_vertex(ctx, obj_surface, src_rect, dst_rect);
- i965_render_upload_constants(ctx, obj_surface);
+ i965_render_upload_constants(ctx, obj_surface, flags);
}
static void
@@ -1172,7 +1233,7 @@ i965_render_state_base_address(VADriverContextP ctx)
struct intel_batchbuffer *batch = i965->batch;
struct i965_render_state *render_state = &i965->render_state;
- if (IS_IRONLAKE(i965->intel.device_id)) {
+ if (IS_IRONLAKE(i965->intel.device_info)) {
BEGIN_BATCH(batch, 8);
OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 6);
OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
@@ -1336,7 +1397,7 @@ i965_render_vertex_elements(VADriverContextP ctx)
struct i965_driver_data *i965 = i965_driver_data(ctx);
struct intel_batchbuffer *batch = i965->batch;
- if (IS_IRONLAKE(i965->intel.device_id)) {
+ if (IS_IRONLAKE(i965->intel.device_info)) {
BEGIN_BATCH(batch, 5);
OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | 3);
/* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
@@ -1428,7 +1489,7 @@ i965_render_startup(VADriverContextP ctx)
((4 * 4) << VB0_BUFFER_PITCH_SHIFT));
OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0);
- if (IS_IRONLAKE(i965->intel.device_id))
+ if (IS_IRONLAKE(i965->intel.device_info))
OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 12 * 4);
else
OUT_BATCH(batch, 3);
@@ -1478,8 +1539,8 @@ i965_clear_dest_region(VADriverContextP ctx)
br13 |= pitch;
- if (IS_GEN6(i965->intel.device_id) ||
- IS_GEN7(i965->intel.device_id)) {
+ if (IS_GEN6(i965->intel.device_info) ||
+ IS_GEN7(i965->intel.device_info)) {
intel_batchbuffer_start_atomic_blt(batch, 24);
BEGIN_BLT_BATCH(batch, 6);
} else {
@@ -1540,6 +1601,7 @@ i965_subpic_render_pipeline_setup(VADriverContextP ctx)
i965_render_pipelined_pointers(ctx);
i965_render_urb_layout(ctx);
i965_render_cs_urb_layout(ctx);
+ i965_render_constant_buffer(ctx);
i965_render_drawing_rectangle(ctx);
i965_render_vertex_elements(ctx);
i965_render_startup(ctx);
@@ -1805,7 +1867,7 @@ gen6_render_setup_states(
gen6_render_color_calc_state(ctx);
gen6_render_blend_state(ctx);
gen6_render_depth_stencil_state(ctx);
- i965_render_upload_constants(ctx, obj_surface);
+ i965_render_upload_constants(ctx, obj_surface, flags);
i965_render_upload_vertex(ctx, obj_surface, src_rect, dst_rect);
}
@@ -2044,7 +2106,7 @@ gen6_emit_wm_state(VADriverContextP ctx, int kernel)
OUT_RELOC(batch,
render_state->curbe.bo,
I915_GEM_DOMAIN_INSTRUCTION, 0,
- 0);
+ (URB_CS_ENTRY_SIZE-1));
OUT_BATCH(batch, 0);
OUT_BATCH(batch, 0);
OUT_BATCH(batch, 0);
@@ -2057,7 +2119,7 @@ gen6_emit_wm_state(VADriverContextP ctx, int kernel)
(5 << GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT));
OUT_BATCH(batch, 0);
OUT_BATCH(batch, (6 << GEN6_3DSTATE_WM_DISPATCH_START_GRF_0_SHIFT)); /* DW4 */
- OUT_BATCH(batch, ((render_state->max_wm_threads - 1) << GEN6_3DSTATE_WM_MAX_THREADS_SHIFT) |
+ OUT_BATCH(batch, ((i965->intel.device_info->max_wm_threads - 1) << GEN6_3DSTATE_WM_MAX_THREADS_SHIFT) |
GEN6_3DSTATE_WM_DISPATCH_ENABLE |
GEN6_3DSTATE_WM_16_DISPATCH_ENABLE);
OUT_BATCH(batch, (1 << GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT) |
@@ -2307,6 +2369,11 @@ gen7_render_initialize(VADriverContextP ctx)
render_state->cc.depth_stencil = bo;
}
+/*
+ * for GEN8
+ */
+#define ALIGNMENT 64
+
static void
gen7_render_color_calc_state(VADriverContextP ctx)
{
@@ -2383,6 +2450,7 @@ gen7_render_sampler(VADriverContextP ctx)
dri_bo_unmap(render_state->wm.sampler);
}
+
static void
gen7_render_setup_states(
VADriverContextP ctx,
@@ -2399,10 +2467,11 @@ gen7_render_setup_states(
gen7_render_color_calc_state(ctx);
gen7_render_blend_state(ctx);
gen7_render_depth_stencil_state(ctx);
- i965_render_upload_constants(ctx, obj_surface);
+ i965_render_upload_constants(ctx, obj_surface, flags);
i965_render_upload_vertex(ctx, obj_surface, src_rect, dst_rect);
}
+
static void
gen7_emit_invarient_states(VADriverContextP ctx)
{
@@ -2486,7 +2555,7 @@ gen7_emit_urb(VADriverContextP ctx)
struct intel_batchbuffer *batch = i965->batch;
unsigned int num_urb_entries = 32;
- if (IS_HASWELL(i965->intel.device_id))
+ if (IS_HASWELL(i965->intel.device_info))
num_urb_entries = 64;
BEGIN_BATCH(batch, 2);
@@ -2795,7 +2864,7 @@ gen7_emit_wm_state(VADriverContextP ctx, int kernel)
unsigned int max_threads_shift = GEN7_PS_MAX_THREADS_SHIFT_IVB;
unsigned int num_samples = 0;
- if (IS_HASWELL(i965->intel.device_id)) {
+ if (IS_HASWELL(i965->intel.device_info)) {
max_threads_shift = GEN7_PS_MAX_THREADS_SHIFT_HSW;
num_samples = 1 << GEN7_PS_SAMPLE_MASK_SHIFT_HSW;
}
@@ -2810,7 +2879,7 @@ gen7_emit_wm_state(VADriverContextP ctx, int kernel)
BEGIN_BATCH(batch, 7);
OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_PS | (7 - 2));
- OUT_BATCH(batch, 1);
+ OUT_BATCH(batch, URB_CS_ENTRY_SIZE);
OUT_BATCH(batch, 0);
OUT_RELOC(batch,
render_state->curbe.bo,
@@ -2832,7 +2901,7 @@ gen7_emit_wm_state(VADriverContextP ctx, int kernel)
(5 << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
OUT_BATCH(batch, 0); /* scratch space base offset */
OUT_BATCH(batch,
- ((render_state->max_wm_threads - 1) << max_threads_shift) | num_samples |
+ ((i965->intel.device_info->max_wm_threads - 1) << max_threads_shift) | num_samples |
GEN7_PS_PUSH_CONSTANT_ENABLE |
GEN7_PS_ATTRIBUTE_ENABLE |
GEN7_PS_16_DISPATCH_ENABLE);
@@ -2930,6 +2999,7 @@ gen7_render_emit_states(VADriverContextP ctx, int kernel)
intel_batchbuffer_end_atomic(batch);
}
+
static void
gen7_render_put_surface(
VADriverContextP ctx,
@@ -2949,6 +3019,7 @@ gen7_render_put_surface(
intel_batchbuffer_flush(batch);
}
+
static void
gen7_subpicture_render_blend_state(VADriverContextP ctx)
{
@@ -3012,13 +3083,6 @@ gen7_render_put_subpicture(
}
-/*
- * global functions
- */
-VAStatus
-i965_DestroySurfaces(VADriverContextP ctx,
- VASurfaceID *surface_list,
- int num_surfaces);
void
intel_render_put_surface(
VADriverContextP ctx,
@@ -3029,6 +3093,7 @@ intel_render_put_surface(
)
{
struct i965_driver_data *i965 = i965_driver_data(ctx);
+ struct i965_render_state *render_state = &i965->render_state;
int has_done_scaling = 0;
VASurfaceID out_surface_id = i965_post_processing(ctx,
obj_surface,
@@ -3049,12 +3114,7 @@ intel_render_put_surface(
src_rect = dst_rect;
}
- if (IS_GEN7(i965->intel.device_id))
- gen7_render_put_surface(ctx, obj_surface, src_rect, dst_rect, flags);
- else if (IS_GEN6(i965->intel.device_id))
- gen6_render_put_surface(ctx, obj_surface, src_rect, dst_rect, flags);
- else
- i965_render_put_surface(ctx, obj_surface, src_rect, dst_rect, flags);
+ render_state->render_put_surface(ctx, obj_surface, src_rect, dst_rect, flags);
if (out_surface_id != VA_INVALID_ID)
i965_DestroySurfaces(ctx, &out_surface_id, 1);
@@ -3069,17 +3129,57 @@ intel_render_put_subpicture(
)
{
struct i965_driver_data *i965 = i965_driver_data(ctx);
+ struct i965_render_state *render_state = &i965->render_state;
- if (IS_GEN7(i965->intel.device_id))
- gen7_render_put_subpicture(ctx, obj_surface, src_rect, dst_rect);
- else if (IS_GEN6(i965->intel.device_id))
- gen6_render_put_subpicture(ctx, obj_surface, src_rect, dst_rect);
- else
- i965_render_put_subpicture(ctx, obj_surface, src_rect, dst_rect);
+ render_state->render_put_subpicture(ctx, obj_surface, src_rect, dst_rect);
+}
+
+/*
+ * Release every buffer object owned by the render state and reset the
+ * matching pointers to NULL, so that no stale handle survives teardown.
+ * Also frees the draw region, if one was allocated.
+ *
+ * Installed as render_state->render_terminate by genx_render_init().
+ */
+static void
+genx_render_terminate(VADriverContextP ctx)
+{
+ int i;
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
+ struct i965_render_state *render_state = &i965->render_state;
+
+ dri_bo_unreference(render_state->curbe.bo);
+ render_state->curbe.bo = NULL;
+
+ /* one buffer object per render kernel */
+ for (i = 0; i < NUM_RENDER_KERNEL; i++) {
+ struct i965_kernel *kernel = &render_state->render_kernels[i];
+
+ dri_bo_unreference(kernel->bo);
+ kernel->bo = NULL;
+ }
+
+ dri_bo_unreference(render_state->vb.vertex_buffer);
+ render_state->vb.vertex_buffer = NULL;
+ dri_bo_unreference(render_state->vs.state);
+ render_state->vs.state = NULL;
+ dri_bo_unreference(render_state->sf.state);
+ render_state->sf.state = NULL;
+ dri_bo_unreference(render_state->wm.sampler);
+ render_state->wm.sampler = NULL;
+ dri_bo_unreference(render_state->wm.state);
+ render_state->wm.state = NULL;
+ dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
+ /* reset like every other bo here, so no dangling pointer is left behind */
+ render_state->wm.surface_state_binding_table_bo = NULL;
+ dri_bo_unreference(render_state->cc.viewport);
+ render_state->cc.viewport = NULL;
+ dri_bo_unreference(render_state->cc.state);
+ render_state->cc.state = NULL;
+ dri_bo_unreference(render_state->cc.blend);
+ render_state->cc.blend = NULL;
+ dri_bo_unreference(render_state->cc.depth_stencil);
+ render_state->cc.depth_stencil = NULL;
+
+ /* the draw region is heap-allocated and holds its own bo reference */
+ if (render_state->draw_region) {
+ dri_bo_unreference(render_state->draw_region->bo);
+ free(render_state->draw_region);
+ render_state->draw_region = NULL;
+ }
}
bool
-i965_render_init(VADriverContextP ctx)
+genx_render_init(VADriverContextP ctx)
{
struct i965_driver_data *i965 = i965_driver_data(ctx);
struct i965_render_state *render_state = &i965->render_state;
@@ -3091,16 +3191,27 @@ i965_render_init(VADriverContextP ctx)
assert(NUM_RENDER_KERNEL == (sizeof(render_kernels_gen6) /
sizeof(render_kernels_gen6[0])));
- if (IS_GEN7(i965->intel.device_id))
+ if (IS_GEN7(i965->intel.device_info)) {
memcpy(render_state->render_kernels,
- (IS_HASWELL(i965->intel.device_id) ? render_kernels_gen7_haswell : render_kernels_gen7),
+ (IS_HASWELL(i965->intel.device_info) ? render_kernels_gen7_haswell : render_kernels_gen7),
sizeof(render_state->render_kernels));
- else if (IS_GEN6(i965->intel.device_id))
+ render_state->render_put_surface = gen7_render_put_surface;
+ render_state->render_put_subpicture = gen7_render_put_subpicture;
+ } else if (IS_GEN6(i965->intel.device_info)) {
memcpy(render_state->render_kernels, render_kernels_gen6, sizeof(render_state->render_kernels));
- else if (IS_IRONLAKE(i965->intel.device_id))
+ render_state->render_put_surface = gen6_render_put_surface;
+ render_state->render_put_subpicture = gen6_render_put_subpicture;
+ } else if (IS_IRONLAKE(i965->intel.device_info)) {
memcpy(render_state->render_kernels, render_kernels_gen5, sizeof(render_state->render_kernels));
- else
+ render_state->render_put_surface = i965_render_put_surface;
+ render_state->render_put_subpicture = i965_render_put_subpicture;
+ } else {
memcpy(render_state->render_kernels, render_kernels_gen4, sizeof(render_state->render_kernels));
+ render_state->render_put_surface = i965_render_put_surface;
+ render_state->render_put_subpicture = i965_render_put_subpicture;
+ }
+
+ render_state->render_terminate = genx_render_terminate;
for (i = 0; i < NUM_RENDER_KERNEL; i++) {
struct i965_kernel *kernel = &render_state->render_kernels[i];
@@ -3121,73 +3232,22 @@ i965_render_init(VADriverContextP ctx)
4096, 64);
assert(render_state->curbe.bo);
- if (IS_HSW_GT1(i965->intel.device_id)) {
- render_state->max_wm_threads = 102;
- } else if (IS_HSW_GT2(i965->intel.device_id)) {
- render_state->max_wm_threads = 204;
- } else if (IS_HSW_GT3(i965->intel.device_id)) {
- render_state->max_wm_threads = 408;
- } else if (IS_IVB_GT1(i965->intel.device_id)) {
- render_state->max_wm_threads = 48;
- } else if (IS_IVB_GT2(i965->intel.device_id)) {
- render_state->max_wm_threads = 172;
- } else if (IS_SNB_GT1(i965->intel.device_id)) {
- render_state->max_wm_threads = 40;
- } else if (IS_SNB_GT2(i965->intel.device_id)) {
- render_state->max_wm_threads = 80;
- } else if (IS_IRONLAKE(i965->intel.device_id)) {
- render_state->max_wm_threads = 72; /* 12 * 6 */
- } else if (IS_G4X(i965->intel.device_id)) {
- render_state->max_wm_threads = 50; /* 12 * 5 */
- } else {
- /* should never get here !!! */
- assert(0);
- }
-
return true;
}
-void
-i965_render_terminate(VADriverContextP ctx)
+bool
+i965_render_init(VADriverContextP ctx) /* public entry point; now only a dispatcher */
{
- int i;
struct i965_driver_data *i965 = i965_driver_data(ctx);
- struct i965_render_state *render_state = &i965->render_state;
-
- dri_bo_unreference(render_state->curbe.bo);
- render_state->curbe.bo = NULL;
- for (i = 0; i < NUM_RENDER_KERNEL; i++) {
- struct i965_kernel *kernel = &render_state->render_kernels[i];
-
- dri_bo_unreference(kernel->bo);
- kernel->bo = NULL;
- }
+ return i965->codec_info->render_init(ctx); /* delegate to the render_init hook stored in codec_info */
+}
- dri_bo_unreference(render_state->vb.vertex_buffer);
- render_state->vb.vertex_buffer = NULL;
- dri_bo_unreference(render_state->vs.state);
- render_state->vs.state = NULL;
- dri_bo_unreference(render_state->sf.state);
- render_state->sf.state = NULL;
- dri_bo_unreference(render_state->wm.sampler);
- render_state->wm.sampler = NULL;
- dri_bo_unreference(render_state->wm.state);
- render_state->wm.state = NULL;
- dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
- dri_bo_unreference(render_state->cc.viewport);
- render_state->cc.viewport = NULL;
- dri_bo_unreference(render_state->cc.state);
- render_state->cc.state = NULL;
- dri_bo_unreference(render_state->cc.blend);
- render_state->cc.blend = NULL;
- dri_bo_unreference(render_state->cc.depth_stencil);
- render_state->cc.depth_stencil = NULL;
+void
+i965_render_terminate(VADriverContextP ctx) /* public entry point; forwards to the hook installed at init time */
+{
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
+ struct i965_render_state *render_state = &i965->render_state;
- if (render_state->draw_region) {
- dri_bo_unreference(render_state->draw_region->bo);
- free(render_state->draw_region);
- render_state->draw_region = NULL;
- }
+ render_state->render_terminate(ctx); /* NOTE(review): no NULL check -- assumes the init hook set render_terminate; confirm for every init path */
}
-
diff --git a/src/i965_render.h b/src/i965_render.h
index f09b535..fde398b 100644
--- a/src/i965_render.h
+++ b/src/i965_render.h
@@ -33,6 +33,8 @@
#define NUM_RENDER_KERNEL 3
+#define VA_SRC_COLOR_MASK 0x000000f0
+
#include "i965_post_processing.h"
struct i965_kernel;
@@ -77,7 +79,51 @@ struct i965_render_state
struct i965_kernel render_kernels[3];
- int max_wm_threads;
+ struct {
+ dri_bo *bo;
+ int bo_size;
+ unsigned int end_offset;
+ } instruction_state;
+
+ struct {
+ dri_bo *bo;
+ } indirect_state;
+
+ struct {
+ dri_bo *bo;
+ int bo_size;
+ unsigned int end_offset;
+ } dynamic_state;
+
+ unsigned int curbe_offset;
+ int curbe_size;
+
+ unsigned int sampler_offset;
+ int sampler_size;
+
+ unsigned int cc_viewport_offset;
+ int cc_viewport_size;
+
+ unsigned int cc_state_offset;
+ int cc_state_size;
+
+ unsigned int blend_state_offset;
+ int blend_state_size;
+
+ unsigned int sf_clip_offset;
+ int sf_clip_size;
+
+ unsigned int scissor_offset;
+ int scissor_size;
+
+ void (*render_put_surface)(VADriverContextP ctx, struct object_surface *,
+ const VARectangle *src_rec,
+ const VARectangle *dst_rect,
+ unsigned int flags);
+ void (*render_put_subpicture)(VADriverContextP ctx, struct object_surface *,
+ const VARectangle *src_rec,
+ const VARectangle *dst_rect);
+ void (*render_terminate)(VADriverContextP ctx);
};
bool i965_render_init(VADriverContextP ctx);
@@ -105,4 +151,10 @@ struct gen7_surface_state;
void
gen7_render_set_surface_scs(struct gen7_surface_state *ss);
+struct gen8_surface_state;
+void
+gen8_render_set_surface_scs(struct gen8_surface_state *ss);
+
+extern bool gen8_render_init(VADriverContextP ctx);
+
#endif /* _I965_RENDER_H_ */
diff --git a/src/i965_structs.h b/src/i965_structs.h
index c7dd272..682dc2d 100644
--- a/src/i965_structs.h
+++ b/src/i965_structs.h
@@ -968,6 +968,364 @@ struct i965_sampler_dndi
} dw7;
};
+struct gen8_interface_descriptor_data /* eight groups desc0..desc7; the bit widths in each group sum to 32 */
+{
+ struct {
+ unsigned int pad0:6;
+ unsigned int kernel_start_pointer:26; /* declared after 6 pad bits; presumably a 64-byte-granular offset -- confirm against hardware docs */
+ } desc0;
+
+ struct {
+ unsigned int kernel_start_pointer_high:16; /* presumably the upper address bits of kernel_start_pointer -- confirm */
+ unsigned int pad0:16;
+ } desc1;
+
+ struct {
+ unsigned int pad0:7;
+ unsigned int software_exception_enable:1;
+ unsigned int pad1:3;
+ unsigned int maskstack_exception_enable:1;
+ unsigned int pad2:1;
+ unsigned int illegal_opcode_exception_enable:1;
+ unsigned int pad3:2;
+ unsigned int floating_point_mode:1;
+ unsigned int thread_priority:1;
+ unsigned int single_program_flow:1;
+ unsigned int denorm_mode:1;
+ unsigned int pad4:12;
+ } desc2;
+
+ struct {
+ unsigned int pad0:2;
+ unsigned int sampler_count:3;
+ unsigned int sampler_state_pointer:27; /* declared after 5 lower bits; presumably a 32-byte-granular offset -- confirm */
+ } desc3;
+
+ struct {
+ unsigned int binding_table_entry_count:5;
+ unsigned int binding_table_pointer:11;
+ unsigned int pad0: 16;
+ } desc4;
+
+ struct {
+ unsigned int constant_urb_entry_read_offset:16;
+ unsigned int constant_urb_entry_read_length:16;
+ } desc5;
+
+ struct {
+ unsigned int num_threads_in_tg:10;
+ unsigned int pad0:5;
+ unsigned int global_barrier_enable:1;
+ unsigned int shared_local_memory_size:5;
+ unsigned int barrier_enable:1;
+ unsigned int rounding_mode:2;
+ unsigned int pad1:8;
+ } desc6;
+
+ struct {
+ unsigned int cross_thread_constant_data_read_length:8;
+ unsigned int pad0:24;
+ } desc7;
+};
+
+struct gen8_surface_state /* sixteen groups ss0..ss15; each is one unsigned int or bit-fields whose widths sum to 32 */
+{
+ struct {
+ unsigned int cube_pos_z:1;
+ unsigned int cube_neg_z:1;
+ unsigned int cube_pos_y:1;
+ unsigned int cube_neg_y:1;
+ unsigned int cube_pos_x:1;
+ unsigned int cube_neg_x:1;
+ unsigned int media_boundary_pixel_mode:2;
+ unsigned int render_cache_read_write:1;
+ unsigned int sampler_l2bypass_disable:1;
+ unsigned int vert_line_stride_ofs:1;
+ unsigned int vert_line_stride:1;
+ unsigned int tile_walk:1;
+ unsigned int tiled_surface:1;
+ unsigned int horizontal_alignment:2;
+ /* Field 16 */
+ unsigned int vertical_alignment:2;
+ unsigned int surface_format:9; /**< BRW_SURFACEFORMAT_x */
+ unsigned int pad0:1;
+ unsigned int is_array:1;
+ unsigned int surface_type:3; /**< BRW_SURFACE_1D/2D/3D/CUBE */
+ } ss0;
+
+ struct {
+ unsigned int surface_qpitch:15;
+ unsigned int pad0:4;
+ unsigned int base_mip_level:5;
+ unsigned int surface_mocs:7;
+ unsigned int pad1:1;
+ } ss1;
+
+ struct {
+ unsigned int width:14; /* NOTE(review): whether this stores the size or size minus one is not visible here -- confirm at the fill site */
+ unsigned int pad0:2;
+ unsigned int height:14;
+ unsigned int pad1:2;
+ } ss2;
+
+ struct {
+ unsigned int pitch:18;
+ unsigned int pad:3;
+ unsigned int depth:11;
+ } ss3;
+
+ struct {
+ unsigned int multisample_position_palette_index:3;
+ unsigned int num_multisamples:3;
+ unsigned int multisampled_surface_storage_format:1;
+ unsigned int render_target_view_extent:11;
+ unsigned int min_array_elt:11;
+ unsigned int rotation:2;
+ unsigned int force_ncmp_reduce_type:1;
+ } ss4;
+
+ struct {
+ unsigned int mip_count:4;
+ unsigned int min_lod:4;
+ unsigned int pad0:4;
+ unsigned int pad1:2;
+ unsigned int coherence_type:1;
+ unsigned int pad2:3;
+ unsigned int pad3:2;
+ unsigned int ewa_disable_cube:1;
+ unsigned int y_offset:3;
+ unsigned int pad4:1;
+ unsigned int x_offset:7;
+ } ss5;
+
+ struct {
+ unsigned int y_offset_uv_plane:14;
+ unsigned int pad0:2;
+ unsigned int x_offset_uv_plane:14;
+ unsigned int pad1:1;
+ unsigned int separate_uv_plane:1;
+ } ss6;
+
+ struct {
+ unsigned int resource_min_lod:12;
+ unsigned int pad0:4;
+ unsigned int shader_chanel_select_a:3; /* NOTE(review): "chanel" is misspelled in these four names; renaming would require updating every user */
+ unsigned int shader_chanel_select_b:3;
+ unsigned int shader_chanel_select_g:3;
+ unsigned int shader_chanel_select_r:3;
+ unsigned int alpha_clear_color:1;
+ unsigned int blue_clear_color:1;
+ unsigned int green_clear_color:1;
+ unsigned int red_clear_color:1;
+ } ss7;
+ struct {
+ unsigned int base_addr; /* a whole unsigned int, not a bit-field */
+ } ss8;
+
+ struct {
+ unsigned int base_addr_high:16; /* presumably the address bits above those in ss8.base_addr -- confirm */
+ unsigned int pad0:16;
+ } ss9;
+
+ struct {
+ unsigned int pad0:12;
+ unsigned int aux_base_addr:20;
+ } ss10;
+
+ union { /* two alternative views of the same word */
+ struct {
+ unsigned int y_offset_v_plane:14;
+ unsigned int pad0:2;
+ unsigned int x_offset_v_plane:14;
+ unsigned int pad1:2;
+ } planar;
+ struct {
+ unsigned int aux_base_addr_high:16;
+ unsigned int pad2:16;
+ } aux_buffer;
+ } ss11;
+
+ struct {
+ unsigned int hier_depth_clear;
+ } ss12;
+
+ struct {
+ unsigned int pad0; /* ss13..ss15 contain padding only */
+ } ss13;
+
+ struct {
+ unsigned int pad0;
+ } ss14;
+
+ struct {
+ unsigned int pad0;
+ } ss15;
+};
+
+struct gen8_surface_state2 /* eight groups ss0..ss7; each is one unsigned int or bit-fields whose widths sum to 32 */
+{
+ struct {
+ unsigned int pad0; /* the first word is padding only */
+ } ss0;
+
+ struct {
+ unsigned int cbcr_pixel_offset_v_direction:2;
+ unsigned int picture_structure:2;
+ unsigned int width:14;
+ unsigned int height:14;
+ } ss1;
+
+ struct {
+ unsigned int tile_walk:1;
+ unsigned int tiled_surface:1;
+ unsigned int half_pitch_for_chroma:1;
+ unsigned int pitch:18;
+ unsigned int address_ctrl:1; /* clamp or mirror mode */
+ unsigned int pad0:4;
+ unsigned int interleave_chroma:1;
+ unsigned int surface_format:5;
+ } ss2;
+
+ struct {
+ unsigned int y_offset_for_cb:14;
+ unsigned int pad0:2;
+ unsigned int x_offset_for_cb:14;
+ unsigned int pad1:2;
+ } ss3;
+
+ struct {
+ unsigned int y_offset_for_cr:15; /* NOTE(review): 15 bits here versus 14 for y_offset_for_cb in ss3 -- confirm the asymmetry is intended */
+ unsigned int pad0:1;
+ unsigned int x_offset_for_cr:14;
+ unsigned int pad1:2;
+ } ss4;
+
+ struct {
+ unsigned int surface_object_mocs:7;
+ unsigned int pad0:11;
+ unsigned int pad1:2;
+ unsigned int pad2:10;
+ unsigned int vert_line_stride_offset:1;
+ unsigned int vert_line_stride:1;
+ } ss5;
+
+ struct {
+ unsigned int base_addr; /* a whole unsigned int, not a bit-field */
+ } ss6;
+
+ struct {
+ unsigned int base_addr_high:16; /* presumably the address bits above those in ss6.base_addr -- confirm */
+ unsigned int pad0:16;
+ } ss7;
+};
+
+struct gen8_sampler_state /* four groups ss0..ss3 */
+{
+ struct
+ {
+ unsigned int aniso_algorithm:1;
+ unsigned int lod_bias:13;
+ unsigned int min_filter:3;
+ unsigned int mag_filter:3;
+ unsigned int mip_filter:2;
+ unsigned int base_level:5;
+ unsigned int lod_preclamp:2;
+ unsigned int default_color_mode:1;
+ unsigned int pad0:1;
+ unsigned int disable:1;
+ } ss0;
+
+ struct
+ {
+ unsigned int cube_control_mode:1;
+ unsigned int shadow_function:3;
+ unsigned int chroma_key_mode:1;
+ unsigned int chroma_key_index:2;
+ unsigned int chroma_key_enable:1;
+ unsigned int max_lod:12;
+ unsigned int min_lod:12;
+ } ss1;
+
+ struct
+ {
+ unsigned int lod_clamp_mag_mode:1; /* MIPNONE or MIPFILTER */
+ unsigned int flex_filter_vert_align:1;
+ unsigned int flex_filter_hort_align:1;
+ unsigned int flex_filter_coff_size:1; /* coff8 or coff 16 */
+ unsigned int flex_filter_mode:1;
+ unsigned int pad0:1;
+ unsigned int indirect_state_pointer:18; /* point to the SAMPLE_INDIRECT_STATE */
+ union { /* the bit-fields above total 24 bits; this one-byte union is expected to fill the rest of the word -- TODO confirm sizeof(ss2) == 4 on supported compilers */
+ unsigned char nonsep_filter_footer_highmask;
+ struct {
+ unsigned char pad1:2;
+ unsigned char sep_filter_height:2;
+ unsigned char sep_filter_width:2;
+ unsigned char sep_filter_coff_size:2;
+ } sep_filter;
+ } ss2_byte3;
+ } ss2;
+
+ struct
+ {
+ unsigned int r_wrap_mode:3;
+ unsigned int t_wrap_mode:3;
+ unsigned int s_wrap_mode:3;
+ unsigned int pad0:1;
+ unsigned int non_normalized_coord:1;
+ unsigned int trilinear_quality:2;
+ unsigned int address_round:6;
+ unsigned int max_aniso:3;
+ unsigned int pad1:2;
+ unsigned int nonsep_filter_foot_lowmask:8; /* NOTE(review): spelled "foot" here but "footer" in ss2_byte3.nonsep_filter_footer_highmask */
+ } ss3;
+};
+
+struct gen8_global_blend_state /* a single group of bit-fields whose widths sum to 32 */
+{
+ unsigned int pad0:19; /* the first 19 bits are unused padding */
+ unsigned int ydither_offset:2;
+ unsigned int xdither_offset:2;
+ unsigned int color_dither_enable:1;
+ unsigned int alpha_test_func:3;
+ unsigned int alpha_test_enable:1;
+ unsigned int alpha_to_coverage_dither:1;
+ unsigned int alpha_to_one:1;
+ unsigned int ia_blend_enable:1;
+ unsigned int alpha_to_coverage:1;
+};
+
+struct gen8_blend_state_rt { /* two groups, blend0 and blend1; the bit widths in each sum to 32 */
+ struct {
+ unsigned int blue_write_dis:1; /* "_dis" presumably means a set bit disables writes to that channel -- confirm */
+ unsigned int green_write_dis:1;
+ unsigned int red_write_dis:1;
+ unsigned int alpha_write_dis:1;
+ unsigned int pad0:1;
+ unsigned int alpha_blend_func:3;
+ unsigned int ia_dest_blend_factor:5;
+ unsigned int ia_src_blend_factor:5;
+ unsigned int color_blend_func:3;
+ unsigned int dest_blend_factor:5;
+ unsigned int src_blend_factor:5;
+ unsigned int colorbuf_blend:1;
+ } blend0;
+
+ struct {
+ unsigned int post_blend_clamp_enable:1;
+ unsigned int pre_blend_clamp_enable:1;
+ unsigned int clamp_range:2;
+ unsigned int pre_blend_src_clamp:1;
+ unsigned int pad0:22;
+ unsigned int logic_op_func:4;
+ unsigned int logic_op_enable:1;
+ } blend1;
+};
+
+/* TODO: Add the sampler_8x8 for Gen8+.
+ * AVS/Convolve is 256DWs.
+ * MinMaxfilter/Erode/Dilate: 8DWs*/
+
struct gen6_blend_state
{
@@ -1356,6 +1714,7 @@ struct gen7_sampler_8x8
} dw3;
};
+/* This can also be used for BDW+ */
struct gen7_sampler_dndi
{
struct {
@@ -1441,4 +1800,161 @@ struct gen7_sampler_dndi
} dw7;
};
+struct gen8_sampler_8x8_avs { /* Gen8 8x8 AVS sampler state; members are named after the DWORD index they occupy (dw0..dw15, dw152, dw153) */
+ struct {
+ unsigned int gain_factor:6;
+ unsigned int weak_edge_threshold:6;
+ unsigned int strong_edge_threshold:6;
+ unsigned int r3x_coefficient:5;
+ unsigned int r3c_coefficient:5;
+ unsigned int chroma_key_index:2;
+ unsigned int chroma_key_enable:1;
+ unsigned int pad1:1; /* padding: 1 bit */
+ } dw0; /* widths: 6 + 6 + 6 + 5 + 5 + 2 + 1 + 1 = 32 bits */
+
+ struct {
+ unsigned int pad0; /* whole member is padding (no bit-field width given) */
+ } dw1;
+
+ struct {
+ unsigned int global_noise_estimation:8;
+ unsigned int non_edge_weight:3;
+ unsigned int regular_weight:3;
+ unsigned int strong_edge_weight:3;
+ unsigned int r5x_coefficient:5;
+ unsigned int r5cx_coefficient:5;
+ unsigned int r5c_coefficient:5;
+ } dw2; /* widths: 8 + 3 + 3 + 3 + 5 + 5 + 5 = 32 bits */
+
+ struct {
+ unsigned int sin_alpha:8; /* S0.7 */
+ unsigned int cos_alpha:8; /* S0.7 */
+ unsigned int sat_max:6;
+ unsigned int hue_max:6;
+ unsigned int enable_8tap_filter:2;
+ unsigned int ief4_smooth_enable:1;
+ unsigned int skin_ief_enable:1;
+ } dw3; /* widths: 8 + 8 + 6 + 6 + 2 + 1 + 1 = 32 bits */
+
+ struct {
+ unsigned int s3u:11; /* S2.8 */
+ unsigned int pad0:1; /* padding: 1 bit */
+ unsigned int diamond_margin:3;
+ unsigned int vy_std_enable:1;
+ unsigned int umid:8;
+ unsigned int vmid:8;
+ } dw4; /* widths: 11 + 1 + 3 + 1 + 8 + 8 = 32 bits */
+
+ struct {
+ unsigned int diamond_dv:7;
+ unsigned int diamond_th:6;
+ unsigned int diamond_alpha:8;
+ unsigned int hs_margin:3;
+ unsigned int diamond_du:7;
+ unsigned int skin_detailfilter:1;
+ } dw5; /* widths: 7 + 6 + 8 + 3 + 7 + 1 = 32 bits */
+
+ struct {
+ unsigned int y_point1:8;
+ unsigned int y_point2:8;
+ unsigned int y_point3:8;
+ unsigned int y_point4:8;
+ } dw6; /* four 8-bit fields */
+
+ struct {
+ unsigned int inv_margin_vyl:16;
+ unsigned int pad0:16; /* padding: 16 bits */
+ } dw7;
+
+ struct {
+ unsigned int inv_margin_vyu:16;
+ unsigned int p0l:8;
+ unsigned int p1l:8;
+ } dw8; /* widths: 16 + 8 + 8 = 32 bits */
+
+ struct {
+ unsigned int p2l:8;
+ unsigned int p3l:8;
+ unsigned int b0l:8;
+ unsigned int b1l:8;
+ } dw9; /* four 8-bit fields */
+
+ struct {
+ unsigned int b2l:8;
+ unsigned int b3l:8;
+ unsigned int s0l:11;
+ unsigned int y_slope2:5;
+ } dw10; /* widths: 8 + 8 + 11 + 5 = 32 bits */
+
+ struct {
+ unsigned int s1l:11;
+ unsigned int s2l:11;
+ unsigned int pad0:10; /* padding: 10 bits */
+ } dw11;
+
+ struct {
+ unsigned int s3l:11;
+ unsigned int p0u:8;
+ unsigned int p1u:8;
+ unsigned int y_slope1:5;
+ } dw12; /* widths: 11 + 8 + 8 + 5 = 32 bits */
+
+ struct {
+ unsigned int p2u:8;
+ unsigned int p3u:8;
+ unsigned int b0u:8;
+ unsigned int b1u:8;
+ } dw13; /* four 8-bit fields */
+
+ struct {
+ unsigned int b2u:8;
+ unsigned int b3u:8;
+ unsigned int s0u:11;
+ unsigned int pad0:5; /* padding: 5 bits */
+ } dw14;
+
+ struct {
+ unsigned int s1u:11;
+ unsigned int s2u:11;
+ unsigned int pad0:10; /* padding: 10 bits */
+ } dw15;
+
+ /* DW16-DW151 */
+ struct i965_sampler_8x8_coefficient coefficients[17]; /* 17 entries over DW16-DW151 is 136 DWORDs, i.e. presumably 8 DWORDs per entry; confirm against the struct definition */
+
+ struct {
+ unsigned int transition_area_with_8_pixels:3;
+ unsigned int pad0:1; /* padding: 1 bit */
+ unsigned int transition_area_with_4_pixels:3;
+ unsigned int pad1:1; /* padding: 1 bit */
+ unsigned int max_derivative_8_pixels:8;
+ unsigned int max_derivative_4_pixels:8;
+ unsigned int default_sharpness_level:8;
+ } dw152; /* widths: 3 + 1 + 3 + 1 + 8 + 8 + 8 = 32 bits */
+
+ struct {
+ unsigned int rgb_adaptive:1;
+ unsigned int adaptive_filter_for_all_channel:1;
+ unsigned int pad0:19; /* padding: 19 bits */
+ unsigned int bypass_y_adaptive_filtering:1;
+ unsigned int bypass_x_adaptive_filtering:1;
+ unsigned int pad1:9; /* padding: 9 bits */
+ } dw153; /* widths: 1 + 1 + 19 + 1 + 1 + 9 = 32 bits */
+
+ /* Reserved to 256DW */
+ unsigned int reserved[102]; /* DW0-DW153 is 154 DWORDs; 154 + 102 = 256 */
+};
+
+#define SURFACE_STATE_PADDED_SIZE_0_GEN7 ALIGN(sizeof(struct gen7_surface_state), 32) /* sizeof the Gen7 surface state, passed through ALIGN(..., 32) */
+#define SURFACE_STATE_PADDED_SIZE_1_GEN7 ALIGN(sizeof(struct gen7_surface_state2), 32) /* same for the second Gen7 surface-state layout */
+#define SURFACE_STATE_PADDED_SIZE_GEN7 MAX(SURFACE_STATE_PADDED_SIZE_0_GEN7, SURFACE_STATE_PADDED_SIZE_1_GEN7) /* larger of the two, so one slot size fits either layout */
+
+#define SURFACE_STATE_PADDED_SIZE_0_GEN6 ALIGN(sizeof(struct i965_surface_state), 32) /* Gen6 uses the i965_surface_state layouts */
+#define SURFACE_STATE_PADDED_SIZE_1_GEN6 ALIGN(sizeof(struct i965_surface_state2), 32)
+#define SURFACE_STATE_PADDED_SIZE_GEN6 MAX(SURFACE_STATE_PADDED_SIZE_0_GEN6, SURFACE_STATE_PADDED_SIZE_1_GEN6) /* larger of the two Gen6 padded sizes */
+
+#define SURFACE_STATE_PADDED_SIZE_0_GEN8 ALIGN(sizeof(struct gen8_surface_state), 32) /* Gen8 counterparts of the macros above */
+#define SURFACE_STATE_PADDED_SIZE_1_GEN8 ALIGN(sizeof(struct gen8_surface_state2), 32)
+#define SURFACE_STATE_PADDED_SIZE_GEN8 MAX(SURFACE_STATE_PADDED_SIZE_0_GEN8, SURFACE_STATE_PADDED_SIZE_1_GEN8) /* larger of the two Gen8 padded sizes */
+
#endif /* _I965_STRUCTS_H_ */
diff --git a/src/intel_batchbuffer.c b/src/intel_batchbuffer.c
index 94d968c..c6d3769 100644
--- a/src/intel_batchbuffer.c
+++ b/src/intel_batchbuffer.c
@@ -86,6 +86,16 @@ intel_batchbuffer_new(struct intel_driver_data *intel, int flag, int buffer_size
batch->intel = intel;
batch->flag = flag;
batch->run = drm_intel_bo_mrb_exec;
+
+ if (IS_GEN6(intel->device_info) &&
+ flag == I915_EXEC_RENDER)
+ batch->wa_render_bo = dri_bo_alloc(intel->bufmgr,
+ "wa scratch",
+ 4096,
+ 4096);
+ else
+ batch->wa_render_bo = NULL;
+
intel_batchbuffer_reset(batch, buffer_size);
return batch;
@@ -99,6 +109,7 @@ void intel_batchbuffer_free(struct intel_batchbuffer *batch)
}
dri_bo_unreference(batch->buffer);
+ dri_bo_unreference(batch->wa_render_bo);
free(batch);
}
@@ -172,27 +183,69 @@ intel_batchbuffer_emit_mi_flush(struct intel_batchbuffer *batch)
{
struct intel_driver_data *intel = batch->intel;
- if (IS_GEN6(intel->device_id) ||
- IS_GEN7(intel->device_id)) {
+ if (IS_GEN6(intel->device_info) ||
+ IS_GEN7(intel->device_info) ||
+ IS_GEN8(intel->device_info)) {
if (batch->flag == I915_EXEC_RENDER) {
- BEGIN_BATCH(batch, 4);
- OUT_BATCH(batch, CMD_PIPE_CONTROL | 0x2);
+ if (IS_GEN8(intel->device_info)) {
+ BEGIN_BATCH(batch, 6);
+ OUT_BATCH(batch, CMD_PIPE_CONTROL | (6 - 2));
- if (IS_GEN6(intel->device_id))
- OUT_BATCH(batch,
+ OUT_BATCH(batch,
+ CMD_PIPE_CONTROL_CS_STALL |
+ CMD_PIPE_CONTROL_WC_FLUSH |
+ CMD_PIPE_CONTROL_TC_FLUSH |
+ CMD_PIPE_CONTROL_DC_FLUSH |
+ CMD_PIPE_CONTROL_NOWRITE);
+ OUT_BATCH(batch, 0); /* write address */
+ OUT_BATCH(batch, 0);
+ OUT_BATCH(batch, 0); /* write data */
+ OUT_BATCH(batch, 0);
+ ADVANCE_BATCH(batch);
+ } else if (IS_GEN6(intel->device_info)) {
+ assert(batch->wa_render_bo);
+
+ BEGIN_BATCH(batch, 4 * 3);
+
+ OUT_BATCH(batch, CMD_PIPE_CONTROL | (4 - 2));
+ OUT_BATCH(batch,
+ CMD_PIPE_CONTROL_CS_STALL |
+ CMD_PIPE_CONTROL_STALL_AT_SCOREBOARD);
+ OUT_BATCH(batch, 0); /* address */
+ OUT_BATCH(batch, 0); /* write data */
+
+ OUT_BATCH(batch, CMD_PIPE_CONTROL | (4 - 2));
+ OUT_BATCH(batch, CMD_PIPE_CONTROL_WRITE_QWORD);
+ OUT_RELOC(batch,
+ batch->wa_render_bo,
+ I915_GEM_DOMAIN_INSTRUCTION,
+ I915_GEM_DOMAIN_INSTRUCTION,
+ 0);
+ OUT_BATCH(batch, 0); /* write data */
+
+ /* now finally the _real flush */
+ OUT_BATCH(batch, CMD_PIPE_CONTROL | (4 - 2));
+ OUT_BATCH(batch,
CMD_PIPE_CONTROL_WC_FLUSH |
CMD_PIPE_CONTROL_TC_FLUSH |
CMD_PIPE_CONTROL_NOWRITE);
- else
+ OUT_BATCH(batch, 0); /* write address */
+ OUT_BATCH(batch, 0); /* write data */
+ ADVANCE_BATCH(batch);
+ } else {
+ BEGIN_BATCH(batch, 4);
+ OUT_BATCH(batch, CMD_PIPE_CONTROL | (4 - 2));
+
OUT_BATCH(batch,
CMD_PIPE_CONTROL_WC_FLUSH |
CMD_PIPE_CONTROL_TC_FLUSH |
CMD_PIPE_CONTROL_DC_FLUSH |
CMD_PIPE_CONTROL_NOWRITE);
+ OUT_BATCH(batch, 0); /* write address */
+ OUT_BATCH(batch, 0); /* write data */
+ ADVANCE_BATCH(batch);
+ }
- OUT_BATCH(batch, 0);
- OUT_BATCH(batch, 0);
- ADVANCE_BATCH(batch);
} else {
if (batch->flag == I915_EXEC_BLT) {
BEGIN_BLT_BATCH(batch, 4);
diff --git a/src/intel_batchbuffer.h b/src/intel_batchbuffer.h
index 70ceddb..34ff66d 100644
--- a/src/intel_batchbuffer.h
+++ b/src/intel_batchbuffer.h
@@ -24,6 +24,9 @@ struct intel_batchbuffer
int (*run)(drm_intel_bo *bo, int used,
drm_clip_rect_t *cliprects, int num_cliprects,
int DR4, unsigned int ring_flag);
+
+ /* Used for Sandybridge workaround */
+ dri_bo *wa_render_bo;
};
struct intel_batchbuffer *intel_batchbuffer_new(struct intel_driver_data *intel, int flag, int buffer_size);
diff --git a/src/intel_driver.c b/src/intel_driver.c
index 83542d9..994e64c 100644
--- a/src/intel_driver.c
+++ b/src/intel_driver.c
@@ -34,6 +34,7 @@
#include "intel_batchbuffer.h"
#include "intel_memman.h"
#include "intel_driver.h"
+uint32_t g_intel_debug_option_flags = 0;
static Bool
intel_driver_get_param(struct intel_driver_data *intel, int param, int *value)
@@ -67,12 +68,22 @@ static void intel_driver_get_revid(struct intel_driver_data *intel, int *value)
return;
}
+extern const struct intel_device_info *i965_get_device_info(int devid);
+
bool
intel_driver_init(VADriverContextP ctx)
{
struct intel_driver_data *intel = intel_driver_data(ctx);
struct drm_state * const drm_state = (struct drm_state *)ctx->drm_state;
- int has_exec2, has_bsd, has_blt;
+ int has_exec2 = 0, has_bsd = 0, has_blt = 0, has_vebox = 0;
+ char *env_str = NULL;
+
+ g_intel_debug_option_flags = 0;
+ if ((env_str = getenv("VA_INTEL_DEBUG")))
+ g_intel_debug_option_flags = atoi(env_str);
+
+ if (g_intel_debug_option_flags)
+ fprintf(stderr, "g_intel_debug_option_flags:%x\n", g_intel_debug_option_flags);
assert(drm_state);
assert(VA_CHECK_DRM_AUTH_TYPE(ctx, VA_DRM_AUTH_DRI1) ||
@@ -91,12 +102,19 @@ intel_driver_init(VADriverContextP ctx)
pthread_mutex_init(&intel->ctxmutex, NULL);
intel_driver_get_param(intel, I915_PARAM_CHIPSET_ID, &intel->device_id);
+ intel->device_info = i965_get_device_info(intel->device_id);
+
+ if (!intel->device_info)
+ return false;
+
if (intel_driver_get_param(intel, I915_PARAM_HAS_EXECBUF2, &has_exec2))
intel->has_exec2 = has_exec2;
if (intel_driver_get_param(intel, I915_PARAM_HAS_BSD, &has_bsd))
intel->has_bsd = has_bsd;
if (intel_driver_get_param(intel, I915_PARAM_HAS_BLT, &has_blt))
intel->has_blt = has_blt;
+ if (intel_driver_get_param(intel, I915_PARAM_HAS_VEBOX, &has_vebox))
+ intel->has_vebox = !!has_vebox;
intel_driver_get_revid(intel, &intel->revision);
intel_memman_init(intel);
diff --git a/src/intel_driver.h b/src/intel_driver.h
index 9631b96..432a0d9 100644
--- a/src/intel_driver.h
+++ b/src/intel_driver.h
@@ -38,6 +38,8 @@
#define XY_COLOR_BLT_WRITE_RGB (1 << 20)
#define XY_COLOR_BLT_DST_TILED (1 << 11)
+#define GEN8_XY_COLOR_BLT_CMD (CMD_2D | (0x50 << 22) | 0x05)
+
/* BR13 */
#define BR13_8 (0x0 << 24)
#define BR13_565 (0x1 << 24)
@@ -45,6 +47,7 @@
#define BR13_8888 (0x3 << 24)
#define CMD_PIPE_CONTROL (CMD_3D | (3 << 27) | (2 << 24) | (0 << 16))
+#define CMD_PIPE_CONTROL_CS_STALL (1 << 20)
#define CMD_PIPE_CONTROL_NOWRITE (0 << 14)
#define CMD_PIPE_CONTROL_WRITE_QWORD (1 << 14)
#define CMD_PIPE_CONTROL_WRITE_DEPTH (2 << 14)
@@ -57,12 +60,14 @@
#define CMD_PIPE_CONTROL_DC_FLUSH (1 << 5)
#define CMD_PIPE_CONTROL_GLOBAL_GTT (1 << 2)
#define CMD_PIPE_CONTROL_LOCAL_PGTT (0 << 2)
+#define CMD_PIPE_CONTROL_STALL_AT_SCOREBOARD (1 << 1)
#define CMD_PIPE_CONTROL_DEPTH_CACHE_FLUSH (1 << 0)
struct intel_batchbuffer;
#define ALIGN(i, n) (((i) + (n) - 1) & ~((n) - 1))
+#define IS_ALIGNED(i, n) (((i) & ((n)-1)) == 0)
#define MIN(a, b) ((a) < (b) ? (a) : (b))
#define MAX(a, b) ((a) > (b) ? (a) : (b))
#define ARRAY_ELEMS(a) (sizeof(a) / sizeof((a)[0]))
@@ -71,6 +76,18 @@ struct intel_batchbuffer;
#define True 1
#define False 0
+extern uint32_t g_intel_debug_option_flags;
+#define VA_INTEL_DEBUG_OPTION_ASSERT (1 << 0)
+#define VA_INTEL_DEBUG_OPTION_BENCH (1 << 1)
+
+#define ASSERT_RET(value, fail_ret) do { /* when value is false: optionally assert, then return fail_ret from the enclosing function */ \
+ if (!(value)) { \
+ if (g_intel_debug_option_flags & VA_INTEL_DEBUG_OPTION_ASSERT) /* assert only when this debug-option bit is set */ \
+ assert(value); /* note: this evaluates value a second time */ \
+ return fail_ret; /* reached whenever value is false and assert() did not abort */ \
+ } \
+ } while (0)
+
#define SET_BLOCKED_SIGSET() do { \
sigset_t bl_mask; \
sigfillset(&bl_mask); \
@@ -104,6 +121,21 @@ struct intel_batchbuffer;
} \
} while (0)
+struct intel_device_info /* per-device description; intel_driver_init() looks it up from the chipset id via i965_get_device_info() */
+{
+ int gen; /* hardware generation number; the IS_IRONLAKE/IS_GEN6/IS_GEN7/IS_GEN8 macros compare it with 5, 6, 7 and 8 */
+ int gt; /* presumably the GT tier of the part; confirm against the device table */
+
+ unsigned int urb_size;
+ unsigned int max_wm_threads;
+
+ unsigned int is_g4x : 1; /* gen4 */
+ unsigned int is_ivybridge : 1; /* gen7 */
+ unsigned int is_baytrail : 1; /* gen7 */
+ unsigned int is_haswell : 1; /* gen7 */
+ unsigned int is_cherryview : 1; /* gen8 */
+};
+
struct intel_driver_data
{
int fd;
@@ -121,6 +153,9 @@ struct intel_driver_data
unsigned int has_exec2 : 1; /* Flag: has execbuffer2? */
unsigned int has_bsd : 1; /* Flag: has bitstream decoder for H.264? */
unsigned int has_blt : 1; /* Flag: has BLT unit? */
+ unsigned int has_vebox : 1; /* Flag: has VEBOX unit */
+
+ const struct intel_device_info *device_info;
};
bool intel_driver_init(VADriverContextP ctx);
@@ -145,205 +180,16 @@ struct intel_region
dri_bo *bo;
};
-#define PCI_CHIP_GM45_GM 0x2A42
-#define PCI_CHIP_IGD_E_G 0x2E02
-#define PCI_CHIP_Q45_G 0x2E12
-#define PCI_CHIP_G45_G 0x2E22
-#define PCI_CHIP_G41_G 0x2E32
-#define PCI_CHIP_B43_G 0x2E42
-#define PCI_CHIP_B43_G1 0x2E92
-
-#define PCI_CHIP_IRONLAKE_D_G 0x0042
-#define PCI_CHIP_IRONLAKE_M_G 0x0046
-
-#ifndef PCI_CHIP_SANDYBRIDGE_GT1
-#define PCI_CHIP_SANDYBRIDGE_GT1 0x0102 /* Desktop */
-#define PCI_CHIP_SANDYBRIDGE_GT2 0x0112
-#define PCI_CHIP_SANDYBRIDGE_GT2_PLUS 0x0122
-#define PCI_CHIP_SANDYBRIDGE_M_GT1 0x0106 /* Mobile */
-#define PCI_CHIP_SANDYBRIDGE_M_GT2 0x0116
-#define PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS 0x0126
-#define PCI_CHIP_SANDYBRIDGE_S_GT 0x010A /* Server */
-#endif
-
-#define PCI_CHIP_IVYBRIDGE_GT1 0x0152 /* Desktop */
-#define PCI_CHIP_IVYBRIDGE_GT2 0x0162
-#define PCI_CHIP_IVYBRIDGE_M_GT1 0x0156 /* Mobile */
-#define PCI_CHIP_IVYBRIDGE_M_GT2 0x0166
-#define PCI_CHIP_IVYBRIDGE_S_GT1 0x015a /* Server */
-#define PCI_CHIP_IVYBRIDGE_S_GT2 0x016a
-
-#define PCI_CHIP_HASWELL_GT1 0x0402 /* Desktop */
-#define PCI_CHIP_HASWELL_GT2 0x0412
-#define PCI_CHIP_HASWELL_GT3 0x0422
-#define PCI_CHIP_HASWELL_M_GT1 0x0406 /* Mobile */
-#define PCI_CHIP_HASWELL_M_GT2 0x0416
-#define PCI_CHIP_HASWELL_M_GT3 0x0426
-#define PCI_CHIP_HASWELL_S_GT1 0x040a /* Server */
-#define PCI_CHIP_HASWELL_S_GT2 0x041a
-#define PCI_CHIP_HASWELL_S_GT3 0x042a
-#define PCI_CHIP_HASWELL_B_GT1 0x040b /* Reserved */
-#define PCI_CHIP_HASWELL_B_GT2 0x041b
-#define PCI_CHIP_HASWELL_B_GT3 0x042b
-#define PCI_CHIP_HASWELL_E_GT1 0x040e /* Reserved */
-#define PCI_CHIP_HASWELL_E_GT2 0x041e
-#define PCI_CHIP_HASWELL_E_GT3 0x042e
-
-#define PCI_CHIP_HASWELL_SDV_GT1 0x0c02 /* Desktop */
-#define PCI_CHIP_HASWELL_SDV_GT2 0x0c12
-#define PCI_CHIP_HASWELL_SDV_GT3 0x0c22
-#define PCI_CHIP_HASWELL_SDV_M_GT1 0x0c06 /* Mobile */
-#define PCI_CHIP_HASWELL_SDV_M_GT2 0x0c16
-#define PCI_CHIP_HASWELL_SDV_M_GT3 0x0c26
-#define PCI_CHIP_HASWELL_SDV_S_GT1 0x0c0a /* Server */
-#define PCI_CHIP_HASWELL_SDV_S_GT2 0x0c1a
-#define PCI_CHIP_HASWELL_SDV_S_GT3 0x0c2a
-#define PCI_CHIP_HASWELL_SDV_B_GT1 0x0c0b /* Reserved */
-#define PCI_CHIP_HASWELL_SDV_B_GT2 0x0c1b
-#define PCI_CHIP_HASWELL_SDV_B_GT3 0x0c2b
-#define PCI_CHIP_HASWELL_SDV_E_GT1 0x0c0e /* Reserved */
-#define PCI_CHIP_HASWELL_SDV_E_GT2 0x0c1e
-#define PCI_CHIP_HASWELL_SDV_E_GT3 0x0c2e
-
-#define PCI_CHIP_HASWELL_ULT_GT1 0x0A02 /* Desktop */
-#define PCI_CHIP_HASWELL_ULT_GT2 0x0A12
-#define PCI_CHIP_HASWELL_ULT_GT3 0x0A22
-#define PCI_CHIP_HASWELL_ULT_M_GT1 0x0A06 /* Mobile */
-#define PCI_CHIP_HASWELL_ULT_M_GT2 0x0A16
-#define PCI_CHIP_HASWELL_ULT_M_GT3 0x0A26
-#define PCI_CHIP_HASWELL_ULT_S_GT1 0x0A0A /* Server */
-#define PCI_CHIP_HASWELL_ULT_S_GT2 0x0A1A
-#define PCI_CHIP_HASWELL_ULT_S_GT3 0x0A2A
-#define PCI_CHIP_HASWELL_ULT_B_GT1 0x0A0B /* Reserved */
-#define PCI_CHIP_HASWELL_ULT_B_GT2 0x0A1B
-#define PCI_CHIP_HASWELL_ULT_B_GT3 0x0A2B
-#define PCI_CHIP_HASWELL_ULT_E_GT1 0x0A0E /* Reserved */
-#define PCI_CHIP_HASWELL_ULT_E_GT2 0x0A1E
-#define PCI_CHIP_HASWELL_ULT_E_GT3 0x0A2E
-
-#define PCI_CHIP_HASWELL_CRW_GT1 0x0D02 /* Desktop */
-#define PCI_CHIP_HASWELL_CRW_GT2 0x0D12
-#define PCI_CHIP_HASWELL_CRW_GT3 0x0D22
-#define PCI_CHIP_HASWELL_CRW_M_GT1 0x0D06 /* Mobile */
-#define PCI_CHIP_HASWELL_CRW_M_GT2 0x0D16
-#define PCI_CHIP_HASWELL_CRW_M_GT3 0x0D26
-#define PCI_CHIP_HASWELL_CRW_S_GT1 0x0D0A /* Server */
-#define PCI_CHIP_HASWELL_CRW_S_GT2 0x0D1A
-#define PCI_CHIP_HASWELL_CRW_S_GT3 0x0D2A
-#define PCI_CHIP_HASWELL_CRW_B_GT1 0x0D0B /* Reserved */
-#define PCI_CHIP_HASWELL_CRW_B_GT2 0x0D1B
-#define PCI_CHIP_HASWELL_CRW_B_GT3 0x0D2B
-#define PCI_CHIP_HASWELL_CRW_E_GT1 0x0D0E /* Reserved */
-#define PCI_CHIP_HASWELL_CRW_E_GT2 0x0D1E
-#define PCI_CHIP_HASWELL_CRW_E_GT3 0x0D2E
-
-#define IS_G45(devid) (devid == PCI_CHIP_IGD_E_G || \
- devid == PCI_CHIP_Q45_G || \
- devid == PCI_CHIP_G45_G || \
- devid == PCI_CHIP_G41_G || \
- devid == PCI_CHIP_B43_G || \
- devid == PCI_CHIP_B43_G1)
-
-#define IS_GM45(devid) (devid == PCI_CHIP_GM45_GM)
-#define IS_G4X(devid) (IS_G45(devid) || IS_GM45(devid))
-
-#define IS_IRONLAKE_D(devid) (devid == PCI_CHIP_IRONLAKE_D_G)
-#define IS_IRONLAKE_M(devid) (devid == PCI_CHIP_IRONLAKE_M_G)
-#define IS_IRONLAKE(devid) (IS_IRONLAKE_D(devid) || IS_IRONLAKE_M(devid))
-
-#define IS_SNB_GT1(devid) (devid == PCI_CHIP_SANDYBRIDGE_GT1 || \
- devid == PCI_CHIP_SANDYBRIDGE_M_GT1 || \
- devid == PCI_CHIP_SANDYBRIDGE_S_GT)
-
-#define IS_SNB_GT2(devid) (devid == PCI_CHIP_SANDYBRIDGE_GT2 || \
- devid == PCI_CHIP_SANDYBRIDGE_GT2_PLUS || \
- devid == PCI_CHIP_SANDYBRIDGE_M_GT2 || \
- devid == PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS)
-
-#define IS_GEN6(devid) (IS_SNB_GT1(devid) || \
- IS_SNB_GT2(devid))
-
-#define IS_IVB_GT1(devid) (devid == PCI_CHIP_IVYBRIDGE_GT1 || \
- devid == PCI_CHIP_IVYBRIDGE_M_GT1 || \
- devid == PCI_CHIP_IVYBRIDGE_S_GT1)
-
-#define IS_IVB_GT2(devid) (devid == PCI_CHIP_IVYBRIDGE_GT2 || \
- devid == PCI_CHIP_IVYBRIDGE_M_GT2 || \
- devid == PCI_CHIP_IVYBRIDGE_S_GT2)
-
-#define IS_IVYBRIDGE(devid) (IS_IVB_GT1(devid) || \
- IS_IVB_GT2(devid))
-
-#define IS_HSW_GT1(devid) (devid == PCI_CHIP_HASWELL_GT1 || \
- devid == PCI_CHIP_HASWELL_M_GT1 || \
- devid == PCI_CHIP_HASWELL_S_GT1 || \
- devid == PCI_CHIP_HASWELL_B_GT1 || \
- devid == PCI_CHIP_HASWELL_E_GT1 || \
- devid == PCI_CHIP_HASWELL_SDV_GT1 || \
- devid == PCI_CHIP_HASWELL_SDV_M_GT1 || \
- devid == PCI_CHIP_HASWELL_SDV_S_GT1 || \
- devid == PCI_CHIP_HASWELL_SDV_B_GT1 || \
- devid == PCI_CHIP_HASWELL_SDV_E_GT1 || \
- devid == PCI_CHIP_HASWELL_CRW_GT1 || \
- devid == PCI_CHIP_HASWELL_CRW_M_GT1 || \
- devid == PCI_CHIP_HASWELL_CRW_S_GT1 || \
- devid == PCI_CHIP_HASWELL_CRW_B_GT1 || \
- devid == PCI_CHIP_HASWELL_CRW_E_GT1 || \
- devid == PCI_CHIP_HASWELL_ULT_GT1 || \
- devid == PCI_CHIP_HASWELL_ULT_M_GT1 || \
- devid == PCI_CHIP_HASWELL_ULT_S_GT1 || \
- devid == PCI_CHIP_HASWELL_ULT_B_GT1 || \
- devid == PCI_CHIP_HASWELL_ULT_E_GT1)
-
-
-#define IS_HSW_GT2(devid) (devid == PCI_CHIP_HASWELL_GT2|| \
- devid == PCI_CHIP_HASWELL_M_GT2|| \
- devid == PCI_CHIP_HASWELL_S_GT2|| \
- devid == PCI_CHIP_HASWELL_B_GT2 || \
- devid == PCI_CHIP_HASWELL_E_GT2 || \
- devid == PCI_CHIP_HASWELL_SDV_GT2|| \
- devid == PCI_CHIP_HASWELL_SDV_M_GT2|| \
- devid == PCI_CHIP_HASWELL_SDV_S_GT2|| \
- devid == PCI_CHIP_HASWELL_SDV_B_GT2 || \
- devid == PCI_CHIP_HASWELL_SDV_E_GT2 || \
- devid == PCI_CHIP_HASWELL_CRW_GT2|| \
- devid == PCI_CHIP_HASWELL_CRW_M_GT2|| \
- devid == PCI_CHIP_HASWELL_CRW_S_GT2|| \
- devid == PCI_CHIP_HASWELL_CRW_B_GT2|| \
- devid == PCI_CHIP_HASWELL_CRW_E_GT2|| \
- devid == PCI_CHIP_HASWELL_ULT_GT2|| \
- devid == PCI_CHIP_HASWELL_ULT_M_GT2|| \
- devid == PCI_CHIP_HASWELL_ULT_S_GT2|| \
- devid == PCI_CHIP_HASWELL_ULT_B_GT2 || \
- devid == PCI_CHIP_HASWELL_ULT_E_GT2)
-
-
-#define IS_HSW_GT3(devid) (devid == PCI_CHIP_HASWELL_GT3 || \
- devid == PCI_CHIP_HASWELL_M_GT3 || \
- devid == PCI_CHIP_HASWELL_S_GT3 || \
- devid == PCI_CHIP_HASWELL_B_GT3 || \
- devid == PCI_CHIP_HASWELL_E_GT3 || \
- devid == PCI_CHIP_HASWELL_SDV_GT3 || \
- devid == PCI_CHIP_HASWELL_SDV_M_GT3 || \
- devid == PCI_CHIP_HASWELL_SDV_S_GT3 || \
- devid == PCI_CHIP_HASWELL_SDV_B_GT3 || \
- devid == PCI_CHIP_HASWELL_SDV_E_GT3 || \
- devid == PCI_CHIP_HASWELL_CRW_GT3 || \
- devid == PCI_CHIP_HASWELL_CRW_M_GT3 || \
- devid == PCI_CHIP_HASWELL_CRW_S_GT3 || \
- devid == PCI_CHIP_HASWELL_CRW_B_GT3 || \
- devid == PCI_CHIP_HASWELL_CRW_E_GT3 || \
- devid == PCI_CHIP_HASWELL_ULT_GT3 || \
- devid == PCI_CHIP_HASWELL_ULT_M_GT3 || \
- devid == PCI_CHIP_HASWELL_ULT_S_GT3 || \
- devid == PCI_CHIP_HASWELL_ULT_B_GT3 || \
- devid == PCI_CHIP_HASWELL_ULT_E_GT3)
-
-#define IS_HASWELL(devid) (IS_HSW_GT1(devid) || \
- IS_HSW_GT2(devid) || \
- IS_HSW_GT3(devid))
-
-#define IS_GEN7(devid) (IS_IVYBRIDGE(devid) || \
- IS_HASWELL(devid))
+#define IS_G4X(device_info) ((device_info)->is_g4x) /* argument is parenthesized so any pointer expression (e.g. &info, a ?: b) expands correctly */
+
+#define IS_IRONLAKE(device_info) ((device_info)->gen == 5) /* true when the device's generation number is 5 */
+
+#define IS_GEN6(device_info) ((device_info)->gen == 6) /* true when the device's generation number is 6 */
+
+#define IS_HASWELL(device_info) ((device_info)->is_haswell) /* reads the is_haswell flag of struct intel_device_info */
+#define IS_GEN7(device_info) ((device_info)->gen == 7) /* true when the device's generation number is 7 */
+
+#define IS_CHERRYVIEW(device_info) ((device_info)->is_cherryview) /* reads the is_cherryview flag of struct intel_device_info */
+#define IS_GEN8(device_info) ((device_info)->gen == 8) /* true when the device's generation number is 8 */
#endif /* _INTEL_DRIVER_H_ */
diff --git a/src/intel_media.h b/src/intel_media.h
index b30740a..55136d6 100644
--- a/src/intel_media.h
+++ b/src/intel_media.h
@@ -39,6 +39,7 @@ struct gen_avc_surface
dri_bo *dmv_top;
dri_bo *dmv_bottom;
int dmv_bottom_flag;
+ int frame_store_id; /* only used for H.264 on earlier generations (<HSW) */
};
extern void gen_free_avc_surface(void **data);
diff --git a/src/intel_version.h.in b/src/intel_version.h.in
new file mode 100644
index 0000000..050e834
--- /dev/null
+++ b/src/intel_version.h.in
@@ -0,0 +1,36 @@
+/*
+ * Copyright (C) 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef INTEL_VERSION_H
+#define INTEL_VERSION_H
+
+/**
+ * INTEL_DRIVER_GIT_VERSION:
+ *
+ * The full version identifier of libva-intel-driver, from a git
+ * repository, in string form (suitable for string concatenation).
+ */
+#define INTEL_DRIVER_GIT_VERSION "@INTEL_DRIVER_GIT_VERSION@"
+
+#endif /* INTEL_VERSION_H */
diff --git a/src/shaders/post_processing/Makefile.am b/src/shaders/post_processing/Makefile.am
index b19020f..0f5c2bf 100644
--- a/src/shaders/post_processing/Makefile.am
+++ b/src/shaders/post_processing/Makefile.am
@@ -1,4 +1,4 @@
-SUBDIRS = gen5_6 gen7
+SUBDIRS = gen5_6 gen7 gen75 gen8
# Extra clean files so that maintainer-clean removes *everything*
MAINTAINERCLEANFILES = Makefile.in
diff --git a/src/shaders/post_processing/gen5_6/Core_Kernels/PL_DNDI_ALG_UVCopy_NV12.asm b/src/shaders/post_processing/gen5_6/Core_Kernels/PL_DNDI_ALG_UVCopy_NV12.asm
index 280d37a..23bd306 100644
--- a/src/shaders/post_processing/gen5_6/Core_Kernels/PL_DNDI_ALG_UVCopy_NV12.asm
+++ b/src/shaders/post_processing/gen5_6/Core_Kernels/PL_DNDI_ALG_UVCopy_NV12.asm
@@ -62,24 +62,29 @@
#include "DI_Hist_Save.asm"
////////////////////////////////////// Save the DN Curr Frame for Next Run ////////////////////////
- add (4) pCF_Y_OFFSET<1>:uw ubSRC_CF_OFFSET<4;4,1>:ub npDN_YUV:w
- // check top/bottom field first
- cmp.e.f0.0 (1) null<1>:w ubTFLD_FIRST<0;1,0>:ub 1:w
- (f0.0) jmpi (1) TOP_FIELD_FIRST
-
-BOTTOM_FIELD_FIRST:
- $for (0,0; <nY_NUM_OF_ROWS/2; 2,1) {
- mov (4) mudMSGHDR_DN(1,%1*4)<1> udRESP(nDI_CURR_2ND_FIELD_LUMA_OFFSET,%2*4)<4;4,1> // 2nd field luma from current frame (line 0,2)
- mov (4) mudMSGHDR_DN(1,%1*4+4)<1> udRESP(nDI_CURR_FRAME_LUMA_OFFSET+%2,4)<4;4,1> // 1st field luma from current frame (line 1,3)
- }
- jmpi (1) SAVE_DN_CURR
-
-TOP_FIELD_FIRST:
- $for (0,0; <nY_NUM_OF_ROWS/2; 2,1) {
- mov (4) mudMSGHDR_DN(1,%1*4)<1> udRESP(nDI_CURR_FRAME_LUMA_OFFSET+%2,0)<4;4,1> // 2nd field luma from current frame (line 0,2)
- mov (4) mudMSGHDR_DN(1,%1*4+4)<1> udRESP(nDI_CURR_2ND_FIELD_LUMA_OFFSET,%2*4)<4;4,1> // 1st field luma from current frame (line 1,3)
+ // previous frame
+ $for (0; <nY_NUM_OF_ROWS/2; 1) {
+ mov (16) mubMSGHDR_DN(1, %1*16)<1> ubRESP(nDI_PREV_FRAME_LUMA_OFFSET,%1*16)
}
-SAVE_DN_CURR:
+
+ mov (2) rMSGSRC.0<1>:ud wORIX<2;2,1>:w // X origin and Y origin
+ mov (1) rMSGSRC.2<1>:ud nDPW_BLOCK_SIZE_DN:ud // block width and height (16x4)
+ mov (8) mudMSGHDR_DN(0)<1> rMSGSRC.0<8;8,1>:ud
+ send (8) dNULLREG mMSGHDR_DN udDUMMY_NULL nDATAPORT_WRITE nDPMW_MSGDSC+nDPMW_MSG_LEN_PL_DN_DI+nBI_DESTINATION_Y:ud
+
+ //Write UV through DATAPORT
+ mov (2) rMSGSRC.0<1>:ud wORIX<2;2,1>:w // X origin and Y origin
+ asr (1) rMSGSRC.1<1>:d rMSGSRC.1<0;1,0>:d 1:w // U/V block origin should be half of Y's
+ mov (1) rMSGSRC.2<1>:ud nDPR_BLOCK_SIZE_UV:ud // block width and height (16x2)
+ mov (8) mudMSGHDR_DN(0)<1> rMSGSRC.0<8;8,1>:ud
+
+ mov (8) mubMSGHDR_DN(1, 0)<2> ubRESP(nDI_PREV_FRAME_CHROMA_OFFSET, 1)<16 ;8,2>
+ mov (8) mubMSGHDR_DN(1, 1)<2> ubRESP(nDI_PREV_FRAME_CHROMA_OFFSET, 16)<16 ;8,2>
+ mov (8) mubMSGHDR_DN(1, 16)<2> ubRESP(nDI_PREV_FRAME_CHROMA_OFFSET+1, 1)<16 ;8,2>
+ mov (8) mubMSGHDR_DN(1, 17)<2> ubRESP(nDI_PREV_FRAME_CHROMA_OFFSET+1, 16)<16 ;8,2>
+ send (8) dNULLREG mMSGHDR_DN udDUMMY_NULL nDATAPORT_WRITE nDPMW_MSGDSC+nMSGLEN_1+nBI_DESTINATION_UV:ud
+
+ // current frame
$for (0; <nY_NUM_OF_ROWS/2; 1) {
mov (16) mubMSGHDR_DN(1, %1*16)<1> ubRESP(nDI_CURR_FRAME_LUMA_OFFSET,%1*16)
}
@@ -87,7 +92,7 @@ SAVE_DN_CURR:
mov (2) rMSGSRC.0<1>:ud wORIX<2;2,1>:w // X origin and Y origin
mov (1) rMSGSRC.2<1>:ud nDPW_BLOCK_SIZE_DN:ud // block width and height (16x4)
mov (8) mudMSGHDR_DN(0)<1> rMSGSRC.0<8;8,1>:ud
- send (8) dNULLREG mMSGHDR_DN udDUMMY_NULL nDATAPORT_WRITE nDPMW_MSGDSC+nDPMW_MSG_LEN_PL_DN_DI+nBI_DESTINATION_Y:ud
+ send (8) dNULLREG mMSGHDR_DN udDUMMY_NULL nDATAPORT_WRITE nDPMW_MSGDSC+nDPMW_MSG_LEN_PL_DN_DI+nBI_DESTINATION_1_Y:ud
//Write UV through DATAPORT
mov (2) rMSGSRC.0<1>:ud wORIX<2;2,1>:w // X origin and Y origin
@@ -99,4 +104,4 @@ SAVE_DN_CURR:
mov (8) mubMSGHDR_DN(1, 1)<2> ubRESP(nDI_CURR_FRAME_CHROMA_OFFSET, 16)<16 ;8,2>
mov (8) mubMSGHDR_DN(1, 16)<2> ubRESP(nDI_CURR_FRAME_CHROMA_OFFSET+1, 1)<16 ;8,2>
mov (8) mubMSGHDR_DN(1, 17)<2> ubRESP(nDI_CURR_FRAME_CHROMA_OFFSET+1, 16)<16 ;8,2>
- send (8) dNULLREG mMSGHDR_DN udDUMMY_NULL nDATAPORT_WRITE nDPMW_MSGDSC+nMSGLEN_1+nBI_DESTINATION_UV:ud \ No newline at end of file
+ send (8) dNULLREG mMSGHDR_DN udDUMMY_NULL nDATAPORT_WRITE nDPMW_MSGDSC+nMSGLEN_1+nBI_DESTINATION_1_UV:ud
diff --git a/src/shaders/post_processing/gen5_6/Makefile.am b/src/shaders/post_processing/gen5_6/Makefile.am
index bb8caa4..4a13d9d 100755
--- a/src/shaders/post_processing/gen5_6/Makefile.am
+++ b/src/shaders/post_processing/gen5_6/Makefile.am
@@ -16,6 +16,7 @@ INTEL_PP_G4B_GEN5 = \
nv12_load_save_pl3.g4b.gen5 \
nv12_load_save_rgbx.g4b.gen5 \
nv12_scaling_nv12.g4b.gen5 \
+ pa_load_save_pa.g4b.gen5 \
pa_load_save_nv12.g4b.gen5 \
pa_load_save_pl3.g4b.gen5 \
pl3_load_save_nv12.g4b.gen5 \
@@ -33,6 +34,7 @@ INTEL_PP_G6B = \
nv12_load_save_pl3.g6b \
nv12_load_save_rgbx.g6b \
nv12_scaling_nv12.g6b \
+ pa_load_save_pa.g6b \
pa_load_save_nv12.g6b \
pa_load_save_pl3.g6b \
pl3_load_save_nv12.g6b \
@@ -50,6 +52,7 @@ INTEL_PP_ASM = \
nv12_load_save_pl3.asm \
nv12_load_save_rgbx.asm \
nv12_scaling_nv12.asm \
+ pa_load_save_pa.asm \
pa_load_save_nv12.asm \
pa_load_save_pl3.asm \
pl3_load_save_nv12.asm \
@@ -178,7 +181,7 @@ endif
all-local: $(TARGETS)
-SUFFIXES = .g4a .g4b .g6a .g6b .g5s .g6s .asm
+SUFFIXES = .g4a .g4b .g4b.gen5 .g6a .g6b .g5s .g6s .asm
if HAVE_GEN4ASM
.g4a.g4b:
diff --git a/src/shaders/post_processing/gen5_6/nv12_dndi_nv12.g4b.gen5 b/src/shaders/post_processing/gen5_6/nv12_dndi_nv12.g4b.gen5
index 446fb4b..4563d20 100644
--- a/src/shaders/post_processing/gen5_6/nv12_dndi_nv12.g4b.gen5
+++ b/src/shaders/post_processing/gen5_6/nv12_dndi_nv12.g4b.gen5
@@ -44,18 +44,23 @@
{ 0x00600001, 0x21a00022, 0x008d0100, 0x00000000 },
{ 0x00000001, 0x21c00022, 0x00000560, 0x00000000 },
{ 0x0d600031, 0x20000c04, 0x508d0000, 0x04082014 },
- { 0x00400040, 0x22083e28, 0x00690024, 0x07000700 },
- { 0x01000010, 0x20003e2c, 0x0000003b, 0x00010001 },
- { 0x00010220, 0x34001c00, 0x00001400, 0x0000000a },
- { 0x00400001, 0x20400022, 0x00690580, 0x00000000 },
- { 0x00400001, 0x20500022, 0x006904d0, 0x00000000 },
- { 0x00400001, 0x20600022, 0x00690590, 0x00000000 },
- { 0x00400001, 0x20700022, 0x006904f0, 0x00000000 },
- { 0x00000220, 0x34001c00, 0x00001400, 0x00000008 },
- { 0x00400001, 0x20400022, 0x006904c0, 0x00000000 },
- { 0x00400001, 0x20500022, 0x00690580, 0x00000000 },
- { 0x00400001, 0x20600022, 0x006904e0, 0x00000000 },
- { 0x00400001, 0x20700022, 0x00690590, 0x00000000 },
+ { 0x00800001, 0x20400232, 0x00b10440, 0x00000000 },
+ { 0x00800001, 0x20500232, 0x00b10450, 0x00000000 },
+ { 0x00800001, 0x20600232, 0x00b10460, 0x00000000 },
+ { 0x00800001, 0x20700232, 0x00b10470, 0x00000000 },
+ { 0x00200001, 0x210001a1, 0x004500a0, 0x00000000 },
+ { 0x00000001, 0x21080061, 0x00000000, 0x0003000f },
+ { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+ { 0x01600031, 0x20000c04, 0x508d0000, 0x06082007 },
+ { 0x00200001, 0x210001a1, 0x004500a0, 0x00000000 },
+ { 0x0000000c, 0x21043ca5, 0x00000104, 0x00010001 },
+ { 0x00000001, 0x21080061, 0x00000000, 0x0001000f },
+ { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+ { 0x00600001, 0x40400232, 0x00ae0481, 0x00000000 },
+ { 0x00600001, 0x40410232, 0x00ae0490, 0x00000000 },
+ { 0x00600001, 0x40500232, 0x00ae04a1, 0x00000000 },
+ { 0x00600001, 0x40510232, 0x00ae04b0, 0x00000000 },
+ { 0x01600031, 0x20000c04, 0x508d0000, 0x04082008 },
{ 0x00800001, 0x20400232, 0x00b104c0, 0x00000000 },
{ 0x00800001, 0x20500232, 0x00b104d0, 0x00000000 },
{ 0x00800001, 0x20600232, 0x00b104e0, 0x00000000 },
@@ -63,7 +68,7 @@
{ 0x00200001, 0x210001a1, 0x004500a0, 0x00000000 },
{ 0x00000001, 0x21080061, 0x00000000, 0x0003000f },
{ 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
- { 0x01600031, 0x20000c04, 0x508d0000, 0x06082007 },
+ { 0x01600031, 0x20000c04, 0x508d0000, 0x0608200a },
{ 0x00200001, 0x210001a1, 0x004500a0, 0x00000000 },
{ 0x0000000c, 0x21043ca5, 0x00000104, 0x00010001 },
{ 0x00000001, 0x21080061, 0x00000000, 0x0001000f },
@@ -72,7 +77,7 @@
{ 0x00600001, 0x40410232, 0x00ae0510, 0x00000000 },
{ 0x00600001, 0x40500232, 0x00ae0521, 0x00000000 },
{ 0x00600001, 0x40510232, 0x00ae0530, 0x00000000 },
- { 0x01600031, 0x20000c04, 0x508d0000, 0x04082008 },
+ { 0x01600031, 0x20000c04, 0x508d0000, 0x0408200b },
{ 0x01000040, 0x20863dad, 0x00000086, 0xffffffff },
{ 0x00000040, 0x20a03dad, 0x000000a0, 0x00100010 },
{ 0x05000010, 0x200035ac, 0x020000a0, 0x00000084 },
@@ -81,10 +86,10 @@
{ 0x01000010, 0x20003dac, 0x00000086, 0x00010001 },
{ 0x00010001, 0x20b80129, 0x000000c4, 0x00000000 },
{ 0x00010001, 0x20ba0231, 0x000000c6, 0x00000000 },
- { 0x00010220, 0x34001c00, 0x02001400, 0xffffff64 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0xffffff5a },
{ 0x00000001, 0x20a001ad, 0x0000008a, 0x00000000 },
{ 0x00000040, 0x20a23dad, 0x000000a2, 0x00080008 },
- { 0x00000220, 0x34001c00, 0x00001400, 0xffffff5e },
+ { 0x00000220, 0x34001c00, 0x00001400, 0xffffff54 },
{ 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
{ 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 },
{ 0x0f000031, 0x20000c04, 0x708d0000, 0x82000000 },
diff --git a/src/shaders/post_processing/gen5_6/nv12_dndi_nv12.g6b b/src/shaders/post_processing/gen5_6/nv12_dndi_nv12.g6b
index 111d483..8d6ebe3 100644
--- a/src/shaders/post_processing/gen5_6/nv12_dndi_nv12.g6b
+++ b/src/shaders/post_processing/gen5_6/nv12_dndi_nv12.g6b
@@ -117,18 +117,23 @@
{ 0x00600001, 0x21a00022, 0x008d0100, 0x00000000 },
{ 0x00000001, 0x21c00022, 0x00000560, 0x00000000 },
{ 0x05600031, 0x20000cc4, 0x000001a0, 0x04094014 },
- { 0x00400040, 0x22083e28, 0x00690024, 0x07000700 },
- { 0x01000010, 0x20003e2c, 0x0000003b, 0x00010001 },
- { 0x00010220, 0x34001c00, 0x00001400, 0x0000000a },
- { 0x00400001, 0x20400022, 0x00690580, 0x00000000 },
- { 0x00400001, 0x20500022, 0x006904d0, 0x00000000 },
- { 0x00400001, 0x20600022, 0x00690590, 0x00000000 },
- { 0x00400001, 0x20700022, 0x006904f0, 0x00000000 },
- { 0x00000220, 0x34001c00, 0x00001400, 0x00000008 },
- { 0x00400001, 0x20400022, 0x006904c0, 0x00000000 },
- { 0x00400001, 0x20500022, 0x00690580, 0x00000000 },
- { 0x00400001, 0x20600022, 0x006904e0, 0x00000000 },
- { 0x00400001, 0x20700022, 0x00690590, 0x00000000 },
+ { 0x00800001, 0x20400232, 0x00b10440, 0x00000000 },
+ { 0x00800001, 0x20500232, 0x00b10450, 0x00000000 },
+ { 0x00800001, 0x20600232, 0x00b10460, 0x00000000 },
+ { 0x00800001, 0x20700232, 0x00b10470, 0x00000000 },
+ { 0x00200001, 0x210001a1, 0x004500a0, 0x00000000 },
+ { 0x00000001, 0x21080061, 0x00000000, 0x0003000f },
+ { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+ { 0x05600031, 0x20000cc4, 0x00000020, 0x06094007 },
+ { 0x00200001, 0x210001a1, 0x004500a0, 0x00000000 },
+ { 0x0000000c, 0x21043ca5, 0x00000104, 0x00010001 },
+ { 0x00000001, 0x21080061, 0x00000000, 0x0001000f },
+ { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+ { 0x00600001, 0x40400232, 0x00ae0481, 0x00000000 },
+ { 0x00600001, 0x40410232, 0x00ae0490, 0x00000000 },
+ { 0x00600001, 0x40500232, 0x00ae04a1, 0x00000000 },
+ { 0x00600001, 0x40510232, 0x00ae04b0, 0x00000000 },
+ { 0x05600031, 0x20000cc4, 0x00000020, 0x04094008 },
{ 0x00800001, 0x20400232, 0x00b104c0, 0x00000000 },
{ 0x00800001, 0x20500232, 0x00b104d0, 0x00000000 },
{ 0x00800001, 0x20600232, 0x00b104e0, 0x00000000 },
@@ -136,7 +141,7 @@
{ 0x00200001, 0x210001a1, 0x004500a0, 0x00000000 },
{ 0x00000001, 0x21080061, 0x00000000, 0x0003000f },
{ 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
- { 0x05600031, 0x20000cc4, 0x00000020, 0x06094007 },
+ { 0x05600031, 0x20000cc4, 0x00000020, 0x0609400a },
{ 0x00200001, 0x210001a1, 0x004500a0, 0x00000000 },
{ 0x0000000c, 0x21043ca5, 0x00000104, 0x00010001 },
{ 0x00000001, 0x21080061, 0x00000000, 0x0001000f },
@@ -145,7 +150,7 @@
{ 0x00600001, 0x40410232, 0x00ae0510, 0x00000000 },
{ 0x00600001, 0x40500232, 0x00ae0521, 0x00000000 },
{ 0x00600001, 0x40510232, 0x00ae0530, 0x00000000 },
- { 0x05600031, 0x20000cc4, 0x00000020, 0x04094008 },
+ { 0x05600031, 0x20000cc4, 0x00000020, 0x0409400b },
{ 0x01000040, 0x20863dad, 0x00000086, 0xffffffff },
{ 0x00000040, 0x20a03dad, 0x000000a0, 0x00100010 },
{ 0x05000010, 0x200035ac, 0x020000a0, 0x00000084 },
@@ -154,10 +159,10 @@
{ 0x01000010, 0x20003dac, 0x00000086, 0x00010001 },
{ 0x00010001, 0x20b80129, 0x000000c4, 0x00000000 },
{ 0x00010001, 0x20ba0231, 0x000000c6, 0x00000000 },
- { 0x00010220, 0x34001c00, 0x02001400, 0xffffff64 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0xffffff5a },
{ 0x00000001, 0x20a001ad, 0x0000008a, 0x00000000 },
{ 0x00000040, 0x20a23dad, 0x000000a2, 0x00080008 },
- { 0x00000220, 0x34001c00, 0x00001400, 0xffffff5e },
+ { 0x00000220, 0x34001c00, 0x00001400, 0xffffff54 },
{ 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
{ 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 },
{ 0x07000031, 0x20001cc4, 0x000001e0, 0x82000010 },
diff --git a/src/shaders/post_processing/gen5_6/pa_load_save_pa.asm b/src/shaders/post_processing/gen5_6/pa_load_save_pa.asm
new file mode 100644
index 0000000..72c2a8a
--- /dev/null
+++ b/src/shaders/post_processing/gen5_6/pa_load_save_pa.asm
@@ -0,0 +1,17 @@
+// Module name: PA_LOAD_SAVE_PA
+.kernel PA_LOAD_SAVE_PA
+.code
+// The kernel body is assembled from the includes below, in this order; each stage lives in its own file.
+#include "SetupVPKernel.asm"
+#include "Multiple_Loop_Head.asm"
+#include "PA_Load_8x8.asm"
+#include "PL8x8_Save_PA.asm"
+#include "Multiple_Loop.asm"
+// NOTE(review): Multiple_Loop_Head/Multiple_Loop presumably bracket a per-block loop around the load+save stages -- confirm in those files.
+END_THREAD // End of Thread
+
+.end_code
+
+.end_kernel
+
+// end of pa_load_save_pa.asm
diff --git a/src/shaders/post_processing/gen5_6/pa_load_save_pa.g4b.gen5 b/src/shaders/post_processing/gen5_6/pa_load_save_pa.g4b.gen5
new file mode 100644
index 0000000..a75c75a
--- /dev/null
+++ b/src/shaders/post_processing/gen5_6/pa_load_save_pa.g4b.gen5
@@ -0,0 +1,115 @@
+ { 0x00600001, 0x21000021, 0x008d0000, 0x00000000 },
+ { 0x00000441, 0x20842e2d, 0x000000b7, 0x00100010 },
+ { 0x00000c01, 0x2086022d, 0x000000bb, 0x00000000 },
+ { 0x00000801, 0x208a01ad, 0x000000a0, 0x00000000 },
+ { 0x00200001, 0x209403bd, 0x006600a4, 0x00000000 },
+ { 0x00000040, 0x208435ad, 0x00000084, 0x000000a0 },
+ { 0x00200040, 0x210035a5, 0x004500a0, 0x00450074 },
+ { 0x00000009, 0x21003da5, 0x00000100, 0x00010001 },
+ { 0x00000001, 0x21080061, 0x00000000, 0x0007001f },
+ { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+ { 0x01600031, 0x27000c01, 0x408d0000, 0x0288a001 },
+ { 0x00400040, 0x22083e28, 0x00690024, 0x07000700 },
+ { 0x00800001, 0x21400229, 0x00d29000, 0x00000000 },
+ { 0x00600001, 0x22400229, 0x00cf9400, 0x00000000 },
+ { 0x00600001, 0x23400229, 0x00cf9800, 0x00000000 },
+ { 0x00800001, 0x21600229, 0x00d29020, 0x00000000 },
+ { 0x00600001, 0x22500229, 0x00cf9420, 0x00000000 },
+ { 0x00600001, 0x23500229, 0x00cf9820, 0x00000000 },
+ { 0x00800001, 0x21800229, 0x00d29040, 0x00000000 },
+ { 0x00600001, 0x22600229, 0x00cf9440, 0x00000000 },
+ { 0x00600001, 0x23600229, 0x00cf9840, 0x00000000 },
+ { 0x00800001, 0x21a00229, 0x00d29060, 0x00000000 },
+ { 0x00600001, 0x22700229, 0x00cf9460, 0x00000000 },
+ { 0x00600001, 0x23700229, 0x00cf9860, 0x00000000 },
+ { 0x00800001, 0x21c00229, 0x00d29080, 0x00000000 },
+ { 0x00600001, 0x22800229, 0x00cf9480, 0x00000000 },
+ { 0x00600001, 0x23800229, 0x00cf9880, 0x00000000 },
+ { 0x00800001, 0x21e00229, 0x00d290a0, 0x00000000 },
+ { 0x00600001, 0x22900229, 0x00cf94a0, 0x00000000 },
+ { 0x00600001, 0x23900229, 0x00cf98a0, 0x00000000 },
+ { 0x00800001, 0x22000229, 0x00d290c0, 0x00000000 },
+ { 0x00600001, 0x22a00229, 0x00cf94c0, 0x00000000 },
+ { 0x00600001, 0x23a00229, 0x00cf98c0, 0x00000000 },
+ { 0x00800001, 0x22200229, 0x00d290e0, 0x00000000 },
+ { 0x00600001, 0x22b00229, 0x00cf94e0, 0x00000000 },
+ { 0x00600001, 0x23b00229, 0x00cf98e0, 0x00000000 },
+ { 0x00400040, 0x22083e28, 0x00690028, 0x07000700 },
+ { 0x00800001, 0xd0000231, 0x00d20140, 0x00000000 },
+ { 0x00800001, 0xd0200231, 0x00d20160, 0x00000000 },
+ { 0x00800001, 0xd0400231, 0x00d20180, 0x00000000 },
+ { 0x00800001, 0xd0600231, 0x00d201a0, 0x00000000 },
+ { 0x00800001, 0xd0800231, 0x00d201c0, 0x00000000 },
+ { 0x00800001, 0xd0a00231, 0x00d201e0, 0x00000000 },
+ { 0x00800001, 0xd0c00231, 0x00d20200, 0x00000000 },
+ { 0x00800001, 0xd0e00231, 0x00d20220, 0x00000000 },
+ { 0x00600001, 0xf4000231, 0x00ae0240, 0x00000000 },
+ { 0x00600001, 0xf8000231, 0x00ae0340, 0x00000000 },
+ { 0x00600001, 0xf4200231, 0x00ae0250, 0x00000000 },
+ { 0x00600001, 0xf8200231, 0x00ae0350, 0x00000000 },
+ { 0x00600001, 0xf4400231, 0x00ae0260, 0x00000000 },
+ { 0x00600001, 0xf8400231, 0x00ae0360, 0x00000000 },
+ { 0x00600001, 0xf4600231, 0x00ae0270, 0x00000000 },
+ { 0x00600001, 0xf8600231, 0x00ae0370, 0x00000000 },
+ { 0x00600001, 0xf4800231, 0x00ae0280, 0x00000000 },
+ { 0x00600001, 0xf8800231, 0x00ae0380, 0x00000000 },
+ { 0x00600001, 0xf4a00231, 0x00ae0290, 0x00000000 },
+ { 0x00600001, 0xf8a00231, 0x00ae0390, 0x00000000 },
+ { 0x00600001, 0xf4c00231, 0x00ae02a0, 0x00000000 },
+ { 0x00600001, 0xf8c00231, 0x00ae03a0, 0x00000000 },
+ { 0x00600001, 0xf4e00231, 0x00ae02b0, 0x00000000 },
+ { 0x00600001, 0xf8e00231, 0x00ae03b0, 0x00000000 },
+ { 0x00000409, 0x21003da5, 0x000000a0, 0x00010001 },
+ { 0x00000c01, 0x210401a5, 0x000000a2, 0x00000000 },
+ { 0x00000801, 0x21080061, 0x00000000, 0x0007001f },
+ { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+ { 0x00000005, 0x24000c20, 0x000000b8, 0x00ffffff },
+ { 0x04000010, 0x20000c04, 0x00000400, 0x00ffffff },
+ { 0x00010220, 0x34001c00, 0x00001400, 0x0000002a },
+ { 0x01600031, 0x21400c01, 0x408d0000, 0x0288a007 },
+ { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+ { 0x00000001, 0x26000228, 0x000000ba, 0x00000000 },
+ { 0x00610001, 0x24400129, 0x000000b8, 0x00000000 },
+ { 0x00710001, 0x24400169, 0x00000000, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x00000440, 0x00000000 },
+ { 0x00910001, 0x27000129, 0x02b10140, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x00000442, 0x00000000 },
+ { 0x00910001, 0x27200129, 0x02b10160, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x00000444, 0x00000000 },
+ { 0x00910001, 0x27400129, 0x02b10180, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x00000446, 0x00000000 },
+ { 0x00910001, 0x27600129, 0x02b101a0, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x00000448, 0x00000000 },
+ { 0x00910001, 0x27800129, 0x02b101c0, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x0000044a, 0x00000000 },
+ { 0x00910001, 0x27a00129, 0x02b101e0, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x0000044c, 0x00000000 },
+ { 0x00910001, 0x27c00129, 0x02b10200, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x0000044e, 0x00000000 },
+ { 0x00910001, 0x27e00129, 0x02b10220, 0x00000000 },
+ { 0x00600001, 0x20400022, 0x008d0700, 0x00000000 },
+ { 0x00600001, 0x20600022, 0x008d0720, 0x00000000 },
+ { 0x00600001, 0x20800022, 0x008d0740, 0x00000000 },
+ { 0x00600001, 0x20a00022, 0x008d0760, 0x00000000 },
+ { 0x00600001, 0x20c00022, 0x008d0780, 0x00000000 },
+ { 0x00600001, 0x20e00022, 0x008d07a0, 0x00000000 },
+ { 0x00600001, 0x21000022, 0x008d07c0, 0x00000000 },
+ { 0x00600001, 0x21200022, 0x008d07e0, 0x00000000 },
+ { 0x01600031, 0x20000c04, 0x508d0000, 0x12082007 },
+ { 0x01000040, 0x20863dad, 0x00000086, 0xffffffff },
+ { 0x00000040, 0x20a03dad, 0x000000a0, 0x00100010 },
+ { 0x05000010, 0x200035ac, 0x020000a0, 0x00000084 },
+ { 0x00010220, 0x34001c00, 0x00001400, 0x00000010 },
+ { 0x00000001, 0x20b80129, 0x000000c8, 0x00000000 },
+ { 0x01000010, 0x20003dac, 0x00000086, 0x00010001 },
+ { 0x00010001, 0x20b80129, 0x000000c4, 0x00000000 },
+ { 0x00010001, 0x20ba0231, 0x000000c6, 0x00000000 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0xffffff36 },
+ { 0x00000001, 0x20a001ad, 0x0000008a, 0x00000000 },
+ { 0x00000040, 0x20a23dad, 0x000000a2, 0x00080008 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0xffffff30 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 },
+ { 0x0f000031, 0x20000c04, 0x708d0000, 0x82000000 },
+ { 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 },
+ { 0x0f000031, 0x20000c04, 0x708d0000, 0x82000000 },
diff --git a/src/shaders/post_processing/gen5_6/pa_load_save_pa.g6b b/src/shaders/post_processing/gen5_6/pa_load_save_pa.g6b
new file mode 100644
index 0000000..5d9fe48
--- /dev/null
+++ b/src/shaders/post_processing/gen5_6/pa_load_save_pa.g6b
@@ -0,0 +1,188 @@
+ { 0x00600001, 0x20e00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21000061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21200061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21400061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21600061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21800061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21a00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21c00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21e00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x22000061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x22200061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x22400061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x22600061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x22800061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x22a00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x22c00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x22e00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x23000061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x23200061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x23400061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x23600061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x23800061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x23a00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x23c00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x23e00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x24000061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x24200061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x24400061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x24600061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x24800061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x24a00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x24c00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x24e00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x25000061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x25200061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x25400061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x25600061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x25800061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x25a00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x25c00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x25e00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x26000061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x26200061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x26400061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x26600061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x26800061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x26a00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x26c00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x26e00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x27000061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x27200061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x27400061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x27600061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x27800061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x27a00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x27c00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x27e00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x28000061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x28200061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x28400061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x28600061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x28800061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x28a00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x28c00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x28e00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x29000061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x29200061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x29400061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x29600061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x29800061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x29a00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x29c00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x29e00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21000021, 0x008d0000, 0x00000000 },
+ { 0x00000441, 0x20842e2d, 0x000000b7, 0x00100010 },
+ { 0x00000c01, 0x2086022d, 0x000000bb, 0x00000000 },
+ { 0x00000801, 0x208a01ad, 0x000000a0, 0x00000000 },
+ { 0x00200001, 0x209403bd, 0x006600a4, 0x00000000 },
+ { 0x00000040, 0x208435ad, 0x00000084, 0x000000a0 },
+ { 0x00200040, 0x210035a5, 0x004500a0, 0x00450074 },
+ { 0x00000009, 0x21003da5, 0x00000100, 0x00010001 },
+ { 0x00000001, 0x21080061, 0x00000000, 0x0007001f },
+ { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+ { 0x04600031, 0x27000cc1, 0x00000020, 0x02898001 },
+ { 0x00400040, 0x22083e28, 0x00690024, 0x07000700 },
+ { 0x00800001, 0x21400229, 0x00d29000, 0x00000000 },
+ { 0x00600001, 0x22400229, 0x00cf9400, 0x00000000 },
+ { 0x00600001, 0x23400229, 0x00cf9800, 0x00000000 },
+ { 0x00800001, 0x21600229, 0x00d29020, 0x00000000 },
+ { 0x00600001, 0x22500229, 0x00cf9420, 0x00000000 },
+ { 0x00600001, 0x23500229, 0x00cf9820, 0x00000000 },
+ { 0x00800001, 0x21800229, 0x00d29040, 0x00000000 },
+ { 0x00600001, 0x22600229, 0x00cf9440, 0x00000000 },
+ { 0x00600001, 0x23600229, 0x00cf9840, 0x00000000 },
+ { 0x00800001, 0x21a00229, 0x00d29060, 0x00000000 },
+ { 0x00600001, 0x22700229, 0x00cf9460, 0x00000000 },
+ { 0x00600001, 0x23700229, 0x00cf9860, 0x00000000 },
+ { 0x00800001, 0x21c00229, 0x00d29080, 0x00000000 },
+ { 0x00600001, 0x22800229, 0x00cf9480, 0x00000000 },
+ { 0x00600001, 0x23800229, 0x00cf9880, 0x00000000 },
+ { 0x00800001, 0x21e00229, 0x00d290a0, 0x00000000 },
+ { 0x00600001, 0x22900229, 0x00cf94a0, 0x00000000 },
+ { 0x00600001, 0x23900229, 0x00cf98a0, 0x00000000 },
+ { 0x00800001, 0x22000229, 0x00d290c0, 0x00000000 },
+ { 0x00600001, 0x22a00229, 0x00cf94c0, 0x00000000 },
+ { 0x00600001, 0x23a00229, 0x00cf98c0, 0x00000000 },
+ { 0x00800001, 0x22200229, 0x00d290e0, 0x00000000 },
+ { 0x00600001, 0x22b00229, 0x00cf94e0, 0x00000000 },
+ { 0x00600001, 0x23b00229, 0x00cf98e0, 0x00000000 },
+ { 0x00400040, 0x22083e28, 0x00690028, 0x07000700 },
+ { 0x00800001, 0xd0000231, 0x00d20140, 0x00000000 },
+ { 0x00800001, 0xd0200231, 0x00d20160, 0x00000000 },
+ { 0x00800001, 0xd0400231, 0x00d20180, 0x00000000 },
+ { 0x00800001, 0xd0600231, 0x00d201a0, 0x00000000 },
+ { 0x00800001, 0xd0800231, 0x00d201c0, 0x00000000 },
+ { 0x00800001, 0xd0a00231, 0x00d201e0, 0x00000000 },
+ { 0x00800001, 0xd0c00231, 0x00d20200, 0x00000000 },
+ { 0x00800001, 0xd0e00231, 0x00d20220, 0x00000000 },
+ { 0x00600001, 0xf4000231, 0x00ae0240, 0x00000000 },
+ { 0x00600001, 0xf8000231, 0x00ae0340, 0x00000000 },
+ { 0x00600001, 0xf4200231, 0x00ae0250, 0x00000000 },
+ { 0x00600001, 0xf8200231, 0x00ae0350, 0x00000000 },
+ { 0x00600001, 0xf4400231, 0x00ae0260, 0x00000000 },
+ { 0x00600001, 0xf8400231, 0x00ae0360, 0x00000000 },
+ { 0x00600001, 0xf4600231, 0x00ae0270, 0x00000000 },
+ { 0x00600001, 0xf8600231, 0x00ae0370, 0x00000000 },
+ { 0x00600001, 0xf4800231, 0x00ae0280, 0x00000000 },
+ { 0x00600001, 0xf8800231, 0x00ae0380, 0x00000000 },
+ { 0x00600001, 0xf4a00231, 0x00ae0290, 0x00000000 },
+ { 0x00600001, 0xf8a00231, 0x00ae0390, 0x00000000 },
+ { 0x00600001, 0xf4c00231, 0x00ae02a0, 0x00000000 },
+ { 0x00600001, 0xf8c00231, 0x00ae03a0, 0x00000000 },
+ { 0x00600001, 0xf4e00231, 0x00ae02b0, 0x00000000 },
+ { 0x00600001, 0xf8e00231, 0x00ae03b0, 0x00000000 },
+ { 0x00000409, 0x21003da5, 0x000000a0, 0x00010001 },
+ { 0x00000c01, 0x210401a5, 0x000000a2, 0x00000000 },
+ { 0x00000801, 0x21080061, 0x00000000, 0x0007001f },
+ { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+ { 0x00000005, 0x24000c20, 0x000000b8, 0x00ffffff },
+ { 0x04000010, 0x20000c04, 0x00000400, 0x00ffffff },
+ { 0x00010220, 0x34001c00, 0x00001400, 0x0000002a },
+ { 0x04600031, 0x21400cc1, 0x00000020, 0x02898007 },
+ { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+ { 0x00000001, 0x26000228, 0x000000ba, 0x00000000 },
+ { 0x00610001, 0x24400129, 0x000000b8, 0x00000000 },
+ { 0x00710001, 0x24400169, 0x00000000, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x00000440, 0x00000000 },
+ { 0x00910001, 0x27000129, 0x02b10140, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x00000442, 0x00000000 },
+ { 0x00910001, 0x27200129, 0x02b10160, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x00000444, 0x00000000 },
+ { 0x00910001, 0x27400129, 0x02b10180, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x00000446, 0x00000000 },
+ { 0x00910001, 0x27600129, 0x02b101a0, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x00000448, 0x00000000 },
+ { 0x00910001, 0x27800129, 0x02b101c0, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x0000044a, 0x00000000 },
+ { 0x00910001, 0x27a00129, 0x02b101e0, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x0000044c, 0x00000000 },
+ { 0x00910001, 0x27c00129, 0x02b10200, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x0000044e, 0x00000000 },
+ { 0x00910001, 0x27e00129, 0x02b10220, 0x00000000 },
+ { 0x00600001, 0x20400022, 0x008d0700, 0x00000000 },
+ { 0x00600001, 0x20600022, 0x008d0720, 0x00000000 },
+ { 0x00600001, 0x20800022, 0x008d0740, 0x00000000 },
+ { 0x00600001, 0x20a00022, 0x008d0760, 0x00000000 },
+ { 0x00600001, 0x20c00022, 0x008d0780, 0x00000000 },
+ { 0x00600001, 0x20e00022, 0x008d07a0, 0x00000000 },
+ { 0x00600001, 0x21000022, 0x008d07c0, 0x00000000 },
+ { 0x00600001, 0x21200022, 0x008d07e0, 0x00000000 },
+ { 0x05600031, 0x20000cc4, 0x00000020, 0x12094007 },
+ { 0x01000040, 0x20863dad, 0x00000086, 0xffffffff },
+ { 0x00000040, 0x20a03dad, 0x000000a0, 0x00100010 },
+ { 0x05000010, 0x200035ac, 0x020000a0, 0x00000084 },
+ { 0x00010220, 0x34001c00, 0x00001400, 0x00000010 },
+ { 0x00000001, 0x20b80129, 0x000000c8, 0x00000000 },
+ { 0x01000010, 0x20003dac, 0x00000086, 0x00010001 },
+ { 0x00010001, 0x20b80129, 0x000000c4, 0x00000000 },
+ { 0x00010001, 0x20ba0231, 0x000000c6, 0x00000000 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0xffffff36 },
+ { 0x00000001, 0x20a001ad, 0x0000008a, 0x00000000 },
+ { 0x00000040, 0x20a23dad, 0x000000a2, 0x00080008 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0xffffff30 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 },
+ { 0x07000031, 0x20001cc4, 0x000001e0, 0x82000010 },
+ { 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 },
+ { 0x07000031, 0x20001cc4, 0x000001e0, 0x82000010 },
diff --git a/src/shaders/post_processing/gen7/Makefile.am b/src/shaders/post_processing/gen7/Makefile.am
index f1a1c60..f4e2a8d 100644
--- a/src/shaders/post_processing/gen7/Makefile.am
+++ b/src/shaders/post_processing/gen7/Makefile.am
@@ -4,6 +4,7 @@ INTEL_PP_G7B = \
nv12_dn_nv12.g7b \
pa_to_pl2.g7b \
pa_to_pl3.g7b \
+ pa_to_pa.g7b \
pl2_to_pa.g7b \
pl2_to_pl2.g7b \
pl2_to_pl3.g7b \
@@ -81,16 +82,18 @@ all-local: $(TARGETS)
SUFFIXES = .g7b .g7s .asm
+if HAVE_GEN4ASM
$(INTEL_PP_GEN7_ASM): $(INTEL_PP_ASM) $(INTEL_PP_G4A)
.asm.g7s:
$(AM_V_GEN)cpp $< > _pp0.$@; \
../../gpp.py _pp0.$@ $@; \
rm _pp0.$@
.g7s.g7b:
- $(AM_V_GEN)intel-gen4asm -a -o $@ -g 7 $<
+ $(AM_V_GEN)$(GEN4ASM) -a -o $@ -g 7 $<
.g7s.g75b:
- $(AM_V_GEN)intel-gen4asm -a -o $@ -g 7.5 $<
+ $(AM_V_GEN)$(GEN4ASM) -a -o $@ -g 7.5 $<
+endif
CLEANFILES = $(INTEL_PP_GEN7_ASM)
diff --git a/src/shaders/post_processing/gen7/pa_to_pa.asm b/src/shaders/post_processing/gen7/pa_to_pa.asm
new file mode 100644
index 0000000..62f14bd
--- /dev/null
+++ b/src/shaders/post_processing/gen7/pa_to_pa.asm
@@ -0,0 +1,17 @@
+// Module name: AVS
+.kernel PA_TO_PA
+.code
+// Kernel name matches this file (pa_to_pa) and its Save_AVS_PA output stage; the body is the ordered include list below.
+#include "VP_Setup.g4a"
+#include "Set_Layer_0.g4a"
+#include "Set_AVS_Buf_0123_VYUA.g4a"
+#include "PA_AVS_Buf_0.g4a"
+#include "PA_AVS_Buf_1.g4a"
+#include "PA_AVS_Buf_2.g4a"
+#include "PA_AVS_Buf_3.g4a"
+#include "Save_AVS_PA.g4a"
+#include "EOT.g4a"
+
+.end_code
+
+.end_kernel
diff --git a/src/shaders/post_processing/gen7/pa_to_pa.g75b b/src/shaders/post_processing/gen7/pa_to_pa.g75b
new file mode 100644
index 0000000..0ccd59e
--- /dev/null
+++ b/src/shaders/post_processing/gen7/pa_to_pa.g75b
@@ -0,0 +1,677 @@
+ { 0x00600001, 0x23600021, 0x008d0000, 0x00000000 },
+ { 0x00600001, 0x23200021, 0x008d0000, 0x00000000 },
+ { 0x00600001, 0x23400021, 0x008d0000, 0x00000000 },
+ { 0x00600041, 0x206077bd, 0x008d0060, 0x000000f0 },
+ { 0x00200001, 0x211401bd, 0x004500e0, 0x00000000 },
+ { 0x01600010, 0x20002e24, 0x0000005a, 0x00010001 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x00000000 },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+ { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c077bd, 0x00000060, 0x00000114 },
+ { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a077bd, 0x00000080, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+ { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c077bd, 0x00000060, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a077bd, 0x00000080, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c077bd, 0x00000060, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a077bd, 0x00000080, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c077bd, 0x00000060, 0x00000220 },
+ { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a077bd, 0x00000080, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x00020002 },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+ { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+ { 0x00110048, 0x20c477bd, 0x00000064, 0x00000114 },
+ { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+ { 0x00000048, 0x20a477bd, 0x00000084, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+ { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+ { 0x00110048, 0x20c477bd, 0x00000064, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+ { 0x00000048, 0x20a477bd, 0x00000084, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+ { 0x00110048, 0x20c477bd, 0x00000064, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+ { 0x00000048, 0x20a477bd, 0x00000084, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+ { 0x00110048, 0x20c477bd, 0x00000064, 0x00000220 },
+ { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+ { 0x00000048, 0x20a477bd, 0x00000084, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x00040004 },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+ { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+ { 0x00110048, 0x20c877bd, 0x00000068, 0x00000114 },
+ { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+ { 0x00000048, 0x20a877bd, 0x00000088, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+ { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+ { 0x00110048, 0x20c877bd, 0x00000068, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+ { 0x00000048, 0x20a877bd, 0x00000088, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+ { 0x00110048, 0x20c877bd, 0x00000068, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+ { 0x00000048, 0x20a877bd, 0x00000088, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+ { 0x00110048, 0x20c877bd, 0x00000068, 0x00000220 },
+ { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+ { 0x00000048, 0x20a877bd, 0x00000088, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x00060006 },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+ { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+ { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000114 },
+ { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+ { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+ { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+ { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+ { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+ { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+ { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+ { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000220 },
+ { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+ { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x00080008 },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+ { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+ { 0x00110048, 0x20d077bd, 0x00000070, 0x00000114 },
+ { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+ { 0x00000048, 0x20b077bd, 0x00000090, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+ { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+ { 0x00110048, 0x20d077bd, 0x00000070, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+ { 0x00000048, 0x20b077bd, 0x00000090, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+ { 0x00110048, 0x20d077bd, 0x00000070, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+ { 0x00000048, 0x20b077bd, 0x00000090, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+ { 0x00110048, 0x20d077bd, 0x00000070, 0x00000220 },
+ { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+ { 0x00000048, 0x20b077bd, 0x00000090, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x000a000a },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+ { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+ { 0x00110048, 0x20d477bd, 0x00000074, 0x00000114 },
+ { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+ { 0x00000048, 0x20b477bd, 0x00000094, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+ { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+ { 0x00110048, 0x20d477bd, 0x00000074, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+ { 0x00000048, 0x20b477bd, 0x00000094, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+ { 0x00110048, 0x20d477bd, 0x00000074, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+ { 0x00000048, 0x20b477bd, 0x00000094, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+ { 0x00110048, 0x20d477bd, 0x00000074, 0x00000220 },
+ { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+ { 0x00000048, 0x20b477bd, 0x00000094, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x000c000c },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+ { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+ { 0x00110048, 0x20d877bd, 0x00000078, 0x00000114 },
+ { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+ { 0x00000048, 0x20b877bd, 0x00000098, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+ { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+ { 0x00110048, 0x20d877bd, 0x00000078, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+ { 0x00000048, 0x20b877bd, 0x00000098, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+ { 0x00110048, 0x20d877bd, 0x00000078, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+ { 0x00000048, 0x20b877bd, 0x00000098, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+ { 0x00110048, 0x20d877bd, 0x00000078, 0x00000220 },
+ { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+ { 0x00000048, 0x20b877bd, 0x00000098, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x000e000e },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+ { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+ { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000114 },
+ { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+ { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+ { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+ { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+ { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+ { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+ { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+ { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000220 },
+ { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+ { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00800001, 0x22a00169, 0x00000000, 0xffffffff },
+ { 0x02000010, 0x20002d24, 0x020000e4, 0x00000000 },
+ { 0x02010010, 0x20002d24, 0x020000e6, 0x00000000 },
+ { 0x00110220, 0x34001c00, 0x02001400, 0x000000d0 },
+ { 0x00000005, 0x23022d31, 0x00000044, 0x00030003 },
+ { 0x00000401, 0x233803bd, 0x000000f4, 0x00000000 },
+ { 0x00000c01, 0x233003bd, 0x00000060, 0x00000000 },
+ { 0x00000c01, 0x233403bd, 0x00000080, 0x00000000 },
+ { 0x00000c01, 0x232803bd, 0x000000c0, 0x00000000 },
+ { 0x00000801, 0x232c03bd, 0x000000a0, 0x00000000 },
+ { 0x00000001, 0x240803bc, 0x000000c0, 0x00000000 },
+ { 0x00000048, 0x24087fbc, 0x00000060, 0x41000000 },
+ { 0x00000448, 0x22e87fbd, 0x000000f4, 0x41e00000 },
+ { 0x00000c41, 0x22e47fbd, 0x00000080, 0x40800000 },
+ { 0x00000001, 0x241003bc, 0x00000060, 0x00000000 },
+ { 0x00000c48, 0x22f07fbd, 0x000000f4, 0x41000000 },
+ { 0x00000801, 0x22f40061, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00400001, 0x2400036c, 0x00000000, 0x000062ea },
+ { 0x00400040, 0x24002d8c, 0x00690400, 0x00460046 },
+ { 0x00400009, 0x22c02d8d, 0x00690400, 0x00050005 },
+ { 0x00400001, 0x2400036c, 0x00000000, 0x00006420 },
+ { 0x00400040, 0x24002d8c, 0x00690400, 0x00400040 },
+ { 0x00400409, 0x22402d8d, 0x00690400, 0x00050005 },
+ { 0x00000801, 0x22500061, 0x00000000, 0x01000100 },
+ { 0x00000001, 0x22d00061, 0x00000000, 0x00400040 },
+ { 0x00110220, 0x34001c00, 0x02001400, 0x00000240 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x050eb400 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x00000000 },
+ { 0x00000401, 0x233c0021, 0x000000fc, 0x00000000 },
+ { 0x00000801, 0x23240121, 0x000000f8, 0x00000000 },
+ { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+ { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+ { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+ { 0x00110220, 0x34001c00, 0x00001400, 0x000001a0 },
+ { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+ { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+ { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+ { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+ { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+ { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+ { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+ { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+ { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+ { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+ { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+ { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+ { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+ { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+ { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+ { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+ { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+ { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+ { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+ { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+ { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+ { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+ { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+ { 0x02000031, 0x28000229, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00110220, 0x34001c00, 0x02001400, 0x00000230 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x050eb400 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x00000000 },
+ { 0x00000040, 0x23240d21, 0x000000f8, 0x00000001 },
+ { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+ { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+ { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+ { 0x00110220, 0x34001c00, 0x00001400, 0x000001a0 },
+ { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+ { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+ { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+ { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+ { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+ { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+ { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+ { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+ { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+ { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+ { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+ { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+ { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+ { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+ { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+ { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+ { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+ { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+ { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+ { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+ { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+ { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+ { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+ { 0x02000031, 0x2a000229, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00110220, 0x34001c00, 0x02001400, 0x00000230 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x050eb400 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x00000000 },
+ { 0x00000040, 0x23240d21, 0x000000f8, 0x00000002 },
+ { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+ { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+ { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+ { 0x00110220, 0x34001c00, 0x00001400, 0x000001a0 },
+ { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+ { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+ { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+ { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+ { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+ { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+ { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+ { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+ { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+ { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+ { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+ { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+ { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+ { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+ { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+ { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+ { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+ { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+ { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+ { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+ { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+ { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+ { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+ { 0x02000031, 0x2c000229, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00110220, 0x34001c00, 0x02001400, 0x00000230 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x050eb400 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x00000000 },
+ { 0x00000040, 0x23240d21, 0x000000f8, 0x00000003 },
+ { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+ { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+ { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+ { 0x00110220, 0x34001c00, 0x00001400, 0x000001a0 },
+ { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+ { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+ { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+ { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+ { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+ { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+ { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+ { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+ { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+ { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+ { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+ { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+ { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+ { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+ { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+ { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+ { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+ { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+ { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+ { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+ { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+ { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+ { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+ { 0x02000031, 0x2e000229, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00400040, 0x22002da8, 0x006902c0, 0x00000000 },
+ { 0x00400040, 0x22082e2c, 0x0069005c, 0x03a003a0 },
+ { 0x00000409, 0x23603da5, 0x000000e0, 0x00010001 },
+ { 0x00000c01, 0x236401a5, 0x000000e2, 0x00000000 },
+ { 0x00000801, 0x23680061, 0x00000000, 0x0001001f },
+ { 0x80400040, 0xc0002d29, 0x008a8000, 0x00800080 },
+ { 0x80400040, 0xc0202d29, 0x008a8020, 0x00800080 },
+ { 0x80400040, 0xc0402d29, 0x008a8040, 0x00800080 },
+ { 0x80400040, 0xc0602d29, 0x008a8060, 0x00800080 },
+ { 0x80600040, 0xa4002d29, 0x008d8400, 0x00800080 },
+ { 0x80600040, 0xa4202d29, 0x008d8420, 0x00800080 },
+ { 0x80600040, 0xa4402d29, 0x008d8440, 0x00800080 },
+ { 0x80600040, 0xa4602d29, 0x008d8460, 0x00800080 },
+ { 0x80400040, 0xc8002d29, 0x008a8800, 0x00800080 },
+ { 0x80400040, 0xc8202d29, 0x008a8820, 0x00800080 },
+ { 0x80400040, 0xc8402d29, 0x008a8840, 0x00800080 },
+ { 0x80400040, 0xc8602d29, 0x008a8860, 0x00800080 },
+ { 0x80400040, 0xc0102d29, 0x008a8010, 0x00800080 },
+ { 0x80400040, 0xc0302d29, 0x008a8030, 0x00800080 },
+ { 0x80400040, 0xc0502d29, 0x008a8050, 0x00800080 },
+ { 0x80400040, 0xc0702d29, 0x008a8070, 0x00800080 },
+ { 0x80600040, 0xa4102d29, 0x008d8410, 0x00800080 },
+ { 0x80600040, 0xa4302d29, 0x008d8430, 0x00800080 },
+ { 0x80600040, 0xa4502d29, 0x008d8450, 0x00800080 },
+ { 0x80600040, 0xa4702d29, 0x008d8470, 0x00800080 },
+ { 0x80400040, 0xc8102d29, 0x008a8810, 0x00800080 },
+ { 0x80400040, 0xc8302d29, 0x008a8830, 0x00800080 },
+ { 0x80400040, 0xc8502d29, 0x008a8850, 0x00800080 },
+ { 0x80400040, 0xc8702d29, 0x008a8870, 0x00800080 },
+ { 0x00400040, 0x22002da8, 0x006902c0, 0x02000200 },
+ { 0x80400040, 0xc0002d29, 0x008a8000, 0x00800080 },
+ { 0x80400040, 0xc0202d29, 0x008a8020, 0x00800080 },
+ { 0x80400040, 0xc0402d29, 0x008a8040, 0x00800080 },
+ { 0x80400040, 0xc0602d29, 0x008a8060, 0x00800080 },
+ { 0x80600040, 0xa4002d29, 0x008d8400, 0x00800080 },
+ { 0x80600040, 0xa4202d29, 0x008d8420, 0x00800080 },
+ { 0x80600040, 0xa4402d29, 0x008d8440, 0x00800080 },
+ { 0x80600040, 0xa4602d29, 0x008d8460, 0x00800080 },
+ { 0x80400040, 0xc8002d29, 0x008a8800, 0x00800080 },
+ { 0x80400040, 0xc8202d29, 0x008a8820, 0x00800080 },
+ { 0x80400040, 0xc8402d29, 0x008a8840, 0x00800080 },
+ { 0x80400040, 0xc8602d29, 0x008a8860, 0x00800080 },
+ { 0x80400040, 0xc0102d29, 0x008a8010, 0x00800080 },
+ { 0x80400040, 0xc0302d29, 0x008a8030, 0x00800080 },
+ { 0x80400040, 0xc0502d29, 0x008a8050, 0x00800080 },
+ { 0x80400040, 0xc0702d29, 0x008a8070, 0x00800080 },
+ { 0x80600040, 0xa4102d29, 0x008d8410, 0x00800080 },
+ { 0x80600040, 0xa4302d29, 0x008d8430, 0x00800080 },
+ { 0x80600040, 0xa4502d29, 0x008d8450, 0x00800080 },
+ { 0x80600040, 0xa4702d29, 0x008d8470, 0x00800080 },
+ { 0x80400040, 0xc8102d29, 0x008a8810, 0x00800080 },
+ { 0x80400040, 0xc8302d29, 0x008a8830, 0x00800080 },
+ { 0x80400040, 0xc8502d29, 0x008a8850, 0x00800080 },
+ { 0x80400040, 0xc8702d29, 0x008a8870, 0x00800080 },
+ { 0x00400040, 0x22002da8, 0x006902c0, 0x04000400 },
+ { 0x80400040, 0xc0002d29, 0x008a8000, 0x00800080 },
+ { 0x80400040, 0xc0202d29, 0x008a8020, 0x00800080 },
+ { 0x80400040, 0xc0402d29, 0x008a8040, 0x00800080 },
+ { 0x80400040, 0xc0602d29, 0x008a8060, 0x00800080 },
+ { 0x80600040, 0xa4002d29, 0x008d8400, 0x00800080 },
+ { 0x80600040, 0xa4202d29, 0x008d8420, 0x00800080 },
+ { 0x80600040, 0xa4402d29, 0x008d8440, 0x00800080 },
+ { 0x80600040, 0xa4602d29, 0x008d8460, 0x00800080 },
+ { 0x80400040, 0xc8002d29, 0x008a8800, 0x00800080 },
+ { 0x80400040, 0xc8202d29, 0x008a8820, 0x00800080 },
+ { 0x80400040, 0xc8402d29, 0x008a8840, 0x00800080 },
+ { 0x80400040, 0xc8602d29, 0x008a8860, 0x00800080 },
+ { 0x80400040, 0xc0102d29, 0x008a8010, 0x00800080 },
+ { 0x80400040, 0xc0302d29, 0x008a8030, 0x00800080 },
+ { 0x80400040, 0xc0502d29, 0x008a8050, 0x00800080 },
+ { 0x80400040, 0xc0702d29, 0x008a8070, 0x00800080 },
+ { 0x80600040, 0xa4102d29, 0x008d8410, 0x00800080 },
+ { 0x80600040, 0xa4302d29, 0x008d8430, 0x00800080 },
+ { 0x80600040, 0xa4502d29, 0x008d8450, 0x00800080 },
+ { 0x80600040, 0xa4702d29, 0x008d8470, 0x00800080 },
+ { 0x80400040, 0xc8102d29, 0x008a8810, 0x00800080 },
+ { 0x80400040, 0xc8302d29, 0x008a8830, 0x00800080 },
+ { 0x80400040, 0xc8502d29, 0x008a8850, 0x00800080 },
+ { 0x80400040, 0xc8702d29, 0x008a8870, 0x00800080 },
+ { 0x00400040, 0x22002da8, 0x006902c0, 0x06000600 },
+ { 0x80400040, 0xc0002d29, 0x008a8000, 0x00800080 },
+ { 0x80400040, 0xc0202d29, 0x008a8020, 0x00800080 },
+ { 0x80400040, 0xc0402d29, 0x008a8040, 0x00800080 },
+ { 0x80400040, 0xc0602d29, 0x008a8060, 0x00800080 },
+ { 0x80600040, 0xa4002d29, 0x008d8400, 0x00800080 },
+ { 0x80600040, 0xa4202d29, 0x008d8420, 0x00800080 },
+ { 0x80600040, 0xa4402d29, 0x008d8440, 0x00800080 },
+ { 0x80600040, 0xa4602d29, 0x008d8460, 0x00800080 },
+ { 0x80400040, 0xc8002d29, 0x008a8800, 0x00800080 },
+ { 0x80400040, 0xc8202d29, 0x008a8820, 0x00800080 },
+ { 0x80400040, 0xc8402d29, 0x008a8840, 0x00800080 },
+ { 0x80400040, 0xc8602d29, 0x008a8860, 0x00800080 },
+ { 0x80400040, 0xc0102d29, 0x008a8010, 0x00800080 },
+ { 0x80400040, 0xc0302d29, 0x008a8030, 0x00800080 },
+ { 0x80400040, 0xc0502d29, 0x008a8050, 0x00800080 },
+ { 0x80400040, 0xc0702d29, 0x008a8070, 0x00800080 },
+ { 0x80600040, 0xa4102d29, 0x008d8410, 0x00800080 },
+ { 0x80600040, 0xa4302d29, 0x008d8430, 0x00800080 },
+ { 0x80600040, 0xa4502d29, 0x008d8450, 0x00800080 },
+ { 0x80600040, 0xa4702d29, 0x008d8470, 0x00800080 },
+ { 0x80400040, 0xc8102d29, 0x008a8810, 0x00800080 },
+ { 0x80400040, 0xc8302d29, 0x008a8830, 0x00800080 },
+ { 0x80400040, 0xc8502d29, 0x008a8850, 0x00800080 },
+ { 0x80400040, 0xc8702d29, 0x008a8870, 0x00800080 },
+ { 0x00400040, 0x22002da8, 0x006902c0, 0x08000800 },
+ { 0x00400040, 0x22002da8, 0x006902c0, 0x00000000 },
+ { 0x00600001, 0x23800021, 0x008d0360, 0x00000000 },
+ { 0x00600001, 0x24a00021, 0x008d0360, 0x00000000 },
+ { 0x00000040, 0x24a41ca5, 0x00000364, 0x00000002 },
+ { 0x00600401, 0xf8000231, 0x00cf8001, 0x00000000 },
+ { 0x00600401, 0xf8200231, 0x00cf8021, 0x00000000 },
+ { 0x00800c01, 0xd0000231, 0x00d28401, 0x00000000 },
+ { 0x00800c01, 0xd0200231, 0x00d28421, 0x00000000 },
+ { 0x00600801, 0xf4000231, 0x00cf8801, 0x00000000 },
+ { 0x00600801, 0xf4200231, 0x00cf8821, 0x00000000 },
+ { 0x00600401, 0xf9200231, 0x00cf8041, 0x00000000 },
+ { 0x00600401, 0xf9400231, 0x00cf8061, 0x00000000 },
+ { 0x00800c01, 0xd1200231, 0x00d28441, 0x00000000 },
+ { 0x00800c01, 0xd1400231, 0x00d28461, 0x00000000 },
+ { 0x00600801, 0xf5200231, 0x00cf8841, 0x00000000 },
+ { 0x00600801, 0xf5400231, 0x00cf8861, 0x00000000 },
+ { 0x05000031, 0x20000e24, 0x00000380, 0x060a8018 },
+ { 0x05000031, 0x20000e24, 0x000004a0, 0x060a8018 },
+ { 0x00400040, 0x22002da8, 0x006902c0, 0x02000200 },
+ { 0x00000040, 0x23841ca5, 0x00000364, 0x00000004 },
+ { 0x00000040, 0x24a41ca5, 0x00000364, 0x00000006 },
+ { 0x00600401, 0xf8000231, 0x00cf8001, 0x00000000 },
+ { 0x00600401, 0xf8200231, 0x00cf8021, 0x00000000 },
+ { 0x00800c01, 0xd0000231, 0x00d28401, 0x00000000 },
+ { 0x00800c01, 0xd0200231, 0x00d28421, 0x00000000 },
+ { 0x00600801, 0xf4000231, 0x00cf8801, 0x00000000 },
+ { 0x00600801, 0xf4200231, 0x00cf8821, 0x00000000 },
+ { 0x00600401, 0xf9200231, 0x00cf8041, 0x00000000 },
+ { 0x00600401, 0xf9400231, 0x00cf8061, 0x00000000 },
+ { 0x00800c01, 0xd1200231, 0x00d28441, 0x00000000 },
+ { 0x00800c01, 0xd1400231, 0x00d28461, 0x00000000 },
+ { 0x00600801, 0xf5200231, 0x00cf8841, 0x00000000 },
+ { 0x00600801, 0xf5400231, 0x00cf8861, 0x00000000 },
+ { 0x05000031, 0x20000e24, 0x00000380, 0x060a8018 },
+ { 0x05000031, 0x20000e24, 0x000004a0, 0x060a8018 },
+ { 0x00400040, 0x22002da8, 0x006902c0, 0x04000400 },
+ { 0x00000040, 0x23841ca5, 0x00000364, 0x00000008 },
+ { 0x00000040, 0x24a41ca5, 0x00000364, 0x0000000a },
+ { 0x00600401, 0xf8000231, 0x00cf8001, 0x00000000 },
+ { 0x00600401, 0xf8200231, 0x00cf8021, 0x00000000 },
+ { 0x00800c01, 0xd0000231, 0x00d28401, 0x00000000 },
+ { 0x00800c01, 0xd0200231, 0x00d28421, 0x00000000 },
+ { 0x00600801, 0xf4000231, 0x00cf8801, 0x00000000 },
+ { 0x00600801, 0xf4200231, 0x00cf8821, 0x00000000 },
+ { 0x00600401, 0xf9200231, 0x00cf8041, 0x00000000 },
+ { 0x00600401, 0xf9400231, 0x00cf8061, 0x00000000 },
+ { 0x00800c01, 0xd1200231, 0x00d28441, 0x00000000 },
+ { 0x00800c01, 0xd1400231, 0x00d28461, 0x00000000 },
+ { 0x00600801, 0xf5200231, 0x00cf8841, 0x00000000 },
+ { 0x00600801, 0xf5400231, 0x00cf8861, 0x00000000 },
+ { 0x05000031, 0x20000e24, 0x00000380, 0x060a8018 },
+ { 0x05000031, 0x20000e24, 0x000004a0, 0x060a8018 },
+ { 0x00400040, 0x22002da8, 0x006902c0, 0x06000600 },
+ { 0x00000040, 0x23841ca5, 0x00000364, 0x0000000c },
+ { 0x00000040, 0x24a41ca5, 0x00000364, 0x0000000e },
+ { 0x00600401, 0xf8000231, 0x00cf8001, 0x00000000 },
+ { 0x00600401, 0xf8200231, 0x00cf8021, 0x00000000 },
+ { 0x00800c01, 0xd0000231, 0x00d28401, 0x00000000 },
+ { 0x00800c01, 0xd0200231, 0x00d28421, 0x00000000 },
+ { 0x00600801, 0xf4000231, 0x00cf8801, 0x00000000 },
+ { 0x00600801, 0xf4200231, 0x00cf8821, 0x00000000 },
+ { 0x00600401, 0xf9200231, 0x00cf8041, 0x00000000 },
+ { 0x00600401, 0xf9400231, 0x00cf8061, 0x00000000 },
+ { 0x00800c01, 0xd1200231, 0x00d28441, 0x00000000 },
+ { 0x00800c01, 0xd1400231, 0x00d28461, 0x00000000 },
+ { 0x00600801, 0xf5200231, 0x00cf8841, 0x00000000 },
+ { 0x00600801, 0xf5400231, 0x00cf8861, 0x00000000 },
+ { 0x05000031, 0x20000e24, 0x00000380, 0x060a8018 },
+ { 0x05000031, 0x20000e24, 0x000004a0, 0x060a8018 },
+ { 0x00600001, 0x2fe00021, 0x008d0000, 0x00000000 },
+ { 0x07000031, 0x20001e24, 0x00000fe0, 0x82000010 },
diff --git a/src/shaders/post_processing/gen7/pa_to_pa.g7b b/src/shaders/post_processing/gen7/pa_to_pa.g7b
new file mode 100644
index 0000000..20728b5
--- /dev/null
+++ b/src/shaders/post_processing/gen7/pa_to_pa.g7b
@@ -0,0 +1,677 @@
+ { 0x00600001, 0x23600021, 0x008d0000, 0x00000000 },
+ { 0x00600001, 0x23200021, 0x008d0000, 0x00000000 },
+ { 0x00600001, 0x23400021, 0x008d0000, 0x00000000 },
+ { 0x00600041, 0x206077bd, 0x008d0060, 0x000000f0 },
+ { 0x00200001, 0x211401bd, 0x004500e0, 0x00000000 },
+ { 0x01600010, 0x20002e24, 0x0000005a, 0x00010001 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x00000000 },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+ { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c077bd, 0x00000060, 0x00000114 },
+ { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a077bd, 0x00000080, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+ { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c077bd, 0x00000060, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a077bd, 0x00000080, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c077bd, 0x00000060, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a077bd, 0x00000080, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c077bd, 0x00000060, 0x00000220 },
+ { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a077bd, 0x00000080, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x00020002 },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+ { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+ { 0x00110048, 0x20c477bd, 0x00000064, 0x00000114 },
+ { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+ { 0x00000048, 0x20a477bd, 0x00000084, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+ { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+ { 0x00110048, 0x20c477bd, 0x00000064, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+ { 0x00000048, 0x20a477bd, 0x00000084, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+ { 0x00110048, 0x20c477bd, 0x00000064, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+ { 0x00000048, 0x20a477bd, 0x00000084, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+ { 0x00110048, 0x20c477bd, 0x00000064, 0x00000220 },
+ { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+ { 0x00000048, 0x20a477bd, 0x00000084, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x00040004 },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+ { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+ { 0x00110048, 0x20c877bd, 0x00000068, 0x00000114 },
+ { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+ { 0x00000048, 0x20a877bd, 0x00000088, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+ { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+ { 0x00110048, 0x20c877bd, 0x00000068, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+ { 0x00000048, 0x20a877bd, 0x00000088, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+ { 0x00110048, 0x20c877bd, 0x00000068, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+ { 0x00000048, 0x20a877bd, 0x00000088, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+ { 0x00110048, 0x20c877bd, 0x00000068, 0x00000220 },
+ { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+ { 0x00000048, 0x20a877bd, 0x00000088, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x00060006 },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+ { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+ { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000114 },
+ { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+ { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+ { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+ { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+ { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+ { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+ { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+ { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000220 },
+ { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+ { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x00080008 },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+ { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+ { 0x00110048, 0x20d077bd, 0x00000070, 0x00000114 },
+ { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+ { 0x00000048, 0x20b077bd, 0x00000090, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+ { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+ { 0x00110048, 0x20d077bd, 0x00000070, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+ { 0x00000048, 0x20b077bd, 0x00000090, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+ { 0x00110048, 0x20d077bd, 0x00000070, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+ { 0x00000048, 0x20b077bd, 0x00000090, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+ { 0x00110048, 0x20d077bd, 0x00000070, 0x00000220 },
+ { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+ { 0x00000048, 0x20b077bd, 0x00000090, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x000a000a },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+ { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+ { 0x00110048, 0x20d477bd, 0x00000074, 0x00000114 },
+ { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+ { 0x00000048, 0x20b477bd, 0x00000094, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+ { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+ { 0x00110048, 0x20d477bd, 0x00000074, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+ { 0x00000048, 0x20b477bd, 0x00000094, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+ { 0x00110048, 0x20d477bd, 0x00000074, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+ { 0x00000048, 0x20b477bd, 0x00000094, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+ { 0x00110048, 0x20d477bd, 0x00000074, 0x00000220 },
+ { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+ { 0x00000048, 0x20b477bd, 0x00000094, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x000c000c },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+ { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+ { 0x00110048, 0x20d877bd, 0x00000078, 0x00000114 },
+ { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+ { 0x00000048, 0x20b877bd, 0x00000098, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+ { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+ { 0x00110048, 0x20d877bd, 0x00000078, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+ { 0x00000048, 0x20b877bd, 0x00000098, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+ { 0x00110048, 0x20d877bd, 0x00000078, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+ { 0x00000048, 0x20b877bd, 0x00000098, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+ { 0x00110048, 0x20d877bd, 0x00000078, 0x00000220 },
+ { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+ { 0x00000048, 0x20b877bd, 0x00000098, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x000e000e },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+ { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+ { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000114 },
+ { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+ { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+ { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+ { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+ { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+ { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+ { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+ { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000220 },
+ { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+ { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00800001, 0x22a00169, 0x00000000, 0xffffffff },
+ { 0x02000010, 0x20002d24, 0x020000e4, 0x00000000 },
+ { 0x02010010, 0x20002d24, 0x020000e6, 0x00000000 },
+ { 0x00110220, 0x34001c00, 0x02001400, 0x0000001a },
+ { 0x00000005, 0x23022d31, 0x00000044, 0x00030003 },
+ { 0x00000401, 0x233803bd, 0x000000f4, 0x00000000 },
+ { 0x00000c01, 0x233003bd, 0x00000060, 0x00000000 },
+ { 0x00000c01, 0x233403bd, 0x00000080, 0x00000000 },
+ { 0x00000c01, 0x232803bd, 0x000000c0, 0x00000000 },
+ { 0x00000801, 0x232c03bd, 0x000000a0, 0x00000000 },
+ { 0x00000001, 0x240803bc, 0x000000c0, 0x00000000 },
+ { 0x00000048, 0x24087fbc, 0x00000060, 0x41000000 },
+ { 0x00000448, 0x22e87fbd, 0x000000f4, 0x41e00000 },
+ { 0x00000c41, 0x22e47fbd, 0x00000080, 0x40800000 },
+ { 0x00000001, 0x241003bc, 0x00000060, 0x00000000 },
+ { 0x00000c48, 0x22f07fbd, 0x000000f4, 0x41000000 },
+ { 0x00000801, 0x22f40061, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00400001, 0x2400036c, 0x00000000, 0x000062ea },
+ { 0x00400040, 0x24002d8c, 0x00690400, 0x00460046 },
+ { 0x00400009, 0x22c02d8d, 0x00690400, 0x00050005 },
+ { 0x00400001, 0x2400036c, 0x00000000, 0x00006420 },
+ { 0x00400040, 0x24002d8c, 0x00690400, 0x00400040 },
+ { 0x00400409, 0x22402d8d, 0x00690400, 0x00050005 },
+ { 0x00000801, 0x22500061, 0x00000000, 0x01000100 },
+ { 0x00000001, 0x22d00061, 0x00000000, 0x00400040 },
+ { 0x00110220, 0x34001c00, 0x02001400, 0x00000048 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x050eb400 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x00000000 },
+ { 0x00000401, 0x233c0021, 0x000000fc, 0x00000000 },
+ { 0x00000801, 0x23240121, 0x000000f8, 0x00000000 },
+ { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+ { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+ { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+ { 0x00110220, 0x34001c00, 0x00001400, 0x00000034 },
+ { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+ { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+ { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+ { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+ { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+ { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+ { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+ { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+ { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+ { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+ { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+ { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+ { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+ { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+ { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+ { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+ { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+ { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+ { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+ { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+ { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+ { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+ { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+ { 0x02000031, 0x28000229, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00110220, 0x34001c00, 0x02001400, 0x00000046 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x050eb400 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x00000000 },
+ { 0x00000040, 0x23240d21, 0x000000f8, 0x00000001 },
+ { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+ { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+ { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+ { 0x00110220, 0x34001c00, 0x00001400, 0x00000034 },
+ { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+ { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+ { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+ { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+ { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+ { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+ { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+ { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+ { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+ { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+ { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+ { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+ { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+ { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+ { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+ { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+ { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+ { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+ { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+ { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+ { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+ { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+ { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+ { 0x02000031, 0x2a000229, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00110220, 0x34001c00, 0x02001400, 0x00000046 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x050eb400 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x00000000 },
+ { 0x00000040, 0x23240d21, 0x000000f8, 0x00000002 },
+ { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+ { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+ { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+ { 0x00110220, 0x34001c00, 0x00001400, 0x00000034 },
+ { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+ { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+ { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+ { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+ { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+ { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+ { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+ { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+ { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+ { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+ { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+ { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+ { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+ { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+ { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+ { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+ { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+ { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+ { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+ { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+ { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+ { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+ { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+ { 0x02000031, 0x2c000229, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00110220, 0x34001c00, 0x02001400, 0x00000046 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x050eb400 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x00000000 },
+ { 0x00000040, 0x23240d21, 0x000000f8, 0x00000003 },
+ { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+ { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+ { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+ { 0x00110220, 0x34001c00, 0x00001400, 0x00000034 },
+ { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+ { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+ { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+ { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+ { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+ { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+ { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+ { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+ { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+ { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+ { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+ { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+ { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+ { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+ { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+ { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+ { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+ { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+ { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+ { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+ { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+ { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+ { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+ { 0x02000031, 0x2e000229, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00400040, 0x22002da8, 0x006902c0, 0x00000000 },
+ { 0x00400040, 0x22082e2c, 0x0069005c, 0x03a003a0 },
+ { 0x00000409, 0x23603da5, 0x000000e0, 0x00010001 },
+ { 0x00000c01, 0x236401a5, 0x000000e2, 0x00000000 },
+ { 0x00000801, 0x23680061, 0x00000000, 0x0001001f },
+ { 0x80400040, 0xc0002d29, 0x008a8000, 0x00800080 },
+ { 0x80400040, 0xc0202d29, 0x008a8020, 0x00800080 },
+ { 0x80400040, 0xc0402d29, 0x008a8040, 0x00800080 },
+ { 0x80400040, 0xc0602d29, 0x008a8060, 0x00800080 },
+ { 0x80600040, 0xa4002d29, 0x008d8400, 0x00800080 },
+ { 0x80600040, 0xa4202d29, 0x008d8420, 0x00800080 },
+ { 0x80600040, 0xa4402d29, 0x008d8440, 0x00800080 },
+ { 0x80600040, 0xa4602d29, 0x008d8460, 0x00800080 },
+ { 0x80400040, 0xc8002d29, 0x008a8800, 0x00800080 },
+ { 0x80400040, 0xc8202d29, 0x008a8820, 0x00800080 },
+ { 0x80400040, 0xc8402d29, 0x008a8840, 0x00800080 },
+ { 0x80400040, 0xc8602d29, 0x008a8860, 0x00800080 },
+ { 0x80400040, 0xc0102d29, 0x008a8010, 0x00800080 },
+ { 0x80400040, 0xc0302d29, 0x008a8030, 0x00800080 },
+ { 0x80400040, 0xc0502d29, 0x008a8050, 0x00800080 },
+ { 0x80400040, 0xc0702d29, 0x008a8070, 0x00800080 },
+ { 0x80600040, 0xa4102d29, 0x008d8410, 0x00800080 },
+ { 0x80600040, 0xa4302d29, 0x008d8430, 0x00800080 },
+ { 0x80600040, 0xa4502d29, 0x008d8450, 0x00800080 },
+ { 0x80600040, 0xa4702d29, 0x008d8470, 0x00800080 },
+ { 0x80400040, 0xc8102d29, 0x008a8810, 0x00800080 },
+ { 0x80400040, 0xc8302d29, 0x008a8830, 0x00800080 },
+ { 0x80400040, 0xc8502d29, 0x008a8850, 0x00800080 },
+ { 0x80400040, 0xc8702d29, 0x008a8870, 0x00800080 },
+ { 0x00400040, 0x22002da8, 0x006902c0, 0x02000200 },
+ { 0x80400040, 0xc0002d29, 0x008a8000, 0x00800080 },
+ { 0x80400040, 0xc0202d29, 0x008a8020, 0x00800080 },
+ { 0x80400040, 0xc0402d29, 0x008a8040, 0x00800080 },
+ { 0x80400040, 0xc0602d29, 0x008a8060, 0x00800080 },
+ { 0x80600040, 0xa4002d29, 0x008d8400, 0x00800080 },
+ { 0x80600040, 0xa4202d29, 0x008d8420, 0x00800080 },
+ { 0x80600040, 0xa4402d29, 0x008d8440, 0x00800080 },
+ { 0x80600040, 0xa4602d29, 0x008d8460, 0x00800080 },
+ { 0x80400040, 0xc8002d29, 0x008a8800, 0x00800080 },
+ { 0x80400040, 0xc8202d29, 0x008a8820, 0x00800080 },
+ { 0x80400040, 0xc8402d29, 0x008a8840, 0x00800080 },
+ { 0x80400040, 0xc8602d29, 0x008a8860, 0x00800080 },
+ { 0x80400040, 0xc0102d29, 0x008a8010, 0x00800080 },
+ { 0x80400040, 0xc0302d29, 0x008a8030, 0x00800080 },
+ { 0x80400040, 0xc0502d29, 0x008a8050, 0x00800080 },
+ { 0x80400040, 0xc0702d29, 0x008a8070, 0x00800080 },
+ { 0x80600040, 0xa4102d29, 0x008d8410, 0x00800080 },
+ { 0x80600040, 0xa4302d29, 0x008d8430, 0x00800080 },
+ { 0x80600040, 0xa4502d29, 0x008d8450, 0x00800080 },
+ { 0x80600040, 0xa4702d29, 0x008d8470, 0x00800080 },
+ { 0x80400040, 0xc8102d29, 0x008a8810, 0x00800080 },
+ { 0x80400040, 0xc8302d29, 0x008a8830, 0x00800080 },
+ { 0x80400040, 0xc8502d29, 0x008a8850, 0x00800080 },
+ { 0x80400040, 0xc8702d29, 0x008a8870, 0x00800080 },
+ { 0x00400040, 0x22002da8, 0x006902c0, 0x04000400 },
+ { 0x80400040, 0xc0002d29, 0x008a8000, 0x00800080 },
+ { 0x80400040, 0xc0202d29, 0x008a8020, 0x00800080 },
+ { 0x80400040, 0xc0402d29, 0x008a8040, 0x00800080 },
+ { 0x80400040, 0xc0602d29, 0x008a8060, 0x00800080 },
+ { 0x80600040, 0xa4002d29, 0x008d8400, 0x00800080 },
+ { 0x80600040, 0xa4202d29, 0x008d8420, 0x00800080 },
+ { 0x80600040, 0xa4402d29, 0x008d8440, 0x00800080 },
+ { 0x80600040, 0xa4602d29, 0x008d8460, 0x00800080 },
+ { 0x80400040, 0xc8002d29, 0x008a8800, 0x00800080 },
+ { 0x80400040, 0xc8202d29, 0x008a8820, 0x00800080 },
+ { 0x80400040, 0xc8402d29, 0x008a8840, 0x00800080 },
+ { 0x80400040, 0xc8602d29, 0x008a8860, 0x00800080 },
+ { 0x80400040, 0xc0102d29, 0x008a8010, 0x00800080 },
+ { 0x80400040, 0xc0302d29, 0x008a8030, 0x00800080 },
+ { 0x80400040, 0xc0502d29, 0x008a8050, 0x00800080 },
+ { 0x80400040, 0xc0702d29, 0x008a8070, 0x00800080 },
+ { 0x80600040, 0xa4102d29, 0x008d8410, 0x00800080 },
+ { 0x80600040, 0xa4302d29, 0x008d8430, 0x00800080 },
+ { 0x80600040, 0xa4502d29, 0x008d8450, 0x00800080 },
+ { 0x80600040, 0xa4702d29, 0x008d8470, 0x00800080 },
+ { 0x80400040, 0xc8102d29, 0x008a8810, 0x00800080 },
+ { 0x80400040, 0xc8302d29, 0x008a8830, 0x00800080 },
+ { 0x80400040, 0xc8502d29, 0x008a8850, 0x00800080 },
+ { 0x80400040, 0xc8702d29, 0x008a8870, 0x00800080 },
+ { 0x00400040, 0x22002da8, 0x006902c0, 0x06000600 },
+ { 0x80400040, 0xc0002d29, 0x008a8000, 0x00800080 },
+ { 0x80400040, 0xc0202d29, 0x008a8020, 0x00800080 },
+ { 0x80400040, 0xc0402d29, 0x008a8040, 0x00800080 },
+ { 0x80400040, 0xc0602d29, 0x008a8060, 0x00800080 },
+ { 0x80600040, 0xa4002d29, 0x008d8400, 0x00800080 },
+ { 0x80600040, 0xa4202d29, 0x008d8420, 0x00800080 },
+ { 0x80600040, 0xa4402d29, 0x008d8440, 0x00800080 },
+ { 0x80600040, 0xa4602d29, 0x008d8460, 0x00800080 },
+ { 0x80400040, 0xc8002d29, 0x008a8800, 0x00800080 },
+ { 0x80400040, 0xc8202d29, 0x008a8820, 0x00800080 },
+ { 0x80400040, 0xc8402d29, 0x008a8840, 0x00800080 },
+ { 0x80400040, 0xc8602d29, 0x008a8860, 0x00800080 },
+ { 0x80400040, 0xc0102d29, 0x008a8010, 0x00800080 },
+ { 0x80400040, 0xc0302d29, 0x008a8030, 0x00800080 },
+ { 0x80400040, 0xc0502d29, 0x008a8050, 0x00800080 },
+ { 0x80400040, 0xc0702d29, 0x008a8070, 0x00800080 },
+ { 0x80600040, 0xa4102d29, 0x008d8410, 0x00800080 },
+ { 0x80600040, 0xa4302d29, 0x008d8430, 0x00800080 },
+ { 0x80600040, 0xa4502d29, 0x008d8450, 0x00800080 },
+ { 0x80600040, 0xa4702d29, 0x008d8470, 0x00800080 },
+ { 0x80400040, 0xc8102d29, 0x008a8810, 0x00800080 },
+ { 0x80400040, 0xc8302d29, 0x008a8830, 0x00800080 },
+ { 0x80400040, 0xc8502d29, 0x008a8850, 0x00800080 },
+ { 0x80400040, 0xc8702d29, 0x008a8870, 0x00800080 },
+ { 0x00400040, 0x22002da8, 0x006902c0, 0x08000800 },
+ { 0x00400040, 0x22002da8, 0x006902c0, 0x00000000 },
+ { 0x00600001, 0x23800021, 0x008d0360, 0x00000000 },
+ { 0x00600001, 0x24a00021, 0x008d0360, 0x00000000 },
+ { 0x00000040, 0x24a41ca5, 0x00000364, 0x00000002 },
+ { 0x00600401, 0xf8000231, 0x00cf8001, 0x00000000 },
+ { 0x00600401, 0xf8200231, 0x00cf8021, 0x00000000 },
+ { 0x00800c01, 0xd0000231, 0x00d28401, 0x00000000 },
+ { 0x00800c01, 0xd0200231, 0x00d28421, 0x00000000 },
+ { 0x00600801, 0xf4000231, 0x00cf8801, 0x00000000 },
+ { 0x00600801, 0xf4200231, 0x00cf8821, 0x00000000 },
+ { 0x00600401, 0xf9200231, 0x00cf8041, 0x00000000 },
+ { 0x00600401, 0xf9400231, 0x00cf8061, 0x00000000 },
+ { 0x00800c01, 0xd1200231, 0x00d28441, 0x00000000 },
+ { 0x00800c01, 0xd1400231, 0x00d28461, 0x00000000 },
+ { 0x00600801, 0xf5200231, 0x00cf8841, 0x00000000 },
+ { 0x00600801, 0xf5400231, 0x00cf8861, 0x00000000 },
+ { 0x05000031, 0x20000e24, 0x00000380, 0x060a8018 },
+ { 0x05000031, 0x20000e24, 0x000004a0, 0x060a8018 },
+ { 0x00400040, 0x22002da8, 0x006902c0, 0x02000200 },
+ { 0x00000040, 0x23841ca5, 0x00000364, 0x00000004 },
+ { 0x00000040, 0x24a41ca5, 0x00000364, 0x00000006 },
+ { 0x00600401, 0xf8000231, 0x00cf8001, 0x00000000 },
+ { 0x00600401, 0xf8200231, 0x00cf8021, 0x00000000 },
+ { 0x00800c01, 0xd0000231, 0x00d28401, 0x00000000 },
+ { 0x00800c01, 0xd0200231, 0x00d28421, 0x00000000 },
+ { 0x00600801, 0xf4000231, 0x00cf8801, 0x00000000 },
+ { 0x00600801, 0xf4200231, 0x00cf8821, 0x00000000 },
+ { 0x00600401, 0xf9200231, 0x00cf8041, 0x00000000 },
+ { 0x00600401, 0xf9400231, 0x00cf8061, 0x00000000 },
+ { 0x00800c01, 0xd1200231, 0x00d28441, 0x00000000 },
+ { 0x00800c01, 0xd1400231, 0x00d28461, 0x00000000 },
+ { 0x00600801, 0xf5200231, 0x00cf8841, 0x00000000 },
+ { 0x00600801, 0xf5400231, 0x00cf8861, 0x00000000 },
+ { 0x05000031, 0x20000e24, 0x00000380, 0x060a8018 },
+ { 0x05000031, 0x20000e24, 0x000004a0, 0x060a8018 },
+ { 0x00400040, 0x22002da8, 0x006902c0, 0x04000400 },
+ { 0x00000040, 0x23841ca5, 0x00000364, 0x00000008 },
+ { 0x00000040, 0x24a41ca5, 0x00000364, 0x0000000a },
+ { 0x00600401, 0xf8000231, 0x00cf8001, 0x00000000 },
+ { 0x00600401, 0xf8200231, 0x00cf8021, 0x00000000 },
+ { 0x00800c01, 0xd0000231, 0x00d28401, 0x00000000 },
+ { 0x00800c01, 0xd0200231, 0x00d28421, 0x00000000 },
+ { 0x00600801, 0xf4000231, 0x00cf8801, 0x00000000 },
+ { 0x00600801, 0xf4200231, 0x00cf8821, 0x00000000 },
+ { 0x00600401, 0xf9200231, 0x00cf8041, 0x00000000 },
+ { 0x00600401, 0xf9400231, 0x00cf8061, 0x00000000 },
+ { 0x00800c01, 0xd1200231, 0x00d28441, 0x00000000 },
+ { 0x00800c01, 0xd1400231, 0x00d28461, 0x00000000 },
+ { 0x00600801, 0xf5200231, 0x00cf8841, 0x00000000 },
+ { 0x00600801, 0xf5400231, 0x00cf8861, 0x00000000 },
+ { 0x05000031, 0x20000e24, 0x00000380, 0x060a8018 },
+ { 0x05000031, 0x20000e24, 0x000004a0, 0x060a8018 },
+ { 0x00400040, 0x22002da8, 0x006902c0, 0x06000600 },
+ { 0x00000040, 0x23841ca5, 0x00000364, 0x0000000c },
+ { 0x00000040, 0x24a41ca5, 0x00000364, 0x0000000e },
+ { 0x00600401, 0xf8000231, 0x00cf8001, 0x00000000 },
+ { 0x00600401, 0xf8200231, 0x00cf8021, 0x00000000 },
+ { 0x00800c01, 0xd0000231, 0x00d28401, 0x00000000 },
+ { 0x00800c01, 0xd0200231, 0x00d28421, 0x00000000 },
+ { 0x00600801, 0xf4000231, 0x00cf8801, 0x00000000 },
+ { 0x00600801, 0xf4200231, 0x00cf8821, 0x00000000 },
+ { 0x00600401, 0xf9200231, 0x00cf8041, 0x00000000 },
+ { 0x00600401, 0xf9400231, 0x00cf8061, 0x00000000 },
+ { 0x00800c01, 0xd1200231, 0x00d28441, 0x00000000 },
+ { 0x00800c01, 0xd1400231, 0x00d28461, 0x00000000 },
+ { 0x00600801, 0xf5200231, 0x00cf8841, 0x00000000 },
+ { 0x00600801, 0xf5400231, 0x00cf8861, 0x00000000 },
+ { 0x05000031, 0x20000e24, 0x00000380, 0x060a8018 },
+ { 0x05000031, 0x20000e24, 0x000004a0, 0x060a8018 },
+ { 0x00600001, 0x2fe00021, 0x008d0000, 0x00000000 },
+ { 0x07000031, 0x20001e24, 0x00000fe0, 0x82000010 },
diff --git a/src/shaders/post_processing/gen75/Makefile.am b/src/shaders/post_processing/gen75/Makefile.am
new file mode 100644
index 0000000..cdad1d9
--- /dev/null
+++ b/src/shaders/post_processing/gen75/Makefile.am
@@ -0,0 +1,9 @@
+INTEL_PP_PRE_G75B = \
+ sharpening_h_blur.g75b \
+ sharpening_unmask.g75b \
+ sharpening_v_blur.g75b
+# No build rules in this directory: the .g75b files above are only added to the distribution.
+EXTRA_DIST = $(INTEL_PP_PRE_G75B)
+
+# Extra clean files so that maintainer-clean removes *everything*
+MAINTAINERCLEANFILES = Makefile.in
diff --git a/src/shaders/post_processing/gen8/EOT.g8a b/src/shaders/post_processing/gen8/EOT.g8a
new file mode 100644
index 0000000..72c3da3
--- /dev/null
+++ b/src/shaders/post_processing/gen8/EOT.g8a
@@ -0,0 +1,166 @@
+/*
+ * Copyright 2000-2011 Intel Corporation All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+// 2 // Total instruction count
+// 1 // Total kernel count
+
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+
+.default_execution_size (16)
+.default_register_type :ub
+
+.reg_count_total 128
+.reg_count_payload 7
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+//Color Pipe (IECP) parameters
+
+
+//ByteCopy
+
+
+//r4
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later)
+// 0000,0000,0000
+// 0001(Spawn a root thread),0001 (Root thread spawn thread)
+// = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later)
+// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+// 0000,0000 (Binding table index, added later)
+// = 0x02000011
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+ // Message Descriptors
+ // = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+ // 1 (header present 1) 0 1010 (media block write) 000000
+ // 00000000 (binding table index - set later)
+ // = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+ // buf4 starts from r28
+
+
+//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT
+
+
+.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Define temp space for any usages
+
+
+// Common Buffers
+
+
+// temp space for rotation
+
+.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+
+.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+
+.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+
+.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+
+
+// End of common.inc
+
+
+//End of Thread message: send a copy of the r0 header as a one-register message; no response is requested.
+// NOTE(review): 0x27 is presumably SFID 7 (thread spawner) with the EOT bit (0x20) set - confirm against the PRM.
+mov (8) r127<1>:ud r0.0<8;8,1>:ud // r127 = all 8 dwords of r0
+ send (1) null<1>:d r127 0x27 0x02000010 // payload = r127, dst = null; 0x02000000 part = message length 1, response length 0
diff --git a/src/shaders/post_processing/gen8/Makefile.am b/src/shaders/post_processing/gen8/Makefile.am
new file mode 100644
index 0000000..54533fc
--- /dev/null
+++ b/src/shaders/post_processing/gen8/Makefile.am
@@ -0,0 +1,79 @@
+INTEL_PP_G8B = \
+ pl2_to_pl2.g8b \
+ pl2_to_pl3.g8b \
+ pl3_to_pl2.g8b \
+ pl3_to_pl3.g8b \
+ pl2_to_rgbx.g8b \
+ rgbx_to_nv12.g8b \
+ pl2_to_pa.g8b \
+ pl3_to_pa.g8b \
+ pa_to_pl2.g8b \
+ pa_to_pl3.g8b \
+ pa_to_pa.g8b \
+ $(NULL)
+# Kernels below are not part of INTEL_PP_G8B, so no .asm/.g8s is derived for them; they are only distributed.
+INTEL_PP_PRE_G8B = \
+ sharpening_h_blur.g8b \
+ sharpening_unmask.g8b \
+ sharpening_v_blur.g8b
+
+INTEL_PP_G8A = \
+ EOT.g8a \
+ PL2_AVS_Buf_0.g8a \
+ PL2_AVS_Buf_1.g8a \
+ PL2_AVS_Buf_2.g8a \
+ PL2_AVS_Buf_3.g8a \
+ PL3_AVS_Buf_0.g8a \
+ PL3_AVS_Buf_1.g8a \
+ PL3_AVS_Buf_2.g8a \
+ PL3_AVS_Buf_3.g8a \
+ PA_AVS_Buf_0.g8a \
+ PA_AVS_Buf_1.g8a \
+ PA_AVS_Buf_2.g8a \
+ PA_AVS_Buf_3.g8a \
+ Save_AVS_NV12.g8a \
+ Save_AVS_PL3.g8a \
+ Save_AVS_RGBX.g8a \
+ Save_AVS_PA.g8a \
+ Set_AVS_Buf_0123_PL2.g8a \
+ Set_AVS_Buf_0123_PL3.g8a \
+ Set_AVS_Buf_0123_BGRA.g8a \
+ Set_AVS_Buf_0123_VYUA.g8a \
+ YUV_to_RGB.g8a \
+ RGB_to_YUV.g8a \
+ Set_Layer_0.g8a \
+ VP_Setup.g8a \
+ $(NULL)
+# Per-kernel source (.asm) and preprocessed intermediate (.g8s) names, derived from the .g8b list.
+INTEL_PP_ASM = $(INTEL_PP_G8B:%.g8b=%.asm)
+INTEL_PP_GEN8_ASM = $(INTEL_PP_G8B:%.g8b=%.g8s)
+
+TARGETS =
+if HAVE_GEN4ASM
+TARGETS += $(INTEL_PP_G8B)
+endif
+
+all-local: $(TARGETS)
+
+SUFFIXES = .g8b .g8s .asm
+# Build chain (only when the assembler is available): .asm -> cpp + gpp.py -> .g8s -> $(GEN4ASM) -g 8 -> .g8b
+if HAVE_GEN4ASM
+$(INTEL_PP_GEN8_ASM): $(INTEL_PP_ASM) $(INTEL_PP_G8A)
+.asm.g8s:
+ $(AM_V_GEN)cpp $< > _pp0.$@; \
+ ../../gpp.py _pp0.$@ $@; \
+ rm _pp0.$@
+.g8s.g8b:
+ $(AM_V_GEN)$(GEN4ASM) -a -o $@ -g 8 $<
+endif
+# Remove the generated .g8s intermediates on "make clean" (was the undefined INTEL_PP_GEN7_ASM).
+CLEANFILES = $(INTEL_PP_GEN8_ASM)
+
+EXTRA_DIST = \
+ $(INTEL_PP_ASM) \
+ $(INTEL_PP_G8A) \
+ $(INTEL_PP_G8B) \
+ $(INTEL_PP_PRE_G8B)
+
+# Extra clean files so that maintainer-clean removes *everything*
+MAINTAINERCLEANFILES = Makefile.in
diff --git a/src/shaders/post_processing/gen8/PA_AVS_Buf_0.g8a b/src/shaders/post_processing/gen8/PA_AVS_Buf_0.g8a
new file mode 100644
index 0000000..228b256
--- /dev/null
+++ b/src/shaders/post_processing/gen8/PA_AVS_Buf_0.g8a
@@ -0,0 +1,457 @@
+/*
+ * Copyright 2000-2011 Intel Corporation All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+// 44 // Total instruction count
+// 1 // Total kernel count
+
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+
+.default_execution_size (16)
+.default_register_type :ub
+
+.reg_count_total 128
+.reg_count_payload 7
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+//Color Pipe (IECP) parameters
+
+
+//ByteCopy
+
+
+//r4
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later)
+// 0000,0000,0000
+// 0001(Spawn a root thread),0001 (Root thread spawn thread)
+// = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later)
+// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+// 0000,0000 (Binding table index, added later)
+// = 0x02000011
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+ // Message Descriptors
+ // = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+ // 1 (header present 1) 0 1010 (media block write) 000000
+ // 00000000 (binding table index - set later)
+ // = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+ // buf4 starts from r28
+
+
+//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT
+
+
+.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Define temp space for any usages
+
+
+// Common Buffers
+
+
+// temp space for rotation
+
+.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+
+.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+
+.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+
+.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+
+
+// End of common.inc
+
+
+// FileName: PL2_AVS_Buf_0.asm
+// Author: Tatiya, Rupesh
+// Description: Loads 8x8 AVS/IEF PL2 data into Buffer 0
+
+
+
+// FileName : PL2_AVS_Buf.asm
+// Author : Tatiya, Rupesh
+// Description : Loads 8x8 AVS/IEF PL2 data into Buffer N
+
+
+
+// Module name: Scaling.inc
+
+
+
+
+// Description: Includes all definitions explicit to Fast Composite.
+
+
+
+
+// End of common.inc
+
+
+//========== GRF partition ==========
+ // r0 header : r0 (1 GRF)
+ // Static parameters : r1 - r6 (6 GRFS)
+ // Inline parameters : r7 - r8 (2 GRFs)
+ // MSGSRC : r27 (1 GRF)
+//===================================
+
+//Interface:
+//========== Static Parameters (Explicit To Fast Composite) ==========
+//r1
+//CSC Set 0
+
+
+.declare udCSC_CURBE Base=r1.0 ElementSize=4 Type=ud
+
+//Constant alpha
+
+
+//r2
+
+
+// Gen7 AVS WA
+
+
+// WiDi Definitions
+
+
+//Colorfill
+
+
+ // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise.
+
+.declare ubCOLOR_PIXEL_VAL Base=r2.20 ElementSize=1 SrcRegion=<0;1,0> DstRegion=<1> Type=ub
+
+//r3
+//Normalised Ratio of Horizontal step size with main video for all layers
+
+
+ //Normalised Ratio of Horizontal step size with main video for all layers becomes
+ //Normalised Horizontal step size for all layers in VP_Setup.asm
+
+
+//r4
+//Normalised Vertical step size for all layers
+
+
+//r5
+//Normalised Vertical Frame Origin for all layers
+
+
+//r6
+//Normalised Horizontal Frame Origin for all layers
+
+
+//========== Inline Parameters (Explicit To Fast Composite) ==========
+
+
+//Main video Step X
+
+
+//====================== Binding table (Explicit To Fast Composite)=========================================
+
+
+//Used by Interlaced Scaling Kernels
+
+
+//========== Sampler State Table Index (Explicit To Fast Composite)==========
+//Sampler Index for AVS/IEF messages
+
+
+//Sampler Index for SIMD16 sampler messages
+
+
+//=============================================================================
+
+.declare fBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+
+.declare udBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+
+.declare uwBUFFER_0 Base=r64.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_1 Base=r80.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_2 Base=r96.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_3 Base=r112.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_4 Base=r28.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_5 Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+
+.declare ubBUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+.declare ub4BUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+
+//Pointer to mask reg
+
+
+//r18
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+.declare udCSC_COEFF_0 Base=r18.0 ElementSize=4 Type=ud // 1 GRF
+
+//r19
+
+
+.declare udCSC_COEFF_1 Base=r19.0 ElementSize=4 Type=ud // 1 GRF
+
+
+//r20
+
+.declare uwALPHA_MASK_REG_TEMP Base=r20.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF
+
+//r21
+
+.declare uwALPHA_MASK_REG Base=r21.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF
+
+//r22
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as
+//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT
+
+//r23
+
+
+//Lumakey
+
+
+//r24
+
+
+//r25
+
+
+//r26
+
+
+//defines to generate LABELS during compile time.
+
+
+ // Message Header
+ // m0.7 31:0 Debug
+ // m0.6 31:0 Debug
+ // m0.5 31:0 Ignored
+ // m0.4 31:0 Ignored
+ // m0.3 31:0 Ignored
+ // m0.2 31:16 Ignored
+ // 15 Alpha Write Channel Mask enable=0, disable=1
+ // 14 Blue Write Channel Mask (U)
+ // 13 Green Write Channel Mask (Y)
+ // 12 Red Write Channel Mask (V)
+ // 11:0 Ignored
+ // m0.1 Ignored
+ // m0.0 Ignored
+
+
+ // AVS payload
+ // m1.7 Group ID Number
+ // m1.6 U 2nd Derivative ---> NLAS dx
+ // m1.5 Delta V ---> Step Y
+ // m1.4 Delta U ---> Step X
+ // m1.3 Pixel 0 V Address ---> ORIY (Y0)
+ // m1.2 Pixel 0 U Address ---> ORIX (X0)
+ // m1.1 Vertical Block Number
+ // m1.0 Reserved
+
+ // Sampler Message Descriptor
+ // 31:29 Reserved 000
+ // 28:25 Message length 0010
+ // 24:20 Response length xxxxx ---> 4GRFs for each enabled channel (AVS), 2GRFs for each enabled channel (sample unorm)
+ // 19 Header Present 1
+ // 18:17 SIMD Mode 11 ---> SIMD32/64
+ // 16:12 Message Type xxxxx ---> 01011 sample_8x8, 01100 (sample_unorm), 01010 (sample_unorm+killpix)
+ // 11:8 Sampler Index xxxx
+ // 7:0 Binding Table Index xxxxxxxx
+
+
+ // Msg Header M0.2
+ // 15:15 Alpha Write Channel Mask, 0: written back, 1: not written back
+ // 14:14 Blue Write Channel Mask
+ // 13:13 Green Write Channel Mask
+ // 12:12 Red Write Channel Mask
+
+
+//By design, Buffer 0,1,2,3 always have Layer 0 and Buffer 4,5 always have L1-L7
+
+
+//used to generate LABELS at compile time.
+
+
+ // 18:17 SIMD Mode 10 ---> SIMD16
+ // 16:12 Message Type xxxxx ---> 00000 (SIMD16)
+
+
+//r10-17 - 8 GRFs to load SIMD16 data (upto 4 channels)
+//r18-19 - 2 GRFs to store sampler ramp.
+
+ .declare mfSCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+ .declare muwSCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+ .declare mudCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+ .declare mubCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=1 SrcRegion=<32;32,1> DstRegion=<1> Type=ub
+
+
+ .declare fSCALING_0X_34X_TEMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+ .declare udSCALING_0X_34X_TEMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+ .declare ub4SCALING_0X_34X_TEMP Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<1> Type=ub
+ .declare uwSCALING_0X_34X_TEMP Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+
+ // Sampler ramp is used for Scaling 0X_0.34X
+ .declare fSAMPLER_RAMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> Type=f // 1 GRFs, 8 elements
+
+
+ //#define rMSGDSC_UV r23.0
+
+
+//End of _SCALING_
+
+
+ //NOTE: We need offsets for the second half of LAYER 0 - even if we do not load it.
+ //Update the channel offset in the buffers for the lower 8x4 data for BUFFER_0.
+ mov (1) r22.4<1>:ud 0x400040:ud
+
+ // Message header m0.3 <- r0.3. NOTE(review): presumably the sampler state pointer - confirm against the r0 payload layout.
+ mov (1) r16.3<1>:ud r0.3<0;1,0>:ud
+
+ // 0x50EB000 = msg length 2, response length 16 GRFs, header present, SIMD32/64, message type sample_8x8.
+ //AVS_PAYLOAD already has all the data loaded at this point
+ add (1) a0.0<1>:ud r23.5<0;1,0>:ud 0x50EB000:ud //msg desc; r23.5 presumably supplies the sampler/binding table index bits - TODO confirm
+
+ mov (1) r16.2<1>:ud 0x00000000:ud // Enable ARGB channels (m0.2 bits 15:12 cleared = all four channels written back)
+
+
+
+ // set the vertical block number
+ // r25 is the mirror of message register m1; r25.1 corresponds to m1.1 (Vertical Block Number).
+ mov (1) r25.1<1>:ud 0:ud
+
+ mov (8) r17.0<1>:ud r25.0<8;8,1>:ud // Copy msg payload mirror (r25) into r17, the register following header r16
+ // Message starts at r16; the response is written starting at uwBUFFER_0 (r64). NOTE(review): 0x2 is presumably the sampler SFID.
+ send (1) uwBUFFER_0(0)<1> r16 0x2 a0.0:ud
+ // Returns RGBA data in 16 GRFs in scrambled order
+ // The label below is a jump target only; no branch to it is visible in this file.
+SKIP_AVS_LOAD_L0_0_:
+ nop
+
+
diff --git a/src/shaders/post_processing/gen8/PA_AVS_Buf_1.g8a b/src/shaders/post_processing/gen8/PA_AVS_Buf_1.g8a
new file mode 100644
index 0000000..c93806d
--- /dev/null
+++ b/src/shaders/post_processing/gen8/PA_AVS_Buf_1.g8a
@@ -0,0 +1,457 @@
+/*
+ * Copyright 2000-2011 Intel Corporation All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+// 44 // Total instruction count
+// 1 // Total kernel count
+
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+
+.default_execution_size (16)
+.default_register_type :ub
+
+.reg_count_total 128
+.reg_count_payload 7
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+//Color Pipe (IECP) parameters
+
+
+//ByteCopy
+
+
+//r4
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later)
+// 0000,0000,0000
+// 0001(Spawn a root thread),0001 (Root thread spawn thread)
+// = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later)
+// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+// 0000,0000 (Binding table index, added later)
+// = 0x02000011
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+ // Message Descriptors
+ // = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+ // 1 (header present 1) 0 1010 (media block write) 000000
+ // 00000000 (binding table index - set later)
+ // = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+ // buf4 starts from r28
+
+
+//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT
+
+
+.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Define temp space for any usages
+
+
+// Common Buffers
+
+
+// temp space for rotation
+
+.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+
+.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+
+.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+
+.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+
+
+// End of common.inc
+
+
+// FileName: PL2_AVS_Buf_0.asm
+// Author: Tatiya, Rupesh
+// Description: Loads 8x8 AVS/IEF PL2 data into Buffer 0
+
+
+
+// FileName : PL2_AVS_Buf.asm
+// Author : Tatiya, Rupesh
+// Description : Loads 8x8 AVS/IEF PL2 data into Buffer N
+
+
+
+// Module name: Scaling.inc
+
+
+
+
+// Description: Includes all definitions explicit to Fast Composite.
+
+
+
+
+// End of common.inc
+
+
+//========== GRF partition ==========
+ // r0 header : r0 (1 GRF)
+ // Static parameters : r1 - r6 (6 GRFS)
+ // Inline parameters : r7 - r8 (2 GRFs)
+ // MSGSRC : r27 (1 GRF)
+//===================================
+
+//Interface:
+//========== Static Parameters (Explicit To Fast Composite) ==========
+//r1
+//CSC Set 0
+
+
+.declare udCSC_CURBE Base=r1.0 ElementSize=4 Type=ud
+
+//Constant alpha
+
+
+//r2
+
+
+// Gen7 AVS WA
+
+
+// WiDi Definitions
+
+
+//Colorfill
+
+
+ // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise.
+
+.declare ubCOLOR_PIXEL_VAL Base=r2.20 ElementSize=1 SrcRegion=<0;1,0> DstRegion=<1> Type=ub
+
+//r3
+//Normalised Ratio of Horizontal step size with main video for all layers
+
+
+ //Normalised Ratio of Horizontal step size with main video for all layers becomes
+ //Normalised Horizontal step size for all layers in VP_Setup.asm
+
+
+//r4
+//Normalised Vertical step size for all layers
+
+
+//r5
+//Normalised Vertical Frame Origin for all layers
+
+
+//r6
+//Normalised Horizontal Frame Origin for all layers
+
+
+//========== Inline Parameters (Explicit To Fast Composite) ==========
+
+
+//Main video Step X
+
+
+//====================== Binding table (Explicit To Fast Composite)=========================================
+
+
+//Used by Interlaced Scaling Kernels
+
+
+//========== Sampler State Table Index (Explicit To Fast Composite)==========
+//Sampler Index for AVS/IEF messages
+
+
+//Sampler Index for SIMD16 sampler messages
+
+
+//=============================================================================
+
+.declare fBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f // float views; BUFFER_0..3 start at r64/r80/r96/r112, BUFFER_4/5 at r28/r46
+.declare fBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+// Same six bases viewed as unsigned dwords.
+.declare udBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+// Same six bases viewed as unsigned words; uwBUFFER_1 is the destination of the send at the end of this kernel.
+.declare uwBUFFER_0 Base=r64.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_1 Base=r80.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_2 Base=r96.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_3 Base=r112.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_4 Base=r28.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_5 Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+// Same six bases viewed as unsigned bytes.
+.declare ubBUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+// Same six bases as bytes with a 4-byte step (<32;8,4> source, <4> destination).
+.declare ub4BUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+
+//Pointer to mask reg
+
+
+//r18
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+// NOTE(review): the definitions this note refers to are not visible in this listing (presumably removed by preprocessing).
+
+.declare udCSC_COEFF_0 Base=r18.0 ElementSize=4 Type=ud // 1 GRF, dword view of r18
+
+//r19
+
+
+.declare udCSC_COEFF_1 Base=r19.0 ElementSize=4 Type=ud // 1 GRF, dword view of r19
+
+
+//r20
+
+.declare uwALPHA_MASK_REG_TEMP Base=r20.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF, word view of r20
+
+//r21
+
+.declare uwALPHA_MASK_REG Base=r21.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF, word view of r21
+
+//r22
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as
+//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT
+
+//r23
+
+
+//Lumakey
+
+
+//r24
+
+
+//r25
+
+
+//r26
+
+
+//defines to generate LABELS during compile time.
+
+
+ // Message Header
+ // m0.7 31:0 Debug
+ // m0.6 31:0 Debug
+ // m0.5 31:0 Ignored
+ // m0.4 31:0 Ignored
+ // m0.3 31:0 Ignored
+ // m0.2 31:16 Ignored
+ // 15 Alpha Write Channel Mask enable=0, disable=1
+ // 14 Blue Write Channel Mask (U)
+ // 13 Green Write Channel Mask (Y)
+ // 12 Red Write Channel Mask (V)
+ // 11:0 Ignored
+ // m0.1 Ignored
+ // m0.0 Ignored
+
+
+ // AVS payload
+ // m1.7 Group ID Number
+ // m1.6 U 2nd Derivative ---> NLAS dx
+ // m1.5 Delta V ---> Step Y
+ // m1.4 Delta U ---> Step X
+ // m1.3 Pixel 0 V Address ---> ORIY (Y0)
+ // m1.2 Pixel 0 U Address ---> ORIX (X0)
+ // m1.1 Vertical Block Number
+ // m1.0 Reserved
+
+ // Sampler Message Descriptor
+ // 31:29 Reserved 000
+ // 28:25 Message length 0010
+ // 24:20 Response length xxxxx ---> 4GRFs for each enabled channel (AVS), 2GRFs for each enabled channel (sample unorm)
+ // 19 Header Present 1
+ // 18:17 SIMD Mode 11 ---> SIMD32/64
+ // 16:12 Message Type xxxxx ---> 01011 sample_8x8, 01100 (sample_unorm), 01010 (sample_unorm+killpix)
+ // 11:8 Sampler Index xxxx
+ // 7:0 Binding Table Index xxxxxxxx
+
+
+ // Msg Header M0.2
+ // 15:15 Alpha Write Channel Mask, 0: written back, 1: not written back
+ // 14:14 Blue Write Channel Mask
+ // 13:13 Green Write Channel Mask
+ // 12:12 Red Write Channel Mask
+
+
+//By design, Buffer 0,1,2,3 always have Layer 0 and Buffer 4,5 always have L1-L7
+
+
+//used to generate LABELS at compile time.
+
+
+ // 18:17 SIMD Mode 10 ---> SIMD16
+ // 16:12 Message Type xxxxx ---> 00000 (SIMD16)
+
+
+//r10-17 - 8 GRFs to load SIMD16 data (up to 4 channels)
+//r18-19 - 2 GRFs to store sampler ramp.
+// NOTE(review): the declares below are based at r14 (payload) and r9 (temp / sampler ramp), not at the ranges listed above - confirm which is current.
+ .declare mfSCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+ .declare muwSCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+ .declare mudCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud // NOTE(review): name lacks the "S" of its mf/muw siblings - looks like a typo; check users before renaming
+ .declare mubCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=1 SrcRegion=<32;32,1> DstRegion=<1> Type=ub // NOTE(review): same missing "S" as mudCALING above
+
+ // Typed views of r9.0, the same base as fSAMPLER_RAMP below.
+ .declare fSCALING_0X_34X_TEMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+ .declare udSCALING_0X_34X_TEMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+ .declare ub4SCALING_0X_34X_TEMP Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<1> Type=ub // NOTE(review): DstRegion is <1> here but <4> on the ub4BUFFER_n declares - confirm intended
+ .declare uwSCALING_0X_34X_TEMP Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+
+ // Sampler ramp is used for Scaling 0X_0.34X
+ .declare fSAMPLER_RAMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> Type=f // 1 GRF, 8 elements
+
+
+ //#define rMSGDSC_UV r23.0
+
+
+//End of _SCALING_
+
+
+ //NOTE: We need offsets for second half of LAYER 0 - even if we do not load it.
+ //Update the channel offset in the buffers for the lower 8x4 data for BUFFER_0.
+ mov (1) r22.4<1>:ud 0x400040:ud // single dword write; its low and high 16-bit halves are both 0x0040
+
+ // Build the two-register message read by the send below: r16 = header ("m0"), r17 = AVS payload ("m1").
+ mov (1) r16.3<1>:ud r0.3<0;1,0>:ud // header dword 3 <- r0.3
+
+ // Per the "Sampler Message Descriptor" table above, 0x050EB000 = msg len 2, resp len 16, header present, SIMD mode 11, message type 01011 (sample_8x8).
+ //AVS_PAYLOAD already has all the data loaded at this point
+ add (1) a0.0<1>:ud r23.5<0;1,0>:ud 0x50EB000:ud //msg desc = r23.5 + immediate (r23.5 presumably carries sampler / binding table index bits - confirm)
+
+ mov (1) r16.2<1>:ud 0x00000000:ud // Enable ARGB channels (m0.2 bits 15:12 are write-disable masks, so 0 enables all four)
+
+
+ // r25 is copied whole into r17 below; its dword 1 is "Vertical Block Number" in the m1 layout above.
+ // set the vertical block number
+
+ mov (1) r25.1<1>:ud 1:ud
+
+ mov (8) r17.0<1>:ud r25.0<8;8,1>:ud // Copy msg payload mirrors to MRFs (here: all 8 dwords of r25 -> r17)
+
+ send (1) uwBUFFER_1(0)<1> r16 0x2 a0.0:ud // response is written starting at uwBUFFER_1 (r80); descriptor is taken from a0.0
+ // Returns RGBA data in 16 GRFs in scrambled order
+// NOTE(review): no jump to the label below is visible in this part of the file; presumably left over from label-generating macros.
+SKIP_AVS_LOAD_L0_0_:
+ nop
+
+
diff --git a/src/shaders/post_processing/gen8/PA_AVS_Buf_2.g8a b/src/shaders/post_processing/gen8/PA_AVS_Buf_2.g8a
new file mode 100644
index 0000000..2cfc90c
--- /dev/null
+++ b/src/shaders/post_processing/gen8/PA_AVS_Buf_2.g8a
@@ -0,0 +1,457 @@
+/*
+ * Copyright 2000-2011 Intel Corporation All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+// 44 // Total instruction count
+// 1 // Total kernel count
+
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+
+.default_execution_size (16) // execution size assumed for instructions that do not state one
+.default_register_type :ub // operand type assumed when none is written
+// Register counts declared for this kernel:
+.reg_count_total 128
+.reg_count_payload 7 // NOTE(review): presumably r0 plus static parameters r1-r6 (see "GRF partition" below) - confirm
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+//Color Pipe (IECP) parameters
+
+
+//ByteCopy
+
+
+//r4
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later)
+// 0000,0000,0000
+// 0001(Spawn a root thread),0001 (Root thread spawn thread)
+// = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later)
+// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+// 0000,0000 (Binding table index, added later)
+// = 0x0C098700
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+ // Message Descriptors
+ // = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+ // 1 (header present 1) 0 1010 (media block write) 000000
+ // 00000000 (binding table index - set later)
+ // = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+ // buf4 starts from r28
+
+
+//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT
+
+
+.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub // byte view; all four declares here alias the same registers starting at r30
+.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw // word view
+.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud // dword view
+.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f // float view
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Define temp space for any usages
+
+
+// Common Buffers
+
+
+// temp space for rotation
+// Five typed views of the same registers starting at r9.0 (float, dword, word, byte, strided byte).
+.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+
+.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+
+.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+
+.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub // <32;8,4> source / <4> destination: steps 4 bytes between elements
+
+
+// End of common.inc
+
+
+// FileName: PL2_AVS_Buf_0.asm
+// Author: Tatiya, Rupesh
+// Description: Loads 8x8 AVS/IEF PL2 data into Buffer 0
+
+
+
+// FileName : PL2_AVS_Buf.asm
+// Author : Tatiya, Rupesh
+// Description : Loads 8x8 AVS/IEF PL2 data into Buffer N
+
+
+
+// Module name: Scaling.inc
+
+
+
+
+// Description: Includes all definitions explicit to Fast Composite.
+
+
+
+
+// End of common.inc
+
+
+//========== GRF partition ==========
+ // r0 header : r0 (1 GRF)
+ // Static parameters : r1 - r6 (6 GRFS)
+ // Inline parameters : r7 - r8 (2 GRFs)
+ // MSGSRC : r27 (1 GRF)
+//===================================
+
+//Interface:
+//========== Static Parameters (Explicit To Fast Composite) ==========
+//r1
+//CSC Set 0
+
+
+.declare udCSC_CURBE Base=r1.0 ElementSize=4 Type=ud // dword view of r1, the register the "CSC Set 0" heading above is filed under
+
+//Constant alpha
+
+
+//r2
+
+
+// Gen7 AVS WA
+
+
+// WiDi Definitions
+
+
+//Colorfill
+
+
+ // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise.
+
+.declare ubCOLOR_PIXEL_VAL Base=r2.20 ElementSize=1 SrcRegion=<0;1,0> DstRegion=<1> Type=ub // byte element 20 of r2; the <0;1,0> source region re-reads that one byte for every channel
+
+//r3
+//Normalised Ratio of Horizontal step size with main video for all layers
+
+
+ //Normalised Ratio of Horizontal step size with main video for all layers becomes
+ //Normalised Horizontal step size for all layers in VP_Setup.asm
+
+
+//r4
+//Normalised Vertical step size for all layers
+
+
+//r5
+//Normalised Vertical Frame Origin for all layers
+
+
+//r6
+//Normalised Horizontal Frame Origin for all layers
+
+
+//========== Inline Parameters (Explicit To Fast Composite) ==========
+
+
+//Main video Step X
+
+
+//====================== Binding table (Explicit To Fast Composite)=========================================
+
+
+//Used by Interlaced Scaling Kernels
+
+
+//========== Sampler State Table Index (Explicit To Fast Composite)==========
+//Sampler Index for AVS/IEF messages
+
+
+//Sampler Index for SIMD16 sampler messages
+
+
+//=============================================================================
+
+.declare fBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f // float views; BUFFER_0..3 start at r64/r80/r96/r112, BUFFER_4/5 at r28/r46
+.declare fBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+// Same six bases viewed as unsigned dwords.
+.declare udBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+// Same six bases viewed as unsigned words; uwBUFFER_2 is the destination of the send at the end of this kernel.
+.declare uwBUFFER_0 Base=r64.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_1 Base=r80.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_2 Base=r96.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_3 Base=r112.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_4 Base=r28.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_5 Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+// Same six bases viewed as unsigned bytes.
+.declare ubBUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+// Same six bases as bytes with a 4-byte step (<32;8,4> source, <4> destination).
+.declare ub4BUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+
+//Pointer to mask reg
+
+
+//r18
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+// NOTE(review): the definitions this note refers to are not present in this listing (presumably removed by preprocessing).
+
+.declare udCSC_COEFF_0 Base=r18.0 ElementSize=4 Type=ud // 1 GRF, dword view of r18
+
+//r19
+
+
+.declare udCSC_COEFF_1 Base=r19.0 ElementSize=4 Type=ud // 1 GRF, dword view of r19
+
+
+//r20
+
+.declare uwALPHA_MASK_REG_TEMP Base=r20.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF, word view of r20
+
+//r21
+
+.declare uwALPHA_MASK_REG Base=r21.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF, word view of r21
+
+//r22
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as
+//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT
+
+//r23
+
+
+//Lumakey
+
+
+//r24
+
+
+//r25
+
+
+//r26
+
+
+//defines to generate LABELS during compile time.
+
+
+ // Message Header
+ // m0.7 31:0 Debug
+ // m0.6 31:0 Debug
+ // m0.5 31:0 Ignored
+ // m0.4 31:0 Ignored
+ // m0.3 31:0 Ignored
+ // m0.2 31:16 Ignored
+ // 15 Alpha Write Channel Mask enable=0, disable=1
+ // 14 Blue Write Channel Mask (U)
+ // 13 Green Write Channel Mask (Y)
+ // 12 Red Write Channel Mask (V)
+ // 11:0 Ignored
+ // m0.1 Ignored
+ // m0.0 Ignored
+
+
+ // AVS payload
+ // m1.7 Group ID Number
+ // m1.6 U 2nd Derivative ---> NLAS dx
+ // m1.5 Delta V ---> Step Y
+ // m1.4 Delta U ---> Step X
+ // m1.3 Pixel 0 V Address ---> ORIY (Y0)
+ // m1.2 Pixel 0 U Address ---> ORIX (X0)
+ // m1.1 Vertical Block Number
+ // m1.0 Reserved
+
+ // Sampler Message Descriptor
+ // 31:29 Reserved 000
+ // 28:25 Message length 0010
+ // 24:20 Response length xxxxx ---> 4GRFs for each enabled channel (AVS), 2GRFs for each enabled channel (sample unorm)
+ // 19 Header Present 1
+ // 18:17 SIMD Mode 11 ---> SIMD32/64
+ // 16:12 Message Type xxxxx ---> 01011 sample_8x8, 01100 (sample_unorm), 01010 (sample_unorm+killpix)
+ // 11:8 Sampler Index xxxx
+ // 7:0 Binding Table Index xxxxxxxx
+
+
+ // Msg Header M0.2
+ // 15:15 Alpha Write Channel Mask, 0: written back, 1: not written back
+ // 14:14 Blue Write Channel Mask
+ // 13:13 Green Write Channel Mask
+ // 12:12 Red Write Channel Mask
+
+
+//By design, Buffer 0,1,2,3 always have Layer 0 and Buffer 4,5 always have L1-L7
+
+
+//used to generate LABELS at compile time.
+
+
+ // 18:17 SIMD Mode 10 ---> SIMD16
+ // 16:12 Message Type xxxxx ---> 00000 (SIMD16)
+
+
+//r10-17 - 8 GRFs to load SIMD16 data (up to 4 channels)
+//r18-19 - 2 GRFs to store sampler ramp.
+// NOTE(review): the declares below are based at r14 (payload) and r9 (temp / sampler ramp), not at the ranges listed above - confirm which is current.
+ .declare mfSCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+ .declare muwSCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+ .declare mudCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud // NOTE(review): name lacks the "S" of its mf/muw siblings - looks like a typo; check users before renaming
+ .declare mubCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=1 SrcRegion=<32;32,1> DstRegion=<1> Type=ub // NOTE(review): same missing "S" as mudCALING above
+
+ // Typed views of r9.0, the same base as the ROBUF declares and fSAMPLER_RAMP.
+ .declare fSCALING_0X_34X_TEMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+ .declare udSCALING_0X_34X_TEMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+ .declare ub4SCALING_0X_34X_TEMP Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<1> Type=ub // NOTE(review): DstRegion is <1> here but <4> on ub4ROBUF / ub4BUFFER_n - confirm intended
+ .declare uwSCALING_0X_34X_TEMP Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+
+ // Sampler ramp is used for Scaling 0X_0.34X
+ .declare fSAMPLER_RAMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> Type=f // 1 GRF, 8 elements
+
+
+ //#define rMSGDSC_UV r23.0
+
+
+//End of _SCALING_
+
+
+ //NOTE: We need offsets for second half of LAYER 0 - even if we do not load it.
+ //Update the channel offset in the buffers for the lower 8x4 data for BUFFER_0.
+ mov (1) r22.4<1>:ud 0x400040:ud // single dword write; its low and high 16-bit halves are both 0x0040
+
+ // Build the two-register message read by the send below: r16 = header ("m0"), r17 = AVS payload ("m1").
+ mov (1) r16.3<1>:ud r0.3<0;1,0>:ud // header dword 3 <- r0.3
+
+ // Per the "Sampler Message Descriptor" table above, 0x050EB000 = msg len 2, resp len 16, header present, SIMD mode 11, message type 01011 (sample_8x8).
+ //AVS_PAYLOAD already has all the data loaded at this point
+ add (1) a0.0<1>:ud r23.5<0;1,0>:ud 0x50EB000:ud //msg desc = r23.5 + immediate (r23.5 presumably carries sampler / binding table index bits - confirm)
+
+ mov (1) r16.2<1>:ud 0x00000000:ud // Enable ARGB channels (m0.2 bits 15:12 are write-disable masks, so 0 enables all four)
+
+
+ // r25 is copied whole into r17 below; its dword 1 is "Vertical Block Number" in the m1 layout above.
+ // set the vertical block number
+
+ mov (1) r25.1<1>:ud 2:ud
+
+ mov (8) r17.0<1>:ud r25.0<8;8,1>:ud // Copy msg payload mirrors to MRFs (here: all 8 dwords of r25 -> r17)
+
+ send (1) uwBUFFER_2(0)<1> r16 0x2 a0.0:ud // response is written starting at uwBUFFER_2 (r96); descriptor is taken from a0.0
+ // Returns RGBA data in 16 GRFs in scrambled order
+// NOTE(review): nothing in this kernel jumps to the label below; presumably left over from label-generating macros.
+SKIP_AVS_LOAD_L0_0_:
+ nop
+
+
diff --git a/src/shaders/post_processing/gen8/PA_AVS_Buf_3.g8a b/src/shaders/post_processing/gen8/PA_AVS_Buf_3.g8a
new file mode 100644
index 0000000..0cbc4ba
--- /dev/null
+++ b/src/shaders/post_processing/gen8/PA_AVS_Buf_3.g8a
@@ -0,0 +1,457 @@
+/*
+ * Copyright 2000-2011 Intel Corporation All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+// 44 // Total instruction count
+// 1 // Total kernel count
+
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+
+.default_execution_size (16) // execution size assumed for instructions that do not state one
+.default_register_type :ub // operand type assumed when none is written
+// Register counts declared for this kernel:
+.reg_count_total 128
+.reg_count_payload 7 // NOTE(review): presumably r0 plus static parameters r1-r6 (see "GRF partition" below) - confirm
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+//Color Pipe (IECP) parameters
+
+
+//ByteCopy
+
+
+//r4
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later)
+// 0000,0000,0000
+// 0001(Spawn a root thread),0001 (Root thread spawn thread)
+// = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later)
+// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+// 0000,0000 (Binding table index, added later)
+// = 0x0C098700
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+ // Message Descriptors
+ // = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+ // 1 (header present 1) 0 1010 (media block write) 000000
+ // 00000000 (binding table index - set later)
+ // = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+ // buf4 starts from r28
+
+
+//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT
+
+
+.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub // byte view; all four declares here alias the same registers starting at r30
+.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw // word view
+.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud // dword view
+.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f // float view
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Define temp space for any usages
+
+
+// Common Buffers
+
+
+// temp space for rotation
+// Five typed views of the same registers starting at r9.0 (float, dword, word, byte, strided byte).
+.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+
+.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+
+.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+
+.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub // <32;8,4> source / <4> destination: steps 4 bytes between elements
+
+
+// End of common.inc
+
+
+// FileName: PL2_AVS_Buf_0.asm
+// Author: Tatiya, Rupesh
+// Description: Loads 8x8 AVS/IEF PL2 data into Buffer 0
+
+
+
+// FileName : PL2_AVS_Buf.asm
+// Author : Tatiya, Rupesh
+// Description : Loads 8x8 AVS/IEF PL2 data into Buffer N
+
+
+
+// Module name: Scaling.inc
+
+
+
+
+// Description: Includes all definitions explicit to Fast Composite.
+
+
+
+
+// End of common.inc
+
+
+//========== GRF partition ==========
+ // r0 header : r0 (1 GRF)
+ // Static parameters : r1 - r6 (6 GRFS)
+ // Inline parameters : r7 - r8 (2 GRFs)
+ // MSGSRC : r27 (1 GRF)
+//===================================
+
+//Interface:
+//========== Static Parameters (Explicit To Fast Composite) ==========
+//r1
+//CSC Set 0
+
+
+.declare udCSC_CURBE Base=r1.0 ElementSize=4 Type=ud // dword view of r1, the register the "CSC Set 0" heading above is filed under
+
+//Constant alpha
+
+
+//r2
+
+
+// Gen7 AVS WA
+
+
+// WiDi Definitions
+
+
+//Colorfill
+
+
+ // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise.
+
+.declare ubCOLOR_PIXEL_VAL Base=r2.20 ElementSize=1 SrcRegion=<0;1,0> DstRegion=<1> Type=ub // byte element 20 of r2; the <0;1,0> source region re-reads that one byte for every channel
+
+//r3
+//Normalised Ratio of Horizontal step size with main video for all layers
+
+
+ //Normalised Ratio of Horizontal step size with main video for all layers becomes
+ //Normalised Horizontal step size for all layers in VP_Setup.asm
+
+
+//r4
+//Normalised Vertical step size for all layers
+
+
+//r5
+//Normalised Vertical Frame Origin for all layers
+
+
+//r6
+//Normalised Horizontal Frame Origin for all layers
+
+
+//========== Inline Parameters (Explicit To Fast Composite) ==========
+
+
+//Main video Step X
+
+
+//====================== Binding table (Explicit To Fast Composite)=========================================
+
+
+//Used by Interlaced Scaling Kernels
+
+
+//========== Sampler State Table Index (Explicit To Fast Composite)==========
+//Sampler Index for AVS/IEF messages
+
+
+//Sampler Index for SIMD16 sampler messages
+
+
+//=============================================================================
+
+.declare fBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f // float views; BUFFER_0..3 start at r64/r80/r96/r112, BUFFER_4/5 at r28/r46
+.declare fBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+// Same six bases viewed as unsigned dwords.
+.declare udBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+// Same six bases viewed as unsigned words; uwBUFFER_3 is the destination of the send at the end of this kernel.
+.declare uwBUFFER_0 Base=r64.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_1 Base=r80.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_2 Base=r96.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_3 Base=r112.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_4 Base=r28.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_5 Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+// Same six bases viewed as unsigned bytes.
+.declare ubBUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+// Same six bases as bytes with a 4-byte step (<32;8,4> source, <4> destination).
+.declare ub4BUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+
+//Pointer to mask reg
+
+
+//r18
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+// NOTE(review): the definitions this note refers to are not present in this listing (presumably removed by preprocessing).
+
+.declare udCSC_COEFF_0 Base=r18.0 ElementSize=4 Type=ud // 1 GRF, dword view of r18
+
+//r19
+
+
+.declare udCSC_COEFF_1 Base=r19.0 ElementSize=4 Type=ud // 1 GRF, dword view of r19
+
+
+//r20
+
+.declare uwALPHA_MASK_REG_TEMP Base=r20.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF, word view of r20
+
+//r21
+
+.declare uwALPHA_MASK_REG Base=r21.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF, word view of r21
+
+//r22
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as
+//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT
+
+//r23
+
+
+//Lumakey
+
+
+//r24
+
+
+//r25
+
+
+//r26
+
+
+//defines to generate LABELS during compile time.
+
+
+ // Message Header
+ // m0.7 31:0 Debug
+ // m0.6 31:0 Debug
+ // m0.5 31:0 Ignored
+ // m0.4 31:0 Ignored
+ // m0.3 31:0 Ignored
+ // m0.2 31:16 Ignored
+ // 15 Alpha Write Channel Mask enable=0, disable=1
+ // 14 Blue Write Channel Mask (U)
+ // 13 Green Write Channel Mask (Y)
+ // 12 Red Write Channel Mask (V)
+ // 11:0 Ignored
+ // m0.1 Ignored
+ // m0.0 Ignored
+
+
+ // AVS payload
+ // m1.7 Group ID Number
+ // m1.6 U 2nd Derivative ---> NLAS dx
+ // m1.5 Delta V ---> Step Y
+ // m1.4 Delta U ---> Step X
+ // m1.3 Pixel 0 V Address ---> ORIY (Y0)
+ // m1.2 Pixel 0 U Address ---> ORIX (X0)
+ // m1.1 Vertical Block Number
+ // m1.0 Reserved
+
+ // Sampler Message Descriptor
+ // 31:29 Reserved 000
+ // 28:25 Message length 0010
+ // 24:20 Response length xxxxx ---> 4GRFs for each enabled channel (AVS), 2GRFs for each enabled channel (sample unorm)
+ // 19 Header Present 1
+ // 18:17 SIMD Mode 11 ---> SIMD32/64
+ // 16:12 Message Type xxxxx ---> 01011 sample_8x8, 01100 (sample_unorm), 01010 (sample_unorm+killpix)
+ // 11:8 Sampler Index xxxx
+ // 7:0 Binding Table Index xxxxxxxx
+
+
+ // Msg Header M0.2
+ // 15:15 Alpha Write Channel Mask, 0: written back, 1: not written back
+ // 14:14 Blue Write Channel Mask
+ // 13:13 Green Write Channel Mask
+ // 12:12 Red Write Channel Mask
+
+
+//By design, Buffer 0,1,2,3 always have Layer 0 and Buffer 4,5 always have L1-L7
+
+
+//used to generate LABELS at compile time.
+
+
+ // 18:17 SIMD Mode 10 ---> SIMD16
+ // 16:12 Message Type xxxxx ---> 00000 (SIMD16)
+
+
+//r10-17 - 8 GRFs to load SIMD16 data (up to 4 channels)
+//r18-19 - 2 GRFs to store sampler ramp.
+// NOTE(review): the declares below are based at r14 (payload) and r9 (temp / sampler ramp), not at the ranges listed above - confirm which is current.
+ .declare mfSCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+ .declare muwSCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+ .declare mudCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud // NOTE(review): name lacks the "S" of its mf/muw siblings - looks like a typo; check users before renaming
+ .declare mubCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=1 SrcRegion=<32;32,1> DstRegion=<1> Type=ub // NOTE(review): same missing "S" as mudCALING above
+
+ // Typed views of r9.0, the same base as the ROBUF declares and fSAMPLER_RAMP.
+ .declare fSCALING_0X_34X_TEMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+ .declare udSCALING_0X_34X_TEMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+ .declare ub4SCALING_0X_34X_TEMP Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<1> Type=ub // NOTE(review): DstRegion is <1> here but <4> on ub4ROBUF / ub4BUFFER_n - confirm intended
+ .declare uwSCALING_0X_34X_TEMP Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+
+ // Sampler ramp is used for Scaling 0X_0.34X
+ .declare fSAMPLER_RAMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> Type=f // 1 GRF, 8 elements
+
+
+ //#define rMSGDSC_UV r23.0
+
+
+//End of _SCALING_
+
+
+ //NOTE: We need offsets for second half of LAYER 0 - even if we do not load it.
+ //Update the channel offset in the buffers for the lower 8x4 data for BUFFER_0.
+ mov (1) r22.4<1>:ud 0x400040:ud // single dword write; its low and high 16-bit halves are both 0x0040
+
+ // Build the two-register message read by the send below: r16 = header ("m0"), r17 = AVS payload ("m1").
+ mov (1) r16.3<1>:ud r0.3<0;1,0>:ud // header dword 3 <- r0.3
+
+ // Per the "Sampler Message Descriptor" table above, 0x050EB000 = msg len 2, resp len 16, header present, SIMD mode 11, message type 01011 (sample_8x8).
+ //AVS_PAYLOAD already has all the data loaded at this point
+ add (1) a0.0<1>:ud r23.5<0;1,0>:ud 0x50EB000:ud //msg desc = r23.5 + immediate (r23.5 presumably carries sampler / binding table index bits - confirm)
+
+ mov (1) r16.2<1>:ud 0x00000000:ud // Enable ARGB channels (m0.2 bits 15:12 are write-disable masks, so 0 enables all four)
+
+
+ // r25 is copied whole into r17 below; its dword 1 is "Vertical Block Number" in the m1 layout above.
+ // set the vertical block number
+
+ mov (1) r25.1<1>:ud 3:ud
+
+ mov (8) r17.0<1>:ud r25.0<8;8,1>:ud // Copy msg payload mirrors to MRFs (here: all 8 dwords of r25 -> r17)
+
+ send (1) uwBUFFER_3(0)<1> r16 0x2 a0.0:ud // response is written starting at uwBUFFER_3 (r112); descriptor is taken from a0.0
+ // Returns RGBA data in 16 GRFs in scrambled order
+// NOTE(review): nothing in this kernel jumps to the label below; presumably left over from label-generating macros.
+SKIP_AVS_LOAD_L0_0_:
+ nop
+
+
diff --git a/src/shaders/post_processing/gen8/PL2_AVS_Buf_0.g8a b/src/shaders/post_processing/gen8/PL2_AVS_Buf_0.g8a
new file mode 100644
index 0000000..bbff22c
--- /dev/null
+++ b/src/shaders/post_processing/gen8/PL2_AVS_Buf_0.g8a
@@ -0,0 +1,462 @@
+/*
+ * Copyright 2000-2011 Intel Corporation All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+// 44 // Total instruction count
+// 1 // Total kernel count
+
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+
+.default_execution_size (16) // execution size used when an instruction does not state one
+.default_register_type :ub // operand type used when a register operand has no type suffix
+
+.reg_count_total 128 // total number of GRFs declared for the kernel
+.reg_count_payload 7 // NOTE(review): presumably the payload GRF count; the "GRF partition" comment lists r0 plus r1-r6 (7 registers) - confirm
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+//Color Pipe (IECP) parameters
+
+
+//ByteCopy
+
+
+//r4
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later)
+// 0000,0000,0000
+// 0001(Spawn a root thread),0001 (Root thread spawn thread)
+// = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later)
+// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+// 0000,0000 (Binding table index, added later)
+// = 0x0C098700
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+ // Message Descriptors
+ // = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+ // 1 (header present 1) 0 1010 (media block write) 000000
+ // 00000000 (binding table index - set later)
+ // = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+ // buf4 starts from r28
+
+
+//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT
+
+
+.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub // byte view of the message payload area based at r30
+.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw // word view, same base
+.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud // dword view, same base
+.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f // float view, same base; NOTE(review): BUFFER_4 is based at r28 in this file, so the two areas presumably overlap - confirm
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Define temp space for any usages
+
+
+// Common Buffers
+
+
+// temp space for rotation
+
+.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f // float view of the temporary area that starts at r9
+
+.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud // unsigned dword view, same base
+
+.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw // unsigned word view, same base
+
+.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub // unsigned byte view, contiguous bytes
+
+.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub // unsigned byte view stepping 4 bytes between elements (source and destination)
+
+
+// End of common.inc
+
+
+// FileName: PL2_AVS_Buf_0.asm
+// Author: Tatiya, Rupesh
+// Description: Loads 8x8 AVS/IEF PL2 data into Buffer 0
+
+
+
+// FileName : PL2_AVS_Buf.asm
+// Author : Tatiya, Rupesh
+// Description : Loads 8x8 AVS/IEF PL2 data into Buffer N
+
+
+
+// Module name: Scaling.inc
+
+
+
+
+// Description: Includes all definitions explicit to Fast Composite.
+
+
+
+
+// End of common.inc
+
+
+//========== GRF partition ==========
+ // r0 header : r0 (1 GRF)
+ // Static parameters : r1 - r6 (6 GRFS)
+ // Inline parameters : r7 - r8 (2 GRFs)
+ // MSGSRC : r27 (1 GRF)
+//===================================
+
+//Interface:
+//========== Static Parameters (Explicit To Fast Composite) ==========
+//r1
+//CSC Set 0
+
+
+.declare udCSC_CURBE Base=r1.0 ElementSize=4 Type=ud // dword view of r1, the first static-parameter register ("CSC Set 0" per the heading above)
+
+//Constant alpha
+
+
+//r2
+
+
+// Gen7 AVS WA
+
+
+// WiDi Definitions
+
+
+//Colorfill
+
+
+ // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise.
+
+.declare ubCOLOR_PIXEL_VAL Base=r2.20 ElementSize=1 SrcRegion=<0;1,0> DstRegion=<1> Type=ub // byte view at r2.20; the <0;1,0> source region repeats one element for every channel
+
+//r3
+//Normalised Ratio of Horizontal step size with main video for all layers
+
+
+ //Normalised Ratio of Horizontal step size with main video for all layers becomes
+ //Normalised Horizontal step size for all layers in VP_Setup.asm
+
+
+//r4
+//Normalised Vertical step size for all layers
+
+
+//r5
+//Normalised Vertical Frame Origin for all layers
+
+
+//r6
+//Normalised Horizontal Frame Origin for all layers
+
+
+//========== Inline Parameters (Explicit To Fast Composite) ==========
+
+
+//Main video Step X
+
+
+//====================== Binding table (Explicit To Fast Composite)=========================================
+
+
+//Used by Interlaced Scaling Kernels
+
+
+//========== Sampler State Table Index (Explicit To Fast Composite)==========
+//Sampler Index for AVS/IEF messages
+
+
+//Sampler Index for SIMD16 sampler messages
+
+
+//=============================================================================
+
+.declare fBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f // float views of the six data buffers; buffers 0-3 are based 16 GRFs apart (r64, r80, r96, r112)
+.declare fBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f // buffers 4 and 5 are based at r28 and r46
+.declare fBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+
+.declare udBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud // unsigned dword views over the same base registers
+.declare udBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+
+.declare uwBUFFER_0 Base=r64.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw // unsigned word views over the same base registers
+.declare uwBUFFER_1 Base=r80.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_2 Base=r96.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_3 Base=r112.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_4 Base=r28.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_5 Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+
+.declare ubBUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub // unsigned byte views, contiguous bytes
+.declare ubBUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+.declare ub4BUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub // unsigned byte views stepping 4 bytes between elements (source and destination)
+.declare ub4BUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+
+//Pointer to mask reg
+
+
+//r18
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+.declare udCSC_COEFF_0 Base=r18.0 ElementSize=4 Type=ud // 1 GRF: dword view of r18
+
+//r19
+
+
+.declare udCSC_COEFF_1 Base=r19.0 ElementSize=4 Type=ud // 1 GRF: dword view of r19
+
+
+//r20
+
+.declare uwALPHA_MASK_REG_TEMP Base=r20.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF: 16 words of r20
+
+//r21
+
+.declare uwALPHA_MASK_REG Base=r21.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF: 16 words of r21
+
+//r22
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as
+//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT
+
+//r23
+
+
+//Lumakey
+
+
+//r24
+
+
+//r25
+
+
+//r26
+
+
+//defines to generate LABELS during compile time.
+
+
+ // Message Header
+ // m0.7 31:0 Debug
+ // m0.6 31:0 Debug
+ // m0.5 31:0 Ignored
+ // m0.4 31:0 Ignored
+ // m0.3 31:0 Ignored
+ // m0.2 31:16 Ignored
+ // 15 Alpha Write Channel Mask enable=0, disable=1
+ // 14 Blue Write Channel Mask (U)
+ // 13 Green Write Channel Mask (Y)
+ // 12 Red Write Channel Mask (V)
+ // 11:0 Ignored
+ // m0.1 Ignored
+ // m0.0 Ignored
+
+
+ // AVS payload
+ // m1.7 Group ID Number
+ // m1.6 U 2nd Derivative ---> NLAS dx
+ // m1.5 Delta V ---> Step Y
+ // m1.4 Delta U ---> Step X
+ // m1.3 Pixel 0 V Address ---> ORIY (Y0)
+ // m1.2 Pixel 0 U Address ---> ORIX (X0)
+ // m1.1 Vertical Block Number
+ // m1.0 Reserved
+
+ // Sampler Message Descriptor
+ // 31:29 Reserved 000
+ // 28:25 Message length 0010
+ // 24:20 Response length xxxxx ---> 4GRFs for each enabled channel (AVS), 2GRFs for each enabled channel (sample unorm)
+ // 19 Header Present 1
+ // 18:17 SIMD Mode 11 ---> SIMD32/64
+ // 16:12 Message Type xxxxx ---> 01011 sample_8x8, 01100 (sample_unorm), 01010 (sample_unorm+killpix)
+ // 11:8 Sampler Index xxxx
+ // 7:0 Binding Table Index xxxxxxxx
+
+
+ // Msg Header M0.2
+ // 15:15 Alpha Write Channel Mask, 0: written back, 1: not written back
+ // 14:14 Blue Write Channel Mask
+ // 13:13 Green Write Channel Mask
+ // 12:12 Red Write Channel Mask
+
+
+//By design, Buffer 0,1,2,3 always have Layer 0 and Buffer 4,5 always have L1-L7
+
+
+//used to generate LABELS at compile time.
+
+
+ // 18:17 SIMD Mode 10 ---> SIMD16
+ // 16:12 Message Type xxxxx ---> 00000 (SIMD16)
+
+
+//r10-17 - 8 GRFs to load SIMD16 data (upto 4 channels)
+//r18-19 - 2 GRFs to store sampler ramp.
+
+ .declare mfSCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f // float view of the payload area based at r14
+ .declare muwSCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw // unsigned word view, same base
+ .declare mudCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud // unsigned dword view; NOTE(review): name lacks the "S" of its siblings ("mudCALING") - left as is, confirm nothing depends on the spelling before renaming
+ .declare mubCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=1 SrcRegion=<32;32,1> DstRegion=<1> Type=ub // unsigned byte view; same spelling remark ("mubCALING")
+
+ // Typed views of the temporary area based at r9 (the same base register as fSAMPLER_RAMP below).
+ .declare fSCALING_0X_34X_TEMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+ .declare udSCALING_0X_34X_TEMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+ .declare ub4SCALING_0X_34X_TEMP Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<1> Type=ub // NOTE(review): the ub4ROBUF / ub4BUFFER_* views use DstRegion=<4>; confirm <1> is intended here
+ .declare uwSCALING_0X_34X_TEMP Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+
+ // Sampler ramp is used for Scaling 0X_0.34X
+ .declare fSAMPLER_RAMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> Type=f // 1 GRF, 8 elements
+
+
+ //#define rMSGDSC_UV r23.0
+
+
+//End of _SCALING_
+
+
+ //NOTE: We need offsets for the second half of LAYER 0 - even if we do not load it.
+ //Update the channel offset in the buffers for the lower 8x4 data for BUFFER_0.
+ mov (1) r22.4<1>:ud 0x400040:ud // single dword store; the constant packs 0x0040 into both 16-bit halves
+
+ // Build the 2-register sampler message: r16 is the header (m0), r17 the AVS payload (m1).
+ mov (1) r16.3<1>:ud r0.3<0;1,0>:ud // m0.3 <- dword 3 of the thread header r0
+
+
+ //AVS_PAYLOAD already has all the data loaded at this point
+ add (1) a0.0<1>:ud r23.5<0;1,0>:ud 0x44EB000:ud //msg desc for Y: length 2, response 4 GRFs, header present, sample_8x8; r23.5 is added in (presumably sampler and binding-table indices - confirm)
+
+ mov (1) r16.2<1>:ud 0x0000D000:ud // m0.2 mask: bits 15, 14 and 12 set (alpha, blue, red not written back); only Green (Y) is returned
+
+
+
+ // set the vertical block number
+ // (nothing is stored here for Buffer 0: r25.1 keeps its incoming value, whereas the Buffer 1 variant writes 1)
+
+ mov (8) r17.0<1>:ud r25.0<8;8,1>:ud // Copy the payload mirror kept in r25 into r17, the second message register
+
+ send (1) uwBUFFER_0(0)<1> r16 0x2 a0.0:ud
+ // Returns Y data in 4 GRFs in scrambled order
+
+ add (1) a0.0<1>:ud r23.5<0;1,0>:ud 0x48EB001:ud // msg desc for UV: response 8 GRFs; 1 is added to change BI to UV
+ mov (1) r16.2<1>:ud 0x0000A000:ud // m0.2 mask: bits 15 and 13 set (alpha, green not written back); Red+Blue are returned
+
+ send (1) uwBUFFER_0(4)<1> r16 0x2 a0.0:ud
+ // Returns UV data in 8 GRFs in scrambled order
+
+SKIP_AVS_LOAD_L0_0_:
+ nop // no jump to the label above appears in this file
+
+
diff --git a/src/shaders/post_processing/gen8/PL2_AVS_Buf_1.g8a b/src/shaders/post_processing/gen8/PL2_AVS_Buf_1.g8a
new file mode 100644
index 0000000..e916576
--- /dev/null
+++ b/src/shaders/post_processing/gen8/PL2_AVS_Buf_1.g8a
@@ -0,0 +1,458 @@
+/*
+ * Copyright 2000-2011 Intel Corporation All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+// 42 // Total instruction count
+// 1 // Total kernel count
+
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+
+.default_execution_size (16) // execution size used when an instruction does not state one
+.default_register_type :ub // operand type used when a register operand has no type suffix
+
+.reg_count_total 128 // total number of GRFs declared for the kernel
+.reg_count_payload 7 // NOTE(review): presumably the payload GRF count; the "GRF partition" comment lists r0 plus r1-r6 (7 registers) - confirm
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+//Color Pipe (IECP) parameters
+
+
+//ByteCopy
+
+
+//r4
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later)
+// 0000,0000,0000
+// 0001(Spawn a root thread),0001 (Root thread spawn thread)
+// = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later)
+// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+// 0000,0000 (Binding table index, added later)
+// = 0x0C098700
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+ // Message Descriptors
+ // = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+ // 1 (header present 1) 0 1010 (media block write) 000000
+ // 00000000 (binding table index - set later)
+ // = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+ // buf4 starts from r28
+
+
+//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT
+
+
+.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub // byte view of the message payload area based at r30
+.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw // word view, same base
+.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud // dword view, same base
+.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f // float view, same base; NOTE(review): BUFFER_4 is based at r28 in this file, so the two areas presumably overlap - confirm
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Define temp space for any usages
+
+
+// Common Buffers
+
+
+// temp space for rotation
+
+.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f // float view of the temporary area that starts at r9
+
+.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud // unsigned dword view, same base
+
+.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw // unsigned word view, same base
+
+.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub // unsigned byte view, contiguous bytes
+
+.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub // unsigned byte view stepping 4 bytes between elements (source and destination)
+
+
+// End of common.inc
+
+
+// FileName: PL2_AVS_Buf_1.asm
+// Author: Tatiya, Rupesh
+// Description: Loads 8x8 AVS/IEF PL2 data into Buffer 1
+
+
+
+// FileName : PL2_AVS_Buf.asm
+// Author : Tatiya, Rupesh
+// Description : Loads 8x8 AVS/IEF PL2 data into Buffer N
+
+
+
+// Module name: Scaling.inc
+
+
+
+
+// Description: Includes all definitions explicit to Fast Composite.
+
+
+
+
+// End of common.inc
+
+
+//========== GRF partition ==========
+ // r0 header : r0 (1 GRF)
+ // Static parameters : r1 - r6 (6 GRFS)
+ // Inline parameters : r7 - r8 (2 GRFs)
+ // MSGSRC : r27 (1 GRF)
+//===================================
+
+//Interface:
+//========== Static Parameters (Explicit To Fast Composite) ==========
+//r1
+//CSC Set 0
+
+
+.declare udCSC_CURBE Base=r1.0 ElementSize=4 Type=ud // dword view of r1, the first static-parameter register ("CSC Set 0" per the heading above)
+
+//Constant alpha
+
+
+//r2
+
+
+// Gen7 AVS WA
+
+
+// WiDi Definitions
+
+
+//Colorfill
+
+
+ // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise.
+
+.declare ubCOLOR_PIXEL_VAL Base=r2.20 ElementSize=1 SrcRegion=<0;1,0> DstRegion=<1> Type=ub // byte view at r2.20; the <0;1,0> source region repeats one element for every channel
+
+//r3
+//Normalised Ratio of Horizontal step size with main video for all layers
+
+
+ //Normalised Ratio of Horizontal step size with main video for all layers becomes
+ //Normalised Horizontal step size for all layers in VP_Setup.asm
+
+
+//r4
+//Normalised Vertical step size for all layers
+
+
+//r5
+//Normalised Vertical Frame Origin for all layers
+
+
+//r6
+//Normalised Horizontal Frame Origin for all layers
+
+
+//========== Inline Parameters (Explicit To Fast Composite) ==========
+
+
+//Main video Step X
+
+
+//====================== Binding table (Explicit To Fast Composite)=========================================
+
+
+//Used by Interlaced Scaling Kernels
+
+
+//========== Sampler State Table Index (Explicit To Fast Composite)==========
+//Sampler Index for AVS/IEF messages
+
+
+//Sampler Index for SIMD16 sampler messages
+
+
+//=============================================================================
+
+.declare fBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f // float views of the six data buffers; buffers 0-3 are based 16 GRFs apart (r64, r80, r96, r112)
+.declare fBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f // buffers 4 and 5 are based at r28 and r46
+.declare fBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+
+.declare udBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud // unsigned dword views over the same base registers
+.declare udBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+
+.declare uwBUFFER_0 Base=r64.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw // unsigned word views over the same base registers
+.declare uwBUFFER_1 Base=r80.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_2 Base=r96.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_3 Base=r112.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_4 Base=r28.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_5 Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+
+.declare ubBUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub // unsigned byte views, contiguous bytes
+.declare ubBUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+.declare ub4BUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub // unsigned byte views stepping 4 bytes between elements (source and destination)
+.declare ub4BUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+
+//Pointer to mask reg
+
+
+//r18
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+.declare udCSC_COEFF_0 Base=r18.0 ElementSize=4 Type=ud // 1 GRF: dword view of r18
+
+//r19
+
+
+.declare udCSC_COEFF_1 Base=r19.0 ElementSize=4 Type=ud // 1 GRF: dword view of r19
+
+
+//r20
+
+.declare uwALPHA_MASK_REG_TEMP Base=r20.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF: 16 words of r20
+
+//r21
+
+.declare uwALPHA_MASK_REG Base=r21.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF: 16 words of r21
+
+//r22
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as
+//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT
+
+//r23
+
+
+//Lumakey
+
+
+//r24
+
+
+//r25
+
+
+//r26
+
+
+//defines to generate LABELS during compile time.
+
+
+ // Message Header
+ // m0.7 31:0 Debug
+ // m0.6 31:0 Debug
+ // m0.5 31:0 Ignored
+ // m0.4 31:0 Ignored
+ // m0.3 31:0 Ignored
+ // m0.2 31:16 Ignored
+ // 15 Alpha Write Channel Mask enable=0, disable=1
+ // 14 Blue Write Channel Mask (U)
+ // 13 Green Write Channel Mask (Y)
+ // 12 Red Write Channel Mask (V)
+ // 11:0 Ignored
+ // m0.1 Ignored
+ // m0.0 Ignored
+
+
+ // AVS payload
+ // m1.7 Group ID Number
+ // m1.6 U 2nd Derivative ---> NLAS dx
+ // m1.5 Delta V ---> Step Y
+ // m1.4 Delta U ---> Step X
+ // m1.3 Pixel 0 V Address ---> ORIY (Y0)
+ // m1.2 Pixel 0 U Address ---> ORIX (X0)
+ // m1.1 Vertical Block Number
+ // m1.0 Reserved
+
+ // Sampler Message Descriptor
+ // 31:29 Reserved 000
+ // 28:25 Message length 0010
+ // 24:20 Response length xxxxx ---> 4GRFs for each enabled channel (AVS), 2GRFs for each enabled channel (sample unorm)
+ // 19 Header Present 1
+ // 18:17 SIMD Mode 11 ---> SIMD32/64
+ // 16:12 Message Type xxxxx ---> 01011 sample_8x8, 01100 (sample_unorm), 01010 (sample_unorm+killpix)
+ // 11:8 Sampler Index xxxx
+ // 7:0 Binding Table Index xxxxxxxx
+
+
+ // Msg Header M0.2
+ // 15:15 Alpha Write Channel Mask, 0: written back, 1: not written back
+ // 14:14 Blue Write Channel Mask
+ // 13:13 Green Write Channel Mask
+ // 12:12 Red Write Channel Mask
+
+
+//By design, Buffer 0,1,2,3 always have Layer 0 and Buffer 4,5 always have L1-L7
+
+
+//used to generate LABELS at compile time.
+
+
+ // 18:17 SIMD Mode 10 ---> SIMD16
+ // 16:12 Message Type xxxxx ---> 00000 (SIMD16)
+
+
+//r10-17 - 8 GRFs to load SIMD16 data (upto 4 channels)
+//r18-19 - 2 GRFs to store sampler ramp.
+
+ .declare mfSCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f // float view of the payload area based at r14
+ .declare muwSCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw // unsigned word view, same base
+ .declare mudCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud // unsigned dword view; NOTE(review): name lacks the "S" of its siblings ("mudCALING") - left as is, confirm nothing depends on the spelling before renaming
+ .declare mubCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=1 SrcRegion=<32;32,1> DstRegion=<1> Type=ub // unsigned byte view; same spelling remark ("mubCALING")
+
+ // Typed views of the temporary area based at r9 (the same base register as fSAMPLER_RAMP below).
+ .declare fSCALING_0X_34X_TEMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+ .declare udSCALING_0X_34X_TEMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+ .declare ub4SCALING_0X_34X_TEMP Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<1> Type=ub // NOTE(review): the ub4ROBUF / ub4BUFFER_* views use DstRegion=<4>; confirm <1> is intended here
+ .declare uwSCALING_0X_34X_TEMP Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+
+ // Sampler ramp is used for Scaling 0X_0.34X
+ .declare fSAMPLER_RAMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> Type=f // 1 GRF, 8 elements
+
+
+ //#define rMSGDSC_UV r23.0
+
+
+//End of _SCALING_
+
+
+ // Build the 2-register sampler message: r16 is the header (m0), r17 the AVS payload (m1).
+ mov (1) r16.3<1>:ud r0.3<0;1,0>:ud // m0.3 <- dword 3 of the thread header r0
+
+
+ //AVS_PAYLOAD already has all the data loaded at this point
+ add (1) a0.0<1>:ud r23.5<0;1,0>:ud 0x44EB000:ud //msg desc for Y: length 2, response 4 GRFs, header present, sample_8x8; r23.5 is added in (presumably sampler and binding-table indices - confirm)
+
+ mov (1) r16.2<1>:ud 0x0000D000:ud // m0.2 mask: bits 15, 14 and 12 set (alpha, blue, red not written back); only Green (Y) is returned
+
+
+ // set the vertical block number
+
+ mov (1) r25.1<1>:ud 1:ud
+
+
+ mov (8) r17.0<1>:ud r25.0<8;8,1>:ud // Copy the payload mirror kept in r25 into r17, the second message register
+
+ send (1) uwBUFFER_1(0)<1> r16 0x2 a0.0:ud
+ // Returns Y data in 4 GRFs in scrambled order
+
+ add (1) a0.0<1>:ud r23.5<0;1,0>:ud 0x48EB001:ud // msg desc for UV: response 8 GRFs; 1 is added to change BI to UV
+ mov (1) r16.2<1>:ud 0x0000A000:ud // m0.2 mask: bits 15 and 13 set (alpha, green not written back); Red+Blue are returned
+
+ send (1) uwBUFFER_1(4)<1> r16 0x2 a0.0:ud
+ // Returns UV data in 8 GRFs in scrambled order
+
+SKIP_AVS_LOAD_L0_1_:
+ nop // no jump to the label above appears in this file
+
+
diff --git a/src/shaders/post_processing/gen8/PL2_AVS_Buf_2.g8a b/src/shaders/post_processing/gen8/PL2_AVS_Buf_2.g8a
new file mode 100644
index 0000000..ed51a19
--- /dev/null
+++ b/src/shaders/post_processing/gen8/PL2_AVS_Buf_2.g8a
@@ -0,0 +1,458 @@
+/*
+ * Copyright 2000-2011 Intel Corporation All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+// 42 // Total instruction count
+// 1 // Total kernel count
+
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+
+.default_execution_size (16) // execution size used when an instruction does not state one
+.default_register_type :ub // operand type used when a register operand has no type suffix
+
+.reg_count_total 128 // total number of GRFs declared for the kernel
+.reg_count_payload 7 // NOTE(review): presumably the payload GRF count; the "GRF partition" comment lists r0 plus r1-r6 (7 registers) - confirm
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+//Color Pipe (IECP) parameters
+
+
+//ByteCopy
+
+
+//r4
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later)
+// 0000,0000,0000
+// 0001(Spawn a root thread),0001 (Root thread spawn thread)
+// = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later)
+// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+// 0000,0000 (Binding table index, added later)
+// = 0x0C098700
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+ // Message Descriptors
+ // = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+ // 1 (header present 1) 0 1010 (media block write) 000000
+ // 00000000 (binding table index - set later)
+ // = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+ // buf4 starts from r28
+
+
+//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT
+
+
+.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub // byte view of the message payload area based at r30
+.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw // word view, same base
+.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud // dword view, same base
+.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f // float view, same base; NOTE(review): BUFFER_4 is based at r28 in this file, so the two areas presumably overlap - confirm
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Define temp space for any usages
+
+
+// Common Buffers
+
+
+// temp space for rotation
+
+.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f // float view of the temporary area that starts at r9
+
+.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud // unsigned dword view, same base
+
+.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw // unsigned word view, same base
+
+.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub // unsigned byte view, contiguous bytes
+
+.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub // unsigned byte view stepping 4 bytes between elements (source and destination)
+
+
+// End of common.inc
+
+
+// FileName: PL2_AVS_Buf_2.asm
+// Author: Tatiya, Rupesh
+// Description: Loads 8x8 AVS/IEF PL2 data into Buffer 2
+
+
+
+// FileName : PL2_AVS_Buf.asm
+// Author : Tatiya, Rupesh
+// Description : Loads 8x8 AVS/IEF PL2 data into Buffer N
+
+
+
+// Module name: Scaling.inc
+
+
+
+
+// Description: Includes all definitions explicit to Fast Composite.
+
+
+
+
+// End of common.inc
+
+
+//========== GRF partition ==========
+ // r0 header : r0 (1 GRF)
+ // Static parameters : r1 - r6 (6 GRFS)
+ // Inline parameters : r7 - r8 (2 GRFs)
+ // MSGSRC : r27 (1 GRF)
+//===================================
+
+//Interface:
+//========== Static Parameters (Explicit To Fast Composite) ==========
+//r1
+//CSC Set 0
+
+
+.declare udCSC_CURBE Base=r1.0 ElementSize=4 Type=ud // dword view of r1, the first static-parameter register ("CSC Set 0" per the heading above)
+
+//Constant alpha
+
+
+//r2
+
+
+// Gen7 AVS WA
+
+
+// WiDi Definitions
+
+
+//Colorfill
+
+
+ // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise.
+
+.declare ubCOLOR_PIXEL_VAL Base=r2.20 ElementSize=1 SrcRegion=<0;1,0> DstRegion=<1> Type=ub // byte view at r2.20; the <0;1,0> source region repeats one element for every channel
+
+//r3
+//Normalised Ratio of Horizontal step size with main video for all layers
+
+
+ //Normalised Ratio of Horizontal step size with main video for all layers becomes
+ //Normalised Horizontal step size for all layers in VP_Setup.asm
+
+
+//r4
+//Normalised Vertical step size for all layers
+
+
+//r5
+//Normalised Vertical Frame Origin for all layers
+
+
+//r6
+//Normalised Horizontal Frame Origin for all layers
+
+
+//========== Inline Parameters (Explicit To Fast Composite) ==========
+
+
+//Main video Step X
+
+
+//====================== Binding table (Explicit To Fast Composite)=========================================
+
+
+//Used by Interlaced Scaling Kernels
+
+
+//========== Sampler State Table Index (Explicit To Fast Composite)==========
+//Sampler Index for AVS/IEF messages
+
+
+//Sampler Index for SIMD16 sampler messages
+
+
+//=============================================================================
+
+.declare fBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+
+.declare udBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+
+.declare uwBUFFER_0 Base=r64.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_1 Base=r80.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_2 Base=r96.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_3 Base=r112.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_4 Base=r28.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_5 Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+
+.declare ubBUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+.declare ub4BUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+
+//Pointer to mask reg
+
+
+//r18
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+.declare udCSC_COEFF_0 Base=r18.0 ElementSize=4 Type=ud // 1 GRF
+
+//r19
+
+
+.declare udCSC_COEFF_1 Base=r19.0 ElementSize=4 Type=ud // 1 GRF
+
+
+//r20
+
+.declare uwALPHA_MASK_REG_TEMP Base=r20.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF
+
+//r21
+
+.declare uwALPHA_MASK_REG Base=r21.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF
+
+//r22
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as
+//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT
+
+//r23
+
+
+//Lumakey
+
+
+//r24
+
+
+//r25
+
+
+//r26
+
+
+//defines to generate LABELS during compile time.
+
+
+ // Message Header
+ // m0.7 31:0 Debug
+ // m0.6 31:0 Debug
+ // m0.5 31:0 Ignored
+ // m0.4 31:0 Ignored
+ // m0.3 31:0 Ignored
+ // m0.2 31:16 Ignored
+ // 15 Alpha Write Channel Mask enable=0, disable=1
+ // 14 Blue Write Channel Mask (U)
+ // 13 Green Write Channel Mask (Y)
+ // 12 Red Write Channel Mask (V)
+ // 11:0 Ignored
+ // m0.1 Ignored
+ // m0.0 Ignored
+
+
+ // AVS payload
+ // m1.7 Group ID Number
+ // m1.6 U 2nd Derivative ---> NLAS dx
+ // m1.5 Delta V ---> Step Y
+ // m1.4 Delta U ---> Step X
+ // m1.3 Pixel 0 V Address ---> ORIY (Y0)
+ // m1.2 Pixel 0 U Address ---> ORIX (X0)
+ // m1.1 Vertical Block Number
+ // m1.0 Reserved
+
+ // Sampler Message Descriptor
+ // 31:29 Reserved 000
+ // 28:25 Message length 0010
+ // 24:20 Response length xxxxx ---> 4GRFs for each enabled channel (AVS), 2GRFs for each enabled channel (sample unorm)
+ // 19 Header Present 1
+ // 18:17 SIMD Mode 11 ---> SIMD32/64
+ // 16:12 Message Type xxxxx ---> 01011 sample_8x8, 01100 (sample_unorm), 01010 (sample_unorm+killpix)
+ // 11:8 Sampler Index xxxx
+ // 7:0 Binding Table Index xxxxxxxx
+
+
+ // Msg Header M0.2
+ // 15:15 Alpha Write Channel Mask, 0: written back, 1: not written back
+ // 14:14 Blue Write Channel Mask
+ // 13:13 Green Write Channel Mask
+ // 12:12 Red Write Channel Mask
+
+
+//By design, Buffer 0,1,2,3 always have Layer 0 and Buffer 4,5 always have L1-L7
+
+
+//used to generate LABELS at compile time.
+
+
+ // 18:17 SIMD Mode 10 ---> SIMD16
+ // 16:12 Message Type xxxxx ---> 00000 (SIMD16)
+
+
+//r10-17 - 8 GRFs to load SIMD16 data (upto 4 channels)
+//r18-19 - 2 GRFs to store sampler ramp.
+
+ .declare mfSCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f // Four typed aliases (f/uw/ud/ub) of the same registers starting at r14
+ .declare muwSCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+ .declare mudCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud // NOTE(review): name lacks the 'S' of SCALING used by the mf/muw aliases; unused by the instructions below
+ .declare mubCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=1 SrcRegion=<32;32,1> DstRegion=<1> Type=ub // NOTE(review): same missing 'S' as the ud alias
+
+ // Typed aliases of the temp space starting at r9 (same base as the *ROBUF aliases and fSAMPLER_RAMP).
+ .declare fSCALING_0X_34X_TEMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+ .declare udSCALING_0X_34X_TEMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+ .declare ub4SCALING_0X_34X_TEMP Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<1> Type=ub // NOTE(review): DstRegion=<1>, whereas ub4ROBUF / ub4BUFFER_n use <4> - confirm intended
+ .declare uwSCALING_0X_34X_TEMP Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+
+ // Sampler ramp is used for Scaling 0X_0.34X
+ .declare fSAMPLER_RAMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> Type=f // 1 GRF, 8 float elements; aliases r9 like the *_TEMP declarations above
+
+
+ //#define rMSGDSC_UV r23.0
+
+
+//End of _SCALING_
+
+
+ mov (1) r16.3<1>:ud r0.3<0;1,0>:ud // Message header dword 3 (r16.3) <- r0.3 of the r0 header
+ // Loads vertical block 2 into BUFFER_2 (base r96): one sample_8x8 message for Y, then one for UV.
+ //AVS_PAYLOAD already has all the data loaded at this point
+ add (1) a0.0<1>:ud r23.5<0;1,0>:ud 0x44EB000:ud // msg desc: msg len 2, resp len 4, header present, type 01011 (sample_8x8); r23.5 presumably holds the sampler/binding index bits - confirm
+
+ mov (1) r16.2<1>:ud 0x0000D000:ud // m0.2 write mask 1101b (1 = disabled): only the Green (Y) channel is written back
+
+
+ // set the vertical block number
+
+
+ mov (1) r25.1<1>:ud 2:ud // dword 1 of the payload = Vertical Block Number 2
+
+
+ mov (8) r17.0<1>:ud r25.0<8;8,1>:ud // Copy the 8-dword AVS payload from r25 into r17, the second register of the 2-register message
+
+ send (1) uwBUFFER_2(0)<1> r16 0x2 a0.0:ud
+ // Returns Y data in 4 GRFs in scrambled order
+
+ add (1) a0.0<1>:ud r23.5<0;1,0>:ud 0x48EB001:ud // msg desc: resp len 8; 1 is added to change BI to UV
+ mov (1) r16.2<1>:ud 0x0000A000:ud // m0.2 write mask 1010b: Red+Blue channels written back
+
+ send (1) uwBUFFER_2(4)<1> r16 0x2 a0.0:ud
+ // Returns UV data in 8 GRFs in scrambled order
+ // Label is not referenced inside this file; presumably a skip target once kernel fragments are combined - confirm
+SKIP_AVS_LOAD_L0_2_:
+ nop
+
+
diff --git a/src/shaders/post_processing/gen8/PL2_AVS_Buf_3.g8a b/src/shaders/post_processing/gen8/PL2_AVS_Buf_3.g8a
new file mode 100644
index 0000000..5b46bf7
--- /dev/null
+++ b/src/shaders/post_processing/gen8/PL2_AVS_Buf_3.g8a
@@ -0,0 +1,460 @@
+/*
+ * Copyright 2000-2011 Intel Corporation All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+// 42 // Total instruction count
+// 1 // Total kernel count
+
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+
+.default_execution_size (16)
+.default_register_type :ub
+
+.reg_count_total 128
+.reg_count_payload 7
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+//Color Pipe (IECP) parameters
+
+
+//ByteCopy
+
+
+//r4
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later)
+// 0000,0000,0000
+// 0001(Spawn a root thread),0001 (Root thread spawn thread)
+// = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later)
+// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+// 0000,0000 (Binding table index, added later)
+// = 0x0C098700
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+ // Message Descriptors
+ // = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+ // 1 (header present 1) 0 1010 (media block write) 000000
+ // 00000000 (binding table index - set later)
+ // = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+ // buf4 starts from r28
+
+
+//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT
+
+
+.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Define temp space for any usages
+
+
+// Common Buffers
+
+
+// temp space for rotation
+
+.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+
+.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+
+.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+
+.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+
+
+// End of common.inc
+
+
+// FileName: PL2_AVS_Buf_3.asm
+// Author: Tatiya, Rupesh
+// Description: Loads 8x8 AVS/IEF PL2 data into Buffer 3
+
+
+
+// FileName : PL2_AVS_Buf.asm
+// Author : Tatiya, Rupesh
+// Description : Loads 8x8 AVS/IEF PL2 data into Buffer N
+
+
+
+// Module name: Scaling.inc
+
+
+
+
+// Description: Includes all definitions explicit to Fast Composite.
+
+
+
+
+// End of common.inc
+
+
+//========== GRF partition ==========
+ // r0 header : r0 (1 GRF)
+ // Static parameters : r1 - r6 (6 GRFS)
+ // Inline parameters : r7 - r8 (2 GRFs)
+ // MSGSRC : r27 (1 GRF)
+//===================================
+
+//Interface:
+//========== Static Parameters (Explicit To Fast Composite) ==========
+//r1
+//CSC Set 0
+
+
+.declare udCSC_CURBE Base=r1.0 ElementSize=4 Type=ud
+
+//Constant alpha
+
+
+//r2
+
+
+// Gen7 AVS WA
+
+
+// WiDi Definitions
+
+
+//Colorfill
+
+
+ // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise.
+
+.declare ubCOLOR_PIXEL_VAL Base=r2.20 ElementSize=1 SrcRegion=<0;1,0> DstRegion=<1> Type=ub
+
+//r3
+//Normalised Ratio of Horizontal step size with main video for all layers
+
+
+ //Normalised Ratio of Horizontal step size with main video for all layers becomes
+ //Normalised Horizontal step size for all layers in VP_Setup.asm
+
+
+//r4
+//Normalised Vertical step size for all layers
+
+
+//r5
+//Normalised Vertical Frame Origin for all layers
+
+
+//r6
+//Normalised Horizontal Frame Origin for all layers
+
+
+//========== Inline Parameters (Explicit To Fast Composite) ==========
+
+
+//Main video Step X
+
+
+//====================== Binding table (Explicit To Fast Composite)=========================================
+
+
+//Used by Interlaced Scaling Kernels
+
+
+//========== Sampler State Table Index (Explicit To Fast Composite)==========
+//Sampler Index for AVS/IEF messages
+
+
+//Sampler Index for SIMD16 sampler messages
+
+
+//=============================================================================
+
+.declare fBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+
+.declare udBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+
+.declare uwBUFFER_0 Base=r64.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_1 Base=r80.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_2 Base=r96.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_3 Base=r112.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_4 Base=r28.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_5 Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+
+.declare ubBUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+.declare ub4BUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+
+//Pointer to mask reg
+
+
+//r18
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+.declare udCSC_COEFF_0 Base=r18.0 ElementSize=4 Type=ud // 1 GRF
+
+//r19
+
+
+.declare udCSC_COEFF_1 Base=r19.0 ElementSize=4 Type=ud // 1 GRF
+
+
+//r20
+
+.declare uwALPHA_MASK_REG_TEMP Base=r20.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF
+
+//r21
+
+.declare uwALPHA_MASK_REG Base=r21.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF
+
+//r22
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as
+//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT
+
+//r23
+
+
+//Lumakey
+
+
+//r24
+
+
+//r25
+
+
+//r26
+
+
+//defines to generate LABELS during compile time.
+
+
+ // Message Header
+ // m0.7 31:0 Debug
+ // m0.6 31:0 Debug
+ // m0.5 31:0 Ignored
+ // m0.4 31:0 Ignored
+ // m0.3 31:0 Ignored
+ // m0.2 31:16 Ignored
+ // 15 Alpha Write Channel Mask enable=0, disable=1
+ // 14 Blue Write Channel Mask (U)
+ // 13 Green Write Channel Mask (Y)
+ // 12 Red Write Channel Mask (V)
+ // 11:0 Ignored
+ // m0.1 Ignored
+ // m0.0 Ignored
+
+
+ // AVS payload
+ // m1.7 Group ID Number
+ // m1.6 U 2nd Derivative ---> NLAS dx
+ // m1.5 Delta V ---> Step Y
+ // m1.4 Delta U ---> Step X
+ // m1.3 Pixel 0 V Address ---> ORIY (Y0)
+ // m1.2 Pixel 0 U Address ---> ORIX (X0)
+ // m1.1 Vertical Block Number
+ // m1.0 Reserved
+
+ // Sampler Message Descriptor
+ // 31:29 Reserved 000
+ // 28:25 Message length 0010
+ // 24:20 Response length xxxxx ---> 4GRFs for each enabled channel (AVS), 2GRFs for each enabled channel (sample unorm)
+ // 19 Header Present 1
+ // 18:17 SIMD Mode 11 ---> SIMD32/64
+ // 16:12 Message Type xxxxx ---> 01011 sample_8x8, 01100 (sample_unorm), 01010 (sample_unorm+killpix)
+ // 11:8 Sampler Index xxxx
+ // 7:0 Binding Table Index xxxxxxxx
+
+
+ // Msg Header M0.2
+ // 15:15 Alpha Write Channel Mask, 0: written back, 1: not written back
+ // 14:14 Blue Write Channel Mask
+ // 13:13 Green Write Channel Mask
+ // 12:12 Red Write Channel Mask
+
+
+//By design, Buffer 0,1,2,3 always have Layer 0 and Buffer 4,5 always have L1-L7
+
+
+//used to generate LABELS at compile time.
+
+
+ // 18:17 SIMD Mode 10 ---> SIMD16
+ // 16:12 Message Type xxxxx ---> 00000 (SIMD16)
+
+
+//r10-17 - 8 GRFs to load SIMD16 data (upto 4 channels)
+//r18-19 - 2 GRFs to store sampler ramp.
+
+ .declare mfSCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f // Four typed aliases (f/uw/ud/ub) of the same registers starting at r14
+ .declare muwSCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+ .declare mudCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud // NOTE(review): name lacks the 'S' of SCALING used by the mf/muw aliases; unused by the instructions below
+ .declare mubCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=1 SrcRegion=<32;32,1> DstRegion=<1> Type=ub // NOTE(review): same missing 'S' as the ud alias
+
+ // Typed aliases of the temp space starting at r9 (same base as the *ROBUF aliases and fSAMPLER_RAMP).
+ .declare fSCALING_0X_34X_TEMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+ .declare udSCALING_0X_34X_TEMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+ .declare ub4SCALING_0X_34X_TEMP Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<1> Type=ub // NOTE(review): DstRegion=<1>, whereas ub4ROBUF / ub4BUFFER_n use <4> - confirm intended
+ .declare uwSCALING_0X_34X_TEMP Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+
+ // Sampler ramp is used for Scaling 0X_0.34X
+ .declare fSAMPLER_RAMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> Type=f // 1 GRF, 8 float elements; aliases r9 like the *_TEMP declarations above
+
+
+ //#define rMSGDSC_UV r23.0
+
+
+//End of _SCALING_
+
+
+
+ mov (1) r16.3<1>:ud r0.3<0;1,0>:ud // Message header dword 3 (r16.3) <- r0.3 of the r0 header
+ // Loads vertical block 3 into BUFFER_3 (base r112): one sample_8x8 message for Y, then one for UV.
+
+ //AVS_PAYLOAD already has all the data loaded at this point
+ add (1) a0.0<1>:ud r23.5<0;1,0>:ud 0x44EB000:ud // msg desc: msg len 2, resp len 4, header present, type 01011 (sample_8x8); r23.5 presumably holds the sampler/binding index bits - confirm
+
+ mov (1) r16.2<1>:ud 0x0000D000:ud // m0.2 write mask 1101b (1 = disabled): only the Green (Y) channel is written back
+
+
+ // set the vertical block number
+
+
+ mov (1) r25.1<1>:ud 3:ud // dword 1 of the payload = Vertical Block Number 3
+
+
+ mov (8) r17.0<1>:ud r25.0<8;8,1>:ud // Copy the 8-dword AVS payload from r25 into r17, the second register of the 2-register message
+
+ send (1) uwBUFFER_3(0)<1> r16 0x2 a0.0:ud
+ // Returns Y data in 4 GRFs in scrambled order
+
+ add (1) a0.0<1>:ud r23.5<0;1,0>:ud 0x48EB001:ud // msg desc: resp len 8; 1 is added to change BI to UV
+ mov (1) r16.2<1>:ud 0x0000A000:ud // m0.2 write mask 1010b: Red+Blue channels written back
+
+ send (1) uwBUFFER_3(4)<1> r16 0x2 a0.0:ud
+ // Returns UV data in 8 GRFs in scrambled order
+ // Label is not referenced inside this file; presumably a skip target once kernel fragments are combined - confirm
+SKIP_AVS_LOAD_L0_3_:
+ nop
+
+
diff --git a/src/shaders/post_processing/gen8/PL3_AVS_Buf_0.g8a b/src/shaders/post_processing/gen8/PL3_AVS_Buf_0.g8a
new file mode 100644
index 0000000..b5b85d5
--- /dev/null
+++ b/src/shaders/post_processing/gen8/PL3_AVS_Buf_0.g8a
@@ -0,0 +1,470 @@
+/*
+ * Copyright 2000-2011 Intel Corporation All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+// 44 // Total instruction count
+// 1 // Total kernel count
+
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+
+.default_execution_size (16)
+.default_register_type :ub
+
+.reg_count_total 128
+.reg_count_payload 7
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+//Color Pipe (IECP) parameters
+
+
+//ByteCopy
+
+
+//r4
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later)
+// 0000,0000,0000
+// 0001(Spawn a root thread),0001 (Root thread spawn thread)
+// = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later)
+// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+// 0000,0000 (Binding table index, added later)
+// = 0x0C098700
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+ // Message Descriptors
+ // = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+ // 1 (header present 1) 0 1010 (media block write) 000000
+ // 00000000 (binding table index - set later)
+ // = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+ // buf4 starts from r28
+
+
+//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT
+
+
+.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Define temp space for any usages
+
+
+// Common Buffers
+
+
+// temp space for rotation
+
+.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+
+.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+
+.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+
+.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+
+
+// End of common.inc
+
+
+// FileName: PL3_AVS_Buf_0.asm
+// Author: Tatiya, Rupesh
+// Description: Loads 8x8 AVS/IEF PL3 data into Buffer 0
+
+
+
+// FileName : PL3_AVS_Buf.asm
+// Author : Tatiya, Rupesh
+// Description : Loads 8x8 AVS/IEF PL3 data into Buffer N
+
+
+
+// Module name: Scaling.inc
+
+
+
+
+// Description: Includes all definitions explicit to Fast Composite.
+
+
+
+
+// End of common.inc
+
+
+//========== GRF partition ==========
+ // r0 header : r0 (1 GRF)
+ // Static parameters : r1 - r6 (6 GRFS)
+ // Inline parameters : r7 - r8 (2 GRFs)
+ // MSGSRC : r27 (1 GRF)
+//===================================
+
+//Interface:
+//========== Static Parameters (Explicit To Fast Composite) ==========
+//r1
+//CSC Set 0
+
+
+.declare udCSC_CURBE Base=r1.0 ElementSize=4 Type=ud
+
+//Constant alpha
+
+
+//r2
+
+
+// Gen7 AVS WA
+
+
+// WiDi Definitions
+
+
+//Colorfill
+
+
+ // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise.
+
+.declare ubCOLOR_PIXEL_VAL Base=r2.20 ElementSize=1 SrcRegion=<0;1,0> DstRegion=<1> Type=ub
+
+//r3
+//Normalised Ratio of Horizontal step size with main video for all layers
+
+
+ //Normalised Ratio of Horizontal step size with main video for all layers becomes
+ //Normalised Horizontal step size for all layers in VP_Setup.asm
+
+
+//r4
+//Normalised Vertical step size for all layers
+
+
+//r5
+//Normalised Vertical Frame Origin for all layers
+
+
+//r6
+//Normalised Horizontal Frame Origin for all layers
+
+
+//========== Inline Parameters (Explicit To Fast Composite) ==========
+
+
+//Main video Step X
+
+
+//====================== Binding table (Explicit To Fast Composite)=========================================
+
+
+//Used by Interlaced Scaling Kernels
+
+
+//========== Sampler State Table Index (Explicit To Fast Composite)==========
+//Sampler Index for AVS/IEF messages
+
+
+//Sampler Index for SIMD16 sampler messages
+
+
+//=============================================================================
+
+.declare fBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+
+.declare udBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+
+.declare uwBUFFER_0 Base=r64.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_1 Base=r80.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_2 Base=r96.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_3 Base=r112.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_4 Base=r28.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_5 Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+
+.declare ubBUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+.declare ub4BUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+
+//Pointer to mask reg
+
+
+//r18
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+.declare udCSC_COEFF_0 Base=r18.0 ElementSize=4 Type=ud // 1 GRF
+
+//r19
+
+
+.declare udCSC_COEFF_1 Base=r19.0 ElementSize=4 Type=ud // 1 GRF
+
+
+//r20
+
+.declare uwALPHA_MASK_REG_TEMP Base=r20.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF
+
+//r21
+
+.declare uwALPHA_MASK_REG Base=r21.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF
+
+//r22
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as
+//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT
+
+//r23
+
+
+//Lumakey
+
+
+//r24
+
+
+//r25
+
+
+//r26
+
+
+//defines to generate LABELS during compile time.
+
+
+ // Message Header
+ // m0.7 31:0 Debug
+ // m0.6 31:0 Debug
+ // m0.5 31:0 Ignored
+ // m0.4 31:0 Ignored
+ // m0.3 31:0 Ignored
+ // m0.2 31:16 Ignored
+ // 15 Alpha Write Channel Mask enable=0, disable=1
+ // 14 Blue Write Channel Mask (U)
+ // 13 Green Write Channel Mask (Y)
+ // 12 Red Write Channel Mask (V)
+ // 11:0 Ignored
+ // m0.1 Ignored
+ // m0.0 Ignored
+
+
+ // AVS payload
+ // m1.7 Group ID Number
+ // m1.6 U 2nd Derivative ---> NLAS dx
+ // m1.5 Delta V ---> Step Y
+ // m1.4 Delta U ---> Step X
+ // m1.3 Pixel 0 V Address ---> ORIY (Y0)
+ // m1.2 Pixel 0 U Address ---> ORIX (X0)
+ // m1.1 Vertical Block Number
+ // m1.0 Reserved
+
+ // Sampler Message Descriptor
+ // 31:29 Reserved 000
+ // 28:25 Message length 0010
+ // 24:20 Response length xxxxx ---> 4GRFs for each enabled channel (AVS), 2GRFs for each enabled channel (sample unorm)
+ // 19 Header Present 1
+ // 18:17 SIMD Mode 11 ---> SIMD32/64
+ // 16:12 Message Type xxxxx ---> 01011 sample_8x8, 01100 (sample_unorm), 01010 (sample_unorm+killpix)
+ // 11:8 Sampler Index xxxx
+ // 7:0 Binding Table Index xxxxxxxx
+
+
+ // Msg Header M0.2
+ // 15:15 Alpha Write Channel Mask, 0: written back, 1: not written back
+ // 14:14 Blue Write Channel Mask
+ // 13:13 Green Write Channel Mask
+ // 12:12 Red Write Channel Mask
+
+
+//By design, Buffer 0,1,2,3 always have Layer 0 and Buffer 4,5 always have L1-L7
+
+
+//used to generate LABELS at compile time.
+
+
+ // 18:17 SIMD Mode 10 ---> SIMD16
+ // 16:12 Message Type xxxxx ---> 00000 (SIMD16)
+
+
+//r10-17 - 8 GRFs to load SIMD16 data (upto 4 channels)
+//r18-19 - 2 GRFs to store sampler ramp.
+
+ .declare mfSCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f // Four typed aliases (f/uw/ud/ub) of the same registers starting at r14
+ .declare muwSCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+ .declare mudCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud // NOTE(review): name lacks the 'S' of SCALING used by the mf/muw aliases; unused by the instructions below
+ .declare mubCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=1 SrcRegion=<32;32,1> DstRegion=<1> Type=ub // NOTE(review): same missing 'S' as the ud alias
+
+ // Typed aliases of the temp space starting at r9 (same base as the *ROBUF aliases and fSAMPLER_RAMP).
+ .declare fSCALING_0X_34X_TEMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+ .declare udSCALING_0X_34X_TEMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+ .declare ub4SCALING_0X_34X_TEMP Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<1> Type=ub // NOTE(review): DstRegion=<1>, whereas ub4ROBUF / ub4BUFFER_n use <4> - confirm intended
+ .declare uwSCALING_0X_34X_TEMP Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+
+ // Sampler ramp is used for Scaling 0X_0.34X
+ .declare fSAMPLER_RAMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> Type=f // 1 GRF, 8 float elements; aliases r9 like the *_TEMP declarations above
+
+
+ //#define rMSGDSC_UV r23.0
+
+
+//End of _SCALING_
+
+
+ //NOTE: We need offsets for the second half of LAYER 0 - even if we do not load it.
+ //Update the channel offset in the buffers for the lower 8x4 data for BUFFER_0.
+ mov (1) r22.4<1>:ud 0x400040:ud
+ // Loads vertical block 0 into BUFFER_0 (base r64): three sample_8x8 messages (Y, U, V), 4 GRFs each.
+
+ mov (1) r16.3<1>:ud r0.3<0;1,0>:ud // Message header dword 3 (r16.3) <- r0.3 of the r0 header
+
+
+ //AVS_PAYLOAD already has all the data loaded at this point
+ add (1) a0.0<1>:ud r23.5<0;1,0>:ud 0x44EB000:ud // msg desc: msg len 2, resp len 4, header present, type 01011 (sample_8x8); r23.5 presumably holds the sampler/binding index bits - confirm
+
+ mov (1) r16.2<1>:ud 0x0000D000:ud // m0.2 write mask 1101b (1 = disabled): only the Green (Y) channel is written back
+
+
+
+ // set the vertical block number
+
+ mov (1) r25.1<1>:ud 0:ud // dword 1 of the payload = Vertical Block Number 0
+
+ mov (8) r17.0<1>:ud r25.0<8;8,1>:ud // Copy the 8-dword AVS payload from r25 into r17, the second register of the 2-register message
+
+ send (1) uwBUFFER_0(0)<1> r16 0x2 a0.0:ud
+ // Returns Y data in 4 GRFs in scrambled order
+
+ add (1) a0.0<1>:ud r23.5<0;1,0>:ud 0x44EB001:ud // msg desc; 1 is added to change BI to the U surface
+
+ mov (1) r16.2<1>:ud 0x0000E000:ud // m0.2 write mask 1110b: only the Red channel is written back
+
+ send (1) uwBUFFER_0(4)<1> r16 0x2 a0.0:ud
+ // Returns U data in 4 GRFs in scrambled order
+
+ add (1) a0.0<1>:ud r23.5<0;1,0>:ud 0x44EB002:ud // msg desc; 2 is added to change BI to the V surface
+ mov (1) r16.2<1>:ud 0x0000E000:ud // m0.2 write mask 1110b: only the Red channel is written back
+
+ send (1) uwBUFFER_0(8)<1> r16 0x2 a0.0:ud
+ // Returns V data in 4 GRFs in scrambled order
+ // Label is not referenced inside this file; presumably a skip target once kernel fragments are combined - confirm
+SKIP_AVS_LOAD_L0_0_:
+ nop
+
+
diff --git a/src/shaders/post_processing/gen8/PL3_AVS_Buf_1.g8a b/src/shaders/post_processing/gen8/PL3_AVS_Buf_1.g8a
new file mode 100644
index 0000000..8457ae1
--- /dev/null
+++ b/src/shaders/post_processing/gen8/PL3_AVS_Buf_1.g8a
@@ -0,0 +1,470 @@
+/*
+ * Copyright 2000-2011 Intel Corporation All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+// 44 // Total instruction count
+// 1 // Total kernel count
+
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+
+.default_execution_size (16)
+.default_register_type :ub
+
+.reg_count_total 128
+.reg_count_payload 7
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+//Color Pipe (IECP) parameters
+
+
+//ByteCopy
+
+
+//r4
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later)
+// 0000,0000,0000
+// 0001(Spawn a root thread),0001 (Root thread spawn thread)
+// = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later)
+// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+// 0000,0000 (Binding table index, added later)
+// = 0x02000011
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+ // Message Descriptors
+ // = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+ // 1 (header present 1) 0 1010 (media block write) 000000
+ // 00000000 (binding table index - set later)
+ // = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+ // buf4 starts from r28
+
+
+//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT
+
+
+.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Define temp space for any usages
+
+
+// Common Buffers
+
+
+// temp space for rotation
+
+.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+
+.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+
+.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+
+.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+
+
+// End of common.inc
+
+
+// FileName: PL2_AVS_Buf_0.asm
+// Author: Tatiya, Rupesh
+// Description: Loads 8x8 AVS/IEF PL2 data into Buffer 0
+
+
+
+// FileName : PL2_AVS_Buf.asm
+// Author : Tatiya, Rupesh
+// Description : Loads 8x8 AVS/IEF PL2 data into Buffer N
+
+
+
+// Module name: Scaling.inc
+
+
+
+
+// Description: Includes all definitions explicit to Fast Composite.
+
+
+
+
+// End of common.inc
+
+
+//========== GRF partition ==========
+ // r0 header : r0 (1 GRF)
+ // Static parameters : r1 - r6 (6 GRFS)
+ // Inline parameters : r7 - r8 (2 GRFs)
+ // MSGSRC : r27 (1 GRF)
+//===================================
+
+//Interface:
+//========== Static Parameters (Explicit To Fast Composite) ==========
+//r1
+//CSC Set 0
+
+
+.declare udCSC_CURBE Base=r1.0 ElementSize=4 Type=ud
+
+//Constant alpha
+
+
+//r2
+
+
+// Gen7 AVS WA
+
+
+// WiDi Definitions
+
+
+//Colorfill
+
+
+ // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise.
+
+.declare ubCOLOR_PIXEL_VAL Base=r2.20 ElementSize=1 SrcRegion=<0;1,0> DstRegion=<1> Type=ub
+
+//r3
+//Normalised Ratio of Horizontal step size with main video for all layers
+
+
+ //Normalised Ratio of Horizontal step size with main video for all layers becomes
+ //Normalised Horizontal step size for all layers in VP_Setup.asm
+
+
+//r4
+//Normalised Vertical step size for all layers
+
+
+//r5
+//Normalised Vertical Frame Origin for all layers
+
+
+//r6
+//Normalised Horizontal Frame Origin for all layers
+
+
+//========== Inline Parameters (Explicit To Fast Composite) ==========
+
+
+//Main video Step X
+
+
+//====================== Binding table (Explicit To Fast Composite)=========================================
+
+
+//Used by Interlaced Scaling Kernels
+
+
+//========== Sampler State Table Index (Explicit To Fast Composite)==========
+//Sampler Index for AVS/IEF messages
+
+
+//Sampler Index for SIMD16 sampler messages
+
+
+//=============================================================================
+
+.declare fBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+
+.declare udBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+
+.declare uwBUFFER_0 Base=r64.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_1 Base=r80.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_2 Base=r96.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_3 Base=r112.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_4 Base=r28.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_5 Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+
+.declare ubBUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+.declare ub4BUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+
+//Pointer to mask reg
+
+
+//r18
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+.declare udCSC_COEFF_0 Base=r18.0 ElementSize=4 Type=ud // 1 GRF
+
+//r19
+
+
+.declare udCSC_COEFF_1 Base=r19.0 ElementSize=4 Type=ud // 1 GRF
+
+
+//r20
+
+.declare uwALPHA_MASK_REG_TEMP Base=r20.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF
+
+//r21
+
+.declare uwALPHA_MASK_REG Base=r21.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF
+
+//r22
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as
+//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT
+
+//r23
+
+
+//Lumakey
+
+
+//r24
+
+
+//r25
+
+
+//r26
+
+
+//defines to generate LABELS during compile time.
+
+
+ // Message Header
+ // m0.7 31:0 Debug
+ // m0.6 31:0 Debug
+ // m0.5 31:0 Ignored
+ // m0.4 31:0 Ignored
+ // m0.3 31:0 Ignored
+ // m0.2 31:16 Ignored
+ // 15 Alpha Write Channel Mask enable=0, disable=1
+ // 14 Blue Write Channel Mask (U)
+ // 13 Green Write Channel Mask (Y)
+ // 12 Red Write Channel Mask (V)
+ // 11:0 Ignored
+ // m0.1 Ignored
+ // m0.0 Ignored
+
+
+ // AVS payload
+ // m1.7 Group ID Number
+ // m1.6 U 2nd Derivative ---> NLAS dx
+ // m1.5 Delta V ---> Step Y
+ // m1.4 Delta U ---> Step X
+ // m1.3 Pixel 0 V Address ---> ORIY (Y0)
+ // m1.2 Pixel 0 U Address ---> ORIX (X0)
+ // m1.1 Vertical Block Number
+ // m1.0 Reserved
+
+ // Sampler Message Descriptor
+ // 31:29 Reserved 000
+ // 28:25 Message length 0010
+ // 24:20 Response length xxxxx ---> 4GRFs for each enabled channel (AVS), 2GRFs for each enabled channel (sample unorm)
+ // 19 Header Present 1
+ // 18:17 SIMD Mode 11 ---> SIMD32/64
+ // 16:12 Message Type xxxxx ---> 01011 sample_8x8, 01100 (sample_unorm), 01010 (sample_unorm+killpix)
+ // 11:8 Sampler Index xxxx
+ // 7:0 Binding Table Index xxxxxxxx
+
+
+ // Msg Header M0.2
+ // 15:15 Alpha Write Channel Mask, 0: written back, 1: not written back
+ // 14:14 Blue Write Channel Mask
+ // 13:13 Green Write Channel Mask
+ // 12:12 Red Write Channel Mask
+
+
+//By design, Buffer 0,1,2,3 always have Layer 0 and Buffer 4,5 always have L1-L7
+
+
+//used to generate LABELS at compile time.
+
+
+ // 18:17 SIMD Mode 10 ---> SIMD16
+ // 16:12 Message Type xxxxx ---> 00000 (SIMD16)
+
+
+//r10-17 - 8 GRFs to load SIMD16 data (up to 4 channels)
+//r18-19 - 2 GRFs to store sampler ramp. NOTE(review): fSAMPLER_RAMP below is based at r9.0 and r18/r19 are declared as udCSC_COEFF_0/1 in this file - confirm.
+
+ .declare mfSCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+ .declare muwSCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+ .declare mudCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud // NOTE(review): name lacks the 'S' of SCALING used by the mf/muw aliases - presumably a typo, confirm before renaming
+ .declare mubCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=1 SrcRegion=<32;32,1> DstRegion=<1> Type=ub // NOTE(review): same missing 'S' as mudCALING above
+
+
+ .declare fSCALING_0X_34X_TEMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+ .declare udSCALING_0X_34X_TEMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+ .declare ub4SCALING_0X_34X_TEMP Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<1> Type=ub
+ .declare uwSCALING_0X_34X_TEMP Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+
+ // Sampler ramp is used for Scaling 0X_0.34X; it aliases the same r9.0 base as the *_TEMP declarations above
+ .declare fSAMPLER_RAMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> Type=f // 1 GRF, 8 elements
+
+
+ //#define rMSGDSC_UV r23.0
+
+
+//End of _SCALING_
+
+
+ //NOTE: We need offsets for second half of LAYER 0 - even if we do not load it.
+ //Update the channel offset in the buffers for the lower 8x4 data for BUFFER_0.
+ mov (1) r22.4<1>:ud 0x400040:ud
+
+
+ mov (1) r16.3<1>:ud r0.3<0;1,0>:ud // r16 is the first GRF of the 2-GRF message sent below; dword 3 <- r0.3
+
+
+ //AVS_PAYLOAD already has all the data loaded at this point
+ add (1) a0.0<1>:ud r23.5<0;1,0>:ud 0x44EB000:ud //msg desc: sample_8x8, header present, msg len 2, resp len 4
+
+ mov (1) r16.2<1>:ud 0x0000D000:ud // Mask bits 15,14,12 set (disabled), bit 13 clear: enable Green (Y) channel
+
+
+
+ // set the vertical block number
+
+ mov (1) r25.1<1>:ud 1:ud // r25 is copied into payload register r17 below, so this becomes m1.1
+
+ mov (8) r17.0<1>:ud r25.0<8;8,1>:ud // Copy msg payload mirrors to MRFs
+
+ send (1) uwBUFFER_1(0)<1> r16 0x2 a0.0:ud
+ // Returns Y data in 4 GRFs in scrambled order
+
+ add (1) a0.0<1>:ud r23.5<0;1,0>:ud 0x44EB001:ud // msg desc; 1 is added to the binding index (U load follows)
+
+ mov (1) r16.2<1>:ud 0x0000E000:ud // Mask bits 15,14,13 set (disabled), bit 12 clear: enable Red channel
+
+ send (1) uwBUFFER_1(4)<1> r16 0x2 a0.0:ud
+ // Returns U data in 4 GRFs in scrambled order
+
+ add (1) a0.0<1>:ud r23.5<0;1,0>:ud 0x44EB002:ud // msg desc; 2 is added to the binding index (V load follows)
+ mov (1) r16.2<1>:ud 0x0000E000:ud // Enable Red channel (same mask as the U load)
+
+ send (1) uwBUFFER_1(8)<1> r16 0x2 a0.0:ud
+ // Returns V data in 4 GRFs in scrambled order
+
+SKIP_AVS_LOAD_L0_0_:
+ nop // lands on SKIP_AVS_LOAD_L0_0_; no instruction in this file jumps to that label
+
+
diff --git a/src/shaders/post_processing/gen8/PL3_AVS_Buf_2.g8a b/src/shaders/post_processing/gen8/PL3_AVS_Buf_2.g8a
new file mode 100644
index 0000000..99b40fe
--- /dev/null
+++ b/src/shaders/post_processing/gen8/PL3_AVS_Buf_2.g8a
@@ -0,0 +1,470 @@
+/*
+ * Copyright 2000-2011 Intel Corporation All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+// 44 // Total instruction count
+// 1 // Total kernel count
+
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+
+.default_execution_size (16)
+.default_register_type :ub
+
+.reg_count_total 128
+.reg_count_payload 7
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+//Color Pipe (IECP) parameters
+
+
+//ByteCopy
+
+
+//r4
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later)
+// 0000,0000,0000
+// 0001(Spawn a root thread),0001 (Root thread spawn thread)
+// = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later)
+// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+// 0000,0000 (Binding table index, added later)
+// = 0x02000011
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+ // Message Descriptors
+ // = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+ // 1 (header present 1) 0 1010 (media block write) 000000
+ // 00000000 (binding table index - set later)
+ // = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+ // buf4 starts from r28
+
+
+//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT
+
+
+.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Define temp space for any usages
+
+
+// Common Buffers
+
+
+// temp space for rotation
+
+.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+
+.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+
+.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+
+.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+
+
+// End of common.inc
+
+
+// FileName: PL2_AVS_Buf_0.asm
+// Author: Tatiya, Rupesh
+// Description: Loads 8x8 AVS/IEF PL2 data into Buffer 0
+
+
+
+// FileName : PL2_AVS_Buf.asm
+// Author : Tatiya, Rupesh
+// Description : Loads 8x8 AVS/IEF PL2 data into Buffer N
+
+
+
+// Module name: Scaling.inc
+
+
+
+
+// Description: Includes all definitions explicit to Fast Composite.
+
+
+
+
+// End of common.inc
+
+
+//========== GRF partition ==========
+ // r0 header : r0 (1 GRF)
+ // Static parameters : r1 - r6 (6 GRFS)
+ // Inline parameters : r7 - r8 (2 GRFs)
+ // MSGSRC : r27 (1 GRF)
+//===================================
+
+//Interface:
+//========== Static Parameters (Explicit To Fast Composite) ==========
+//r1
+//CSC Set 0
+
+
+.declare udCSC_CURBE Base=r1.0 ElementSize=4 Type=ud
+
+//Constant alpha
+
+
+//r2
+
+
+// Gen7 AVS WA
+
+
+// WiDi Definitions
+
+
+//Colorfill
+
+
+ // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise.
+
+.declare ubCOLOR_PIXEL_VAL Base=r2.20 ElementSize=1 SrcRegion=<0;1,0> DstRegion=<1> Type=ub
+
+//r3
+//Normalised Ratio of Horizontal step size with main video for all layers
+
+
+ //Normalised Ratio of Horizontal step size with main video for all layers becomes
+ //Normalised Horizontal step size for all layers in VP_Setup.asm
+
+
+//r4
+//Normalised Vertical step size for all layers
+
+
+//r5
+//Normalised Vertical Frame Origin for all layers
+
+
+//r6
+//Normalised Horizontal Frame Origin for all layers
+
+
+//========== Inline Parameters (Explicit To Fast Composite) ==========
+
+
+//Main video Step X
+
+
+//====================== Binding table (Explicit To Fast Composite)=========================================
+
+
+//Used by Interlaced Scaling Kernels
+
+
+//========== Sampler State Table Index (Explicit To Fast Composite)==========
+//Sampler Index for AVS/IEF messages
+
+
+//Sampler Index for SIMD16 sampler messages
+
+
+//=============================================================================
+
+.declare fBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+
+.declare udBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+
+.declare uwBUFFER_0 Base=r64.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_1 Base=r80.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_2 Base=r96.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_3 Base=r112.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_4 Base=r28.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_5 Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+
+.declare ubBUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+.declare ub4BUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+
+//Pointer to mask reg
+
+
+//r18
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+.declare udCSC_COEFF_0 Base=r18.0 ElementSize=4 Type=ud // 1 GRF
+
+//r19
+
+
+.declare udCSC_COEFF_1 Base=r19.0 ElementSize=4 Type=ud // 1 GRF
+
+
+//r20
+
+.declare uwALPHA_MASK_REG_TEMP Base=r20.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF
+
+//r21
+
+.declare uwALPHA_MASK_REG Base=r21.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF
+
+//r22
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as
+//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT
+
+//r23
+
+
+//Lumakey
+
+
+//r24
+
+
+//r25
+
+
+//r26
+
+
+//defines to generate LABELS during compile time.
+
+
+ // Message Header
+ // m0.7 31:0 Debug
+ // m0.6 31:0 Debug
+ // m0.5 31:0 Ignored
+ // m0.4 31:0 Ignored
+ // m0.3 31:0 Ignored
+ // m0.2 31:16 Ignored
+ // 15 Alpha Write Channel Mask enable=0, disable=1
+ // 14 Blue Write Channel Mask (U)
+ // 13 Green Write Channel Mask (Y)
+ // 12 Red Write Channel Mask (V)
+ // 11:0 Ignored
+ // m0.1 Ignored
+ // m0.0 Ignored
+
+
+ // AVS payload
+ // m1.7 Group ID Number
+ // m1.6 U 2nd Derivative ---> NLAS dx
+ // m1.5 Delta V ---> Step Y
+ // m1.4 Delta U ---> Step X
+ // m1.3 Pixel 0 V Address ---> ORIY (Y0)
+ // m1.2 Pixel 0 U Address ---> ORIX (X0)
+ // m1.1 Vertical Block Number
+ // m1.0 Reserved
+
+ // Sampler Message Descriptor
+ // 31:29 Reserved 000
+ // 28:25 Message length 0010
+ // 24:20 Response length xxxxx ---> 4GRFs for each enabled channel (AVS), 2GRFs for each enabled channel (sample unorm)
+ // 19 Header Present 1
+ // 18:17 SIMD Mode 11 ---> SIMD32/64
+ // 16:12 Message Type xxxxx ---> 01011 sample_8x8, 01100 (sample_unorm), 01010 (sample_unorm+killpix)
+ // 11:8 Sampler Index xxxx
+ // 7:0 Binding Table Index xxxxxxxx
+
+
+ // Msg Header M0.2
+ // 15:15 Alpha Write Channel Mask, 0: written back, 1: not written back
+ // 14:14 Blue Write Channel Mask
+ // 13:13 Green Write Channel Mask
+ // 12:12 Red Write Channel Mask
+
+
+//By design, Buffer 0,1,2,3 always have Layer 0 and Buffer 4,5 always have L1-L7
+
+
+//used to generate LABELS at compile time.
+
+
+ // 18:17 SIMD Mode 10 ---> SIMD16
+ // 16:12 Message Type xxxxx ---> 00000 (SIMD16)
+
+
+//r10-17 - 8 GRFs to load SIMD16 data (up to 4 channels)
+//r18-19 - 2 GRFs to store sampler ramp. NOTE(review): fSAMPLER_RAMP below is based at r9.0 and r18/r19 are declared as udCSC_COEFF_0/1 in this file - confirm.
+
+ .declare mfSCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+ .declare muwSCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+ .declare mudCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud // NOTE(review): name lacks the 'S' of SCALING used by the mf/muw aliases - presumably a typo, confirm before renaming
+ .declare mubCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=1 SrcRegion=<32;32,1> DstRegion=<1> Type=ub // NOTE(review): same missing 'S' as mudCALING above
+
+
+ .declare fSCALING_0X_34X_TEMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+ .declare udSCALING_0X_34X_TEMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+ .declare ub4SCALING_0X_34X_TEMP Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<1> Type=ub
+ .declare uwSCALING_0X_34X_TEMP Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+
+ // Sampler ramp is used for Scaling 0X_0.34X; it aliases the same r9.0 base as the *_TEMP declarations above
+ .declare fSAMPLER_RAMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> Type=f // 1 GRF, 8 elements
+
+
+ //#define rMSGDSC_UV r23.0
+
+
+//End of _SCALING_
+
+
+ //NOTE: We need offsets for second half of LAYER 0 - even if we do not load it.
+ //Update the channel offset in the buffers for the lower 8x4 data for BUFFER_0.
+ mov (1) r22.4<1>:ud 0x400040:ud
+
+
+ mov (1) r16.3<1>:ud r0.3<0;1,0>:ud // r16 is the first GRF of the 2-GRF message sent below; dword 3 <- r0.3
+
+
+ //AVS_PAYLOAD already has all the data loaded at this point
+ add (1) a0.0<1>:ud r23.5<0;1,0>:ud 0x44EB000:ud //msg desc: sample_8x8, header present, msg len 2, resp len 4
+
+ mov (1) r16.2<1>:ud 0x0000D000:ud // Mask bits 15,14,12 set (disabled), bit 13 clear: enable Green (Y) channel
+
+
+
+ // set the vertical block number
+
+ mov (1) r25.1<1>:ud 2:ud // r25 is copied into payload register r17 below, so this becomes m1.1
+
+ mov (8) r17.0<1>:ud r25.0<8;8,1>:ud // Copy msg payload mirrors to MRFs
+
+ send (1) uwBUFFER_2(0)<1> r16 0x2 a0.0:ud
+ // Returns Y data in 4 GRFs in scrambled order
+
+ add (1) a0.0<1>:ud r23.5<0;1,0>:ud 0x44EB001:ud // msg desc; 1 is added to the binding index (U load follows)
+
+ mov (1) r16.2<1>:ud 0x0000E000:ud // Mask bits 15,14,13 set (disabled), bit 12 clear: enable Red channel
+
+ send (1) uwBUFFER_2(4)<1> r16 0x2 a0.0:ud
+ // Returns U data in 4 GRFs in scrambled order
+
+ add (1) a0.0<1>:ud r23.5<0;1,0>:ud 0x44EB002:ud // msg desc; 2 is added to the binding index (V load follows)
+ mov (1) r16.2<1>:ud 0x0000E000:ud // Enable Red channel (same mask as the U load)
+
+ send (1) uwBUFFER_2(8)<1> r16 0x2 a0.0:ud
+ // Returns V data in 4 GRFs in scrambled order
+
+SKIP_AVS_LOAD_L0_0_:
+ nop // lands on SKIP_AVS_LOAD_L0_0_; no instruction in this file jumps to that label
+
+
diff --git a/src/shaders/post_processing/gen8/PL3_AVS_Buf_3.g8a b/src/shaders/post_processing/gen8/PL3_AVS_Buf_3.g8a
new file mode 100644
index 0000000..8659876
--- /dev/null
+++ b/src/shaders/post_processing/gen8/PL3_AVS_Buf_3.g8a
@@ -0,0 +1,470 @@
+/*
+ * Copyright 2000-2011 Intel Corporation All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+// 44 // Total instruction count
+// 1 // Total kernel count
+
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+
+.default_execution_size (16)
+.default_register_type :ub
+
+.reg_count_total 128
+.reg_count_payload 7
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+//Color Pipe (IECP) parameters
+
+
+//ByteCopy
+
+
+//r4
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later)
+// 0000,0000,0000
+// 0001(Spawn a root thread),0001 (Root thread spawn thread)
+// = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later)
+// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+// 0000,0000 (Binding table index, added later)
+// = 0x02000011
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+ // Message Descriptors
+ // = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+ // 1 (header present 1) 0 1010 (media block write) 000000
+ // 00000000 (binding table index - set later)
+ // = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+ // buf4 starts from r28
+
+
+//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT
+
+
+.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Define temp space for any usages
+
+
+// Common Buffers
+
+
+// temp space for rotation
+
+.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+
+.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+
+.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+
+.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+
+
+// End of common.inc
+
+
+// FileName: PL2_AVS_Buf_0.asm
+// Author: Tatiya, Rupesh
+// Description: Loads 8x8 AVS/IEF PL2 data into Buffer 0
+
+
+
+// FileName : PL2_AVS_Buf.asm
+// Author : Tatiya, Rupesh
+// Description : Loads 8x8 AVS/IEF PL2 data into Buffer N
+
+
+
+// Module name: Scaling.inc
+
+
+
+
+// Description: Includes all definitions explicit to Fast Composite.
+
+
+
+
+// End of common.inc
+
+
+//========== GRF partition ==========
+ // r0 header : r0 (1 GRF)
+ // Static parameters : r1 - r6 (6 GRFS)
+ // Inline parameters : r7 - r8 (2 GRFs)
+ // MSGSRC : r27 (1 GRF)
+//===================================
+
+//Interface:
+//========== Static Parameters (Explicit To Fast Composite) ==========
+//r1
+//CSC Set 0
+
+
+.declare udCSC_CURBE Base=r1.0 ElementSize=4 Type=ud
+
+//Constant alpha
+
+
+//r2
+
+
+// Gen7 AVS WA
+
+
+// WiDi Definitions
+
+
+//Colorfill
+
+
+ // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise.
+
+.declare ubCOLOR_PIXEL_VAL Base=r2.20 ElementSize=1 SrcRegion=<0;1,0> DstRegion=<1> Type=ub
+
+//r3
+//Normalised Ratio of Horizontal step size with main video for all layers
+
+
+ //Normalised Ratio of Horizontal step size with main video for all layers becomes
+ //Normalised Horizontal step size for all layers in VP_Setup.asm
+
+
+//r4
+//Normalised Vertical step size for all layers
+
+
+//r5
+//Normalised Vertical Frame Origin for all layers
+
+
+//r6
+//Normalised Horizontal Frame Origin for all layers
+
+
+//========== Inline Parameters (Explicit To Fast Composite) ==========
+
+
+//Main video Step X
+
+
+//====================== Binding table (Explicit To Fast Composite)=========================================
+
+
+//Used by Interlaced Scaling Kernels
+
+
+//========== Sampler State Table Index (Explicit To Fast Composite)==========
+//Sampler Index for AVS/IEF messages
+
+
+//Sampler Index for SIMD16 sampler messages
+
+
+//=============================================================================
+// Working buffers. BUFFER_0..3 start at r64/r80/r96/r112 (16 GRFs apart), BUFFER_4/5 at r28/r46. First view: packed floats.
+.declare fBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+// Same six buffers viewed as packed unsigned dwords.
+.declare udBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+// Same six buffers viewed as packed unsigned words.
+.declare uwBUFFER_0 Base=r64.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_1 Base=r80.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_2 Base=r96.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_3 Base=r112.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_4 Base=r28.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_5 Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+// Same six buffers viewed as unsigned bytes, addressed 16 at a time (<16;16,1>).
+.declare ubBUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+// Same six buffers viewed as every fourth byte: 8 bytes per 32-byte row on read (<32;8,4>), stride 4 on write.
+.declare ub4BUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+
+//Pointer to mask reg
+
+
+//r18
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+// Named one-GRF aliases for r18..r21. Their users are outside this chunk; purposes below are read from the names only.
+.declare udCSC_COEFF_0 Base=r18.0 ElementSize=4 Type=ud // 1 GRF
+
+//r19
+
+
+.declare udCSC_COEFF_1 Base=r19.0 ElementSize=4 Type=ud // 1 GRF
+
+
+//r20
+
+.declare uwALPHA_MASK_REG_TEMP Base=r20.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF, 16 words; presumably scratch for the alpha mask - confirm against users
+
+//r21
+
+.declare uwALPHA_MASK_REG Base=r21.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF, 16 words
+
+//r22
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as
+//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT
+
+//r23
+
+
+//Lumakey
+
+
+//r24
+
+
+//r25
+
+
+//r26
+
+
+//defines to generate LABELS during compile time.
+
+
+ // Message Header
+ // m0.7 31:0 Debug
+ // m0.6 31:0 Debug
+ // m0.5 31:0 Ignored
+ // m0.4 31:0 Ignored
+ // m0.3 31:0 Ignored
+ // m0.2 31:16 Ignored
+ // 15 Alpha Write Channel Mask enable=0, disable=1
+ // 14 Blue Write Channel Mask (U)
+ // 13 Green Write Channel Mask (Y)
+ // 12 Red Write Channel Mask (V)
+ // 11:0 Ignored
+ // m0.1 Ignored
+ // m0.0 Ignored
+
+
+ // AVS payload
+ // m1.7 Group ID Number
+ // m1.6 U 2nd Derivative ---> NLAS dx
+ // m1.5 Delta V ---> Step Y
+ // m1.4 Delta U ---> Step X
+ // m1.3 Pixel 0 V Address ---> ORIY (Y0)
+ // m1.2 Pixel 0 U Address ---> ORIX (X0)
+ // m1.1 Vertical Block Number
+ // m1.0 Reserved
+
+ // Sampler Message Descriptor
+ // 31:29 Reserved 000
+ // 28:25 Message length 0010
+ // 24:20 Response length xxxxx ---> 4GRFs for each enabled channel (AVS), 2GRFs for each enabled channel (sample unorm)
+ // 19 Header Present 1
+ // 18:17 SIMD Mode 11 ---> SIMD32/64
+ // 16:12 Message Type xxxxx ---> 01011 sample_8x8, 01100 (sample_unorm), 01010 (sample_unorm+killpix)
+ // 11:8 Sampler Index xxxx
+ // 7:0 Binding Table Index xxxxxxxx
+
+
+ // Msg Header M0.2
+ // 15:15 Alpha Write Channel Mask, 0: written back, 1: not written back
+ // 14:14 Blue Write Channel Mask
+ // 13:13 Green Write Channel Mask
+ // 12:12 Red Write Channel Mask
+
+
+//By design, Buffer 0,1,2,3 always have Layer 0 and Buffer 4,5 always have L1-L7
+
+
+//used to generate LABELS at compile time.
+
+
+ // 18:17 SIMD Mode 10 ---> SIMD16
+ // 16:12 Message Type xxxxx ---> 00000 (SIMD16)
+
+
+//r10-17 - 8 GRFs to load SIMD16 data (up to 4 channels)
+//r18-19 - 2 GRFs to store sampler ramp.
+// NOTE(review): the declarations below put the payload at r14 and the sampler ramp at r9, which does not match the ranges quoted above - confirm which is current.
+ .declare mfSCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+ .declare muwSCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+ .declare mudCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+ .declare mubCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=1 SrcRegion=<32;32,1> DstRegion=<1> Type=ub
+// NOTE(review): the ud/ub names above lack the "S" of their f/uw siblings (mudCALING_/mubCALING_ vs mfSCALING_/muwSCALING_); looks like a typo, but check users before renaming.
+// Typed aliases of the scaling temp at r9.0. NOTE(review): ub4SCALING_0X_34X_TEMP uses DstRegion=<1> with a stride-4 source region - confirm that is intended.
+ .declare fSCALING_0X_34X_TEMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+ .declare udSCALING_0X_34X_TEMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+ .declare ub4SCALING_0X_34X_TEMP Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<1> Type=ub
+ .declare uwSCALING_0X_34X_TEMP Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+
+ // Sampler ramp is used for Scaling 0X_0.34X; it starts at r9.0, the same base as the temp aliases above
+ .declare fSAMPLER_RAMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> Type=f // 1 GRFs, 8 elements
+
+
+ //#define rMSGDSC_UV r23.0
+
+
+//End of _SCALING_
+
+
+ //NOTE: We need offsets for the second half of LAYER 0 - even if we do not load it.
+ //Update the channel offset in the buffers for the lower 8x4 data for BUFFER_0.
+ mov (1) r22.4<1>:ud 0x400040:ud // one dword write = two words of 0x0040 (64) each
+
+// Three sample_8x8 sends follow; each uses r16 (header) + r17 (payload) and lands 4 GRFs further into BUFFER_3.
+ mov (1) r16.3<1>:ud r0.3<0;1,0>:ud // header dword 3 is copied from r0.3
+
+
+ //AVS_PAYLOAD already has all the data loaded at this point
+ add (1) a0.0<1>:ud r23.5<0;1,0>:ud 0x44EB000:ud // msg desc = r23.5 + constant (msg len 2, resp len 4, header present, SIMD32/64, sample_8x8 per the descriptor layout above)
+
+ mov (1) r16.2<1>:ud 0x0000D000:ud // bits 15:12 = 1101b: only bit 13 (Green/Y write mask) is clear, so only the Green channel is returned
+
+
+
+ // set the vertical block number
+
+ mov (1) r25.1<1>:ud 3:ud // payload dword 1 (vertical block number) = 3
+
+ mov (8) r17.0<1>:ud r25.0<8;8,1>:ud // Copy msg payload mirrors to MRFs
+
+ send (1) uwBUFFER_3(0)<1> r16 0x2 a0.0:ud
+ // Returns Y data in 4 GRFs in scrambled order
+
+ add (1) a0.0<1>:ud r23.5<0;1,0>:ud 0x44EB001:ud // msg desc; +1 in the binding-table-index field (bits 7:0) relative to the Y load
+
+ mov (1) r16.2<1>:ud 0x0000E000:ud // bits 15:12 = 1110b: only bit 12 (Red write mask) is clear, so only the Red channel is returned
+
+ send (1) uwBUFFER_3(4)<1> r16 0x2 a0.0:ud
+ // Returns U data in 4 GRFs in scrambled order
+
+ add (1) a0.0<1>:ud r23.5<0;1,0>:ud 0x44EB002:ud // msg desc; +2 in the binding-table-index field (bits 7:0) relative to the Y load
+ mov (1) r16.2<1>:ud 0x0000E000:ud // Enable Red channel only (same mask as the U load)
+
+ send (1) uwBUFFER_3(8)<1> r16 0x2 a0.0:ud
+ // Returns V data in 4 GRFs in scrambled order
+
+SKIP_AVS_LOAD_L0_0_:
+ nop
+
+
diff --git a/src/shaders/post_processing/gen8/RGB_to_YUV.g8a b/src/shaders/post_processing/gen8/RGB_to_YUV.g8a
new file mode 100644
index 0000000..2cda31e
--- /dev/null
+++ b/src/shaders/post_processing/gen8/RGB_to_YUV.g8a
@@ -0,0 +1,910 @@
+/*
+ * Copyright 2000-2013 Intel Corporation All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * Authors:
+ * Zhao Yakui <yakui.zhao@intel.com>
+ */
+
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+// Assembler defaults for this kernel: execution size 16 and register type :ub where an instruction or operand does not state its own.
+.default_execution_size (16)
+.default_register_type :ub
+// Register budget declared to the assembler: 128 registers in total, 7 of them counted as payload.
+.reg_count_total 128
+.reg_count_payload 7
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+//Color Pipe (IECP) parameters
+
+
+//ByteCopy
+
+
+//r4
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later)
+// 0000,0000,0000
+// 0001(Spawn a root thread),0001 (Root thread spawn thread)
+// = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later)
+// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+// 0000,0000 (Binding table index, added later)
+// = 0x02000011
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+ // Message Descriptors
+ // = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+ // 1 (header present 1) 0 1010 (media block write) 000000
+ // 00000000 (binding table index - set later)
+ // = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+ // buf4 starts from r28
+
+
+//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT
+
+// Message payload area: four typed aliases (ub, uw, ud, f) that all start at r30.
+.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Define temp space for any usages
+
+
+// Common Buffers
+
+
+// Temp space for rotation: five typed aliases that all start at r9.0 (same storage, different element type).
+// f/ud/uw/ub are packed views; ub4ROBUF touches one byte in four (<32;8,4> on read, stride 4 on write).
+.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+
+.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+
+.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+
+.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+
+
+// End of common.inc
+
+
+// Module name: RGB_to_YUV.asm
+//
+// Convert RGB to YUV, handle it by 16x4 block
+//
+
+
+// Description: Includes all definitions explicit to Fast Composite.
+
+
+//========== GRF partition ==========
+ // r0 header : r0 (1 GRF)
+ // Static parameters : r1 - r6 (6 GRFS)
+ // Inline parameters : r7 - r8 (2 GRFs)
+ // MSGSRC : r27 (1 GRF)
+//===================================
+
+//Interface:
+//========== Static Parameters (Explicit To Fast Composite) ==========
+//r1
+//CSC Set 0
+
+
+.declare udCSC_CURBE Base=r1.0 ElementSize=4 Type=ud // dword alias starting at r1.0, the register the surrounding notes label "CSC Set 0"
+
+//Constant alpha
+
+
+//r2
+
+
+// Gen7 AVS WA
+
+
+// WiDi Definitions
+
+
+//Colorfill
+
+
+ // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise.
+
+.declare ubCOLOR_PIXEL_VAL Base=r2.20 ElementSize=1 SrcRegion=<0;1,0> DstRegion=<1> Type=ub // byte alias at r2.20 (listed under "Colorfill"); <0;1,0> repeats the one addressed byte for every channel
+
+//r3
+//Normalised Ratio of Horizontal step size with main video for all layers
+
+
+ //Normalised Ratio of Horizontal step size with main video for all layers becomes
+ //Normalised Horizontal step size for all layers in VP_Setup.asm
+
+
+//r4
+//Normalised Vertical step size for all layers
+
+
+//r5
+//Normalised Vertical Frame Origin for all layers
+
+
+//r6
+//Normalised Horizontal Frame Origin for all layers
+
+
+//========== Inline Parameters (Explicit To Fast Composite) ==========
+
+
+//Main video Step X
+
+
+//====================== Binding table (Explicit To Fast Composite)=========================================
+
+
+//Used by Interlaced Scaling Kernels
+
+
+//========== Sampler State Table Index (Explicit To Fast Composite)==========
+//Sampler Index for AVS/IEF messages
+
+
+//Sampler Index for SIMD16 sampler messages
+
+
+//=============================================================================
+// Working buffers BUFFER_0..3 start at r64/r80/r96/r112 (16 GRFs apart). First view: packed floats.
+.declare fBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+// Same four buffers viewed as unsigned bytes (the "b" prefix notwithstanding, Type is ub).
+.declare bBUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare bBUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare bBUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare bBUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+// Same four buffers viewed as packed unsigned words.
+.declare uwBUFFER_0 Base=r64.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_1 Base=r80.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_2 Base=r96.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_3 Base=r112.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+//Pointer to mask reg
+// Same four buffers viewed as packed unsigned dwords.
+.declare udBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+//r18
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+// Named one-GRF aliases for r18..r21. None of them is referenced in the visible part of this kernel.
+.declare udCSC_COEFF_0 Base=r18.0 ElementSize=4 Type=ud // 1 GRF
+
+//r19
+
+
+.declare udCSC_COEFF_1 Base=r19.0 ElementSize=4 Type=ud // 1 GRF
+
+
+//r20
+
+.declare uwALPHA_MASK_REG_TEMP Base=r20.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF, 16 words; presumably scratch for the alpha mask - confirm against users
+
+//r21
+
+.declare uwALPHA_MASK_REG Base=r21.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF, 16 words
+
+//r22
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as
+//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT
+
+//r23
+
+
+//Lumakey
+
+
+//r24
+
+
+//r25
+
+
+//r26
+
+
+//defines to generate LABELS during compile time.
+
+
+//Msg payload buffers; up to 4 full-size messages can be written
+
+//The message payload registers are not needed for messages here, so they are temporarily reused as scratch for the RGB->YUV conversion
+// Float copies of one 16-pixel row of R, G and B; bases are 2 GRFs apart. fBUFFER_G shares r30 with the MSGPAYLOAD aliases.
+.declare fBUFFER_R Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> Type=f
+.declare fBUFFER_G Base=r30.0 ElementSize=4 SrcRegion=<8;8,1> Type=f
+.declare fBUFFER_B Base=r32.0 ElementSize=4 SrcRegion=<8;8,1> Type=f
+// Float results of the conversion for the same row.
+.declare fBUFFER_Y Base=r36.0 ElementSize=4 SrcRegion=<8;8,1> Type=f
+.declare fBUFFER_U Base=r38.0 ElementSize=4 SrcRegion=<8;8,1> Type=f
+.declare fBUFFER_V Base=r40.0 ElementSize=4 SrcRegion=<8;8,1> Type=f
+// Integer staging for the results, viewed as signed words ...
+.declare wTempY Base=r42.0 ElementSize=2 Type=w
+.declare wTempU Base=r44.0 ElementSize=2 Type=w
+.declare wTempV Base=r46.0 ElementSize=2 Type=w
+// ... and the same registers viewed as unsigned bytes.
+.declare ubTempY Base=r42.0 ElementSize=1 Type=ub
+.declare ubTempU Base=r44.0 ElementSize=1 Type=ub
+.declare ubTempV Base=r46.0 ElementSize=1 Type=ub
+
+ // the r17 register (nTEMP0) is originally defined from "Common.inc"; here it is aliased as uwTemp0.
+ // NOTE(review): the original note mentioned a "SAVE_RGB" suffix, which the name below does not carry.
+
+ .declare uwTemp0 Base=r17.0 ElementSize=2 Type=uw
+
+
+//_SAVE_INC_
+ // ITU-R conversion, Now we are using ITU-R conversion
+ // Y = 0.299R + 0.587G + 0.114B
+ // U = -0.169R - 0.331G + 0.499B + 128
+ // V = 0.499R - 0.418G - 0.0813B+ 128
+
+ // At the save module we have all 8 address sub-registers available.
+ // So we will use PING-PONG type of scheme to save the data using
+ // pointers pBUF_CHNL_TOP_8x4 and pBUF_CHNL_BOT_8x4. This will help
+ // reduce dependency. - rT
+
+ //wBUFF_CHNL_PTR points to either buffer 0 or buffer 4.
+ //Add appropriate offsets to get pointers for all buffers (1,2,3 or 5).
+ //Offsets are zero for buffer 0 and buffer 4.
+ //It always uses the YUVA layout.
+//for BUFFER_0: load the channel pointers, then swap two of them if the input byte order requires it
+ mov (4) a0.0<1>:uw r22.0<4;4,1>:uw // a0.0..a0.3 = the four words r22.0..r22.3 (used below as read pointers)
+ mov (4) a0.4<1>:uw r22.0<4;4,1>:uw // a0.4..a0.7 = a second copy of the same four words (used below as write pointers)
+ // YUV uses the a0.5,a0.6 and a0.4 as the indirect-register (sources are read through a0.1=R, a0.2=G, a0.0=B)
+ // Y = a0.5, U=a0.6, V=a0.4
+ // Channel swap? Bit 0 of r2.0 tells which byte order the input has:
+ // BGRX (B is the LSB) or RGBX.
+ // 1 means that it is BGRX.
+ and.nz.f0.0 null<1>:w r2.0<0;1,0>:uw 0x01:w // f0.0 is set when bit 0 of r2.0 is non-zero
+ // pointer swap: exchange a0.0 and a0.1 through uwTemp0, only when f0.0 is set; a0.4..a0.7 are not touched
+ (f0.0) mov (1) uwTemp0<1> a0.0:uw
+ (f0.0) mov (1) a0.0:uw a0.1:uw
+ (f0.0) mov (1) a0.1:uw uwTemp0<0;1,0>
+
+//the first line in the block 0: 16 pixels, read from the odd bytes starting at offsets 1 and 17 of each source channel
+ mov (8) fBUFFER_R(0, 0)<1> r[a0.1, 1]<16;8,2>:ub
+ mov (8) fBUFFER_R(1, 0)<1> r[a0.1, 17]<16;8,2>:ub
+ mov (8) fBUFFER_G(0, 0)<1> r[a0.2, 1]<16;8,2>:ub
+ mov (8) fBUFFER_G(1, 0)<1> r[a0.2, 17]<16;8,2>:ub
+ mov (8) fBUFFER_B(0, 0)<1> r[a0.0, 1]<16;8,2>:ub
+ mov (8) fBUFFER_B(1, 0)<1> r[a0.0, 17]<16;8,2>:ub
+// Y = 0.299R + 0.587G + 0.114B (mul starts the sum in acc0, the last mac writes the result row)
+ mul (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> 0.299f
+ mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> 0.587f
+ mac (16) fBUFFER_Y(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.114f
+// U = 128 - 0.169R - 0.331G + 0.499B (acc0 is preloaded with the 128 bias)
+ mov (16) acc0.0<1>:f 128.0f
+ mac (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> -0.169f
+ mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> -0.331f
+ mac (16) fBUFFER_U(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.499f
+// V = 128 + 0.499R - 0.418G - 0.0813B
+ mov (16) acc0.0<1>:f 128.0f
+ mac (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> 0.499f
+ mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> -0.418f
+ mac (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> -0.0813f
+// Convert the float results to words, one word per dword slot (destination stride 2)
+ mov (16) wTempY(0,0)<2> fBUFFER_Y(0, 0)<8;8,1>
+ mov (16) wTempU(0,0)<2> fBUFFER_U(0, 0)<8;8,1>
+ mov (16) wTempV(0,0)<2> fBUFFER_V(0, 0)<8;8,1>
+// Zero the 16 destination words of each channel (offset 0), then put byte 0 of every temp dword into the odd byte of each word
+ mov (16) r[a0.5, 0]<1>:uw 0:uw
+ mov (16) r[a0.6, 0]<1>:uw 0:uw
+ mov (16) r[a0.4, 0]<1>:uw 0:uw
+ mov (16) r[a0.5,1]<2>:ub ubTempY(0, 0)<32;8,4>
+ mov (16) r[a0.6,1]<2>:ub ubTempU(0, 0)<32;8,4>
+ mov (16) r[a0.4,1]<2>:ub ubTempV(0, 0)<32;8,4>
+
+
+//the second line in the block 0: 16 pixels, read from the odd bytes starting at offsets 33 and 49 of each source channel
+ mov (8) fBUFFER_R(0, 0)<1> r[a0.1, 33]<16;8,2>:ub
+ mov (8) fBUFFER_R(1, 0)<1> r[a0.1, 49]<16;8,2>:ub
+ mov (8) fBUFFER_G(0, 0)<1> r[a0.2, 33]<16;8,2>:ub
+ mov (8) fBUFFER_G(1, 0)<1> r[a0.2, 49]<16;8,2>:ub
+ mov (8) fBUFFER_B(0, 0)<1> r[a0.0, 33]<16;8,2>:ub
+ mov (8) fBUFFER_B(1, 0)<1> r[a0.0, 49]<16;8,2>:ub
+// Y = 0.299R + 0.587G + 0.114B (mul starts the sum in acc0, the last mac writes the result row)
+ mul (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> 0.299f
+ mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> 0.587f
+ mac (16) fBUFFER_Y(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.114f
+// U = 128 - 0.169R - 0.331G + 0.499B (acc0 is preloaded with the 128 bias)
+ mov (16) acc0.0<1>:f 128.0f
+ mac (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> -0.169f
+ mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> -0.331f
+ mac (16) fBUFFER_U(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.499f
+// V = 128 + 0.499R - 0.418G - 0.0813B
+ mov (16) acc0.0<1>:f 128.0f
+ mac (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> 0.499f
+ mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> -0.418f
+ mac (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> -0.0813f
+// Convert the float results to words, one word per dword slot (destination stride 2)
+ mov (16) wTempY(0,0)<2> fBUFFER_Y(0, 0)<8;8,1>
+ mov (16) wTempU(0,0)<2> fBUFFER_U(0, 0)<8;8,1>
+ mov (16) wTempV(0,0)<2> fBUFFER_V(0, 0)<8;8,1>
+// Zero the 16 destination words of each channel (offset 32), then put byte 0 of every temp dword into the odd byte of each word
+ mov (16) r[a0.5, 32]<1>:uw 0:uw
+ mov (16) r[a0.6, 32]<1>:uw 0:uw
+ mov (16) r[a0.4, 32]<1>:uw 0:uw
+ mov (16) r[a0.5,33]<2>:ub ubTempY(0, 0)<32;8,4>
+ mov (16) r[a0.6,33]<2>:ub ubTempU(0, 0)<32;8,4>
+ mov (16) r[a0.4,33]<2>:ub ubTempV(0, 0)<32;8,4>
+
+//the third line in the block 0: 16 pixels, read from the odd bytes starting at offsets 65 and 81 of each source channel
+ mov (8) fBUFFER_R(0, 0)<1> r[a0.1, 65]<16;8,2>:ub
+ mov (8) fBUFFER_R(1, 0)<1> r[a0.1, 81]<16;8,2>:ub
+ mov (8) fBUFFER_G(0, 0)<1> r[a0.2, 65]<16;8,2>:ub
+ mov (8) fBUFFER_G(1, 0)<1> r[a0.2, 81]<16;8,2>:ub
+ mov (8) fBUFFER_B(0, 0)<1> r[a0.0, 65]<16;8,2>:ub
+ mov (8) fBUFFER_B(1, 0)<1> r[a0.0, 81]<16;8,2>:ub
+// Y = 0.299R + 0.587G + 0.114B (mul starts the sum in acc0, the last mac writes the result row)
+ mul (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> 0.299f
+ mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> 0.587f
+ mac (16) fBUFFER_Y(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.114f
+// U = 128 - 0.169R - 0.331G + 0.499B (acc0 is preloaded with the 128 bias)
+ mov (16) acc0.0<1>:f 128.0f
+ mac (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> -0.169f
+ mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> -0.331f
+ mac (16) fBUFFER_U(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.499f
+// V = 128 + 0.499R - 0.418G - 0.0813B
+ mov (16) acc0.0<1>:f 128.0f
+ mac (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> 0.499f
+ mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> -0.418f
+ mac (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> -0.0813f
+// Convert the float results to words, one word per dword slot (destination stride 2)
+ mov (16) wTempY(0,0)<2> fBUFFER_Y(0, 0)<8;8,1>
+ mov (16) wTempU(0,0)<2> fBUFFER_U(0, 0)<8;8,1>
+ mov (16) wTempV(0,0)<2> fBUFFER_V(0, 0)<8;8,1>
+// Zero the 16 destination words of each channel (offset 64), then put byte 0 of every temp dword into the odd byte of each word
+ mov (16) r[a0.5, 64]<1>:uw 0:uw
+ mov (16) r[a0.6, 64]<1>:uw 0:uw
+ mov (16) r[a0.4, 64]<1>:uw 0:uw
+ mov (16) r[a0.5,65]<2>:ub ubTempY(0, 0)<32;8,4>
+ mov (16) r[a0.6,65]<2>:ub ubTempU(0, 0)<32;8,4>
+ mov (16) r[a0.4,65]<2>:ub ubTempV(0, 0)<32;8,4>
+
+//the fourth line in the block 0: 16 pixels, read from the odd bytes starting at offsets 97 and 113 of each source channel
+ mov (8) fBUFFER_R(0, 0)<1> r[a0.1, 97]<16;8,2>:ub
+ mov (8) fBUFFER_R(1, 0)<1> r[a0.1, 113]<16;8,2>:ub
+ mov (8) fBUFFER_G(0, 0)<1> r[a0.2, 97]<16;8,2>:ub
+ mov (8) fBUFFER_G(1, 0)<1> r[a0.2, 113]<16;8,2>:ub
+ mov (8) fBUFFER_B(0, 0)<1> r[a0.0, 97]<16;8,2>:ub
+ mov (8) fBUFFER_B(1, 0)<1> r[a0.0, 113]<16;8,2>:ub
+// Y = 0.299R + 0.587G + 0.114B (mul starts the sum in acc0, the last mac writes the result row)
+ mul (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> 0.299f
+ mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> 0.587f
+ mac (16) fBUFFER_Y(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.114f
+// U = 128 - 0.169R - 0.331G + 0.499B (acc0 is preloaded with the 128 bias)
+ mov (16) acc0.0<1>:f 128.0f
+ mac (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> -0.169f
+ mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> -0.331f
+ mac (16) fBUFFER_U(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.499f
+// V = 128 + 0.499R - 0.418G - 0.0813B
+ mov (16) acc0.0<1>:f 128.0f
+ mac (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> 0.499f
+ mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> -0.418f
+ mac (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> -0.0813f
+// Convert the float results to words, one word per dword slot (destination stride 2)
+ mov (16) wTempY(0,0)<2> fBUFFER_Y(0, 0)<8;8,1>
+ mov (16) wTempU(0,0)<2> fBUFFER_U(0, 0)<8;8,1>
+ mov (16) wTempV(0,0)<2> fBUFFER_V(0, 0)<8;8,1>
+// Zero the 16 destination words of each channel (offset 96), then put byte 0 of every temp dword into the odd byte of each word
+ mov (16) r[a0.5, 96]<1>:uw 0:uw
+ mov (16) r[a0.6, 96]<1>:uw 0:uw
+ mov (16) r[a0.4, 96]<1>:uw 0:uw
+ mov (16) r[a0.5,97]<2>:ub ubTempY(0, 0)<32;8,4>
+ mov (16) r[a0.6,97]<2>:ub ubTempU(0, 0)<32;8,4>
+ mov (16) r[a0.4,97]<2>:ub ubTempV(0, 0)<32;8,4>
+
+//for Buffer_1
+// Advance all eight address sub-registers by 512 bytes (16 GRFs of 32 bytes), the spacing between consecutive BUFFER_n declarations
+ add (8) a0.0<1>:uw a0.0<8;8,1>:uw 512:uw
+//the first line in the block 1: 16 pixels, read from the odd bytes starting at offsets 1 and 17 of each source channel
+ mov (8) fBUFFER_R(0, 0)<1> r[a0.1, 1]<16;8,2>:ub
+ mov (8) fBUFFER_R(1, 0)<1> r[a0.1, 17]<16;8,2>:ub
+ mov (8) fBUFFER_G(0, 0)<1> r[a0.2, 1]<16;8,2>:ub
+ mov (8) fBUFFER_G(1, 0)<1> r[a0.2, 17]<16;8,2>:ub
+ mov (8) fBUFFER_B(0, 0)<1> r[a0.0, 1]<16;8,2>:ub
+ mov (8) fBUFFER_B(1, 0)<1> r[a0.0, 17]<16;8,2>:ub
+// Y = 0.299R + 0.587G + 0.114B (mul starts the sum in acc0, the last mac writes the result row)
+ mul (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> 0.299f
+ mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> 0.587f
+ mac (16) fBUFFER_Y(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.114f
+// U = 128 - 0.169R - 0.331G + 0.499B (acc0 is preloaded with the 128 bias)
+ mov (16) acc0.0<1>:f 128.0f
+ mac (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> -0.169f
+ mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> -0.331f
+ mac (16) fBUFFER_U(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.499f
+// V = 128 + 0.499R - 0.418G - 0.0813B
+ mov (16) acc0.0<1>:f 128.0f
+ mac (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> 0.499f
+ mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> -0.418f
+ mac (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> -0.0813f
+// Convert the float results to words, one word per dword slot (destination stride 2)
+ mov (16) wTempY(0,0)<2> fBUFFER_Y(0, 0)<8;8,1>
+ mov (16) wTempU(0,0)<2> fBUFFER_U(0, 0)<8;8,1>
+ mov (16) wTempV(0,0)<2> fBUFFER_V(0, 0)<8;8,1>
+// Zero the 16 destination words of each channel (offset 0), then put byte 0 of every temp dword into the odd byte of each word
+ mov (16) r[a0.5, 0]<1>:uw 0:uw
+ mov (16) r[a0.6, 0]<1>:uw 0:uw
+ mov (16) r[a0.4, 0]<1>:uw 0:uw
+ mov (16) r[a0.5,1]<2>:ub ubTempY(0, 0)<32;8,4>
+ mov (16) r[a0.6,1]<2>:ub ubTempU(0, 0)<32;8,4>
+ mov (16) r[a0.4,1]<2>:ub ubTempV(0, 0)<32;8,4>
+
+
+//the second line in the block 1: 16 pixels, read from the odd bytes starting at offsets 33 and 49 of each source channel
+ mov (8) fBUFFER_R(0, 0)<1> r[a0.1, 33]<16;8,2>:ub
+ mov (8) fBUFFER_R(1, 0)<1> r[a0.1, 49]<16;8,2>:ub
+ mov (8) fBUFFER_G(0, 0)<1> r[a0.2, 33]<16;8,2>:ub
+ mov (8) fBUFFER_G(1, 0)<1> r[a0.2, 49]<16;8,2>:ub
+ mov (8) fBUFFER_B(0, 0)<1> r[a0.0, 33]<16;8,2>:ub
+ mov (8) fBUFFER_B(1, 0)<1> r[a0.0, 49]<16;8,2>:ub
+// Y = 0.299R + 0.587G + 0.114B (mul starts the sum in acc0, the last mac writes the result row)
+ mul (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> 0.299f
+ mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> 0.587f
+ mac (16) fBUFFER_Y(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.114f
+// U = 128 - 0.169R - 0.331G + 0.499B (acc0 is preloaded with the 128 bias)
+ mov (16) acc0.0<1>:f 128.0f
+ mac (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> -0.169f
+ mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> -0.331f
+ mac (16) fBUFFER_U(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.499f
+// V = 128 + 0.499R - 0.418G - 0.0813B
+ mov (16) acc0.0<1>:f 128.0f
+ mac (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> 0.499f
+ mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> -0.418f
+ mac (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> -0.0813f
+// Convert the float results to words, one word per dword slot (destination stride 2)
+ mov (16) wTempY(0,0)<2> fBUFFER_Y(0, 0)<8;8,1>
+ mov (16) wTempU(0,0)<2> fBUFFER_U(0, 0)<8;8,1>
+ mov (16) wTempV(0,0)<2> fBUFFER_V(0, 0)<8;8,1>
+// Zero the 16 destination words of each channel (offset 32), then put byte 0 of every temp dword into the odd byte of each word
+ mov (16) r[a0.5, 32]<1>:uw 0:uw
+ mov (16) r[a0.6, 32]<1>:uw 0:uw
+ mov (16) r[a0.4, 32]<1>:uw 0:uw
+ mov (16) r[a0.5,33]<2>:ub ubTempY(0, 0)<32;8,4>
+ mov (16) r[a0.6,33]<2>:ub ubTempU(0, 0)<32;8,4>
+ mov (16) r[a0.4,33]<2>:ub ubTempV(0, 0)<32;8,4>
+
+//the third line in the block 1: 16 pixels, read from the odd bytes starting at offsets 65 and 81 of each source channel
+ mov (8) fBUFFER_R(0, 0)<1> r[a0.1, 65]<16;8,2>:ub
+ mov (8) fBUFFER_R(1, 0)<1> r[a0.1, 81]<16;8,2>:ub
+ mov (8) fBUFFER_G(0, 0)<1> r[a0.2, 65]<16;8,2>:ub
+ mov (8) fBUFFER_G(1, 0)<1> r[a0.2, 81]<16;8,2>:ub
+ mov (8) fBUFFER_B(0, 0)<1> r[a0.0, 65]<16;8,2>:ub
+ mov (8) fBUFFER_B(1, 0)<1> r[a0.0, 81]<16;8,2>:ub
+// Y = 0.299R + 0.587G + 0.114B (mul starts the sum in acc0, the last mac writes the result row)
+ mul (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> 0.299f
+ mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> 0.587f
+ mac (16) fBUFFER_Y(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.114f
+// U = 128 - 0.169R - 0.331G + 0.499B (acc0 is preloaded with the 128 bias)
+ mov (16) acc0.0<1>:f 128.0f
+ mac (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> -0.169f
+ mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> -0.331f
+ mac (16) fBUFFER_U(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.499f
+// V = 128 + 0.499R - 0.418G - 0.0813B
+ mov (16) acc0.0<1>:f 128.0f
+ mac (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> 0.499f
+ mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> -0.418f
+ mac (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> -0.0813f
+// Convert the float results to words, one word per dword slot (destination stride 2)
+ mov (16) wTempY(0,0)<2> fBUFFER_Y(0, 0)<8;8,1>
+ mov (16) wTempU(0,0)<2> fBUFFER_U(0, 0)<8;8,1>
+ mov (16) wTempV(0,0)<2> fBUFFER_V(0, 0)<8;8,1>
+// Zero the 16 destination words of each channel (offset 64), then put byte 0 of every temp dword into the odd byte of each word
+ mov (16) r[a0.5, 64]<1>:uw 0:uw
+ mov (16) r[a0.6, 64]<1>:uw 0:uw
+ mov (16) r[a0.4, 64]<1>:uw 0:uw
+ mov (16) r[a0.5,65]<2>:ub ubTempY(0, 0)<32;8,4>
+ mov (16) r[a0.6,65]<2>:ub ubTempU(0, 0)<32;8,4>
+ mov (16) r[a0.4,65]<2>:ub ubTempV(0, 0)<32;8,4>
+
+//the fourth line in the block 1: 16 pixels, read from the odd bytes starting at offsets 97 and 113 of each source channel
+ mov (8) fBUFFER_R(0, 0)<1> r[a0.1, 97]<16;8,2>:ub
+ mov (8) fBUFFER_R(1, 0)<1> r[a0.1, 113]<16;8,2>:ub
+ mov (8) fBUFFER_G(0, 0)<1> r[a0.2, 97]<16;8,2>:ub
+ mov (8) fBUFFER_G(1, 0)<1> r[a0.2, 113]<16;8,2>:ub
+ mov (8) fBUFFER_B(0, 0)<1> r[a0.0, 97]<16;8,2>:ub
+ mov (8) fBUFFER_B(1, 0)<1> r[a0.0, 113]<16;8,2>:ub
+// Y = 0.299R + 0.587G + 0.114B (mul starts the sum in acc0, the last mac writes the result row)
+ mul (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> 0.299f
+ mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> 0.587f
+ mac (16) fBUFFER_Y(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.114f
+// U = 128 - 0.169R - 0.331G + 0.499B (acc0 is preloaded with the 128 bias)
+ mov (16) acc0.0<1>:f 128.0f
+ mac (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> -0.169f
+ mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> -0.331f
+ mac (16) fBUFFER_U(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.499f
+// V = 128 + 0.499R - 0.418G - 0.0813B
+ mov (16) acc0.0<1>:f 128.0f
+ mac (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> 0.499f
+ mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> -0.418f
+ mac (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> -0.0813f
+// Convert the float results to words, one word per dword slot (destination stride 2)
+ mov (16) wTempY(0,0)<2> fBUFFER_Y(0, 0)<8;8,1>
+ mov (16) wTempU(0,0)<2> fBUFFER_U(0, 0)<8;8,1>
+ mov (16) wTempV(0,0)<2> fBUFFER_V(0, 0)<8;8,1>
+// Zero the 16 destination words of each channel (offset 96), then put byte 0 of every temp dword into the odd byte of each word
+ mov (16) r[a0.5, 96]<1>:uw 0:uw
+ mov (16) r[a0.6, 96]<1>:uw 0:uw
+ mov (16) r[a0.4, 96]<1>:uw 0:uw
+ mov (16) r[a0.5,97]<2>:ub ubTempY(0, 0)<32;8,4>
+ mov (16) r[a0.6,97]<2>:ub ubTempU(0, 0)<32;8,4>
+ mov (16) r[a0.4,97]<2>:ub ubTempV(0, 0)<32;8,4>
+
+//for Buffer_2: advance all eight address sub-registers by another 512 bytes (16 GRFs of 32 bytes)
+ add (8) a0.0<1>:uw a0.0<8;8,1>:uw 512:uw
+//the first line in the block 2: 16 pixels, read from the odd bytes starting at offsets 1 and 17 of each source channel
+ mov (8) fBUFFER_R(0, 0)<1> r[a0.1, 1]<16;8,2>:ub
+ mov (8) fBUFFER_R(1, 0)<1> r[a0.1, 17]<16;8,2>:ub
+ mov (8) fBUFFER_G(0, 0)<1> r[a0.2, 1]<16;8,2>:ub
+ mov (8) fBUFFER_G(1, 0)<1> r[a0.2, 17]<16;8,2>:ub
+ mov (8) fBUFFER_B(0, 0)<1> r[a0.0, 1]<16;8,2>:ub
+ mov (8) fBUFFER_B(1, 0)<1> r[a0.0, 17]<16;8,2>:ub
+// Y = 0.299R + 0.587G + 0.114B (mul starts the sum in acc0, the last mac writes the result row)
+ mul (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> 0.299f
+ mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> 0.587f
+ mac (16) fBUFFER_Y(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.114f
+// U = 128 - 0.169R - 0.331G + 0.499B (acc0 is preloaded with the 128 bias)
+ mov (16) acc0.0<1>:f 128.0f
+ mac (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> -0.169f
+ mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> -0.331f
+ mac (16) fBUFFER_U(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.499f
+// V = 128 + 0.499R - 0.418G - 0.0813B
+ mov (16) acc0.0<1>:f 128.0f
+ mac (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> 0.499f
+ mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> -0.418f
+ mac (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> -0.0813f
+// Convert the float results to words, one word per dword slot (destination stride 2)
+ mov (16) wTempY(0,0)<2> fBUFFER_Y(0, 0)<8;8,1>
+ mov (16) wTempU(0,0)<2> fBUFFER_U(0, 0)<8;8,1>
+ mov (16) wTempV(0,0)<2> fBUFFER_V(0, 0)<8;8,1>
+// Zero the 16 destination words of each channel (offset 0), then put byte 0 of every temp dword into the odd byte of each word
+ mov (16) r[a0.5, 0]<1>:uw 0:uw
+ mov (16) r[a0.6, 0]<1>:uw 0:uw
+ mov (16) r[a0.4, 0]<1>:uw 0:uw
+ mov (16) r[a0.5,1]<2>:ub ubTempY(0, 0)<32;8,4>
+ mov (16) r[a0.6,1]<2>:ub ubTempU(0, 0)<32;8,4>
+ mov (16) r[a0.4,1]<2>:ub ubTempV(0, 0)<32;8,4>
+
+//the second line in the block 2: 16 pixels, read from the odd bytes starting at offsets 33 and 49 of each source channel
+ mov (8) fBUFFER_R(0, 0)<1> r[a0.1, 33]<16;8,2>:ub
+ mov (8) fBUFFER_R(1, 0)<1> r[a0.1, 49]<16;8,2>:ub
+ mov (8) fBUFFER_G(0, 0)<1> r[a0.2, 33]<16;8,2>:ub
+ mov (8) fBUFFER_G(1, 0)<1> r[a0.2, 49]<16;8,2>:ub
+ mov (8) fBUFFER_B(0, 0)<1> r[a0.0, 33]<16;8,2>:ub
+ mov (8) fBUFFER_B(1, 0)<1> r[a0.0, 49]<16;8,2>:ub
+// Y = 0.299R + 0.587G + 0.114B (mul starts the sum in acc0, the last mac writes the result row)
+ mul (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> 0.299f
+ mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> 0.587f
+ mac (16) fBUFFER_Y(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.114f
+// U = 128 - 0.169R - 0.331G + 0.499B (acc0 is preloaded with the 128 bias)
+ mov (16) acc0.0<1>:f 128.0f
+ mac (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> -0.169f
+ mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> -0.331f
+ mac (16) fBUFFER_U(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.499f
+// V = 128 + 0.499R - 0.418G - 0.0813B
+ mov (16) acc0.0<1>:f 128.0f
+ mac (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> 0.499f
+ mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> -0.418f
+ mac (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> -0.0813f
+// Convert the float results to words, one word per dword slot (destination stride 2)
+ mov (16) wTempY(0,0)<2> fBUFFER_Y(0, 0)<8;8,1>
+ mov (16) wTempU(0,0)<2> fBUFFER_U(0, 0)<8;8,1>
+ mov (16) wTempV(0,0)<2> fBUFFER_V(0, 0)<8;8,1>
+// Zero the 16 destination words of each channel (offset 32), then put byte 0 of every temp dword into the odd byte of each word
+ mov (16) r[a0.5, 32]<1>:uw 0:uw
+ mov (16) r[a0.6, 32]<1>:uw 0:uw
+ mov (16) r[a0.4, 32]<1>:uw 0:uw
+ mov (16) r[a0.5,33]<2>:ub ubTempY(0, 0)<32;8,4>
+ mov (16) r[a0.6,33]<2>:ub ubTempU(0, 0)<32;8,4>
+ mov (16) r[a0.4,33]<2>:ub ubTempV(0, 0)<32;8,4>
+
+//the third line in the block 2
+ mov (8) fBUFFER_R(0, 0)<1> r[a0.1, 65]<16;8,2>:ub
+ mov (8) fBUFFER_R(1, 0)<1> r[a0.1, 81]<16;8,2>:ub
+ mov (8) fBUFFER_G(0, 0)<1> r[a0.2, 65]<16;8,2>:ub
+ mov (8) fBUFFER_G(1, 0)<1> r[a0.2, 81]<16;8,2>:ub
+ mov (8) fBUFFER_B(0, 0)<1> r[a0.0, 65]<16;8,2>:ub
+ mov (8) fBUFFER_B(1, 0)<1> r[a0.0, 81]<16;8,2>:ub
+
+ mul (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> 0.299f
+ mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> 0.587f
+ mac (16) fBUFFER_Y(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.114f
+
+ mov (16) acc0.0<1>:f 128.0f
+ mac (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> -0.169f
+ mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> -0.331f
+ mac (16) fBUFFER_U(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.499f
+
+ mov (16) acc0.0<1>:f 128.0f
+ mac (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> 0.499f
+ mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> -0.418f
+ mac (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> -0.0813f
+
+ mov (16) wTempY(0,0)<2> fBUFFER_Y(0, 0)<8;8,1>
+ mov (16) wTempU(0,0)<2> fBUFFER_U(0, 0)<8;8,1>
+ mov (16) wTempV(0,0)<2> fBUFFER_V(0, 0)<8;8,1>
+
+ mov (16) r[a0.5, 64]<1>:uw 0:uw
+ mov (16) r[a0.6, 64]<1>:uw 0:uw
+ mov (16) r[a0.4, 64]<1>:uw 0:uw
+ mov (16) r[a0.5,65]<2>:ub ubTempY(0, 0)<32;8,4>
+ mov (16) r[a0.6,65]<2>:ub ubTempU(0, 0)<32;8,4>
+ mov (16) r[a0.4,65]<2>:ub ubTempV(0, 0)<32;8,4>
+
+//the fourth line in the block 2
+ mov (8) fBUFFER_R(0, 0)<1> r[a0.1, 97]<16;8,2>:ub
+ mov (8) fBUFFER_R(1, 0)<1> r[a0.1, 113]<16;8,2>:ub
+ mov (8) fBUFFER_G(0, 0)<1> r[a0.2, 97]<16;8,2>:ub
+ mov (8) fBUFFER_G(1, 0)<1> r[a0.2, 113]<16;8,2>:ub
+ mov (8) fBUFFER_B(0, 0)<1> r[a0.0, 97]<16;8,2>:ub
+ mov (8) fBUFFER_B(1, 0)<1> r[a0.0, 113]<16;8,2>:ub
+
+ mul (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> 0.299f
+ mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> 0.587f
+ mac (16) fBUFFER_Y(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.114f
+
+ mov (16) acc0.0<1>:f 128.0f
+ mac (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> -0.169f
+ mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> -0.331f
+ mac (16) fBUFFER_U(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.499f
+
+ mov (16) acc0.0<1>:f 128.0f
+ mac (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> 0.499f
+ mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> -0.418f
+ mac (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> -0.0813f
+
+ mov (16) wTempY(0,0)<2> fBUFFER_Y(0, 0)<8;8,1>
+ mov (16) wTempU(0,0)<2> fBUFFER_U(0, 0)<8;8,1>
+ mov (16) wTempV(0,0)<2> fBUFFER_V(0, 0)<8;8,1>
+
+ mov (16) r[a0.5, 96]<1>:uw 0:uw
+ mov (16) r[a0.6, 96]<1>:uw 0:uw
+ mov (16) r[a0.4, 96]<1>:uw 0:uw
+ mov (16) r[a0.5,97]<2>:ub ubTempY(0, 0)<32;8,4>
+ mov (16) r[a0.6,97]<2>:ub ubTempU(0, 0)<32;8,4>
+ mov (16) r[a0.4,97]<2>:ub ubTempV(0, 0)<32;8,4>
+
+//for Buffer_3
+ add (8) a0.0<1>:uw a0.0<8;8,1>:uw 512:uw
+//the first line in the block 3
+ mov (8) fBUFFER_R(0, 0)<1> r[a0.1, 1]<16;8,2>:ub
+ mov (8) fBUFFER_R(1, 0)<1> r[a0.1, 17]<16;8,2>:ub
+ mov (8) fBUFFER_G(0, 0)<1> r[a0.2, 1]<16;8,2>:ub
+ mov (8) fBUFFER_G(1, 0)<1> r[a0.2, 17]<16;8,2>:ub
+ mov (8) fBUFFER_B(0, 0)<1> r[a0.0, 1]<16;8,2>:ub
+ mov (8) fBUFFER_B(1, 0)<1> r[a0.0, 17]<16;8,2>:ub
+
+ mul (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> 0.299f
+ mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> 0.587f
+ mac (16) fBUFFER_Y(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.114f
+
+ mov (16) acc0.0<1>:f 128.0f
+ mac (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> -0.169f
+ mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> -0.331f
+ mac (16) fBUFFER_U(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.499f
+
+ mov (16) acc0.0<1>:f 128.0f
+ mac (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> 0.499f
+ mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> -0.418f
+ mac (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> -0.0813f
+
+ mov (16) wTempY(0,0)<2> fBUFFER_Y(0, 0)<8;8,1>
+ mov (16) wTempU(0,0)<2> fBUFFER_U(0, 0)<8;8,1>
+ mov (16) wTempV(0,0)<2> fBUFFER_V(0, 0)<8;8,1>
+
+ mov (16) r[a0.5, 0]<1>:uw 0:uw
+ mov (16) r[a0.6, 0]<1>:uw 0:uw
+ mov (16) r[a0.4, 0]<1>:uw 0:uw
+ mov (16) r[a0.5,1]<2>:ub ubTempY(0, 0)<32;8,4>
+ mov (16) r[a0.6,1]<2>:ub ubTempU(0, 0)<32;8,4>
+ mov (16) r[a0.4,1]<2>:ub ubTempV(0, 0)<32;8,4>
+
+
+//the second line in the block 3
+ mov (8) fBUFFER_R(0, 0)<1> r[a0.1, 33]<16;8,2>:ub
+ mov (8) fBUFFER_R(1, 0)<1> r[a0.1, 49]<16;8,2>:ub
+ mov (8) fBUFFER_G(0, 0)<1> r[a0.2, 33]<16;8,2>:ub
+ mov (8) fBUFFER_G(1, 0)<1> r[a0.2, 49]<16;8,2>:ub
+ mov (8) fBUFFER_B(0, 0)<1> r[a0.0, 33]<16;8,2>:ub
+ mov (8) fBUFFER_B(1, 0)<1> r[a0.0, 49]<16;8,2>:ub
+
+ mul (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> 0.299f
+ mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> 0.587f
+ mac (16) fBUFFER_Y(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.114f
+
+ mov (16) acc0.0<1>:f 128.0f
+ mac (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> -0.169f
+ mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> -0.331f
+ mac (16) fBUFFER_U(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.499f
+
+ mov (16) acc0.0<1>:f 128.0f
+ mac (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> 0.499f
+ mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> -0.418f
+ mac (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> -0.0813f
+
+ mov (16) wTempY(0,0)<2> fBUFFER_Y(0, 0)<8;8,1>
+ mov (16) wTempU(0,0)<2> fBUFFER_U(0, 0)<8;8,1>
+ mov (16) wTempV(0,0)<2> fBUFFER_V(0, 0)<8;8,1>
+
+ mov (16) r[a0.5, 32]<1>:uw 0:uw
+ mov (16) r[a0.6, 32]<1>:uw 0:uw
+ mov (16) r[a0.4, 32]<1>:uw 0:uw
+ mov (16) r[a0.5,33]<2>:ub ubTempY(0, 0)<32;8,4>
+ mov (16) r[a0.6,33]<2>:ub ubTempU(0, 0)<32;8,4>
+ mov (16) r[a0.4,33]<2>:ub ubTempV(0, 0)<32;8,4>
+
+//the third line in the block 3
+ mov (8) fBUFFER_R(0, 0)<1> r[a0.1, 65]<16;8,2>:ub
+ mov (8) fBUFFER_R(1, 0)<1> r[a0.1, 81]<16;8,2>:ub
+ mov (8) fBUFFER_G(0, 0)<1> r[a0.2, 65]<16;8,2>:ub
+ mov (8) fBUFFER_G(1, 0)<1> r[a0.2, 81]<16;8,2>:ub
+ mov (8) fBUFFER_B(0, 0)<1> r[a0.0, 65]<16;8,2>:ub
+ mov (8) fBUFFER_B(1, 0)<1> r[a0.0, 81]<16;8,2>:ub
+
+ mul (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> 0.299f
+ mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> 0.587f
+ mac (16) fBUFFER_Y(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.114f
+
+ mov (16) acc0.0<1>:f 128.0f
+ mac (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> -0.169f
+ mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> -0.331f
+ mac (16) fBUFFER_U(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.499f
+
+ mov (16) acc0.0<1>:f 128.0f
+ mac (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> 0.499f
+ mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> -0.418f
+ mac (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> -0.0813f
+
+ mov (16) wTempY(0,0)<2> fBUFFER_Y(0, 0)<8;8,1>
+ mov (16) wTempU(0,0)<2> fBUFFER_U(0, 0)<8;8,1>
+ mov (16) wTempV(0,0)<2> fBUFFER_V(0, 0)<8;8,1>
+
+ mov (16) r[a0.5, 64]<1>:uw 0:uw
+ mov (16) r[a0.6, 64]<1>:uw 0:uw
+ mov (16) r[a0.4, 64]<1>:uw 0:uw
+ mov (16) r[a0.5,65]<2>:ub ubTempY(0, 0)<32;8,4>
+ mov (16) r[a0.6,65]<2>:ub ubTempU(0, 0)<32;8,4>
+ mov (16) r[a0.4,65]<2>:ub ubTempV(0, 0)<32;8,4>
+
+//the fourth line in the block 3
+ mov (8) fBUFFER_R(0, 0)<1> r[a0.1, 97]<16;8,2>:ub
+ mov (8) fBUFFER_R(1, 0)<1> r[a0.1, 113]<16;8,2>:ub
+ mov (8) fBUFFER_G(0, 0)<1> r[a0.2, 97]<16;8,2>:ub
+ mov (8) fBUFFER_G(1, 0)<1> r[a0.2, 113]<16;8,2>:ub
+ mov (8) fBUFFER_B(0, 0)<1> r[a0.0, 97]<16;8,2>:ub
+ mov (8) fBUFFER_B(1, 0)<1> r[a0.0, 113]<16;8,2>:ub
+
+ mul (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> 0.299f
+ mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> 0.587f
+ mac (16) fBUFFER_Y(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.114f
+
+ mov (16) acc0.0<1>:f 128.0f
+ mac (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> -0.169f
+ mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> -0.331f
+ mac (16) fBUFFER_U(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.499f
+
+ mov (16) acc0.0<1>:f 128.0f
+ mac (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> 0.499f
+ mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> -0.418f
+ mac (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> -0.0813f
+
+ mov (16) wTempY(0,0)<2> fBUFFER_Y(0, 0)<8;8,1>
+ mov (16) wTempU(0,0)<2> fBUFFER_U(0, 0)<8;8,1>
+ mov (16) wTempV(0,0)<2> fBUFFER_V(0, 0)<8;8,1>
+
+ mov (16) r[a0.5, 96]<1>:uw 0:uw
+ mov (16) r[a0.6, 96]<1>:uw 0:uw
+ mov (16) r[a0.4, 96]<1>:uw 0:uw
+ mov (16) r[a0.5,97]<2>:ub ubTempY(0, 0)<32;8,4>
+ mov (16) r[a0.6,97]<2>:ub ubTempU(0, 0)<32;8,4>
+ mov (16) r[a0.4,97]<2>:ub ubTempV(0, 0)<32;8,4>
+
diff --git a/src/shaders/post_processing/gen8/Save_AVS_NV12.g8a b/src/shaders/post_processing/gen8/Save_AVS_NV12.g8a
new file mode 100644
index 0000000..dcb7ce0
--- /dev/null
+++ b/src/shaders/post_processing/gen8/Save_AVS_NV12.g8a
@@ -0,0 +1,621 @@
+/*
+ * Copyright 2000-2011 Intel Corporation All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+// 131 // Total instruction count
+// 1 // Total kernel count
+
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+
+// Assembler-wide defaults for this kernel: execution size 16 and register type
+// unsigned byte. Every instruction in this file states its execution size
+// explicitly, so the first default only matters for code that omits it.
+.default_execution_size (16)
+.default_register_type :ub
+
+// Register budget declared for the kernel: 128 registers in total, 7 of them
+// counted as payload.
+// NOTE(review): exact meaning of the payload count (which GRFs it covers) is
+// not visible here - confirm against the assembler documentation.
+.reg_count_total 128
+.reg_count_payload 7
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+//Color Pipe (IECP) parameters
+
+
+//ByteCopy
+
+
+//r4
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later)
+// 0000,0000,0000
+// 0001(Spawn a root thread),0001 (Root thread spawn thread)
+// = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later)
+// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+// 0000,0000 (Binding table index, added later)
+// = 0x02000011
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+ // Message Descriptors
+ // = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+ // 1 (header present 1) 0 1010 (media block write) 000000
+ // 00000000 (binding table index - set later)
+ // = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+ // buf4 starts from r28
+
+
+//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT
+
+
+// Four typed aliases (ub / uw / ud / f) for the same storage starting at r30.
+// They come from the shared common.inc section; none of them is referenced by
+// the instructions of this kernel (which uses the numbered mubMSGPAYLOAD0/1).
+.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Define temp space for any usages
+
+
+// Common Buffers
+
+
+// temp space for rotation
+
+// Five typed aliases for the same scratch storage starting at r9.0:
+// float, dword, word, byte, and a byte view that steps 4 bytes per element
+// (ub4ROBUF). None of them is referenced by the instructions of this kernel.
+.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+
+.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+
+.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+
+.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+
+
+// End of common.inc
+
+
+// Module name: Save_AVS_NV12.asm
+//
+// Save NV12 420 frame data block of size 16x16
+//
+// To save a 16x16 block (16x16 bytes of Y and 16x8 bytes of interleaved UV), we need 2 send instructions, of size 16x16 and 16x8 respectively.
+// ---------------
+// | 16x16 |
+// | Y |
+// ---------------
+// | 16x8 UV |
+// ---------------
+
+//-----------------------------------------------------------------
+//The layout of data is as follows:
+//mMSGHDR0 : Y data header (16x16)
+//mubMSGPAYLOAD0 : Y data payload (8 GRFs)
+//mMSGHDR1 : UV data header (16x8)
+//mubMSGPAYLOAD1 : UV data payload (4 GRFs)
+//------------------------------------------------------------------
+
+
+
+// Module name: Save.inc
+
+
+
+
+// Description: Includes all definitions explicit to Fast Composite.
+
+
+
+
+// End of common.inc
+
+
+//========== GRF partition ==========
+ // r0 header : r0 (1 GRF)
+ // Static parameters : r1 - r6 (6 GRFS)
+ // Inline parameters : r7 - r8 (2 GRFs)
+ // MSGSRC : r27 (1 GRF)
+//===================================
+
+//Interface:
+//========== Static Parameters (Explicit To Fast Composite) ==========
+//r1
+//CSC Set 0
+
+
+// Dword alias for r1 (the first static-parameter register, labelled "CSC Set 0").
+// Not referenced by the instructions of this kernel.
+.declare udCSC_CURBE Base=r1.0 ElementSize=4 Type=ud
+
+//Constant alpha
+
+
+//r2
+
+
+// Gen7 AVS WA
+
+
+// WiDi Definitions
+
+
+//Colorfill
+
+
+ // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise.
+
+// Byte alias starting at byte 20 of r2. The <0;1,0> source region has zero
+// strides, i.e. a single element is replicated to every channel when read.
+// Not referenced by the instructions of this kernel.
+.declare ubCOLOR_PIXEL_VAL Base=r2.20 ElementSize=1 SrcRegion=<0;1,0> DstRegion=<1> Type=ub
+
+//r3
+//Normalised Ratio of Horizontal step size with main video for all layers
+
+
+ //Normalised Ratio of Horizontal step size with main video for all layers becomes
+ //Normalised Horizontal step size for all layers in VP_Setup.asm
+
+
+//r4
+//Normalised Vertical step size for all layers
+
+
+//r5
+//Normalised Vertical Frame Origin for all layers
+
+
+//r6
+//Normalised Horizontal Frame Origin for all layers
+
+
+//========== Inline Parameters (Explicit To Fast Composite) ==========
+
+
+//Main video Step X
+
+
+//====================== Binding table (Explicit To Fast Composite)=========================================
+
+
+//Used by Interlaced Scaling Kernels
+
+
+//========== Sampler State Table Index (Explicit To Fast Composite)==========
+//Sampler Index for AVS/IEF messages
+
+
+//Sampler Index for SIMD16 sampler messages
+
+
+//=============================================================================
+
+// Working buffers. Six buffers (0..5) are declared, each with five typed
+// aliases over the same registers: f, ud, uw, ub, and ub4 (a byte view that
+// steps 4 bytes per element).
+//   buffer 0 -> r64    buffer 1 -> r80    buffer 2 -> r96    buffer 3 -> r112
+//   buffer 4 -> r28    buffer 5 -> r46
+// Buffers 0-3 are 16 registers apart, which matches the 512-byte pointer steps
+// (0 / 512 / 1024 / 1536) the save code adds to the channel pointers, assuming
+// 32-byte GRFs.
+// The only alias referenced by name in this kernel is uwBUFFER_5, used as
+// scratch rows (0..7, i.e. r46..r53) while averaging chroma; the other buffers
+// are reached through indirect addressing (r[a0.x, offset]).
+// NOTE(review): buffer 4 (r28) shares registers with the message header/payload
+// area set up by the save code (r28, r29...); it is not used by name here.
+.declare fBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+
+.declare udBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+
+.declare uwBUFFER_0 Base=r64.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_1 Base=r80.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_2 Base=r96.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_3 Base=r112.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_4 Base=r28.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_5 Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+
+.declare ubBUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+.declare ub4BUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+
+//Pointer to mask reg
+
+
+//r18
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+// One-register aliases for r18..r21 (CSC coefficient sets and alpha mask
+// registers by name). None of them is referenced by the instructions of this
+// kernel.
+.declare udCSC_COEFF_0 Base=r18.0 ElementSize=4 Type=ud // 1 GRF
+
+//r19
+
+
+.declare udCSC_COEFF_1 Base=r19.0 ElementSize=4 Type=ud // 1 GRF
+
+
+//r20
+
+.declare uwALPHA_MASK_REG_TEMP Base=r20.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF
+
+//r21
+
+.declare uwALPHA_MASK_REG Base=r21.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF
+
+//r22
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as
+//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT
+
+//r23
+
+
+//Lumakey
+
+
+//r24
+
+
+//r25
+
+
+//r26
+
+
+//defines to generate LABELS during compile time.
+
+
+//Msg payload buffers; up to 4 full-size messages can be written
+
+
+// Message payload aliases. Payloads 0..3 start at r29, r38, r47 and r56, i.e.
+// 9 registers apart; the save code below places each message header in the
+// register just before its payload (r28 for payload 0, r37 for payload 1).
+// Payloads 4..7 (byte view only) start 3 registers after payloads 0..3.
+// This kernel references only mubMSGPAYLOAD0 (Y) and mubMSGPAYLOAD1 (UV).
+.declare mudMSGPAYLOAD0 Base=r29.0 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare mudMSGPAYLOAD1 Base=r38.0 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare mudMSGPAYLOAD2 Base=r47.0 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare mudMSGPAYLOAD3 Base=r56.0 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+
+.declare muwMSGPAYLOAD0 Base=r29.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare muwMSGPAYLOAD1 Base=r38.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare muwMSGPAYLOAD2 Base=r47.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare muwMSGPAYLOAD3 Base=r56.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+
+.declare mubMSGPAYLOAD0 Base=r29.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare mubMSGPAYLOAD1 Base=r38.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare mubMSGPAYLOAD2 Base=r47.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare mubMSGPAYLOAD3 Base=r56.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare mubMSGPAYLOAD4 Base=r32.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare mubMSGPAYLOAD5 Base=r41.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare mubMSGPAYLOAD6 Base=r50.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare mubMSGPAYLOAD7 Base=r59.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+
+
+ // the r17 register (nTEMP0) is originally defined from "Common.inc"
+ // instead of re-defining a nTEMP0 here, we use "SAVE_RGB" suffix for its naming
+
+ // Word alias for r17; not referenced by the instructions of this kernel.
+ .declare uwTemp0 Base=r17.0 ElementSize=2 Type=uw
+
+
+//_SAVE_INC_
+
+
+ // At the save module we have all 8 address sub-registers available.
+ // So we will use PING-PONG type of scheme to save the data using
+ // pointers pBUF_CHNL_TOP_8x4 and pBUF_CHNL_BOT_8x4. This will help
+ // reduce dependency. - rT
+
+ //wBUFF_CHNL_PTR points to either buffer 0 or buffer 4.
+ //Add appropriate offsets to get pointers for all buffers (1,2,3 or 5).
+ //Offsets are zero for buffer 0 and buffer 4.
+ // Load both address-register sets from the four channel pointers in r22.0-r22.3:
+ // a0.0-a0.3 = pointers + 0 (sub-block 0), a0.4-a0.7 = pointers + 512 (sub-block 1).
+ // As used by the code below: index .1/.5 addresses Y, .2/.6 addresses U and
+ // .0/.4 addresses V.
+ add (4) a0.0<1>:uw r22.0<4;4,1>:w 0:uw
+ add (4) a0.4<1>:uw r22.0<4;4,1>:w 512:uw
+
+ //Set up headers for the Y message (r28) and the interleaved UV message (r37),
+ //both starting as a copy of r27 (MSGSRC in the GRF partition above)
+ mov (8) r28<1>:ud r27<8;8,1>:ud
+ mov (8) r37<1>:ud r27<8;8,1>:ud
+
+ // Header dwords 0/1 receive the block origin taken from the two words at r7.0;
+ // the chroma message keeps the horizontal origin and halves the vertical one.
+ // The NoDDClr/NoDDChk options are applied to consecutive writes that target
+ // different dwords of the same header register.
+ mov (2) r28.0<1>:d r7.0<2;2,1>:w { NoDDClr } //ORI Y (LUMA) = ORI
+ mov (1) r37.0<1>:d r7.0<0;1,0>:w { NoDDClr } //H ORI (CHROMA) = H ORI
+ shr (1) r37.1<1>:d r7.1<0;1,0>:w 1:w { NoDDClr, NoDDChk } //V ORI (CHROMA) = V ORI/2
+
+ // Header dword 2 holds the block size as (height-1) in the upper half and
+ // (width-1) in the lower half: 0xF000F = 16x16, 0x7000F = 16 wide x 8 high.
+ mov (1) r28.2<1>:ud 0xF000F:ud { NoDDChk } // Y Block width and height (16x16)
+ mov (1) r37.2<1>:ud 0x7000F:ud { NoDDChk } // UV Block width and height(16x8)
+
+// Unscramble, and pack data directly to MRFs
+
+// Data 16x16 block is divided as -
+// ---------
+// | 0 |
+// ---------
+// | 1 |
+// ---------
+// | 2 |
+// ---------
+// | 3 |
+// ---------
+// All sub-blocks are of size 16x4
+// 0: ubBUFFER_0
+// 1: ubBUFFER_1, ubBUFFER_0+16
+// 2: ubBUFFER_2
+// 3: ubBUFFER_3, ubBUFFER_2+16
+
+ // Sub-block 0, addressed through a0.0-a0.3.
+ // Samples are 16-bit words; a row of 16 samples occupies 32 bytes, so the four
+ // rows of the 16x4 sub-block sit at byte offsets 0, 32, 64 and 96.
+ // Rounding adds 0x0080 with saturation; the packing code later keeps only the
+ // byte at offset 1 of each word.
+ //Y Rounding 16x4 top part
+ add.sat (16) r[a0.1,0]<1>:uw r[a0.1,0]<16;16,1>:uw 0x0080:uw
+ add.sat (16) r[a0.1,32]<1>:uw r[a0.1,32]<16;16,1>:uw 0x0080:uw
+ add.sat (16) r[a0.1,64]<1>:uw r[a0.1,64]<16;16,1>:uw 0x0080:uw
+ add.sat (16) r[a0.1,96]<1>:uw r[a0.1,96]<16;16,1>:uw 0x0080:uw
+
+ // Chroma 4:2:0 reduction for U: every other word of each row is read (region
+ // <16;8,2>, 8 samples per row) and halved into scratch rows 0-3 of uwBUFFER_5.
+ // Rows 0+1 and rows 2+3 are then summed (vertical average of the halved
+ // values), rounded with +0x0080 (saturating) and written back over the
+ // even-indexed words of source rows 0 and 2.
+ // U Averaging and Rounding, 8x2 top part
+ shr (8) uwBUFFER_5(0,0)<2> r[a0.2,0]<16;8,2>:uw 1:w
+ shr (8) uwBUFFER_5(1,0)<2> r[a0.2,32]<16;8,2>:uw 1:w
+ shr (8) uwBUFFER_5(2,0)<2> r[a0.2,64]<16;8,2>:uw 1:w
+ shr (8) uwBUFFER_5(3,0)<2> r[a0.2,96]<16;8,2>:uw 1:w
+
+ add (8) uwBUFFER_5(0,0)<2> uwBUFFER_5(0,0)<16;8,2> uwBUFFER_5(1,0)<16;8,2>
+ add.sat (8) r[a0.2,0]<2>:uw uwBUFFER_5(0,0)<16;8,2> 0x0080:uw
+
+ add (8) uwBUFFER_5(2,0)<2> uwBUFFER_5(2,0)<16;8,2> uwBUFFER_5(3,0)<16;8,2>
+ add.sat (8) r[a0.2,64]<2>:uw uwBUFFER_5(2,0)<16;8,2> 0x0080:uw
+
+ // Same reduction for V, using scratch rows 4-7 of uwBUFFER_5.
+ // V Averaging and Rounding, 8x2 top part
+ shr (8) uwBUFFER_5(4,0)<2> r[a0.0,0]<16;8,2>:uw 1:w
+ shr (8) uwBUFFER_5(5,0)<2> r[a0.0,32]<16;8,2>:uw 1:w
+ shr (8) uwBUFFER_5(6,0)<2> r[a0.0,64]<16;8,2>:uw 1:w
+ shr (8) uwBUFFER_5(7,0)<2> r[a0.0,96]<16;8,2>:uw 1:w
+
+ add (8) uwBUFFER_5(4,0)<2> uwBUFFER_5(4,0)<16;8,2> uwBUFFER_5(5,0)<16;8,2>
+ add.sat (8) r[a0.0,0]<2>:uw uwBUFFER_5(4,0)<16;8,2> 0x0080:uw
+
+ add (8) uwBUFFER_5(6,0)<2> uwBUFFER_5(6,0)<16;8,2> uwBUFFER_5(7,0)<16;8,2>
+ add.sat (8) r[a0.0,64]<2>:uw uwBUFFER_5(6,0)<16;8,2> 0x0080:uw
+
+ // Sub-block 0 is finished, so a0.0-a0.3 is re-pointed at sub-block 2
+ // (pointers + 1024) while the work below still goes through a0.4-a0.7.
+ add (4) a0.0<1>:uw r22.0<4;4,1>:w 1024:uw //Update Buffer 2 pointers
+
+ // Sub-block 1, addressed through a0.4-a0.7 (pointers + 512): same rounding and
+ // chroma reduction as for sub-block 0, reusing the uwBUFFER_5 scratch rows.
+ //Y Rounding, 16x4 bottom part
+ add.sat (16) r[a0.5,0]<1>:uw r[a0.5,0]<16;16,1>:uw 0x0080:uw
+ add.sat (16) r[a0.5,32]<1>:uw r[a0.5,32]<16;16,1>:uw 0x0080:uw
+ add.sat (16) r[a0.5,64]<1>:uw r[a0.5,64]<16;16,1>:uw 0x0080:uw
+ add.sat (16) r[a0.5,96]<1>:uw r[a0.5,96]<16;16,1>:uw 0x0080:uw
+
+ // U Averaging and Rounding, 8x2 bottom part
+ shr (8) uwBUFFER_5(0,0)<2> r[a0.6,0]<16;8,2>:uw 1:w
+ shr (8) uwBUFFER_5(1,0)<2> r[a0.6,32]<16;8,2>:uw 1:w
+ shr (8) uwBUFFER_5(2,0)<2> r[a0.6,64]<16;8,2>:uw 1:w
+ shr (8) uwBUFFER_5(3,0)<2> r[a0.6,96]<16;8,2>:uw 1:w
+
+ add (8) uwBUFFER_5(0,0)<2> uwBUFFER_5(0,0)<16;8,2> uwBUFFER_5(1,0)<16;8,2>
+ add.sat (8) r[a0.6,0]<2>:uw uwBUFFER_5(0,0)<16;8,2> 0x0080:uw
+
+ add (8) uwBUFFER_5(2,0)<2> uwBUFFER_5(2,0)<16;8,2> uwBUFFER_5(3,0)<16;8,2>
+ add.sat (8) r[a0.6,64]<2>:uw uwBUFFER_5(2,0)<16;8,2> 0x0080:uw
+
+ // V Averaging and Rounding, 8x2 bottom part
+ shr (8) uwBUFFER_5(4,0)<2> r[a0.4,0]<16;8,2>:uw 1:w
+ shr (8) uwBUFFER_5(5,0)<2> r[a0.4,32]<16;8,2>:uw 1:w
+ shr (8) uwBUFFER_5(6,0)<2> r[a0.4,64]<16;8,2>:uw 1:w
+ shr (8) uwBUFFER_5(7,0)<2> r[a0.4,96]<16;8,2>:uw 1:w
+
+ add (8) uwBUFFER_5(4,0)<2> uwBUFFER_5(4,0)<16;8,2> uwBUFFER_5(5,0)<16;8,2>
+ add.sat (8) r[a0.4,0]<2>:uw uwBUFFER_5(4,0)<16;8,2> 0x0080:uw
+
+ add (8) uwBUFFER_5(6,0)<2> uwBUFFER_5(6,0)<16;8,2> uwBUFFER_5(7,0)<16;8,2>
+ add.sat (8) r[a0.4,64]<2>:uw uwBUFFER_5(6,0)<16;8,2> 0x0080:uw
+
+ // Sub-block 1 is finished, so a0.4-a0.7 is re-pointed at sub-block 3
+ // (pointers + 1536) while the work below goes through a0.0-a0.3.
+ add (4) a0.4<1>:uw r22.0<4;4,1>:w 1536:uw //Update Buffer 3 pointers
+ // Sub-block 2, addressed through a0.0-a0.3 (pointers + 1024, set earlier).
+ //Y Rounding 16x4, sub-block 2
+ add.sat (16) r[a0.1,0]<1>:uw r[a0.1,0]<16;16,1>:uw 0x0080:uw
+ add.sat (16) r[a0.1,32]<1>:uw r[a0.1,32]<16;16,1>:uw 0x0080:uw
+ add.sat (16) r[a0.1,64]<1>:uw r[a0.1,64]<16;16,1>:uw 0x0080:uw
+ add.sat (16) r[a0.1,96]<1>:uw r[a0.1,96]<16;16,1>:uw 0x0080:uw
+
+ // U Averaging and Rounding, 8x2, sub-block 2
+ shr (8) uwBUFFER_5(0,0)<2> r[a0.2,0]<16;8,2>:uw 1:w
+ shr (8) uwBUFFER_5(1,0)<2> r[a0.2,32]<16;8,2>:uw 1:w
+ shr (8) uwBUFFER_5(2,0)<2> r[a0.2,64]<16;8,2>:uw 1:w
+ shr (8) uwBUFFER_5(3,0)<2> r[a0.2,96]<16;8,2>:uw 1:w
+
+ add (8) uwBUFFER_5(0,0)<2> uwBUFFER_5(0,0)<16;8,2> uwBUFFER_5(1,0)<16;8,2>
+ add.sat (8) r[a0.2,0]<2>:uw uwBUFFER_5(0,0)<16;8,2> 0x0080:uw
+
+ add (8) uwBUFFER_5(2,0)<2> uwBUFFER_5(2,0)<16;8,2> uwBUFFER_5(3,0)<16;8,2>
+ add.sat (8) r[a0.2,64]<2>:uw uwBUFFER_5(2,0)<16;8,2> 0x0080:uw
+
+ // V Averaging and Rounding, 8x2, sub-block 2
+ shr (8) uwBUFFER_5(4,0)<2> r[a0.0,0]<16;8,2>:uw 1:w
+ shr (8) uwBUFFER_5(5,0)<2> r[a0.0,32]<16;8,2>:uw 1:w
+ shr (8) uwBUFFER_5(6,0)<2> r[a0.0,64]<16;8,2>:uw 1:w
+ shr (8) uwBUFFER_5(7,0)<2> r[a0.0,96]<16;8,2>:uw 1:w
+
+ add (8) uwBUFFER_5(4,0)<2> uwBUFFER_5(4,0)<16;8,2> uwBUFFER_5(5,0)<16;8,2>
+ add.sat (8) r[a0.0,0]<2>:uw uwBUFFER_5(4,0)<16;8,2> 0x0080:uw
+
+ add (8) uwBUFFER_5(6,0)<2> uwBUFFER_5(6,0)<16;8,2> uwBUFFER_5(7,0)<16;8,2>
+ add.sat (8) r[a0.0,64]<2>:uw uwBUFFER_5(6,0)<16;8,2> 0x0080:uw
+
+ // NOTE(review): a0.0-a0.3 already hold pointers + 1024 from the earlier
+ // "Update Buffer 2 pointers" add and nothing in between writes a0.0-a0.3, so
+ // this looks redundant. It is kept because it is part of the kernel's counted
+ // instruction stream - confirm before removing.
+ add (4) a0.0<1>:uw r22.0<4;4,1>:w 1024:uw //Update Buffer 2 pointers
+
+ // Sub-block 3, addressed through a0.4-a0.7 (pointers + 1536).
+ //Y Rounding, 16x4, sub-block 3
+ add.sat (16) r[a0.5,0]<1>:uw r[a0.5,0]<16;16,1>:uw 0x0080:uw
+ add.sat (16) r[a0.5,32]<1>:uw r[a0.5,32]<16;16,1>:uw 0x0080:uw
+ add.sat (16) r[a0.5,64]<1>:uw r[a0.5,64]<16;16,1>:uw 0x0080:uw
+ add.sat (16) r[a0.5,96]<1>:uw r[a0.5,96]<16;16,1>:uw 0x0080:uw
+
+ // U Averaging and Rounding, 8x2, sub-block 3
+ shr (8) uwBUFFER_5(0,0)<2> r[a0.6,0]<16;8,2>:uw 1:w
+ shr (8) uwBUFFER_5(1,0)<2> r[a0.6,32]<16;8,2>:uw 1:w
+ shr (8) uwBUFFER_5(2,0)<2> r[a0.6,64]<16;8,2>:uw 1:w
+ shr (8) uwBUFFER_5(3,0)<2> r[a0.6,96]<16;8,2>:uw 1:w
+
+ add (8) uwBUFFER_5(0,0)<2> uwBUFFER_5(0,0)<16;8,2> uwBUFFER_5(1,0)<16;8,2>
+ add.sat (8) r[a0.6,0]<2>:uw uwBUFFER_5(0,0)<16;8,2> 0x0080:uw
+
+ add (8) uwBUFFER_5(2,0)<2> uwBUFFER_5(2,0)<16;8,2> uwBUFFER_5(3,0)<16;8,2>
+ add.sat (8) r[a0.6,64]<2>:uw uwBUFFER_5(2,0)<16;8,2> 0x0080:uw
+
+ // V Averaging and Rounding, 8x2, sub-block 3
+ shr (8) uwBUFFER_5(4,0)<2> r[a0.4,0]<16;8,2>:uw 1:w
+ shr (8) uwBUFFER_5(5,0)<2> r[a0.4,32]<16;8,2>:uw 1:w
+ shr (8) uwBUFFER_5(6,0)<2> r[a0.4,64]<16;8,2>:uw 1:w
+ shr (8) uwBUFFER_5(7,0)<2> r[a0.4,96]<16;8,2>:uw 1:w
+
+ add (8) uwBUFFER_5(4,0)<2> uwBUFFER_5(4,0)<16;8,2> uwBUFFER_5(5,0)<16;8,2>
+ add.sat (8) r[a0.4,0]<2>:uw uwBUFFER_5(4,0)<16;8,2> 0x0080:uw
+
+ add (8) uwBUFFER_5(6,0)<2> uwBUFFER_5(6,0)<16;8,2> uwBUFFER_5(7,0)<16;8,2>
+ add.sat (8) r[a0.4,64]<2>:uw uwBUFFER_5(6,0)<16;8,2> 0x0080:uw
+
+ // NOTE(review): this write to a0.4-a0.7 is overwritten two instructions later
+ // by the restore to pointers + 512 without being read in between, so it looks
+ // redundant. Kept as part of the counted instruction stream - confirm before
+ // removing.
+ add (4) a0.4<1>:uw r22.0<4;4,1>:w 1536:uw //Update Buffer 3 pointers
+ // restore pBUF_CHNL_TOP_8x4 and pBUF_CHNL_BOT_8x4 registers
+ // (a0.0-a0.3 back to sub-block 0, a0.4-a0.7 back to sub-block 1) for packing
+ add (4) a0.0<1>:uw r22.0<4;4,1>:w 0:uw
+ add (4) a0.4<1>:uw r22.0<4;4,1>:w 512:uw
+
+//Buffer 0
+// Packing keeps the byte at offset 1 of every 16-bit sample (offsets 1, 33, 65,
+// 97 select rows 0-3). Two 16-pixel Y rows fill one payload register (byte
+// offsets 0 and 16), so this sub-block fills Y payload registers 0 and 1.
+// The NoDDClr/NoDDChk options are applied to consecutive writes that target
+// different bytes of the same destination register; keep the order intact.
+//Move Y to msg payload
+ mov (16) mubMSGPAYLOAD0(0,0)<1> r[a0.1, 1]<32;16,2>:ub { NoDDClr }
+ mov (16) mubMSGPAYLOAD0(0,16)<1> r[a0.1, 33]<32;16,2>:ub { NoDDChk }
+ mov (16) mubMSGPAYLOAD0(1,0)<1> r[a0.1, 65]<32;16,2>:ub { NoDDClr }
+ mov (16) mubMSGPAYLOAD0(1,16)<1> r[a0.1, 97]<32;16,2>:ub { NoDDChk }
+
+// Chroma: 8 reduced samples are taken from rows 0 and 2 (byte step 4, i.e.
+// every other word) and stored with destination stride 2, U on even bytes.
+//Move U to msg payload
+ mov (8) mubMSGPAYLOAD1(0,0)<2> r[a0.2, 1]<32;8,4>:ub { NoDDClr }
+ mov (8) mubMSGPAYLOAD1(0,16)<2> r[a0.2, 65]<32;8,4>:ub { NoDDClr, NoDDChk }
+
+// V goes to the odd bytes of the same register, giving interleaved UV pairs.
+//Move V to msg payload
+ mov (8) mubMSGPAYLOAD1(0,1)<2> r[a0.0, 1]<32;8,4>:ub { NoDDClr, NoDDChk }
+ mov (8) mubMSGPAYLOAD1(0,17)<2> r[a0.0, 65]<32;8,4>:ub { NoDDChk }
+
+ // Sub-block 0 is packed; re-point a0.0-a0.3 at sub-block 2 (pointers + 1024).
+ add (4) a0.0<1>:uw r22.0<4;4,1>:w 1024:uw //Update Buffer 2 pointers
+
+//Buffer 1
+// Sub-block 1 through a0.4-a0.7: Y bytes into payload-0 registers 2 and 3,
+// interleaved U (even bytes) / V (odd bytes) into payload-1 register 1.
+ mov (16) mubMSGPAYLOAD0(2,0)<1> r[a0.5, 1]<32;16,2>:ub { NoDDClr }
+ mov (16) mubMSGPAYLOAD0(2,16)<1> r[a0.5, 33]<32;16,2>:ub { NoDDChk }
+ mov (16) mubMSGPAYLOAD0(3,0)<1> r[a0.5, 65]<32;16,2>:ub { NoDDClr }
+ mov (16) mubMSGPAYLOAD0(3,16)<1> r[a0.5, 97]<32;16,2>:ub { NoDDChk }
+
+ mov (8) mubMSGPAYLOAD1(1,0)<2> r[a0.6, 1]<32;8,4>:ub { NoDDClr }
+ mov (8) mubMSGPAYLOAD1(1,16)<2> r[a0.6, 65]<32;8,4>:ub { NoDDClr, NoDDChk }
+
+ mov (8) mubMSGPAYLOAD1(1,1)<2> r[a0.4, 1]<32;8,4>:ub { NoDDClr, NoDDChk }
+ mov (8) mubMSGPAYLOAD1(1,17)<2> r[a0.4, 65]<32;8,4>:ub { NoDDChk }
+
+ // Sub-block 1 is packed; re-point a0.4-a0.7 at sub-block 3 (pointers + 1536).
+ add (4) a0.4<1>:uw r22.0<4;4,1>:w 1536:uw //Update Buffer 3 pointers
+
+//Buffer 2
+// Sub-block 2 through a0.0-a0.3 (now pointers + 1024): Y bytes into payload-0
+// registers 4 and 5, interleaved U/V into payload-1 register 2.
+ mov (16) mubMSGPAYLOAD0(4,0)<1> r[a0.1, 1]<32;16,2>:ub { NoDDClr }
+ mov (16) mubMSGPAYLOAD0(4,16)<1> r[a0.1, 33]<32;16,2>:ub { NoDDChk }
+ mov (16) mubMSGPAYLOAD0(5,0)<1> r[a0.1, 65]<32;16,2>:ub { NoDDClr }
+ mov (16) mubMSGPAYLOAD0(5,16)<1> r[a0.1, 97]<32;16,2>:ub { NoDDChk }
+
+ mov (8) mubMSGPAYLOAD1(2,0)<2> r[a0.2, 1]<32;8,4>:ub { NoDDClr }
+ mov (8) mubMSGPAYLOAD1(2,16)<2> r[a0.2, 65]<32;8,4>:ub { NoDDClr, NoDDChk }
+
+ mov (8) mubMSGPAYLOAD1(2,1)<2> r[a0.0, 1]<32;8,4>:ub { NoDDClr, NoDDChk }
+ mov (8) mubMSGPAYLOAD1(2,17)<2> r[a0.0, 65]<32;8,4>:ub { NoDDChk }
+
+//Buffer 3
+// Sub-block 3 through a0.4-a0.7 (now pointers + 1536): Y bytes into payload-0
+// registers 6 and 7, interleaved U/V into payload-1 register 3. After this the
+// Y payload holds 8 registers and the UV payload 4 registers.
+ mov (16) mubMSGPAYLOAD0(6,0)<1> r[a0.5, 1]<32;16,2>:ub { NoDDClr }
+ mov (16) mubMSGPAYLOAD0(6,16)<1> r[a0.5, 33]<32;16,2>:ub { NoDDChk }
+ mov (16) mubMSGPAYLOAD0(7,0)<1> r[a0.5, 65]<32;16,2>:ub { NoDDClr }
+ mov (16) mubMSGPAYLOAD0(7,16)<1> r[a0.5, 97]<32;16,2>:ub { NoDDChk }
+
+ mov (8) mubMSGPAYLOAD1(3,0)<2> r[a0.6, 1]<32;8,4>:ub { NoDDClr }
+ mov (8) mubMSGPAYLOAD1(3,16)<2> r[a0.6, 65]<32;8,4>:ub { NoDDClr, NoDDChk }
+
+ mov (8) mubMSGPAYLOAD1(3,1)<2> r[a0.4, 1]<32;8,4>:ub { NoDDClr, NoDDChk }
+ mov (8) mubMSGPAYLOAD1(3,17)<2> r[a0.4, 65]<32;8,4>:ub { NoDDChk }
+//===========================================================================
+
+// Two media block writes with no response (null destination).
+// Relative to the 0x020A8000 base descriptor described in the common header
+// (message length 1, header present, media block write, binding index 0):
+//   0x120A8018 : message length 9 (r28 header + 8 Y payload registers),
+//                binding table index 0x18
+//   0x0A0A8019 : message length 5 (r37 header + 4 UV payload registers),
+//                binding table index 0x19
+// NOTE(review): 0xc is the extended descriptor; its meaning is not shown in
+// this file - confirm against the hardware documentation.
+send (1) null<1>:d r28 0xc 0x120A8018:ud
+send (1) null<1>:d r37 0xc 0xA0A8019:ud
diff --git a/src/shaders/post_processing/gen8/Save_AVS_PA.g8a b/src/shaders/post_processing/gen8/Save_AVS_PA.g8a
new file mode 100644
index 0000000..1cedac7
--- /dev/null
+++ b/src/shaders/post_processing/gen8/Save_AVS_PA.g8a
@@ -0,0 +1,629 @@
+/*
+ * Copyright 2000-2013 Intel Corporation All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * Authors: Zhao Yakui <yakui.zhao@intel.com>
+ */
+// 174 // Total instruction count
+// 1 // Total kernel count
+
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+
+.default_execution_size (16)
+.default_register_type :ub
+
+.reg_count_total 128
+.reg_count_payload 7
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+//Color Pipe (IECP) parameters
+
+
+//ByteCopy
+
+
+//r4
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later)
+// 0000,0000,0000
+// 0001(Spawn a root thread),0001 (Root thread spawn thread)
+// = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later)
+// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+// 0000,0000 (Binding table index, added later)
+// = 0x02000011
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+ // Message Descriptors
+ // = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+ // 1 (header present 1) 0 1010 (media block write) 000000
+ // 00000000 (binding table index - set later)
+ // = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+ // buf4 starts from r28
+
+
+//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT
+
+
+.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub // byte view; the four declarations here are typed views of the same region based at r30
+.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw // word view of the same region
+.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud // dword view of the same region
+.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f // float view of the same region
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Define temp space for any usages
+
+
+// Common Buffers
+
+
+// temp space for rotation
+
+.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f // float view of the temp space based at r9
+// Unsigned dword view of the same temp space.
+.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+// Unsigned word view of the same temp space.
+.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+// Unsigned byte view, contiguous bytes.
+.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+// Unsigned byte view that defaults to one byte out of every four (SrcRegion <32;8,4>, DstRegion <4>).
+.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+
+
+// End of common.inc
+
+
+// Module name: Save_AVS_PA.asm
+//
+// Save PA 422 frame data block of size 16x16
+//
+// To save 16x16 block (32x16 bytes of YUYV) we need 2 send instructions of size 16x16 each.
+// -------------------------------
+// | 16x16 | 16x16 |
+// | YUYV | YUYV |
+// -------------------------------
+// these 2 sends are replaced by 8 32x2 sends to improve performance
+
+
+
+// Module name: Save.inc
+
+
+
+
+// Description: Includes all definitions explicit to Fast Composite.
+
+
+
+
+// End of common.inc
+
+
+//========== GRF partition ==========
+ // r0 header : r0 (1 GRF)
+ // Static parameters : r1 - r6 (6 GRFS)
+ // Inline parameters : r7 - r8 (2 GRFs)
+ // MSGSRC : r27 (1 GRF)
+//===================================
+
+//Interface:
+//========== Static Parameters (Explicit To Fast Composite) ==========
+//r1
+//CSC Set 0
+
+
+.declare udCSC_CURBE Base=r1.0 ElementSize=4 Type=ud
+
+//Constant alpha
+
+
+//r2
+
+
+// Gen7 AVS WA
+
+
+// WiDi Definitions
+
+
+//Colorfill
+
+
+ // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise.
+
+.declare ubCOLOR_PIXEL_VAL Base=r2.20 ElementSize=1 SrcRegion=<0;1,0> DstRegion=<1> Type=ub
+
+//r3
+//Normalised Ratio of Horizontal step size with main video for all layers
+
+
+ //Normalised Ratio of Horizontal step size with main video for all layers becomes
+ //Normalised Horizontal step size for all layers in VP_Setup.asm
+
+
+//r4
+//Normalised Vertical step size for all layers
+
+
+//r5
+//Normalised Vertical Frame Origin for all layers
+
+
+//r6
+//Normalised Horizontal Frame Origin for all layers
+
+
+//========== Inline Parameters (Explicit To Fast Composite) ==========
+
+
+//Main video Step X
+
+
+//====================== Binding table (Explicit To Fast Composite)=========================================
+
+
+//Used by Interlaced Scaling Kernels
+
+
+//========== Sampler State Table Index (Explicit To Fast Composite)==========
+//Sampler Index for AVS/IEF messages
+
+
+//Sampler Index for SIMD16 sampler messages
+
+
+//=============================================================================
+
+.declare fBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f // float views; BUFFER_0..3 are based 16 GRFs apart (r64, r80, r96, r112)
+.declare fBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f // BUFFER_4 and BUFFER_5 are based at r28 and r46
+.declare fBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+// Unsigned dword (ud) views of the same six buffers.
+.declare udBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+// Unsigned word (uw) views of the same six buffers.
+.declare uwBUFFER_0 Base=r64.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_1 Base=r80.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_2 Base=r96.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_3 Base=r112.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_4 Base=r28.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_5 Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+// Unsigned byte (ub) views of the same six buffers, contiguous bytes.
+.declare ubBUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+// Unsigned byte views that default to one byte out of every four (SrcRegion <32;8,4>, DstRegion <4>).
+.declare ub4BUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+
+//Pointer to mask reg
+
+
+//r18
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+.declare udCSC_COEFF_0 Base=r18.0 ElementSize=4 Type=ud // 1 GRF; dword view of r18 (presumably colour-space-conversion coefficients, judging by the name only)
+
+//r19
+
+
+.declare udCSC_COEFF_1 Base=r19.0 ElementSize=4 Type=ud // 1 GRF; dword view of r19
+
+
+//r20
+
+.declare uwALPHA_MASK_REG_TEMP Base=r20.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF; word view of r20
+
+//r21
+
+.declare uwALPHA_MASK_REG Base=r21.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF; word view of r21
+
+//r22
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as
+//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT
+
+//r23
+
+
+//Lumakey
+
+
+//r24
+
+
+//r25
+
+
+//r26
+
+
+//defines to generate LABELS during compile time.
+
+
+//Msg payload buffers; up to 4 full-size messages can be written
+
+
+.declare mudMSGPAYLOAD0 Base=r29.0 ElementSize=4 SrcRegion=<8;8,1> Type=ud // dword views; payloads 0..3 are based 9 GRFs apart (r29, r38, r47, r56)
+.declare mudMSGPAYLOAD1 Base=r38.0 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare mudMSGPAYLOAD2 Base=r47.0 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare mudMSGPAYLOAD3 Base=r56.0 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+// Word (uw) views of the same four payload regions.
+.declare muwMSGPAYLOAD0 Base=r29.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare muwMSGPAYLOAD1 Base=r38.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare muwMSGPAYLOAD2 Base=r47.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare muwMSGPAYLOAD3 Base=r56.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+// Byte (ub) views; payloads 4..7 start 3 GRFs after payloads 0..3 (r32, r41, r50, r59).
+.declare mubMSGPAYLOAD0 Base=r29.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare mubMSGPAYLOAD1 Base=r38.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare mubMSGPAYLOAD2 Base=r47.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare mubMSGPAYLOAD3 Base=r56.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare mubMSGPAYLOAD4 Base=r32.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare mubMSGPAYLOAD5 Base=r41.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare mubMSGPAYLOAD6 Base=r50.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare mubMSGPAYLOAD7 Base=r59.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+
+
+ // The r17 register (nTEMP0) is originally defined in "Common.inc".
+ // Instead of re-defining nTEMP0 here, a separately named word alias of r17 is declared.
+ // NOTE(review): the original text mentions a "SAVE_RGB" suffix, but the alias declared below is plain uwTemp0 - confirm which is intended.
+ .declare uwTemp0 Base=r17.0 ElementSize=2 Type=uw
+
+
+//_SAVE_INC_
+
+
+ //wBUFF_CHNL_PTR (read here as the four words at r22.0) points to buffer 0; it is copied into a0.0-a0.3 as the source pointers.
+ //Buffers 1, 2 and 3 are reached further down by re-adding 512, 1024 and 1536 bytes to the same base.
+ //Offset is zero for buffer 0, so this add is a plain copy.
+ add (4) a0.0<1>:uw r22.0<4;4,1>:w 0:uw // a0.0-a0.3 = words r22.0-r22.3 + 0 (the pack code in this file notes a0.0 = V, a0.1 = Y, a0.2 = U)
+
+ //Set DEST pointers according to output packing i.e. YUYV, YVYU, UYVY, VYUY
+ add (4) a0.4<1>:w r2.28<4;4,1>:ub 928:uw // a0.4-a0.7 = bytes r2.28-r2.31 + 928; 928 = 29 * 32, i.e. the start of r29 with 32-byte GRFs
+
+ /* X block origin. YUY2 or UYVY */
+ shl (1) r27.0<1>:d r7.0<0;1,0>:w 1:w { NoDDClr } // Horizontal block origin is doubled: the packed output holds 32 bytes per 16 pixels
+ mov (1) r27.1<1>:d r7.1<0;1,0>:w { NoDDClr, NoDDChk } // Block origin (1st quadrant), copied unchanged; later offset by 2 per 2-row write
+ mov (1) r27.2<1>:ud 0x1001F:ud { NoDDChk } // Block width and height (32x2); presumably encoded as size minus one (0x1F, 0x1) - confirm against the message spec
+
+// Rounding (buffer 0): add 0x0080 with saturation to each 16-bit sample, ahead of the later copy that keeps only byte 1 (the upper byte) of each word
+ // left: bytes 0-15 of each of the four 32-byte spans at offsets 0, 32, 64 and 96
+ add.sat (4) r[a0.0, 0]<2>:uw r[a0.0, 0]<8;4,2>:uw 0x0080:uw
+ add.sat (4) r[a0.0,32]<2>:uw r[a0.0, 32]<8;4,2>:uw 0x0080:uw
+ add.sat (4) r[a0.0,64]<2>:uw r[a0.0, 64]<8;4,2>:uw 0x0080:uw
+ add.sat (4) r[a0.0,96]<2>:uw r[a0.0, 96]<8;4,2>:uw 0x0080:uw
+ // a0.1: all 8 words of the half span (the a0.0 and a0.2 groups touch only every other word)
+ add.sat (8) r[a0.1, 0]<1>:uw r[a0.1, 0]<8;8,1>:uw 0x0080:uw
+ add.sat (8) r[a0.1,32]<1>:uw r[a0.1, 32]<8;8,1>:uw 0x0080:uw
+ add.sat (8) r[a0.1,64]<1>:uw r[a0.1, 64]<8;8,1>:uw 0x0080:uw
+ add.sat (8) r[a0.1,96]<1>:uw r[a0.1, 96]<8;8,1>:uw 0x0080:uw
+ // a0.2: every other word, same pattern as a0.0
+ add.sat (4) r[a0.2, 0]<2>:uw r[a0.2, 0]<8;4,2>:uw 0x0080:uw
+ add.sat (4) r[a0.2,32]<2>:uw r[a0.2, 32]<8;4,2>:uw 0x0080:uw
+ add.sat (4) r[a0.2,64]<2>:uw r[a0.2, 64]<8;4,2>:uw 0x0080:uw
+ add.sat (4) r[a0.2,96]<2>:uw r[a0.2, 96]<8;4,2>:uw 0x0080:uw
+
+ // right: bytes 16-31 of the same four spans (offsets 16, 48, 80 and 112)
+ add.sat (4) r[a0.0,16]<2>:uw r[a0.0, 16]<8;4,2>:uw 0x0080:uw
+ add.sat (4) r[a0.0,48]<2>:uw r[a0.0, 48]<8;4,2>:uw 0x0080:uw
+ add.sat (4) r[a0.0,80]<2>:uw r[a0.0, 80]<8;4,2>:uw 0x0080:uw
+ add.sat (4) r[a0.0,112]<2>:uw r[a0.0, 112]<8;4,2>:uw 0x0080:uw
+
+ add.sat (8) r[a0.1, 16]<1>:uw r[a0.1, 16]<8;8,1>:uw 0x0080:uw
+ add.sat (8) r[a0.1,48]<1>:uw r[a0.1, 48]<8;8,1>:uw 0x0080:uw
+ add.sat (8) r[a0.1,80]<1>:uw r[a0.1, 80]<8;8,1>:uw 0x0080:uw
+ add.sat (8) r[a0.1,112]<1>:uw r[a0.1, 112]<8;8,1>:uw 0x0080:uw
+
+ add.sat (4) r[a0.2, 16]<2>:uw r[a0.2, 16]<8;4,2>:uw 0x0080:uw
+ add.sat (4) r[a0.2,48]<2>:uw r[a0.2, 48]<8;4,2>:uw 0x0080:uw
+ add.sat (4) r[a0.2,80]<2>:uw r[a0.2, 80]<8;4,2>:uw 0x0080:uw
+ add.sat (4) r[a0.2,112]<2>:uw r[a0.2, 112]<8;4,2>:uw 0x0080:uw
+
+ add (4) a0.0<1>:uw r22.0<4;4,1>:w 512:uw // Rounding, buffer 1: source pointers a0.0-a0.3 = base words at r22.0 + 512 bytes
+ // left: bytes 0-15 of each of the four 32-byte spans at offsets 0, 32, 64 and 96
+ add.sat (4) r[a0.0, 0]<2>:uw r[a0.0, 0]<8;4,2>:uw 0x0080:uw
+ add.sat (4) r[a0.0,32]<2>:uw r[a0.0, 32]<8;4,2>:uw 0x0080:uw
+ add.sat (4) r[a0.0,64]<2>:uw r[a0.0, 64]<8;4,2>:uw 0x0080:uw
+ add.sat (4) r[a0.0,96]<2>:uw r[a0.0, 96]<8;4,2>:uw 0x0080:uw
+ // a0.1: all 8 words of the half span (the a0.0 and a0.2 groups touch only every other word)
+ add.sat (8) r[a0.1, 0]<1>:uw r[a0.1, 0]<8;8,1>:uw 0x0080:uw
+ add.sat (8) r[a0.1,32]<1>:uw r[a0.1, 32]<8;8,1>:uw 0x0080:uw
+ add.sat (8) r[a0.1,64]<1>:uw r[a0.1, 64]<8;8,1>:uw 0x0080:uw
+ add.sat (8) r[a0.1,96]<1>:uw r[a0.1, 96]<8;8,1>:uw 0x0080:uw
+ // a0.2: every other word, same pattern as a0.0
+ add.sat (4) r[a0.2, 0]<2>:uw r[a0.2, 0]<8;4,2>:uw 0x0080:uw
+ add.sat (4) r[a0.2,32]<2>:uw r[a0.2, 32]<8;4,2>:uw 0x0080:uw
+ add.sat (4) r[a0.2,64]<2>:uw r[a0.2, 64]<8;4,2>:uw 0x0080:uw
+ add.sat (4) r[a0.2,96]<2>:uw r[a0.2, 96]<8;4,2>:uw 0x0080:uw
+
+ // right: bytes 16-31 of the same four spans (offsets 16, 48, 80 and 112)
+ add.sat (4) r[a0.0,16]<2>:uw r[a0.0, 16]<8;4,2>:uw 0x0080:uw
+ add.sat (4) r[a0.0,48]<2>:uw r[a0.0, 48]<8;4,2>:uw 0x0080:uw
+ add.sat (4) r[a0.0,80]<2>:uw r[a0.0, 80]<8;4,2>:uw 0x0080:uw
+ add.sat (4) r[a0.0,112]<2>:uw r[a0.0, 112]<8;4,2>:uw 0x0080:uw
+
+ add.sat (8) r[a0.1, 16]<1>:uw r[a0.1, 16]<8;8,1>:uw 0x0080:uw
+ add.sat (8) r[a0.1,48]<1>:uw r[a0.1, 48]<8;8,1>:uw 0x0080:uw
+ add.sat (8) r[a0.1,80]<1>:uw r[a0.1, 80]<8;8,1>:uw 0x0080:uw
+ add.sat (8) r[a0.1,112]<1>:uw r[a0.1, 112]<8;8,1>:uw 0x0080:uw
+
+ add.sat (4) r[a0.2, 16]<2>:uw r[a0.2, 16]<8;4,2>:uw 0x0080:uw
+ add.sat (4) r[a0.2,48]<2>:uw r[a0.2, 48]<8;4,2>:uw 0x0080:uw
+ add.sat (4) r[a0.2,80]<2>:uw r[a0.2, 80]<8;4,2>:uw 0x0080:uw
+ add.sat (4) r[a0.2,112]<2>:uw r[a0.2, 112]<8;4,2>:uw 0x0080:uw
+
+ add (4) a0.0<1>:uw r22.0<4;4,1>:w 1024:uw // Rounding, buffer 2: source pointers a0.0-a0.3 = base words at r22.0 + 1024 bytes
+ // left: bytes 0-15 of each of the four 32-byte spans at offsets 0, 32, 64 and 96
+ add.sat (4) r[a0.0, 0]<2>:uw r[a0.0, 0]<8;4,2>:uw 0x0080:uw
+ add.sat (4) r[a0.0,32]<2>:uw r[a0.0, 32]<8;4,2>:uw 0x0080:uw
+ add.sat (4) r[a0.0,64]<2>:uw r[a0.0, 64]<8;4,2>:uw 0x0080:uw
+ add.sat (4) r[a0.0,96]<2>:uw r[a0.0, 96]<8;4,2>:uw 0x0080:uw
+ // a0.1: all 8 words of the half span (the a0.0 and a0.2 groups touch only every other word)
+ add.sat (8) r[a0.1, 0]<1>:uw r[a0.1, 0]<8;8,1>:uw 0x0080:uw
+ add.sat (8) r[a0.1,32]<1>:uw r[a0.1, 32]<8;8,1>:uw 0x0080:uw
+ add.sat (8) r[a0.1,64]<1>:uw r[a0.1, 64]<8;8,1>:uw 0x0080:uw
+ add.sat (8) r[a0.1,96]<1>:uw r[a0.1, 96]<8;8,1>:uw 0x0080:uw
+ // a0.2: every other word, same pattern as a0.0
+ add.sat (4) r[a0.2, 0]<2>:uw r[a0.2, 0]<8;4,2>:uw 0x0080:uw
+ add.sat (4) r[a0.2,32]<2>:uw r[a0.2, 32]<8;4,2>:uw 0x0080:uw
+ add.sat (4) r[a0.2,64]<2>:uw r[a0.2, 64]<8;4,2>:uw 0x0080:uw
+ add.sat (4) r[a0.2,96]<2>:uw r[a0.2, 96]<8;4,2>:uw 0x0080:uw
+
+ // right: bytes 16-31 of the same four spans (offsets 16, 48, 80 and 112)
+ add.sat (4) r[a0.0,16]<2>:uw r[a0.0, 16]<8;4,2>:uw 0x0080:uw
+ add.sat (4) r[a0.0,48]<2>:uw r[a0.0, 48]<8;4,2>:uw 0x0080:uw
+ add.sat (4) r[a0.0,80]<2>:uw r[a0.0, 80]<8;4,2>:uw 0x0080:uw
+ add.sat (4) r[a0.0,112]<2>:uw r[a0.0, 112]<8;4,2>:uw 0x0080:uw
+
+ add.sat (8) r[a0.1, 16]<1>:uw r[a0.1, 16]<8;8,1>:uw 0x0080:uw
+ add.sat (8) r[a0.1,48]<1>:uw r[a0.1, 48]<8;8,1>:uw 0x0080:uw
+ add.sat (8) r[a0.1,80]<1>:uw r[a0.1, 80]<8;8,1>:uw 0x0080:uw
+ add.sat (8) r[a0.1,112]<1>:uw r[a0.1, 112]<8;8,1>:uw 0x0080:uw
+
+ add.sat (4) r[a0.2, 16]<2>:uw r[a0.2, 16]<8;4,2>:uw 0x0080:uw
+ add.sat (4) r[a0.2,48]<2>:uw r[a0.2, 48]<8;4,2>:uw 0x0080:uw
+ add.sat (4) r[a0.2,80]<2>:uw r[a0.2, 80]<8;4,2>:uw 0x0080:uw
+ add.sat (4) r[a0.2,112]<2>:uw r[a0.2, 112]<8;4,2>:uw 0x0080:uw
+
+ add (4) a0.0<1>:uw r22.0<4;4,1>:w 1536:uw // Rounding, buffer 3: source pointers a0.0-a0.3 = base words at r22.0 + 1536 bytes
+ // left: bytes 0-15 of each of the four 32-byte spans at offsets 0, 32, 64 and 96
+ add.sat (4) r[a0.0, 0]<2>:uw r[a0.0, 0]<8;4,2>:uw 0x0080:uw
+ add.sat (4) r[a0.0,32]<2>:uw r[a0.0, 32]<8;4,2>:uw 0x0080:uw
+ add.sat (4) r[a0.0,64]<2>:uw r[a0.0, 64]<8;4,2>:uw 0x0080:uw
+ add.sat (4) r[a0.0,96]<2>:uw r[a0.0, 96]<8;4,2>:uw 0x0080:uw
+ // a0.1: all 8 words of the half span (the a0.0 and a0.2 groups touch only every other word)
+ add.sat (8) r[a0.1, 0]<1>:uw r[a0.1, 0]<8;8,1>:uw 0x0080:uw
+ add.sat (8) r[a0.1,32]<1>:uw r[a0.1, 32]<8;8,1>:uw 0x0080:uw
+ add.sat (8) r[a0.1,64]<1>:uw r[a0.1, 64]<8;8,1>:uw 0x0080:uw
+ add.sat (8) r[a0.1,96]<1>:uw r[a0.1, 96]<8;8,1>:uw 0x0080:uw
+ // a0.2: every other word, same pattern as a0.0
+ add.sat (4) r[a0.2, 0]<2>:uw r[a0.2, 0]<8;4,2>:uw 0x0080:uw
+ add.sat (4) r[a0.2,32]<2>:uw r[a0.2, 32]<8;4,2>:uw 0x0080:uw
+ add.sat (4) r[a0.2,64]<2>:uw r[a0.2, 64]<8;4,2>:uw 0x0080:uw
+ add.sat (4) r[a0.2,96]<2>:uw r[a0.2, 96]<8;4,2>:uw 0x0080:uw
+
+ // right: bytes 16-31 of the same four spans (offsets 16, 48, 80 and 112)
+ add.sat (4) r[a0.0,16]<2>:uw r[a0.0, 16]<8;4,2>:uw 0x0080:uw
+ add.sat (4) r[a0.0,48]<2>:uw r[a0.0, 48]<8;4,2>:uw 0x0080:uw
+ add.sat (4) r[a0.0,80]<2>:uw r[a0.0, 80]<8;4,2>:uw 0x0080:uw
+ add.sat (4) r[a0.0,112]<2>:uw r[a0.0, 112]<8;4,2>:uw 0x0080:uw
+
+ add.sat (8) r[a0.1, 16]<1>:uw r[a0.1, 16]<8;8,1>:uw 0x0080:uw
+ add.sat (8) r[a0.1,48]<1>:uw r[a0.1, 48]<8;8,1>:uw 0x0080:uw
+ add.sat (8) r[a0.1,80]<1>:uw r[a0.1, 80]<8;8,1>:uw 0x0080:uw
+ add.sat (8) r[a0.1,112]<1>:uw r[a0.1, 112]<8;8,1>:uw 0x0080:uw
+
+ add.sat (4) r[a0.2, 16]<2>:uw r[a0.2, 16]<8;4,2>:uw 0x0080:uw
+ add.sat (4) r[a0.2,48]<2>:uw r[a0.2, 48]<8;4,2>:uw 0x0080:uw
+ add.sat (4) r[a0.2,80]<2>:uw r[a0.2, 80]<8;4,2>:uw 0x0080:uw
+ add.sat (4) r[a0.2,112]<2>:uw r[a0.2, 112]<8;4,2>:uw 0x0080:uw
+
+ add (4) a0.0<1>:uw r22.0<4;4,1>:w 2048:uw // NOTE(review): a0.0-a0.3 are overwritten by the next instruction before being read, so this add appears to have no effect
+ // restore pointer: back to buffer 0 (offset 0) for the pack / write-out stage
+ add (4) a0.0<1>:uw r22.0<4;4,1>:w 0:uw
+
+ mov (8) r28<1>:ud r27<8;8,1>:ud // r28 = copy of the r27 header, for the first write
+ mov (8) r37<1>:ud r27<8;8,1>:ud // r37 = copy of the r27 header, for the second write
+ add (1) r37.1<1>:d r27.1<0;1,0>:d 2:d // Point to 2nd part: second header's origin is r27.1 + 2 (the block is 2 rows high)
+
+ /* a0.2 U, a0.1 Y, a0.0 V */
+ // Pack buffer 0, source spans 0 and 1, to destination offsets 0 and 32; only byte 1 (the upper byte) of each 16-bit source word is copied
+ mov (8) r[a0.6, 0]<4>:ub r[a0.0, 1]<32;8,4>:ub { NoDDClr } // V: every other word, one byte per 4 output bytes
+ mov (8) r[a0.6, 32]<4>:ub r[a0.0,33]<32;8,4>:ub { NoDDClr }
+ mov (16) r[a0.4, 0]<2>:ub r[a0.1, 1]<32;16,2>:ub { NoDDClr, NoDDChk } // Y: all 16 words, one byte per 2 output bytes
+ mov (16) r[a0.4, 32]<2>:ub r[a0.1,33]<32;16,2>:ub { NoDDClr, NoDDChk }
+ mov (8) r[a0.5, 0]<4>:ub r[a0.2, 1]<32;8,4>:ub { NoDDChk } // U: every other word, one byte per 4 output bytes
+ mov (8) r[a0.5, 32]<4>:ub r[a0.2,33]<32;8,4>:ub { NoDDChk }
+
+ /* a0.4 + 288 = r38: source spans 2 and 3 go to destination offsets 288 and 320, the registers following the r37 header */
+ mov (8) r[a0.6, 288]<4>:ub r[a0.0,65]<32;8,4>:ub { NoDDClr }
+ mov (8) r[a0.6, 320]<4>:ub r[a0.0,97]<32;8,4>:ub { NoDDClr }
+ mov (16) r[a0.4,288]<2>:ub r[a0.1,65]<32;16,2>:ub { NoDDClr, NoDDChk }
+ mov (16) r[a0.4,320]<2>:ub r[a0.1,97]<32;16,2>:ub { NoDDClr, NoDDChk }
+ mov (8) r[a0.5,288]<4>:ub r[a0.2,65]<32;8,4>:ub { NoDDChk }
+ mov (8) r[a0.5,320]<4>:ub r[a0.2,97]<32;8,4>:ub { NoDDChk }
+ // Two writes; going by the descriptor layout in this file's header comments, 0x60A8018 = message length 3, header present, media block write, binding index 0x18
+ send (1) null<1>:d r28 0xc 0x60A8018:ud
+ send (1) null<1>:d r37 0xc 0x60A8018:ud
+
+ // advance source pointers to buffer 1 (base words at r22.0 + 512 bytes)
+ add (4) a0.0<1>:uw r22.0<4;4,1>:w 512:uw
+
+ add (1) r28.1<1>:d r27.1<0;1,0>:d 4:d // first header's origin = r27.1 + 4
+ add (1) r37.1<1>:d r27.1<0;1,0>:d 6:d // second header's origin = r27.1 + 6
+
+ // Pack source spans 0 and 1 to destination offsets 0 and 32 (a0.6 <- a0.0, a0.4 <- a0.1, a0.5 <- a0.2); byte 1 of each source word is copied
+ mov (8) r[a0.6, 0]<4>:ub r[a0.0, 1]<32;8,4>:ub { NoDDClr }
+ mov (8) r[a0.6, 32]<4>:ub r[a0.0,33]<32;8,4>:ub { NoDDClr }
+ mov (16) r[a0.4, 0]<2>:ub r[a0.1, 1]<32;16,2>:ub { NoDDClr, NoDDChk }
+ mov (16) r[a0.4, 32]<2>:ub r[a0.1,33]<32;16,2>:ub { NoDDClr, NoDDChk }
+ mov (8) r[a0.5, 0]<4>:ub r[a0.2, 1]<32;8,4>:ub { NoDDChk }
+ mov (8) r[a0.5, 32]<4>:ub r[a0.2,33]<32;8,4>:ub { NoDDChk }
+ // Pack source spans 2 and 3 to destination offsets 288 and 320 (payload of the second write)
+ mov (8) r[a0.6, 288]<4>:ub r[a0.0,65]<32;8,4>:ub { NoDDClr }
+ mov (8) r[a0.6, 320]<4>:ub r[a0.0,97]<32;8,4>:ub { NoDDClr }
+ mov (16) r[a0.4,288]<2>:ub r[a0.1,65]<32;16,2>:ub { NoDDClr, NoDDChk }
+ mov (16) r[a0.4,320]<2>:ub r[a0.1,97]<32;16,2>:ub { NoDDClr, NoDDChk }
+ mov (8) r[a0.5,288]<4>:ub r[a0.2,65]<32;8,4>:ub { NoDDChk }
+ mov (8) r[a0.5,320]<4>:ub r[a0.2,97]<32;8,4>:ub { NoDDChk }
+ // Write both packed row pairs (headers r28 and r37)
+ send (1) null<1>:d r28 0xc 0x60A8018:ud
+ send (1) null<1>:d r37 0xc 0x60A8018:ud
+
+ // advance source pointers to buffer 2 (base words at r22.0 + 1024 bytes)
+ add (4) a0.0<1>:uw r22.0<4;4,1>:w 1024:uw
+
+ add (1) r28.1<1>:d r27.1<0;1,0>:d 8:d // first header's origin = r27.1 + 8
+ add (1) r37.1<1>:d r27.1<0;1,0>:d 10:d // second header's origin = r27.1 + 10
+
+ // Pack source spans 0 and 1 to destination offsets 0 and 32 (a0.6 <- a0.0, a0.4 <- a0.1, a0.5 <- a0.2); byte 1 of each source word is copied
+ mov (8) r[a0.6, 0]<4>:ub r[a0.0, 1]<32;8,4>:ub { NoDDClr }
+ mov (8) r[a0.6, 32]<4>:ub r[a0.0,33]<32;8,4>:ub { NoDDClr }
+ mov (16) r[a0.4, 0]<2>:ub r[a0.1, 1]<32;16,2>:ub { NoDDClr, NoDDChk }
+ mov (16) r[a0.4, 32]<2>:ub r[a0.1,33]<32;16,2>:ub { NoDDClr, NoDDChk }
+ mov (8) r[a0.5, 0]<4>:ub r[a0.2, 1]<32;8,4>:ub { NoDDChk }
+ mov (8) r[a0.5, 32]<4>:ub r[a0.2,33]<32;8,4>:ub { NoDDChk }
+ // Pack source spans 2 and 3 to destination offsets 288 and 320 (payload of the second write)
+ mov (8) r[a0.6, 288]<4>:ub r[a0.0,65]<32;8,4>:ub { NoDDClr }
+ mov (8) r[a0.6, 320]<4>:ub r[a0.0,97]<32;8,4>:ub { NoDDClr }
+ mov (16) r[a0.4,288]<2>:ub r[a0.1,65]<32;16,2>:ub { NoDDClr, NoDDChk }
+ mov (16) r[a0.4,320]<2>:ub r[a0.1,97]<32;16,2>:ub { NoDDClr, NoDDChk }
+ mov (8) r[a0.5,288]<4>:ub r[a0.2,65]<32;8,4>:ub { NoDDChk }
+ mov (8) r[a0.5,320]<4>:ub r[a0.2,97]<32;8,4>:ub { NoDDChk }
+ // Write both packed row pairs (headers r28 and r37)
+ send (1) null<1>:d r28 0xc 0x60A8018:ud
+ send (1) null<1>:d r37 0xc 0x60A8018:ud
+
+ // advance source pointers to buffer 3 (base words at r22.0 + 1536 bytes)
+ add (4) a0.0<1>:uw r22.0<4;4,1>:w 1536:uw
+
+ add (1) r28.1<1>:d r27.1<0;1,0>:d 12:d // first header's origin = r27.1 + 12
+ add (1) r37.1<1>:d r27.1<0;1,0>:d 14:d // second header's origin = r27.1 + 14
+ // Pack source spans 0 and 1 to destination offsets 0 and 32 (a0.6 <- a0.0, a0.4 <- a0.1, a0.5 <- a0.2); byte 1 of each source word is copied
+ mov (8) r[a0.6, 0]<4>:ub r[a0.0, 1]<32;8,4>:ub { NoDDClr }
+ mov (8) r[a0.6, 32]<4>:ub r[a0.0,33]<32;8,4>:ub { NoDDClr }
+ mov (16) r[a0.4, 0]<2>:ub r[a0.1, 1]<32;16,2>:ub { NoDDClr, NoDDChk }
+ mov (16) r[a0.4, 32]<2>:ub r[a0.1,33]<32;16,2>:ub { NoDDClr, NoDDChk }
+ mov (8) r[a0.5, 0]<4>:ub r[a0.2, 1]<32;8,4>:ub { NoDDChk }
+ mov (8) r[a0.5, 32]<4>:ub r[a0.2,33]<32;8,4>:ub { NoDDChk }
+ // Pack source spans 2 and 3 to destination offsets 288 and 320 (payload of the second write)
+ mov (8) r[a0.6, 288]<4>:ub r[a0.0,65]<32;8,4>:ub { NoDDClr }
+ mov (8) r[a0.6, 320]<4>:ub r[a0.0,97]<32;8,4>:ub { NoDDClr }
+ mov (16) r[a0.4,288]<2>:ub r[a0.1,65]<32;16,2>:ub { NoDDClr, NoDDChk }
+ mov (16) r[a0.4,320]<2>:ub r[a0.1,97]<32;16,2>:ub { NoDDClr, NoDDChk }
+ mov (8) r[a0.5,288]<4>:ub r[a0.2,65]<32;8,4>:ub { NoDDChk }
+ mov (8) r[a0.5,320]<4>:ub r[a0.2,97]<32;8,4>:ub { NoDDChk }
+ // Write the last two packed row pairs (headers r28 and r37)
+ send (1) null<1>:d r28 0xc 0x60A8018:ud
+ send (1) null<1>:d r37 0xc 0x60A8018:ud
+
diff --git a/src/shaders/post_processing/gen8/Save_AVS_PL3.g8a b/src/shaders/post_processing/gen8/Save_AVS_PL3.g8a
new file mode 100644
index 0000000..417fd4f
--- /dev/null
+++ b/src/shaders/post_processing/gen8/Save_AVS_PL3.g8a
@@ -0,0 +1,565 @@
+/*
+ * Copyright 2000-2011 Intel Corporation All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * Author: Zhao Yakui <yakui.zhao@intel.com>
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+// 84 // Total instruction count
+// 1 // Total kernel count
+
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+
+.default_execution_size (16)
+.default_register_type :ub
+
+.reg_count_total 128
+.reg_count_payload 7
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+//Color Pipe (IECP) parameters
+
+
+//ByteCopy
+
+
+//r4
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later)
+// 0000,0000,0000
+// 0001(Spawn a root thread),0001 (Root thread spawn thread)
+// = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later)
+// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+// 0000,0000 (Binding table index, added later)
+// = 0x02000011
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+ // Message Descriptors
+ // = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+ // 1 (header present 1) 0 1010 (media block write) 000000
+ // 00000000 (binding table index - set later)
+ // = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+ // buf4 starts from r28
+
+
+//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT
+
+
+.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Define temp space for any usages
+
+
+// Common Buffers
+
+
+// temp space for rotation
+
+.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+
+.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+
+.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+
+.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+
+
+// End of common.inc
+
+
+// Module name: Save_AVS_PL3.asm
+//
+// Save PL3 420 frame data block of size 16x16
+//
+// To save 16x16 block (16x16 byte of Y and 8x8 byte of U and V each) we need 3 send instructions with one of size 16x16 and two of size 8x8.
+// -----------------
+// | 16x16 Y |
+// | |
+// -----------------
+// | 8x8 U |
+// ---------
+// | 8x8 V |
+// ---------
+
+//-----------------------------------------------------------------
+//The layout of data is as follows:
+//mMSGHDR0 : Y data header (16x16)
+//mubMSGPAYLOAD0 : Y data payload (8 GRFs)
+//mMSGHDR1 : U data header (8x8)
+//mubMSGPAYLOAD1 : U data payload (2 GRFs)
+//mMSGHDR2 : V data header (8x8)
+//mubMSGPAYLOAD2 : V data payload (2 GRFs)
+//------------------------------------------------------------------
+
+
+
+// Module name: Save.inc
+
+
+
+
+// Description: Includes all definitions explicit to Fast Composite.
+
+
+
+
+// End of common.inc
+
+
+//========== GRF partition ==========
+ // r0 header : r0 (1 GRF)
+ // Static parameters : r1 - r6 (6 GRFS)
+ // Inline parameters : r7 - r8 (2 GRFs)
+ // MSGSRC : r27 (1 GRF)
+//===================================
+
+//Interface:
+//========== Static Parameters (Explicit To Fast Composite) ==========
+//r1
+//CSC Set 0
+
+
+.declare udCSC_CURBE Base=r1.0 ElementSize=4 Type=ud
+
+//Constant alpha
+
+
+//r2
+
+
+// Gen7 AVS WA
+
+
+// WiDi Definitions
+
+
+//Colorfill
+
+
+ // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise.
+
+.declare ubCOLOR_PIXEL_VAL Base=r2.20 ElementSize=1 SrcRegion=<0;1,0> DstRegion=<1> Type=ub
+
+//r3
+//Normalised Ratio of Horizontal step size with main video for all layers
+
+
+ //Normalised Ratio of Horizontal step size with main video for all layers becomes
+ //Normalised Horizontal step size for all layers in VP_Setup.asm
+
+
+//r4
+//Normalised Vertical step size for all layers
+
+
+//r5
+//Normalised Vertical Frame Origin for all layers
+
+
+//r6
+//Normalised Horizontal Frame Origin for all layers
+
+
+//========== Inline Parameters (Explicit To Fast Composite) ==========
+
+
+//Main video Step X
+
+
+//====================== Binding table (Explicit To Fast Composite)=========================================
+
+
+//Used by Interlaced Scaling Kernels
+
+
+//========== Sampler State Table Index (Explicit To Fast Composite)==========
+//Sampler Index for AVS/IEF messages
+
+
+//Sampler Index for SIMD16 sampler messages
+
+
+//=============================================================================
+
+.declare fBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+
+.declare udBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+
+.declare uwBUFFER_0 Base=r64.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_1 Base=r80.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_2 Base=r96.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_3 Base=r112.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_4 Base=r28.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_5 Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+
+.declare ubBUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+.declare ub4BUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+
+//Pointer to mask reg
+
+
+//r18
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+.declare udCSC_COEFF_0 Base=r18.0 ElementSize=4 Type=ud // 1 GRF
+
+//r19
+
+
+.declare udCSC_COEFF_1 Base=r19.0 ElementSize=4 Type=ud // 1 GRF
+
+
+//r20
+
+.declare uwALPHA_MASK_REG_TEMP Base=r20.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF
+
+//r21
+
+.declare uwALPHA_MASK_REG Base=r21.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF
+
+//r22
+
+
+//Always keep Channel Pointers and Offsets in the same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as
+//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT
+
+//r23
+
+
+//Lumakey
+
+
+//r24
+
+
+//r25
+
+
+//r26
+
+
+//defines to generate LABELS during compile time.
+
+
+//Msg payload buffers; up to 4 full-size messages can be written
+
+
+.declare mudMSGPAYLOAD0 Base=r29.0 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare mudMSGPAYLOAD1 Base=r38.0 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare mudMSGPAYLOAD2 Base=r47.0 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare mudMSGPAYLOAD3 Base=r56.0 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+
+.declare muwMSGPAYLOAD0 Base=r29.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare muwMSGPAYLOAD1 Base=r38.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare muwMSGPAYLOAD2 Base=r47.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare muwMSGPAYLOAD3 Base=r56.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+
+.declare mubMSGPAYLOAD0 Base=r29.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare mubMSGPAYLOAD1 Base=r38.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare mubMSGPAYLOAD2 Base=r47.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare mubMSGPAYLOAD3 Base=r56.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare mubMSGPAYLOAD4 Base=r32.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare mubMSGPAYLOAD5 Base=r41.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare mubMSGPAYLOAD6 Base=r50.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare mubMSGPAYLOAD7 Base=r59.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+
+
+ // the r17 register (nTEMP0) is originally defined in "Common.inc";
+ // instead of re-defining nTEMP0 here, a separate name (uwTemp0) is declared for it
+
+ .declare uwTemp0 Base=r17.0 ElementSize=2 Type=uw
+
+
+//_SAVE_INC_
+
+
+ // At the save module we have all 8 address sub-registers available.
+ // So we will use PING-PONG type of scheme to save the data using
+ // pointers pBUF_CHNL_TOP_8x4 and pBUF_CHNL_BOT_8x4. This will help
+ // reduce dependency. - rT
+
+ //wBUFF_CHNL_PTR points to either buffer 0 or buffer 4.
+ //Add appropriate offsets to get pointers for all buffers (1,2,3 or 5).
+ //Offsets are zero for buffer 0 and buffer 4.
+ add (4) a0.0<1>:uw r22.0<4;4,1>:w 0:uw // a0.0-a0.3 = the four words r22.0-r22.3, unchanged
+ add (4) a0.4<1>:uw r22.0<4;4,1>:w 512:uw // a0.4-a0.7 = same four words + 512 bytes (the r64 -> r80 spacing of BUFFER_0/BUFFER_1)
+
+ //Set up header for Y,U and V data: each header GRF (r28, r37, r46) starts as a copy of r27
+ mov (8) r28<1>:ud r27<8;8,1>:ud
+ mov (8) r37<1>:ud r27<8;8,1>:ud
+ mov (8) r46<1>:ud r27<8;8,1>:ud
+
+ mov (2) r28.0<1>:d r7.0<2;2,1>:w { NoDDClr } //ORI Y (LUMA) = ORI
+ shr (2) r37.0<1>:d r7.0<2;2,1>:w 1:w { NoDDClr } //H/V ORI U = H/V ORI/2
+ shr (2) r46.0<1>:d r7.0<2;2,1>:w 1:w { NoDDClr } //H/V ORI V = H/V ORI/2
+ // Dword 2 of each header holds the block size as (size - 1) in each 16-bit half
+ mov (1) r28.2<1>:ud 0xF000F:ud { NoDDChk } // Y Block width and height (16x16)
+ mov (1) r37.2<1>:ud 0x70007:ud { NoDDChk } // U Block width and height (8x8)
+ mov (1) r46.2<1>:ud 0x70007:ud { NoDDChk } // V Block width and height (8x8)
+
+// Unscramble, and pack data directly to MRFs
+
+// Data 16x16 block is divided as -
+// ---------
+// | 0 |
+// ---------
+// | 1 |
+// ---------
+// | 2 |
+// ---------
+// | 3 |
+// ---------
+// All sub-blocks are of size 16x4
+// 0: ubBUFFER_0
+// 1: ubBUFFER_1, ubBUFFER_0+16
+// 2: ubBUFFER_2
+// 3: ubBUFFER_3, ubBUFFER_2+16
+
+ //Y Rounding, first: saturating +0x0080 on all 16 words of the four 32-byte rows at a0.1
+ add.sat (16) r[a0.1,0]<1>:uw r[a0.1,0]<16;16,1>:uw 0x0080:uw
+ add.sat (16) r[a0.1,32]<1>:uw r[a0.1,32]<16;16,1>:uw 0x0080:uw
+ add.sat (16) r[a0.1,64]<1>:uw r[a0.1,64]<16;16,1>:uw 0x0080:uw
+ add.sat (16) r[a0.1,96]<1>:uw r[a0.1,96]<16;16,1>:uw 0x0080:uw
+
+ // U rounding: only every second word (stride 2) of the rows at offsets 0 and 64
+ add.sat (8) r[a0.2,0]<2>:uw r[a0.2,0]<16;8,2>:uw 0x0080:uw
+ add.sat (8) r[a0.2,64]<2>:uw r[a0.2,64]<16;8,2>:uw 0x0080:uw
+
+ // V rounding: same word selection as U, through a0.0
+ add.sat (8) r[a0.0,0]<2>:uw r[a0.0,0]<16;8,2>:uw 0x0080:uw
+ add.sat (8) r[a0.0,64]<2>:uw r[a0.0,64]<16;8,2>:uw 0x0080:uw
+
+ add (4) a0.0<1>:uw r22.0<4;4,1>:w 1024:uw //Update Buffer 2 pointers
+
+ //Y Rounding, second: same pattern through a0.5 (the +512 pointer set)
+ add.sat (16) r[a0.5,0]<1>:uw r[a0.5,0]<16;16,1>:uw 0x0080:uw
+ add.sat (16) r[a0.5,32]<1>:uw r[a0.5,32]<16;16,1>:uw 0x0080:uw
+ add.sat (16) r[a0.5,64]<1>:uw r[a0.5,64]<16;16,1>:uw 0x0080:uw
+ add.sat (16) r[a0.5,96]<1>:uw r[a0.5,96]<16;16,1>:uw 0x0080:uw
+
+ // U rounding
+ add.sat (8) r[a0.6,0]<2>:uw r[a0.6,0]<16;8,2>:uw 0x0080:uw
+ add.sat (8) r[a0.6,64]<2>:uw r[a0.6,64]<16;8,2>:uw 0x0080:uw
+
+ // V rounding
+ add.sat (8) r[a0.4,0]<2>:uw r[a0.4,0]<16;8,2>:uw 0x0080:uw
+ add.sat (8) r[a0.4,64]<2>:uw r[a0.4,64]<16;8,2>:uw 0x0080:uw
+
+ add (4) a0.4<1>:uw r22.0<4;4,1>:w 1536:uw //Update Buffer 3 pointers
+
+ //Y Rounding, third: a0.0-a0.3 now carry the +1024 offset set above
+ add.sat (16) r[a0.1,0]<1>:uw r[a0.1,0]<16;16,1>:uw 0x0080:uw
+ add.sat (16) r[a0.1,32]<1>:uw r[a0.1,32]<16;16,1>:uw 0x0080:uw
+ add.sat (16) r[a0.1,64]<1>:uw r[a0.1,64]<16;16,1>:uw 0x0080:uw
+ add.sat (16) r[a0.1,96]<1>:uw r[a0.1,96]<16;16,1>:uw 0x0080:uw
+
+ // U rounding
+ add.sat (8) r[a0.2,0]<2>:uw r[a0.2,0]<16;8,2>:uw 0x0080:uw
+ add.sat (8) r[a0.2,64]<2>:uw r[a0.2,64]<16;8,2>:uw 0x0080:uw
+
+ // V rounding
+ add.sat (8) r[a0.0,0]<2>:uw r[a0.0,0]<16;8,2>:uw 0x0080:uw
+ add.sat (8) r[a0.0,64]<2>:uw r[a0.0,64]<16;8,2>:uw 0x0080:uw
+
+
+ //Y Rounding, fourth: a0.4-a0.7 now carry the +1536 offset set above
+ add.sat (16) r[a0.5,0]<1>:uw r[a0.5,0]<16;16,1>:uw 0x0080:uw
+ add.sat (16) r[a0.5,32]<1>:uw r[a0.5,32]<16;16,1>:uw 0x0080:uw
+ add.sat (16) r[a0.5,64]<1>:uw r[a0.5,64]<16;16,1>:uw 0x0080:uw
+ add.sat (16) r[a0.5,96]<1>:uw r[a0.5,96]<16;16,1>:uw 0x0080:uw
+
+ // U rounding
+ add.sat (8) r[a0.6,0]<2>:uw r[a0.6,0]<16;8,2>:uw 0x0080:uw
+ add.sat (8) r[a0.6,64]<2>:uw r[a0.6,64]<16;8,2>:uw 0x0080:uw
+
+ // V rounding
+ add.sat (8) r[a0.4,0]<2>:uw r[a0.4,0]<16;8,2>:uw 0x0080:uw
+ add.sat (8) r[a0.4,64]<2>:uw r[a0.4,64]<16;8,2>:uw 0x0080:uw
+
+ // restore the TOP and BOT pointers (offsets 0 and 512 again) for the packing pass that follows
+ add (4) a0.0<1>:uw r22.0<4;4,1>:w 0:uw
+ add (4) a0.4<1>:uw r22.0<4;4,1>:w 512:uw
+
+//Buffer 0 (pointers a0.0-a0.3, offset 0)
+//Move Y to msg payload: the odd byte of each of the 16 words in a row, two 16-byte rows per payload GRF
+ mov (16) mubMSGPAYLOAD0(0,0)<1> r[a0.1, 1]<32;16,2>:ub { NoDDClr }
+ mov (16) mubMSGPAYLOAD0(0,16)<1> r[a0.1, 33]<32;16,2>:ub { NoDDChk }
+ mov (16) mubMSGPAYLOAD0(1,0)<1> r[a0.1, 65]<32;16,2>:ub { NoDDClr }
+ mov (16) mubMSGPAYLOAD0(1,16)<1> r[a0.1, 97]<32;16,2>:ub { NoDDChk }
+
+//Move U to msg payload: odd byte of every second word (byte stride 4), rows at offsets 0 and 64 only
+ mov (8) mubMSGPAYLOAD1(0,0)<1> r[a0.2, 1]<32;8,4>:ub { NoDDClr }
+ mov (8) mubMSGPAYLOAD1(0,8)<1> r[a0.2, 65]<32;8,4>:ub { NoDDClr, NoDDChk }
+
+//Move V to msg payload: same selection as U, through a0.0
+ mov (8) mubMSGPAYLOAD2(0,0)<1> r[a0.0, 1]<32;8,4>:ub { NoDDClr }
+ mov (8) mubMSGPAYLOAD2(0,8)<1> r[a0.0, 65]<32;8,4>:ub { NoDDClr, NoDDChk }
+
+ add (4) a0.0<1>:uw r22.0<4;4,1>:w 1024:uw //Update Buffer 2 pointers
+
+//Buffer 1 (pointers a0.4-a0.7, offset 512): Y into payload GRFs 2-3, U/V into bytes 16-31 of GRF 0
+ mov (16) mubMSGPAYLOAD0(2,0)<1> r[a0.5, 1]<32;16,2>:ub { NoDDClr }
+ mov (16) mubMSGPAYLOAD0(2,16)<1> r[a0.5, 33]<32;16,2>:ub { NoDDChk }
+ mov (16) mubMSGPAYLOAD0(3,0)<1> r[a0.5, 65]<32;16,2>:ub { NoDDClr }
+ mov (16) mubMSGPAYLOAD0(3,16)<1> r[a0.5, 97]<32;16,2>:ub { NoDDChk }
+
+ mov (8) mubMSGPAYLOAD1(0,16)<1> r[a0.6, 1]<32;8,4>:ub { NoDDClr, NoDDChk }
+ mov (8) mubMSGPAYLOAD1(0,24)<1> r[a0.6, 65]<32;8,4>:ub { NoDDChk }
+
+ mov (8) mubMSGPAYLOAD2(0,16)<1> r[a0.4, 1]<32;8,4>:ub { NoDDClr, NoDDChk }
+ mov (8) mubMSGPAYLOAD2(0,24)<1> r[a0.4, 65]<32;8,4>:ub { NoDDChk }
+
+ add (4) a0.4<1>:uw r22.0<4;4,1>:w 1536:uw //Update Buffer 3 pointers
+
+//Buffer 2 (a0.0-a0.3 now at offset 1024): Y into payload GRFs 4-5, U/V into bytes 0-15 of GRF 1
+ mov (16) mubMSGPAYLOAD0(4,0)<1> r[a0.1, 1]<32;16,2>:ub { NoDDClr }
+ mov (16) mubMSGPAYLOAD0(4,16)<1> r[a0.1, 33]<32;16,2>:ub { NoDDChk }
+ mov (16) mubMSGPAYLOAD0(5,0)<1> r[a0.1, 65]<32;16,2>:ub { NoDDClr }
+ mov (16) mubMSGPAYLOAD0(5,16)<1> r[a0.1, 97]<32;16,2>:ub { NoDDChk }
+
+ mov (8) mubMSGPAYLOAD1(1,0)<1> r[a0.2, 1]<32;8,4>:ub { NoDDClr }
+ mov (8) mubMSGPAYLOAD1(1,8)<1> r[a0.2, 65]<32;8,4>:ub { NoDDClr, NoDDChk }
+
+ mov (8) mubMSGPAYLOAD2(1,0)<1> r[a0.0, 1]<32;8,4>:ub { NoDDClr }
+ mov (8) mubMSGPAYLOAD2(1,8)<1> r[a0.0, 65]<32;8,4>:ub { NoDDClr, NoDDChk }
+
+//Buffer 3 (a0.4-a0.7 now at offset 1536): Y into payload GRFs 6-7, U/V into bytes 16-31 of GRF 1
+ mov (16) mubMSGPAYLOAD0(6,0)<1> r[a0.5, 1]<32;16,2>:ub { NoDDClr }
+ mov (16) mubMSGPAYLOAD0(6,16)<1> r[a0.5, 33]<32;16,2>:ub { NoDDChk }
+ mov (16) mubMSGPAYLOAD0(7,0)<1> r[a0.5, 65]<32;16,2>:ub { NoDDClr }
+ mov (16) mubMSGPAYLOAD0(7,16)<1> r[a0.5, 97]<32;16,2>:ub { NoDDChk }
+
+ mov (8) mubMSGPAYLOAD1(1,16)<1> r[a0.6, 1]<32;8,4>:ub { NoDDClr, NoDDChk }
+ mov (8) mubMSGPAYLOAD1(1,24)<1> r[a0.6, 65]<32;8,4>:ub { NoDDChk }
+
+ mov (8) mubMSGPAYLOAD2(1,16)<1> r[a0.4, 1]<32;8,4>:ub { NoDDClr, NoDDChk }
+ mov (8) mubMSGPAYLOAD2(1,24)<1> r[a0.4, 65]<32;8,4>:ub { NoDDChk }
+
+//===========================================================================
+
+send (1) null<1>:d r28 0xc 0x120A8018:ud // Y: header r28, descriptor top 7 bits = 9 (header + 8 payload GRFs), low byte 0x18
+send (1) null<1>:d r37 0xc 0x60A8019:ud // U: header r37, top 7 bits = 3 (header + 2 payload GRFs), low byte 0x19
+send (1) null<1>:d r46 0xc 0x60A801A:ud // V: header r46, top 7 bits = 3 (header + 2 payload GRFs), low byte 0x1A
diff --git a/src/shaders/post_processing/gen8/Save_AVS_RGBX.g8a b/src/shaders/post_processing/gen8/Save_AVS_RGBX.g8a
new file mode 100644
index 0000000..d2df8e4
--- /dev/null
+++ b/src/shaders/post_processing/gen8/Save_AVS_RGBX.g8a
@@ -0,0 +1,641 @@
+/*
+ * Copyright 2000-2013 Intel Corporation All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * Authors:
+ * Zhao Yakui <yakui.zhao@intel.com>
+ */
+
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+
+.default_execution_size (16) // execution size assumed when an instruction does not give one
+.default_register_type :ub // operand type assumed when an operand does not give one
+
+.reg_count_total 128 // registers r0..r127 (BUFFER_3 below is based at r112)
+.reg_count_payload 7 // NOTE(review): presumably r0 plus the six static-parameter GRFs r1-r6 - confirm
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+//Color Pipe (IECP) parameters
+
+
+//ByteCopy
+
+
+//r4
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later)
+// 0000,0000,0000
+// 0001(Spawn a root thread),0001 (Root thread spawn thread)
+// = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later)
+// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+// 0000,0000 (Binding table index, added later)
+// = 0x0C098700 (value of the fields listed above; 0x02000011 is the thread-spawner descriptor)
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+ // Message Descriptors
+ // = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+ // 1 (header present 1) 0 1010 (media block write) 000000
+ // 00000000 (binding table index - set later)
+ // = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+ // buf4 starts from r28
+
+
+//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT
+
+
+.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub // byte view of the area starting at r30
+.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw // word view of the same area
+.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud // dword view of the same area
+.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f // float view of the same area
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Define temp space for any usages
+
+
+// Common Buffers
+
+
+// temp space for rotation
+
+.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f // float view of the rotation temp space at r9
+
+.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud // dword view, same base
+
+.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw // word view, same base
+
+.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub // byte view, same base
+
+.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub // byte view with stride 4 (one byte per dword)
+
+
+// End of common.inc
+
+
+// Module name: Save_AVS_RGBX.asm
+//
+// Save packed ARGB 444 frame data block of size 16x16
+//
+// To save 16x16 block (64x16 byte layout for ARGB8888) we need 8 send instructions with 32x4 in each
+// --------
+// | 0 | 1 |
+// | 2 | 3 |
+// | 4 | 5 |
+// | 6 | 7 |
+// ---------
+// Eight block writes of 32x4 bytes each are used, one per numbered tile above
+
+
+
+// Module name: Save.inc
+
+
+
+
+// Description: Includes all definitions explicit to Fast Composite.
+
+
+
+
+// End of common.inc
+
+
+//========== GRF partition ==========
+ // r0 header : r0 (1 GRF)
+ // Static parameters : r1 - r6 (6 GRFS)
+ // Inline parameters : r7 - r8 (2 GRFs)
+ // MSGSRC : r27 (1 GRF)
+//===================================
+
+//Interface:
+//========== Static Parameters (Explicit To Fast Composite) ==========
+//r1
+//CSC Set 0
+
+
+.declare udCSC_CURBE Base=r1.0 ElementSize=4 Type=ud // dword view starting at r1.0
+
+//Constant alpha
+
+
+//r2
+
+
+// Gen7 AVS WA
+
+
+// WiDi Definitions
+
+
+//Colorfill
+
+
+ // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise.
+
+.declare ubCOLOR_PIXEL_VAL Base=r2.20 ElementSize=1 SrcRegion=<0;1,0> DstRegion=<1> Type=ub // byte at r2.20; <0;1,0> reads it as a replicated scalar
+
+//r3
+//Normalised Ratio of Horizontal step size with main video for all layers
+
+
+ //Normalised Ratio of Horizontal step size with main video for all layers becomes
+ //Normalised Horizontal step size for all layers in VP_Setup.asm
+
+
+//r4
+//Normalised Vertical step size for all layers
+
+
+//r5
+//Normalised Vertical Frame Origin for all layers
+
+
+//r6
+//Normalised Horizontal Frame Origin for all layers
+
+
+//========== Inline Parameters (Explicit To Fast Composite) ==========
+
+
+//Main video Step X
+
+
+//====================== Binding table (Explicit To Fast Composite)=========================================
+
+
+//Used by Interlaced Scaling Kernels
+
+
+//========== Sampler State Table Index (Explicit To Fast Composite)==========
+//Sampler Index for AVS/IEF messages
+
+
+//Sampler Index for SIMD16 sampler messages
+
+
+//=============================================================================
+
+.declare fBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f // float views; BUFFER_0..3 are 16 GRFs apart (r64, r80, r96, r112)
+.declare fBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f // BUFFER_4/5 are based at r28/r46, GRFs this file also uses as message headers
+.declare fBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+
+.declare udBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud // dword views of the same six buffers
+.declare udBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+
+.declare uwBUFFER_0 Base=r64.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw // word views of the same six buffers
+.declare uwBUFFER_1 Base=r80.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_2 Base=r96.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_3 Base=r112.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_4 Base=r28.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_5 Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+
+.declare ubBUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub // byte views of the same six buffers
+.declare ubBUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+.declare ub4BUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub // byte views with stride 4 (one byte per dword)
+.declare ub4BUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+
+//Pointer to mask reg
+
+
+//r18
+
+
+//Always keep Channel Pointers and Offsets in the same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+.declare udCSC_COEFF_0 Base=r18.0 ElementSize=4 Type=ud // 1 GRF
+
+//r19
+
+// (second coefficient GRF, dword view like udCSC_COEFF_0)
+.declare udCSC_COEFF_1 Base=r19.0 ElementSize=4 Type=ud // 1 GRF
+
+
+//r20
+
+.declare uwALPHA_MASK_REG_TEMP Base=r20.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF
+
+//r21
+
+.declare uwALPHA_MASK_REG Base=r21.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF
+
+//r22
+
+
+//Always keep Channel Pointers and Offsets in the same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as
+//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT
+
+//r23
+
+
+//Lumakey
+
+
+//r24
+
+
+//r25
+
+
+//r26
+
+
+//defines to generate LABELS during compile time.
+
+
+//Msg payload buffers; up to 4 full-size messages can be written
+
+
+.declare mudMSGPAYLOAD0 Base=r29.0 ElementSize=4 SrcRegion=<8;8,1> Type=ud // payloads 0-3 are 9 GRFs apart; each follows a header GRF (r28, r37, r46, r55)
+.declare mudMSGPAYLOAD1 Base=r38.0 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare mudMSGPAYLOAD2 Base=r47.0 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare mudMSGPAYLOAD3 Base=r56.0 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+
+.declare muwMSGPAYLOAD0 Base=r29.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // word views of the same four payload areas
+.declare muwMSGPAYLOAD1 Base=r38.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare muwMSGPAYLOAD2 Base=r47.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare muwMSGPAYLOAD3 Base=r56.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+
+.declare mubMSGPAYLOAD0 Base=r29.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub // byte views of the same four payload areas
+.declare mubMSGPAYLOAD1 Base=r38.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare mubMSGPAYLOAD2 Base=r47.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare mubMSGPAYLOAD3 Base=r56.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare mubMSGPAYLOAD4 Base=r32.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub // payloads 4-7 start 3 GRFs into payloads 0-3 (r32, r41, r50, r59)
+.declare mubMSGPAYLOAD5 Base=r41.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare mubMSGPAYLOAD6 Base=r50.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare mubMSGPAYLOAD7 Base=r59.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+
+
+ // the r17 register (nTEMP0) is originally defined in "Common.inc";
+ // instead of re-defining nTEMP0 here, a separate name (uwTemp0) is declared for it
+
+ .declare uwTemp0 Base=r17.0 ElementSize=2 Type=uw
+
+
+//_SAVE_INC_
+
+
+// At the save module we have all 8 address sub-registers available.
+// So we will use PING-PONG type of scheme to save the data using
+// pointers pBUF_CHNL_TOP_8x4 and pBUF_CHNL_BOT_8x4. This will help
+// reduce dependency. - rT
+
+ //Internal LAYOUT:(RRGGBBAA)
+ //Assign buffer channel order for Buffer 0123 in the order RGBA a0.3>A, a0.2>B, a0.1>G, a0.0>R
+ // R = 0, G= 4, B = 8, A = 12.
+ mov (4) acc0.0<1>:w 0x62EA:v // NOTE(review): :v is a packed-nibble vector immediate; presumably -6,-2,2,6 - confirm
+ add (4) acc0.0<1>:w acc0<4;4,1>:w 70:uw // + 70: would give 64,68,72,76, i.e. GRF numbers 0/4/8/12 past BUFFER_0 (r64)
+ shl (4) r22.0<1>:w acc0<4;4,1>:w 5:uw // * 32 turns the four values into byte offsets, kept in r22.0-r22.3
+
+ // if channel swap?
+ // This means that it should be BGRA(B is the LSB) or RGBA
+ // the internal format is always RGBA(MSB-A-B-G-R).
+ and.nz.f0.0 null<1>:w r2.3<0;1,0>:uw 0x01:w // f0.0 is set when bit 0 of word r2.3 is 1
+
+//wBUFF_CHNL_PTR points to either buffer 0 or buffer 4.
+//Add appropriate offsets to get pointers for all buffers (1,2,3 or 5).
+//Offsets are zero for buffer 0 and buffer 4.
+ add (4) a0.0<1>:uw r22.0<4;4,1>:w 0:uw
+
+ // pointer swap: when f0.0 is set, exchange a0.0 and a0.2 (the R and B pointers) through uwTemp0
+ (f0.0) mov (1) uwTemp0<1> a0.0<0;1,0>:uw
+ (f0.0) mov (1) a0.0<1>:uw a0.2<0;1,0>:uw
+ (f0.0) mov (1) a0.2<1>:uw uwTemp0<0;1,0>
+
+ shl (1) r27.0<1>:d r7.0<0;1,0>:w 2:w { NoDDClr } // H. block origin needs to be quadrupled (4 output bytes per pixel)
+ mov (1) r27.1<1>:d r7.1<0;1,0>:w { NoDDClr, NoDDChk } // Block origin (1st quadrant)
+ mov (1) r27.2<1>:ud 0x3001F:ud { NoDDChk } // Block width and height (32x4)
+
+ mov (4) a0.4<1>:uw a0.0<4;4,1>:uw // keep the (possibly swapped) base pointers in a0.4-a0.7; later buffers add an offset to them
+
+ mov (8) r28<1>:ud r27<8;8,1>:ud // the four message headers (r28, r37, r46, r55) start as copies of r27
+ mov (8) r37<1>:ud r27<8;8,1>:ud
+ mov (8) r46<1>:ud r27<8;8,1>:ud
+ mov (8) r55<1>:ud r27<8;8,1>:ud
+
+ mov (8) r31<1>:ud r27<8;8,1>:ud // NOTE(review): r31/r40/r49/r58 are GRF 2 of MSGPAYLOAD0-3 and are rewritten by the payload moves; these four copies look redundant - confirm
+ mov (8) r40<1>:ud r27<8;8,1>:ud
+ mov (8) r49<1>:ud r27<8;8,1>:ud
+ mov (8) r58<1>:ud r27<8;8,1>:ud
+
+//Buffer 0/1 are written by using 4 32x4.
+
+ add (1) r37.0<1>:d r27.0<0;1,0>:d 32:d // r37: X origin + 32 bytes
+
+ add (1) r46.1<1>:d r27.1<0;1,0>:d 4:d // r46: Y origin + 4 rows
+
+ add (1) r55.1<1>:d r27.1<0;1,0>:d 4:d // r55: Y origin + 4 rows and X origin + 32 bytes
+ add (1) r55.0<1>:d r27.0<0;1,0>:d 32:d
+
+ // write Buf_0 to 1st quarter of four horizontal output blocks
+
+// NOTE: the remark about scattered NODDCLR, NODDCHK flags comes from the original source. -rT
+// The payload moves below carry no NoDDClr/NoDDChk flags.
+
+/* Buffer 0: MSGPAYLOAD0 takes the first 8 words of each row, MSGPAYLOAD1 the last 8 (source offsets +16) */
+ mov (8) mubMSGPAYLOAD0(0, 0)<4> r[a0.0, 1]<16;8,2>:ub // byte 0 of each 4-byte pixel: odd byte of 8 words read through a0.0
+ mov (8) mubMSGPAYLOAD0(0, 1)<4> r[a0.1, 1]<16;8,2>:ub // byte 1: same words through a0.1
+ mov (8) mubMSGPAYLOAD0(0, 2)<4> r[a0.2, 1]<16;8,2>:ub // byte 2: same words through a0.2
+ mov (8) mubMSGPAYLOAD0(0, 3)<4> r2.31:ub // byte 3: taken from r2.31, not from a channel buffer
+
+ mov (8) mubMSGPAYLOAD0(1, 0)<4> r[a0.0, 33]<16;8,2>:ub
+ mov (8) mubMSGPAYLOAD0(1, 1)<4> r[a0.1, 33]<16;8,2>:ub
+ mov (8) mubMSGPAYLOAD0(1, 2)<4> r[a0.2, 33]<16;8,2>:ub
+ mov (8) mubMSGPAYLOAD0(1, 3)<4> r2.31:ub
+
+ mov (8) mubMSGPAYLOAD1(0, 0)<4> r[a0.0, 17]<16;8,2>:ub
+ mov (8) mubMSGPAYLOAD1(0, 1)<4> r[a0.1, 17]<16;8,2>:ub
+ mov (8) mubMSGPAYLOAD1(0, 2)<4> r[a0.2, 17]<16;8,2>:ub
+ mov (8) mubMSGPAYLOAD1(0, 3)<4> r2.31:ub
+
+ mov (8) mubMSGPAYLOAD1(1, 0)<4> r[a0.0, 49]<16;8,2>:ub
+ mov (8) mubMSGPAYLOAD1(1, 1)<4> r[a0.1, 49]<16;8,2>:ub
+ mov (8) mubMSGPAYLOAD1(1, 2)<4> r[a0.2, 49]<16;8,2>:ub
+ mov (8) mubMSGPAYLOAD1(1, 3)<4> r2.31:ub
+
+ mov (8) mubMSGPAYLOAD0(2, 0)<4> r[a0.0, 65]<16;8,2>:ub
+ mov (8) mubMSGPAYLOAD0(2, 1)<4> r[a0.1, 65]<16;8,2>:ub
+ mov (8) mubMSGPAYLOAD0(2, 2)<4> r[a0.2, 65]<16;8,2>:ub
+ mov (8) mubMSGPAYLOAD0(2, 3)<4> r2.31:ub
+
+ mov (8) mubMSGPAYLOAD0(3, 0)<4> r[a0.0, 97]<16;8,2>:ub
+ mov (8) mubMSGPAYLOAD0(3, 1)<4> r[a0.1, 97]<16;8,2>:ub
+ mov (8) mubMSGPAYLOAD0(3, 2)<4> r[a0.2, 97]<16;8,2>:ub
+ mov (8) mubMSGPAYLOAD0(3, 3)<4> r2.31:ub
+
+ mov (8) mubMSGPAYLOAD1(2, 0)<4> r[a0.0, 81]<16;8,2>:ub
+ mov (8) mubMSGPAYLOAD1(2, 1)<4> r[a0.1, 81]<16;8,2>:ub
+ mov (8) mubMSGPAYLOAD1(2, 2)<4> r[a0.2, 81]<16;8,2>:ub
+ mov (8) mubMSGPAYLOAD1(2, 3)<4> r2.31:ub
+
+ mov (8) mubMSGPAYLOAD1(3, 0)<4> r[a0.0, 113]<16;8,2>:ub
+ mov (8) mubMSGPAYLOAD1(3, 1)<4> r[a0.1, 113]<16;8,2>:ub
+ mov (8) mubMSGPAYLOAD1(3, 2)<4> r[a0.2, 113]<16;8,2>:ub
+ mov (8) mubMSGPAYLOAD1(3, 3)<4> r2.31:ub
+
+/* For Buffer 0: two writes, each header + 4 payload GRFs (descriptor top 7 bits = 5) */
+ send (16) null<1>:d r28 0xc 0x0A0A8018:ud
+ send (16) null<1>:d r37 0xc 0x0A0A8018:ud
+
+ add (4) a0.0<1>:uw a0.4<4;4,1>:uw 512:uw // a0.0-a0.3 = saved base pointers + 512 bytes; packing mirrors buffer 0, into MSGPAYLOAD2/3
+ mov (8) mubMSGPAYLOAD2(0, 0)<4> r[a0.0, 1]<16;8,2>:ub
+ mov (8) mubMSGPAYLOAD2(0, 1)<4> r[a0.1, 1]<16;8,2>:ub
+ mov (8) mubMSGPAYLOAD2(0, 2)<4> r[a0.2, 1]<16;8,2>:ub
+ mov (8) mubMSGPAYLOAD2(0, 3)<4> r2.31:ub
+
+ mov (8) mubMSGPAYLOAD2(1, 0)<4> r[a0.0, 33]<16;8,2>:ub
+ mov (8) mubMSGPAYLOAD2(1, 1)<4> r[a0.1, 33]<16;8,2>:ub
+ mov (8) mubMSGPAYLOAD2(1, 2)<4> r[a0.2, 33]<16;8,2>:ub
+ mov (8) mubMSGPAYLOAD2(1, 3)<4> r2.31:ub
+
+ mov (8) mubMSGPAYLOAD3(0, 0)<4> r[a0.0, 17]<16;8,2>:ub
+ mov (8) mubMSGPAYLOAD3(0, 1)<4> r[a0.1, 17]<16;8,2>:ub
+ mov (8) mubMSGPAYLOAD3(0, 2)<4> r[a0.2, 17]<16;8,2>:ub
+ mov (8) mubMSGPAYLOAD3(0, 3)<4> r2.31:ub
+
+ mov (8) mubMSGPAYLOAD3(1, 0)<4> r[a0.0, 49]<16;8,2>:ub
+ mov (8) mubMSGPAYLOAD3(1, 1)<4> r[a0.1, 49]<16;8,2>:ub
+ mov (8) mubMSGPAYLOAD3(1, 2)<4> r[a0.2, 49]<16;8,2>:ub
+ mov (8) mubMSGPAYLOAD3(1, 3)<4> r2.31:ub
+
+ mov (8) mubMSGPAYLOAD2(2, 0)<4> r[a0.0, 65]<16;8,2>:ub
+ mov (8) mubMSGPAYLOAD2(2, 1)<4> r[a0.1, 65]<16;8,2>:ub
+ mov (8) mubMSGPAYLOAD2(2, 2)<4> r[a0.2, 65]<16;8,2>:ub
+ mov (8) mubMSGPAYLOAD2(2, 3)<4> r2.31:ub
+
+ mov (8) mubMSGPAYLOAD2(3, 0)<4> r[a0.0, 97]<16;8,2>:ub
+ mov (8) mubMSGPAYLOAD2(3, 1)<4> r[a0.1, 97]<16;8,2>:ub
+ mov (8) mubMSGPAYLOAD2(3, 2)<4> r[a0.2, 97]<16;8,2>:ub
+ mov (8) mubMSGPAYLOAD2(3, 3)<4> r2.31:ub
+
+ mov (8) mubMSGPAYLOAD3(2, 0)<4> r[a0.0, 81]<16;8,2>:ub
+ mov (8) mubMSGPAYLOAD3(2, 1)<4> r[a0.1, 81]<16;8,2>:ub
+ mov (8) mubMSGPAYLOAD3(2, 2)<4> r[a0.2, 81]<16;8,2>:ub
+ mov (8) mubMSGPAYLOAD3(2, 3)<4> r2.31:ub
+
+ mov (8) mubMSGPAYLOAD3(3, 0)<4> r[a0.0, 113]<16;8,2>:ub
+ mov (8) mubMSGPAYLOAD3(3, 1)<4> r[a0.1, 113]<16;8,2>:ub
+ mov (8) mubMSGPAYLOAD3(3, 2)<4> r[a0.2, 113]<16;8,2>:ub
+ mov (8) mubMSGPAYLOAD3(3, 3)<4> r2.31:ub
+ // send Buffer 1: headers r46 (Y origin + 4) and r55 (Y origin + 4, X origin + 32)
+ send (16) null<1>:d r46 0xc 0x0A0A8018:ud
+ send (16) null<1>:d r55 0xc 0x0A0A8018:ud
+
+
+/* for Buffer 2/3: rebuild the four headers from r27 with Y offsets 8 and 12 */
+ mov (8) r28<1>:ud r27<8;8,1>:ud
+ mov (8) r37<1>:ud r27<8;8,1>:ud
+ mov (8) r46<1>:ud r27<8;8,1>:ud
+ mov (8) r55<1>:ud r27<8;8,1>:ud
+
+ add (1) r28.1<1>:d r27.1<0;1,0>:d 8:d // r28: Y origin + 8 rows
+
+ add (1) r37.0<1>:d r27.0<0;1,0>:d 32:d // r37: X origin + 32 bytes, Y origin + 8 rows
+ add (1) r37.1<1>:d r27.1<0;1,0>:d 8:d
+
+ add (1) r46.1<1>:d r27.1<0;1,0>:d 12:d // r46: Y origin + 12 rows
+
+ add (1) r55.1<1>:d r27.1<0;1,0>:d 12:d // r55: Y origin + 12 rows, X origin + 32 bytes
+ add (1) r55.0<1>:d r27.0<0;1,0>:d 32:d
+
+ add (4) a0.0<1>:uw a0.4<4;4,1>:uw 1024:uw // a0.0-a0.3 = saved base pointers + 1024 bytes
+
+ mov (8) mubMSGPAYLOAD0(0, 0)<4> r[a0.0, 1]<16;8,2>:ub // same packing pattern as buffer 0, reusing MSGPAYLOAD0/1
+ mov (8) mubMSGPAYLOAD0(0, 1)<4> r[a0.1, 1]<16;8,2>:ub
+ mov (8) mubMSGPAYLOAD0(0, 2)<4> r[a0.2, 1]<16;8,2>:ub
+ mov (8) mubMSGPAYLOAD0(0, 3)<4> r2.31:ub
+
+ mov (8) mubMSGPAYLOAD0(1, 0)<4> r[a0.0, 33]<16;8,2>:ub
+ mov (8) mubMSGPAYLOAD0(1, 1)<4> r[a0.1, 33]<16;8,2>:ub
+ mov (8) mubMSGPAYLOAD0(1, 2)<4> r[a0.2, 33]<16;8,2>:ub
+ mov (8) mubMSGPAYLOAD0(1, 3)<4> r2.31:ub
+
+ mov (8) mubMSGPAYLOAD1(0, 0)<4> r[a0.0, 17]<16;8,2>:ub
+ mov (8) mubMSGPAYLOAD1(0, 1)<4> r[a0.1, 17]<16;8,2>:ub
+ mov (8) mubMSGPAYLOAD1(0, 2)<4> r[a0.2, 17]<16;8,2>:ub
+ mov (8) mubMSGPAYLOAD1(0, 3)<4> r2.31:ub
+
+ mov (8) mubMSGPAYLOAD1(1, 0)<4> r[a0.0, 49]<16;8,2>:ub
+ mov (8) mubMSGPAYLOAD1(1, 1)<4> r[a0.1, 49]<16;8,2>:ub
+ mov (8) mubMSGPAYLOAD1(1, 2)<4> r[a0.2, 49]<16;8,2>:ub
+ mov (8) mubMSGPAYLOAD1(1, 3)<4> r2.31:ub
+
+ mov (8) mubMSGPAYLOAD0(2, 0)<4> r[a0.0, 65]<16;8,2>:ub
+ mov (8) mubMSGPAYLOAD0(2, 1)<4> r[a0.1, 65]<16;8,2>:ub
+ mov (8) mubMSGPAYLOAD0(2, 2)<4> r[a0.2, 65]<16;8,2>:ub
+ mov (8) mubMSGPAYLOAD0(2, 3)<4> r2.31:ub
+
+ mov (8) mubMSGPAYLOAD0(3, 0)<4> r[a0.0, 97]<16;8,2>:ub
+ mov (8) mubMSGPAYLOAD0(3, 1)<4> r[a0.1, 97]<16;8,2>:ub
+ mov (8) mubMSGPAYLOAD0(3, 2)<4> r[a0.2, 97]<16;8,2>:ub
+ mov (8) mubMSGPAYLOAD0(3, 3)<4> r2.31:ub
+
+ mov (8) mubMSGPAYLOAD1(2, 0)<4> r[a0.0, 81]<16;8,2>:ub
+ mov (8) mubMSGPAYLOAD1(2, 1)<4> r[a0.1, 81]<16;8,2>:ub
+ mov (8) mubMSGPAYLOAD1(2, 2)<4> r[a0.2, 81]<16;8,2>:ub
+ mov (8) mubMSGPAYLOAD1(2, 3)<4> r2.31:ub
+
+ mov (8) mubMSGPAYLOAD1(3, 0)<4> r[a0.0, 113]<16;8,2>:ub
+ mov (8) mubMSGPAYLOAD1(3, 1)<4> r[a0.1, 113]<16;8,2>:ub
+ mov (8) mubMSGPAYLOAD1(3, 2)<4> r[a0.2, 113]<16;8,2>:ub
+ mov (8) mubMSGPAYLOAD1(3, 3)<4> r2.31:ub
+
+// Send Buffer 2: headers r28 and r37, payloads MSGPAYLOAD0/1
+ send (16) null<1>:d r28 0xc 0x0A0A8018:ud
+ send (16) null<1>:d r37 0xc 0x0A0A8018:ud
+
+ add (4) a0.0<1>:uw a0.4<4;4,1>:uw 1536:uw // a0.0-a0.3 = saved base pointers + 1536 bytes; packing mirrors buffer 1, into MSGPAYLOAD2/3
+ mov (8) mubMSGPAYLOAD2(0, 0)<4> r[a0.0, 1]<16;8,2>:ub
+ mov (8) mubMSGPAYLOAD2(0, 1)<4> r[a0.1, 1]<16;8,2>:ub
+ mov (8) mubMSGPAYLOAD2(0, 2)<4> r[a0.2, 1]<16;8,2>:ub
+ mov (8) mubMSGPAYLOAD2(0, 3)<4> r2.31:ub
+
+ mov (8) mubMSGPAYLOAD2(1, 0)<4> r[a0.0, 33]<16;8,2>:ub
+ mov (8) mubMSGPAYLOAD2(1, 1)<4> r[a0.1, 33]<16;8,2>:ub
+ mov (8) mubMSGPAYLOAD2(1, 2)<4> r[a0.2, 33]<16;8,2>:ub
+ mov (8) mubMSGPAYLOAD2(1, 3)<4> r2.31:ub
+
+ mov (8) mubMSGPAYLOAD3(0, 0)<4> r[a0.0, 17]<16;8,2>:ub
+ mov (8) mubMSGPAYLOAD3(0, 1)<4> r[a0.1, 17]<16;8,2>:ub
+ mov (8) mubMSGPAYLOAD3(0, 2)<4> r[a0.2, 17]<16;8,2>:ub
+ mov (8) mubMSGPAYLOAD3(0, 3)<4> r2.31:ub
+
+ mov (8) mubMSGPAYLOAD3(1, 0)<4> r[a0.0, 49]<16;8,2>:ub
+ mov (8) mubMSGPAYLOAD3(1, 1)<4> r[a0.1, 49]<16;8,2>:ub
+ mov (8) mubMSGPAYLOAD3(1, 2)<4> r[a0.2, 49]<16;8,2>:ub
+ mov (8) mubMSGPAYLOAD3(1, 3)<4> r2.31:ub
+
+ mov (8) mubMSGPAYLOAD2(2, 0)<4> r[a0.0, 65]<16;8,2>:ub
+ mov (8) mubMSGPAYLOAD2(2, 1)<4> r[a0.1, 65]<16;8,2>:ub
+ mov (8) mubMSGPAYLOAD2(2, 2)<4> r[a0.2, 65]<16;8,2>:ub
+ mov (8) mubMSGPAYLOAD2(2, 3)<4> r2.31:ub
+
+ mov (8) mubMSGPAYLOAD2(3, 0)<4> r[a0.0, 97]<16;8,2>:ub
+ mov (8) mubMSGPAYLOAD2(3, 1)<4> r[a0.1, 97]<16;8,2>:ub
+ mov (8) mubMSGPAYLOAD2(3, 2)<4> r[a0.2, 97]<16;8,2>:ub
+ mov (8) mubMSGPAYLOAD2(3, 3)<4> r2.31:ub
+
+ mov (8) mubMSGPAYLOAD3(2, 0)<4> r[a0.0, 81]<16;8,2>:ub
+ mov (8) mubMSGPAYLOAD3(2, 1)<4> r[a0.1, 81]<16;8,2>:ub
+ mov (8) mubMSGPAYLOAD3(2, 2)<4> r[a0.2, 81]<16;8,2>:ub
+ mov (8) mubMSGPAYLOAD3(2, 3)<4> r2.31:ub
+
+ mov (8) mubMSGPAYLOAD3(3, 0)<4> r[a0.0, 113]<16;8,2>:ub
+ mov (8) mubMSGPAYLOAD3(3, 1)<4> r[a0.1, 113]<16;8,2>:ub
+ mov (8) mubMSGPAYLOAD3(3, 2)<4> r[a0.2, 113]<16;8,2>:ub
+ mov (8) mubMSGPAYLOAD3(3, 3)<4> r2.31:ub
+ // send buffer 3: headers r46 and r55, payloads MSGPAYLOAD2/3
+ send (16) null<1>:d r46 0xc 0x0A0A8018:ud
+ send (16) null<1>:d r55 0xc 0x0A0A8018:ud
+
+
+
diff --git a/src/shaders/post_processing/gen8/Set_AVS_Buf_0123_BGRA.g8a b/src/shaders/post_processing/gen8/Set_AVS_Buf_0123_BGRA.g8a
new file mode 100644
index 0000000..798564f
--- /dev/null
+++ b/src/shaders/post_processing/gen8/Set_AVS_Buf_0123_BGRA.g8a
@@ -0,0 +1,368 @@
+/*
+ * Copyright 2000-2013 Intel Corporation All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * Authors: Zhao Yakui <yakui.zhao@intel.com>
+ */
+// 7 // Total instruction count
+// 1 // Total kernel count
+
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+
+.default_execution_size (16)
+.default_register_type :ub
+
+.reg_count_total 128
+.reg_count_payload 7
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+//Color Pipe (IECP) parameters
+
+
+//ByteCopy
+
+
+//r4
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later)
+// 0000,0000,0000
+// 0001(Spawn a root thread),0001 (Root thread spawn thread)
+// = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later)
+// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+// 0000,0000 (Binding table index, added later)
+// = 0x0C098700
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+ // Message Descriptors
+ // = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+ // 1 (header present 1) 0 1010 (media block write) 000000
+ // 00000000 (binding table index - set later)
+ // = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+ // buf4 starts from r28
+
+
+//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT
+
+
+.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Define temp space for any usages
+
+
+// Common Buffers
+
+
+// temp space for rotation
+
+.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+
+.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+
+.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+
+.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+
+
+// End of common.inc
+
+
+//Module Name: Set_AVS_Buf_0123_BGRA.asm
+
+
+
+//Module Name: Set_Buf_0123_BGRA
+
+
+
+
+// Description: Includes all definitions explicit to Fast Composite.
+
+
+
+
+// End of common.inc
+
+
+//========== GRF partition ==========
+ // r0 header : r0 (1 GRF)
+ // Static parameters : r1 - r6 (6 GRFS)
+ // Inline parameters : r7 - r8 (2 GRFs)
+ // MSGSRC : r27 (1 GRF)
+//===================================
+
+//Interface:
+//========== Static Parameters (Explicit To Fast Composite) ==========
+//r1
+//CSC Set 0
+
+
+.declare udCSC_CURBE Base=r1.0 ElementSize=4 Type=ud
+
+//Constant alpha
+
+
+//r2
+
+
+// Gen7 AVS WA
+
+
+// WiDi Definitions
+
+
+//Colorfill
+
+
+ // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise.
+
+.declare ubCOLOR_PIXEL_VAL Base=r2.20 ElementSize=1 SrcRegion=<0;1,0> DstRegion=<1> Type=ub
+
+//r3
+//Normalised Ratio of Horizontal step size with main video for all layers
+
+
+ //Normalised Ratio of Horizontal step size with main video for all layers becomes
+ //Normalised Horizontal step size for all layers in VP_Setup.asm
+
+
+//r4
+//Normalised Vertical step size for all layers
+
+
+//r5
+//Normalised Vertical Frame Origin for all layers
+
+
+//r6
+//Normalised Horizontal Frame Origin for all layers
+
+
+//========== Inline Parameters (Explicit To Fast Composite) ==========
+
+
+//Main video Step X
+
+
+//====================== Binding table (Explicit To Fast Composite)=========================================
+
+
+//Used by Interlaced Scaling Kernels
+
+
+//========== Sampler State Table Index (Explicit To Fast Composite)==========
+//Sampler Index for AVS/IEF messages
+
+
+//Sampler Index for SIMD16 sampler messages
+
+
+//=============================================================================
+
+.declare fBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+
+.declare udBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+
+.declare uwBUFFER_0 Base=r64.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_1 Base=r80.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_2 Base=r96.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_3 Base=r112.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_4 Base=r28.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_5 Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+
+.declare ubBUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+.declare ub4BUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+
+//Pointer to mask reg
+
+
+//r18
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+.declare udCSC_COEFF_0 Base=r18.0 ElementSize=4 Type=ud // 1 GRF
+
+//r19
+
+
+.declare udCSC_COEFF_1 Base=r19.0 ElementSize=4 Type=ud // 1 GRF
+
+
+//r20
+
+.declare uwALPHA_MASK_REG_TEMP Base=r20.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF
+
+//r21
+
+.declare uwALPHA_MASK_REG Base=r21.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF
+
+//r22
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as
+//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT
+
+//r23
+
+
+//Lumakey
+
+
+//r24
+
+
+//r25
+
+
+//r26
+
+
+//defines to generate LABELS during compile time.
+
+
+ //AVS LAYOUT:(UUYYVVAA) -- NOTE(review): the offsets below (Y=0, U=4, V=8) read as YYUUVVAA; confirm which is intended.
+ //AVS RGBX LAYOUT (RRGGBBAA)
+ //Assign buffer channel order for Buffer 0123 in the order AUYV a0.3>A, a0.2>U, a0.1>Y, a0.0>V
+ // V = 8, Y= 0, U = 4, A = 12 (GRF offsets relative to r64).
+ // And a0.x is used as indirect-register for RGBX. R=a0.1, G=a0.2, B=a0.0
+ // B = 8, R= 0, G = 4, A = 12
+ mov (4) acc0.0<1>:w 0x6EA2:v //Packed signed nibbles 2,-6,-2,6 = (8,0,4,12) biased by -6 so that 8 and 12 fit in 4 bits
+ add (4) acc0.0<1>:w acc0<4;4,1>:w 70:uw //70 = 64 + 6: removes the bias and adds base GRF 64 -> 72,64,68,76
+ shl (4) r22.0<1>:w acc0<4;4,1>:w 5:uw //GRF number * 32 = BYTE address, stored in words r22.0-r22.3
+
+ //OPT: wAVS_SU_SHUFFLE_PTR_0 and udAVS_SU_SHUFFLE_OFF_0 are sub-regs of same GRF. -rT
+
+ //SU LAYOUT:(VYUAVYUA)
+ //V = 4, Y = 2, U = 0, A = 6 -- NOTE(review): with a0.1>Y, a0.2>U the constant below yields Y = 0, U = 2; confirm.
+ //B = 4, G = 2, R = 0, A = 6
+ mov (4) acc0.0<1>:w 0x6204:v //Packed nibbles 4,0,2,6 (all fit in 4 bits, so no bias is needed)
+ add (4) acc0.0<1>:w acc0<4;4,1>:w 64:uw //Add base GRF 64 -> 68,64,66,70
+ shl (4) r18.0<1>:w acc0<4;4,1>:w 5:uw { NoDDClr } //Convert to BYTE address.
+ //NoDDClr above pairs with NoDDChk below: both write r18, presumably in disjoint sub-registers (TODO confirm r18.4:ud is byte offset 16).
+ //OFFSET: 0x1000100 packs two 16-bit halves of 0x0100 each.
+ mov (1) r18.4<1>:ud 0x1000100:ud { NoDDChk }
+
+
diff --git a/src/shaders/post_processing/gen8/Set_AVS_Buf_0123_PL2.g8a b/src/shaders/post_processing/gen8/Set_AVS_Buf_0123_PL2.g8a
new file mode 100644
index 0000000..1d38ae2
--- /dev/null
+++ b/src/shaders/post_processing/gen8/Set_AVS_Buf_0123_PL2.g8a
@@ -0,0 +1,361 @@
+/*
+ * Copyright 2000-2011 Intel Corporation All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+// 7 // Total instruction count
+// 1 // Total kernel count
+
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+
+.default_execution_size (16)
+.default_register_type :ub
+
+.reg_count_total 128
+.reg_count_payload 7
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+//Color Pipe (IECP) parameters
+
+
+//ByteCopy
+
+
+//r4
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later)
+// 0000,0000,0000
+// 0001(Spawn a root thread),0001 (Root thread spawn thread)
+// = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later)
+// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+// 0000,0000 (Binding table index, added later)
+// = 0x0C098700
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+ // Message Descriptors
+ // = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+ // 1 (header present 1) 0 1010 (media block write) 000000
+ // 00000000 (binding table index - set later)
+ // = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+ // buf4 starts from r28
+
+
+//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT
+
+
+.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Define temp space for any usages
+
+
+// Common Buffers
+
+
+// temp space for rotation
+
+.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+
+.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+
+.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+
+.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+
+
+// End of common.inc
+
+
+//Module Name: Set_AVS_Buf_0123_PL2.asm
+
+
+
+//Module Name: Set_Buf_0123_PL2
+
+
+
+
+// Description: Includes all definitions explicit to Fast Composite.
+
+
+
+
+// End of common.inc
+
+
+//========== GRF partition ==========
+ // r0 header : r0 (1 GRF)
+ // Static parameters : r1 - r6 (6 GRFS)
+ // Inline parameters : r7 - r8 (2 GRFs)
+ // MSGSRC : r27 (1 GRF)
+//===================================
+
+//Interface:
+//========== Static Parameters (Explicit To Fast Composite) ==========
+//r1
+//CSC Set 0
+
+
+.declare udCSC_CURBE Base=r1.0 ElementSize=4 Type=ud
+
+//Constant alpha
+
+
+//r2
+
+
+// Gen7 AVS WA
+
+
+// WiDi Definitions
+
+
+//Colorfill
+
+
+ // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise.
+
+.declare ubCOLOR_PIXEL_VAL Base=r2.20 ElementSize=1 SrcRegion=<0;1,0> DstRegion=<1> Type=ub
+
+//r3
+//Normalised Ratio of Horizontal step size with main video for all layers
+
+
+ //Normalised Ratio of Horizontal step size with main video for all layers becomes
+ //Normalised Horizontal step size for all layers in VP_Setup.asm
+
+
+//r4
+//Normalised Vertical step size for all layers
+
+
+//r5
+//Normalised Vertical Frame Origin for all layers
+
+
+//r6
+//Normalised Horizontal Frame Origin for all layers
+
+
+//========== Inline Parameters (Explicit To Fast Composite) ==========
+
+
+//Main video Step X
+
+
+//====================== Binding table (Explicit To Fast Composite)=========================================
+
+
+//Used by Interlaced Scaling Kernels
+
+
+//========== Sampler State Table Index (Explicit To Fast Composite)==========
+//Sampler Index for AVS/IEF messages
+
+
+//Sampler Index for SIMD16 sampler messages
+
+
+//=============================================================================
+
+.declare fBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+
+.declare udBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+
+.declare uwBUFFER_0 Base=r64.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_1 Base=r80.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_2 Base=r96.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_3 Base=r112.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_4 Base=r28.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_5 Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+
+.declare ubBUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+.declare ub4BUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+
+//Pointer to mask reg
+
+
+//r18
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+.declare udCSC_COEFF_0 Base=r18.0 ElementSize=4 Type=ud // 1 GRF
+
+//r19
+
+
+.declare udCSC_COEFF_1 Base=r19.0 ElementSize=4 Type=ud // 1 GRF
+
+
+//r20
+
+.declare uwALPHA_MASK_REG_TEMP Base=r20.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF
+
+//r21
+
+.declare uwALPHA_MASK_REG Base=r21.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF
+
+//r22
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as
+//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT
+
+//r23
+
+
+//Lumakey
+
+
+//r24
+
+
+//r25
+
+
+//r26
+
+
+//defines to generate LABELS during compile time.
+
+
+ //AVS LAYOUT: (YYUUVVAA)
+ //Assign buffer channel order for Buffer 0123 in the order AUYV a0.3>A, a0.2>U, a0.1>Y, a0.0>V
+ //For PL2-AVS: V = 8, Y= 0, U = 4, A = 12 (GRF offsets relative to r64).
+ mov (4) acc0.0<1>:w 0x6EA2:v //Subtract 6 from 8,0,4,12 -> packed signed nibbles 2,-6,-2,6 (so 8 and 12 fit in 4 bits)
+ add (4) acc0.0<1>:w acc0<4;4,1>:w 70:uw //add 6 back, plus base GRF 64 (70 = 64 + 6) -> 72,64,68,76
+ shl (4) r22.0<1>:w acc0<4;4,1>:w 5:uw //Convert to BYTE address (GRF number * 32), stored in words r22.0-r22.3
+
+ //OPT: wAVS_SU_SHUFFLE_PTR_0 and udAVS_SU_SHUFFLE_OFF_0 are sub-regs of same GRF. -rT
+ //SU LAYOUT:(YUVAYUVA)
+ //V = 4, Y = 0, U = 2, A = 6
+ mov (4) acc0.0<1>:w 0x6204:v //Packed nibbles 4,0,2,6 (all fit in 4 bits, so no bias is needed)
+ add (4) acc0.0<1>:w acc0<4;4,1>:w 64:uw //Add base GRF 64 -> 68,64,66,70
+ shl (4) r18.0<1>:w acc0<4;4,1>:w 5:uw { NoDDClr } //Convert to BYTE address.
+ //NoDDClr above pairs with NoDDChk below: both write r18, presumably in disjoint sub-registers (TODO confirm r18.4:ud is byte offset 16).
+ //OFFSET: 0x1000100 packs two 16-bit halves of 0x0100 each.
+ mov (1) r18.4<1>:ud 0x1000100:ud { NoDDChk }
+
+
diff --git a/src/shaders/post_processing/gen8/Set_AVS_Buf_0123_PL3.g8a b/src/shaders/post_processing/gen8/Set_AVS_Buf_0123_PL3.g8a
new file mode 100644
index 0000000..0533666
--- /dev/null
+++ b/src/shaders/post_processing/gen8/Set_AVS_Buf_0123_PL3.g8a
@@ -0,0 +1,362 @@
+/*
+ * Copyright 2000-2011 Intel Corporation All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+// 7 // Total instruction count
+// 1 // Total kernel count
+
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+
+.default_execution_size (16)
+.default_register_type :ub
+
+.reg_count_total 128
+.reg_count_payload 7
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+//Color Pipe (IECP) parameters
+
+
+//ByteCopy
+
+
+//r4
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later)
+// 0000,0000,0000
+// 0001(Spawn a root thread),0001 (Root thread spawn thread)
+// = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later)
+// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+// 0000,0000 (Binding table index, added later)
+// = 0x0C098700
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+ // Message Descriptors
+ // = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+ // 1 (header present 1) 0 1010 (media block write) 000000
+ // 00000000 (binding table index - set later)
+ // = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+ // buf4 starts from r28
+
+
+//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT
+
+
+.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Define temp space for any usages
+
+
+// Common Buffers
+
+
+// temp space for rotation
+
+.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+
+.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+
+.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+
+.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+
+
+// End of common.inc
+
+
+//Module Name: Set_AVS_Buf_0123_PL3.asm
+
+
+
+//Module Name: Set_Buf_0123_PL3
+
+
+
+
+// Description: Includes all definitions explicit to Fast Composite.
+
+
+
+
+// End of common.inc
+
+
+//========== GRF partition ==========
+ // r0 header : r0 (1 GRF)
+ // Static parameters : r1 - r6 (6 GRFS)
+ // Inline parameters : r7 - r8 (2 GRFs)
+ // MSGSRC : r27 (1 GRF)
+//===================================
+
+//Interface:
+//========== Static Parameters (Explicit To Fast Composite) ==========
+//r1
+//CSC Set 0
+
+
+.declare udCSC_CURBE Base=r1.0 ElementSize=4 Type=ud
+
+//Constant alpha
+
+
+//r2
+
+
+// Gen7 AVS WA
+
+
+// WiDi Definitions
+
+
+//Colorfill
+
+
+ // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise.
+
+.declare ubCOLOR_PIXEL_VAL Base=r2.20 ElementSize=1 SrcRegion=<0;1,0> DstRegion=<1> Type=ub
+
+//r3
+//Normalised Ratio of Horizontal step size with main video for all layers
+
+
+ //Normalised Ratio of Horizontal step size with main video for all layers becomes
+ //Normalised Horizontal step size for all layers in VP_Setup.asm
+
+
+//r4
+//Normalised Vertical step size for all layers
+
+
+//r5
+//Normalised Vertical Frame Origin for all layers
+
+
+//r6
+//Normalised Horizontal Frame Origin for all layers
+
+
+//========== Inline Parameters (Explicit To Fast Composite) ==========
+
+
+//Main video Step X
+
+
+//====================== Binding table (Explicit To Fast Composite)=========================================
+
+
+//Used by Interlaced Scaling Kernels
+
+
+//========== Sampler State Table Index (Explicit To Fast Composite)==========
+//Sampler Index for AVS/IEF messages
+
+
+//Sampler Index for SIMD16 sampler messages
+
+
+//=============================================================================
+
+.declare fBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+
+.declare udBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+
+.declare uwBUFFER_0 Base=r64.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_1 Base=r80.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_2 Base=r96.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_3 Base=r112.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_4 Base=r28.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_5 Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+
+.declare ubBUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+.declare ub4BUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+
+//Pointer to mask reg
+
+
+//r18
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+.declare udCSC_COEFF_0 Base=r18.0 ElementSize=4 Type=ud // 1 GRF
+
+//r19
+
+
+.declare udCSC_COEFF_1 Base=r19.0 ElementSize=4 Type=ud // 1 GRF
+
+
+//r20
+
+.declare uwALPHA_MASK_REG_TEMP Base=r20.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF
+
+//r21
+
+.declare uwALPHA_MASK_REG Base=r21.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF
+
+//r22
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as
+//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT
+
+//r23
+
+
+//Lumakey
+
+
+//r24
+
+
+//r25
+
+
+//r26
+
+
+//defines to generate LABELS during compile time.
+
+
+ //AVS LAYOUT: (YYUUVVAA)
+ //Assign buffer channel order for Buffer 0123 in the order AUYV a0.3>A, a0.2>U, a0.1>Y, a0.0>V
+ //For PL3-AVS: V = 8, Y= 0, U = 4, A = 12 (GRF offsets relative to r64).
+ mov (4) acc0.0<1>:w 0x6EA2:v
+ //Subtract 6 from 8,0,4,12 -> packed signed nibbles 2,-6,-2,6 (so 8 and 12 fit in 4 bits)
+ add (4) acc0.0<1>:w acc0<4;4,1>:w 70:uw //add 6 back, plus base GRF 64 (70 = 64 + 6) -> 72,64,68,76
+ shl (4) r22.0<1>:w acc0<4;4,1>:w 5:uw //Convert to BYTE address (GRF number * 32), stored in words r22.0-r22.3
+
+ //OPT: wAVS_SU_SHUFFLE_PTR_0 and udAVS_SU_SHUFFLE_OFF_0 are sub-regs of same GRF. -rT
+ //SU LAYOUT:(YUVAYUVA)
+ //V = 4, Y = 0, U = 2, A = 6
+ mov (4) acc0.0<1>:w 0x6204:v //Packed nibbles 4,0,2,6 (all fit in 4 bits, so no bias is needed)
+ add (4) acc0.0<1>:w acc0<4;4,1>:w 64:uw //Add base GRF 64 -> 68,64,66,70
+ shl (4) r18.0<1>:w acc0<4;4,1>:w 5:uw { NoDDClr } //Convert to BYTE address.
+ //NoDDClr above pairs with NoDDChk below: both write r18, presumably in disjoint sub-registers (TODO confirm r18.4:ud is byte offset 16).
+ //OFFSET: 0x1000100 packs two 16-bit halves of 0x0100 each.
+ mov (1) r18.4<1>:ud 0x1000100:ud { NoDDChk }
+
+
diff --git a/src/shaders/post_processing/gen8/Set_AVS_Buf_0123_VYUA.g8a b/src/shaders/post_processing/gen8/Set_AVS_Buf_0123_VYUA.g8a
new file mode 100644
index 0000000..3573e2b
--- /dev/null
+++ b/src/shaders/post_processing/gen8/Set_AVS_Buf_0123_VYUA.g8a
@@ -0,0 +1,366 @@
+/*
+ * Copyright 2000-2011 Intel Corporation All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+// 7 // Total instruction count
+// 1 // Total kernel count
+
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+
+.default_execution_size (16)
+.default_register_type :ub
+
+.reg_count_total 128
+.reg_count_payload 7
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+//Color Pipe (IECP) parameters
+
+
+//ByteCopy
+
+
+//r4
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later)
+// 0000,0000,0000
+// 0001(Spawn a root thread),0001 (Root thread spawn thread)
+// = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later)
+// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+// 0000,0000 (Binding table index, added later)
+// = 0x0C098700
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+ // Message Descriptors
+ // = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+ // 1 (header present 1) 0 1010 (media block write) 000000
+ // 00000000 (binding table index - set later)
+ // = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+ // buf4 starts from r28
+
+
+//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT
+
+
+.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Define temp space for any usages
+
+
+// Common Buffers
+
+
+// temp space for rotation
+
+.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+
+.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+
+.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+
+.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+
+
+// End of common.inc
+
+
+//Module Name: Set_AVS_Buf_0123_VYUA.asm
+
+
+
+//Module Name: Set_Buf_0123_VYUA
+
+
+
+
+// Description: Includes all definitions explicit to Fast Composite.
+
+
+
+
+// End of common.inc
+
+
+//========== GRF partition ==========
+ // r0 header : r0 (1 GRF)
+ // Static parameters : r1 - r6 (6 GRFS)
+ // Inline parameters : r7 - r8 (2 GRFs)
+ // MSGSRC : r27 (1 GRF)
+//===================================
+
+//Interface:
+//========== Static Parameters (Explicit To Fast Composite) ==========
+//r1
+//CSC Set 0
+
+
+.declare udCSC_CURBE Base=r1.0 ElementSize=4 Type=ud
+
+//Constant alpha
+
+
+//r2
+
+
+// Gen7 AVS WA
+
+
+// WiDi Definitions
+
+
+//Colorfill
+
+
+ // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise.
+
+.declare ubCOLOR_PIXEL_VAL Base=r2.20 ElementSize=1 SrcRegion=<0;1,0> DstRegion=<1> Type=ub
+
+//r3
+//Normalised Ratio of Horizontal step size with main video for all layers
+
+
+ //Normalised Ratio of Horizontal step size with main video for all layers becomes
+ //Normalised Horizontal step size for all layers in VP_Setup.asm
+
+
+//r4
+//Normalised Vertical step size for all layers
+
+
+//r5
+//Normalised Vertical Frame Origin for all layers
+
+
+//r6
+//Normalised Horizontal Frame Origin for all layers
+
+
+//========== Inline Parameters (Explicit To Fast Composite) ==========
+
+
+//Main video Step X
+
+
+//====================== Binding table (Explicit To Fast Composite)=========================================
+
+
+//Used by Interlaced Scaling Kernels
+
+
+//========== Sampler State Table Index (Explicit To Fast Composite)==========
+//Sampler Index for AVS/IEF messages
+
+
+//Sampler Index for SIMD16 sampler messages
+
+
+//=============================================================================
+
+.declare fBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+
+.declare udBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+
+.declare uwBUFFER_0 Base=r64.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_1 Base=r80.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_2 Base=r96.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_3 Base=r112.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_4 Base=r28.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_5 Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+
+.declare ubBUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+.declare ub4BUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+
+//Pointer to mask reg
+
+
+//r18
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+.declare udCSC_COEFF_0 Base=r18.0 ElementSize=4 Type=ud // 1 GRF
+
+//r19
+
+
+.declare udCSC_COEFF_1 Base=r19.0 ElementSize=4 Type=ud // 1 GRF
+
+
+//r20
+
+.declare uwALPHA_MASK_REG_TEMP Base=r20.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF
+
+//r21
+
+.declare uwALPHA_MASK_REG Base=r21.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF
+
+//r22
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as
+//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT
+
+//r23
+
+
+//Lumakey
+
+
+//r24
+
+
+//r25
+
+
+//r26
+
+
+//defines to generate LABELS during compile time.
+
+
+ //AVS LAYOUT:(VVYYUUAA)
+ //Assign buffer channel order for Buffer 0123 in the order AUYV a0.3>A, a0.2>U, a0.1>Y, a0.0>V
+ // V = 0, Y= 4, U = 8, A = 12.
+ //YCrCb or YCrCb_Swap returns the following data:
+ //Cr is returned on R-channel. 0
+ //Y is returned on G channel. 4
+ //Cb is returned on B channel. 8
+ mov (4) acc0.0<1>:w 0x62EA:v //Subtract 6 from 0,4,8,12 so each fits a signed :v nibble: -6,-2,2,6 (lowest nibble first)
+ add (4) acc0.0<1>:w acc0<4;4,1>:w 70:uw //add 6 back (70 = 64 + 6) -> 64,68,72,76; 64 equals BUFFER_0's base r64, presumably on purpose - TODO confirm
+ shl (4) r22.0<1>:w acc0<4;4,1>:w 5:uw //Convert to BYTE address (x32); four words land in r22.0-r22.3
+
+ //OPT: wAVS_SU_SHUFFLE_PTR_0 and udAVS_SU_SHUFFLE_OFF_0 are sub-regs of same GRF. -rT
+
+ //SU LAYOUT:(VYUAVYUA)
+ //V = 0, Y = 2, U = 4, A = 6
+ mov (4) acc0.0<1>:w 0x6420:v //:v nibbles, lowest first: 0,2,4,6 (V,Y,U,A); no bias needed here
+ add (4) acc0.0<1>:w acc0<4;4,1>:w 64:uw //-> 64,66,68,70
+ shl (4) r18.0<1>:w acc0<4;4,1>:w 5:uw { NoDDClr } //Convert to BYTE address. NoDDClr here, NoDDChk on the r18.4 write below.
+
+ //OFFSET: stored in r18.4:ud, i.e. the same GRF as the four pointers above; both 16-bit halves are 0x0100
+ mov (1) r18.4<1>:ud 0x1000100:ud { NoDDChk }
+
+
diff --git a/src/shaders/post_processing/gen8/Set_Layer_0.g8a b/src/shaders/post_processing/gen8/Set_Layer_0.g8a
new file mode 100644
index 0000000..b1b574e
--- /dev/null
+++ b/src/shaders/post_processing/gen8/Set_Layer_0.g8a
@@ -0,0 +1,483 @@
+/*
+ * Copyright 2000-2011 Intel Corporation All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+// 18 // Total instruction count
+// 1 // Total kernel count
+
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+
+.default_execution_size (16)
+.default_register_type :ub
+
+.reg_count_total 128
+.reg_count_payload 7
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+//Color Pipe (IECP) parameters
+
+
+//ByteCopy
+
+
+//r4
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later)
+// 0000,0000,0000
+// 0001(Spawn a root thread),0001 (Root thread spawn thread)
+// = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later)
+// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+// 0000,0000 (Binding table index, added later)
+// = 0x0C098700
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+ // Message Descriptors
+ // = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+ // 1 (header present 1) 0 1010 (media block write) 000000
+ // 00000000 (binding table index - set later)
+ // = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+ // buf4 starts from r28
+// Message-type selectors: eight values stepping by 0x10000000 (bits 30:28). Added into r25.7 by the kernel body below.
+#define MSG_AVS_SAMPLE 0x00000000
+#define MSG_CONVOLE_SAMPLE 0x10000000
+#define MSG_MINMAX_SAMPLE 0x20000000
+#define MSG_MINMAXF_SAMPLE 0x30000000
+#define MSG_ERODE_SAMPLE 0x40000000
+#define MSG_DILATE_SAMPLE 0x50000000
+#define MSG_BOOLCENT_SAMPLE 0x60000000
+#define MSG_CENTROID_SAMPLE 0x70000000
+// IEF control: one flag at bit 27 (set = bypass, clear = enable)
+#define MSG_IEF_BYPASS 0x08000000
+#define MSG_IEF_ENABLE 0x00000000
+// Block-size selector: four values stepping by 0x02000000 (bits 26:25)
+//16x4 or 8x4 or 16x8 or 4x4
+#define MSG_AVS_164 0x00000000
+#define MSG_AVS_84 0x02000000
+#define MSG_AVS_168 0x04000000
+#define MSG_AVS_44 0x06000000
+
+//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT
+
+
+.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Define temp space for any usages
+
+
+// Common Buffers
+
+
+// temp space for rotation
+
+.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+
+.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+
+.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+
+.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+
+
+// End of common.inc
+
+
+
+
+
+
+//Module name: Set_Layer_N.inc
+
+
+
+
+// Description: Includes all definitions explicit to Fast Composite.
+
+
+
+
+// End of common.inc
+
+
+//========== GRF partition ==========
+ // r0 header : r0 (1 GRF)
+ // Static parameters : r1 - r6 (6 GRFS)
+ // Inline parameters : r7 - r8 (2 GRFs)
+ // MSGSRC : r27 (1 GRF)
+//===================================
+
+//Interface:
+//========== Static Parameters (Explicit To Fast Composite) ==========
+//r1
+//CSC Set 0
+
+
+.declare udCSC_CURBE Base=r1.0 ElementSize=4 Type=ud
+
+//Constant alpha
+
+
+//r2
+
+
+// Gen7 AVS WA
+
+
+// WiDi Definitions
+
+
+//Colorfill
+
+
+ // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise.
+
+.declare ubCOLOR_PIXEL_VAL Base=r2.20 ElementSize=1 SrcRegion=<0;1,0> DstRegion=<1> Type=ub
+
+//r3
+//Normalised Ratio of Horizontal step size with main video for all layers
+
+
+ //Normalised Ratio of Horizontal step size with main video for all layers becomes
+ //Normalised Horizontal step size for all layers in VP_Setup.asm
+
+
+//r4
+//Normalised Vertical step size for all layers
+
+
+//r5
+//Normalised Vertical Frame Origin for all layers
+
+
+//r6
+//Normalised Horizontal Frame Origin for all layers
+
+
+//========== Inline Parameters (Explicit To Fast Composite) ==========
+
+
+//Main video Step X
+
+
+//====================== Binding table (Explicit To Fast Composite)=========================================
+
+
+//Used by Interlaced Scaling Kernels
+
+
+//========== Sampler State Table Index (Explicit To Fast Composite)==========
+//Sampler Index for AVS/IEF messages
+
+
+//Sampler Index for SIMD16 sampler messages
+
+
+//=============================================================================
+
+.declare fBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+
+.declare udBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+
+.declare uwBUFFER_0 Base=r64.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_1 Base=r80.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_2 Base=r96.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_3 Base=r112.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_4 Base=r28.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_5 Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+
+.declare ubBUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+.declare ub4BUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+
+//Pointer to mask reg
+
+
+//r18
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+.declare udCSC_COEFF_0 Base=r18.0 ElementSize=4 Type=ud // 1 GRF
+
+//r19
+
+
+.declare udCSC_COEFF_1 Base=r19.0 ElementSize=4 Type=ud // 1 GRF
+
+
+//r20
+
+.declare uwALPHA_MASK_REG_TEMP Base=r20.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF
+
+//r21
+
+.declare uwALPHA_MASK_REG Base=r21.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF
+
+//r22
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as
+//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT
+
+//r23
+
+
+//Lumakey
+
+
+//r24
+
+
+//r25
+
+
+//r26
+
+
+//defines to generate LABELS during compile time.
+
+
+//Used to generate LABELS at compile time.
+
+
+//definitions for Expand Mask: typed views (uw/ub/ud) aliasing one GRF each - Temp1 = r17, Temp2 = r16, Temp3 = r15
+.declare uwMask_Temp1 Base=r17.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF
+.declare ubMask_Temp1 Base=r17.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub // 1 GRF
+.declare udMask_Temp1 Base=r17.0 ElementSize=4 Type=ud // 1 GRF
+.declare uwMask_Temp2 Base=r16.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF
+.declare ubMask_Temp2 Base=r16.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub // 1 GRF
+.declare udMask_Temp2 Base=r16.0 ElementSize=4 Type=ud // 1 GRF
+
+.declare uwMask_Temp3 Base=r15.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF
+.declare ubMask_Temp3 Base=r15.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub // 1 GRF
+// ud views of the alpha mask registers; the uw views of the same r21 / r20 are declared earlier in this file
+.declare udALPHA_MASK_REG Base=r21.0 ElementSize=4 Type=ud // 1 GRF
+.declare udALPHA_MASK_REG_TEMP Base=r20.0 ElementSize=4 Type=ud // 1 GRF
+
+
+//Initialize mask reg to FFFF: all 16 word lanes of uwALPHA_MASK_REG (based at r21.0) are set to 0xFFFF
+
+ mov (16) uwALPHA_MASK_REG(0)<1> 0xFFFF:uw
+
+
+//Fast jump for -
+//LAYER0: we determine whether layer 0 is to be loaded and processed or not based
+// on block mask in module "Set_Layer_0" and store result in f0.1.
+// This flag is then used directly while loading buf0-3 and during colorfill.
+// (So flag f0.1 should not be changed from Set_Layer_0 till Colorfill)
+//
+//LAYER1-7: For all other layers, we compute whether layer is to be loaded and processed
+// based on block mask in module "Set_Layer_1-7" and store result in SKIP_LAYER
+// variable.
+// While Loading buf 4 and 5, we move SKIP_LAYER to f0.0 every time and use it
+// for Loading.
+// For processing though, we move SKIP_LAYER only once to f0.1 in module
+// "Set_Buf0_Buf4" and use f0.1 for deciding whether layer 1-7 (all 4 sub blocks)
+// is to be processed or not.
+// (So flag f0.1 should not be modified from module "Set_Buf0_Buf4" till the module
+// that processes sub-block 3).
+//
+//None of the above fast jumps, apply to CSC modules. We always perform CSC irrespective of mask.
+//
+//Example: (Without going into finer details)
+// Typical Combined kernel:
+//
+// (let var = decision whether to load/process that layer)
+//
+// Set_Layer_0 //f0.1 <- var
+// ..
+// Set_Layer_1 //f0.1 <- var, SKIP_LAYER <- var
+// ..
+// Load buf 0 //use f0.1
+// Load buf 4 //f0.0 <- SKIP_LAYER
+// Load buf 1 //use f0.1
+// Load buf 5 //f0.0 <- SKIP_LAYER
+// Load buf 2 //use f0.1
+// Load buf 3 //use f0.1
+// ..
+// ..
+// Colorfill
+// ..
+// Set_Buf0_Buf4 //f0.1 <- SKIP_LAYER
+// process0-4 //Use f0.1
+// Load buf 4
+// Set_Buf1_Buf5
+// process1-5
+// Load buf 5
+// ..
+// Set_Layer_2 //f0.1 <-var, SKIP_LAYER <- var
+// ..
+// Set_Buf2_Buf4
+// process2-4
+// Load buf 4
+// Set_Buf3_Buf5
+// process3-5
+// Load buf 5
+// ..
+
+
+ and (1) r24.2<1>:ub r2.2<0;1,0>:uw 3:uw //r24.2:ub = low 2 bits of r2.2:uw (presumably the 0/90/180/270 rotation code noted for r2 - TODO confirm)
+
+
+ //Copy all AVS Payload data into r25 (one dword slot per field)
+ // Setup Message Payload Header for 1st block of Media Sampler 8x8 (16x4 for IVB+)
+ //currently the dx & dy is passed by Constant buffer (zero)
+ mov (1) r25.0<1>:f r7.6<0;1,0>:f //NLAS dy
+ mov (1) r25.6<1>:f r7.5<0;1,0>:f //NLAS dx
+ mov (1) r25.4<1>:f r3.0<0;1,0>:f //Step X
+ mov (1) r25.5<1>:f r4.0<0;1,0>:f //Step Y
+
+
+ mov (1) r25.2<1>:f r6.0<0;1,0>:f //Orig X
+ mov (1) r25.3<1>:f r5.0<0;1,0>:f //Orig Y
+
+ mov (1) r25.7<1>:ud 0:ud //Clear r25.7, then add the selector bits; with the defines in this file the sum is 0x08000000 (only IEF bypass is non-zero)
+ add (1) r25.7<1>:ud r25.7<0;1,0>:ud MSG_AVS_SAMPLE + MSG_AVS_164 + MSG_IEF_BYPASS:ud
+
+ //NLAS calculations for 2nd half of blocks of Media Sampler 8x8:
+ // X(i) = X0 + dx*i + ddx*i*(i-1)/2 ==> X(8) = X0 + dx*8 +ddx*28
+ // dx(i)= dx(0) + ddx*i ==> dx(8)= dx + ddx*8
+ // In the code below X0 = r6.0 (Orig X), dx = r3.0 (Step X) and ddx = r7.5 (the value labelled "NLAS dx" above)
+ //OPTIMIZATION: fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY - are sub registers of same GRF. Use NODDCLR NODDCHK. -rT
+
+ // Calculating X(8)
+ mov (1) acc0.2<1>:f r6.0<0;1,0>:f //acc = X0
+ mac (1) acc0.2<1>:f r3.0<0;1,0>:f 8.0:f //acc += dx*8
+ mac (1) r23.2<1>:f r7.5<0;1,0>:f 28.0:f { NoDDClr } //r23.2 = acc + ddx*28; first write of the r23 group, so NoDDClr only
+
+ // Calculating Y(4)
+ mul (1) r23.1<1>:f r4.0<0;1,0>:f 4.0:f { NoDDClr, NoDDChk } //dY*4
+
+ // Calculating dx(8)
+ mov (1) acc0.4<1>:f r3.0<0;1,0>:f //acc = dx
+ mac (1) r23.4<1>:f r7.5<0;1,0>:f 8.0:f { NoDDClr, NoDDChk } //r23.4 = acc + ddx*8
+
+ // Binding Index (zero for layer 0); last write of the r23 group, so NoDDChk only
+ mov (1) r23.5<1>:ud 0:ud { NoDDChk }
+
+
+SKIP_LAYER_L0:
+ nop //No instruction in this file branches to the label above; execution falls through to this nop
+
+
diff --git a/src/shaders/post_processing/gen8/VP_Setup.g8a b/src/shaders/post_processing/gen8/VP_Setup.g8a
new file mode 100644
index 0000000..95f5fe2
--- /dev/null
+++ b/src/shaders/post_processing/gen8/VP_Setup.g8a
@@ -0,0 +1,440 @@
+/*
+ * Copyright 2000-2011 Intel Corporation All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * Copyright 2000-2011 Intel Corporation All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * Authors: Zhao Yakui <yakui.zhao@intel.com>
+ */
+
+// 326 // Total instruction count
+// 1 // Total kernel count
+
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+
+.default_execution_size (16)
+.default_register_type :ub
+
+.reg_count_total 128
+.reg_count_payload 7
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+//Color Pipe (IECP) parameters
+
+
+//ByteCopy
+
+
+//r4
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later)
+// 0000,0000,0000
+// 0001(Spawn a root thread),0001 (Root thread spawn thread)
+// = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later)
+// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+// 0000,0000 (Binding table index, added later)
+// = 0x0C098700
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+ // Message Descriptors
+ // = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+ // 1 (header present 1) 0 1010 (media block write) 000000
+ // 00000000 (binding table index - set later)
+ // = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+ // buf4 starts from r28
+
+
+//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT
+
+
+.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Define temp space for any usages
+
+
+// Common Buffers
+
+
+// temp space for rotation
+
+.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+
+.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+
+.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+
+.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+
+
+// End of common.inc
+
+
+// FileName: VP_Setup.asm
+// Author: Vivek Kumar
+// Description: Sets up all parameters for the Video Processing Kernel
+
+
+
+
+// Description: Includes all definitions explicit to Fast Composite.
+
+
+
+
+// End of common.inc
+
+
+//========== GRF partition ==========
+ // r0 header : r0 (1 GRF)
+ // Static parameters : r1 - r6 (6 GRFS)
+ // Inline parameters : r7 - r8 (2 GRFs)
+ // MSGSRC : r27 (1 GRF)
+//===================================
+
+//Interface:
+//========== Static Parameters (Explicit To Fast Composite) ==========
+//r1
+//CSC Set 0
+
+
+.declare udCSC_CURBE Base=r1.0 ElementSize=4 Type=ud
+
+//Constant alpha
+
+
+//r2
+
+
+// Gen7 AVS WA
+
+
+// WiDi Definitions
+
+
+//Colorfill
+
+
+ // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise.
+
+.declare ubCOLOR_PIXEL_VAL Base=r2.20 ElementSize=1 SrcRegion=<0;1,0> DstRegion=<1> Type=ub
+
+//r3
+//Normalised Ratio of Horizontal step size with main video for all layers
+
+
+ //Normalised Ratio of Horizontal step size with main video for all layers becomes
+ //Normalised Horizontal step size for all layers in VP_Setup.asm
+
+
+//r4
+//Normalised Vertical step size for all layers
+
+
+//r5
+//Normalised Vertical Frame Origin for all layers
+
+
+//r6
+//Normalised Horizontal Frame Origin for all layers
+
+
+//========== Inline Parameters (Explicit To Fast Composite) ==========
+
+
+//Main video Step X
+
+
+//====================== Binding table (Explicit To Fast Composite)=========================================
+
+
+//Used by Interlaced Scaling Kernels
+
+
+//========== Sampler State Table Index (Explicit To Fast Composite)==========
+//Sampler Index for AVS/IEF messages
+
+
+//Sampler Index for SIMD16 sampler messages
+
+
+//=============================================================================
+
+.declare fBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f // float view; BUFFER_0..3 are based 16 registers apart (r64, r80, r96, r112)
+.declare fBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f // BUFFER_4 and BUFFER_5 are based at r28 and r46
+.declare fBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+
+.declare udBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud // same base registers as fBUFFER_n, viewed as unsigned dwords
+.declare udBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+
+.declare uwBUFFER_0 Base=r64.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw // same base registers, viewed as contiguous unsigned words
+.declare uwBUFFER_1 Base=r80.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_2 Base=r96.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_3 Base=r112.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_4 Base=r28.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_5 Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+
+.declare ubBUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub // same base registers, viewed as contiguous unsigned bytes
+.declare ubBUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+.declare ub4BUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub // byte view with a stride of 4: one byte out of every dword, 8 per 32-byte row
+.declare ub4BUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+
+//Pointer to mask reg
+
+
+//r18
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+.declare udCSC_COEFF_0 Base=r18.0 ElementSize=4 Type=ud // 1 GRF
+
+//r19
+
+
+.declare udCSC_COEFF_1 Base=r19.0 ElementSize=4 Type=ud // 1 GRF
+
+
+//r20
+
+.declare uwALPHA_MASK_REG_TEMP Base=r20.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF
+
+//r21
+
+.declare uwALPHA_MASK_REG Base=r21.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF
+
+//r22
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as
+//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT
+
+//r23
+
+
+//Lumakey
+
+
+//r24
+
+
+//r25
+
+
+//r26
+
+
+//defines to generate LABELS during compile time.
+
+
+//Setup pointer to the inline parameter
+
+// Copy MSG HDR
+ mov (8) r27.0<1>:ud r0.0<8;8,1>:ud // Initialize message payload header with R0
+
+// Only one layer is enough
+
+//temp; remove it once unread msg warnings are resolved -vK
+mov (8) r25<1>:ud r0.0<8;8,1>:ud // r25 = copy of r0
+mov (8) r26<1>:ud r0.0<8;8,1>:ud // r26 = copy of r0
+
+// Calculate StepX for all layers and overwrite it on the ratio
+ mul (8) r3.0<1>:f r3.0<8;8,1>:f r7.4<0;1,0>:f // r3[0..7] *= r7.4 (scalar broadcast): StepX = StepX_ratio * VideoStepX, the ratio having been StepX / VideoStepX
+
+ //Normalised Ratio of Horizontal step size with main video for all layers now becomes
+ //Normalised Horizontal step size for all layers
+
+// Calculate block origin for all layers and overwrite it on the frame origin
+ mov (2) r8.5<1>:f r7.0<2;2,1>:w //Convert origin from word to float: r8.5 = float(word r7.0), r8.6 = float(word r7.1)
+
+ cmp.e.f0.0 (1) null<1>:d r2.26<0;1,0>:ub 1:uw // f0.0 = (byte r2.26 == 1); every (-f0.0) instruction below runs only when f0.0 is clear. NOTE(review): meaning of r2.26 is not visible here
+
+
+ shr (1) r17.0<1>:uw r2.2<0;1,0>:uw 0:uw // r17.0 = word r2.2 >> 0 (shift count is 0, so this is a plain copy)
+ and (1) r17.0<1>:uw r17.0<0;1,0>:uw 3:uw // keep the low two bits: selector value 0..3
+ cmp.e.f0.1 (1) null<1>:w r17.0<0;1,0>:uw 1:uw
+ (f0.1) jmpi (1) ROTATE_90_L0 // selector == 1
+ cmp.e.f0.1 (1) null<1>:w r17.0<0;1,0>:uw 2:uw
+ (f0.1) jmpi (1) ROTATE_180_L0 // selector == 2
+ cmp.e.f0.1 (1) null<1>:w r17.0<0;1,0>:uw 3:uw
+ (f0.1) jmpi (1) ROTATE_270_L0 // selector == 3; selector == 0 falls through
+
+ // rotate 0 degree: r6.0 += r3.0 * r8.5 (skipped when f0.0 is set), r5.0 += r4.0 * r8.6
+ROTATE_0_L0:
+ (-f0.0)mov (1) acc0.0<1>:f r6.0<0;1,0>:f // acc0 = r6.0
+ (-f0.0)mac (1) r6.0<1>:f r3.0<0;1,0>:f r8.5<0;1,0>:f // r6.0 = r3.0 * r8.5 + acc0
+
+ mov (1) acc0.0<1>:f r5.0<0;1,0>:f // acc0 = r5.0
+ mac (1) r5.0<1>:f r4.0<0;1,0>:f r8.6<0;1,0>:f // r5.0 = r4.0 * r8.6 + acc0
+ jmpi (1) END_SRC_BLOCK_ORIG_COMP_L0
+
+ // rotate 90 degree: r6.0 += r3.0 * r8.6 (skipped when f0.0 is set), r5.0 += r4.0 * (r2.0 - r8.5 - 16)
+ROTATE_90_L0:
+ (-f0.0)mov (1) acc0.0<1>:f r6.0<0;1,0>:f // acc0 = r6.0
+ (-f0.0)mac (1) r6.0<1>:f r3.0<0;1,0>:f r8.6<0;1,0>:f // r6.0 = r3.0 * r8.6 + acc0
+
+ mov (1) r16.0<1>:f r2.0<0;1,0>:uw // r16.0 = float(word r2.0); presumably a frame dimension - TODO confirm
+ add (1) r17.0<1>:f -r8.5<0;1,0>:f r16.0<0;1,0>:f // r17.0 = r16.0 - r8.5 (r17.0 is reused as a float from here on)
+ add (1) r17.0<1>:f r17.0<0;1,0>:f -16.0:f // r17.0 -= 16.0
+
+ mov (1) acc0.0<1>:f r5.0<0;1,0>:f // acc0 = r5.0
+ mac (1) r5.0<1>:f r4.0<0;1,0>:f r17.0<0;1,0>:f // r5.0 = r4.0 * r17.0 + acc0
+ jmpi (1) END_SRC_BLOCK_ORIG_COMP_L0
+
+ // rotate 180 degree: r6.0 += r3.0 * (r2.0 - r8.5 - 16) (skipped when f0.0 is set), r5.0 += r4.0 * (r2.1 - r8.6 - 16)
+ROTATE_180_L0:
+ (-f0.0)mov (1) r16.0<1>:f r2.0<0;1,0>:uw // r16.0 = float(word r2.0)
+ (-f0.0)add (1) r17.0<1>:f -r8.5<0;1,0>:f r16.0<0;1,0>:f // r17.0 = r16.0 - r8.5
+ (-f0.0)add (1) r17.0<1>:f r17.0<0;1,0>:f -16.0:f // r17.0 -= 16.0
+ (-f0.0)mov (1) acc0.0<1>:f r6.0<0;1,0>:f // acc0 = r6.0
+ (-f0.0)mac (1) r6.0<1>:f r3.0<0;1,0>:f r17.0<0;1,0>:f // r6.0 = r3.0 * r17.0 + acc0
+
+ mov (1) r16.0<1>:f r2.1<0;1,0>:uw // r16.0 = float(word r2.1)
+ add (1) r17.0<1>:f -r8.6<0;1,0>:f r16.0<0;1,0>:f // r17.0 = r16.0 - r8.6
+ add (1) r17.0<1>:f r17.0<0;1,0>:f -16.0:f // r17.0 -= 16.0
+ mov (1) acc0.0<1>:f r5.0<0;1,0>:f // acc0 = r5.0
+ mac (1) r5.0<1>:f r4.0<0;1,0>:f r17.0<0;1,0>:f // r5.0 = r4.0 * r17.0 + acc0
+ jmpi (1) END_SRC_BLOCK_ORIG_COMP_L0
+
+ // rotate 270 degree: r6.0 += r3.0 * (r2.1 - r8.6 - 16) (skipped when f0.0 is set), r5.0 += r4.0 * r8.5
+ROTATE_270_L0:
+ (-f0.0)mov (1) r16.0<1>:f r2.1<0;1,0>:uw // r16.0 = float(word r2.1)
+ (-f0.0)add (1) r17.0<1>:f -r8.6<0;1,0>:f r16.0<0;1,0>:f // r17.0 = r16.0 - r8.6
+ (-f0.0)add (1) r17.0<1>:f r17.0<0;1,0>:f -16.0:f // r17.0 -= 16.0
+ (-f0.0)mov (1) acc0.0<1>:f r6.0<0;1,0>:f // acc0 = r6.0
+ (-f0.0)mac (1) r6.0<1>:f r3.0<0;1,0>:f r17.0<0;1,0>:f // r6.0 = r3.0 * r17.0 + acc0
+
+ mov (1) acc0.0<1>:f r5.0<0;1,0>:f // acc0 = r5.0
+ mac (1) r5.0<1>:f r4.0<0;1,0>:f r8.5<0;1,0>:f // r5.0 = r4.0 * r8.5 + acc0
+
+END_SRC_BLOCK_ORIG_COMP_L0: // common exit of all four rotation paths
+ nop
diff --git a/src/shaders/post_processing/gen8/YUV_to_RGB.g8a b/src/shaders/post_processing/gen8/YUV_to_RGB.g8a
new file mode 100644
index 0000000..2b968d8
--- /dev/null
+++ b/src/shaders/post_processing/gen8/YUV_to_RGB.g8a
@@ -0,0 +1,971 @@
+/*
+ * Copyright 2000-2013 Intel Corporation All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * Authors:
+ * Zhao Yakui <yakui.zhao@intel.com>
+ */
+
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+
+.default_execution_size (16)
+.default_register_type :ub
+
+.reg_count_total 128
+.reg_count_payload 7
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+//Color Pipe (IECP) parameters
+
+
+//ByteCopy
+
+
+//r4
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later)
+// 0000,0000,0000
+// 0001(Spawn a root thread),0001 (Root thread spawn thread)
+// = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later)
+// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+// 0000,0000 (Binding table index, added later)
+// = 0x0C098700
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+ // Message Descriptors
+ // = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+ // 1 (header present 1) 0 1010 (media block write) 000000
+ // 00000000 (binding table index - set later)
+ // = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+ // buf4 starts from r28
+
+
+//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT
+
+
+.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Define temp space for any usages
+
+
+// Common Buffers
+
+
+// temp space for rotation
+
+.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+
+.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+
+.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+
+.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+
+
+// End of common.inc
+
+
+// Module name: YUV_to_RGB.asm
+//
+// Convert YUV to RGB, handle it by 16x4 block
+//
+
+
+// Description: Includes all definitions explicit to Fast Composite.
+
+
+//========== GRF partition ==========
+ // r0 header : r0 (1 GRF)
+ // Static parameters : r1 - r6 (6 GRFS)
+ // Inline parameters : r7 - r8 (2 GRFs)
+ // MSGSRC : r27 (1 GRF)
+//===================================
+
+//Interface:
+//========== Static Parameters (Explicit To Fast Composite) ==========
+//r1
+//CSC Set 0
+
+
+.declare udCSC_CURBE Base=r1.0 ElementSize=4 Type=ud
+
+//Constant alpha
+
+
+//r2
+
+
+// Gen7 AVS WA
+
+
+// WiDi Definitions
+
+
+//Colorfill
+
+
+ // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise.
+
+.declare ubCOLOR_PIXEL_VAL Base=r2.20 ElementSize=1 SrcRegion=<0;1,0> DstRegion=<1> Type=ub
+
+//r3
+//Normalised Ratio of Horizontal step size with main video for all layers
+
+
+ //Normalised Ratio of Horizontal step size with main video for all layers becomes
+ //Normalised Horizontal step size for all layers in VP_Setup.asm
+
+
+//r4
+//Normalised Vertical step size for all layers
+
+
+//r5
+//Normalised Vertical Frame Origin for all layers
+
+
+//r6
+//Normalised Horizontal Frame Origin for all layers
+
+
+//========== Inline Parameters (Explicit To Fast Composite) ==========
+
+
+//Main video Step X
+
+
+//====================== Binding table (Explicit To Fast Composite)=========================================
+
+
+//Used by Interlaced Scaling Kernels
+
+
+//========== Sampler State Table Index (Explicit To Fast Composite)==========
+//Sampler Index for AVS/IEF messages
+
+
+//Sampler Index for SIMD16 sampler messages
+
+
+//=============================================================================
+
+.declare fBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f // float view; the four buffers are based 16 registers apart (r64, r80, r96, r112)
+.declare fBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+
+.declare bBUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub // same base registers as fBUFFER_n, viewed as contiguous unsigned bytes
+.declare bBUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare bBUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare bBUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+//Pointer to mask reg
+
+
+//r18
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+.declare udCSC_COEFF_0 Base=r18.0 ElementSize=4 Type=ud // 1 GRF
+
+//r19
+
+
+.declare udCSC_COEFF_1 Base=r19.0 ElementSize=4 Type=ud // 1 GRF
+
+
+//r20
+
+.declare uwALPHA_MASK_REG_TEMP Base=r20.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF
+
+//r21
+
+.declare uwALPHA_MASK_REG Base=r21.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF
+
+//r22
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as
+//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT
+
+//r23
+
+
+//Lumakey
+
+
+//r24
+
+
+//r25
+
+
+//r26
+
+
+//defines to generate LABELS during compile time.
+
+
+//Msg payload buffers; upto 4 full-size messages can be written
+
+//It is unnecessary to use the MSGPayLoad here, so it is temporarily used for the conversion of YUV->RGB
+
+.declare fBUFFER_R Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> Type=f // float result for R; written 16 floats at a time, i.e. starting at r28 and continuing into r29
+.declare fBUFFER_G Base=r30.0 ElementSize=4 SrcRegion=<8;8,1> Type=f // float result for G; r30 is also the base of the m*MSGPAYLOAD declarations
+.declare fBUFFER_B Base=r32.0 ElementSize=4 SrcRegion=<8;8,1> Type=f // float result for B
+
+.declare fBUFFER_Y Base=r36.0 ElementSize=4 SrcRegion=<8;8,1> Type=f // float input Y; (0, 0) is r36 and (1, 0) is r37
+.declare fBUFFER_U Base=r38.0 ElementSize=4 SrcRegion=<8;8,1> Type=f // float input U
+.declare fBUFFER_V Base=r40.0 ElementSize=4 SrcRegion=<8;8,1> Type=f // float input V
+
+
+.declare wTempR Base=r42.0 ElementSize=2 Type=w // signed-word staging area for R before it is narrowed to bytes
+.declare wTempG Base=r44.0 ElementSize=2 Type=w // signed-word staging area for G
+.declare wTempB Base=r46.0 ElementSize=2 Type=w // signed-word staging area for B
+
+.declare ubTempR Base=r42.0 ElementSize=1 Type=ub // byte view of the same registers as wTempR
+.declare ubTempG Base=r44.0 ElementSize=1 Type=ub // byte view of the same registers as wTempG
+.declare ubTempB Base=r46.0 ElementSize=1 Type=ub // byte view of the same registers as wTempB
+
+ // the r17 register (nTEMP0) is originally defined from "Common.inc"
+ // instead of re-defining a nTEMP0 here, we use "SAVE_RGB" suffix for its naming
+ // NOTE(review): the name declared below is wTemp0, without a "SAVE_RGB" suffix - confirm this comment is still accurate
+ .declare wTemp0 Base=r17.0 ElementSize=2 Type=uw
+
+
+//_SAVE_INC_
+ // NTSC standard
+ // R = Clamp ( 1.164(Y-16/255) + 1.596(Cr-128/255))
+ // G = Clamp ( 1.164(Y-16/255) - 0.813(Cr-128/255) - 0.392(Cb-128/255))
+ // B = Clamp ( 1.164(Y-16/255) + 2.017(Cb-128/255))
+ // ITU-R conversion, Now we are using ITU-R conversion
+ // R = clip( Y + 1.402*(Cr-128)) // ITU-R
+ // G = clip( Y - 0.344*(Cb-128) - 0.714*(Cr-128))
+ // B = clip( Y + 1.772*(Cb-128))
+
+ // At the save module we have all 8 address sub-registers available.
+ // So we will use PING-PONG type of scheme to save the data using
+ // pointers pBUF_CHNL_TOP_8x4 and pBUF_CHNL_BOT_8x4. This will help
+ // reduce dependency. - rT
+
+ //wBUFF_CHNL_PTR points to either buffer 0 or buffer 4.
+ //Add appropriate offsets to get pointers for all buffers (1,2,3 or 5).
+ //Offsets are zero for buffer 0 and buffer 4.
+ //Y/U/V is also stored as R/G/B for the internal purpose
+//for BUFFER_0: copy the four words r22.0..r22.3 into a0.0..a0.3; a0.1, a0.2 and a0.0 are then used as byte addresses (a0.1: Y in / R out, a0.2: U in / G out, a0.0: V in / B out)
+ mov (4) a0.0<1>:uw r22.0<4;4,1>:uw
+//the first line in the block 0
+ mov (8) fBUFFER_Y(0, 0)<1> r[a0.1, 1]<16;8,2>:ub // Y: 8 bytes at a stride of 2 starting at byte offset 1, converted ub -> f
+ mov (8) fBUFFER_Y(1, 0)<1> r[a0.1, 17]<16;8,2>:ub // next 8 samples (byte offset 17) into the following register of fBUFFER_Y
+ mov (8) fBUFFER_U(0, 0)<1> r[a0.2, 1]<16;8,2>:ub // U: same layout, addressed through a0.2
+ mov (8) fBUFFER_U(1, 0)<1> r[a0.2, 17]<16;8,2>:ub
+ mov (8) fBUFFER_V(0, 0)<1> r[a0.0, 1]<16;8,2>:ub // V: same layout, addressed through a0.0
+ mov (8) fBUFFER_V(1, 0)<1> r[a0.0, 17]<16;8,2>:ub
+
+ add (16) fBUFFER_U(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> -128.0f // U -= 128
+ add (16) fBUFFER_V(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -128.0f // V -= 128
+
+ mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> // acc = Y
+ mac (16) fBUFFER_R(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 1.402f // R = Y + 1.402 * V
+
+ mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> // acc = Y
+ mac (8) acc0.0<1>:f fBUFFER_U(0, 0)<8;8,1> -0.344f // acc0 += -0.344 * U for the first 8 samples
+ mac (8) acc1.0<1>:f fBUFFER_U(1, 0)<8;8,1> -0.344f // acc1 += -0.344 * U for the last 8 samples
+ mac (16) fBUFFER_G(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -0.714f // G = Y - 0.344 * U - 0.714 * V
+
+ mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> // acc = Y
+ mac (16) fBUFFER_B(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 1.772f // B = Y + 1.772 * U
+
+ mul.sat (16) fBUFFER_Y(0, 0)<1> fBUFFER_R(0, 0)<8;8,1> 0.0039216f // scale R by ~1/255 with saturation; fBUFFER_Y/U/V are reused as scratch from here on
+ mul.sat (16) fBUFFER_U(0, 0)<1> fBUFFER_G(0, 0)<8;8,1> 0.0039216f // same for G
+ mul.sat (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.0039216f // same for B
+
+ mul (16) fBUFFER_R(0, 0)<1> fBUFFER_Y(0, 0)<8;8,1> 255.0f // scale the saturated R back up by 255
+ mul (16) fBUFFER_G(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 255.0f // same for G
+ mul (16) fBUFFER_B(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 255.0f // same for B
+
+ mov (16) wTempR(0, 0)<2> fBUFFER_R(0, 0)<8;8,1> // float -> word, stored in every second word of wTempR
+ mov (16) wTempG(0, 0)<2> fBUFFER_G(0, 0)<8;8,1>
+ mov (16) wTempB(0, 0)<2> fBUFFER_B(0, 0)<8;8,1>
+
+ mov (16) r[a0.1,1]<2>:ub ubTempR(0, 0)<32;8,4> // every 4th byte of the staging area (one per converted word) overwrites the Y bytes read above: R replaces Y in place
+ mov (16) r[a0.2,1]<2>:ub ubTempG(0, 0)<32;8,4> // G replaces U in place
+ mov (16) r[a0.0,1]<2>:ub ubTempB(0, 0)<32;8,4> // B replaces V in place
+
+//the second line in the block 0: same sequence as the first line with every byte offset advanced by 32 (33/49 instead of 1/17)
+
+ mov (8) fBUFFER_Y(0, 0)<1> r[a0.1, 33]<16;8,2>:ub
+ mov (8) fBUFFER_Y(1, 0)<1> r[a0.1, 49]<16;8,2>:ub
+ mov (8) fBUFFER_U(0, 0)<1> r[a0.2, 33]<16;8,2>:ub
+ mov (8) fBUFFER_U(1, 0)<1> r[a0.2, 49]<16;8,2>:ub
+ mov (8) fBUFFER_V(0, 0)<1> r[a0.0, 33]<16;8,2>:ub
+ mov (8) fBUFFER_V(1, 0)<1> r[a0.0, 49]<16;8,2>:ub
+
+ add (16) fBUFFER_U(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> -128.0f
+ add (16) fBUFFER_V(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -128.0f
+
+ mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1>
+ mac (16) fBUFFER_R(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 1.402f
+
+ mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1>
+ mac (8) acc0.0<1>:f fBUFFER_U(0, 0)<8;8,1> -0.344f
+ mac (8) acc1.0<1>:f fBUFFER_U(1, 0)<8;8,1> -0.344f
+ mac (16) fBUFFER_G(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -0.714f
+
+ mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1>
+ mac (16) fBUFFER_B(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 1.772f
+
+ mul.sat (16) fBUFFER_Y(0, 0)<1> fBUFFER_R(0, 0)<8;8,1> 0.0039216f
+ mul.sat (16) fBUFFER_U(0, 0)<1> fBUFFER_G(0, 0)<8;8,1> 0.0039216f
+ mul.sat (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.0039216f
+
+ mul (16) fBUFFER_R(0, 0)<1> fBUFFER_Y(0, 0)<8;8,1> 255.0f
+ mul (16) fBUFFER_G(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 255.0f
+ mul (16) fBUFFER_B(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 255.0f
+
+
+ mov (16) wTempR(0, 0)<2> fBUFFER_R(0, 0)<8;8,1>
+ mov (16) wTempG(0, 0)<2> fBUFFER_G(0, 0)<8;8,1>
+ mov (16) wTempB(0, 0)<2> fBUFFER_B(0, 0)<8;8,1>
+ mov (16) r[a0.1,33]<2>:ub ubTempR(0, 0)<32;8,4>
+ mov (16) r[a0.2,33]<2>:ub ubTempG(0, 0)<32;8,4>
+ mov (16) r[a0.0,33]<2>:ub ubTempB(0, 0)<32;8,4>
+
+//the third line in the block 0: same sequence again, byte offsets 65/81
+ mov (8) fBUFFER_Y(0, 0)<1> r[a0.1, 65]<16;8,2>:ub
+ mov (8) fBUFFER_Y(1, 0)<1> r[a0.1, 81]<16;8,2>:ub
+ mov (8) fBUFFER_U(0, 0)<1> r[a0.2, 65]<16;8,2>:ub
+ mov (8) fBUFFER_U(1, 0)<1> r[a0.2, 81]<16;8,2>:ub
+ mov (8) fBUFFER_V(0, 0)<1> r[a0.0, 65]<16;8,2>:ub
+ mov (8) fBUFFER_V(1, 0)<1> r[a0.0, 81]<16;8,2>:ub
+
+ add (16) fBUFFER_U(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> -128.0f
+ add (16) fBUFFER_V(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -128.0f
+
+ mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1>
+ mac (16) fBUFFER_R(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 1.402f
+
+ mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1>
+ mac (8) acc0.0<1>:f fBUFFER_U(0, 0)<8;8,1> -0.344f
+ mac (8) acc1.0<1>:f fBUFFER_U(1, 0)<8;8,1> -0.344f
+ mac (16) fBUFFER_G(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -0.714f
+
+ mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1>
+ mac (16) fBUFFER_B(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 1.772f
+
+ mul.sat (16) fBUFFER_Y(0, 0)<1> fBUFFER_R(0, 0)<8;8,1> 0.0039216f
+ mul.sat (16) fBUFFER_U(0, 0)<1> fBUFFER_G(0, 0)<8;8,1> 0.0039216f
+ mul.sat (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.0039216f
+
+ mul (16) fBUFFER_R(0, 0)<1> fBUFFER_Y(0, 0)<8;8,1> 255.0f
+ mul (16) fBUFFER_G(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 255.0f
+ mul (16) fBUFFER_B(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 255.0f
+
+ mov (16) wTempR(0, 0)<2> fBUFFER_R(0, 0)<8;8,1>
+ mov (16) wTempG(0, 0)<2> fBUFFER_G(0, 0)<8;8,1>
+ mov (16) wTempB(0, 0)<2> fBUFFER_B(0, 0)<8;8,1>
+ mov (16) r[a0.1,65]<2>:ub ubTempR(0, 0)<32;8,4>
+ mov (16) r[a0.2,65]<2>:ub ubTempG(0, 0)<32;8,4>
+ mov (16) r[a0.0,65]<2>:ub ubTempB(0, 0)<32;8,4>
+
+//the fourth line in the block 0: same sequence again, byte offsets 97/113
+ mov (8) fBUFFER_Y(0, 0)<1> r[a0.1, 97]<16;8,2>:ub
+ mov (8) fBUFFER_Y(1, 0)<1> r[a0.1, 113]<16;8,2>:ub
+ mov (8) fBUFFER_U(0, 0)<1> r[a0.2, 97]<16;8,2>:ub
+ mov (8) fBUFFER_U(1, 0)<1> r[a0.2, 113]<16;8,2>:ub
+ mov (8) fBUFFER_V(0, 0)<1> r[a0.0, 97]<16;8,2>:ub
+ mov (8) fBUFFER_V(1, 0)<1> r[a0.0, 113]<16;8,2>:ub
+
+ add (16) fBUFFER_U(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> -128.0f
+ add (16) fBUFFER_V(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -128.0f
+
+ mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1>
+ mac (16) fBUFFER_R(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 1.402f
+
+ mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1>
+ mac (8) acc0.0<1>:f fBUFFER_U(0, 0)<8;8,1> -0.344f
+ mac (8) acc1.0<1>:f fBUFFER_U(1, 0)<8;8,1> -0.344f
+ mac (16) fBUFFER_G(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -0.714f
+
+ mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1>
+ mac (16) fBUFFER_B(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 1.772f
+
+ mul.sat (16) fBUFFER_Y(0, 0)<1> fBUFFER_R(0, 0)<8;8,1> 0.0039216f
+ mul.sat (16) fBUFFER_U(0, 0)<1> fBUFFER_G(0, 0)<8;8,1> 0.0039216f
+ mul.sat (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.0039216f
+
+ mul (16) fBUFFER_R(0, 0)<1> fBUFFER_Y(0, 0)<8;8,1> 255.0f
+ mul (16) fBUFFER_G(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 255.0f
+ mul (16) fBUFFER_B(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 255.0f
+
+ mov (16) wTempR(0, 0)<2> fBUFFER_R(0, 0)<8;8,1>
+ mov (16) wTempG(0, 0)<2> fBUFFER_G(0, 0)<8;8,1>
+ mov (16) wTempB(0, 0)<2> fBUFFER_B(0, 0)<8;8,1>
+ mov (16) r[a0.1,97]<2>:ub ubTempR(0, 0)<32;8,4>
+ mov (16) r[a0.2,97]<2>:ub ubTempG(0, 0)<32;8,4>
+ mov (16) r[a0.0,97]<2>:ub ubTempB(0, 0)<32;8,4>
+
+
+//for BUFFER_1: a0.0..a0.3 = the four words r22.0..r22.3, each plus 512; a0.1, a0.2 and a0.0 are then used as byte addresses (a0.1: Y in / R out, a0.2: U in / G out, a0.0: V in / B out)
+ add (4) a0.0<1>:uw r22.0<4;4,1>:uw 512:uw
+//the first line in the block 1
+ mov (8) fBUFFER_Y(0, 0)<1> r[a0.1, 1]<16;8,2>:ub // Y: 8 bytes at a stride of 2 starting at byte offset 1, converted ub -> f
+ mov (8) fBUFFER_Y(1, 0)<1> r[a0.1, 17]<16;8,2>:ub // next 8 samples (byte offset 17) into the following register of fBUFFER_Y
+ mov (8) fBUFFER_U(0, 0)<1> r[a0.2, 1]<16;8,2>:ub // U: same layout, addressed through a0.2
+ mov (8) fBUFFER_U(1, 0)<1> r[a0.2, 17]<16;8,2>:ub
+ mov (8) fBUFFER_V(0, 0)<1> r[a0.0, 1]<16;8,2>:ub // V: same layout, addressed through a0.0
+ mov (8) fBUFFER_V(1, 0)<1> r[a0.0, 17]<16;8,2>:ub
+
+ add (16) fBUFFER_U(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> -128.0f // U -= 128
+ add (16) fBUFFER_V(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -128.0f // V -= 128
+
+ mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> // acc = Y
+ mac (16) fBUFFER_R(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 1.402f // R = Y + 1.402 * V
+
+ mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> // acc = Y
+ mac (8) acc0.0<1>:f fBUFFER_U(0, 0)<8;8,1> -0.344f // acc0 += -0.344 * U for the first 8 samples
+ mac (8) acc1.0<1>:f fBUFFER_U(1, 0)<8;8,1> -0.344f // acc1 += -0.344 * U for the last 8 samples
+ mac (16) fBUFFER_G(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -0.714f // G = Y - 0.344 * U - 0.714 * V
+
+ mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> // acc = Y
+ mac (16) fBUFFER_B(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 1.772f // B = Y + 1.772 * U
+
+ mul.sat (16) fBUFFER_Y(0, 0)<1> fBUFFER_R(0, 0)<8;8,1> 0.0039216f // scale R by ~1/255 with saturation; fBUFFER_Y/U/V are reused as scratch from here on
+ mul.sat (16) fBUFFER_U(0, 0)<1> fBUFFER_G(0, 0)<8;8,1> 0.0039216f // same for G
+ mul.sat (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.0039216f // same for B
+
+ mul (16) fBUFFER_R(0, 0)<1> fBUFFER_Y(0, 0)<8;8,1> 255.0f // scale the saturated R back up by 255
+ mul (16) fBUFFER_G(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 255.0f // same for G
+ mul (16) fBUFFER_B(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 255.0f // same for B
+
+ mov (16) wTempR(0, 0)<2> fBUFFER_R(0, 0)<8;8,1> // float -> word, stored in every second word of wTempR
+ mov (16) wTempG(0, 0)<2> fBUFFER_G(0, 0)<8;8,1>
+ mov (16) wTempB(0, 0)<2> fBUFFER_B(0, 0)<8;8,1>
+
+ mov (16) r[a0.1,1]<2>:ub ubTempR(0, 0)<32;8,4> // every 4th byte of the staging area (one per converted word) overwrites the Y bytes read above: R replaces Y in place
+ mov (16) r[a0.2,1]<2>:ub ubTempG(0, 0)<32;8,4> // G replaces U in place
+ mov (16) r[a0.0,1]<2>:ub ubTempB(0, 0)<32;8,4> // B replaces V in place
+
+//the second line in the block 1: same sequence as the first line with every byte offset advanced by 32 (33/49 instead of 1/17)
+
+ mov (8) fBUFFER_Y(0, 0)<1> r[a0.1, 33]<16;8,2>:ub
+ mov (8) fBUFFER_Y(1, 0)<1> r[a0.1, 49]<16;8,2>:ub
+ mov (8) fBUFFER_U(0, 0)<1> r[a0.2, 33]<16;8,2>:ub
+ mov (8) fBUFFER_U(1, 0)<1> r[a0.2, 49]<16;8,2>:ub
+ mov (8) fBUFFER_V(0, 0)<1> r[a0.0, 33]<16;8,2>:ub
+ mov (8) fBUFFER_V(1, 0)<1> r[a0.0, 49]<16;8,2>:ub
+
+ add (16) fBUFFER_U(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> -128.0f
+ add (16) fBUFFER_V(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -128.0f
+
+ mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1>
+ mac (16) fBUFFER_R(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 1.402f
+
+ mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1>
+ mac (8) acc0.0<1>:f fBUFFER_U(0, 0)<8;8,1> -0.344f
+ mac (8) acc1.0<1>:f fBUFFER_U(1, 0)<8;8,1> -0.344f
+ mac (16) fBUFFER_G(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -0.714f
+
+ mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1>
+ mac (16) fBUFFER_B(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 1.772f
+
+ mul.sat (16) fBUFFER_Y(0, 0)<1> fBUFFER_R(0, 0)<8;8,1> 0.0039216f
+ mul.sat (16) fBUFFER_U(0, 0)<1> fBUFFER_G(0, 0)<8;8,1> 0.0039216f
+ mul.sat (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.0039216f
+
+ mul (16) fBUFFER_R(0, 0)<1> fBUFFER_Y(0, 0)<8;8,1> 255.0f
+ mul (16) fBUFFER_G(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 255.0f
+ mul (16) fBUFFER_B(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 255.0f
+
+
+ mov (16) wTempR(0, 0)<2> fBUFFER_R(0, 0)<8;8,1>
+ mov (16) wTempG(0, 0)<2> fBUFFER_G(0, 0)<8;8,1>
+ mov (16) wTempB(0, 0)<2> fBUFFER_B(0, 0)<8;8,1>
+ mov (16) r[a0.1,33]<2>:ub ubTempR(0, 0)<32;8,4>
+ mov (16) r[a0.2,33]<2>:ub ubTempG(0, 0)<32;8,4>
+ mov (16) r[a0.0,33]<2>:ub ubTempB(0, 0)<32;8,4>
+
+//the third line in the block 1: same sequence again, byte offsets 65/81
+ mov (8) fBUFFER_Y(0, 0)<1> r[a0.1, 65]<16;8,2>:ub
+ mov (8) fBUFFER_Y(1, 0)<1> r[a0.1, 81]<16;8,2>:ub
+ mov (8) fBUFFER_U(0, 0)<1> r[a0.2, 65]<16;8,2>:ub
+ mov (8) fBUFFER_U(1, 0)<1> r[a0.2, 81]<16;8,2>:ub
+ mov (8) fBUFFER_V(0, 0)<1> r[a0.0, 65]<16;8,2>:ub
+ mov (8) fBUFFER_V(1, 0)<1> r[a0.0, 81]<16;8,2>:ub
+
+ add (16) fBUFFER_U(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> -128.0f
+ add (16) fBUFFER_V(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -128.0f
+
+ mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1>
+ mac (16) fBUFFER_R(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 1.402f
+
+ mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1>
+ mac (8) acc0.0<1>:f fBUFFER_U(0, 0)<8;8,1> -0.344f
+ mac (8) acc1.0<1>:f fBUFFER_U(1, 0)<8;8,1> -0.344f
+ mac (16) fBUFFER_G(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -0.714f
+
+ mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1>
+ mac (16) fBUFFER_B(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 1.772f
+
+ mul.sat (16) fBUFFER_Y(0, 0)<1> fBUFFER_R(0, 0)<8;8,1> 0.0039216f
+ mul.sat (16) fBUFFER_U(0, 0)<1> fBUFFER_G(0, 0)<8;8,1> 0.0039216f
+ mul.sat (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.0039216f
+
+ mul (16) fBUFFER_R(0, 0)<1> fBUFFER_Y(0, 0)<8;8,1> 255.0f
+ mul (16) fBUFFER_G(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 255.0f
+ mul (16) fBUFFER_B(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 255.0f
+
+ mov (16) wTempR(0, 0)<2> fBUFFER_R(0, 0)<8;8,1>
+ mov (16) wTempG(0, 0)<2> fBUFFER_G(0, 0)<8;8,1>
+ mov (16) wTempB(0, 0)<2> fBUFFER_B(0, 0)<8;8,1>
+ mov (16) r[a0.1,65]<2>:ub ubTempR(0, 0)<32;8,4>
+ mov (16) r[a0.2,65]<2>:ub ubTempG(0, 0)<32;8,4>
+ mov (16) r[a0.0,65]<2>:ub ubTempB(0, 0)<32;8,4>
+
+//the fourth line in the block 1: same sequence again, byte offsets 97/113
+ mov (8) fBUFFER_Y(0, 0)<1> r[a0.1, 97]<16;8,2>:ub
+ mov (8) fBUFFER_Y(1, 0)<1> r[a0.1, 113]<16;8,2>:ub
+ mov (8) fBUFFER_U(0, 0)<1> r[a0.2, 97]<16;8,2>:ub
+ mov (8) fBUFFER_U(1, 0)<1> r[a0.2, 113]<16;8,2>:ub
+ mov (8) fBUFFER_V(0, 0)<1> r[a0.0, 97]<16;8,2>:ub
+ mov (8) fBUFFER_V(1, 0)<1> r[a0.0, 113]<16;8,2>:ub
+
+ add (16) fBUFFER_U(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> -128.0f
+ add (16) fBUFFER_V(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -128.0f
+
+ mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1>
+ mac (16) fBUFFER_R(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 1.402f
+
+ mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1>
+ mac (8) acc0.0<1>:f fBUFFER_U(0, 0)<8;8,1> -0.344f
+ mac (8) acc1.0<1>:f fBUFFER_U(1, 0)<8;8,1> -0.344f
+ mac (16) fBUFFER_G(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -0.714f
+
+ mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1>
+ mac (16) fBUFFER_B(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 1.772f
+
+ mul.sat (16) fBUFFER_Y(0, 0)<1> fBUFFER_R(0, 0)<8;8,1> 0.0039216f
+ mul.sat (16) fBUFFER_U(0, 0)<1> fBUFFER_G(0, 0)<8;8,1> 0.0039216f
+ mul.sat (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.0039216f
+
+ mul (16) fBUFFER_R(0, 0)<1> fBUFFER_Y(0, 0)<8;8,1> 255.0f
+ mul (16) fBUFFER_G(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 255.0f
+ mul (16) fBUFFER_B(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 255.0f
+
+ mov (16) wTempR(0, 0)<2> fBUFFER_R(0, 0)<8;8,1>
+ mov (16) wTempG(0, 0)<2> fBUFFER_G(0, 0)<8;8,1>
+ mov (16) wTempB(0, 0)<2> fBUFFER_B(0, 0)<8;8,1>
+ mov (16) r[a0.1,97]<2>:ub ubTempR(0, 0)<32;8,4>
+ mov (16) r[a0.2,97]<2>:ub ubTempG(0, 0)<32;8,4>
+ mov (16) r[a0.0,97]<2>:ub ubTempB(0, 0)<32;8,4>
+
+
+//for BUFFER_2
+ add (4) a0.0<1>:uw r22.0<4;4,1>:uw 1024:uw
+//the first line in the block 2
+ mov (8) fBUFFER_Y(0, 0)<1> r[a0.1, 1]<16;8,2>:ub
+ mov (8) fBUFFER_Y(1, 0)<1> r[a0.1, 17]<16;8,2>:ub
+ mov (8) fBUFFER_U(0, 0)<1> r[a0.2, 1]<16;8,2>:ub
+ mov (8) fBUFFER_U(1, 0)<1> r[a0.2, 17]<16;8,2>:ub
+ mov (8) fBUFFER_V(0, 0)<1> r[a0.0, 1]<16;8,2>:ub
+ mov (8) fBUFFER_V(1, 0)<1> r[a0.0, 17]<16;8,2>:ub
+
+ add (16) fBUFFER_U(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> -128.0f
+ add (16) fBUFFER_V(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -128.0f
+
+ mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1>
+ mac (16) fBUFFER_R(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 1.402f
+
+ mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1>
+ mac (8) acc0.0<1>:f fBUFFER_U(0, 0)<8;8,1> -0.344f
+ mac (8) acc1.0<1>:f fBUFFER_U(1, 0)<8;8,1> -0.344f
+ mac (16) fBUFFER_G(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -0.714f
+
+ mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1>
+ mac (16) fBUFFER_B(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 1.772f
+
+ mul.sat (16) fBUFFER_Y(0, 0)<1> fBUFFER_R(0, 0)<8;8,1> 0.0039216f
+ mul.sat (16) fBUFFER_U(0, 0)<1> fBUFFER_G(0, 0)<8;8,1> 0.0039216f
+ mul.sat (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.0039216f
+
+ mul (16) fBUFFER_R(0, 0)<1> fBUFFER_Y(0, 0)<8;8,1> 255.0f
+ mul (16) fBUFFER_G(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 255.0f
+ mul (16) fBUFFER_B(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 255.0f
+
+ mov (16) wTempR(0, 0)<2> fBUFFER_R(0, 0)<8;8,1>
+ mov (16) wTempG(0, 0)<2> fBUFFER_G(0, 0)<8;8,1>
+ mov (16) wTempB(0, 0)<2> fBUFFER_B(0, 0)<8;8,1>
+
+ mov (16) r[a0.1,1]<2>:ub ubTempR(0, 0)<32;8,4>
+ mov (16) r[a0.2,1]<2>:ub ubTempG(0, 0)<32;8,4>
+ mov (16) r[a0.0,1]<2>:ub ubTempB(0, 0)<32;8,4>
+
+//the second line in the block 2
+
+ mov (8) fBUFFER_Y(0, 0)<1> r[a0.1, 33]<16;8,2>:ub
+ mov (8) fBUFFER_Y(1, 0)<1> r[a0.1, 49]<16;8,2>:ub
+ mov (8) fBUFFER_U(0, 0)<1> r[a0.2, 33]<16;8,2>:ub
+ mov (8) fBUFFER_U(1, 0)<1> r[a0.2, 49]<16;8,2>:ub
+ mov (8) fBUFFER_V(0, 0)<1> r[a0.0, 33]<16;8,2>:ub
+ mov (8) fBUFFER_V(1, 0)<1> r[a0.0, 49]<16;8,2>:ub
+
+ add (16) fBUFFER_U(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> -128.0f
+ add (16) fBUFFER_V(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -128.0f
+
+ mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1>
+ mac (16) fBUFFER_R(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 1.402f
+
+ mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1>
+ mac (8) acc0.0<1>:f fBUFFER_U(0, 0)<8;8,1> -0.344f
+ mac (8) acc1.0<1>:f fBUFFER_U(1, 0)<8;8,1> -0.344f
+ mac (16) fBUFFER_G(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -0.714f
+
+ mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1>
+ mac (16) fBUFFER_B(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 1.772f
+
+ mul.sat (16) fBUFFER_Y(0, 0)<1> fBUFFER_R(0, 0)<8;8,1> 0.0039216f
+ mul.sat (16) fBUFFER_U(0, 0)<1> fBUFFER_G(0, 0)<8;8,1> 0.0039216f
+ mul.sat (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.0039216f
+
+ mul (16) fBUFFER_R(0, 0)<1> fBUFFER_Y(0, 0)<8;8,1> 255.0f
+ mul (16) fBUFFER_G(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 255.0f
+ mul (16) fBUFFER_B(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 255.0f
+
+
+ mov (16) wTempR(0, 0)<2> fBUFFER_R(0, 0)<8;8,1>
+ mov (16) wTempG(0, 0)<2> fBUFFER_G(0, 0)<8;8,1>
+ mov (16) wTempB(0, 0)<2> fBUFFER_B(0, 0)<8;8,1>
+ mov (16) r[a0.1,33]<2>:ub ubTempR(0, 0)<32;8,4>
+ mov (16) r[a0.2,33]<2>:ub ubTempG(0, 0)<32;8,4>
+ mov (16) r[a0.0,33]<2>:ub ubTempB(0, 0)<32;8,4>
+
+//the third line in the block 2: load 16 Y, U and V samples (every second byte from offsets 65 and 81 of a0.1, a0.2 and a0.0; address registers are set up earlier) into the fBUFFER registers, presumably as floats
+ mov (8) fBUFFER_Y(0, 0)<1> r[a0.1, 65]<16;8,2>:ub
+ mov (8) fBUFFER_Y(1, 0)<1> r[a0.1, 81]<16;8,2>:ub
+ mov (8) fBUFFER_U(0, 0)<1> r[a0.2, 65]<16;8,2>:ub
+ mov (8) fBUFFER_U(1, 0)<1> r[a0.2, 81]<16;8,2>:ub
+ mov (8) fBUFFER_V(0, 0)<1> r[a0.0, 65]<16;8,2>:ub
+ mov (8) fBUFFER_V(1, 0)<1> r[a0.0, 81]<16;8,2>:ub
+// Subtract 128 from U and V so chroma is centred on zero for the matrix below (coefficients look like full-range BT.601 -- TODO confirm).
+ add (16) fBUFFER_U(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> -128.0f
+ add (16) fBUFFER_V(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -128.0f
+// R = Y + 1.402 * V: seed the accumulator with Y, then mac adds the product and writes R.
+ mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1>
+ mac (16) fBUFFER_R(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 1.402f
+// G = Y - 0.344 * U - 0.714 * V. The U term is added by two SIMD8 macs that name acc0 and acc1 as destinations. NOTE(review): confirm the second one also reads acc1 implicitly.
+ mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1>
+ mac (8) acc0.0<1>:f fBUFFER_U(0, 0)<8;8,1> -0.344f
+ mac (8) acc1.0<1>:f fBUFFER_U(1, 0)<8;8,1> -0.344f
+ mac (16) fBUFFER_G(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -0.714f
+// B = Y + 1.772 * U, again with the accumulator re-seeded from Y.
+ mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1>
+ mac (16) fBUFFER_B(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 1.772f
+// Clamp, step 1: multiply by 0.0039216 (about 1/255) with saturation; results are parked in the Y/U/V buffers, which are not read again for this line.
+ mul.sat (16) fBUFFER_Y(0, 0)<1> fBUFFER_R(0, 0)<8;8,1> 0.0039216f
+ mul.sat (16) fBUFFER_U(0, 0)<1> fBUFFER_G(0, 0)<8;8,1> 0.0039216f
+ mul.sat (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.0039216f
+// Clamp, step 2: multiply by 255 to scale the saturated values back up, landing in the R/G/B buffers.
+ mul (16) fBUFFER_R(0, 0)<1> fBUFFER_Y(0, 0)<8;8,1> 255.0f
+ mul (16) fBUFFER_G(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 255.0f
+ mul (16) fBUFFER_B(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 255.0f
+// Narrow into the wTemp registers, then copy bytes (source stride 4, destination stride 2) back over the loaded samples: R over Y, G over U, B over V. Assumes ubTemp* alias wTemp* -- TODO confirm.
+ mov (16) wTempR(0, 0)<2> fBUFFER_R(0, 0)<8;8,1>
+ mov (16) wTempG(0, 0)<2> fBUFFER_G(0, 0)<8;8,1>
+ mov (16) wTempB(0, 0)<2> fBUFFER_B(0, 0)<8;8,1>
+ mov (16) r[a0.1,65]<2>:ub ubTempR(0, 0)<32;8,4>
+ mov (16) r[a0.2,65]<2>:ub ubTempG(0, 0)<32;8,4>
+ mov (16) r[a0.0,65]<2>:ub ubTempB(0, 0)<32;8,4>
+
+//the fourth line in the block 2: load 16 Y, U and V samples (every second byte from offsets 97 and 113 of a0.1, a0.2 and a0.0; address registers are set up earlier) into the fBUFFER registers, presumably as floats
+ mov (8) fBUFFER_Y(0, 0)<1> r[a0.1, 97]<16;8,2>:ub
+ mov (8) fBUFFER_Y(1, 0)<1> r[a0.1, 113]<16;8,2>:ub
+ mov (8) fBUFFER_U(0, 0)<1> r[a0.2, 97]<16;8,2>:ub
+ mov (8) fBUFFER_U(1, 0)<1> r[a0.2, 113]<16;8,2>:ub
+ mov (8) fBUFFER_V(0, 0)<1> r[a0.0, 97]<16;8,2>:ub
+ mov (8) fBUFFER_V(1, 0)<1> r[a0.0, 113]<16;8,2>:ub
+// Subtract 128 from U and V so chroma is centred on zero for the matrix below (coefficients look like full-range BT.601 -- TODO confirm).
+ add (16) fBUFFER_U(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> -128.0f
+ add (16) fBUFFER_V(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -128.0f
+// R = Y + 1.402 * V: seed the accumulator with Y, then mac adds the product and writes R.
+ mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1>
+ mac (16) fBUFFER_R(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 1.402f
+// G = Y - 0.344 * U - 0.714 * V. The U term is added by two SIMD8 macs that name acc0 and acc1 as destinations. NOTE(review): confirm the second one also reads acc1 implicitly.
+ mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1>
+ mac (8) acc0.0<1>:f fBUFFER_U(0, 0)<8;8,1> -0.344f
+ mac (8) acc1.0<1>:f fBUFFER_U(1, 0)<8;8,1> -0.344f
+ mac (16) fBUFFER_G(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -0.714f
+// B = Y + 1.772 * U, again with the accumulator re-seeded from Y.
+ mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1>
+ mac (16) fBUFFER_B(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 1.772f
+// Clamp, step 1: multiply by 0.0039216 (about 1/255) with saturation; results are parked in the Y/U/V buffers, which are not read again for this line.
+ mul.sat (16) fBUFFER_Y(0, 0)<1> fBUFFER_R(0, 0)<8;8,1> 0.0039216f
+ mul.sat (16) fBUFFER_U(0, 0)<1> fBUFFER_G(0, 0)<8;8,1> 0.0039216f
+ mul.sat (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.0039216f
+// Clamp, step 2: multiply by 255 to scale the saturated values back up, landing in the R/G/B buffers.
+ mul (16) fBUFFER_R(0, 0)<1> fBUFFER_Y(0, 0)<8;8,1> 255.0f
+ mul (16) fBUFFER_G(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 255.0f
+ mul (16) fBUFFER_B(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 255.0f
+// Narrow into the wTemp registers, then copy bytes (source stride 4, destination stride 2) back over the loaded samples: R over Y, G over U, B over V. Assumes ubTemp* alias wTemp* -- TODO confirm.
+ mov (16) wTempR(0, 0)<2> fBUFFER_R(0, 0)<8;8,1>
+ mov (16) wTempG(0, 0)<2> fBUFFER_G(0, 0)<8;8,1>
+ mov (16) wTempB(0, 0)<2> fBUFFER_B(0, 0)<8;8,1>
+ mov (16) r[a0.1,97]<2>:ub ubTempR(0, 0)<32;8,4>
+ mov (16) r[a0.2,97]<2>:ub ubTempG(0, 0)<32;8,4>
+ mov (16) r[a0.0,97]<2>:ub ubTempB(0, 0)<32;8,4>
+
+
+//for BUFFER_3: set a0.0-a0.3 to r22.0-r22.3 plus 1536; a0.1, a0.2 and a0.0 then address the Y/R, U/G and V/B samples in the indirect accesses below
+ add (4) a0.0<1>:uw r22.0<4;4,1>:uw 1536:uw
+//the first line in the block 3: load 16 Y, U and V samples (every second byte from offsets 1 and 17) into the fBUFFER registers, presumably as floats
+ mov (8) fBUFFER_Y(0, 0)<1> r[a0.1, 1]<16;8,2>:ub
+ mov (8) fBUFFER_Y(1, 0)<1> r[a0.1, 17]<16;8,2>:ub
+ mov (8) fBUFFER_U(0, 0)<1> r[a0.2, 1]<16;8,2>:ub
+ mov (8) fBUFFER_U(1, 0)<1> r[a0.2, 17]<16;8,2>:ub
+ mov (8) fBUFFER_V(0, 0)<1> r[a0.0, 1]<16;8,2>:ub
+ mov (8) fBUFFER_V(1, 0)<1> r[a0.0, 17]<16;8,2>:ub
+// Subtract 128 from U and V so chroma is centred on zero for the matrix below (coefficients look like full-range BT.601 -- TODO confirm).
+ add (16) fBUFFER_U(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> -128.0f
+ add (16) fBUFFER_V(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -128.0f
+// R = Y + 1.402 * V: seed the accumulator with Y, then mac adds the product and writes R.
+ mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1>
+ mac (16) fBUFFER_R(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 1.402f
+// G = Y - 0.344 * U - 0.714 * V. The U term is added by two SIMD8 macs that name acc0 and acc1 as destinations. NOTE(review): confirm the second one also reads acc1 implicitly.
+ mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1>
+ mac (8) acc0.0<1>:f fBUFFER_U(0, 0)<8;8,1> -0.344f
+ mac (8) acc1.0<1>:f fBUFFER_U(1, 0)<8;8,1> -0.344f
+ mac (16) fBUFFER_G(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -0.714f
+// B = Y + 1.772 * U, again with the accumulator re-seeded from Y.
+ mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1>
+ mac (16) fBUFFER_B(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 1.772f
+// Clamp, step 1: multiply by 0.0039216 (about 1/255) with saturation; results are parked in the Y/U/V buffers, which are not read again for this line.
+ mul.sat (16) fBUFFER_Y(0, 0)<1> fBUFFER_R(0, 0)<8;8,1> 0.0039216f
+ mul.sat (16) fBUFFER_U(0, 0)<1> fBUFFER_G(0, 0)<8;8,1> 0.0039216f
+ mul.sat (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.0039216f
+// Clamp, step 2: multiply by 255 to scale the saturated values back up, landing in the R/G/B buffers.
+ mul (16) fBUFFER_R(0, 0)<1> fBUFFER_Y(0, 0)<8;8,1> 255.0f
+ mul (16) fBUFFER_G(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 255.0f
+ mul (16) fBUFFER_B(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 255.0f
+// Narrow the results into the wTemp registers (destination stride 2).
+ mov (16) wTempR(0, 0)<2> fBUFFER_R(0, 0)<8;8,1>
+ mov (16) wTempG(0, 0)<2> fBUFFER_G(0, 0)<8;8,1>
+ mov (16) wTempB(0, 0)<2> fBUFFER_B(0, 0)<8;8,1>
+// Copy bytes (source stride 4, destination stride 2) back over the loaded samples: R over Y, G over U, B over V. Assumes ubTemp* alias wTemp* -- TODO confirm.
+ mov (16) r[a0.1,1]<2>:ub ubTempR(0, 0)<32;8,4>
+ mov (16) r[a0.2,1]<2>:ub ubTempG(0, 0)<32;8,4>
+ mov (16) r[a0.0,1]<2>:ub ubTempB(0, 0)<32;8,4>
+
+//the second line in the block 3: convert 16 samples in place, Y/U/V in and R/G/B out, using the a0.x addresses set for block 3
+// Load 16 Y, U and V samples (every second byte from offsets 33 and 49 of a0.1, a0.2 and a0.0) into the fBUFFER registers, presumably as floats.
+ mov (8) fBUFFER_Y(0, 0)<1> r[a0.1, 33]<16;8,2>:ub
+ mov (8) fBUFFER_Y(1, 0)<1> r[a0.1, 49]<16;8,2>:ub
+ mov (8) fBUFFER_U(0, 0)<1> r[a0.2, 33]<16;8,2>:ub
+ mov (8) fBUFFER_U(1, 0)<1> r[a0.2, 49]<16;8,2>:ub
+ mov (8) fBUFFER_V(0, 0)<1> r[a0.0, 33]<16;8,2>:ub
+ mov (8) fBUFFER_V(1, 0)<1> r[a0.0, 49]<16;8,2>:ub
+// Subtract 128 from U and V so chroma is centred on zero for the matrix below (coefficients look like full-range BT.601 -- TODO confirm).
+ add (16) fBUFFER_U(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> -128.0f
+ add (16) fBUFFER_V(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -128.0f
+// R = Y + 1.402 * V: seed the accumulator with Y, then mac adds the product and writes R.
+ mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1>
+ mac (16) fBUFFER_R(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 1.402f
+// G = Y - 0.344 * U - 0.714 * V. The U term is added by two SIMD8 macs that name acc0 and acc1 as destinations. NOTE(review): confirm the second one also reads acc1 implicitly.
+ mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1>
+ mac (8) acc0.0<1>:f fBUFFER_U(0, 0)<8;8,1> -0.344f
+ mac (8) acc1.0<1>:f fBUFFER_U(1, 0)<8;8,1> -0.344f
+ mac (16) fBUFFER_G(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -0.714f
+// B = Y + 1.772 * U, again with the accumulator re-seeded from Y.
+ mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1>
+ mac (16) fBUFFER_B(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 1.772f
+// Clamp, step 1: multiply by 0.0039216 (about 1/255) with saturation; results are parked in the Y/U/V buffers, which are not read again for this line.
+ mul.sat (16) fBUFFER_Y(0, 0)<1> fBUFFER_R(0, 0)<8;8,1> 0.0039216f
+ mul.sat (16) fBUFFER_U(0, 0)<1> fBUFFER_G(0, 0)<8;8,1> 0.0039216f
+ mul.sat (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.0039216f
+// Clamp, step 2: multiply by 255 to scale the saturated values back up, landing in the R/G/B buffers.
+ mul (16) fBUFFER_R(0, 0)<1> fBUFFER_Y(0, 0)<8;8,1> 255.0f
+ mul (16) fBUFFER_G(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 255.0f
+ mul (16) fBUFFER_B(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 255.0f
+// Narrow into the wTemp registers, then copy bytes (source stride 4, destination stride 2) back over the loaded samples:
+// R over Y, G over U, B over V. Assumes ubTemp* alias wTemp* -- TODO confirm.
+ mov (16) wTempR(0, 0)<2> fBUFFER_R(0, 0)<8;8,1>
+ mov (16) wTempG(0, 0)<2> fBUFFER_G(0, 0)<8;8,1>
+ mov (16) wTempB(0, 0)<2> fBUFFER_B(0, 0)<8;8,1>
+ mov (16) r[a0.1,33]<2>:ub ubTempR(0, 0)<32;8,4>
+ mov (16) r[a0.2,33]<2>:ub ubTempG(0, 0)<32;8,4>
+ mov (16) r[a0.0,33]<2>:ub ubTempB(0, 0)<32;8,4>
+
+//the third line in the block 3: load 16 Y, U and V samples (every second byte from offsets 65 and 81 of a0.1, a0.2 and a0.0) into the fBUFFER registers, presumably as floats
+ mov (8) fBUFFER_Y(0, 0)<1> r[a0.1, 65]<16;8,2>:ub
+ mov (8) fBUFFER_Y(1, 0)<1> r[a0.1, 81]<16;8,2>:ub
+ mov (8) fBUFFER_U(0, 0)<1> r[a0.2, 65]<16;8,2>:ub
+ mov (8) fBUFFER_U(1, 0)<1> r[a0.2, 81]<16;8,2>:ub
+ mov (8) fBUFFER_V(0, 0)<1> r[a0.0, 65]<16;8,2>:ub
+ mov (8) fBUFFER_V(1, 0)<1> r[a0.0, 81]<16;8,2>:ub
+// Subtract 128 from U and V so chroma is centred on zero for the matrix below (coefficients look like full-range BT.601 -- TODO confirm).
+ add (16) fBUFFER_U(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> -128.0f
+ add (16) fBUFFER_V(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -128.0f
+// R = Y + 1.402 * V: seed the accumulator with Y, then mac adds the product and writes R.
+ mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1>
+ mac (16) fBUFFER_R(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 1.402f
+// G = Y - 0.344 * U - 0.714 * V. The U term is added by two SIMD8 macs that name acc0 and acc1 as destinations. NOTE(review): confirm the second one also reads acc1 implicitly.
+ mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1>
+ mac (8) acc0.0<1>:f fBUFFER_U(0, 0)<8;8,1> -0.344f
+ mac (8) acc1.0<1>:f fBUFFER_U(1, 0)<8;8,1> -0.344f
+ mac (16) fBUFFER_G(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -0.714f
+// B = Y + 1.772 * U, again with the accumulator re-seeded from Y.
+ mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1>
+ mac (16) fBUFFER_B(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 1.772f
+// Clamp, step 1: multiply by 0.0039216 (about 1/255) with saturation; results are parked in the Y/U/V buffers, which are not read again for this line.
+ mul.sat (16) fBUFFER_Y(0, 0)<1> fBUFFER_R(0, 0)<8;8,1> 0.0039216f
+ mul.sat (16) fBUFFER_U(0, 0)<1> fBUFFER_G(0, 0)<8;8,1> 0.0039216f
+ mul.sat (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.0039216f
+// Clamp, step 2: multiply by 255 to scale the saturated values back up, landing in the R/G/B buffers.
+ mul (16) fBUFFER_R(0, 0)<1> fBUFFER_Y(0, 0)<8;8,1> 255.0f
+ mul (16) fBUFFER_G(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 255.0f
+ mul (16) fBUFFER_B(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 255.0f
+// Narrow into the wTemp registers, then copy bytes (source stride 4, destination stride 2) back over the loaded samples: R over Y, G over U, B over V. Assumes ubTemp* alias wTemp* -- TODO confirm.
+ mov (16) wTempR(0, 0)<2> fBUFFER_R(0, 0)<8;8,1>
+ mov (16) wTempG(0, 0)<2> fBUFFER_G(0, 0)<8;8,1>
+ mov (16) wTempB(0, 0)<2> fBUFFER_B(0, 0)<8;8,1>
+ mov (16) r[a0.1,65]<2>:ub ubTempR(0, 0)<32;8,4>
+ mov (16) r[a0.2,65]<2>:ub ubTempG(0, 0)<32;8,4>
+ mov (16) r[a0.0,65]<2>:ub ubTempB(0, 0)<32;8,4>
+
+//the fourth line in the block 3: load 16 Y, U and V samples (every second byte from offsets 97 and 113 of a0.1, a0.2 and a0.0) into the fBUFFER registers, presumably as floats
+ mov (8) fBUFFER_Y(0, 0)<1> r[a0.1, 97]<16;8,2>:ub
+ mov (8) fBUFFER_Y(1, 0)<1> r[a0.1, 113]<16;8,2>:ub
+ mov (8) fBUFFER_U(0, 0)<1> r[a0.2, 97]<16;8,2>:ub
+ mov (8) fBUFFER_U(1, 0)<1> r[a0.2, 113]<16;8,2>:ub
+ mov (8) fBUFFER_V(0, 0)<1> r[a0.0, 97]<16;8,2>:ub
+ mov (8) fBUFFER_V(1, 0)<1> r[a0.0, 113]<16;8,2>:ub
+// Subtract 128 from U and V so chroma is centred on zero for the matrix below (coefficients look like full-range BT.601 -- TODO confirm).
+ add (16) fBUFFER_U(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> -128.0f
+ add (16) fBUFFER_V(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -128.0f
+// R = Y + 1.402 * V: seed the accumulator with Y, then mac adds the product and writes R.
+ mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1>
+ mac (16) fBUFFER_R(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 1.402f
+// G = Y - 0.344 * U - 0.714 * V. The U term is added by two SIMD8 macs that name acc0 and acc1 as destinations. NOTE(review): confirm the second one also reads acc1 implicitly.
+ mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1>
+ mac (8) acc0.0<1>:f fBUFFER_U(0, 0)<8;8,1> -0.344f
+ mac (8) acc1.0<1>:f fBUFFER_U(1, 0)<8;8,1> -0.344f
+ mac (16) fBUFFER_G(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -0.714f
+// B = Y + 1.772 * U, again with the accumulator re-seeded from Y.
+ mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1>
+ mac (16) fBUFFER_B(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 1.772f
+// Clamp, step 1: multiply by 0.0039216 (about 1/255) with saturation; results are parked in the Y/U/V buffers, which are not read again for this line.
+ mul.sat (16) fBUFFER_Y(0, 0)<1> fBUFFER_R(0, 0)<8;8,1> 0.0039216f
+ mul.sat (16) fBUFFER_U(0, 0)<1> fBUFFER_G(0, 0)<8;8,1> 0.0039216f
+ mul.sat (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.0039216f
+// Clamp, step 2: multiply by 255 to scale the saturated values back up, landing in the R/G/B buffers.
+ mul (16) fBUFFER_R(0, 0)<1> fBUFFER_Y(0, 0)<8;8,1> 255.0f
+ mul (16) fBUFFER_G(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 255.0f
+ mul (16) fBUFFER_B(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 255.0f
+// Narrow into the wTemp registers, then copy bytes (source stride 4, destination stride 2) back over the loaded samples: R over Y, G over U, B over V. Assumes ubTemp* alias wTemp* -- TODO confirm.
+ mov (16) wTempR(0, 0)<2> fBUFFER_R(0, 0)<8;8,1>
+ mov (16) wTempG(0, 0)<2> fBUFFER_G(0, 0)<8;8,1>
+ mov (16) wTempB(0, 0)<2> fBUFFER_B(0, 0)<8;8,1>
+ mov (16) r[a0.1,97]<2>:ub ubTempR(0, 0)<32;8,4>
+ mov (16) r[a0.2,97]<2>:ub ubTempG(0, 0)<32;8,4>
+ mov (16) r[a0.0,97]<2>:ub ubTempB(0, 0)<32;8,4>
+
diff --git a/src/shaders/post_processing/gen8/pa_to_pa.asm b/src/shaders/post_processing/gen8/pa_to_pa.asm
new file mode 100644
index 0000000..44e3b35
--- /dev/null
+++ b/src/shaders/post_processing/gen8/pa_to_pa.asm
@@ -0,0 +1,17 @@
+// Module name: AVS -- pa_to_pa kernel; the body consists only of the include files listed below, in order
+.kernel YUY2_TO_NV12
+.code
+// NOTE(review): the kernel name is the same as in pa_to_pl2.asm although this file ends with Save_AVS_PA.g8a, not Save_AVS_NV12.g8a; presumably a copy-paste leftover -- confirm whether the name is used anywhere before renaming.
+#include "VP_Setup.g8a"
+#include "Set_Layer_0.g8a"
+#include "Set_AVS_Buf_0123_VYUA.g8a"
+#include "PA_AVS_Buf_0.g8a"
+#include "PA_AVS_Buf_1.g8a"
+#include "PA_AVS_Buf_2.g8a"
+#include "PA_AVS_Buf_3.g8a"
+#include "Save_AVS_PA.g8a"
+#include "EOT.g8a"
+
+.end_code
+
+.end_kernel
diff --git a/src/shaders/post_processing/gen8/pa_to_pa.g8b b/src/shaders/post_processing/gen8/pa_to_pa.g8b
new file mode 100644
index 0000000..76fe27a
--- /dev/null
+++ b/src/shaders/post_processing/gen8/pa_to_pa.g8b
@@ -0,0 +1,279 @@
+ { 0x00600001, 0x23600208, 0x008d0000, 0x00000000 },
+ { 0x00600001, 0x23200208, 0x008d0000, 0x00000000 },
+ { 0x00600001, 0x23400208, 0x008d0000, 0x00000000 },
+ { 0x00600041, 0x20603ae8, 0x3a8d0060, 0x000000f0 },
+ { 0x00200001, 0x21141ae8, 0x004500e0, 0x00000000 },
+ { 0x01000010, 0x20002220, 0x1600005a, 0x00010001 },
+ { 0x00000008, 0x22201248, 0x16000044, 0x00000000 },
+ { 0x00000005, 0x22201248, 0x16000220, 0x00030003 },
+ { 0x01000010, 0x20001261, 0x16000220, 0x00010001 },
+ { 0x00010020, 0x34000005, 0x0e001400, 0x00000090 },
+ { 0x01000010, 0x20001261, 0x16000220, 0x00020002 },
+ { 0x00010020, 0x34000005, 0x0e001400, 0x000000f0 },
+ { 0x01000010, 0x20001261, 0x16000220, 0x00030003 },
+ { 0x00010020, 0x34000005, 0x0e001400, 0x00000180 },
+ { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000114 },
+ { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000118 },
+ { 0x00000020, 0x34000004, 0x0e001400, 0x000001a0 },
+ { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000118 },
+ { 0x00000001, 0x220012e8, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x22203ae8, 0x3a004114, 0x00000200 },
+ { 0x00000040, 0x22203ae8, 0x3e000220, 0xc1800000 },
+ { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000220 },
+ { 0x00000020, 0x34000004, 0x0e001400, 0x00000120 },
+ { 0x00110001, 0x220012e8, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x22203ae8, 0x3a004114, 0x00000200 },
+ { 0x00110040, 0x22203ae8, 0x3e000220, 0xc1800000 },
+ { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000220 },
+ { 0x00000001, 0x220012e8, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x22203ae8, 0x3a004118, 0x00000200 },
+ { 0x00000040, 0x22203ae8, 0x3e000220, 0xc1800000 },
+ { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000220 },
+ { 0x00000020, 0x34000004, 0x0e001400, 0x00000070 },
+ { 0x00110001, 0x220012e8, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x22203ae8, 0x3a004118, 0x00000200 },
+ { 0x00110040, 0x22203ae8, 0x3e000220, 0xc1800000 },
+ { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000220 },
+ { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00800001, 0x22a01648, 0x10000000, 0xffffffff },
+ { 0x00000005, 0x23021288, 0x16000044, 0x00030003 },
+ { 0x00000001, 0x23203ae8, 0x000000f8, 0x00000000 },
+ { 0x00000001, 0x23383ae8, 0x000000f4, 0x00000000 },
+ { 0x00000001, 0x23303ae8, 0x00000060, 0x00000000 },
+ { 0x00000001, 0x23343ae8, 0x00000080, 0x00000000 },
+ { 0x00000001, 0x23283ae8, 0x000000c0, 0x00000000 },
+ { 0x00000001, 0x232c3ae8, 0x000000a0, 0x00000000 },
+ { 0x00000001, 0x233c0608, 0x00000000, 0x00000000 },
+ { 0x00000040, 0x233c0208, 0x0600033c, 0x08000000 },
+ { 0x00000001, 0x24083ae0, 0x000000c0, 0x00000000 },
+ { 0x00000048, 0x24083ae0, 0x3e000060, 0x41000000 },
+ { 0x00000248, 0x22e83ae8, 0x3e0000f4, 0x41e00000 },
+ { 0x00000641, 0x22e43ae8, 0x3e000080, 0x40800000 },
+ { 0x00000001, 0x24103ae0, 0x00000060, 0x00000000 },
+ { 0x00000648, 0x22f03ae8, 0x3e0000f4, 0x41000000 },
+ { 0x00000401, 0x22f40608, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00400001, 0x24003660, 0x30000000, 0x000062ea },
+ { 0x00400040, 0x24001860, 0x16690400, 0x00460046 },
+ { 0x00400009, 0x22c01868, 0x16690400, 0x00050005 },
+ { 0x00400001, 0x24003660, 0x30000000, 0x00006420 },
+ { 0x00400040, 0x24001860, 0x16690400, 0x00400040 },
+ { 0x00400209, 0x22401868, 0x16690400, 0x00050005 },
+ { 0x00000401, 0x22500608, 0x00000000, 0x01000100 },
+ { 0x00000001, 0x22d00608, 0x00000000, 0x00400040 },
+ { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 },
+ { 0x00000040, 0x22000200, 0x060002f4, 0x050eb000 },
+ { 0x00000001, 0x22080608, 0x00000000, 0x00000000 },
+ { 0x00000001, 0x23240608, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 },
+ { 0x02000031, 0x28002248, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000001, 0x22d00608, 0x00000000, 0x00400040 },
+ { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 },
+ { 0x00000040, 0x22000200, 0x060002f4, 0x050eb000 },
+ { 0x00000001, 0x22080608, 0x00000000, 0x00000000 },
+ { 0x00000001, 0x23240608, 0x00000000, 0x00000001 },
+ { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 },
+ { 0x02000031, 0x2a002248, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000001, 0x22d00608, 0x00000000, 0x00400040 },
+ { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 },
+ { 0x00000040, 0x22000200, 0x060002f4, 0x050eb000 },
+ { 0x00000001, 0x22080608, 0x00000000, 0x00000000 },
+ { 0x00000001, 0x23240608, 0x00000000, 0x00000002 },
+ { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 },
+ { 0x02000031, 0x2c002248, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000001, 0x22d00608, 0x00000000, 0x00400040 },
+ { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 },
+ { 0x00000040, 0x22000200, 0x060002f4, 0x050eb000 },
+ { 0x00000001, 0x22080608, 0x00000000, 0x00000000 },
+ { 0x00000001, 0x23240608, 0x00000000, 0x00000003 },
+ { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 },
+ { 0x02000031, 0x2e002248, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00400040, 0x22001a40, 0x166902c0, 0x00000000 },
+ { 0x00400040, 0x22082260, 0x1669005c, 0x03a003a0 },
+ { 0x00000209, 0x23601a28, 0x1e0000e0, 0x00010001 },
+ { 0x00000601, 0x23641a28, 0x000000e2, 0x00000000 },
+ { 0x00000401, 0x23680608, 0x00000000, 0x0001001f },
+ { 0x80400040, 0xc0001248, 0x168a8000, 0x00800080 },
+ { 0x80400040, 0xc0201248, 0x168a8020, 0x00800080 },
+ { 0x80400040, 0xc0401248, 0x168a8040, 0x00800080 },
+ { 0x80400040, 0xc0601248, 0x168a8060, 0x00800080 },
+ { 0x80600040, 0xa2001248, 0x168d8200, 0x00800080 },
+ { 0x80600040, 0xa2201248, 0x168d8220, 0x00800080 },
+ { 0x80600040, 0xa2401248, 0x168d8240, 0x00800080 },
+ { 0x80600040, 0xa2601248, 0x168d8260, 0x00800080 },
+ { 0x80400040, 0xc4001248, 0x168a8400, 0x00800080 },
+ { 0x80400040, 0xc4201248, 0x168a8420, 0x00800080 },
+ { 0x80400040, 0xc4401248, 0x168a8440, 0x00800080 },
+ { 0x80400040, 0xc4601248, 0x168a8460, 0x00800080 },
+ { 0x80400040, 0xc0101248, 0x168a8010, 0x00800080 },
+ { 0x80400040, 0xc0301248, 0x168a8030, 0x00800080 },
+ { 0x80400040, 0xc0501248, 0x168a8050, 0x00800080 },
+ { 0x80400040, 0xc0701248, 0x168a8070, 0x00800080 },
+ { 0x80600040, 0xa2101248, 0x168d8210, 0x00800080 },
+ { 0x80600040, 0xa2301248, 0x168d8230, 0x00800080 },
+ { 0x80600040, 0xa2501248, 0x168d8250, 0x00800080 },
+ { 0x80600040, 0xa2701248, 0x168d8270, 0x00800080 },
+ { 0x80400040, 0xc4101248, 0x168a8410, 0x00800080 },
+ { 0x80400040, 0xc4301248, 0x168a8430, 0x00800080 },
+ { 0x80400040, 0xc4501248, 0x168a8450, 0x00800080 },
+ { 0x80400040, 0xc4701248, 0x168a8470, 0x00800080 },
+ { 0x00400040, 0x22001a40, 0x166902c0, 0x02000200 },
+ { 0x80400040, 0xc0001248, 0x168a8000, 0x00800080 },
+ { 0x80400040, 0xc0201248, 0x168a8020, 0x00800080 },
+ { 0x80400040, 0xc0401248, 0x168a8040, 0x00800080 },
+ { 0x80400040, 0xc0601248, 0x168a8060, 0x00800080 },
+ { 0x80600040, 0xa2001248, 0x168d8200, 0x00800080 },
+ { 0x80600040, 0xa2201248, 0x168d8220, 0x00800080 },
+ { 0x80600040, 0xa2401248, 0x168d8240, 0x00800080 },
+ { 0x80600040, 0xa2601248, 0x168d8260, 0x00800080 },
+ { 0x80400040, 0xc4001248, 0x168a8400, 0x00800080 },
+ { 0x80400040, 0xc4201248, 0x168a8420, 0x00800080 },
+ { 0x80400040, 0xc4401248, 0x168a8440, 0x00800080 },
+ { 0x80400040, 0xc4601248, 0x168a8460, 0x00800080 },
+ { 0x80400040, 0xc0101248, 0x168a8010, 0x00800080 },
+ { 0x80400040, 0xc0301248, 0x168a8030, 0x00800080 },
+ { 0x80400040, 0xc0501248, 0x168a8050, 0x00800080 },
+ { 0x80400040, 0xc0701248, 0x168a8070, 0x00800080 },
+ { 0x80600040, 0xa2101248, 0x168d8210, 0x00800080 },
+ { 0x80600040, 0xa2301248, 0x168d8230, 0x00800080 },
+ { 0x80600040, 0xa2501248, 0x168d8250, 0x00800080 },
+ { 0x80600040, 0xa2701248, 0x168d8270, 0x00800080 },
+ { 0x80400040, 0xc4101248, 0x168a8410, 0x00800080 },
+ { 0x80400040, 0xc4301248, 0x168a8430, 0x00800080 },
+ { 0x80400040, 0xc4501248, 0x168a8450, 0x00800080 },
+ { 0x80400040, 0xc4701248, 0x168a8470, 0x00800080 },
+ { 0x00400040, 0x22001a40, 0x166902c0, 0x04000400 },
+ { 0x80400040, 0xc0001248, 0x168a8000, 0x00800080 },
+ { 0x80400040, 0xc0201248, 0x168a8020, 0x00800080 },
+ { 0x80400040, 0xc0401248, 0x168a8040, 0x00800080 },
+ { 0x80400040, 0xc0601248, 0x168a8060, 0x00800080 },
+ { 0x80600040, 0xa2001248, 0x168d8200, 0x00800080 },
+ { 0x80600040, 0xa2201248, 0x168d8220, 0x00800080 },
+ { 0x80600040, 0xa2401248, 0x168d8240, 0x00800080 },
+ { 0x80600040, 0xa2601248, 0x168d8260, 0x00800080 },
+ { 0x80400040, 0xc4001248, 0x168a8400, 0x00800080 },
+ { 0x80400040, 0xc4201248, 0x168a8420, 0x00800080 },
+ { 0x80400040, 0xc4401248, 0x168a8440, 0x00800080 },
+ { 0x80400040, 0xc4601248, 0x168a8460, 0x00800080 },
+ { 0x80400040, 0xc0101248, 0x168a8010, 0x00800080 },
+ { 0x80400040, 0xc0301248, 0x168a8030, 0x00800080 },
+ { 0x80400040, 0xc0501248, 0x168a8050, 0x00800080 },
+ { 0x80400040, 0xc0701248, 0x168a8070, 0x00800080 },
+ { 0x80600040, 0xa2101248, 0x168d8210, 0x00800080 },
+ { 0x80600040, 0xa2301248, 0x168d8230, 0x00800080 },
+ { 0x80600040, 0xa2501248, 0x168d8250, 0x00800080 },
+ { 0x80600040, 0xa2701248, 0x168d8270, 0x00800080 },
+ { 0x80400040, 0xc4101248, 0x168a8410, 0x00800080 },
+ { 0x80400040, 0xc4301248, 0x168a8430, 0x00800080 },
+ { 0x80400040, 0xc4501248, 0x168a8450, 0x00800080 },
+ { 0x80400040, 0xc4701248, 0x168a8470, 0x00800080 },
+ { 0x00400040, 0x22001a40, 0x166902c0, 0x06000600 },
+ { 0x80400040, 0xc0001248, 0x168a8000, 0x00800080 },
+ { 0x80400040, 0xc0201248, 0x168a8020, 0x00800080 },
+ { 0x80400040, 0xc0401248, 0x168a8040, 0x00800080 },
+ { 0x80400040, 0xc0601248, 0x168a8060, 0x00800080 },
+ { 0x80600040, 0xa2001248, 0x168d8200, 0x00800080 },
+ { 0x80600040, 0xa2201248, 0x168d8220, 0x00800080 },
+ { 0x80600040, 0xa2401248, 0x168d8240, 0x00800080 },
+ { 0x80600040, 0xa2601248, 0x168d8260, 0x00800080 },
+ { 0x80400040, 0xc4001248, 0x168a8400, 0x00800080 },
+ { 0x80400040, 0xc4201248, 0x168a8420, 0x00800080 },
+ { 0x80400040, 0xc4401248, 0x168a8440, 0x00800080 },
+ { 0x80400040, 0xc4601248, 0x168a8460, 0x00800080 },
+ { 0x80400040, 0xc0101248, 0x168a8010, 0x00800080 },
+ { 0x80400040, 0xc0301248, 0x168a8030, 0x00800080 },
+ { 0x80400040, 0xc0501248, 0x168a8050, 0x00800080 },
+ { 0x80400040, 0xc0701248, 0x168a8070, 0x00800080 },
+ { 0x80600040, 0xa2101248, 0x168d8210, 0x00800080 },
+ { 0x80600040, 0xa2301248, 0x168d8230, 0x00800080 },
+ { 0x80600040, 0xa2501248, 0x168d8250, 0x00800080 },
+ { 0x80600040, 0xa2701248, 0x168d8270, 0x00800080 },
+ { 0x80400040, 0xc4101248, 0x168a8410, 0x00800080 },
+ { 0x80400040, 0xc4301248, 0x168a8430, 0x00800080 },
+ { 0x80400040, 0xc4501248, 0x168a8450, 0x00800080 },
+ { 0x80400040, 0xc4701248, 0x168a8470, 0x00800080 },
+ { 0x00400040, 0x22001a40, 0x166902c0, 0x08000800 },
+ { 0x00400040, 0x22001a40, 0x166902c0, 0x00000000 },
+ { 0x00600001, 0x23800208, 0x008d0360, 0x00000000 },
+ { 0x00600001, 0x24a00208, 0x008d0360, 0x00000000 },
+ { 0x00000040, 0x24a40a28, 0x0e000364, 0x00000002 },
+ { 0x00600201, 0xec002288, 0x00cf8001, 0x00000000 },
+ { 0x00600201, 0xec202288, 0x00cf8021, 0x00000000 },
+ { 0x00800601, 0xc8002288, 0x00d28201, 0x00000000 },
+ { 0x00800601, 0xc8202288, 0x00d28221, 0x00000000 },
+ { 0x00600401, 0xea002288, 0x00cf8401, 0x00000000 },
+ { 0x00600401, 0xea202288, 0x00cf8421, 0x00000000 },
+ { 0x00600201, 0xed202288, 0x00cf8041, 0x00000000 },
+ { 0x00600201, 0xed402288, 0x00cf8061, 0x00000000 },
+ { 0x00800601, 0xc9202288, 0x00d28241, 0x00000000 },
+ { 0x00800601, 0xc9402288, 0x00d28261, 0x00000000 },
+ { 0x00600401, 0xeb202288, 0x00cf8441, 0x00000000 },
+ { 0x00600401, 0xeb402288, 0x00cf8461, 0x00000000 },
+ { 0x0c000031, 0x20002220, 0x06000380, 0x060a8018 },
+ { 0x0c000031, 0x20002220, 0x060004a0, 0x060a8018 },
+ { 0x00400040, 0x22001a40, 0x166902c0, 0x02000200 },
+ { 0x00000040, 0x23840a28, 0x0e000364, 0x00000004 },
+ { 0x00000040, 0x24a40a28, 0x0e000364, 0x00000006 },
+ { 0x00600201, 0xec002288, 0x00cf8001, 0x00000000 },
+ { 0x00600201, 0xec202288, 0x00cf8021, 0x00000000 },
+ { 0x00800601, 0xc8002288, 0x00d28201, 0x00000000 },
+ { 0x00800601, 0xc8202288, 0x00d28221, 0x00000000 },
+ { 0x00600401, 0xea002288, 0x00cf8401, 0x00000000 },
+ { 0x00600401, 0xea202288, 0x00cf8421, 0x00000000 },
+ { 0x00600201, 0xed202288, 0x00cf8041, 0x00000000 },
+ { 0x00600201, 0xed402288, 0x00cf8061, 0x00000000 },
+ { 0x00800601, 0xc9202288, 0x00d28241, 0x00000000 },
+ { 0x00800601, 0xc9402288, 0x00d28261, 0x00000000 },
+ { 0x00600401, 0xeb202288, 0x00cf8441, 0x00000000 },
+ { 0x00600401, 0xeb402288, 0x00cf8461, 0x00000000 },
+ { 0x0c000031, 0x20002220, 0x06000380, 0x060a8018 },
+ { 0x0c000031, 0x20002220, 0x060004a0, 0x060a8018 },
+ { 0x00400040, 0x22001a40, 0x166902c0, 0x04000400 },
+ { 0x00000040, 0x23840a28, 0x0e000364, 0x00000008 },
+ { 0x00000040, 0x24a40a28, 0x0e000364, 0x0000000a },
+ { 0x00600201, 0xec002288, 0x00cf8001, 0x00000000 },
+ { 0x00600201, 0xec202288, 0x00cf8021, 0x00000000 },
+ { 0x00800601, 0xc8002288, 0x00d28201, 0x00000000 },
+ { 0x00800601, 0xc8202288, 0x00d28221, 0x00000000 },
+ { 0x00600401, 0xea002288, 0x00cf8401, 0x00000000 },
+ { 0x00600401, 0xea202288, 0x00cf8421, 0x00000000 },
+ { 0x00600201, 0xed202288, 0x00cf8041, 0x00000000 },
+ { 0x00600201, 0xed402288, 0x00cf8061, 0x00000000 },
+ { 0x00800601, 0xc9202288, 0x00d28241, 0x00000000 },
+ { 0x00800601, 0xc9402288, 0x00d28261, 0x00000000 },
+ { 0x00600401, 0xeb202288, 0x00cf8441, 0x00000000 },
+ { 0x00600401, 0xeb402288, 0x00cf8461, 0x00000000 },
+ { 0x0c000031, 0x20002220, 0x06000380, 0x060a8018 },
+ { 0x0c000031, 0x20002220, 0x060004a0, 0x060a8018 },
+ { 0x00400040, 0x22001a40, 0x166902c0, 0x06000600 },
+ { 0x00000040, 0x23840a28, 0x0e000364, 0x0000000c },
+ { 0x00000040, 0x24a40a28, 0x0e000364, 0x0000000e },
+ { 0x00600201, 0xec002288, 0x00cf8001, 0x00000000 },
+ { 0x00600201, 0xec202288, 0x00cf8021, 0x00000000 },
+ { 0x00800601, 0xc8002288, 0x00d28201, 0x00000000 },
+ { 0x00800601, 0xc8202288, 0x00d28221, 0x00000000 },
+ { 0x00600401, 0xea002288, 0x00cf8401, 0x00000000 },
+ { 0x00600401, 0xea202288, 0x00cf8421, 0x00000000 },
+ { 0x00600201, 0xed202288, 0x00cf8041, 0x00000000 },
+ { 0x00600201, 0xed402288, 0x00cf8061, 0x00000000 },
+ { 0x00800601, 0xc9202288, 0x00d28241, 0x00000000 },
+ { 0x00800601, 0xc9402288, 0x00d28261, 0x00000000 },
+ { 0x00600401, 0xeb202288, 0x00cf8441, 0x00000000 },
+ { 0x00600401, 0xeb402288, 0x00cf8461, 0x00000000 },
+ { 0x0c000031, 0x20002220, 0x06000380, 0x060a8018 },
+ { 0x0c000031, 0x20002220, 0x060004a0, 0x060a8018 },
+ { 0x00600001, 0x2fe00208, 0x008d0000, 0x00000000 },
+ { 0x07000031, 0x20002220, 0x0e000fe0, 0x82000010 },
diff --git a/src/shaders/post_processing/gen8/pa_to_pl2.asm b/src/shaders/post_processing/gen8/pa_to_pl2.asm
new file mode 100644
index 0000000..adc81fd
--- /dev/null
+++ b/src/shaders/post_processing/gen8/pa_to_pl2.asm
@@ -0,0 +1,17 @@
+// Module name: AVS -- pa_to_pl2 kernel; the body consists only of the include files listed below, in order
+.kernel YUY2_TO_NV12
+.code
+// Same include sequence as pa_to_pa.asm except for the save stage, which here is Save_AVS_NV12.g8a (judging by the file name, the NV12 output writer -- TODO confirm).
+#include "VP_Setup.g8a"
+#include "Set_Layer_0.g8a"
+#include "Set_AVS_Buf_0123_VYUA.g8a"
+#include "PA_AVS_Buf_0.g8a"
+#include "PA_AVS_Buf_1.g8a"
+#include "PA_AVS_Buf_2.g8a"
+#include "PA_AVS_Buf_3.g8a"
+#include "Save_AVS_NV12.g8a"
+#include "EOT.g8a"
+
+.end_code
+
+.end_kernel
diff --git a/src/shaders/post_processing/gen8/pa_to_pl2.g8b b/src/shaders/post_processing/gen8/pa_to_pl2.g8b
new file mode 100644
index 0000000..3282c51
--- /dev/null
+++ b/src/shaders/post_processing/gen8/pa_to_pl2.g8b
@@ -0,0 +1,236 @@
+ { 0x00600001, 0x23600208, 0x008d0000, 0x00000000 },
+ { 0x00600001, 0x23200208, 0x008d0000, 0x00000000 },
+ { 0x00600001, 0x23400208, 0x008d0000, 0x00000000 },
+ { 0x00600041, 0x20603ae8, 0x3a8d0060, 0x000000f0 },
+ { 0x00200001, 0x21141ae8, 0x004500e0, 0x00000000 },
+ { 0x01000010, 0x20002220, 0x1600005a, 0x00010001 },
+ { 0x00000008, 0x22201248, 0x16000044, 0x00000000 },
+ { 0x00000005, 0x22201248, 0x16000220, 0x00030003 },
+ { 0x01000010, 0x20001261, 0x16000220, 0x00010001 },
+ { 0x00010020, 0x34000005, 0x0e001400, 0x00000090 },
+ { 0x01000010, 0x20001261, 0x16000220, 0x00020002 },
+ { 0x00010020, 0x34000005, 0x0e001400, 0x000000f0 },
+ { 0x01000010, 0x20001261, 0x16000220, 0x00030003 },
+ { 0x00010020, 0x34000005, 0x0e001400, 0x00000180 },
+ { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000114 },
+ { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000118 },
+ { 0x00000020, 0x34000004, 0x0e001400, 0x000001a0 },
+ { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000118 },
+ { 0x00000001, 0x220012e8, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x22203ae8, 0x3a004114, 0x00000200 },
+ { 0x00000040, 0x22203ae8, 0x3e000220, 0xc1800000 },
+ { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000220 },
+ { 0x00000020, 0x34000004, 0x0e001400, 0x00000120 },
+ { 0x00110001, 0x220012e8, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x22203ae8, 0x3a004114, 0x00000200 },
+ { 0x00110040, 0x22203ae8, 0x3e000220, 0xc1800000 },
+ { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000220 },
+ { 0x00000001, 0x220012e8, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x22203ae8, 0x3a004118, 0x00000200 },
+ { 0x00000040, 0x22203ae8, 0x3e000220, 0xc1800000 },
+ { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000220 },
+ { 0x00000020, 0x34000004, 0x0e001400, 0x00000070 },
+ { 0x00110001, 0x220012e8, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x22203ae8, 0x3a004118, 0x00000200 },
+ { 0x00110040, 0x22203ae8, 0x3e000220, 0xc1800000 },
+ { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000220 },
+ { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00800001, 0x22a01648, 0x10000000, 0xffffffff },
+ { 0x00000005, 0x23021288, 0x16000044, 0x00030003 },
+ { 0x00000001, 0x23203ae8, 0x000000f8, 0x00000000 },
+ { 0x00000001, 0x23383ae8, 0x000000f4, 0x00000000 },
+ { 0x00000001, 0x23303ae8, 0x00000060, 0x00000000 },
+ { 0x00000001, 0x23343ae8, 0x00000080, 0x00000000 },
+ { 0x00000001, 0x23283ae8, 0x000000c0, 0x00000000 },
+ { 0x00000001, 0x232c3ae8, 0x000000a0, 0x00000000 },
+ { 0x00000001, 0x233c0608, 0x00000000, 0x00000000 },
+ { 0x00000040, 0x233c0208, 0x0600033c, 0x08000000 },
+ { 0x00000001, 0x24083ae0, 0x000000c0, 0x00000000 },
+ { 0x00000048, 0x24083ae0, 0x3e000060, 0x41000000 },
+ { 0x00000248, 0x22e83ae8, 0x3e0000f4, 0x41e00000 },
+ { 0x00000641, 0x22e43ae8, 0x3e000080, 0x40800000 },
+ { 0x00000001, 0x24103ae0, 0x00000060, 0x00000000 },
+ { 0x00000648, 0x22f03ae8, 0x3e0000f4, 0x41000000 },
+ { 0x00000401, 0x22f40608, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00400001, 0x24003660, 0x30000000, 0x000062ea },
+ { 0x00400040, 0x24001860, 0x16690400, 0x00460046 },
+ { 0x00400009, 0x22c01868, 0x16690400, 0x00050005 },
+ { 0x00400001, 0x24003660, 0x30000000, 0x00006420 },
+ { 0x00400040, 0x24001860, 0x16690400, 0x00400040 },
+ { 0x00400209, 0x22401868, 0x16690400, 0x00050005 },
+ { 0x00000401, 0x22500608, 0x00000000, 0x01000100 },
+ { 0x00000001, 0x22d00608, 0x00000000, 0x00400040 },
+ { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 },
+ { 0x00000040, 0x22000200, 0x060002f4, 0x050eb000 },
+ { 0x00000001, 0x22080608, 0x00000000, 0x00000000 },
+ { 0x00000001, 0x23240608, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 },
+ { 0x02000031, 0x28002248, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000001, 0x22d00608, 0x00000000, 0x00400040 },
+ { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 },
+ { 0x00000040, 0x22000200, 0x060002f4, 0x050eb000 },
+ { 0x00000001, 0x22080608, 0x00000000, 0x00000000 },
+ { 0x00000001, 0x23240608, 0x00000000, 0x00000001 },
+ { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 },
+ { 0x02000031, 0x2a002248, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000001, 0x22d00608, 0x00000000, 0x00400040 },
+ { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 },
+ { 0x00000040, 0x22000200, 0x060002f4, 0x050eb000 },
+ { 0x00000001, 0x22080608, 0x00000000, 0x00000000 },
+ { 0x00000001, 0x23240608, 0x00000000, 0x00000002 },
+ { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 },
+ { 0x02000031, 0x2c002248, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000001, 0x22d00608, 0x00000000, 0x00400040 },
+ { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 },
+ { 0x00000040, 0x22000200, 0x060002f4, 0x050eb000 },
+ { 0x00000001, 0x22080608, 0x00000000, 0x00000000 },
+ { 0x00000001, 0x23240608, 0x00000000, 0x00000003 },
+ { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 },
+ { 0x02000031, 0x2e002248, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00400040, 0x22001a40, 0x166902c0, 0x00000000 },
+ { 0x00400040, 0x22081a40, 0x166902c0, 0x02000200 },
+ { 0x00600001, 0x23800208, 0x008d0360, 0x00000000 },
+ { 0x00600001, 0x24a00208, 0x008d0360, 0x00000000 },
+ { 0x00200201, 0x23801a28, 0x004500e0, 0x00000000 },
+ { 0x00000201, 0x24a01a28, 0x000000e0, 0x00000000 },
+ { 0x00000608, 0x24a41a28, 0x1e0000e2, 0x00010001 },
+ { 0x00000401, 0x23880608, 0x00000000, 0x000f000f },
+ { 0x00000401, 0x24a80608, 0x00000000, 0x0007000f },
+ { 0x80800040, 0xa2001248, 0x16b18200, 0x00800080 },
+ { 0x80800040, 0xa2201248, 0x16b18220, 0x00800080 },
+ { 0x80800040, 0xa2401248, 0x16b18240, 0x00800080 },
+ { 0x80800040, 0xa2601248, 0x16b18260, 0x00800080 },
+ { 0x00600008, 0x45c01248, 0x1eae8400, 0x00010001 },
+ { 0x00600008, 0x45e01248, 0x1eae8420, 0x00010001 },
+ { 0x00600008, 0x46001248, 0x1eae8440, 0x00010001 },
+ { 0x00600008, 0x46201248, 0x1eae8460, 0x00010001 },
+ { 0x00600040, 0x45c01248, 0x12ae05c0, 0x00ae05e0 },
+ { 0x80600040, 0xc4001248, 0x16ae05c0, 0x00800080 },
+ { 0x00600040, 0x46001248, 0x12ae0600, 0x00ae0620 },
+ { 0x80600040, 0xc4401248, 0x16ae0600, 0x00800080 },
+ { 0x00600008, 0x46401248, 0x1eae8000, 0x00010001 },
+ { 0x00600008, 0x46601248, 0x1eae8020, 0x00010001 },
+ { 0x00600008, 0x46801248, 0x1eae8040, 0x00010001 },
+ { 0x00600008, 0x46a01248, 0x1eae8060, 0x00010001 },
+ { 0x00600040, 0x46401248, 0x12ae0640, 0x00ae0660 },
+ { 0x80600040, 0xc0001248, 0x16ae0640, 0x00800080 },
+ { 0x00600040, 0x46801248, 0x12ae0680, 0x00ae06a0 },
+ { 0x80600040, 0xc0401248, 0x16ae0680, 0x00800080 },
+ { 0x00400040, 0x22001a40, 0x166902c0, 0x04000400 },
+ { 0x80800040, 0xaa001248, 0x16b18a00, 0x00800080 },
+ { 0x80800040, 0xaa201248, 0x16b18a20, 0x00800080 },
+ { 0x80800040, 0xaa401248, 0x16b18a40, 0x00800080 },
+ { 0x80800040, 0xaa601248, 0x16b18a60, 0x00800080 },
+ { 0x00600008, 0x45c01248, 0x1eae8c00, 0x00010001 },
+ { 0x00600008, 0x45e01248, 0x1eae8c20, 0x00010001 },
+ { 0x00600008, 0x46001248, 0x1eae8c40, 0x00010001 },
+ { 0x00600008, 0x46201248, 0x1eae8c60, 0x00010001 },
+ { 0x00600040, 0x45c01248, 0x12ae05c0, 0x00ae05e0 },
+ { 0x80600040, 0xcc001248, 0x16ae05c0, 0x00800080 },
+ { 0x00600040, 0x46001248, 0x12ae0600, 0x00ae0620 },
+ { 0x80600040, 0xcc401248, 0x16ae0600, 0x00800080 },
+ { 0x00600008, 0x46401248, 0x1eae8800, 0x00010001 },
+ { 0x00600008, 0x46601248, 0x1eae8820, 0x00010001 },
+ { 0x00600008, 0x46801248, 0x1eae8840, 0x00010001 },
+ { 0x00600008, 0x46a01248, 0x1eae8860, 0x00010001 },
+ { 0x00600040, 0x46401248, 0x12ae0640, 0x00ae0660 },
+ { 0x80600040, 0xc8001248, 0x16ae0640, 0x00800080 },
+ { 0x00600040, 0x46801248, 0x12ae0680, 0x00ae06a0 },
+ { 0x80600040, 0xc8401248, 0x16ae0680, 0x00800080 },
+ { 0x00400040, 0x22081a40, 0x166902c0, 0x06000600 },
+ { 0x80800040, 0xa2001248, 0x16b18200, 0x00800080 },
+ { 0x80800040, 0xa2201248, 0x16b18220, 0x00800080 },
+ { 0x80800040, 0xa2401248, 0x16b18240, 0x00800080 },
+ { 0x80800040, 0xa2601248, 0x16b18260, 0x00800080 },
+ { 0x00600008, 0x45c01248, 0x1eae8400, 0x00010001 },
+ { 0x00600008, 0x45e01248, 0x1eae8420, 0x00010001 },
+ { 0x00600008, 0x46001248, 0x1eae8440, 0x00010001 },
+ { 0x00600008, 0x46201248, 0x1eae8460, 0x00010001 },
+ { 0x00600040, 0x45c01248, 0x12ae05c0, 0x00ae05e0 },
+ { 0x80600040, 0xc4001248, 0x16ae05c0, 0x00800080 },
+ { 0x00600040, 0x46001248, 0x12ae0600, 0x00ae0620 },
+ { 0x80600040, 0xc4401248, 0x16ae0600, 0x00800080 },
+ { 0x00600008, 0x46401248, 0x1eae8000, 0x00010001 },
+ { 0x00600008, 0x46601248, 0x1eae8020, 0x00010001 },
+ { 0x00600008, 0x46801248, 0x1eae8040, 0x00010001 },
+ { 0x00600008, 0x46a01248, 0x1eae8060, 0x00010001 },
+ { 0x00600040, 0x46401248, 0x12ae0640, 0x00ae0660 },
+ { 0x80600040, 0xc0001248, 0x16ae0640, 0x00800080 },
+ { 0x00600040, 0x46801248, 0x12ae0680, 0x00ae06a0 },
+ { 0x80600040, 0xc0401248, 0x16ae0680, 0x00800080 },
+ { 0x00400040, 0x22001a40, 0x166902c0, 0x04000400 },
+ { 0x80800040, 0xaa001248, 0x16b18a00, 0x00800080 },
+ { 0x80800040, 0xaa201248, 0x16b18a20, 0x00800080 },
+ { 0x80800040, 0xaa401248, 0x16b18a40, 0x00800080 },
+ { 0x80800040, 0xaa601248, 0x16b18a60, 0x00800080 },
+ { 0x00600008, 0x45c01248, 0x1eae8c00, 0x00010001 },
+ { 0x00600008, 0x45e01248, 0x1eae8c20, 0x00010001 },
+ { 0x00600008, 0x46001248, 0x1eae8c40, 0x00010001 },
+ { 0x00600008, 0x46201248, 0x1eae8c60, 0x00010001 },
+ { 0x00600040, 0x45c01248, 0x12ae05c0, 0x00ae05e0 },
+ { 0x80600040, 0xcc001248, 0x16ae05c0, 0x00800080 },
+ { 0x00600040, 0x46001248, 0x12ae0600, 0x00ae0620 },
+ { 0x80600040, 0xcc401248, 0x16ae0600, 0x00800080 },
+ { 0x00600008, 0x46401248, 0x1eae8800, 0x00010001 },
+ { 0x00600008, 0x46601248, 0x1eae8820, 0x00010001 },
+ { 0x00600008, 0x46801248, 0x1eae8840, 0x00010001 },
+ { 0x00600008, 0x46a01248, 0x1eae8860, 0x00010001 },
+ { 0x00600040, 0x46401248, 0x12ae0640, 0x00ae0660 },
+ { 0x80600040, 0xc8001248, 0x16ae0640, 0x00800080 },
+ { 0x00600040, 0x46801248, 0x12ae0680, 0x00ae06a0 },
+ { 0x80600040, 0xc8401248, 0x16ae0680, 0x00800080 },
+ { 0x00400040, 0x22081a40, 0x166902c0, 0x06000600 },
+ { 0x00400040, 0x22001a40, 0x166902c0, 0x00000000 },
+ { 0x00400040, 0x22081a40, 0x166902c0, 0x02000200 },
+ { 0x00800201, 0x23a02288, 0x00d28201, 0x00000000 },
+ { 0x00800401, 0x23b02288, 0x00d28221, 0x00000000 },
+ { 0x00800201, 0x23c02288, 0x00d28241, 0x00000000 },
+ { 0x00800401, 0x23d02288, 0x00d28261, 0x00000000 },
+ { 0x00600201, 0x44c02288, 0x00cf8401, 0x00000000 },
+ { 0x00600601, 0x44d02288, 0x00cf8441, 0x00000000 },
+ { 0x00600601, 0x44c12288, 0x00cf8001, 0x00000000 },
+ { 0x00600401, 0x44d12288, 0x00cf8041, 0x00000000 },
+ { 0x00400040, 0x22001a40, 0x166902c0, 0x04000400 },
+ { 0x00800201, 0x23e02288, 0x00d28a01, 0x00000000 },
+ { 0x00800401, 0x23f02288, 0x00d28a21, 0x00000000 },
+ { 0x00800201, 0x24002288, 0x00d28a41, 0x00000000 },
+ { 0x00800401, 0x24102288, 0x00d28a61, 0x00000000 },
+ { 0x00600201, 0x44e02288, 0x00cf8c01, 0x00000000 },
+ { 0x00600601, 0x44f02288, 0x00cf8c41, 0x00000000 },
+ { 0x00600601, 0x44e12288, 0x00cf8801, 0x00000000 },
+ { 0x00600401, 0x44f12288, 0x00cf8841, 0x00000000 },
+ { 0x00400040, 0x22081a40, 0x166902c0, 0x06000600 },
+ { 0x00800201, 0x24202288, 0x00d28201, 0x00000000 },
+ { 0x00800401, 0x24302288, 0x00d28221, 0x00000000 },
+ { 0x00800201, 0x24402288, 0x00d28241, 0x00000000 },
+ { 0x00800401, 0x24502288, 0x00d28261, 0x00000000 },
+ { 0x00600201, 0x45002288, 0x00cf8401, 0x00000000 },
+ { 0x00600601, 0x45102288, 0x00cf8441, 0x00000000 },
+ { 0x00600601, 0x45012288, 0x00cf8001, 0x00000000 },
+ { 0x00600401, 0x45112288, 0x00cf8041, 0x00000000 },
+ { 0x00800201, 0x24602288, 0x00d28a01, 0x00000000 },
+ { 0x00800401, 0x24702288, 0x00d28a21, 0x00000000 },
+ { 0x00800201, 0x24802288, 0x00d28a41, 0x00000000 },
+ { 0x00800401, 0x24902288, 0x00d28a61, 0x00000000 },
+ { 0x00600201, 0x45202288, 0x00cf8c01, 0x00000000 },
+ { 0x00600601, 0x45302288, 0x00cf8c41, 0x00000000 },
+ { 0x00600601, 0x45212288, 0x00cf8801, 0x00000000 },
+ { 0x00600401, 0x45312288, 0x00cf8841, 0x00000000 },
+ { 0x0c000031, 0x20002220, 0x06000380, 0x120a8018 },
+ { 0x0c000031, 0x20002220, 0x060004a0, 0x0a0a8019 },
+ { 0x00600001, 0x2fe00208, 0x008d0000, 0x00000000 },
+ { 0x07000031, 0x20002220, 0x0e000fe0, 0x82000010 },
diff --git a/src/shaders/post_processing/gen8/pa_to_pl3.asm b/src/shaders/post_processing/gen8/pa_to_pl3.asm
new file mode 100644
index 0000000..44c7f9e
--- /dev/null
+++ b/src/shaders/post_processing/gen8/pa_to_pl3.asm
@@ -0,0 +1,17 @@
+// Module name: AVS
+.kernel PA_TO_PL3
+.code
+// Kernel is named after this file (pa_to_pl3), as in pl2_to_pa.asm / pl2_to_pl2.asm. Body = the fragments below in include order; judging by the fragment names it reads a PA surface via AVS buffers 0-3 and saves PL3 -- confirm in the .g8a sources.
+#include "VP_Setup.g8a"
+#include "Set_Layer_0.g8a"
+#include "Set_AVS_Buf_0123_VYUA.g8a"
+#include "PA_AVS_Buf_0.g8a"
+#include "PA_AVS_Buf_1.g8a"
+#include "PA_AVS_Buf_2.g8a"
+#include "PA_AVS_Buf_3.g8a"
+#include "Save_AVS_PL3.g8a"
+#include "EOT.g8a"
+
+.end_code
+
+.end_kernel
diff --git a/src/shaders/post_processing/gen8/pa_to_pl3.g8b b/src/shaders/post_processing/gen8/pa_to_pl3.g8b
new file mode 100644
index 0000000..3d1d087
--- /dev/null
+++ b/src/shaders/post_processing/gen8/pa_to_pl3.g8b
@@ -0,0 +1,189 @@
+ { 0x00600001, 0x23600208, 0x008d0000, 0x00000000 },
+ { 0x00600001, 0x23200208, 0x008d0000, 0x00000000 },
+ { 0x00600001, 0x23400208, 0x008d0000, 0x00000000 },
+ { 0x00600041, 0x20603ae8, 0x3a8d0060, 0x000000f0 },
+ { 0x00200001, 0x21141ae8, 0x004500e0, 0x00000000 },
+ { 0x01000010, 0x20002220, 0x1600005a, 0x00010001 },
+ { 0x00000008, 0x22201248, 0x16000044, 0x00000000 },
+ { 0x00000005, 0x22201248, 0x16000220, 0x00030003 },
+ { 0x01000010, 0x20001261, 0x16000220, 0x00010001 },
+ { 0x00010020, 0x34000005, 0x0e001400, 0x00000090 },
+ { 0x01000010, 0x20001261, 0x16000220, 0x00020002 },
+ { 0x00010020, 0x34000005, 0x0e001400, 0x000000f0 },
+ { 0x01000010, 0x20001261, 0x16000220, 0x00030003 },
+ { 0x00010020, 0x34000005, 0x0e001400, 0x00000180 },
+ { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000114 },
+ { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000118 },
+ { 0x00000020, 0x34000004, 0x0e001400, 0x000001a0 },
+ { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000118 },
+ { 0x00000001, 0x220012e8, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x22203ae8, 0x3a004114, 0x00000200 },
+ { 0x00000040, 0x22203ae8, 0x3e000220, 0xc1800000 },
+ { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000220 },
+ { 0x00000020, 0x34000004, 0x0e001400, 0x00000120 },
+ { 0x00110001, 0x220012e8, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x22203ae8, 0x3a004114, 0x00000200 },
+ { 0x00110040, 0x22203ae8, 0x3e000220, 0xc1800000 },
+ { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000220 },
+ { 0x00000001, 0x220012e8, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x22203ae8, 0x3a004118, 0x00000200 },
+ { 0x00000040, 0x22203ae8, 0x3e000220, 0xc1800000 },
+ { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000220 },
+ { 0x00000020, 0x34000004, 0x0e001400, 0x00000070 },
+ { 0x00110001, 0x220012e8, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x22203ae8, 0x3a004118, 0x00000200 },
+ { 0x00110040, 0x22203ae8, 0x3e000220, 0xc1800000 },
+ { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000220 },
+ { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00800001, 0x22a01648, 0x10000000, 0xffffffff },
+ { 0x00000005, 0x23021288, 0x16000044, 0x00030003 },
+ { 0x00000001, 0x23203ae8, 0x000000f8, 0x00000000 },
+ { 0x00000001, 0x23383ae8, 0x000000f4, 0x00000000 },
+ { 0x00000001, 0x23303ae8, 0x00000060, 0x00000000 },
+ { 0x00000001, 0x23343ae8, 0x00000080, 0x00000000 },
+ { 0x00000001, 0x23283ae8, 0x000000c0, 0x00000000 },
+ { 0x00000001, 0x232c3ae8, 0x000000a0, 0x00000000 },
+ { 0x00000001, 0x233c0608, 0x00000000, 0x00000000 },
+ { 0x00000040, 0x233c0208, 0x0600033c, 0x08000000 },
+ { 0x00000001, 0x24083ae0, 0x000000c0, 0x00000000 },
+ { 0x00000048, 0x24083ae0, 0x3e000060, 0x41000000 },
+ { 0x00000248, 0x22e83ae8, 0x3e0000f4, 0x41e00000 },
+ { 0x00000641, 0x22e43ae8, 0x3e000080, 0x40800000 },
+ { 0x00000001, 0x24103ae0, 0x00000060, 0x00000000 },
+ { 0x00000648, 0x22f03ae8, 0x3e0000f4, 0x41000000 },
+ { 0x00000401, 0x22f40608, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00400001, 0x24003660, 0x30000000, 0x000062ea },
+ { 0x00400040, 0x24001860, 0x16690400, 0x00460046 },
+ { 0x00400009, 0x22c01868, 0x16690400, 0x00050005 },
+ { 0x00400001, 0x24003660, 0x30000000, 0x00006420 },
+ { 0x00400040, 0x24001860, 0x16690400, 0x00400040 },
+ { 0x00400209, 0x22401868, 0x16690400, 0x00050005 },
+ { 0x00000401, 0x22500608, 0x00000000, 0x01000100 },
+ { 0x00000001, 0x22d00608, 0x00000000, 0x00400040 },
+ { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 },
+ { 0x00000040, 0x22000200, 0x060002f4, 0x050eb000 },
+ { 0x00000001, 0x22080608, 0x00000000, 0x00000000 },
+ { 0x00000001, 0x23240608, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 },
+ { 0x02000031, 0x28002248, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000001, 0x22d00608, 0x00000000, 0x00400040 },
+ { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 },
+ { 0x00000040, 0x22000200, 0x060002f4, 0x050eb000 },
+ { 0x00000001, 0x22080608, 0x00000000, 0x00000000 },
+ { 0x00000001, 0x23240608, 0x00000000, 0x00000001 },
+ { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 },
+ { 0x02000031, 0x2a002248, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000001, 0x22d00608, 0x00000000, 0x00400040 },
+ { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 },
+ { 0x00000040, 0x22000200, 0x060002f4, 0x050eb000 },
+ { 0x00000001, 0x22080608, 0x00000000, 0x00000000 },
+ { 0x00000001, 0x23240608, 0x00000000, 0x00000002 },
+ { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 },
+ { 0x02000031, 0x2c002248, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000001, 0x22d00608, 0x00000000, 0x00400040 },
+ { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 },
+ { 0x00000040, 0x22000200, 0x060002f4, 0x050eb000 },
+ { 0x00000001, 0x22080608, 0x00000000, 0x00000000 },
+ { 0x00000001, 0x23240608, 0x00000000, 0x00000003 },
+ { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 },
+ { 0x02000031, 0x2e002248, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00400040, 0x22001a40, 0x166902c0, 0x00000000 },
+ { 0x00400040, 0x22081a40, 0x166902c0, 0x02000200 },
+ { 0x00600001, 0x23800208, 0x008d0360, 0x00000000 },
+ { 0x00600001, 0x24a00208, 0x008d0360, 0x00000000 },
+ { 0x00600001, 0x25c00208, 0x008d0360, 0x00000000 },
+ { 0x00200201, 0x23801a28, 0x004500e0, 0x00000000 },
+ { 0x00200208, 0x24a01a28, 0x1e4500e0, 0x00010001 },
+ { 0x00200208, 0x25c01a28, 0x1e4500e0, 0x00010001 },
+ { 0x00000401, 0x23880608, 0x00000000, 0x000f000f },
+ { 0x00000401, 0x24a80608, 0x00000000, 0x00070007 },
+ { 0x00000401, 0x25c80608, 0x00000000, 0x00070007 },
+ { 0x80800040, 0xa2001248, 0x16b18200, 0x00800080 },
+ { 0x80800040, 0xa2201248, 0x16b18220, 0x00800080 },
+ { 0x80800040, 0xa2401248, 0x16b18240, 0x00800080 },
+ { 0x80800040, 0xa2601248, 0x16b18260, 0x00800080 },
+ { 0x80600040, 0xc4001248, 0x16ae8400, 0x00800080 },
+ { 0x80600040, 0xc4401248, 0x16ae8440, 0x00800080 },
+ { 0x80600040, 0xc0001248, 0x16ae8000, 0x00800080 },
+ { 0x80600040, 0xc0401248, 0x16ae8040, 0x00800080 },
+ { 0x00400040, 0x22001a40, 0x166902c0, 0x04000400 },
+ { 0x80800040, 0xaa001248, 0x16b18a00, 0x00800080 },
+ { 0x80800040, 0xaa201248, 0x16b18a20, 0x00800080 },
+ { 0x80800040, 0xaa401248, 0x16b18a40, 0x00800080 },
+ { 0x80800040, 0xaa601248, 0x16b18a60, 0x00800080 },
+ { 0x80600040, 0xcc001248, 0x16ae8c00, 0x00800080 },
+ { 0x80600040, 0xcc401248, 0x16ae8c40, 0x00800080 },
+ { 0x80600040, 0xc8001248, 0x16ae8800, 0x00800080 },
+ { 0x80600040, 0xc8401248, 0x16ae8840, 0x00800080 },
+ { 0x00400040, 0x22081a40, 0x166902c0, 0x06000600 },
+ { 0x80800040, 0xa2001248, 0x16b18200, 0x00800080 },
+ { 0x80800040, 0xa2201248, 0x16b18220, 0x00800080 },
+ { 0x80800040, 0xa2401248, 0x16b18240, 0x00800080 },
+ { 0x80800040, 0xa2601248, 0x16b18260, 0x00800080 },
+ { 0x80600040, 0xc4001248, 0x16ae8400, 0x00800080 },
+ { 0x80600040, 0xc4401248, 0x16ae8440, 0x00800080 },
+ { 0x80600040, 0xc0001248, 0x16ae8000, 0x00800080 },
+ { 0x80600040, 0xc0401248, 0x16ae8040, 0x00800080 },
+ { 0x80800040, 0xaa001248, 0x16b18a00, 0x00800080 },
+ { 0x80800040, 0xaa201248, 0x16b18a20, 0x00800080 },
+ { 0x80800040, 0xaa401248, 0x16b18a40, 0x00800080 },
+ { 0x80800040, 0xaa601248, 0x16b18a60, 0x00800080 },
+ { 0x80600040, 0xcc001248, 0x16ae8c00, 0x00800080 },
+ { 0x80600040, 0xcc401248, 0x16ae8c40, 0x00800080 },
+ { 0x80600040, 0xc8001248, 0x16ae8800, 0x00800080 },
+ { 0x80600040, 0xc8401248, 0x16ae8840, 0x00800080 },
+ { 0x00400040, 0x22001a40, 0x166902c0, 0x00000000 },
+ { 0x00400040, 0x22081a40, 0x166902c0, 0x02000200 },
+ { 0x00800201, 0x23a02288, 0x00d28201, 0x00000000 },
+ { 0x00800401, 0x23b02288, 0x00d28221, 0x00000000 },
+ { 0x00800201, 0x23c02288, 0x00d28241, 0x00000000 },
+ { 0x00800401, 0x23d02288, 0x00d28261, 0x00000000 },
+ { 0x00600201, 0x24c02288, 0x00cf8401, 0x00000000 },
+ { 0x00600601, 0x24c82288, 0x00cf8441, 0x00000000 },
+ { 0x00600201, 0x25e02288, 0x00cf8001, 0x00000000 },
+ { 0x00600601, 0x25e82288, 0x00cf8041, 0x00000000 },
+ { 0x00400040, 0x22001a40, 0x166902c0, 0x04000400 },
+ { 0x00800201, 0x23e02288, 0x00d28a01, 0x00000000 },
+ { 0x00800401, 0x23f02288, 0x00d28a21, 0x00000000 },
+ { 0x00800201, 0x24002288, 0x00d28a41, 0x00000000 },
+ { 0x00800401, 0x24102288, 0x00d28a61, 0x00000000 },
+ { 0x00600601, 0x24d02288, 0x00cf8c01, 0x00000000 },
+ { 0x00600401, 0x24d82288, 0x00cf8c41, 0x00000000 },
+ { 0x00600601, 0x25f02288, 0x00cf8801, 0x00000000 },
+ { 0x00600401, 0x25f82288, 0x00cf8841, 0x00000000 },
+ { 0x00400040, 0x22081a40, 0x166902c0, 0x06000600 },
+ { 0x00800201, 0x24202288, 0x00d28201, 0x00000000 },
+ { 0x00800401, 0x24302288, 0x00d28221, 0x00000000 },
+ { 0x00800201, 0x24402288, 0x00d28241, 0x00000000 },
+ { 0x00800401, 0x24502288, 0x00d28261, 0x00000000 },
+ { 0x00600201, 0x24e02288, 0x00cf8401, 0x00000000 },
+ { 0x00600601, 0x24e82288, 0x00cf8441, 0x00000000 },
+ { 0x00600201, 0x26002288, 0x00cf8001, 0x00000000 },
+ { 0x00600601, 0x26082288, 0x00cf8041, 0x00000000 },
+ { 0x00800201, 0x24602288, 0x00d28a01, 0x00000000 },
+ { 0x00800401, 0x24702288, 0x00d28a21, 0x00000000 },
+ { 0x00800201, 0x24802288, 0x00d28a41, 0x00000000 },
+ { 0x00800401, 0x24902288, 0x00d28a61, 0x00000000 },
+ { 0x00600601, 0x24f02288, 0x00cf8c01, 0x00000000 },
+ { 0x00600401, 0x24f82288, 0x00cf8c41, 0x00000000 },
+ { 0x00600601, 0x26102288, 0x00cf8801, 0x00000000 },
+ { 0x00600401, 0x26182288, 0x00cf8841, 0x00000000 },
+ { 0x0c000031, 0x20002220, 0x06000380, 0x120a8018 },
+ { 0x0c000031, 0x20002220, 0x060004a0, 0x060a8019 },
+ { 0x0c000031, 0x20002220, 0x060005c0, 0x060a801a },
+ { 0x00600001, 0x2fe00208, 0x008d0000, 0x00000000 },
+ { 0x07000031, 0x20002220, 0x0e000fe0, 0x82000010 },
diff --git a/src/shaders/post_processing/gen8/pl2_to_pa.asm b/src/shaders/post_processing/gen8/pl2_to_pa.asm
new file mode 100644
index 0000000..55d9ced
--- /dev/null
+++ b/src/shaders/post_processing/gen8/pl2_to_pa.asm
@@ -0,0 +1,17 @@
+// Module name: AVS
+.kernel PL2_TO_PA
+.code
+// Body = the fragments below in include order (do not reorder). Judging by the fragment names it reads a PL2 surface via AVS buffers 0-3 and saves PA -- confirm in the .g8a sources. pl2_to_pa.g8b is the assembled form of this file.
+#include "VP_Setup.g8a"
+#include "Set_Layer_0.g8a"
+#include "Set_AVS_Buf_0123_PL2.g8a"
+#include "PL2_AVS_Buf_0.g8a"
+#include "PL2_AVS_Buf_1.g8a"
+#include "PL2_AVS_Buf_2.g8a"
+#include "PL2_AVS_Buf_3.g8a"
+#include "Save_AVS_PA.g8a"
+#include "EOT.g8a"
+
+.end_code
+
+.end_kernel
diff --git a/src/shaders/post_processing/gen8/pl2_to_pa.g8b b/src/shaders/post_processing/gen8/pl2_to_pa.g8b
new file mode 100644
index 0000000..0c0cda1
--- /dev/null
+++ b/src/shaders/post_processing/gen8/pl2_to_pa.g8b
@@ -0,0 +1,287 @@
+ { 0x00600001, 0x23600208, 0x008d0000, 0x00000000 },
+ { 0x00600001, 0x23200208, 0x008d0000, 0x00000000 },
+ { 0x00600001, 0x23400208, 0x008d0000, 0x00000000 },
+ { 0x00600041, 0x20603ae8, 0x3a8d0060, 0x000000f0 },
+ { 0x00200001, 0x21141ae8, 0x004500e0, 0x00000000 },
+ { 0x01000010, 0x20002220, 0x1600005a, 0x00010001 },
+ { 0x00000008, 0x22201248, 0x16000044, 0x00000000 },
+ { 0x00000005, 0x22201248, 0x16000220, 0x00030003 },
+ { 0x01000010, 0x20001261, 0x16000220, 0x00010001 },
+ { 0x00010020, 0x34000005, 0x0e001400, 0x00000090 },
+ { 0x01000010, 0x20001261, 0x16000220, 0x00020002 },
+ { 0x00010020, 0x34000005, 0x0e001400, 0x000000f0 },
+ { 0x01000010, 0x20001261, 0x16000220, 0x00030003 },
+ { 0x00010020, 0x34000005, 0x0e001400, 0x00000180 },
+ { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000114 },
+ { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000118 },
+ { 0x00000020, 0x34000004, 0x0e001400, 0x000001a0 },
+ { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000118 },
+ { 0x00000001, 0x220012e8, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x22203ae8, 0x3a004114, 0x00000200 },
+ { 0x00000040, 0x22203ae8, 0x3e000220, 0xc1800000 },
+ { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000220 },
+ { 0x00000020, 0x34000004, 0x0e001400, 0x00000120 },
+ { 0x00110001, 0x220012e8, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x22203ae8, 0x3a004114, 0x00000200 },
+ { 0x00110040, 0x22203ae8, 0x3e000220, 0xc1800000 },
+ { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000220 },
+ { 0x00000001, 0x220012e8, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x22203ae8, 0x3a004118, 0x00000200 },
+ { 0x00000040, 0x22203ae8, 0x3e000220, 0xc1800000 },
+ { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000220 },
+ { 0x00000020, 0x34000004, 0x0e001400, 0x00000070 },
+ { 0x00110001, 0x220012e8, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x22203ae8, 0x3a004118, 0x00000200 },
+ { 0x00110040, 0x22203ae8, 0x3e000220, 0xc1800000 },
+ { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000220 },
+ { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00800001, 0x22a01648, 0x10000000, 0xffffffff },
+ { 0x00000005, 0x23021288, 0x16000044, 0x00030003 },
+ { 0x00000001, 0x23203ae8, 0x000000f8, 0x00000000 },
+ { 0x00000001, 0x23383ae8, 0x000000f4, 0x00000000 },
+ { 0x00000001, 0x23303ae8, 0x00000060, 0x00000000 },
+ { 0x00000001, 0x23343ae8, 0x00000080, 0x00000000 },
+ { 0x00000001, 0x23283ae8, 0x000000c0, 0x00000000 },
+ { 0x00000001, 0x232c3ae8, 0x000000a0, 0x00000000 },
+ { 0x00000001, 0x233c0608, 0x00000000, 0x00000000 },
+ { 0x00000040, 0x233c0208, 0x0600033c, 0x08000000 },
+ { 0x00000001, 0x24083ae0, 0x000000c0, 0x00000000 },
+ { 0x00000048, 0x24083ae0, 0x3e000060, 0x41000000 },
+ { 0x00000248, 0x22e83ae8, 0x3e0000f4, 0x41e00000 },
+ { 0x00000641, 0x22e43ae8, 0x3e000080, 0x40800000 },
+ { 0x00000001, 0x24103ae0, 0x00000060, 0x00000000 },
+ { 0x00000648, 0x22f03ae8, 0x3e0000f4, 0x41000000 },
+ { 0x00000401, 0x22f40608, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00400001, 0x24003660, 0x30000000, 0x00006ea2 },
+ { 0x00400040, 0x24001860, 0x16690400, 0x00460046 },
+ { 0x00400009, 0x22c01868, 0x16690400, 0x00050005 },
+ { 0x00400001, 0x24003660, 0x30000000, 0x00006204 },
+ { 0x00400040, 0x24001860, 0x16690400, 0x00400040 },
+ { 0x00400209, 0x22401868, 0x16690400, 0x00050005 },
+ { 0x00000401, 0x22500608, 0x00000000, 0x01000100 },
+ { 0x00000001, 0x22d00608, 0x00000000, 0x00400040 },
+ { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 },
+ { 0x00000040, 0x22000200, 0x060002f4, 0x044eb000 },
+ { 0x00000001, 0x22080608, 0x00000000, 0x0000d000 },
+ { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 },
+ { 0x02000031, 0x28002248, 0x00000200, 0x00000200 },
+ { 0x00000040, 0x22000200, 0x060002f4, 0x048eb001 },
+ { 0x00000001, 0x22080608, 0x00000000, 0x0000a000 },
+ { 0x02000031, 0x28802248, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 },
+ { 0x00000040, 0x22000200, 0x060002f4, 0x044eb000 },
+ { 0x00000001, 0x22080608, 0x00000000, 0x0000d000 },
+ { 0x00000001, 0x23240608, 0x00000000, 0x00000001 },
+ { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 },
+ { 0x02000031, 0x2a002248, 0x00000200, 0x00000200 },
+ { 0x00000040, 0x22000200, 0x060002f4, 0x048eb001 },
+ { 0x00000001, 0x22080608, 0x00000000, 0x0000a000 },
+ { 0x02000031, 0x2a802248, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 },
+ { 0x00000040, 0x22000200, 0x060002f4, 0x044eb000 },
+ { 0x00000001, 0x22080608, 0x00000000, 0x0000d000 },
+ { 0x00000001, 0x23240608, 0x00000000, 0x00000002 },
+ { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 },
+ { 0x02000031, 0x2c002248, 0x00000200, 0x00000200 },
+ { 0x00000040, 0x22000200, 0x060002f4, 0x048eb001 },
+ { 0x00000001, 0x22080608, 0x00000000, 0x0000a000 },
+ { 0x02000031, 0x2c802248, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 },
+ { 0x00000040, 0x22000200, 0x060002f4, 0x044eb000 },
+ { 0x00000001, 0x22080608, 0x00000000, 0x0000d000 },
+ { 0x00000001, 0x23240608, 0x00000000, 0x00000003 },
+ { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 },
+ { 0x02000031, 0x2e002248, 0x00000200, 0x00000200 },
+ { 0x00000040, 0x22000200, 0x060002f4, 0x048eb001 },
+ { 0x00000001, 0x22080608, 0x00000000, 0x0000a000 },
+ { 0x02000031, 0x2e802248, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00400040, 0x22001a40, 0x166902c0, 0x00000000 },
+ { 0x00400040, 0x22082260, 0x1669005c, 0x03a003a0 },
+ { 0x00000209, 0x23601a28, 0x1e0000e0, 0x00010001 },
+ { 0x00000601, 0x23641a28, 0x000000e2, 0x00000000 },
+ { 0x00000401, 0x23680608, 0x00000000, 0x0001001f },
+ { 0x80400040, 0xc0001248, 0x168a8000, 0x00800080 },
+ { 0x80400040, 0xc0201248, 0x168a8020, 0x00800080 },
+ { 0x80400040, 0xc0401248, 0x168a8040, 0x00800080 },
+ { 0x80400040, 0xc0601248, 0x168a8060, 0x00800080 },
+ { 0x80600040, 0xa2001248, 0x168d8200, 0x00800080 },
+ { 0x80600040, 0xa2201248, 0x168d8220, 0x00800080 },
+ { 0x80600040, 0xa2401248, 0x168d8240, 0x00800080 },
+ { 0x80600040, 0xa2601248, 0x168d8260, 0x00800080 },
+ { 0x80400040, 0xc4001248, 0x168a8400, 0x00800080 },
+ { 0x80400040, 0xc4201248, 0x168a8420, 0x00800080 },
+ { 0x80400040, 0xc4401248, 0x168a8440, 0x00800080 },
+ { 0x80400040, 0xc4601248, 0x168a8460, 0x00800080 },
+ { 0x80400040, 0xc0101248, 0x168a8010, 0x00800080 },
+ { 0x80400040, 0xc0301248, 0x168a8030, 0x00800080 },
+ { 0x80400040, 0xc0501248, 0x168a8050, 0x00800080 },
+ { 0x80400040, 0xc0701248, 0x168a8070, 0x00800080 },
+ { 0x80600040, 0xa2101248, 0x168d8210, 0x00800080 },
+ { 0x80600040, 0xa2301248, 0x168d8230, 0x00800080 },
+ { 0x80600040, 0xa2501248, 0x168d8250, 0x00800080 },
+ { 0x80600040, 0xa2701248, 0x168d8270, 0x00800080 },
+ { 0x80400040, 0xc4101248, 0x168a8410, 0x00800080 },
+ { 0x80400040, 0xc4301248, 0x168a8430, 0x00800080 },
+ { 0x80400040, 0xc4501248, 0x168a8450, 0x00800080 },
+ { 0x80400040, 0xc4701248, 0x168a8470, 0x00800080 },
+ { 0x00400040, 0x22001a40, 0x166902c0, 0x02000200 },
+ { 0x80400040, 0xc0001248, 0x168a8000, 0x00800080 },
+ { 0x80400040, 0xc0201248, 0x168a8020, 0x00800080 },
+ { 0x80400040, 0xc0401248, 0x168a8040, 0x00800080 },
+ { 0x80400040, 0xc0601248, 0x168a8060, 0x00800080 },
+ { 0x80600040, 0xa2001248, 0x168d8200, 0x00800080 },
+ { 0x80600040, 0xa2201248, 0x168d8220, 0x00800080 },
+ { 0x80600040, 0xa2401248, 0x168d8240, 0x00800080 },
+ { 0x80600040, 0xa2601248, 0x168d8260, 0x00800080 },
+ { 0x80400040, 0xc4001248, 0x168a8400, 0x00800080 },
+ { 0x80400040, 0xc4201248, 0x168a8420, 0x00800080 },
+ { 0x80400040, 0xc4401248, 0x168a8440, 0x00800080 },
+ { 0x80400040, 0xc4601248, 0x168a8460, 0x00800080 },
+ { 0x80400040, 0xc0101248, 0x168a8010, 0x00800080 },
+ { 0x80400040, 0xc0301248, 0x168a8030, 0x00800080 },
+ { 0x80400040, 0xc0501248, 0x168a8050, 0x00800080 },
+ { 0x80400040, 0xc0701248, 0x168a8070, 0x00800080 },
+ { 0x80600040, 0xa2101248, 0x168d8210, 0x00800080 },
+ { 0x80600040, 0xa2301248, 0x168d8230, 0x00800080 },
+ { 0x80600040, 0xa2501248, 0x168d8250, 0x00800080 },
+ { 0x80600040, 0xa2701248, 0x168d8270, 0x00800080 },
+ { 0x80400040, 0xc4101248, 0x168a8410, 0x00800080 },
+ { 0x80400040, 0xc4301248, 0x168a8430, 0x00800080 },
+ { 0x80400040, 0xc4501248, 0x168a8450, 0x00800080 },
+ { 0x80400040, 0xc4701248, 0x168a8470, 0x00800080 },
+ { 0x00400040, 0x22001a40, 0x166902c0, 0x04000400 },
+ { 0x80400040, 0xc0001248, 0x168a8000, 0x00800080 },
+ { 0x80400040, 0xc0201248, 0x168a8020, 0x00800080 },
+ { 0x80400040, 0xc0401248, 0x168a8040, 0x00800080 },
+ { 0x80400040, 0xc0601248, 0x168a8060, 0x00800080 },
+ { 0x80600040, 0xa2001248, 0x168d8200, 0x00800080 },
+ { 0x80600040, 0xa2201248, 0x168d8220, 0x00800080 },
+ { 0x80600040, 0xa2401248, 0x168d8240, 0x00800080 },
+ { 0x80600040, 0xa2601248, 0x168d8260, 0x00800080 },
+ { 0x80400040, 0xc4001248, 0x168a8400, 0x00800080 },
+ { 0x80400040, 0xc4201248, 0x168a8420, 0x00800080 },
+ { 0x80400040, 0xc4401248, 0x168a8440, 0x00800080 },
+ { 0x80400040, 0xc4601248, 0x168a8460, 0x00800080 },
+ { 0x80400040, 0xc0101248, 0x168a8010, 0x00800080 },
+ { 0x80400040, 0xc0301248, 0x168a8030, 0x00800080 },
+ { 0x80400040, 0xc0501248, 0x168a8050, 0x00800080 },
+ { 0x80400040, 0xc0701248, 0x168a8070, 0x00800080 },
+ { 0x80600040, 0xa2101248, 0x168d8210, 0x00800080 },
+ { 0x80600040, 0xa2301248, 0x168d8230, 0x00800080 },
+ { 0x80600040, 0xa2501248, 0x168d8250, 0x00800080 },
+ { 0x80600040, 0xa2701248, 0x168d8270, 0x00800080 },
+ { 0x80400040, 0xc4101248, 0x168a8410, 0x00800080 },
+ { 0x80400040, 0xc4301248, 0x168a8430, 0x00800080 },
+ { 0x80400040, 0xc4501248, 0x168a8450, 0x00800080 },
+ { 0x80400040, 0xc4701248, 0x168a8470, 0x00800080 },
+ { 0x00400040, 0x22001a40, 0x166902c0, 0x06000600 },
+ { 0x80400040, 0xc0001248, 0x168a8000, 0x00800080 },
+ { 0x80400040, 0xc0201248, 0x168a8020, 0x00800080 },
+ { 0x80400040, 0xc0401248, 0x168a8040, 0x00800080 },
+ { 0x80400040, 0xc0601248, 0x168a8060, 0x00800080 },
+ { 0x80600040, 0xa2001248, 0x168d8200, 0x00800080 },
+ { 0x80600040, 0xa2201248, 0x168d8220, 0x00800080 },
+ { 0x80600040, 0xa2401248, 0x168d8240, 0x00800080 },
+ { 0x80600040, 0xa2601248, 0x168d8260, 0x00800080 },
+ { 0x80400040, 0xc4001248, 0x168a8400, 0x00800080 },
+ { 0x80400040, 0xc4201248, 0x168a8420, 0x00800080 },
+ { 0x80400040, 0xc4401248, 0x168a8440, 0x00800080 },
+ { 0x80400040, 0xc4601248, 0x168a8460, 0x00800080 },
+ { 0x80400040, 0xc0101248, 0x168a8010, 0x00800080 },
+ { 0x80400040, 0xc0301248, 0x168a8030, 0x00800080 },
+ { 0x80400040, 0xc0501248, 0x168a8050, 0x00800080 },
+ { 0x80400040, 0xc0701248, 0x168a8070, 0x00800080 },
+ { 0x80600040, 0xa2101248, 0x168d8210, 0x00800080 },
+ { 0x80600040, 0xa2301248, 0x168d8230, 0x00800080 },
+ { 0x80600040, 0xa2501248, 0x168d8250, 0x00800080 },
+ { 0x80600040, 0xa2701248, 0x168d8270, 0x00800080 },
+ { 0x80400040, 0xc4101248, 0x168a8410, 0x00800080 },
+ { 0x80400040, 0xc4301248, 0x168a8430, 0x00800080 },
+ { 0x80400040, 0xc4501248, 0x168a8450, 0x00800080 },
+ { 0x80400040, 0xc4701248, 0x168a8470, 0x00800080 },
+ { 0x00400040, 0x22001a40, 0x166902c0, 0x08000800 },
+ { 0x00400040, 0x22001a40, 0x166902c0, 0x00000000 },
+ { 0x00600001, 0x23800208, 0x008d0360, 0x00000000 },
+ { 0x00600001, 0x24a00208, 0x008d0360, 0x00000000 },
+ { 0x00000040, 0x24a40a28, 0x0e000364, 0x00000002 },
+ { 0x00600201, 0xec002288, 0x00cf8001, 0x00000000 },
+ { 0x00600201, 0xec202288, 0x00cf8021, 0x00000000 },
+ { 0x00800601, 0xc8002288, 0x00d28201, 0x00000000 },
+ { 0x00800601, 0xc8202288, 0x00d28221, 0x00000000 },
+ { 0x00600401, 0xea002288, 0x00cf8401, 0x00000000 },
+ { 0x00600401, 0xea202288, 0x00cf8421, 0x00000000 },
+ { 0x00600201, 0xed202288, 0x00cf8041, 0x00000000 },
+ { 0x00600201, 0xed402288, 0x00cf8061, 0x00000000 },
+ { 0x00800601, 0xc9202288, 0x00d28241, 0x00000000 },
+ { 0x00800601, 0xc9402288, 0x00d28261, 0x00000000 },
+ { 0x00600401, 0xeb202288, 0x00cf8441, 0x00000000 },
+ { 0x00600401, 0xeb402288, 0x00cf8461, 0x00000000 },
+ { 0x0c000031, 0x20002220, 0x06000380, 0x060a8018 },
+ { 0x0c000031, 0x20002220, 0x060004a0, 0x060a8018 },
+ { 0x00400040, 0x22001a40, 0x166902c0, 0x02000200 },
+ { 0x00000040, 0x23840a28, 0x0e000364, 0x00000004 },
+ { 0x00000040, 0x24a40a28, 0x0e000364, 0x00000006 },
+ { 0x00600201, 0xec002288, 0x00cf8001, 0x00000000 },
+ { 0x00600201, 0xec202288, 0x00cf8021, 0x00000000 },
+ { 0x00800601, 0xc8002288, 0x00d28201, 0x00000000 },
+ { 0x00800601, 0xc8202288, 0x00d28221, 0x00000000 },
+ { 0x00600401, 0xea002288, 0x00cf8401, 0x00000000 },
+ { 0x00600401, 0xea202288, 0x00cf8421, 0x00000000 },
+ { 0x00600201, 0xed202288, 0x00cf8041, 0x00000000 },
+ { 0x00600201, 0xed402288, 0x00cf8061, 0x00000000 },
+ { 0x00800601, 0xc9202288, 0x00d28241, 0x00000000 },
+ { 0x00800601, 0xc9402288, 0x00d28261, 0x00000000 },
+ { 0x00600401, 0xeb202288, 0x00cf8441, 0x00000000 },
+ { 0x00600401, 0xeb402288, 0x00cf8461, 0x00000000 },
+ { 0x0c000031, 0x20002220, 0x06000380, 0x060a8018 },
+ { 0x0c000031, 0x20002220, 0x060004a0, 0x060a8018 },
+ { 0x00400040, 0x22001a40, 0x166902c0, 0x04000400 },
+ { 0x00000040, 0x23840a28, 0x0e000364, 0x00000008 },
+ { 0x00000040, 0x24a40a28, 0x0e000364, 0x0000000a },
+ { 0x00600201, 0xec002288, 0x00cf8001, 0x00000000 },
+ { 0x00600201, 0xec202288, 0x00cf8021, 0x00000000 },
+ { 0x00800601, 0xc8002288, 0x00d28201, 0x00000000 },
+ { 0x00800601, 0xc8202288, 0x00d28221, 0x00000000 },
+ { 0x00600401, 0xea002288, 0x00cf8401, 0x00000000 },
+ { 0x00600401, 0xea202288, 0x00cf8421, 0x00000000 },
+ { 0x00600201, 0xed202288, 0x00cf8041, 0x00000000 },
+ { 0x00600201, 0xed402288, 0x00cf8061, 0x00000000 },
+ { 0x00800601, 0xc9202288, 0x00d28241, 0x00000000 },
+ { 0x00800601, 0xc9402288, 0x00d28261, 0x00000000 },
+ { 0x00600401, 0xeb202288, 0x00cf8441, 0x00000000 },
+ { 0x00600401, 0xeb402288, 0x00cf8461, 0x00000000 },
+ { 0x0c000031, 0x20002220, 0x06000380, 0x060a8018 },
+ { 0x0c000031, 0x20002220, 0x060004a0, 0x060a8018 },
+ { 0x00400040, 0x22001a40, 0x166902c0, 0x06000600 },
+ { 0x00000040, 0x23840a28, 0x0e000364, 0x0000000c },
+ { 0x00000040, 0x24a40a28, 0x0e000364, 0x0000000e },
+ { 0x00600201, 0xec002288, 0x00cf8001, 0x00000000 },
+ { 0x00600201, 0xec202288, 0x00cf8021, 0x00000000 },
+ { 0x00800601, 0xc8002288, 0x00d28201, 0x00000000 },
+ { 0x00800601, 0xc8202288, 0x00d28221, 0x00000000 },
+ { 0x00600401, 0xea002288, 0x00cf8401, 0x00000000 },
+ { 0x00600401, 0xea202288, 0x00cf8421, 0x00000000 },
+ { 0x00600201, 0xed202288, 0x00cf8041, 0x00000000 },
+ { 0x00600201, 0xed402288, 0x00cf8061, 0x00000000 },
+ { 0x00800601, 0xc9202288, 0x00d28241, 0x00000000 },
+ { 0x00800601, 0xc9402288, 0x00d28261, 0x00000000 },
+ { 0x00600401, 0xeb202288, 0x00cf8441, 0x00000000 },
+ { 0x00600401, 0xeb402288, 0x00cf8461, 0x00000000 },
+ { 0x0c000031, 0x20002220, 0x06000380, 0x060a8018 },
+ { 0x0c000031, 0x20002220, 0x060004a0, 0x060a8018 },
+ { 0x00600001, 0x2fe00208, 0x008d0000, 0x00000000 },
+ { 0x07000031, 0x20002220, 0x0e000fe0, 0x82000010 },
diff --git a/src/shaders/post_processing/gen8/pl2_to_pl2.asm b/src/shaders/post_processing/gen8/pl2_to_pl2.asm
new file mode 100644
index 0000000..0281854
--- /dev/null
+++ b/src/shaders/post_processing/gen8/pl2_to_pl2.asm
@@ -0,0 +1,17 @@
+// Module name: AVS (kernel PL2_TO_PL2; the body consists only of the .g8a fragments included below, spliced in the order listed)
+.kernel PL2_TO_PL2
+.code
+// NOTE(review): going by fragment names only -- setup, layer 0, AVS buffer setup, four PL2 AVS buffer passes, NV12 save, end of thread; confirm in the .g8a sources.
+#include "VP_Setup.g8a"
+#include "Set_Layer_0.g8a"
+#include "Set_AVS_Buf_0123_PL2.g8a"
+#include "PL2_AVS_Buf_0.g8a"
+#include "PL2_AVS_Buf_1.g8a"
+#include "PL2_AVS_Buf_2.g8a"
+#include "PL2_AVS_Buf_3.g8a"
+#include "Save_AVS_NV12.g8a"
+#include "EOT.g8a"
+
+.end_code
+
+.end_kernel
diff --git a/src/shaders/post_processing/gen8/pl2_to_pl2.g8b b/src/shaders/post_processing/gen8/pl2_to_pl2.g8b
new file mode 100644
index 0000000..fa72882
--- /dev/null
+++ b/src/shaders/post_processing/gen8/pl2_to_pl2.g8b
@@ -0,0 +1,244 @@
+ { 0x00600001, 0x23600208, 0x008d0000, 0x00000000 },
+ { 0x00600001, 0x23200208, 0x008d0000, 0x00000000 },
+ { 0x00600001, 0x23400208, 0x008d0000, 0x00000000 },
+ { 0x00600041, 0x20603ae8, 0x3a8d0060, 0x000000f0 },
+ { 0x00200001, 0x21141ae8, 0x004500e0, 0x00000000 },
+ { 0x01000010, 0x20002220, 0x1600005a, 0x00010001 },
+ { 0x00000008, 0x22201248, 0x16000044, 0x00000000 },
+ { 0x00000005, 0x22201248, 0x16000220, 0x00030003 },
+ { 0x01000010, 0x20001261, 0x16000220, 0x00010001 },
+ { 0x00010020, 0x34000005, 0x0e001400, 0x00000090 },
+ { 0x01000010, 0x20001261, 0x16000220, 0x00020002 },
+ { 0x00010020, 0x34000005, 0x0e001400, 0x000000f0 },
+ { 0x01000010, 0x20001261, 0x16000220, 0x00030003 },
+ { 0x00010020, 0x34000005, 0x0e001400, 0x00000180 },
+ { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000114 },
+ { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000118 },
+ { 0x00000020, 0x34000004, 0x0e001400, 0x000001a0 },
+ { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000118 },
+ { 0x00000001, 0x220012e8, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x22203ae8, 0x3a004114, 0x00000200 },
+ { 0x00000040, 0x22203ae8, 0x3e000220, 0xc1800000 },
+ { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000220 },
+ { 0x00000020, 0x34000004, 0x0e001400, 0x00000120 },
+ { 0x00110001, 0x220012e8, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x22203ae8, 0x3a004114, 0x00000200 },
+ { 0x00110040, 0x22203ae8, 0x3e000220, 0xc1800000 },
+ { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000220 },
+ { 0x00000001, 0x220012e8, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x22203ae8, 0x3a004118, 0x00000200 },
+ { 0x00000040, 0x22203ae8, 0x3e000220, 0xc1800000 },
+ { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000220 },
+ { 0x00000020, 0x34000004, 0x0e001400, 0x00000070 },
+ { 0x00110001, 0x220012e8, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x22203ae8, 0x3a004118, 0x00000200 },
+ { 0x00110040, 0x22203ae8, 0x3e000220, 0xc1800000 },
+ { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000220 },
+ { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00800001, 0x22a01648, 0x10000000, 0xffffffff },
+ { 0x00000005, 0x23021288, 0x16000044, 0x00030003 },
+ { 0x00000001, 0x23203ae8, 0x000000f8, 0x00000000 },
+ { 0x00000001, 0x23383ae8, 0x000000f4, 0x00000000 },
+ { 0x00000001, 0x23303ae8, 0x00000060, 0x00000000 },
+ { 0x00000001, 0x23343ae8, 0x00000080, 0x00000000 },
+ { 0x00000001, 0x23283ae8, 0x000000c0, 0x00000000 },
+ { 0x00000001, 0x232c3ae8, 0x000000a0, 0x00000000 },
+ { 0x00000001, 0x233c0608, 0x00000000, 0x00000000 },
+ { 0x00000040, 0x233c0208, 0x0600033c, 0x08000000 },
+ { 0x00000001, 0x24083ae0, 0x000000c0, 0x00000000 },
+ { 0x00000048, 0x24083ae0, 0x3e000060, 0x41000000 },
+ { 0x00000248, 0x22e83ae8, 0x3e0000f4, 0x41e00000 },
+ { 0x00000641, 0x22e43ae8, 0x3e000080, 0x40800000 },
+ { 0x00000001, 0x24103ae0, 0x00000060, 0x00000000 },
+ { 0x00000648, 0x22f03ae8, 0x3e0000f4, 0x41000000 },
+ { 0x00000401, 0x22f40608, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00400001, 0x24003660, 0x30000000, 0x00006ea2 },
+ { 0x00400040, 0x24001860, 0x16690400, 0x00460046 },
+ { 0x00400009, 0x22c01868, 0x16690400, 0x00050005 },
+ { 0x00400001, 0x24003660, 0x30000000, 0x00006204 },
+ { 0x00400040, 0x24001860, 0x16690400, 0x00400040 },
+ { 0x00400209, 0x22401868, 0x16690400, 0x00050005 },
+ { 0x00000401, 0x22500608, 0x00000000, 0x01000100 },
+ { 0x00000001, 0x22d00608, 0x00000000, 0x00400040 },
+ { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 },
+ { 0x00000040, 0x22000200, 0x060002f4, 0x044eb000 },
+ { 0x00000001, 0x22080608, 0x00000000, 0x0000d000 },
+ { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 },
+ { 0x02000031, 0x28002248, 0x00000200, 0x00000200 },
+ { 0x00000040, 0x22000200, 0x060002f4, 0x048eb001 },
+ { 0x00000001, 0x22080608, 0x00000000, 0x0000a000 },
+ { 0x02000031, 0x28802248, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 },
+ { 0x00000040, 0x22000200, 0x060002f4, 0x044eb000 },
+ { 0x00000001, 0x22080608, 0x00000000, 0x0000d000 },
+ { 0x00000001, 0x23240608, 0x00000000, 0x00000001 },
+ { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 },
+ { 0x02000031, 0x2a002248, 0x00000200, 0x00000200 },
+ { 0x00000040, 0x22000200, 0x060002f4, 0x048eb001 },
+ { 0x00000001, 0x22080608, 0x00000000, 0x0000a000 },
+ { 0x02000031, 0x2a802248, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 },
+ { 0x00000040, 0x22000200, 0x060002f4, 0x044eb000 },
+ { 0x00000001, 0x22080608, 0x00000000, 0x0000d000 },
+ { 0x00000001, 0x23240608, 0x00000000, 0x00000002 },
+ { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 },
+ { 0x02000031, 0x2c002248, 0x00000200, 0x00000200 },
+ { 0x00000040, 0x22000200, 0x060002f4, 0x048eb001 },
+ { 0x00000001, 0x22080608, 0x00000000, 0x0000a000 },
+ { 0x02000031, 0x2c802248, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 },
+ { 0x00000040, 0x22000200, 0x060002f4, 0x044eb000 },
+ { 0x00000001, 0x22080608, 0x00000000, 0x0000d000 },
+ { 0x00000001, 0x23240608, 0x00000000, 0x00000003 },
+ { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 },
+ { 0x02000031, 0x2e002248, 0x00000200, 0x00000200 },
+ { 0x00000040, 0x22000200, 0x060002f4, 0x048eb001 },
+ { 0x00000001, 0x22080608, 0x00000000, 0x0000a000 },
+ { 0x02000031, 0x2e802248, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00400040, 0x22001a40, 0x166902c0, 0x00000000 },
+ { 0x00400040, 0x22081a40, 0x166902c0, 0x02000200 },
+ { 0x00600001, 0x23800208, 0x008d0360, 0x00000000 },
+ { 0x00600001, 0x24a00208, 0x008d0360, 0x00000000 },
+ { 0x00200201, 0x23801a28, 0x004500e0, 0x00000000 },
+ { 0x00000201, 0x24a01a28, 0x000000e0, 0x00000000 },
+ { 0x00000608, 0x24a41a28, 0x1e0000e2, 0x00010001 },
+ { 0x00000401, 0x23880608, 0x00000000, 0x000f000f },
+ { 0x00000401, 0x24a80608, 0x00000000, 0x0007000f },
+ { 0x80800040, 0xa2001248, 0x16b18200, 0x00800080 },
+ { 0x80800040, 0xa2201248, 0x16b18220, 0x00800080 },
+ { 0x80800040, 0xa2401248, 0x16b18240, 0x00800080 },
+ { 0x80800040, 0xa2601248, 0x16b18260, 0x00800080 },
+ { 0x00600008, 0x45c01248, 0x1eae8400, 0x00010001 },
+ { 0x00600008, 0x45e01248, 0x1eae8420, 0x00010001 },
+ { 0x00600008, 0x46001248, 0x1eae8440, 0x00010001 },
+ { 0x00600008, 0x46201248, 0x1eae8460, 0x00010001 },
+ { 0x00600040, 0x45c01248, 0x12ae05c0, 0x00ae05e0 },
+ { 0x80600040, 0xc4001248, 0x16ae05c0, 0x00800080 },
+ { 0x00600040, 0x46001248, 0x12ae0600, 0x00ae0620 },
+ { 0x80600040, 0xc4401248, 0x16ae0600, 0x00800080 },
+ { 0x00600008, 0x46401248, 0x1eae8000, 0x00010001 },
+ { 0x00600008, 0x46601248, 0x1eae8020, 0x00010001 },
+ { 0x00600008, 0x46801248, 0x1eae8040, 0x00010001 },
+ { 0x00600008, 0x46a01248, 0x1eae8060, 0x00010001 },
+ { 0x00600040, 0x46401248, 0x12ae0640, 0x00ae0660 },
+ { 0x80600040, 0xc0001248, 0x16ae0640, 0x00800080 },
+ { 0x00600040, 0x46801248, 0x12ae0680, 0x00ae06a0 },
+ { 0x80600040, 0xc0401248, 0x16ae0680, 0x00800080 },
+ { 0x00400040, 0x22001a40, 0x166902c0, 0x04000400 },
+ { 0x80800040, 0xaa001248, 0x16b18a00, 0x00800080 },
+ { 0x80800040, 0xaa201248, 0x16b18a20, 0x00800080 },
+ { 0x80800040, 0xaa401248, 0x16b18a40, 0x00800080 },
+ { 0x80800040, 0xaa601248, 0x16b18a60, 0x00800080 },
+ { 0x00600008, 0x45c01248, 0x1eae8c00, 0x00010001 },
+ { 0x00600008, 0x45e01248, 0x1eae8c20, 0x00010001 },
+ { 0x00600008, 0x46001248, 0x1eae8c40, 0x00010001 },
+ { 0x00600008, 0x46201248, 0x1eae8c60, 0x00010001 },
+ { 0x00600040, 0x45c01248, 0x12ae05c0, 0x00ae05e0 },
+ { 0x80600040, 0xcc001248, 0x16ae05c0, 0x00800080 },
+ { 0x00600040, 0x46001248, 0x12ae0600, 0x00ae0620 },
+ { 0x80600040, 0xcc401248, 0x16ae0600, 0x00800080 },
+ { 0x00600008, 0x46401248, 0x1eae8800, 0x00010001 },
+ { 0x00600008, 0x46601248, 0x1eae8820, 0x00010001 },
+ { 0x00600008, 0x46801248, 0x1eae8840, 0x00010001 },
+ { 0x00600008, 0x46a01248, 0x1eae8860, 0x00010001 },
+ { 0x00600040, 0x46401248, 0x12ae0640, 0x00ae0660 },
+ { 0x80600040, 0xc8001248, 0x16ae0640, 0x00800080 },
+ { 0x00600040, 0x46801248, 0x12ae0680, 0x00ae06a0 },
+ { 0x80600040, 0xc8401248, 0x16ae0680, 0x00800080 },
+ { 0x00400040, 0x22081a40, 0x166902c0, 0x06000600 },
+ { 0x80800040, 0xa2001248, 0x16b18200, 0x00800080 },
+ { 0x80800040, 0xa2201248, 0x16b18220, 0x00800080 },
+ { 0x80800040, 0xa2401248, 0x16b18240, 0x00800080 },
+ { 0x80800040, 0xa2601248, 0x16b18260, 0x00800080 },
+ { 0x00600008, 0x45c01248, 0x1eae8400, 0x00010001 },
+ { 0x00600008, 0x45e01248, 0x1eae8420, 0x00010001 },
+ { 0x00600008, 0x46001248, 0x1eae8440, 0x00010001 },
+ { 0x00600008, 0x46201248, 0x1eae8460, 0x00010001 },
+ { 0x00600040, 0x45c01248, 0x12ae05c0, 0x00ae05e0 },
+ { 0x80600040, 0xc4001248, 0x16ae05c0, 0x00800080 },
+ { 0x00600040, 0x46001248, 0x12ae0600, 0x00ae0620 },
+ { 0x80600040, 0xc4401248, 0x16ae0600, 0x00800080 },
+ { 0x00600008, 0x46401248, 0x1eae8000, 0x00010001 },
+ { 0x00600008, 0x46601248, 0x1eae8020, 0x00010001 },
+ { 0x00600008, 0x46801248, 0x1eae8040, 0x00010001 },
+ { 0x00600008, 0x46a01248, 0x1eae8060, 0x00010001 },
+ { 0x00600040, 0x46401248, 0x12ae0640, 0x00ae0660 },
+ { 0x80600040, 0xc0001248, 0x16ae0640, 0x00800080 },
+ { 0x00600040, 0x46801248, 0x12ae0680, 0x00ae06a0 },
+ { 0x80600040, 0xc0401248, 0x16ae0680, 0x00800080 },
+ { 0x00400040, 0x22001a40, 0x166902c0, 0x04000400 },
+ { 0x80800040, 0xaa001248, 0x16b18a00, 0x00800080 },
+ { 0x80800040, 0xaa201248, 0x16b18a20, 0x00800080 },
+ { 0x80800040, 0xaa401248, 0x16b18a40, 0x00800080 },
+ { 0x80800040, 0xaa601248, 0x16b18a60, 0x00800080 },
+ { 0x00600008, 0x45c01248, 0x1eae8c00, 0x00010001 },
+ { 0x00600008, 0x45e01248, 0x1eae8c20, 0x00010001 },
+ { 0x00600008, 0x46001248, 0x1eae8c40, 0x00010001 },
+ { 0x00600008, 0x46201248, 0x1eae8c60, 0x00010001 },
+ { 0x00600040, 0x45c01248, 0x12ae05c0, 0x00ae05e0 },
+ { 0x80600040, 0xcc001248, 0x16ae05c0, 0x00800080 },
+ { 0x00600040, 0x46001248, 0x12ae0600, 0x00ae0620 },
+ { 0x80600040, 0xcc401248, 0x16ae0600, 0x00800080 },
+ { 0x00600008, 0x46401248, 0x1eae8800, 0x00010001 },
+ { 0x00600008, 0x46601248, 0x1eae8820, 0x00010001 },
+ { 0x00600008, 0x46801248, 0x1eae8840, 0x00010001 },
+ { 0x00600008, 0x46a01248, 0x1eae8860, 0x00010001 },
+ { 0x00600040, 0x46401248, 0x12ae0640, 0x00ae0660 },
+ { 0x80600040, 0xc8001248, 0x16ae0640, 0x00800080 },
+ { 0x00600040, 0x46801248, 0x12ae0680, 0x00ae06a0 },
+ { 0x80600040, 0xc8401248, 0x16ae0680, 0x00800080 },
+ { 0x00400040, 0x22081a40, 0x166902c0, 0x06000600 },
+ { 0x00400040, 0x22001a40, 0x166902c0, 0x00000000 },
+ { 0x00400040, 0x22081a40, 0x166902c0, 0x02000200 },
+ { 0x00800201, 0x23a02288, 0x00d28201, 0x00000000 },
+ { 0x00800401, 0x23b02288, 0x00d28221, 0x00000000 },
+ { 0x00800201, 0x23c02288, 0x00d28241, 0x00000000 },
+ { 0x00800401, 0x23d02288, 0x00d28261, 0x00000000 },
+ { 0x00600201, 0x44c02288, 0x00cf8401, 0x00000000 },
+ { 0x00600601, 0x44d02288, 0x00cf8441, 0x00000000 },
+ { 0x00600601, 0x44c12288, 0x00cf8001, 0x00000000 },
+ { 0x00600401, 0x44d12288, 0x00cf8041, 0x00000000 },
+ { 0x00400040, 0x22001a40, 0x166902c0, 0x04000400 },
+ { 0x00800201, 0x23e02288, 0x00d28a01, 0x00000000 },
+ { 0x00800401, 0x23f02288, 0x00d28a21, 0x00000000 },
+ { 0x00800201, 0x24002288, 0x00d28a41, 0x00000000 },
+ { 0x00800401, 0x24102288, 0x00d28a61, 0x00000000 },
+ { 0x00600201, 0x44e02288, 0x00cf8c01, 0x00000000 },
+ { 0x00600601, 0x44f02288, 0x00cf8c41, 0x00000000 },
+ { 0x00600601, 0x44e12288, 0x00cf8801, 0x00000000 },
+ { 0x00600401, 0x44f12288, 0x00cf8841, 0x00000000 },
+ { 0x00400040, 0x22081a40, 0x166902c0, 0x06000600 },
+ { 0x00800201, 0x24202288, 0x00d28201, 0x00000000 },
+ { 0x00800401, 0x24302288, 0x00d28221, 0x00000000 },
+ { 0x00800201, 0x24402288, 0x00d28241, 0x00000000 },
+ { 0x00800401, 0x24502288, 0x00d28261, 0x00000000 },
+ { 0x00600201, 0x45002288, 0x00cf8401, 0x00000000 },
+ { 0x00600601, 0x45102288, 0x00cf8441, 0x00000000 },
+ { 0x00600601, 0x45012288, 0x00cf8001, 0x00000000 },
+ { 0x00600401, 0x45112288, 0x00cf8041, 0x00000000 },
+ { 0x00800201, 0x24602288, 0x00d28a01, 0x00000000 },
+ { 0x00800401, 0x24702288, 0x00d28a21, 0x00000000 },
+ { 0x00800201, 0x24802288, 0x00d28a41, 0x00000000 },
+ { 0x00800401, 0x24902288, 0x00d28a61, 0x00000000 },
+ { 0x00600201, 0x45202288, 0x00cf8c01, 0x00000000 },
+ { 0x00600601, 0x45302288, 0x00cf8c41, 0x00000000 },
+ { 0x00600601, 0x45212288, 0x00cf8801, 0x00000000 },
+ { 0x00600401, 0x45312288, 0x00cf8841, 0x00000000 },
+ { 0x0c000031, 0x20002220, 0x06000380, 0x120a8018 },
+ { 0x0c000031, 0x20002220, 0x060004a0, 0x0a0a8019 },
+ { 0x00600001, 0x2fe00208, 0x008d0000, 0x00000000 },
+ { 0x07000031, 0x20002220, 0x0e000fe0, 0x82000010 },
diff --git a/src/shaders/post_processing/gen8/pl2_to_pl3.asm b/src/shaders/post_processing/gen8/pl2_to_pl3.asm
new file mode 100644
index 0000000..042a834
--- /dev/null
+++ b/src/shaders/post_processing/gen8/pl2_to_pl3.asm
@@ -0,0 +1,17 @@
+// Module name: AVS (kernel PL2_TO_PL3; the body consists only of the .g8a fragments included below, spliced in the order listed)
+.kernel PL2_TO_PL3
+.code
+// Same include chain as pl2_to_pl2.asm except for the save fragment (Save_AVS_PL3.g8a here). NOTE(review): fragment semantics are inferred from names; confirm in the .g8a sources.
+#include "VP_Setup.g8a"
+#include "Set_Layer_0.g8a"
+#include "Set_AVS_Buf_0123_PL2.g8a"
+#include "PL2_AVS_Buf_0.g8a"
+#include "PL2_AVS_Buf_1.g8a"
+#include "PL2_AVS_Buf_2.g8a"
+#include "PL2_AVS_Buf_3.g8a"
+#include "Save_AVS_PL3.g8a"
+#include "EOT.g8a"
+
+.end_code
+
+.end_kernel
diff --git a/src/shaders/post_processing/gen8/pl2_to_pl3.g8b b/src/shaders/post_processing/gen8/pl2_to_pl3.g8b
new file mode 100644
index 0000000..28a951c
--- /dev/null
+++ b/src/shaders/post_processing/gen8/pl2_to_pl3.g8b
@@ -0,0 +1,197 @@
+ { 0x00600001, 0x23600208, 0x008d0000, 0x00000000 },
+ { 0x00600001, 0x23200208, 0x008d0000, 0x00000000 },
+ { 0x00600001, 0x23400208, 0x008d0000, 0x00000000 },
+ { 0x00600041, 0x20603ae8, 0x3a8d0060, 0x000000f0 },
+ { 0x00200001, 0x21141ae8, 0x004500e0, 0x00000000 },
+ { 0x01000010, 0x20002220, 0x1600005a, 0x00010001 },
+ { 0x00000008, 0x22201248, 0x16000044, 0x00000000 },
+ { 0x00000005, 0x22201248, 0x16000220, 0x00030003 },
+ { 0x01000010, 0x20001261, 0x16000220, 0x00010001 },
+ { 0x00010020, 0x34000005, 0x0e001400, 0x00000090 },
+ { 0x01000010, 0x20001261, 0x16000220, 0x00020002 },
+ { 0x00010020, 0x34000005, 0x0e001400, 0x000000f0 },
+ { 0x01000010, 0x20001261, 0x16000220, 0x00030003 },
+ { 0x00010020, 0x34000005, 0x0e001400, 0x00000180 },
+ { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000114 },
+ { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000118 },
+ { 0x00000020, 0x34000004, 0x0e001400, 0x000001a0 },
+ { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000118 },
+ { 0x00000001, 0x220012e8, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x22203ae8, 0x3a004114, 0x00000200 },
+ { 0x00000040, 0x22203ae8, 0x3e000220, 0xc1800000 },
+ { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000220 },
+ { 0x00000020, 0x34000004, 0x0e001400, 0x00000120 },
+ { 0x00110001, 0x220012e8, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x22203ae8, 0x3a004114, 0x00000200 },
+ { 0x00110040, 0x22203ae8, 0x3e000220, 0xc1800000 },
+ { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000220 },
+ { 0x00000001, 0x220012e8, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x22203ae8, 0x3a004118, 0x00000200 },
+ { 0x00000040, 0x22203ae8, 0x3e000220, 0xc1800000 },
+ { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000220 },
+ { 0x00000020, 0x34000004, 0x0e001400, 0x00000070 },
+ { 0x00110001, 0x220012e8, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x22203ae8, 0x3a004118, 0x00000200 },
+ { 0x00110040, 0x22203ae8, 0x3e000220, 0xc1800000 },
+ { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000220 },
+ { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00800001, 0x22a01648, 0x10000000, 0xffffffff },
+ { 0x00000005, 0x23021288, 0x16000044, 0x00030003 },
+ { 0x00000001, 0x23203ae8, 0x000000f8, 0x00000000 },
+ { 0x00000001, 0x23383ae8, 0x000000f4, 0x00000000 },
+ { 0x00000001, 0x23303ae8, 0x00000060, 0x00000000 },
+ { 0x00000001, 0x23343ae8, 0x00000080, 0x00000000 },
+ { 0x00000001, 0x23283ae8, 0x000000c0, 0x00000000 },
+ { 0x00000001, 0x232c3ae8, 0x000000a0, 0x00000000 },
+ { 0x00000001, 0x233c0608, 0x00000000, 0x00000000 },
+ { 0x00000040, 0x233c0208, 0x0600033c, 0x08000000 },
+ { 0x00000001, 0x24083ae0, 0x000000c0, 0x00000000 },
+ { 0x00000048, 0x24083ae0, 0x3e000060, 0x41000000 },
+ { 0x00000248, 0x22e83ae8, 0x3e0000f4, 0x41e00000 },
+ { 0x00000641, 0x22e43ae8, 0x3e000080, 0x40800000 },
+ { 0x00000001, 0x24103ae0, 0x00000060, 0x00000000 },
+ { 0x00000648, 0x22f03ae8, 0x3e0000f4, 0x41000000 },
+ { 0x00000401, 0x22f40608, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00400001, 0x24003660, 0x30000000, 0x00006ea2 },
+ { 0x00400040, 0x24001860, 0x16690400, 0x00460046 },
+ { 0x00400009, 0x22c01868, 0x16690400, 0x00050005 },
+ { 0x00400001, 0x24003660, 0x30000000, 0x00006204 },
+ { 0x00400040, 0x24001860, 0x16690400, 0x00400040 },
+ { 0x00400209, 0x22401868, 0x16690400, 0x00050005 },
+ { 0x00000401, 0x22500608, 0x00000000, 0x01000100 },
+ { 0x00000001, 0x22d00608, 0x00000000, 0x00400040 },
+ { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 },
+ { 0x00000040, 0x22000200, 0x060002f4, 0x044eb000 },
+ { 0x00000001, 0x22080608, 0x00000000, 0x0000d000 },
+ { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 },
+ { 0x02000031, 0x28002248, 0x00000200, 0x00000200 },
+ { 0x00000040, 0x22000200, 0x060002f4, 0x048eb001 },
+ { 0x00000001, 0x22080608, 0x00000000, 0x0000a000 },
+ { 0x02000031, 0x28802248, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 },
+ { 0x00000040, 0x22000200, 0x060002f4, 0x044eb000 },
+ { 0x00000001, 0x22080608, 0x00000000, 0x0000d000 },
+ { 0x00000001, 0x23240608, 0x00000000, 0x00000001 },
+ { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 },
+ { 0x02000031, 0x2a002248, 0x00000200, 0x00000200 },
+ { 0x00000040, 0x22000200, 0x060002f4, 0x048eb001 },
+ { 0x00000001, 0x22080608, 0x00000000, 0x0000a000 },
+ { 0x02000031, 0x2a802248, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 },
+ { 0x00000040, 0x22000200, 0x060002f4, 0x044eb000 },
+ { 0x00000001, 0x22080608, 0x00000000, 0x0000d000 },
+ { 0x00000001, 0x23240608, 0x00000000, 0x00000002 },
+ { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 },
+ { 0x02000031, 0x2c002248, 0x00000200, 0x00000200 },
+ { 0x00000040, 0x22000200, 0x060002f4, 0x048eb001 },
+ { 0x00000001, 0x22080608, 0x00000000, 0x0000a000 },
+ { 0x02000031, 0x2c802248, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 },
+ { 0x00000040, 0x22000200, 0x060002f4, 0x044eb000 },
+ { 0x00000001, 0x22080608, 0x00000000, 0x0000d000 },
+ { 0x00000001, 0x23240608, 0x00000000, 0x00000003 },
+ { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 },
+ { 0x02000031, 0x2e002248, 0x00000200, 0x00000200 },
+ { 0x00000040, 0x22000200, 0x060002f4, 0x048eb001 },
+ { 0x00000001, 0x22080608, 0x00000000, 0x0000a000 },
+ { 0x02000031, 0x2e802248, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00400040, 0x22001a40, 0x166902c0, 0x00000000 },
+ { 0x00400040, 0x22081a40, 0x166902c0, 0x02000200 },
+ { 0x00600001, 0x23800208, 0x008d0360, 0x00000000 },
+ { 0x00600001, 0x24a00208, 0x008d0360, 0x00000000 },
+ { 0x00600001, 0x25c00208, 0x008d0360, 0x00000000 },
+ { 0x00200201, 0x23801a28, 0x004500e0, 0x00000000 },
+ { 0x00200208, 0x24a01a28, 0x1e4500e0, 0x00010001 },
+ { 0x00200208, 0x25c01a28, 0x1e4500e0, 0x00010001 },
+ { 0x00000401, 0x23880608, 0x00000000, 0x000f000f },
+ { 0x00000401, 0x24a80608, 0x00000000, 0x00070007 },
+ { 0x00000401, 0x25c80608, 0x00000000, 0x00070007 },
+ { 0x80800040, 0xa2001248, 0x16b18200, 0x00800080 },
+ { 0x80800040, 0xa2201248, 0x16b18220, 0x00800080 },
+ { 0x80800040, 0xa2401248, 0x16b18240, 0x00800080 },
+ { 0x80800040, 0xa2601248, 0x16b18260, 0x00800080 },
+ { 0x80600040, 0xc4001248, 0x16ae8400, 0x00800080 },
+ { 0x80600040, 0xc4401248, 0x16ae8440, 0x00800080 },
+ { 0x80600040, 0xc0001248, 0x16ae8000, 0x00800080 },
+ { 0x80600040, 0xc0401248, 0x16ae8040, 0x00800080 },
+ { 0x00400040, 0x22001a40, 0x166902c0, 0x04000400 },
+ { 0x80800040, 0xaa001248, 0x16b18a00, 0x00800080 },
+ { 0x80800040, 0xaa201248, 0x16b18a20, 0x00800080 },
+ { 0x80800040, 0xaa401248, 0x16b18a40, 0x00800080 },
+ { 0x80800040, 0xaa601248, 0x16b18a60, 0x00800080 },
+ { 0x80600040, 0xcc001248, 0x16ae8c00, 0x00800080 },
+ { 0x80600040, 0xcc401248, 0x16ae8c40, 0x00800080 },
+ { 0x80600040, 0xc8001248, 0x16ae8800, 0x00800080 },
+ { 0x80600040, 0xc8401248, 0x16ae8840, 0x00800080 },
+ { 0x00400040, 0x22081a40, 0x166902c0, 0x06000600 },
+ { 0x80800040, 0xa2001248, 0x16b18200, 0x00800080 },
+ { 0x80800040, 0xa2201248, 0x16b18220, 0x00800080 },
+ { 0x80800040, 0xa2401248, 0x16b18240, 0x00800080 },
+ { 0x80800040, 0xa2601248, 0x16b18260, 0x00800080 },
+ { 0x80600040, 0xc4001248, 0x16ae8400, 0x00800080 },
+ { 0x80600040, 0xc4401248, 0x16ae8440, 0x00800080 },
+ { 0x80600040, 0xc0001248, 0x16ae8000, 0x00800080 },
+ { 0x80600040, 0xc0401248, 0x16ae8040, 0x00800080 },
+ { 0x80800040, 0xaa001248, 0x16b18a00, 0x00800080 },
+ { 0x80800040, 0xaa201248, 0x16b18a20, 0x00800080 },
+ { 0x80800040, 0xaa401248, 0x16b18a40, 0x00800080 },
+ { 0x80800040, 0xaa601248, 0x16b18a60, 0x00800080 },
+ { 0x80600040, 0xcc001248, 0x16ae8c00, 0x00800080 },
+ { 0x80600040, 0xcc401248, 0x16ae8c40, 0x00800080 },
+ { 0x80600040, 0xc8001248, 0x16ae8800, 0x00800080 },
+ { 0x80600040, 0xc8401248, 0x16ae8840, 0x00800080 },
+ { 0x00400040, 0x22001a40, 0x166902c0, 0x00000000 },
+ { 0x00400040, 0x22081a40, 0x166902c0, 0x02000200 },
+ { 0x00800201, 0x23a02288, 0x00d28201, 0x00000000 },
+ { 0x00800401, 0x23b02288, 0x00d28221, 0x00000000 },
+ { 0x00800201, 0x23c02288, 0x00d28241, 0x00000000 },
+ { 0x00800401, 0x23d02288, 0x00d28261, 0x00000000 },
+ { 0x00600201, 0x24c02288, 0x00cf8401, 0x00000000 },
+ { 0x00600601, 0x24c82288, 0x00cf8441, 0x00000000 },
+ { 0x00600201, 0x25e02288, 0x00cf8001, 0x00000000 },
+ { 0x00600601, 0x25e82288, 0x00cf8041, 0x00000000 },
+ { 0x00400040, 0x22001a40, 0x166902c0, 0x04000400 },
+ { 0x00800201, 0x23e02288, 0x00d28a01, 0x00000000 },
+ { 0x00800401, 0x23f02288, 0x00d28a21, 0x00000000 },
+ { 0x00800201, 0x24002288, 0x00d28a41, 0x00000000 },
+ { 0x00800401, 0x24102288, 0x00d28a61, 0x00000000 },
+ { 0x00600601, 0x24d02288, 0x00cf8c01, 0x00000000 },
+ { 0x00600401, 0x24d82288, 0x00cf8c41, 0x00000000 },
+ { 0x00600601, 0x25f02288, 0x00cf8801, 0x00000000 },
+ { 0x00600401, 0x25f82288, 0x00cf8841, 0x00000000 },
+ { 0x00400040, 0x22081a40, 0x166902c0, 0x06000600 },
+ { 0x00800201, 0x24202288, 0x00d28201, 0x00000000 },
+ { 0x00800401, 0x24302288, 0x00d28221, 0x00000000 },
+ { 0x00800201, 0x24402288, 0x00d28241, 0x00000000 },
+ { 0x00800401, 0x24502288, 0x00d28261, 0x00000000 },
+ { 0x00600201, 0x24e02288, 0x00cf8401, 0x00000000 },
+ { 0x00600601, 0x24e82288, 0x00cf8441, 0x00000000 },
+ { 0x00600201, 0x26002288, 0x00cf8001, 0x00000000 },
+ { 0x00600601, 0x26082288, 0x00cf8041, 0x00000000 },
+ { 0x00800201, 0x24602288, 0x00d28a01, 0x00000000 },
+ { 0x00800401, 0x24702288, 0x00d28a21, 0x00000000 },
+ { 0x00800201, 0x24802288, 0x00d28a41, 0x00000000 },
+ { 0x00800401, 0x24902288, 0x00d28a61, 0x00000000 },
+ { 0x00600601, 0x24f02288, 0x00cf8c01, 0x00000000 },
+ { 0x00600401, 0x24f82288, 0x00cf8c41, 0x00000000 },
+ { 0x00600601, 0x26102288, 0x00cf8801, 0x00000000 },
+ { 0x00600401, 0x26182288, 0x00cf8841, 0x00000000 },
+ { 0x0c000031, 0x20002220, 0x06000380, 0x120a8018 },
+ { 0x0c000031, 0x20002220, 0x060004a0, 0x060a8019 },
+ { 0x0c000031, 0x20002220, 0x060005c0, 0x060a801a },
+ { 0x00600001, 0x2fe00208, 0x008d0000, 0x00000000 },
+ { 0x07000031, 0x20002220, 0x0e000fe0, 0x82000010 },
diff --git a/src/shaders/post_processing/gen8/pl2_to_rgbx.asm b/src/shaders/post_processing/gen8/pl2_to_rgbx.asm
new file mode 100644
index 0000000..58a5204
--- /dev/null
+++ b/src/shaders/post_processing/gen8/pl2_to_rgbx.asm
@@ -0,0 +1,18 @@
+// Module name: AVS (kernel PL2_TO_RGBX; the body consists only of the .g8a fragments included below, spliced in the order listed)
+.kernel PL2_TO_RGBX
+.code
+// Same include chain as pl2_to_pl2.asm, plus YUV_to_RGB.g8a ahead of the save fragment (Save_AVS_RGBX.g8a here). Kernel name now follows the file name, as in the sibling .asm files; it previously duplicated PL2_TO_PL2.
+#include "VP_Setup.g8a"
+#include "Set_Layer_0.g8a"
+#include "Set_AVS_Buf_0123_PL2.g8a"
+#include "PL2_AVS_Buf_0.g8a"
+#include "PL2_AVS_Buf_1.g8a"
+#include "PL2_AVS_Buf_2.g8a"
+#include "PL2_AVS_Buf_3.g8a"
+#include "YUV_to_RGB.g8a"
+#include "Save_AVS_RGBX.g8a"
+#include "EOT.g8a"
+
+.end_code
+
+.end_kernel
diff --git a/src/shaders/post_processing/gen8/pl2_to_rgbx.g8b b/src/shaders/post_processing/gen8/pl2_to_rgbx.g8b
new file mode 100644
index 0000000..9ee29c2
--- /dev/null
+++ b/src/shaders/post_processing/gen8/pl2_to_rgbx.g8b
@@ -0,0 +1,738 @@
+ { 0x00600001, 0x23600208, 0x008d0000, 0x00000000 },
+ { 0x00600001, 0x23200208, 0x008d0000, 0x00000000 },
+ { 0x00600001, 0x23400208, 0x008d0000, 0x00000000 },
+ { 0x00600041, 0x20603ae8, 0x3a8d0060, 0x000000f0 },
+ { 0x00200001, 0x21141ae8, 0x004500e0, 0x00000000 },
+ { 0x01000010, 0x20002220, 0x1600005a, 0x00010001 },
+ { 0x00000008, 0x22201248, 0x16000044, 0x00000000 },
+ { 0x00000005, 0x22201248, 0x16000220, 0x00030003 },
+ { 0x01000010, 0x20001261, 0x16000220, 0x00010001 },
+ { 0x00010020, 0x34000005, 0x0e001400, 0x00000090 },
+ { 0x01000010, 0x20001261, 0x16000220, 0x00020002 },
+ { 0x00010020, 0x34000005, 0x0e001400, 0x000000f0 },
+ { 0x01000010, 0x20001261, 0x16000220, 0x00030003 },
+ { 0x00010020, 0x34000005, 0x0e001400, 0x00000180 },
+ { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000114 },
+ { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000118 },
+ { 0x00000020, 0x34000004, 0x0e001400, 0x000001a0 },
+ { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000118 },
+ { 0x00000001, 0x220012e8, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x22203ae8, 0x3a004114, 0x00000200 },
+ { 0x00000040, 0x22203ae8, 0x3e000220, 0xc1800000 },
+ { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000220 },
+ { 0x00000020, 0x34000004, 0x0e001400, 0x00000120 },
+ { 0x00110001, 0x220012e8, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x22203ae8, 0x3a004114, 0x00000200 },
+ { 0x00110040, 0x22203ae8, 0x3e000220, 0xc1800000 },
+ { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000220 },
+ { 0x00000001, 0x220012e8, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x22203ae8, 0x3a004118, 0x00000200 },
+ { 0x00000040, 0x22203ae8, 0x3e000220, 0xc1800000 },
+ { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000220 },
+ { 0x00000020, 0x34000004, 0x0e001400, 0x00000070 },
+ { 0x00110001, 0x220012e8, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x22203ae8, 0x3a004118, 0x00000200 },
+ { 0x00110040, 0x22203ae8, 0x3e000220, 0xc1800000 },
+ { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000220 },
+ { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00800001, 0x22a01648, 0x10000000, 0xffffffff },
+ { 0x00000005, 0x23021288, 0x16000044, 0x00030003 },
+ { 0x00000001, 0x23203ae8, 0x000000f8, 0x00000000 },
+ { 0x00000001, 0x23383ae8, 0x000000f4, 0x00000000 },
+ { 0x00000001, 0x23303ae8, 0x00000060, 0x00000000 },
+ { 0x00000001, 0x23343ae8, 0x00000080, 0x00000000 },
+ { 0x00000001, 0x23283ae8, 0x000000c0, 0x00000000 },
+ { 0x00000001, 0x232c3ae8, 0x000000a0, 0x00000000 },
+ { 0x00000001, 0x233c0608, 0x00000000, 0x00000000 },
+ { 0x00000040, 0x233c0208, 0x0600033c, 0x08000000 },
+ { 0x00000001, 0x24083ae0, 0x000000c0, 0x00000000 },
+ { 0x00000048, 0x24083ae0, 0x3e000060, 0x41000000 },
+ { 0x00000248, 0x22e83ae8, 0x3e0000f4, 0x41e00000 },
+ { 0x00000641, 0x22e43ae8, 0x3e000080, 0x40800000 },
+ { 0x00000001, 0x24103ae0, 0x00000060, 0x00000000 },
+ { 0x00000648, 0x22f03ae8, 0x3e0000f4, 0x41000000 },
+ { 0x00000401, 0x22f40608, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00400001, 0x24003660, 0x30000000, 0x00006ea2 },
+ { 0x00400040, 0x24001860, 0x16690400, 0x00460046 },
+ { 0x00400009, 0x22c01868, 0x16690400, 0x00050005 },
+ { 0x00400001, 0x24003660, 0x30000000, 0x00006204 },
+ { 0x00400040, 0x24001860, 0x16690400, 0x00400040 },
+ { 0x00400209, 0x22401868, 0x16690400, 0x00050005 },
+ { 0x00000401, 0x22500608, 0x00000000, 0x01000100 },
+ { 0x00000001, 0x22d00608, 0x00000000, 0x00400040 },
+ { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 },
+ { 0x00000040, 0x22000200, 0x060002f4, 0x044eb000 },
+ { 0x00000001, 0x22080608, 0x00000000, 0x0000d000 },
+ { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 },
+ { 0x02000031, 0x28002248, 0x00000200, 0x00000200 },
+ { 0x00000040, 0x22000200, 0x060002f4, 0x048eb001 },
+ { 0x00000001, 0x22080608, 0x00000000, 0x0000a000 },
+ { 0x02000031, 0x28802248, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 },
+ { 0x00000040, 0x22000200, 0x060002f4, 0x044eb000 },
+ { 0x00000001, 0x22080608, 0x00000000, 0x0000d000 },
+ { 0x00000001, 0x23240608, 0x00000000, 0x00000001 },
+ { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 },
+ { 0x02000031, 0x2a002248, 0x00000200, 0x00000200 },
+ { 0x00000040, 0x22000200, 0x060002f4, 0x048eb001 },
+ { 0x00000001, 0x22080608, 0x00000000, 0x0000a000 },
+ { 0x02000031, 0x2a802248, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 },
+ { 0x00000040, 0x22000200, 0x060002f4, 0x044eb000 },
+ { 0x00000001, 0x22080608, 0x00000000, 0x0000d000 },
+ { 0x00000001, 0x23240608, 0x00000000, 0x00000002 },
+ { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 },
+ { 0x02000031, 0x2c002248, 0x00000200, 0x00000200 },
+ { 0x00000040, 0x22000200, 0x060002f4, 0x048eb001 },
+ { 0x00000001, 0x22080608, 0x00000000, 0x0000a000 },
+ { 0x02000031, 0x2c802248, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 },
+ { 0x00000040, 0x22000200, 0x060002f4, 0x044eb000 },
+ { 0x00000001, 0x22080608, 0x00000000, 0x0000d000 },
+ { 0x00000001, 0x23240608, 0x00000000, 0x00000003 },
+ { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 },
+ { 0x02000031, 0x2e002248, 0x00000200, 0x00000200 },
+ { 0x00000040, 0x22000200, 0x060002f4, 0x048eb001 },
+ { 0x00000001, 0x22080608, 0x00000000, 0x0000a000 },
+ { 0x02000031, 0x2e802248, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00400001, 0x22001240, 0x006902c0, 0x00000000 },
+ { 0x00600001, 0x248022e8, 0x00ae8201, 0x00000000 },
+ { 0x00600001, 0x24a022e8, 0x00ae8211, 0x00000000 },
+ { 0x00600001, 0x24c022e8, 0x00ae8401, 0x00000000 },
+ { 0x00600001, 0x24e022e8, 0x00ae8411, 0x00000000 },
+ { 0x00600001, 0x250022e8, 0x00ae8001, 0x00000000 },
+ { 0x00600001, 0x252022e8, 0x00ae8011, 0x00000000 },
+ { 0x00800040, 0x24c03ae8, 0x3e8d04c0, 0xc3000000 },
+ { 0x00800040, 0x25003ae8, 0x3e8d0500, 0xc3000000 },
+ { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 },
+ { 0x00800048, 0x23803ae8, 0x3e8d0500, 0x3fb374bc },
+ { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 },
+ { 0x00600048, 0x24003ae0, 0x3e8d04c0, 0xbeb020c5 },
+ { 0x00600048, 0x24203ae0, 0x3e8d04e0, 0xbeb020c5 },
+ { 0x00800048, 0x23c03ae8, 0x3e8d0500, 0xbf36c8b4 },
+ { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 },
+ { 0x00800048, 0x24003ae8, 0x3e8d04c0, 0x3fe2d0e5 },
+ { 0x80800041, 0x24803ae8, 0x3e8d0380, 0x3b8080c4 },
+ { 0x80800041, 0x24c03ae8, 0x3e8d03c0, 0x3b8080c4 },
+ { 0x80800041, 0x25003ae8, 0x3e8d0400, 0x3b8080c4 },
+ { 0x00800041, 0x23803ae8, 0x3e8d0480, 0x437f0000 },
+ { 0x00800041, 0x23c03ae8, 0x3e8d04c0, 0x437f0000 },
+ { 0x00800041, 0x24003ae8, 0x3e8d0500, 0x437f0000 },
+ { 0x00800001, 0x45403a68, 0x008d0380, 0x00000000 },
+ { 0x00800001, 0x45803a68, 0x008d03c0, 0x00000000 },
+ { 0x00800001, 0x45c03a68, 0x008d0400, 0x00000000 },
+ { 0x00800001, 0xc2012288, 0x00cf0540, 0x00000000 },
+ { 0x00800001, 0xc4012288, 0x00cf0580, 0x00000000 },
+ { 0x00800001, 0xc0012288, 0x00cf05c0, 0x00000000 },
+ { 0x00600001, 0x248022e8, 0x00ae8221, 0x00000000 },
+ { 0x00600001, 0x24a022e8, 0x00ae8231, 0x00000000 },
+ { 0x00600001, 0x24c022e8, 0x00ae8421, 0x00000000 },
+ { 0x00600001, 0x24e022e8, 0x00ae8431, 0x00000000 },
+ { 0x00600001, 0x250022e8, 0x00ae8021, 0x00000000 },
+ { 0x00600001, 0x252022e8, 0x00ae8031, 0x00000000 },
+ { 0x00800040, 0x24c03ae8, 0x3e8d04c0, 0xc3000000 },
+ { 0x00800040, 0x25003ae8, 0x3e8d0500, 0xc3000000 },
+ { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 },
+ { 0x00800048, 0x23803ae8, 0x3e8d0500, 0x3fb374bc },
+ { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 },
+ { 0x00600048, 0x24003ae0, 0x3e8d04c0, 0xbeb020c5 },
+ { 0x00600048, 0x24203ae0, 0x3e8d04e0, 0xbeb020c5 },
+ { 0x00800048, 0x23c03ae8, 0x3e8d0500, 0xbf36c8b4 },
+ { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 },
+ { 0x00800048, 0x24003ae8, 0x3e8d04c0, 0x3fe2d0e5 },
+ { 0x80800041, 0x24803ae8, 0x3e8d0380, 0x3b8080c4 },
+ { 0x80800041, 0x24c03ae8, 0x3e8d03c0, 0x3b8080c4 },
+ { 0x80800041, 0x25003ae8, 0x3e8d0400, 0x3b8080c4 },
+ { 0x00800041, 0x23803ae8, 0x3e8d0480, 0x437f0000 },
+ { 0x00800041, 0x23c03ae8, 0x3e8d04c0, 0x437f0000 },
+ { 0x00800041, 0x24003ae8, 0x3e8d0500, 0x437f0000 },
+ { 0x00800001, 0x45403a68, 0x008d0380, 0x00000000 },
+ { 0x00800001, 0x45803a68, 0x008d03c0, 0x00000000 },
+ { 0x00800001, 0x45c03a68, 0x008d0400, 0x00000000 },
+ { 0x00800001, 0xc2212288, 0x00cf0540, 0x00000000 },
+ { 0x00800001, 0xc4212288, 0x00cf0580, 0x00000000 },
+ { 0x00800001, 0xc0212288, 0x00cf05c0, 0x00000000 },
+ { 0x00600001, 0x248022e8, 0x00ae8241, 0x00000000 },
+ { 0x00600001, 0x24a022e8, 0x00ae8251, 0x00000000 },
+ { 0x00600001, 0x24c022e8, 0x00ae8441, 0x00000000 },
+ { 0x00600001, 0x24e022e8, 0x00ae8451, 0x00000000 },
+ { 0x00600001, 0x250022e8, 0x00ae8041, 0x00000000 },
+ { 0x00600001, 0x252022e8, 0x00ae8051, 0x00000000 },
+ { 0x00800040, 0x24c03ae8, 0x3e8d04c0, 0xc3000000 },
+ { 0x00800040, 0x25003ae8, 0x3e8d0500, 0xc3000000 },
+ { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 },
+ { 0x00800048, 0x23803ae8, 0x3e8d0500, 0x3fb374bc },
+ { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 },
+ { 0x00600048, 0x24003ae0, 0x3e8d04c0, 0xbeb020c5 },
+ { 0x00600048, 0x24203ae0, 0x3e8d04e0, 0xbeb020c5 },
+ { 0x00800048, 0x23c03ae8, 0x3e8d0500, 0xbf36c8b4 },
+ { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 },
+ { 0x00800048, 0x24003ae8, 0x3e8d04c0, 0x3fe2d0e5 },
+ { 0x80800041, 0x24803ae8, 0x3e8d0380, 0x3b8080c4 },
+ { 0x80800041, 0x24c03ae8, 0x3e8d03c0, 0x3b8080c4 },
+ { 0x80800041, 0x25003ae8, 0x3e8d0400, 0x3b8080c4 },
+ { 0x00800041, 0x23803ae8, 0x3e8d0480, 0x437f0000 },
+ { 0x00800041, 0x23c03ae8, 0x3e8d04c0, 0x437f0000 },
+ { 0x00800041, 0x24003ae8, 0x3e8d0500, 0x437f0000 },
+ { 0x00800001, 0x45403a68, 0x008d0380, 0x00000000 },
+ { 0x00800001, 0x45803a68, 0x008d03c0, 0x00000000 },
+ { 0x00800001, 0x45c03a68, 0x008d0400, 0x00000000 },
+ { 0x00800001, 0xc2412288, 0x00cf0540, 0x00000000 },
+ { 0x00800001, 0xc4412288, 0x00cf0580, 0x00000000 },
+ { 0x00800001, 0xc0412288, 0x00cf05c0, 0x00000000 },
+ { 0x00600001, 0x248022e8, 0x00ae8261, 0x00000000 },
+ { 0x00600001, 0x24a022e8, 0x00ae8271, 0x00000000 },
+ { 0x00600001, 0x24c022e8, 0x00ae8461, 0x00000000 },
+ { 0x00600001, 0x24e022e8, 0x00ae8471, 0x00000000 },
+ { 0x00600001, 0x250022e8, 0x00ae8061, 0x00000000 },
+ { 0x00600001, 0x252022e8, 0x00ae8071, 0x00000000 },
+ { 0x00800040, 0x24c03ae8, 0x3e8d04c0, 0xc3000000 },
+ { 0x00800040, 0x25003ae8, 0x3e8d0500, 0xc3000000 },
+ { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 },
+ { 0x00800048, 0x23803ae8, 0x3e8d0500, 0x3fb374bc },
+ { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 },
+ { 0x00600048, 0x24003ae0, 0x3e8d04c0, 0xbeb020c5 },
+ { 0x00600048, 0x24203ae0, 0x3e8d04e0, 0xbeb020c5 },
+ { 0x00800048, 0x23c03ae8, 0x3e8d0500, 0xbf36c8b4 },
+ { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 },
+ { 0x00800048, 0x24003ae8, 0x3e8d04c0, 0x3fe2d0e5 },
+ { 0x80800041, 0x24803ae8, 0x3e8d0380, 0x3b8080c4 },
+ { 0x80800041, 0x24c03ae8, 0x3e8d03c0, 0x3b8080c4 },
+ { 0x80800041, 0x25003ae8, 0x3e8d0400, 0x3b8080c4 },
+ { 0x00800041, 0x23803ae8, 0x3e8d0480, 0x437f0000 },
+ { 0x00800041, 0x23c03ae8, 0x3e8d04c0, 0x437f0000 },
+ { 0x00800041, 0x24003ae8, 0x3e8d0500, 0x437f0000 },
+ { 0x00800001, 0x45403a68, 0x008d0380, 0x00000000 },
+ { 0x00800001, 0x45803a68, 0x008d03c0, 0x00000000 },
+ { 0x00800001, 0x45c03a68, 0x008d0400, 0x00000000 },
+ { 0x00800001, 0xc2612288, 0x00cf0540, 0x00000000 },
+ { 0x00800001, 0xc4612288, 0x00cf0580, 0x00000000 },
+ { 0x00800001, 0xc0612288, 0x00cf05c0, 0x00000000 },
+ { 0x00400040, 0x22001240, 0x166902c0, 0x02000200 },
+ { 0x00600001, 0x248022e8, 0x00ae8201, 0x00000000 },
+ { 0x00600001, 0x24a022e8, 0x00ae8211, 0x00000000 },
+ { 0x00600001, 0x24c022e8, 0x00ae8401, 0x00000000 },
+ { 0x00600001, 0x24e022e8, 0x00ae8411, 0x00000000 },
+ { 0x00600001, 0x250022e8, 0x00ae8001, 0x00000000 },
+ { 0x00600001, 0x252022e8, 0x00ae8011, 0x00000000 },
+ { 0x00800040, 0x24c03ae8, 0x3e8d04c0, 0xc3000000 },
+ { 0x00800040, 0x25003ae8, 0x3e8d0500, 0xc3000000 },
+ { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 },
+ { 0x00800048, 0x23803ae8, 0x3e8d0500, 0x3fb374bc },
+ { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 },
+ { 0x00600048, 0x24003ae0, 0x3e8d04c0, 0xbeb020c5 },
+ { 0x00600048, 0x24203ae0, 0x3e8d04e0, 0xbeb020c5 },
+ { 0x00800048, 0x23c03ae8, 0x3e8d0500, 0xbf36c8b4 },
+ { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 },
+ { 0x00800048, 0x24003ae8, 0x3e8d04c0, 0x3fe2d0e5 },
+ { 0x80800041, 0x24803ae8, 0x3e8d0380, 0x3b8080c4 },
+ { 0x80800041, 0x24c03ae8, 0x3e8d03c0, 0x3b8080c4 },
+ { 0x80800041, 0x25003ae8, 0x3e8d0400, 0x3b8080c4 },
+ { 0x00800041, 0x23803ae8, 0x3e8d0480, 0x437f0000 },
+ { 0x00800041, 0x23c03ae8, 0x3e8d04c0, 0x437f0000 },
+ { 0x00800041, 0x24003ae8, 0x3e8d0500, 0x437f0000 },
+ { 0x00800001, 0x45403a68, 0x008d0380, 0x00000000 },
+ { 0x00800001, 0x45803a68, 0x008d03c0, 0x00000000 },
+ { 0x00800001, 0x45c03a68, 0x008d0400, 0x00000000 },
+ { 0x00800001, 0xc2012288, 0x00cf0540, 0x00000000 },
+ { 0x00800001, 0xc4012288, 0x00cf0580, 0x00000000 },
+ { 0x00800001, 0xc0012288, 0x00cf05c0, 0x00000000 },
+ { 0x00600001, 0x248022e8, 0x00ae8221, 0x00000000 },
+ { 0x00600001, 0x24a022e8, 0x00ae8231, 0x00000000 },
+ { 0x00600001, 0x24c022e8, 0x00ae8421, 0x00000000 },
+ { 0x00600001, 0x24e022e8, 0x00ae8431, 0x00000000 },
+ { 0x00600001, 0x250022e8, 0x00ae8021, 0x00000000 },
+ { 0x00600001, 0x252022e8, 0x00ae8031, 0x00000000 },
+ { 0x00800040, 0x24c03ae8, 0x3e8d04c0, 0xc3000000 },
+ { 0x00800040, 0x25003ae8, 0x3e8d0500, 0xc3000000 },
+ { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 },
+ { 0x00800048, 0x23803ae8, 0x3e8d0500, 0x3fb374bc },
+ { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 },
+ { 0x00600048, 0x24003ae0, 0x3e8d04c0, 0xbeb020c5 },
+ { 0x00600048, 0x24203ae0, 0x3e8d04e0, 0xbeb020c5 },
+ { 0x00800048, 0x23c03ae8, 0x3e8d0500, 0xbf36c8b4 },
+ { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 },
+ { 0x00800048, 0x24003ae8, 0x3e8d04c0, 0x3fe2d0e5 },
+ { 0x80800041, 0x24803ae8, 0x3e8d0380, 0x3b8080c4 },
+ { 0x80800041, 0x24c03ae8, 0x3e8d03c0, 0x3b8080c4 },
+ { 0x80800041, 0x25003ae8, 0x3e8d0400, 0x3b8080c4 },
+ { 0x00800041, 0x23803ae8, 0x3e8d0480, 0x437f0000 },
+ { 0x00800041, 0x23c03ae8, 0x3e8d04c0, 0x437f0000 },
+ { 0x00800041, 0x24003ae8, 0x3e8d0500, 0x437f0000 },
+ { 0x00800001, 0x45403a68, 0x008d0380, 0x00000000 },
+ { 0x00800001, 0x45803a68, 0x008d03c0, 0x00000000 },
+ { 0x00800001, 0x45c03a68, 0x008d0400, 0x00000000 },
+ { 0x00800001, 0xc2212288, 0x00cf0540, 0x00000000 },
+ { 0x00800001, 0xc4212288, 0x00cf0580, 0x00000000 },
+ { 0x00800001, 0xc0212288, 0x00cf05c0, 0x00000000 },
+ { 0x00600001, 0x248022e8, 0x00ae8241, 0x00000000 },
+ { 0x00600001, 0x24a022e8, 0x00ae8251, 0x00000000 },
+ { 0x00600001, 0x24c022e8, 0x00ae8441, 0x00000000 },
+ { 0x00600001, 0x24e022e8, 0x00ae8451, 0x00000000 },
+ { 0x00600001, 0x250022e8, 0x00ae8041, 0x00000000 },
+ { 0x00600001, 0x252022e8, 0x00ae8051, 0x00000000 },
+ { 0x00800040, 0x24c03ae8, 0x3e8d04c0, 0xc3000000 },
+ { 0x00800040, 0x25003ae8, 0x3e8d0500, 0xc3000000 },
+ { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 },
+ { 0x00800048, 0x23803ae8, 0x3e8d0500, 0x3fb374bc },
+ { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 },
+ { 0x00600048, 0x24003ae0, 0x3e8d04c0, 0xbeb020c5 },
+ { 0x00600048, 0x24203ae0, 0x3e8d04e0, 0xbeb020c5 },
+ { 0x00800048, 0x23c03ae8, 0x3e8d0500, 0xbf36c8b4 },
+ { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 },
+ { 0x00800048, 0x24003ae8, 0x3e8d04c0, 0x3fe2d0e5 },
+ { 0x80800041, 0x24803ae8, 0x3e8d0380, 0x3b8080c4 },
+ { 0x80800041, 0x24c03ae8, 0x3e8d03c0, 0x3b8080c4 },
+ { 0x80800041, 0x25003ae8, 0x3e8d0400, 0x3b8080c4 },
+ { 0x00800041, 0x23803ae8, 0x3e8d0480, 0x437f0000 },
+ { 0x00800041, 0x23c03ae8, 0x3e8d04c0, 0x437f0000 },
+ { 0x00800041, 0x24003ae8, 0x3e8d0500, 0x437f0000 },
+ { 0x00800001, 0x45403a68, 0x008d0380, 0x00000000 },
+ { 0x00800001, 0x45803a68, 0x008d03c0, 0x00000000 },
+ { 0x00800001, 0x45c03a68, 0x008d0400, 0x00000000 },
+ { 0x00800001, 0xc2412288, 0x00cf0540, 0x00000000 },
+ { 0x00800001, 0xc4412288, 0x00cf0580, 0x00000000 },
+ { 0x00800001, 0xc0412288, 0x00cf05c0, 0x00000000 },
+ { 0x00600001, 0x248022e8, 0x00ae8261, 0x00000000 },
+ { 0x00600001, 0x24a022e8, 0x00ae8271, 0x00000000 },
+ { 0x00600001, 0x24c022e8, 0x00ae8461, 0x00000000 },
+ { 0x00600001, 0x24e022e8, 0x00ae8471, 0x00000000 },
+ { 0x00600001, 0x250022e8, 0x00ae8061, 0x00000000 },
+ { 0x00600001, 0x252022e8, 0x00ae8071, 0x00000000 },
+ { 0x00800040, 0x24c03ae8, 0x3e8d04c0, 0xc3000000 },
+ { 0x00800040, 0x25003ae8, 0x3e8d0500, 0xc3000000 },
+ { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 },
+ { 0x00800048, 0x23803ae8, 0x3e8d0500, 0x3fb374bc },
+ { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 },
+ { 0x00600048, 0x24003ae0, 0x3e8d04c0, 0xbeb020c5 },
+ { 0x00600048, 0x24203ae0, 0x3e8d04e0, 0xbeb020c5 },
+ { 0x00800048, 0x23c03ae8, 0x3e8d0500, 0xbf36c8b4 },
+ { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 },
+ { 0x00800048, 0x24003ae8, 0x3e8d04c0, 0x3fe2d0e5 },
+ { 0x80800041, 0x24803ae8, 0x3e8d0380, 0x3b8080c4 },
+ { 0x80800041, 0x24c03ae8, 0x3e8d03c0, 0x3b8080c4 },
+ { 0x80800041, 0x25003ae8, 0x3e8d0400, 0x3b8080c4 },
+ { 0x00800041, 0x23803ae8, 0x3e8d0480, 0x437f0000 },
+ { 0x00800041, 0x23c03ae8, 0x3e8d04c0, 0x437f0000 },
+ { 0x00800041, 0x24003ae8, 0x3e8d0500, 0x437f0000 },
+ { 0x00800001, 0x45403a68, 0x008d0380, 0x00000000 },
+ { 0x00800001, 0x45803a68, 0x008d03c0, 0x00000000 },
+ { 0x00800001, 0x45c03a68, 0x008d0400, 0x00000000 },
+ { 0x00800001, 0xc2612288, 0x00cf0540, 0x00000000 },
+ { 0x00800001, 0xc4612288, 0x00cf0580, 0x00000000 },
+ { 0x00800001, 0xc0612288, 0x00cf05c0, 0x00000000 },
+ { 0x00400040, 0x22001240, 0x166902c0, 0x04000400 },
+ { 0x00600001, 0x248022e8, 0x00ae8201, 0x00000000 },
+ { 0x00600001, 0x24a022e8, 0x00ae8211, 0x00000000 },
+ { 0x00600001, 0x24c022e8, 0x00ae8401, 0x00000000 },
+ { 0x00600001, 0x24e022e8, 0x00ae8411, 0x00000000 },
+ { 0x00600001, 0x250022e8, 0x00ae8001, 0x00000000 },
+ { 0x00600001, 0x252022e8, 0x00ae8011, 0x00000000 },
+ { 0x00800040, 0x24c03ae8, 0x3e8d04c0, 0xc3000000 },
+ { 0x00800040, 0x25003ae8, 0x3e8d0500, 0xc3000000 },
+ { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 },
+ { 0x00800048, 0x23803ae8, 0x3e8d0500, 0x3fb374bc },
+ { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 },
+ { 0x00600048, 0x24003ae0, 0x3e8d04c0, 0xbeb020c5 },
+ { 0x00600048, 0x24203ae0, 0x3e8d04e0, 0xbeb020c5 },
+ { 0x00800048, 0x23c03ae8, 0x3e8d0500, 0xbf36c8b4 },
+ { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 },
+ { 0x00800048, 0x24003ae8, 0x3e8d04c0, 0x3fe2d0e5 },
+ { 0x80800041, 0x24803ae8, 0x3e8d0380, 0x3b8080c4 },
+ { 0x80800041, 0x24c03ae8, 0x3e8d03c0, 0x3b8080c4 },
+ { 0x80800041, 0x25003ae8, 0x3e8d0400, 0x3b8080c4 },
+ { 0x00800041, 0x23803ae8, 0x3e8d0480, 0x437f0000 },
+ { 0x00800041, 0x23c03ae8, 0x3e8d04c0, 0x437f0000 },
+ { 0x00800041, 0x24003ae8, 0x3e8d0500, 0x437f0000 },
+ { 0x00800001, 0x45403a68, 0x008d0380, 0x00000000 },
+ { 0x00800001, 0x45803a68, 0x008d03c0, 0x00000000 },
+ { 0x00800001, 0x45c03a68, 0x008d0400, 0x00000000 },
+ { 0x00800001, 0xc2012288, 0x00cf0540, 0x00000000 },
+ { 0x00800001, 0xc4012288, 0x00cf0580, 0x00000000 },
+ { 0x00800001, 0xc0012288, 0x00cf05c0, 0x00000000 },
+ { 0x00600001, 0x248022e8, 0x00ae8221, 0x00000000 },
+ { 0x00600001, 0x24a022e8, 0x00ae8231, 0x00000000 },
+ { 0x00600001, 0x24c022e8, 0x00ae8421, 0x00000000 },
+ { 0x00600001, 0x24e022e8, 0x00ae8431, 0x00000000 },
+ { 0x00600001, 0x250022e8, 0x00ae8021, 0x00000000 },
+ { 0x00600001, 0x252022e8, 0x00ae8031, 0x00000000 },
+ { 0x00800040, 0x24c03ae8, 0x3e8d04c0, 0xc3000000 },
+ { 0x00800040, 0x25003ae8, 0x3e8d0500, 0xc3000000 },
+ { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 },
+ { 0x00800048, 0x23803ae8, 0x3e8d0500, 0x3fb374bc },
+ { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 },
+ { 0x00600048, 0x24003ae0, 0x3e8d04c0, 0xbeb020c5 },
+ { 0x00600048, 0x24203ae0, 0x3e8d04e0, 0xbeb020c5 },
+ { 0x00800048, 0x23c03ae8, 0x3e8d0500, 0xbf36c8b4 },
+ { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 },
+ { 0x00800048, 0x24003ae8, 0x3e8d04c0, 0x3fe2d0e5 },
+ { 0x80800041, 0x24803ae8, 0x3e8d0380, 0x3b8080c4 },
+ { 0x80800041, 0x24c03ae8, 0x3e8d03c0, 0x3b8080c4 },
+ { 0x80800041, 0x25003ae8, 0x3e8d0400, 0x3b8080c4 },
+ { 0x00800041, 0x23803ae8, 0x3e8d0480, 0x437f0000 },
+ { 0x00800041, 0x23c03ae8, 0x3e8d04c0, 0x437f0000 },
+ { 0x00800041, 0x24003ae8, 0x3e8d0500, 0x437f0000 },
+ { 0x00800001, 0x45403a68, 0x008d0380, 0x00000000 },
+ { 0x00800001, 0x45803a68, 0x008d03c0, 0x00000000 },
+ { 0x00800001, 0x45c03a68, 0x008d0400, 0x00000000 },
+ { 0x00800001, 0xc2212288, 0x00cf0540, 0x00000000 },
+ { 0x00800001, 0xc4212288, 0x00cf0580, 0x00000000 },
+ { 0x00800001, 0xc0212288, 0x00cf05c0, 0x00000000 },
+ { 0x00600001, 0x248022e8, 0x00ae8241, 0x00000000 },
+ { 0x00600001, 0x24a022e8, 0x00ae8251, 0x00000000 },
+ { 0x00600001, 0x24c022e8, 0x00ae8441, 0x00000000 },
+ { 0x00600001, 0x24e022e8, 0x00ae8451, 0x00000000 },
+ { 0x00600001, 0x250022e8, 0x00ae8041, 0x00000000 },
+ { 0x00600001, 0x252022e8, 0x00ae8051, 0x00000000 },
+ { 0x00800040, 0x24c03ae8, 0x3e8d04c0, 0xc3000000 },
+ { 0x00800040, 0x25003ae8, 0x3e8d0500, 0xc3000000 },
+ { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 },
+ { 0x00800048, 0x23803ae8, 0x3e8d0500, 0x3fb374bc },
+ { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 },
+ { 0x00600048, 0x24003ae0, 0x3e8d04c0, 0xbeb020c5 },
+ { 0x00600048, 0x24203ae0, 0x3e8d04e0, 0xbeb020c5 },
+ { 0x00800048, 0x23c03ae8, 0x3e8d0500, 0xbf36c8b4 },
+ { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 },
+ { 0x00800048, 0x24003ae8, 0x3e8d04c0, 0x3fe2d0e5 },
+ { 0x80800041, 0x24803ae8, 0x3e8d0380, 0x3b8080c4 },
+ { 0x80800041, 0x24c03ae8, 0x3e8d03c0, 0x3b8080c4 },
+ { 0x80800041, 0x25003ae8, 0x3e8d0400, 0x3b8080c4 },
+ { 0x00800041, 0x23803ae8, 0x3e8d0480, 0x437f0000 },
+ { 0x00800041, 0x23c03ae8, 0x3e8d04c0, 0x437f0000 },
+ { 0x00800041, 0x24003ae8, 0x3e8d0500, 0x437f0000 },
+ { 0x00800001, 0x45403a68, 0x008d0380, 0x00000000 },
+ { 0x00800001, 0x45803a68, 0x008d03c0, 0x00000000 },
+ { 0x00800001, 0x45c03a68, 0x008d0400, 0x00000000 },
+ { 0x00800001, 0xc2412288, 0x00cf0540, 0x00000000 },
+ { 0x00800001, 0xc4412288, 0x00cf0580, 0x00000000 },
+ { 0x00800001, 0xc0412288, 0x00cf05c0, 0x00000000 },
+ { 0x00600001, 0x248022e8, 0x00ae8261, 0x00000000 },
+ { 0x00600001, 0x24a022e8, 0x00ae8271, 0x00000000 },
+ { 0x00600001, 0x24c022e8, 0x00ae8461, 0x00000000 },
+ { 0x00600001, 0x24e022e8, 0x00ae8471, 0x00000000 },
+ { 0x00600001, 0x250022e8, 0x00ae8061, 0x00000000 },
+ { 0x00600001, 0x252022e8, 0x00ae8071, 0x00000000 },
+ { 0x00800040, 0x24c03ae8, 0x3e8d04c0, 0xc3000000 },
+ { 0x00800040, 0x25003ae8, 0x3e8d0500, 0xc3000000 },
+ { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 },
+ { 0x00800048, 0x23803ae8, 0x3e8d0500, 0x3fb374bc },
+ { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 },
+ { 0x00600048, 0x24003ae0, 0x3e8d04c0, 0xbeb020c5 },
+ { 0x00600048, 0x24203ae0, 0x3e8d04e0, 0xbeb020c5 },
+ { 0x00800048, 0x23c03ae8, 0x3e8d0500, 0xbf36c8b4 },
+ { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 },
+ { 0x00800048, 0x24003ae8, 0x3e8d04c0, 0x3fe2d0e5 },
+ { 0x80800041, 0x24803ae8, 0x3e8d0380, 0x3b8080c4 },
+ { 0x80800041, 0x24c03ae8, 0x3e8d03c0, 0x3b8080c4 },
+ { 0x80800041, 0x25003ae8, 0x3e8d0400, 0x3b8080c4 },
+ { 0x00800041, 0x23803ae8, 0x3e8d0480, 0x437f0000 },
+ { 0x00800041, 0x23c03ae8, 0x3e8d04c0, 0x437f0000 },
+ { 0x00800041, 0x24003ae8, 0x3e8d0500, 0x437f0000 },
+ { 0x00800001, 0x45403a68, 0x008d0380, 0x00000000 },
+ { 0x00800001, 0x45803a68, 0x008d03c0, 0x00000000 },
+ { 0x00800001, 0x45c03a68, 0x008d0400, 0x00000000 },
+ { 0x00800001, 0xc2612288, 0x00cf0540, 0x00000000 },
+ { 0x00800001, 0xc4612288, 0x00cf0580, 0x00000000 },
+ { 0x00800001, 0xc0612288, 0x00cf05c0, 0x00000000 },
+ { 0x00400040, 0x22001240, 0x166902c0, 0x06000600 },
+ { 0x00600001, 0x248022e8, 0x00ae8201, 0x00000000 },
+ { 0x00600001, 0x24a022e8, 0x00ae8211, 0x00000000 },
+ { 0x00600001, 0x24c022e8, 0x00ae8401, 0x00000000 },
+ { 0x00600001, 0x24e022e8, 0x00ae8411, 0x00000000 },
+ { 0x00600001, 0x250022e8, 0x00ae8001, 0x00000000 },
+ { 0x00600001, 0x252022e8, 0x00ae8011, 0x00000000 },
+ { 0x00800040, 0x24c03ae8, 0x3e8d04c0, 0xc3000000 },
+ { 0x00800040, 0x25003ae8, 0x3e8d0500, 0xc3000000 },
+ { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 },
+ { 0x00800048, 0x23803ae8, 0x3e8d0500, 0x3fb374bc },
+ { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 },
+ { 0x00600048, 0x24003ae0, 0x3e8d04c0, 0xbeb020c5 },
+ { 0x00600048, 0x24203ae0, 0x3e8d04e0, 0xbeb020c5 },
+ { 0x00800048, 0x23c03ae8, 0x3e8d0500, 0xbf36c8b4 },
+ { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 },
+ { 0x00800048, 0x24003ae8, 0x3e8d04c0, 0x3fe2d0e5 },
+ { 0x80800041, 0x24803ae8, 0x3e8d0380, 0x3b8080c4 },
+ { 0x80800041, 0x24c03ae8, 0x3e8d03c0, 0x3b8080c4 },
+ { 0x80800041, 0x25003ae8, 0x3e8d0400, 0x3b8080c4 },
+ { 0x00800041, 0x23803ae8, 0x3e8d0480, 0x437f0000 },
+ { 0x00800041, 0x23c03ae8, 0x3e8d04c0, 0x437f0000 },
+ { 0x00800041, 0x24003ae8, 0x3e8d0500, 0x437f0000 },
+ { 0x00800001, 0x45403a68, 0x008d0380, 0x00000000 },
+ { 0x00800001, 0x45803a68, 0x008d03c0, 0x00000000 },
+ { 0x00800001, 0x45c03a68, 0x008d0400, 0x00000000 },
+ { 0x00800001, 0xc2012288, 0x00cf0540, 0x00000000 },
+ { 0x00800001, 0xc4012288, 0x00cf0580, 0x00000000 },
+ { 0x00800001, 0xc0012288, 0x00cf05c0, 0x00000000 },
+ { 0x00600001, 0x248022e8, 0x00ae8221, 0x00000000 },
+ { 0x00600001, 0x24a022e8, 0x00ae8231, 0x00000000 },
+ { 0x00600001, 0x24c022e8, 0x00ae8421, 0x00000000 },
+ { 0x00600001, 0x24e022e8, 0x00ae8431, 0x00000000 },
+ { 0x00600001, 0x250022e8, 0x00ae8021, 0x00000000 },
+ { 0x00600001, 0x252022e8, 0x00ae8031, 0x00000000 },
+ { 0x00800040, 0x24c03ae8, 0x3e8d04c0, 0xc3000000 },
+ { 0x00800040, 0x25003ae8, 0x3e8d0500, 0xc3000000 },
+ { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 },
+ { 0x00800048, 0x23803ae8, 0x3e8d0500, 0x3fb374bc },
+ { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 },
+ { 0x00600048, 0x24003ae0, 0x3e8d04c0, 0xbeb020c5 },
+ { 0x00600048, 0x24203ae0, 0x3e8d04e0, 0xbeb020c5 },
+ { 0x00800048, 0x23c03ae8, 0x3e8d0500, 0xbf36c8b4 },
+ { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 },
+ { 0x00800048, 0x24003ae8, 0x3e8d04c0, 0x3fe2d0e5 },
+ { 0x80800041, 0x24803ae8, 0x3e8d0380, 0x3b8080c4 },
+ { 0x80800041, 0x24c03ae8, 0x3e8d03c0, 0x3b8080c4 },
+ { 0x80800041, 0x25003ae8, 0x3e8d0400, 0x3b8080c4 },
+ { 0x00800041, 0x23803ae8, 0x3e8d0480, 0x437f0000 },
+ { 0x00800041, 0x23c03ae8, 0x3e8d04c0, 0x437f0000 },
+ { 0x00800041, 0x24003ae8, 0x3e8d0500, 0x437f0000 },
+ { 0x00800001, 0x45403a68, 0x008d0380, 0x00000000 },
+ { 0x00800001, 0x45803a68, 0x008d03c0, 0x00000000 },
+ { 0x00800001, 0x45c03a68, 0x008d0400, 0x00000000 },
+ { 0x00800001, 0xc2212288, 0x00cf0540, 0x00000000 },
+ { 0x00800001, 0xc4212288, 0x00cf0580, 0x00000000 },
+ { 0x00800001, 0xc0212288, 0x00cf05c0, 0x00000000 },
+ { 0x00600001, 0x248022e8, 0x00ae8241, 0x00000000 },
+ { 0x00600001, 0x24a022e8, 0x00ae8251, 0x00000000 },
+ { 0x00600001, 0x24c022e8, 0x00ae8441, 0x00000000 },
+ { 0x00600001, 0x24e022e8, 0x00ae8451, 0x00000000 },
+ { 0x00600001, 0x250022e8, 0x00ae8041, 0x00000000 },
+ { 0x00600001, 0x252022e8, 0x00ae8051, 0x00000000 },
+ { 0x00800040, 0x24c03ae8, 0x3e8d04c0, 0xc3000000 },
+ { 0x00800040, 0x25003ae8, 0x3e8d0500, 0xc3000000 },
+ { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 },
+ { 0x00800048, 0x23803ae8, 0x3e8d0500, 0x3fb374bc },
+ { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 },
+ { 0x00600048, 0x24003ae0, 0x3e8d04c0, 0xbeb020c5 },
+ { 0x00600048, 0x24203ae0, 0x3e8d04e0, 0xbeb020c5 },
+ { 0x00800048, 0x23c03ae8, 0x3e8d0500, 0xbf36c8b4 },
+ { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 },
+ { 0x00800048, 0x24003ae8, 0x3e8d04c0, 0x3fe2d0e5 },
+ { 0x80800041, 0x24803ae8, 0x3e8d0380, 0x3b8080c4 },
+ { 0x80800041, 0x24c03ae8, 0x3e8d03c0, 0x3b8080c4 },
+ { 0x80800041, 0x25003ae8, 0x3e8d0400, 0x3b8080c4 },
+ { 0x00800041, 0x23803ae8, 0x3e8d0480, 0x437f0000 },
+ { 0x00800041, 0x23c03ae8, 0x3e8d04c0, 0x437f0000 },
+ { 0x00800041, 0x24003ae8, 0x3e8d0500, 0x437f0000 },
+ { 0x00800001, 0x45403a68, 0x008d0380, 0x00000000 },
+ { 0x00800001, 0x45803a68, 0x008d03c0, 0x00000000 },
+ { 0x00800001, 0x45c03a68, 0x008d0400, 0x00000000 },
+ { 0x00800001, 0xc2412288, 0x00cf0540, 0x00000000 },
+ { 0x00800001, 0xc4412288, 0x00cf0580, 0x00000000 },
+ { 0x00800001, 0xc0412288, 0x00cf05c0, 0x00000000 },
+ { 0x00600001, 0x248022e8, 0x00ae8261, 0x00000000 },
+ { 0x00600001, 0x24a022e8, 0x00ae8271, 0x00000000 },
+ { 0x00600001, 0x24c022e8, 0x00ae8461, 0x00000000 },
+ { 0x00600001, 0x24e022e8, 0x00ae8471, 0x00000000 },
+ { 0x00600001, 0x250022e8, 0x00ae8061, 0x00000000 },
+ { 0x00600001, 0x252022e8, 0x00ae8071, 0x00000000 },
+ { 0x00800040, 0x24c03ae8, 0x3e8d04c0, 0xc3000000 },
+ { 0x00800040, 0x25003ae8, 0x3e8d0500, 0xc3000000 },
+ { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 },
+ { 0x00800048, 0x23803ae8, 0x3e8d0500, 0x3fb374bc },
+ { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 },
+ { 0x00600048, 0x24003ae0, 0x3e8d04c0, 0xbeb020c5 },
+ { 0x00600048, 0x24203ae0, 0x3e8d04e0, 0xbeb020c5 },
+ { 0x00800048, 0x23c03ae8, 0x3e8d0500, 0xbf36c8b4 },
+ { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 },
+ { 0x00800048, 0x24003ae8, 0x3e8d04c0, 0x3fe2d0e5 },
+ { 0x80800041, 0x24803ae8, 0x3e8d0380, 0x3b8080c4 },
+ { 0x80800041, 0x24c03ae8, 0x3e8d03c0, 0x3b8080c4 },
+ { 0x80800041, 0x25003ae8, 0x3e8d0400, 0x3b8080c4 },
+ { 0x00800041, 0x23803ae8, 0x3e8d0480, 0x437f0000 },
+ { 0x00800041, 0x23c03ae8, 0x3e8d04c0, 0x437f0000 },
+ { 0x00800041, 0x24003ae8, 0x3e8d0500, 0x437f0000 },
+ { 0x00800001, 0x45403a68, 0x008d0380, 0x00000000 },
+ { 0x00800001, 0x45803a68, 0x008d03c0, 0x00000000 },
+ { 0x00800001, 0x45c03a68, 0x008d0400, 0x00000000 },
+ { 0x00800001, 0xc2612288, 0x00cf0540, 0x00000000 },
+ { 0x00800001, 0xc4612288, 0x00cf0580, 0x00000000 },
+ { 0x00800001, 0xc0612288, 0x00cf05c0, 0x00000000 },
+ { 0x00400001, 0x24003660, 0x30000000, 0x000062ea },
+ { 0x00400040, 0x24001860, 0x16690400, 0x00460046 },
+ { 0x00400009, 0x22c01868, 0x16690400, 0x00050005 },
+ { 0x02800005, 0x20001260, 0x1e000046, 0x00010001 },
+ { 0x00400040, 0x22001a40, 0x166902c0, 0x00000000 },
+ { 0x00010001, 0x22201048, 0x00000200, 0x00000000 },
+ { 0x00010001, 0x22001040, 0x00000204, 0x00000000 },
+ { 0x00010001, 0x22041240, 0x00000220, 0x00000000 },
+ { 0x00000209, 0x23601a28, 0x1e0000e0, 0x00020002 },
+ { 0x00000601, 0x23641a28, 0x000000e2, 0x00000000 },
+ { 0x00000401, 0x23680608, 0x00000000, 0x0003001f },
+ { 0x00400001, 0x22081040, 0x00690200, 0x00000000 },
+ { 0x00600001, 0x23800208, 0x008d0360, 0x00000000 },
+ { 0x00600001, 0x24a00208, 0x008d0360, 0x00000000 },
+ { 0x00600001, 0x25c00208, 0x008d0360, 0x00000000 },
+ { 0x00600001, 0x26e00208, 0x008d0360, 0x00000000 },
+ { 0x00600001, 0x23e00208, 0x008d0360, 0x00000000 },
+ { 0x00600001, 0x25000208, 0x008d0360, 0x00000000 },
+ { 0x00600001, 0x26200208, 0x008d0360, 0x00000000 },
+ { 0x00600001, 0x27400208, 0x008d0360, 0x00000000 },
+ { 0x00000040, 0x24a00a28, 0x0e000360, 0x00000020 },
+ { 0x00000040, 0x25c40a28, 0x0e000364, 0x00000004 },
+ { 0x00000040, 0x26e40a28, 0x0e000364, 0x00000004 },
+ { 0x00000040, 0x26e00a28, 0x0e000360, 0x00000020 },
+ { 0x00600001, 0x63a02288, 0x00ae8001, 0x00000000 },
+ { 0x00600001, 0x63a12288, 0x00ae8201, 0x00000000 },
+ { 0x00600001, 0x63a22288, 0x00ae8401, 0x00000000 },
+ { 0x00600001, 0x63a32288, 0x0000005f, 0x00000000 },
+ { 0x00600001, 0x63c02288, 0x00ae8021, 0x00000000 },
+ { 0x00600001, 0x63c12288, 0x00ae8221, 0x00000000 },
+ { 0x00600001, 0x63c22288, 0x00ae8421, 0x00000000 },
+ { 0x00600001, 0x63c32288, 0x0000005f, 0x00000000 },
+ { 0x00600001, 0x64c02288, 0x00ae8011, 0x00000000 },
+ { 0x00600001, 0x64c12288, 0x00ae8211, 0x00000000 },
+ { 0x00600001, 0x64c22288, 0x00ae8411, 0x00000000 },
+ { 0x00600001, 0x64c32288, 0x0000005f, 0x00000000 },
+ { 0x00600001, 0x64e02288, 0x00ae8031, 0x00000000 },
+ { 0x00600001, 0x64e12288, 0x00ae8231, 0x00000000 },
+ { 0x00600001, 0x64e22288, 0x00ae8431, 0x00000000 },
+ { 0x00600001, 0x64e32288, 0x0000005f, 0x00000000 },
+ { 0x00600001, 0x63e02288, 0x00ae8041, 0x00000000 },
+ { 0x00600001, 0x63e12288, 0x00ae8241, 0x00000000 },
+ { 0x00600001, 0x63e22288, 0x00ae8441, 0x00000000 },
+ { 0x00600001, 0x63e32288, 0x0000005f, 0x00000000 },
+ { 0x00600001, 0x64002288, 0x00ae8061, 0x00000000 },
+ { 0x00600001, 0x64012288, 0x00ae8261, 0x00000000 },
+ { 0x00600001, 0x64022288, 0x00ae8461, 0x00000000 },
+ { 0x00600001, 0x64032288, 0x0000005f, 0x00000000 },
+ { 0x00600001, 0x65002288, 0x00ae8051, 0x00000000 },
+ { 0x00600001, 0x65012288, 0x00ae8251, 0x00000000 },
+ { 0x00600001, 0x65022288, 0x00ae8451, 0x00000000 },
+ { 0x00600001, 0x65032288, 0x0000005f, 0x00000000 },
+ { 0x00600001, 0x65202288, 0x00ae8071, 0x00000000 },
+ { 0x00600001, 0x65212288, 0x00ae8271, 0x00000000 },
+ { 0x00600001, 0x65222288, 0x00ae8471, 0x00000000 },
+ { 0x00600001, 0x65232288, 0x0000005f, 0x00000000 },
+ { 0x0c800031, 0x20002220, 0x06000380, 0x0a0a8018 },
+ { 0x0c800031, 0x20002220, 0x060004a0, 0x0a0a8018 },
+ { 0x00400040, 0x22001040, 0x16690208, 0x02000200 },
+ { 0x00600001, 0x65e02288, 0x00ae8001, 0x00000000 },
+ { 0x00600001, 0x65e12288, 0x00ae8201, 0x00000000 },
+ { 0x00600001, 0x65e22288, 0x00ae8401, 0x00000000 },
+ { 0x00600001, 0x65e32288, 0x0000005f, 0x00000000 },
+ { 0x00600001, 0x66002288, 0x00ae8021, 0x00000000 },
+ { 0x00600001, 0x66012288, 0x00ae8221, 0x00000000 },
+ { 0x00600001, 0x66022288, 0x00ae8421, 0x00000000 },
+ { 0x00600001, 0x66032288, 0x0000005f, 0x00000000 },
+ { 0x00600001, 0x67002288, 0x00ae8011, 0x00000000 },
+ { 0x00600001, 0x67012288, 0x00ae8211, 0x00000000 },
+ { 0x00600001, 0x67022288, 0x00ae8411, 0x00000000 },
+ { 0x00600001, 0x67032288, 0x0000005f, 0x00000000 },
+ { 0x00600001, 0x67202288, 0x00ae8031, 0x00000000 },
+ { 0x00600001, 0x67212288, 0x00ae8231, 0x00000000 },
+ { 0x00600001, 0x67222288, 0x00ae8431, 0x00000000 },
+ { 0x00600001, 0x67232288, 0x0000005f, 0x00000000 },
+ { 0x00600001, 0x66202288, 0x00ae8041, 0x00000000 },
+ { 0x00600001, 0x66212288, 0x00ae8241, 0x00000000 },
+ { 0x00600001, 0x66222288, 0x00ae8441, 0x00000000 },
+ { 0x00600001, 0x66232288, 0x0000005f, 0x00000000 },
+ { 0x00600001, 0x66402288, 0x00ae8061, 0x00000000 },
+ { 0x00600001, 0x66412288, 0x00ae8261, 0x00000000 },
+ { 0x00600001, 0x66422288, 0x00ae8461, 0x00000000 },
+ { 0x00600001, 0x66432288, 0x0000005f, 0x00000000 },
+ { 0x00600001, 0x67402288, 0x00ae8051, 0x00000000 },
+ { 0x00600001, 0x67412288, 0x00ae8251, 0x00000000 },
+ { 0x00600001, 0x67422288, 0x00ae8451, 0x00000000 },
+ { 0x00600001, 0x67432288, 0x0000005f, 0x00000000 },
+ { 0x00600001, 0x67602288, 0x00ae8071, 0x00000000 },
+ { 0x00600001, 0x67612288, 0x00ae8271, 0x00000000 },
+ { 0x00600001, 0x67622288, 0x00ae8471, 0x00000000 },
+ { 0x00600001, 0x67632288, 0x0000005f, 0x00000000 },
+ { 0x0c800031, 0x20002220, 0x060005c0, 0x0a0a8018 },
+ { 0x0c800031, 0x20002220, 0x060006e0, 0x0a0a8018 },
+ { 0x00600001, 0x23800208, 0x008d0360, 0x00000000 },
+ { 0x00600001, 0x24a00208, 0x008d0360, 0x00000000 },
+ { 0x00600001, 0x25c00208, 0x008d0360, 0x00000000 },
+ { 0x00600001, 0x26e00208, 0x008d0360, 0x00000000 },
+ { 0x00000040, 0x23840a28, 0x0e000364, 0x00000008 },
+ { 0x00000040, 0x24a00a28, 0x0e000360, 0x00000020 },
+ { 0x00000040, 0x24a40a28, 0x0e000364, 0x00000008 },
+ { 0x00000040, 0x25c40a28, 0x0e000364, 0x0000000c },
+ { 0x00000040, 0x26e40a28, 0x0e000364, 0x0000000c },
+ { 0x00000040, 0x26e00a28, 0x0e000360, 0x00000020 },
+ { 0x00400040, 0x22001040, 0x16690208, 0x04000400 },
+ { 0x00600001, 0x63a02288, 0x00ae8001, 0x00000000 },
+ { 0x00600001, 0x63a12288, 0x00ae8201, 0x00000000 },
+ { 0x00600001, 0x63a22288, 0x00ae8401, 0x00000000 },
+ { 0x00600001, 0x63a32288, 0x0000005f, 0x00000000 },
+ { 0x00600001, 0x63c02288, 0x00ae8021, 0x00000000 },
+ { 0x00600001, 0x63c12288, 0x00ae8221, 0x00000000 },
+ { 0x00600001, 0x63c22288, 0x00ae8421, 0x00000000 },
+ { 0x00600001, 0x63c32288, 0x0000005f, 0x00000000 },
+ { 0x00600001, 0x64c02288, 0x00ae8011, 0x00000000 },
+ { 0x00600001, 0x64c12288, 0x00ae8211, 0x00000000 },
+ { 0x00600001, 0x64c22288, 0x00ae8411, 0x00000000 },
+ { 0x00600001, 0x64c32288, 0x0000005f, 0x00000000 },
+ { 0x00600001, 0x64e02288, 0x00ae8031, 0x00000000 },
+ { 0x00600001, 0x64e12288, 0x00ae8231, 0x00000000 },
+ { 0x00600001, 0x64e22288, 0x00ae8431, 0x00000000 },
+ { 0x00600001, 0x64e32288, 0x0000005f, 0x00000000 },
+ { 0x00600001, 0x63e02288, 0x00ae8041, 0x00000000 },
+ { 0x00600001, 0x63e12288, 0x00ae8241, 0x00000000 },
+ { 0x00600001, 0x63e22288, 0x00ae8441, 0x00000000 },
+ { 0x00600001, 0x63e32288, 0x0000005f, 0x00000000 },
+ { 0x00600001, 0x64002288, 0x00ae8061, 0x00000000 },
+ { 0x00600001, 0x64012288, 0x00ae8261, 0x00000000 },
+ { 0x00600001, 0x64022288, 0x00ae8461, 0x00000000 },
+ { 0x00600001, 0x64032288, 0x0000005f, 0x00000000 },
+ { 0x00600001, 0x65002288, 0x00ae8051, 0x00000000 },
+ { 0x00600001, 0x65012288, 0x00ae8251, 0x00000000 },
+ { 0x00600001, 0x65022288, 0x00ae8451, 0x00000000 },
+ { 0x00600001, 0x65032288, 0x0000005f, 0x00000000 },
+ { 0x00600001, 0x65202288, 0x00ae8071, 0x00000000 },
+ { 0x00600001, 0x65212288, 0x00ae8271, 0x00000000 },
+ { 0x00600001, 0x65222288, 0x00ae8471, 0x00000000 },
+ { 0x00600001, 0x65232288, 0x0000005f, 0x00000000 },
+ { 0x0c800031, 0x20002220, 0x06000380, 0x0a0a8018 },
+ { 0x0c800031, 0x20002220, 0x060004a0, 0x0a0a8018 },
+ { 0x00400040, 0x22001040, 0x16690208, 0x06000600 },
+ { 0x00600001, 0x65e02288, 0x00ae8001, 0x00000000 },
+ { 0x00600001, 0x65e12288, 0x00ae8201, 0x00000000 },
+ { 0x00600001, 0x65e22288, 0x00ae8401, 0x00000000 },
+ { 0x00600001, 0x65e32288, 0x0000005f, 0x00000000 },
+ { 0x00600001, 0x66002288, 0x00ae8021, 0x00000000 },
+ { 0x00600001, 0x66012288, 0x00ae8221, 0x00000000 },
+ { 0x00600001, 0x66022288, 0x00ae8421, 0x00000000 },
+ { 0x00600001, 0x66032288, 0x0000005f, 0x00000000 },
+ { 0x00600001, 0x67002288, 0x00ae8011, 0x00000000 },
+ { 0x00600001, 0x67012288, 0x00ae8211, 0x00000000 },
+ { 0x00600001, 0x67022288, 0x00ae8411, 0x00000000 },
+ { 0x00600001, 0x67032288, 0x0000005f, 0x00000000 },
+ { 0x00600001, 0x67202288, 0x00ae8031, 0x00000000 },
+ { 0x00600001, 0x67212288, 0x00ae8231, 0x00000000 },
+ { 0x00600001, 0x67222288, 0x00ae8431, 0x00000000 },
+ { 0x00600001, 0x67232288, 0x0000005f, 0x00000000 },
+ { 0x00600001, 0x66202288, 0x00ae8041, 0x00000000 },
+ { 0x00600001, 0x66212288, 0x00ae8241, 0x00000000 },
+ { 0x00600001, 0x66222288, 0x00ae8441, 0x00000000 },
+ { 0x00600001, 0x66232288, 0x0000005f, 0x00000000 },
+ { 0x00600001, 0x66402288, 0x00ae8061, 0x00000000 },
+ { 0x00600001, 0x66412288, 0x00ae8261, 0x00000000 },
+ { 0x00600001, 0x66422288, 0x00ae8461, 0x00000000 },
+ { 0x00600001, 0x66432288, 0x0000005f, 0x00000000 },
+ { 0x00600001, 0x67402288, 0x00ae8051, 0x00000000 },
+ { 0x00600001, 0x67412288, 0x00ae8251, 0x00000000 },
+ { 0x00600001, 0x67422288, 0x00ae8451, 0x00000000 },
+ { 0x00600001, 0x67432288, 0x0000005f, 0x00000000 },
+ { 0x00600001, 0x67602288, 0x00ae8071, 0x00000000 },
+ { 0x00600001, 0x67612288, 0x00ae8271, 0x00000000 },
+ { 0x00600001, 0x67622288, 0x00ae8471, 0x00000000 },
+ { 0x00600001, 0x67632288, 0x0000005f, 0x00000000 },
+ { 0x0c800031, 0x20002220, 0x060005c0, 0x0a0a8018 },
+ { 0x0c800031, 0x20002220, 0x060006e0, 0x0a0a8018 },
+ { 0x00600001, 0x2fe00208, 0x008d0000, 0x00000000 },
+ { 0x07000031, 0x20002220, 0x0e000fe0, 0x82000010 },
diff --git a/src/shaders/post_processing/gen8/pl3_to_pa.asm b/src/shaders/post_processing/gen8/pl3_to_pa.asm
new file mode 100644
index 0000000..acb7670
--- /dev/null
+++ b/src/shaders/post_processing/gen8/pl3_to_pa.asm
@@ -0,0 +1,17 @@
+// Module name: AVS
+.kernel PL3_TO_PL3
+.code
+
+#include "VP_Setup.g8a"
+#include "Set_Layer_0.g8a"
+#include "Set_AVS_Buf_0123_PL3.g8a"
+#include "PL3_AVS_Buf_0.g8a"
+#include "PL3_AVS_Buf_1.g8a"
+#include "PL3_AVS_Buf_2.g8a"
+#include "PL3_AVS_Buf_3.g8a"
+#include "Save_AVS_PA.g8a"
+#include "EOT.g8a"
+
+.end_code
+
+.end_kernel
diff --git a/src/shaders/post_processing/gen8/pl3_to_pa.g8b b/src/shaders/post_processing/gen8/pl3_to_pa.g8b
new file mode 100644
index 0000000..d6798c2
--- /dev/null
+++ b/src/shaders/post_processing/gen8/pl3_to_pa.g8b
@@ -0,0 +1,303 @@
+ { 0x00600001, 0x23600208, 0x008d0000, 0x00000000 },
+ { 0x00600001, 0x23200208, 0x008d0000, 0x00000000 },
+ { 0x00600001, 0x23400208, 0x008d0000, 0x00000000 },
+ { 0x00600041, 0x20603ae8, 0x3a8d0060, 0x000000f0 },
+ { 0x00200001, 0x21141ae8, 0x004500e0, 0x00000000 },
+ { 0x01000010, 0x20002220, 0x1600005a, 0x00010001 },
+ { 0x00000008, 0x22201248, 0x16000044, 0x00000000 },
+ { 0x00000005, 0x22201248, 0x16000220, 0x00030003 },
+ { 0x01000010, 0x20001261, 0x16000220, 0x00010001 },
+ { 0x00010020, 0x34000005, 0x0e001400, 0x00000090 },
+ { 0x01000010, 0x20001261, 0x16000220, 0x00020002 },
+ { 0x00010020, 0x34000005, 0x0e001400, 0x000000f0 },
+ { 0x01000010, 0x20001261, 0x16000220, 0x00030003 },
+ { 0x00010020, 0x34000005, 0x0e001400, 0x00000180 },
+ { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000114 },
+ { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000118 },
+ { 0x00000020, 0x34000004, 0x0e001400, 0x000001a0 },
+ { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000118 },
+ { 0x00000001, 0x220012e8, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x22203ae8, 0x3a004114, 0x00000200 },
+ { 0x00000040, 0x22203ae8, 0x3e000220, 0xc1800000 },
+ { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000220 },
+ { 0x00000020, 0x34000004, 0x0e001400, 0x00000120 },
+ { 0x00110001, 0x220012e8, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x22203ae8, 0x3a004114, 0x00000200 },
+ { 0x00110040, 0x22203ae8, 0x3e000220, 0xc1800000 },
+ { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000220 },
+ { 0x00000001, 0x220012e8, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x22203ae8, 0x3a004118, 0x00000200 },
+ { 0x00000040, 0x22203ae8, 0x3e000220, 0xc1800000 },
+ { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000220 },
+ { 0x00000020, 0x34000004, 0x0e001400, 0x00000070 },
+ { 0x00110001, 0x220012e8, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x22203ae8, 0x3a004118, 0x00000200 },
+ { 0x00110040, 0x22203ae8, 0x3e000220, 0xc1800000 },
+ { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000220 },
+ { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00800001, 0x22a01648, 0x10000000, 0xffffffff },
+ { 0x00000005, 0x23021288, 0x16000044, 0x00030003 },
+ { 0x00000001, 0x23203ae8, 0x000000f8, 0x00000000 },
+ { 0x00000001, 0x23383ae8, 0x000000f4, 0x00000000 },
+ { 0x00000001, 0x23303ae8, 0x00000060, 0x00000000 },
+ { 0x00000001, 0x23343ae8, 0x00000080, 0x00000000 },
+ { 0x00000001, 0x23283ae8, 0x000000c0, 0x00000000 },
+ { 0x00000001, 0x232c3ae8, 0x000000a0, 0x00000000 },
+ { 0x00000001, 0x233c0608, 0x00000000, 0x00000000 },
+ { 0x00000040, 0x233c0208, 0x0600033c, 0x08000000 },
+ { 0x00000001, 0x24083ae0, 0x000000c0, 0x00000000 },
+ { 0x00000048, 0x24083ae0, 0x3e000060, 0x41000000 },
+ { 0x00000248, 0x22e83ae8, 0x3e0000f4, 0x41e00000 },
+ { 0x00000641, 0x22e43ae8, 0x3e000080, 0x40800000 },
+ { 0x00000001, 0x24103ae0, 0x00000060, 0x00000000 },
+ { 0x00000648, 0x22f03ae8, 0x3e0000f4, 0x41000000 },
+ { 0x00000401, 0x22f40608, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00400001, 0x24003660, 0x30000000, 0x00006ea2 },
+ { 0x00400040, 0x24001860, 0x16690400, 0x00460046 },
+ { 0x00400009, 0x22c01868, 0x16690400, 0x00050005 },
+ { 0x00400001, 0x24003660, 0x30000000, 0x00006204 },
+ { 0x00400040, 0x24001860, 0x16690400, 0x00400040 },
+ { 0x00400209, 0x22401868, 0x16690400, 0x00050005 },
+ { 0x00000401, 0x22500608, 0x00000000, 0x01000100 },
+ { 0x00000001, 0x22d00608, 0x00000000, 0x00400040 },
+ { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 },
+ { 0x00000040, 0x22000200, 0x060002f4, 0x044eb000 },
+ { 0x00000001, 0x22080608, 0x00000000, 0x0000d000 },
+ { 0x00000001, 0x23240608, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 },
+ { 0x02000031, 0x28002248, 0x00000200, 0x00000200 },
+ { 0x00000040, 0x22000200, 0x060002f4, 0x044eb001 },
+ { 0x00000001, 0x22080608, 0x00000000, 0x0000e000 },
+ { 0x02000031, 0x28802248, 0x00000200, 0x00000200 },
+ { 0x00000040, 0x22000200, 0x060002f4, 0x044eb002 },
+ { 0x00000001, 0x22080608, 0x00000000, 0x0000e000 },
+ { 0x02000031, 0x29002248, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000001, 0x22d00608, 0x00000000, 0x00400040 },
+ { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 },
+ { 0x00000040, 0x22000200, 0x060002f4, 0x044eb000 },
+ { 0x00000001, 0x22080608, 0x00000000, 0x0000d000 },
+ { 0x00000001, 0x23240608, 0x00000000, 0x00000001 },
+ { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 },
+ { 0x02000031, 0x2a002248, 0x00000200, 0x00000200 },
+ { 0x00000040, 0x22000200, 0x060002f4, 0x044eb001 },
+ { 0x00000001, 0x22080608, 0x00000000, 0x0000e000 },
+ { 0x02000031, 0x2a802248, 0x00000200, 0x00000200 },
+ { 0x00000040, 0x22000200, 0x060002f4, 0x044eb002 },
+ { 0x00000001, 0x22080608, 0x00000000, 0x0000e000 },
+ { 0x02000031, 0x2b002248, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000001, 0x22d00608, 0x00000000, 0x00400040 },
+ { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 },
+ { 0x00000040, 0x22000200, 0x060002f4, 0x044eb000 },
+ { 0x00000001, 0x22080608, 0x00000000, 0x0000d000 },
+ { 0x00000001, 0x23240608, 0x00000000, 0x00000002 },
+ { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 },
+ { 0x02000031, 0x2c002248, 0x00000200, 0x00000200 },
+ { 0x00000040, 0x22000200, 0x060002f4, 0x044eb001 },
+ { 0x00000001, 0x22080608, 0x00000000, 0x0000e000 },
+ { 0x02000031, 0x2c802248, 0x00000200, 0x00000200 },
+ { 0x00000040, 0x22000200, 0x060002f4, 0x044eb002 },
+ { 0x00000001, 0x22080608, 0x00000000, 0x0000e000 },
+ { 0x02000031, 0x2d002248, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000001, 0x22d00608, 0x00000000, 0x00400040 },
+ { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 },
+ { 0x00000040, 0x22000200, 0x060002f4, 0x044eb000 },
+ { 0x00000001, 0x22080608, 0x00000000, 0x0000d000 },
+ { 0x00000001, 0x23240608, 0x00000000, 0x00000003 },
+ { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 },
+ { 0x02000031, 0x2e002248, 0x00000200, 0x00000200 },
+ { 0x00000040, 0x22000200, 0x060002f4, 0x044eb001 },
+ { 0x00000001, 0x22080608, 0x00000000, 0x0000e000 },
+ { 0x02000031, 0x2e802248, 0x00000200, 0x00000200 },
+ { 0x00000040, 0x22000200, 0x060002f4, 0x044eb002 },
+ { 0x00000001, 0x22080608, 0x00000000, 0x0000e000 },
+ { 0x02000031, 0x2f002248, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00400040, 0x22001a40, 0x166902c0, 0x00000000 },
+ { 0x00400040, 0x22082260, 0x1669005c, 0x03a003a0 },
+ { 0x00000209, 0x23601a28, 0x1e0000e0, 0x00010001 },
+ { 0x00000601, 0x23641a28, 0x000000e2, 0x00000000 },
+ { 0x00000401, 0x23680608, 0x00000000, 0x0001001f },
+ { 0x80400040, 0xc0001248, 0x168a8000, 0x00800080 },
+ { 0x80400040, 0xc0201248, 0x168a8020, 0x00800080 },
+ { 0x80400040, 0xc0401248, 0x168a8040, 0x00800080 },
+ { 0x80400040, 0xc0601248, 0x168a8060, 0x00800080 },
+ { 0x80600040, 0xa2001248, 0x168d8200, 0x00800080 },
+ { 0x80600040, 0xa2201248, 0x168d8220, 0x00800080 },
+ { 0x80600040, 0xa2401248, 0x168d8240, 0x00800080 },
+ { 0x80600040, 0xa2601248, 0x168d8260, 0x00800080 },
+ { 0x80400040, 0xc4001248, 0x168a8400, 0x00800080 },
+ { 0x80400040, 0xc4201248, 0x168a8420, 0x00800080 },
+ { 0x80400040, 0xc4401248, 0x168a8440, 0x00800080 },
+ { 0x80400040, 0xc4601248, 0x168a8460, 0x00800080 },
+ { 0x80400040, 0xc0101248, 0x168a8010, 0x00800080 },
+ { 0x80400040, 0xc0301248, 0x168a8030, 0x00800080 },
+ { 0x80400040, 0xc0501248, 0x168a8050, 0x00800080 },
+ { 0x80400040, 0xc0701248, 0x168a8070, 0x00800080 },
+ { 0x80600040, 0xa2101248, 0x168d8210, 0x00800080 },
+ { 0x80600040, 0xa2301248, 0x168d8230, 0x00800080 },
+ { 0x80600040, 0xa2501248, 0x168d8250, 0x00800080 },
+ { 0x80600040, 0xa2701248, 0x168d8270, 0x00800080 },
+ { 0x80400040, 0xc4101248, 0x168a8410, 0x00800080 },
+ { 0x80400040, 0xc4301248, 0x168a8430, 0x00800080 },
+ { 0x80400040, 0xc4501248, 0x168a8450, 0x00800080 },
+ { 0x80400040, 0xc4701248, 0x168a8470, 0x00800080 },
+ { 0x00400040, 0x22001a40, 0x166902c0, 0x02000200 },
+ { 0x80400040, 0xc0001248, 0x168a8000, 0x00800080 },
+ { 0x80400040, 0xc0201248, 0x168a8020, 0x00800080 },
+ { 0x80400040, 0xc0401248, 0x168a8040, 0x00800080 },
+ { 0x80400040, 0xc0601248, 0x168a8060, 0x00800080 },
+ { 0x80600040, 0xa2001248, 0x168d8200, 0x00800080 },
+ { 0x80600040, 0xa2201248, 0x168d8220, 0x00800080 },
+ { 0x80600040, 0xa2401248, 0x168d8240, 0x00800080 },
+ { 0x80600040, 0xa2601248, 0x168d8260, 0x00800080 },
+ { 0x80400040, 0xc4001248, 0x168a8400, 0x00800080 },
+ { 0x80400040, 0xc4201248, 0x168a8420, 0x00800080 },
+ { 0x80400040, 0xc4401248, 0x168a8440, 0x00800080 },
+ { 0x80400040, 0xc4601248, 0x168a8460, 0x00800080 },
+ { 0x80400040, 0xc0101248, 0x168a8010, 0x00800080 },
+ { 0x80400040, 0xc0301248, 0x168a8030, 0x00800080 },
+ { 0x80400040, 0xc0501248, 0x168a8050, 0x00800080 },
+ { 0x80400040, 0xc0701248, 0x168a8070, 0x00800080 },
+ { 0x80600040, 0xa2101248, 0x168d8210, 0x00800080 },
+ { 0x80600040, 0xa2301248, 0x168d8230, 0x00800080 },
+ { 0x80600040, 0xa2501248, 0x168d8250, 0x00800080 },
+ { 0x80600040, 0xa2701248, 0x168d8270, 0x00800080 },
+ { 0x80400040, 0xc4101248, 0x168a8410, 0x00800080 },
+ { 0x80400040, 0xc4301248, 0x168a8430, 0x00800080 },
+ { 0x80400040, 0xc4501248, 0x168a8450, 0x00800080 },
+ { 0x80400040, 0xc4701248, 0x168a8470, 0x00800080 },
+ { 0x00400040, 0x22001a40, 0x166902c0, 0x04000400 },
+ { 0x80400040, 0xc0001248, 0x168a8000, 0x00800080 },
+ { 0x80400040, 0xc0201248, 0x168a8020, 0x00800080 },
+ { 0x80400040, 0xc0401248, 0x168a8040, 0x00800080 },
+ { 0x80400040, 0xc0601248, 0x168a8060, 0x00800080 },
+ { 0x80600040, 0xa2001248, 0x168d8200, 0x00800080 },
+ { 0x80600040, 0xa2201248, 0x168d8220, 0x00800080 },
+ { 0x80600040, 0xa2401248, 0x168d8240, 0x00800080 },
+ { 0x80600040, 0xa2601248, 0x168d8260, 0x00800080 },
+ { 0x80400040, 0xc4001248, 0x168a8400, 0x00800080 },
+ { 0x80400040, 0xc4201248, 0x168a8420, 0x00800080 },
+ { 0x80400040, 0xc4401248, 0x168a8440, 0x00800080 },
+ { 0x80400040, 0xc4601248, 0x168a8460, 0x00800080 },
+ { 0x80400040, 0xc0101248, 0x168a8010, 0x00800080 },
+ { 0x80400040, 0xc0301248, 0x168a8030, 0x00800080 },
+ { 0x80400040, 0xc0501248, 0x168a8050, 0x00800080 },
+ { 0x80400040, 0xc0701248, 0x168a8070, 0x00800080 },
+ { 0x80600040, 0xa2101248, 0x168d8210, 0x00800080 },
+ { 0x80600040, 0xa2301248, 0x168d8230, 0x00800080 },
+ { 0x80600040, 0xa2501248, 0x168d8250, 0x00800080 },
+ { 0x80600040, 0xa2701248, 0x168d8270, 0x00800080 },
+ { 0x80400040, 0xc4101248, 0x168a8410, 0x00800080 },
+ { 0x80400040, 0xc4301248, 0x168a8430, 0x00800080 },
+ { 0x80400040, 0xc4501248, 0x168a8450, 0x00800080 },
+ { 0x80400040, 0xc4701248, 0x168a8470, 0x00800080 },
+ { 0x00400040, 0x22001a40, 0x166902c0, 0x06000600 },
+ { 0x80400040, 0xc0001248, 0x168a8000, 0x00800080 },
+ { 0x80400040, 0xc0201248, 0x168a8020, 0x00800080 },
+ { 0x80400040, 0xc0401248, 0x168a8040, 0x00800080 },
+ { 0x80400040, 0xc0601248, 0x168a8060, 0x00800080 },
+ { 0x80600040, 0xa2001248, 0x168d8200, 0x00800080 },
+ { 0x80600040, 0xa2201248, 0x168d8220, 0x00800080 },
+ { 0x80600040, 0xa2401248, 0x168d8240, 0x00800080 },
+ { 0x80600040, 0xa2601248, 0x168d8260, 0x00800080 },
+ { 0x80400040, 0xc4001248, 0x168a8400, 0x00800080 },
+ { 0x80400040, 0xc4201248, 0x168a8420, 0x00800080 },
+ { 0x80400040, 0xc4401248, 0x168a8440, 0x00800080 },
+ { 0x80400040, 0xc4601248, 0x168a8460, 0x00800080 },
+ { 0x80400040, 0xc0101248, 0x168a8010, 0x00800080 },
+ { 0x80400040, 0xc0301248, 0x168a8030, 0x00800080 },
+ { 0x80400040, 0xc0501248, 0x168a8050, 0x00800080 },
+ { 0x80400040, 0xc0701248, 0x168a8070, 0x00800080 },
+ { 0x80600040, 0xa2101248, 0x168d8210, 0x00800080 },
+ { 0x80600040, 0xa2301248, 0x168d8230, 0x00800080 },
+ { 0x80600040, 0xa2501248, 0x168d8250, 0x00800080 },
+ { 0x80600040, 0xa2701248, 0x168d8270, 0x00800080 },
+ { 0x80400040, 0xc4101248, 0x168a8410, 0x00800080 },
+ { 0x80400040, 0xc4301248, 0x168a8430, 0x00800080 },
+ { 0x80400040, 0xc4501248, 0x168a8450, 0x00800080 },
+ { 0x80400040, 0xc4701248, 0x168a8470, 0x00800080 },
+ { 0x00400040, 0x22001a40, 0x166902c0, 0x08000800 },
+ { 0x00400040, 0x22001a40, 0x166902c0, 0x00000000 },
+ { 0x00600001, 0x23800208, 0x008d0360, 0x00000000 },
+ { 0x00600001, 0x24a00208, 0x008d0360, 0x00000000 },
+ { 0x00000040, 0x24a40a28, 0x0e000364, 0x00000002 },
+ { 0x00600201, 0xec002288, 0x00cf8001, 0x00000000 },
+ { 0x00600201, 0xec202288, 0x00cf8021, 0x00000000 },
+ { 0x00800601, 0xc8002288, 0x00d28201, 0x00000000 },
+ { 0x00800601, 0xc8202288, 0x00d28221, 0x00000000 },
+ { 0x00600401, 0xea002288, 0x00cf8401, 0x00000000 },
+ { 0x00600401, 0xea202288, 0x00cf8421, 0x00000000 },
+ { 0x00600201, 0xed202288, 0x00cf8041, 0x00000000 },
+ { 0x00600201, 0xed402288, 0x00cf8061, 0x00000000 },
+ { 0x00800601, 0xc9202288, 0x00d28241, 0x00000000 },
+ { 0x00800601, 0xc9402288, 0x00d28261, 0x00000000 },
+ { 0x00600401, 0xeb202288, 0x00cf8441, 0x00000000 },
+ { 0x00600401, 0xeb402288, 0x00cf8461, 0x00000000 },
+ { 0x0c000031, 0x20002220, 0x06000380, 0x060a8018 },
+ { 0x0c000031, 0x20002220, 0x060004a0, 0x060a8018 },
+ { 0x00400040, 0x22001a40, 0x166902c0, 0x02000200 },
+ { 0x00000040, 0x23840a28, 0x0e000364, 0x00000004 },
+ { 0x00000040, 0x24a40a28, 0x0e000364, 0x00000006 },
+ { 0x00600201, 0xec002288, 0x00cf8001, 0x00000000 },
+ { 0x00600201, 0xec202288, 0x00cf8021, 0x00000000 },
+ { 0x00800601, 0xc8002288, 0x00d28201, 0x00000000 },
+ { 0x00800601, 0xc8202288, 0x00d28221, 0x00000000 },
+ { 0x00600401, 0xea002288, 0x00cf8401, 0x00000000 },
+ { 0x00600401, 0xea202288, 0x00cf8421, 0x00000000 },
+ { 0x00600201, 0xed202288, 0x00cf8041, 0x00000000 },
+ { 0x00600201, 0xed402288, 0x00cf8061, 0x00000000 },
+ { 0x00800601, 0xc9202288, 0x00d28241, 0x00000000 },
+ { 0x00800601, 0xc9402288, 0x00d28261, 0x00000000 },
+ { 0x00600401, 0xeb202288, 0x00cf8441, 0x00000000 },
+ { 0x00600401, 0xeb402288, 0x00cf8461, 0x00000000 },
+ { 0x0c000031, 0x20002220, 0x06000380, 0x060a8018 },
+ { 0x0c000031, 0x20002220, 0x060004a0, 0x060a8018 },
+ { 0x00400040, 0x22001a40, 0x166902c0, 0x04000400 },
+ { 0x00000040, 0x23840a28, 0x0e000364, 0x00000008 },
+ { 0x00000040, 0x24a40a28, 0x0e000364, 0x0000000a },
+ { 0x00600201, 0xec002288, 0x00cf8001, 0x00000000 },
+ { 0x00600201, 0xec202288, 0x00cf8021, 0x00000000 },
+ { 0x00800601, 0xc8002288, 0x00d28201, 0x00000000 },
+ { 0x00800601, 0xc8202288, 0x00d28221, 0x00000000 },
+ { 0x00600401, 0xea002288, 0x00cf8401, 0x00000000 },
+ { 0x00600401, 0xea202288, 0x00cf8421, 0x00000000 },
+ { 0x00600201, 0xed202288, 0x00cf8041, 0x00000000 },
+ { 0x00600201, 0xed402288, 0x00cf8061, 0x00000000 },
+ { 0x00800601, 0xc9202288, 0x00d28241, 0x00000000 },
+ { 0x00800601, 0xc9402288, 0x00d28261, 0x00000000 },
+ { 0x00600401, 0xeb202288, 0x00cf8441, 0x00000000 },
+ { 0x00600401, 0xeb402288, 0x00cf8461, 0x00000000 },
+ { 0x0c000031, 0x20002220, 0x06000380, 0x060a8018 },
+ { 0x0c000031, 0x20002220, 0x060004a0, 0x060a8018 },
+ { 0x00400040, 0x22001a40, 0x166902c0, 0x06000600 },
+ { 0x00000040, 0x23840a28, 0x0e000364, 0x0000000c },
+ { 0x00000040, 0x24a40a28, 0x0e000364, 0x0000000e },
+ { 0x00600201, 0xec002288, 0x00cf8001, 0x00000000 },
+ { 0x00600201, 0xec202288, 0x00cf8021, 0x00000000 },
+ { 0x00800601, 0xc8002288, 0x00d28201, 0x00000000 },
+ { 0x00800601, 0xc8202288, 0x00d28221, 0x00000000 },
+ { 0x00600401, 0xea002288, 0x00cf8401, 0x00000000 },
+ { 0x00600401, 0xea202288, 0x00cf8421, 0x00000000 },
+ { 0x00600201, 0xed202288, 0x00cf8041, 0x00000000 },
+ { 0x00600201, 0xed402288, 0x00cf8061, 0x00000000 },
+ { 0x00800601, 0xc9202288, 0x00d28241, 0x00000000 },
+ { 0x00800601, 0xc9402288, 0x00d28261, 0x00000000 },
+ { 0x00600401, 0xeb202288, 0x00cf8441, 0x00000000 },
+ { 0x00600401, 0xeb402288, 0x00cf8461, 0x00000000 },
+ { 0x0c000031, 0x20002220, 0x06000380, 0x060a8018 },
+ { 0x0c000031, 0x20002220, 0x060004a0, 0x060a8018 },
+ { 0x00600001, 0x2fe00208, 0x008d0000, 0x00000000 },
+ { 0x07000031, 0x20002220, 0x0e000fe0, 0x82000010 },
diff --git a/src/shaders/post_processing/gen8/pl3_to_pl2.asm b/src/shaders/post_processing/gen8/pl3_to_pl2.asm
new file mode 100644
index 0000000..713cb97
--- /dev/null
+++ b/src/shaders/post_processing/gen8/pl3_to_pl2.asm
@@ -0,0 +1,17 @@
+// Module name: AVS
+.kernel PL3_TO_PL2
+.code
+
+#include "VP_Setup.g8a"
+#include "Set_Layer_0.g8a"
+#include "Set_AVS_Buf_0123_PL3.g8a"
+#include "PL3_AVS_Buf_0.g8a"
+#include "PL3_AVS_Buf_1.g8a"
+#include "PL3_AVS_Buf_2.g8a"
+#include "PL3_AVS_Buf_3.g8a"
+#include "Save_AVS_NV12.g8a"
+#include "EOT.g8a"
+
+.end_code
+
+.end_kernel
diff --git a/src/shaders/post_processing/gen8/pl3_to_pl2.g8b b/src/shaders/post_processing/gen8/pl3_to_pl2.g8b
new file mode 100644
index 0000000..9a141e7
--- /dev/null
+++ b/src/shaders/post_processing/gen8/pl3_to_pl2.g8b
@@ -0,0 +1,260 @@
+ { 0x00600001, 0x23600208, 0x008d0000, 0x00000000 },
+ { 0x00600001, 0x23200208, 0x008d0000, 0x00000000 },
+ { 0x00600001, 0x23400208, 0x008d0000, 0x00000000 },
+ { 0x00600041, 0x20603ae8, 0x3a8d0060, 0x000000f0 },
+ { 0x00200001, 0x21141ae8, 0x004500e0, 0x00000000 },
+ { 0x01000010, 0x20002220, 0x1600005a, 0x00010001 },
+ { 0x00000008, 0x22201248, 0x16000044, 0x00000000 },
+ { 0x00000005, 0x22201248, 0x16000220, 0x00030003 },
+ { 0x01000010, 0x20001261, 0x16000220, 0x00010001 },
+ { 0x00010020, 0x34000005, 0x0e001400, 0x00000090 },
+ { 0x01000010, 0x20001261, 0x16000220, 0x00020002 },
+ { 0x00010020, 0x34000005, 0x0e001400, 0x000000f0 },
+ { 0x01000010, 0x20001261, 0x16000220, 0x00030003 },
+ { 0x00010020, 0x34000005, 0x0e001400, 0x00000180 },
+ { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000114 },
+ { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000118 },
+ { 0x00000020, 0x34000004, 0x0e001400, 0x000001a0 },
+ { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000118 },
+ { 0x00000001, 0x220012e8, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x22203ae8, 0x3a004114, 0x00000200 },
+ { 0x00000040, 0x22203ae8, 0x3e000220, 0xc1800000 },
+ { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000220 },
+ { 0x00000020, 0x34000004, 0x0e001400, 0x00000120 },
+ { 0x00110001, 0x220012e8, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x22203ae8, 0x3a004114, 0x00000200 },
+ { 0x00110040, 0x22203ae8, 0x3e000220, 0xc1800000 },
+ { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000220 },
+ { 0x00000001, 0x220012e8, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x22203ae8, 0x3a004118, 0x00000200 },
+ { 0x00000040, 0x22203ae8, 0x3e000220, 0xc1800000 },
+ { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000220 },
+ { 0x00000020, 0x34000004, 0x0e001400, 0x00000070 },
+ { 0x00110001, 0x220012e8, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x22203ae8, 0x3a004118, 0x00000200 },
+ { 0x00110040, 0x22203ae8, 0x3e000220, 0xc1800000 },
+ { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000220 },
+ { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00800001, 0x22a01648, 0x10000000, 0xffffffff },
+ { 0x00000005, 0x23021288, 0x16000044, 0x00030003 },
+ { 0x00000001, 0x23203ae8, 0x000000f8, 0x00000000 },
+ { 0x00000001, 0x23383ae8, 0x000000f4, 0x00000000 },
+ { 0x00000001, 0x23303ae8, 0x00000060, 0x00000000 },
+ { 0x00000001, 0x23343ae8, 0x00000080, 0x00000000 },
+ { 0x00000001, 0x23283ae8, 0x000000c0, 0x00000000 },
+ { 0x00000001, 0x232c3ae8, 0x000000a0, 0x00000000 },
+ { 0x00000001, 0x233c0608, 0x00000000, 0x00000000 },
+ { 0x00000040, 0x233c0208, 0x0600033c, 0x08000000 },
+ { 0x00000001, 0x24083ae0, 0x000000c0, 0x00000000 },
+ { 0x00000048, 0x24083ae0, 0x3e000060, 0x41000000 },
+ { 0x00000248, 0x22e83ae8, 0x3e0000f4, 0x41e00000 },
+ { 0x00000641, 0x22e43ae8, 0x3e000080, 0x40800000 },
+ { 0x00000001, 0x24103ae0, 0x00000060, 0x00000000 },
+ { 0x00000648, 0x22f03ae8, 0x3e0000f4, 0x41000000 },
+ { 0x00000401, 0x22f40608, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00400001, 0x24003660, 0x30000000, 0x00006ea2 },
+ { 0x00400040, 0x24001860, 0x16690400, 0x00460046 },
+ { 0x00400009, 0x22c01868, 0x16690400, 0x00050005 },
+ { 0x00400001, 0x24003660, 0x30000000, 0x00006204 },
+ { 0x00400040, 0x24001860, 0x16690400, 0x00400040 },
+ { 0x00400209, 0x22401868, 0x16690400, 0x00050005 },
+ { 0x00000401, 0x22500608, 0x00000000, 0x01000100 },
+ { 0x00000001, 0x22d00608, 0x00000000, 0x00400040 },
+ { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 },
+ { 0x00000040, 0x22000200, 0x060002f4, 0x044eb000 },
+ { 0x00000001, 0x22080608, 0x00000000, 0x0000d000 },
+ { 0x00000001, 0x23240608, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 },
+ { 0x02000031, 0x28002248, 0x00000200, 0x00000200 },
+ { 0x00000040, 0x22000200, 0x060002f4, 0x044eb001 },
+ { 0x00000001, 0x22080608, 0x00000000, 0x0000e000 },
+ { 0x02000031, 0x28802248, 0x00000200, 0x00000200 },
+ { 0x00000040, 0x22000200, 0x060002f4, 0x044eb002 },
+ { 0x00000001, 0x22080608, 0x00000000, 0x0000e000 },
+ { 0x02000031, 0x29002248, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000001, 0x22d00608, 0x00000000, 0x00400040 },
+ { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 },
+ { 0x00000040, 0x22000200, 0x060002f4, 0x044eb000 },
+ { 0x00000001, 0x22080608, 0x00000000, 0x0000d000 },
+ { 0x00000001, 0x23240608, 0x00000000, 0x00000001 },
+ { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 },
+ { 0x02000031, 0x2a002248, 0x00000200, 0x00000200 },
+ { 0x00000040, 0x22000200, 0x060002f4, 0x044eb001 },
+ { 0x00000001, 0x22080608, 0x00000000, 0x0000e000 },
+ { 0x02000031, 0x2a802248, 0x00000200, 0x00000200 },
+ { 0x00000040, 0x22000200, 0x060002f4, 0x044eb002 },
+ { 0x00000001, 0x22080608, 0x00000000, 0x0000e000 },
+ { 0x02000031, 0x2b002248, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000001, 0x22d00608, 0x00000000, 0x00400040 },
+ { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 },
+ { 0x00000040, 0x22000200, 0x060002f4, 0x044eb000 },
+ { 0x00000001, 0x22080608, 0x00000000, 0x0000d000 },
+ { 0x00000001, 0x23240608, 0x00000000, 0x00000002 },
+ { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 },
+ { 0x02000031, 0x2c002248, 0x00000200, 0x00000200 },
+ { 0x00000040, 0x22000200, 0x060002f4, 0x044eb001 },
+ { 0x00000001, 0x22080608, 0x00000000, 0x0000e000 },
+ { 0x02000031, 0x2c802248, 0x00000200, 0x00000200 },
+ { 0x00000040, 0x22000200, 0x060002f4, 0x044eb002 },
+ { 0x00000001, 0x22080608, 0x00000000, 0x0000e000 },
+ { 0x02000031, 0x2d002248, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000001, 0x22d00608, 0x00000000, 0x00400040 },
+ { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 },
+ { 0x00000040, 0x22000200, 0x060002f4, 0x044eb000 },
+ { 0x00000001, 0x22080608, 0x00000000, 0x0000d000 },
+ { 0x00000001, 0x23240608, 0x00000000, 0x00000003 },
+ { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 },
+ { 0x02000031, 0x2e002248, 0x00000200, 0x00000200 },
+ { 0x00000040, 0x22000200, 0x060002f4, 0x044eb001 },
+ { 0x00000001, 0x22080608, 0x00000000, 0x0000e000 },
+ { 0x02000031, 0x2e802248, 0x00000200, 0x00000200 },
+ { 0x00000040, 0x22000200, 0x060002f4, 0x044eb002 },
+ { 0x00000001, 0x22080608, 0x00000000, 0x0000e000 },
+ { 0x02000031, 0x2f002248, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00400040, 0x22001a40, 0x166902c0, 0x00000000 },
+ { 0x00400040, 0x22081a40, 0x166902c0, 0x02000200 },
+ { 0x00600001, 0x23800208, 0x008d0360, 0x00000000 },
+ { 0x00600001, 0x24a00208, 0x008d0360, 0x00000000 },
+ { 0x00200201, 0x23801a28, 0x004500e0, 0x00000000 },
+ { 0x00000201, 0x24a01a28, 0x000000e0, 0x00000000 },
+ { 0x00000608, 0x24a41a28, 0x1e0000e2, 0x00010001 },
+ { 0x00000401, 0x23880608, 0x00000000, 0x000f000f },
+ { 0x00000401, 0x24a80608, 0x00000000, 0x0007000f },
+ { 0x80800040, 0xa2001248, 0x16b18200, 0x00800080 },
+ { 0x80800040, 0xa2201248, 0x16b18220, 0x00800080 },
+ { 0x80800040, 0xa2401248, 0x16b18240, 0x00800080 },
+ { 0x80800040, 0xa2601248, 0x16b18260, 0x00800080 },
+ { 0x00600008, 0x45c01248, 0x1eae8400, 0x00010001 },
+ { 0x00600008, 0x45e01248, 0x1eae8420, 0x00010001 },
+ { 0x00600008, 0x46001248, 0x1eae8440, 0x00010001 },
+ { 0x00600008, 0x46201248, 0x1eae8460, 0x00010001 },
+ { 0x00600040, 0x45c01248, 0x12ae05c0, 0x00ae05e0 },
+ { 0x80600040, 0xc4001248, 0x16ae05c0, 0x00800080 },
+ { 0x00600040, 0x46001248, 0x12ae0600, 0x00ae0620 },
+ { 0x80600040, 0xc4401248, 0x16ae0600, 0x00800080 },
+ { 0x00600008, 0x46401248, 0x1eae8000, 0x00010001 },
+ { 0x00600008, 0x46601248, 0x1eae8020, 0x00010001 },
+ { 0x00600008, 0x46801248, 0x1eae8040, 0x00010001 },
+ { 0x00600008, 0x46a01248, 0x1eae8060, 0x00010001 },
+ { 0x00600040, 0x46401248, 0x12ae0640, 0x00ae0660 },
+ { 0x80600040, 0xc0001248, 0x16ae0640, 0x00800080 },
+ { 0x00600040, 0x46801248, 0x12ae0680, 0x00ae06a0 },
+ { 0x80600040, 0xc0401248, 0x16ae0680, 0x00800080 },
+ { 0x00400040, 0x22001a40, 0x166902c0, 0x04000400 },
+ { 0x80800040, 0xaa001248, 0x16b18a00, 0x00800080 },
+ { 0x80800040, 0xaa201248, 0x16b18a20, 0x00800080 },
+ { 0x80800040, 0xaa401248, 0x16b18a40, 0x00800080 },
+ { 0x80800040, 0xaa601248, 0x16b18a60, 0x00800080 },
+ { 0x00600008, 0x45c01248, 0x1eae8c00, 0x00010001 },
+ { 0x00600008, 0x45e01248, 0x1eae8c20, 0x00010001 },
+ { 0x00600008, 0x46001248, 0x1eae8c40, 0x00010001 },
+ { 0x00600008, 0x46201248, 0x1eae8c60, 0x00010001 },
+ { 0x00600040, 0x45c01248, 0x12ae05c0, 0x00ae05e0 },
+ { 0x80600040, 0xcc001248, 0x16ae05c0, 0x00800080 },
+ { 0x00600040, 0x46001248, 0x12ae0600, 0x00ae0620 },
+ { 0x80600040, 0xcc401248, 0x16ae0600, 0x00800080 },
+ { 0x00600008, 0x46401248, 0x1eae8800, 0x00010001 },
+ { 0x00600008, 0x46601248, 0x1eae8820, 0x00010001 },
+ { 0x00600008, 0x46801248, 0x1eae8840, 0x00010001 },
+ { 0x00600008, 0x46a01248, 0x1eae8860, 0x00010001 },
+ { 0x00600040, 0x46401248, 0x12ae0640, 0x00ae0660 },
+ { 0x80600040, 0xc8001248, 0x16ae0640, 0x00800080 },
+ { 0x00600040, 0x46801248, 0x12ae0680, 0x00ae06a0 },
+ { 0x80600040, 0xc8401248, 0x16ae0680, 0x00800080 },
+ { 0x00400040, 0x22081a40, 0x166902c0, 0x06000600 },
+ { 0x80800040, 0xa2001248, 0x16b18200, 0x00800080 },
+ { 0x80800040, 0xa2201248, 0x16b18220, 0x00800080 },
+ { 0x80800040, 0xa2401248, 0x16b18240, 0x00800080 },
+ { 0x80800040, 0xa2601248, 0x16b18260, 0x00800080 },
+ { 0x00600008, 0x45c01248, 0x1eae8400, 0x00010001 },
+ { 0x00600008, 0x45e01248, 0x1eae8420, 0x00010001 },
+ { 0x00600008, 0x46001248, 0x1eae8440, 0x00010001 },
+ { 0x00600008, 0x46201248, 0x1eae8460, 0x00010001 },
+ { 0x00600040, 0x45c01248, 0x12ae05c0, 0x00ae05e0 },
+ { 0x80600040, 0xc4001248, 0x16ae05c0, 0x00800080 },
+ { 0x00600040, 0x46001248, 0x12ae0600, 0x00ae0620 },
+ { 0x80600040, 0xc4401248, 0x16ae0600, 0x00800080 },
+ { 0x00600008, 0x46401248, 0x1eae8000, 0x00010001 },
+ { 0x00600008, 0x46601248, 0x1eae8020, 0x00010001 },
+ { 0x00600008, 0x46801248, 0x1eae8040, 0x00010001 },
+ { 0x00600008, 0x46a01248, 0x1eae8060, 0x00010001 },
+ { 0x00600040, 0x46401248, 0x12ae0640, 0x00ae0660 },
+ { 0x80600040, 0xc0001248, 0x16ae0640, 0x00800080 },
+ { 0x00600040, 0x46801248, 0x12ae0680, 0x00ae06a0 },
+ { 0x80600040, 0xc0401248, 0x16ae0680, 0x00800080 },
+ { 0x00400040, 0x22001a40, 0x166902c0, 0x04000400 },
+ { 0x80800040, 0xaa001248, 0x16b18a00, 0x00800080 },
+ { 0x80800040, 0xaa201248, 0x16b18a20, 0x00800080 },
+ { 0x80800040, 0xaa401248, 0x16b18a40, 0x00800080 },
+ { 0x80800040, 0xaa601248, 0x16b18a60, 0x00800080 },
+ { 0x00600008, 0x45c01248, 0x1eae8c00, 0x00010001 },
+ { 0x00600008, 0x45e01248, 0x1eae8c20, 0x00010001 },
+ { 0x00600008, 0x46001248, 0x1eae8c40, 0x00010001 },
+ { 0x00600008, 0x46201248, 0x1eae8c60, 0x00010001 },
+ { 0x00600040, 0x45c01248, 0x12ae05c0, 0x00ae05e0 },
+ { 0x80600040, 0xcc001248, 0x16ae05c0, 0x00800080 },
+ { 0x00600040, 0x46001248, 0x12ae0600, 0x00ae0620 },
+ { 0x80600040, 0xcc401248, 0x16ae0600, 0x00800080 },
+ { 0x00600008, 0x46401248, 0x1eae8800, 0x00010001 },
+ { 0x00600008, 0x46601248, 0x1eae8820, 0x00010001 },
+ { 0x00600008, 0x46801248, 0x1eae8840, 0x00010001 },
+ { 0x00600008, 0x46a01248, 0x1eae8860, 0x00010001 },
+ { 0x00600040, 0x46401248, 0x12ae0640, 0x00ae0660 },
+ { 0x80600040, 0xc8001248, 0x16ae0640, 0x00800080 },
+ { 0x00600040, 0x46801248, 0x12ae0680, 0x00ae06a0 },
+ { 0x80600040, 0xc8401248, 0x16ae0680, 0x00800080 },
+ { 0x00400040, 0x22081a40, 0x166902c0, 0x06000600 },
+ { 0x00400040, 0x22001a40, 0x166902c0, 0x00000000 },
+ { 0x00400040, 0x22081a40, 0x166902c0, 0x02000200 },
+ { 0x00800201, 0x23a02288, 0x00d28201, 0x00000000 },
+ { 0x00800401, 0x23b02288, 0x00d28221, 0x00000000 },
+ { 0x00800201, 0x23c02288, 0x00d28241, 0x00000000 },
+ { 0x00800401, 0x23d02288, 0x00d28261, 0x00000000 },
+ { 0x00600201, 0x44c02288, 0x00cf8401, 0x00000000 },
+ { 0x00600601, 0x44d02288, 0x00cf8441, 0x00000000 },
+ { 0x00600601, 0x44c12288, 0x00cf8001, 0x00000000 },
+ { 0x00600401, 0x44d12288, 0x00cf8041, 0x00000000 },
+ { 0x00400040, 0x22001a40, 0x166902c0, 0x04000400 },
+ { 0x00800201, 0x23e02288, 0x00d28a01, 0x00000000 },
+ { 0x00800401, 0x23f02288, 0x00d28a21, 0x00000000 },
+ { 0x00800201, 0x24002288, 0x00d28a41, 0x00000000 },
+ { 0x00800401, 0x24102288, 0x00d28a61, 0x00000000 },
+ { 0x00600201, 0x44e02288, 0x00cf8c01, 0x00000000 },
+ { 0x00600601, 0x44f02288, 0x00cf8c41, 0x00000000 },
+ { 0x00600601, 0x44e12288, 0x00cf8801, 0x00000000 },
+ { 0x00600401, 0x44f12288, 0x00cf8841, 0x00000000 },
+ { 0x00400040, 0x22081a40, 0x166902c0, 0x06000600 },
+ { 0x00800201, 0x24202288, 0x00d28201, 0x00000000 },
+ { 0x00800401, 0x24302288, 0x00d28221, 0x00000000 },
+ { 0x00800201, 0x24402288, 0x00d28241, 0x00000000 },
+ { 0x00800401, 0x24502288, 0x00d28261, 0x00000000 },
+ { 0x00600201, 0x45002288, 0x00cf8401, 0x00000000 },
+ { 0x00600601, 0x45102288, 0x00cf8441, 0x00000000 },
+ { 0x00600601, 0x45012288, 0x00cf8001, 0x00000000 },
+ { 0x00600401, 0x45112288, 0x00cf8041, 0x00000000 },
+ { 0x00800201, 0x24602288, 0x00d28a01, 0x00000000 },
+ { 0x00800401, 0x24702288, 0x00d28a21, 0x00000000 },
+ { 0x00800201, 0x24802288, 0x00d28a41, 0x00000000 },
+ { 0x00800401, 0x24902288, 0x00d28a61, 0x00000000 },
+ { 0x00600201, 0x45202288, 0x00cf8c01, 0x00000000 },
+ { 0x00600601, 0x45302288, 0x00cf8c41, 0x00000000 },
+ { 0x00600601, 0x45212288, 0x00cf8801, 0x00000000 },
+ { 0x00600401, 0x45312288, 0x00cf8841, 0x00000000 },
+ { 0x0c000031, 0x20002220, 0x06000380, 0x120a8018 },
+ { 0x0c000031, 0x20002220, 0x060004a0, 0x0a0a8019 },
+ { 0x00600001, 0x2fe00208, 0x008d0000, 0x00000000 },
+ { 0x07000031, 0x20002220, 0x0e000fe0, 0x82000010 },
diff --git a/src/shaders/post_processing/gen8/pl3_to_pl3.asm b/src/shaders/post_processing/gen8/pl3_to_pl3.asm
new file mode 100644
index 0000000..f6a2a76
--- /dev/null
+++ b/src/shaders/post_processing/gen8/pl3_to_pl3.asm
@@ -0,0 +1,17 @@
+// Module name: AVS
+.kernel PL3_TO_PL3
+.code
+
+#include "VP_Setup.g8a"
+#include "Set_Layer_0.g8a"
+#include "Set_AVS_Buf_0123_PL3.g8a"
+#include "PL3_AVS_Buf_0.g8a"
+#include "PL3_AVS_Buf_1.g8a"
+#include "PL3_AVS_Buf_2.g8a"
+#include "PL3_AVS_Buf_3.g8a"
+#include "Save_AVS_PL3.g8a"
+#include "EOT.g8a"
+
+.end_code
+
+.end_kernel
diff --git a/src/shaders/post_processing/gen8/pl3_to_pl3.g8b b/src/shaders/post_processing/gen8/pl3_to_pl3.g8b
new file mode 100644
index 0000000..67ac99c
--- /dev/null
+++ b/src/shaders/post_processing/gen8/pl3_to_pl3.g8b
@@ -0,0 +1,213 @@
+ { 0x00600001, 0x23600208, 0x008d0000, 0x00000000 },
+ { 0x00600001, 0x23200208, 0x008d0000, 0x00000000 },
+ { 0x00600001, 0x23400208, 0x008d0000, 0x00000000 },
+ { 0x00600041, 0x20603ae8, 0x3a8d0060, 0x000000f0 },
+ { 0x00200001, 0x21141ae8, 0x004500e0, 0x00000000 },
+ { 0x01000010, 0x20002220, 0x1600005a, 0x00010001 },
+ { 0x00000008, 0x22201248, 0x16000044, 0x00000000 },
+ { 0x00000005, 0x22201248, 0x16000220, 0x00030003 },
+ { 0x01000010, 0x20001261, 0x16000220, 0x00010001 },
+ { 0x00010020, 0x34000005, 0x0e001400, 0x00000090 },
+ { 0x01000010, 0x20001261, 0x16000220, 0x00020002 },
+ { 0x00010020, 0x34000005, 0x0e001400, 0x000000f0 },
+ { 0x01000010, 0x20001261, 0x16000220, 0x00030003 },
+ { 0x00010020, 0x34000005, 0x0e001400, 0x00000180 },
+ { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000114 },
+ { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000118 },
+ { 0x00000020, 0x34000004, 0x0e001400, 0x000001a0 },
+ { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000118 },
+ { 0x00000001, 0x220012e8, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x22203ae8, 0x3a004114, 0x00000200 },
+ { 0x00000040, 0x22203ae8, 0x3e000220, 0xc1800000 },
+ { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000220 },
+ { 0x00000020, 0x34000004, 0x0e001400, 0x00000120 },
+ { 0x00110001, 0x220012e8, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x22203ae8, 0x3a004114, 0x00000200 },
+ { 0x00110040, 0x22203ae8, 0x3e000220, 0xc1800000 },
+ { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000220 },
+ { 0x00000001, 0x220012e8, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x22203ae8, 0x3a004118, 0x00000200 },
+ { 0x00000040, 0x22203ae8, 0x3e000220, 0xc1800000 },
+ { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000220 },
+ { 0x00000020, 0x34000004, 0x0e001400, 0x00000070 },
+ { 0x00110001, 0x220012e8, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x22203ae8, 0x3a004118, 0x00000200 },
+ { 0x00110040, 0x22203ae8, 0x3e000220, 0xc1800000 },
+ { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000220 },
+ { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00800001, 0x22a01648, 0x10000000, 0xffffffff },
+ { 0x00000005, 0x23021288, 0x16000044, 0x00030003 },
+ { 0x00000001, 0x23203ae8, 0x000000f8, 0x00000000 },
+ { 0x00000001, 0x23383ae8, 0x000000f4, 0x00000000 },
+ { 0x00000001, 0x23303ae8, 0x00000060, 0x00000000 },
+ { 0x00000001, 0x23343ae8, 0x00000080, 0x00000000 },
+ { 0x00000001, 0x23283ae8, 0x000000c0, 0x00000000 },
+ { 0x00000001, 0x232c3ae8, 0x000000a0, 0x00000000 },
+ { 0x00000001, 0x233c0608, 0x00000000, 0x00000000 },
+ { 0x00000040, 0x233c0208, 0x0600033c, 0x08000000 },
+ { 0x00000001, 0x24083ae0, 0x000000c0, 0x00000000 },
+ { 0x00000048, 0x24083ae0, 0x3e000060, 0x41000000 },
+ { 0x00000248, 0x22e83ae8, 0x3e0000f4, 0x41e00000 },
+ { 0x00000641, 0x22e43ae8, 0x3e000080, 0x40800000 },
+ { 0x00000001, 0x24103ae0, 0x00000060, 0x00000000 },
+ { 0x00000648, 0x22f03ae8, 0x3e0000f4, 0x41000000 },
+ { 0x00000401, 0x22f40608, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00400001, 0x24003660, 0x30000000, 0x00006ea2 },
+ { 0x00400040, 0x24001860, 0x16690400, 0x00460046 },
+ { 0x00400009, 0x22c01868, 0x16690400, 0x00050005 },
+ { 0x00400001, 0x24003660, 0x30000000, 0x00006204 },
+ { 0x00400040, 0x24001860, 0x16690400, 0x00400040 },
+ { 0x00400209, 0x22401868, 0x16690400, 0x00050005 },
+ { 0x00000401, 0x22500608, 0x00000000, 0x01000100 },
+ { 0x00000001, 0x22d00608, 0x00000000, 0x00400040 },
+ { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 },
+ { 0x00000040, 0x22000200, 0x060002f4, 0x044eb000 },
+ { 0x00000001, 0x22080608, 0x00000000, 0x0000d000 },
+ { 0x00000001, 0x23240608, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 },
+ { 0x02000031, 0x28002248, 0x00000200, 0x00000200 },
+ { 0x00000040, 0x22000200, 0x060002f4, 0x044eb001 },
+ { 0x00000001, 0x22080608, 0x00000000, 0x0000e000 },
+ { 0x02000031, 0x28802248, 0x00000200, 0x00000200 },
+ { 0x00000040, 0x22000200, 0x060002f4, 0x044eb002 },
+ { 0x00000001, 0x22080608, 0x00000000, 0x0000e000 },
+ { 0x02000031, 0x29002248, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000001, 0x22d00608, 0x00000000, 0x00400040 },
+ { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 },
+ { 0x00000040, 0x22000200, 0x060002f4, 0x044eb000 },
+ { 0x00000001, 0x22080608, 0x00000000, 0x0000d000 },
+ { 0x00000001, 0x23240608, 0x00000000, 0x00000001 },
+ { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 },
+ { 0x02000031, 0x2a002248, 0x00000200, 0x00000200 },
+ { 0x00000040, 0x22000200, 0x060002f4, 0x044eb001 },
+ { 0x00000001, 0x22080608, 0x00000000, 0x0000e000 },
+ { 0x02000031, 0x2a802248, 0x00000200, 0x00000200 },
+ { 0x00000040, 0x22000200, 0x060002f4, 0x044eb002 },
+ { 0x00000001, 0x22080608, 0x00000000, 0x0000e000 },
+ { 0x02000031, 0x2b002248, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000001, 0x22d00608, 0x00000000, 0x00400040 },
+ { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 },
+ { 0x00000040, 0x22000200, 0x060002f4, 0x044eb000 },
+ { 0x00000001, 0x22080608, 0x00000000, 0x0000d000 },
+ { 0x00000001, 0x23240608, 0x00000000, 0x00000002 },
+ { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 },
+ { 0x02000031, 0x2c002248, 0x00000200, 0x00000200 },
+ { 0x00000040, 0x22000200, 0x060002f4, 0x044eb001 },
+ { 0x00000001, 0x22080608, 0x00000000, 0x0000e000 },
+ { 0x02000031, 0x2c802248, 0x00000200, 0x00000200 },
+ { 0x00000040, 0x22000200, 0x060002f4, 0x044eb002 },
+ { 0x00000001, 0x22080608, 0x00000000, 0x0000e000 },
+ { 0x02000031, 0x2d002248, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000001, 0x22d00608, 0x00000000, 0x00400040 },
+ { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 },
+ { 0x00000040, 0x22000200, 0x060002f4, 0x044eb000 },
+ { 0x00000001, 0x22080608, 0x00000000, 0x0000d000 },
+ { 0x00000001, 0x23240608, 0x00000000, 0x00000003 },
+ { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 },
+ { 0x02000031, 0x2e002248, 0x00000200, 0x00000200 },
+ { 0x00000040, 0x22000200, 0x060002f4, 0x044eb001 },
+ { 0x00000001, 0x22080608, 0x00000000, 0x0000e000 },
+ { 0x02000031, 0x2e802248, 0x00000200, 0x00000200 },
+ { 0x00000040, 0x22000200, 0x060002f4, 0x044eb002 },
+ { 0x00000001, 0x22080608, 0x00000000, 0x0000e000 },
+ { 0x02000031, 0x2f002248, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00400040, 0x22001a40, 0x166902c0, 0x00000000 },
+ { 0x00400040, 0x22081a40, 0x166902c0, 0x02000200 },
+ { 0x00600001, 0x23800208, 0x008d0360, 0x00000000 },
+ { 0x00600001, 0x24a00208, 0x008d0360, 0x00000000 },
+ { 0x00600001, 0x25c00208, 0x008d0360, 0x00000000 },
+ { 0x00200201, 0x23801a28, 0x004500e0, 0x00000000 },
+ { 0x00200208, 0x24a01a28, 0x1e4500e0, 0x00010001 },
+ { 0x00200208, 0x25c01a28, 0x1e4500e0, 0x00010001 },
+ { 0x00000401, 0x23880608, 0x00000000, 0x000f000f },
+ { 0x00000401, 0x24a80608, 0x00000000, 0x00070007 },
+ { 0x00000401, 0x25c80608, 0x00000000, 0x00070007 },
+ { 0x80800040, 0xa2001248, 0x16b18200, 0x00800080 },
+ { 0x80800040, 0xa2201248, 0x16b18220, 0x00800080 },
+ { 0x80800040, 0xa2401248, 0x16b18240, 0x00800080 },
+ { 0x80800040, 0xa2601248, 0x16b18260, 0x00800080 },
+ { 0x80600040, 0xc4001248, 0x16ae8400, 0x00800080 },
+ { 0x80600040, 0xc4401248, 0x16ae8440, 0x00800080 },
+ { 0x80600040, 0xc0001248, 0x16ae8000, 0x00800080 },
+ { 0x80600040, 0xc0401248, 0x16ae8040, 0x00800080 },
+ { 0x00400040, 0x22001a40, 0x166902c0, 0x04000400 },
+ { 0x80800040, 0xaa001248, 0x16b18a00, 0x00800080 },
+ { 0x80800040, 0xaa201248, 0x16b18a20, 0x00800080 },
+ { 0x80800040, 0xaa401248, 0x16b18a40, 0x00800080 },
+ { 0x80800040, 0xaa601248, 0x16b18a60, 0x00800080 },
+ { 0x80600040, 0xcc001248, 0x16ae8c00, 0x00800080 },
+ { 0x80600040, 0xcc401248, 0x16ae8c40, 0x00800080 },
+ { 0x80600040, 0xc8001248, 0x16ae8800, 0x00800080 },
+ { 0x80600040, 0xc8401248, 0x16ae8840, 0x00800080 },
+ { 0x00400040, 0x22081a40, 0x166902c0, 0x06000600 },
+ { 0x80800040, 0xa2001248, 0x16b18200, 0x00800080 },
+ { 0x80800040, 0xa2201248, 0x16b18220, 0x00800080 },
+ { 0x80800040, 0xa2401248, 0x16b18240, 0x00800080 },
+ { 0x80800040, 0xa2601248, 0x16b18260, 0x00800080 },
+ { 0x80600040, 0xc4001248, 0x16ae8400, 0x00800080 },
+ { 0x80600040, 0xc4401248, 0x16ae8440, 0x00800080 },
+ { 0x80600040, 0xc0001248, 0x16ae8000, 0x00800080 },
+ { 0x80600040, 0xc0401248, 0x16ae8040, 0x00800080 },
+ { 0x80800040, 0xaa001248, 0x16b18a00, 0x00800080 },
+ { 0x80800040, 0xaa201248, 0x16b18a20, 0x00800080 },
+ { 0x80800040, 0xaa401248, 0x16b18a40, 0x00800080 },
+ { 0x80800040, 0xaa601248, 0x16b18a60, 0x00800080 },
+ { 0x80600040, 0xcc001248, 0x16ae8c00, 0x00800080 },
+ { 0x80600040, 0xcc401248, 0x16ae8c40, 0x00800080 },
+ { 0x80600040, 0xc8001248, 0x16ae8800, 0x00800080 },
+ { 0x80600040, 0xc8401248, 0x16ae8840, 0x00800080 },
+ { 0x00400040, 0x22001a40, 0x166902c0, 0x00000000 },
+ { 0x00400040, 0x22081a40, 0x166902c0, 0x02000200 },
+ { 0x00800201, 0x23a02288, 0x00d28201, 0x00000000 },
+ { 0x00800401, 0x23b02288, 0x00d28221, 0x00000000 },
+ { 0x00800201, 0x23c02288, 0x00d28241, 0x00000000 },
+ { 0x00800401, 0x23d02288, 0x00d28261, 0x00000000 },
+ { 0x00600201, 0x24c02288, 0x00cf8401, 0x00000000 },
+ { 0x00600601, 0x24c82288, 0x00cf8441, 0x00000000 },
+ { 0x00600201, 0x25e02288, 0x00cf8001, 0x00000000 },
+ { 0x00600601, 0x25e82288, 0x00cf8041, 0x00000000 },
+ { 0x00400040, 0x22001a40, 0x166902c0, 0x04000400 },
+ { 0x00800201, 0x23e02288, 0x00d28a01, 0x00000000 },
+ { 0x00800401, 0x23f02288, 0x00d28a21, 0x00000000 },
+ { 0x00800201, 0x24002288, 0x00d28a41, 0x00000000 },
+ { 0x00800401, 0x24102288, 0x00d28a61, 0x00000000 },
+ { 0x00600601, 0x24d02288, 0x00cf8c01, 0x00000000 },
+ { 0x00600401, 0x24d82288, 0x00cf8c41, 0x00000000 },
+ { 0x00600601, 0x25f02288, 0x00cf8801, 0x00000000 },
+ { 0x00600401, 0x25f82288, 0x00cf8841, 0x00000000 },
+ { 0x00400040, 0x22081a40, 0x166902c0, 0x06000600 },
+ { 0x00800201, 0x24202288, 0x00d28201, 0x00000000 },
+ { 0x00800401, 0x24302288, 0x00d28221, 0x00000000 },
+ { 0x00800201, 0x24402288, 0x00d28241, 0x00000000 },
+ { 0x00800401, 0x24502288, 0x00d28261, 0x00000000 },
+ { 0x00600201, 0x24e02288, 0x00cf8401, 0x00000000 },
+ { 0x00600601, 0x24e82288, 0x00cf8441, 0x00000000 },
+ { 0x00600201, 0x26002288, 0x00cf8001, 0x00000000 },
+ { 0x00600601, 0x26082288, 0x00cf8041, 0x00000000 },
+ { 0x00800201, 0x24602288, 0x00d28a01, 0x00000000 },
+ { 0x00800401, 0x24702288, 0x00d28a21, 0x00000000 },
+ { 0x00800201, 0x24802288, 0x00d28a41, 0x00000000 },
+ { 0x00800401, 0x24902288, 0x00d28a61, 0x00000000 },
+ { 0x00600601, 0x24f02288, 0x00cf8c01, 0x00000000 },
+ { 0x00600401, 0x24f82288, 0x00cf8c41, 0x00000000 },
+ { 0x00600601, 0x26102288, 0x00cf8801, 0x00000000 },
+ { 0x00600401, 0x26182288, 0x00cf8841, 0x00000000 },
+ { 0x0c000031, 0x20002220, 0x06000380, 0x120a8018 },
+ { 0x0c000031, 0x20002220, 0x060004a0, 0x060a8019 },
+ { 0x0c000031, 0x20002220, 0x060005c0, 0x060a801a },
+ { 0x00600001, 0x2fe00208, 0x008d0000, 0x00000000 },
+ { 0x07000031, 0x20002220, 0x0e000fe0, 0x82000010 },
diff --git a/src/shaders/post_processing/gen8/rgbx_to_nv12.asm b/src/shaders/post_processing/gen8/rgbx_to_nv12.asm
new file mode 100644
index 0000000..14baafe
--- /dev/null
+++ b/src/shaders/post_processing/gen8/rgbx_to_nv12.asm
@@ -0,0 +1,18 @@
+// Module name: AVS
+.kernel RGBX_TO_NV12
+.code
+
+#include "VP_Setup.g8a"
+#include "Set_Layer_0.g8a"
+#include "Set_AVS_Buf_0123_BGRA.g8a"
+#include "PA_AVS_Buf_0.g8a"
+#include "PA_AVS_Buf_1.g8a"
+#include "PA_AVS_Buf_2.g8a"
+#include "PA_AVS_Buf_3.g8a"
+#include "RGB_to_YUV.g8a"
+#include "Save_AVS_NV12.g8a"
+#include "EOT.g8a"
+
+.end_code
+
+.end_kernel
diff --git a/src/shaders/post_processing/gen8/rgbx_to_nv12.g8b b/src/shaders/post_processing/gen8/rgbx_to_nv12.g8b
new file mode 100644
index 0000000..4cc113b
--- /dev/null
+++ b/src/shaders/post_processing/gen8/rgbx_to_nv12.g8b
@@ -0,0 +1,661 @@
+ { 0x00600001, 0x23600208, 0x008d0000, 0x00000000 },
+ { 0x00600001, 0x23200208, 0x008d0000, 0x00000000 },
+ { 0x00600001, 0x23400208, 0x008d0000, 0x00000000 },
+ { 0x00600041, 0x20603ae8, 0x3a8d0060, 0x000000f0 },
+ { 0x00200001, 0x21141ae8, 0x004500e0, 0x00000000 },
+ { 0x01000010, 0x20002220, 0x1600005a, 0x00010001 },
+ { 0x00000008, 0x22201248, 0x16000044, 0x00000000 },
+ { 0x00000005, 0x22201248, 0x16000220, 0x00030003 },
+ { 0x01000010, 0x20001261, 0x16000220, 0x00010001 },
+ { 0x00010020, 0x34000005, 0x0e001400, 0x00000090 },
+ { 0x01000010, 0x20001261, 0x16000220, 0x00020002 },
+ { 0x00010020, 0x34000005, 0x0e001400, 0x000000f0 },
+ { 0x01000010, 0x20001261, 0x16000220, 0x00030003 },
+ { 0x00010020, 0x34000005, 0x0e001400, 0x00000180 },
+ { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000114 },
+ { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000118 },
+ { 0x00000020, 0x34000004, 0x0e001400, 0x000001a0 },
+ { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000118 },
+ { 0x00000001, 0x220012e8, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x22203ae8, 0x3a004114, 0x00000200 },
+ { 0x00000040, 0x22203ae8, 0x3e000220, 0xc1800000 },
+ { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000220 },
+ { 0x00000020, 0x34000004, 0x0e001400, 0x00000120 },
+ { 0x00110001, 0x220012e8, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x22203ae8, 0x3a004114, 0x00000200 },
+ { 0x00110040, 0x22203ae8, 0x3e000220, 0xc1800000 },
+ { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000220 },
+ { 0x00000001, 0x220012e8, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x22203ae8, 0x3a004118, 0x00000200 },
+ { 0x00000040, 0x22203ae8, 0x3e000220, 0xc1800000 },
+ { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000220 },
+ { 0x00000020, 0x34000004, 0x0e001400, 0x00000070 },
+ { 0x00110001, 0x220012e8, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x22203ae8, 0x3a004118, 0x00000200 },
+ { 0x00110040, 0x22203ae8, 0x3e000220, 0xc1800000 },
+ { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000220 },
+ { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00800001, 0x22a01648, 0x10000000, 0xffffffff },
+ { 0x00000005, 0x23021288, 0x16000044, 0x00030003 },
+ { 0x00000001, 0x23203ae8, 0x000000f8, 0x00000000 },
+ { 0x00000001, 0x23383ae8, 0x000000f4, 0x00000000 },
+ { 0x00000001, 0x23303ae8, 0x00000060, 0x00000000 },
+ { 0x00000001, 0x23343ae8, 0x00000080, 0x00000000 },
+ { 0x00000001, 0x23283ae8, 0x000000c0, 0x00000000 },
+ { 0x00000001, 0x232c3ae8, 0x000000a0, 0x00000000 },
+ { 0x00000001, 0x233c0608, 0x00000000, 0x00000000 },
+ { 0x00000040, 0x233c0208, 0x0600033c, 0x08000000 },
+ { 0x00000001, 0x24083ae0, 0x000000c0, 0x00000000 },
+ { 0x00000048, 0x24083ae0, 0x3e000060, 0x41000000 },
+ { 0x00000248, 0x22e83ae8, 0x3e0000f4, 0x41e00000 },
+ { 0x00000641, 0x22e43ae8, 0x3e000080, 0x40800000 },
+ { 0x00000001, 0x24103ae0, 0x00000060, 0x00000000 },
+ { 0x00000648, 0x22f03ae8, 0x3e0000f4, 0x41000000 },
+ { 0x00000401, 0x22f40608, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00400001, 0x24003660, 0x30000000, 0x00006ea2 },
+ { 0x00400040, 0x24001860, 0x16690400, 0x00460046 },
+ { 0x00400009, 0x22c01868, 0x16690400, 0x00050005 },
+ { 0x00400001, 0x24003660, 0x30000000, 0x00006204 },
+ { 0x00400040, 0x24001860, 0x16690400, 0x00400040 },
+ { 0x00400209, 0x22401868, 0x16690400, 0x00050005 },
+ { 0x00000401, 0x22500608, 0x00000000, 0x01000100 },
+ { 0x00000001, 0x22d00608, 0x00000000, 0x00400040 },
+ { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 },
+ { 0x00000040, 0x22000200, 0x060002f4, 0x050eb000 },
+ { 0x00000001, 0x22080608, 0x00000000, 0x00000000 },
+ { 0x00000001, 0x23240608, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 },
+ { 0x02000031, 0x28002248, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000001, 0x22d00608, 0x00000000, 0x00400040 },
+ { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 },
+ { 0x00000040, 0x22000200, 0x060002f4, 0x050eb000 },
+ { 0x00000001, 0x22080608, 0x00000000, 0x00000000 },
+ { 0x00000001, 0x23240608, 0x00000000, 0x00000001 },
+ { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 },
+ { 0x02000031, 0x2a002248, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000001, 0x22d00608, 0x00000000, 0x00400040 },
+ { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 },
+ { 0x00000040, 0x22000200, 0x060002f4, 0x050eb000 },
+ { 0x00000001, 0x22080608, 0x00000000, 0x00000000 },
+ { 0x00000001, 0x23240608, 0x00000000, 0x00000002 },
+ { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 },
+ { 0x02000031, 0x2c002248, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000001, 0x22d00608, 0x00000000, 0x00400040 },
+ { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 },
+ { 0x00000040, 0x22000200, 0x060002f4, 0x050eb000 },
+ { 0x00000001, 0x22080608, 0x00000000, 0x00000000 },
+ { 0x00000001, 0x23240608, 0x00000000, 0x00000003 },
+ { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 },
+ { 0x02000031, 0x2e002248, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00400001, 0x22001240, 0x006902c0, 0x00000000 },
+ { 0x00400001, 0x22081240, 0x006902c0, 0x00000000 },
+ { 0x02800005, 0x20001260, 0x1e000040, 0x00010001 },
+ { 0x00010001, 0x22201048, 0x00000200, 0x00000000 },
+ { 0x00010001, 0x22001040, 0x00000202, 0x00000000 },
+ { 0x00010001, 0x22021240, 0x00000220, 0x00000000 },
+ { 0x00600001, 0x238022e8, 0x00ae8201, 0x00000000 },
+ { 0x00600001, 0x23a022e8, 0x00ae8211, 0x00000000 },
+ { 0x00600001, 0x23c022e8, 0x00ae8401, 0x00000000 },
+ { 0x00600001, 0x23e022e8, 0x00ae8411, 0x00000000 },
+ { 0x00600001, 0x240022e8, 0x00ae8001, 0x00000000 },
+ { 0x00600001, 0x242022e8, 0x00ae8011, 0x00000000 },
+ { 0x00800041, 0x24003ae0, 0x3e8d0380, 0x3e991687 },
+ { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0x3f1645a2 },
+ { 0x00800048, 0x24803ae8, 0x3e8d0400, 0x3de978d5 },
+ { 0x00800001, 0x24003ee0, 0x38000000, 0x43000000 },
+ { 0x00800048, 0x24003ae0, 0x3e8d0380, 0xbe2d0e56 },
+ { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0xbea978d5 },
+ { 0x00800048, 0x24c03ae8, 0x3e8d0400, 0x3eff7cee },
+ { 0x00800001, 0x24003ee0, 0x38000000, 0x43000000 },
+ { 0x00800048, 0x24003ae0, 0x3e8d0380, 0x3eff7cee },
+ { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0xbed60419 },
+ { 0x00800048, 0x25003ae8, 0x3e8d0400, 0xbda6809d },
+ { 0x00800001, 0x45403a68, 0x008d0480, 0x00000000 },
+ { 0x00800001, 0x45803a68, 0x008d04c0, 0x00000000 },
+ { 0x00800001, 0x45c03a68, 0x008d0500, 0x00000000 },
+ { 0x00800001, 0xaa001648, 0x10000000, 0x00000000 },
+ { 0x00800001, 0xac001648, 0x10000000, 0x00000000 },
+ { 0x00800001, 0xa8001648, 0x10000000, 0x00000000 },
+ { 0x00800001, 0xca012288, 0x00cf0540, 0x00000000 },
+ { 0x00800001, 0xcc012288, 0x00cf0580, 0x00000000 },
+ { 0x00800001, 0xc8012288, 0x00cf05c0, 0x00000000 },
+ { 0x00600001, 0x238022e8, 0x00ae8221, 0x00000000 },
+ { 0x00600001, 0x23a022e8, 0x00ae8231, 0x00000000 },
+ { 0x00600001, 0x23c022e8, 0x00ae8421, 0x00000000 },
+ { 0x00600001, 0x23e022e8, 0x00ae8431, 0x00000000 },
+ { 0x00600001, 0x240022e8, 0x00ae8021, 0x00000000 },
+ { 0x00600001, 0x242022e8, 0x00ae8031, 0x00000000 },
+ { 0x00800041, 0x24003ae0, 0x3e8d0380, 0x3e991687 },
+ { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0x3f1645a2 },
+ { 0x00800048, 0x24803ae8, 0x3e8d0400, 0x3de978d5 },
+ { 0x00800001, 0x24003ee0, 0x38000000, 0x43000000 },
+ { 0x00800048, 0x24003ae0, 0x3e8d0380, 0xbe2d0e56 },
+ { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0xbea978d5 },
+ { 0x00800048, 0x24c03ae8, 0x3e8d0400, 0x3eff7cee },
+ { 0x00800001, 0x24003ee0, 0x38000000, 0x43000000 },
+ { 0x00800048, 0x24003ae0, 0x3e8d0380, 0x3eff7cee },
+ { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0xbed60419 },
+ { 0x00800048, 0x25003ae8, 0x3e8d0400, 0xbda6809d },
+ { 0x00800001, 0x45403a68, 0x008d0480, 0x00000000 },
+ { 0x00800001, 0x45803a68, 0x008d04c0, 0x00000000 },
+ { 0x00800001, 0x45c03a68, 0x008d0500, 0x00000000 },
+ { 0x00800001, 0xaa201648, 0x10000000, 0x00000000 },
+ { 0x00800001, 0xac201648, 0x10000000, 0x00000000 },
+ { 0x00800001, 0xa8201648, 0x10000000, 0x00000000 },
+ { 0x00800001, 0xca212288, 0x00cf0540, 0x00000000 },
+ { 0x00800001, 0xcc212288, 0x00cf0580, 0x00000000 },
+ { 0x00800001, 0xc8212288, 0x00cf05c0, 0x00000000 },
+ { 0x00600001, 0x238022e8, 0x00ae8241, 0x00000000 },
+ { 0x00600001, 0x23a022e8, 0x00ae8251, 0x00000000 },
+ { 0x00600001, 0x23c022e8, 0x00ae8441, 0x00000000 },
+ { 0x00600001, 0x23e022e8, 0x00ae8451, 0x00000000 },
+ { 0x00600001, 0x240022e8, 0x00ae8041, 0x00000000 },
+ { 0x00600001, 0x242022e8, 0x00ae8051, 0x00000000 },
+ { 0x00800041, 0x24003ae0, 0x3e8d0380, 0x3e991687 },
+ { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0x3f1645a2 },
+ { 0x00800048, 0x24803ae8, 0x3e8d0400, 0x3de978d5 },
+ { 0x00800001, 0x24003ee0, 0x38000000, 0x43000000 },
+ { 0x00800048, 0x24003ae0, 0x3e8d0380, 0xbe2d0e56 },
+ { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0xbea978d5 },
+ { 0x00800048, 0x24c03ae8, 0x3e8d0400, 0x3eff7cee },
+ { 0x00800001, 0x24003ee0, 0x38000000, 0x43000000 },
+ { 0x00800048, 0x24003ae0, 0x3e8d0380, 0x3eff7cee },
+ { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0xbed60419 },
+ { 0x00800048, 0x25003ae8, 0x3e8d0400, 0xbda6809d },
+ { 0x00800001, 0x45403a68, 0x008d0480, 0x00000000 },
+ { 0x00800001, 0x45803a68, 0x008d04c0, 0x00000000 },
+ { 0x00800001, 0x45c03a68, 0x008d0500, 0x00000000 },
+ { 0x00800001, 0xaa401648, 0x10000000, 0x00000000 },
+ { 0x00800001, 0xac401648, 0x10000000, 0x00000000 },
+ { 0x00800001, 0xa8401648, 0x10000000, 0x00000000 },
+ { 0x00800001, 0xca412288, 0x00cf0540, 0x00000000 },
+ { 0x00800001, 0xcc412288, 0x00cf0580, 0x00000000 },
+ { 0x00800001, 0xc8412288, 0x00cf05c0, 0x00000000 },
+ { 0x00600001, 0x238022e8, 0x00ae8261, 0x00000000 },
+ { 0x00600001, 0x23a022e8, 0x00ae8271, 0x00000000 },
+ { 0x00600001, 0x23c022e8, 0x00ae8461, 0x00000000 },
+ { 0x00600001, 0x23e022e8, 0x00ae8471, 0x00000000 },
+ { 0x00600001, 0x240022e8, 0x00ae8061, 0x00000000 },
+ { 0x00600001, 0x242022e8, 0x00ae8071, 0x00000000 },
+ { 0x00800041, 0x24003ae0, 0x3e8d0380, 0x3e991687 },
+ { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0x3f1645a2 },
+ { 0x00800048, 0x24803ae8, 0x3e8d0400, 0x3de978d5 },
+ { 0x00800001, 0x24003ee0, 0x38000000, 0x43000000 },
+ { 0x00800048, 0x24003ae0, 0x3e8d0380, 0xbe2d0e56 },
+ { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0xbea978d5 },
+ { 0x00800048, 0x24c03ae8, 0x3e8d0400, 0x3eff7cee },
+ { 0x00800001, 0x24003ee0, 0x38000000, 0x43000000 },
+ { 0x00800048, 0x24003ae0, 0x3e8d0380, 0x3eff7cee },
+ { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0xbed60419 },
+ { 0x00800048, 0x25003ae8, 0x3e8d0400, 0xbda6809d },
+ { 0x00800001, 0x45403a68, 0x008d0480, 0x00000000 },
+ { 0x00800001, 0x45803a68, 0x008d04c0, 0x00000000 },
+ { 0x00800001, 0x45c03a68, 0x008d0500, 0x00000000 },
+ { 0x00800001, 0xaa601648, 0x10000000, 0x00000000 },
+ { 0x00800001, 0xac601648, 0x10000000, 0x00000000 },
+ { 0x00800001, 0xa8601648, 0x10000000, 0x00000000 },
+ { 0x00800001, 0xca612288, 0x00cf0540, 0x00000000 },
+ { 0x00800001, 0xcc612288, 0x00cf0580, 0x00000000 },
+ { 0x00800001, 0xc8612288, 0x00cf05c0, 0x00000000 },
+ { 0x00600040, 0x22001040, 0x168d0200, 0x02000200 },
+ { 0x00600001, 0x238022e8, 0x00ae8201, 0x00000000 },
+ { 0x00600001, 0x23a022e8, 0x00ae8211, 0x00000000 },
+ { 0x00600001, 0x23c022e8, 0x00ae8401, 0x00000000 },
+ { 0x00600001, 0x23e022e8, 0x00ae8411, 0x00000000 },
+ { 0x00600001, 0x240022e8, 0x00ae8001, 0x00000000 },
+ { 0x00600001, 0x242022e8, 0x00ae8011, 0x00000000 },
+ { 0x00800041, 0x24003ae0, 0x3e8d0380, 0x3e991687 },
+ { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0x3f1645a2 },
+ { 0x00800048, 0x24803ae8, 0x3e8d0400, 0x3de978d5 },
+ { 0x00800001, 0x24003ee0, 0x38000000, 0x43000000 },
+ { 0x00800048, 0x24003ae0, 0x3e8d0380, 0xbe2d0e56 },
+ { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0xbea978d5 },
+ { 0x00800048, 0x24c03ae8, 0x3e8d0400, 0x3eff7cee },
+ { 0x00800001, 0x24003ee0, 0x38000000, 0x43000000 },
+ { 0x00800048, 0x24003ae0, 0x3e8d0380, 0x3eff7cee },
+ { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0xbed60419 },
+ { 0x00800048, 0x25003ae8, 0x3e8d0400, 0xbda6809d },
+ { 0x00800001, 0x45403a68, 0x008d0480, 0x00000000 },
+ { 0x00800001, 0x45803a68, 0x008d04c0, 0x00000000 },
+ { 0x00800001, 0x45c03a68, 0x008d0500, 0x00000000 },
+ { 0x00800001, 0xaa001648, 0x10000000, 0x00000000 },
+ { 0x00800001, 0xac001648, 0x10000000, 0x00000000 },
+ { 0x00800001, 0xa8001648, 0x10000000, 0x00000000 },
+ { 0x00800001, 0xca012288, 0x00cf0540, 0x00000000 },
+ { 0x00800001, 0xcc012288, 0x00cf0580, 0x00000000 },
+ { 0x00800001, 0xc8012288, 0x00cf05c0, 0x00000000 },
+ { 0x00600001, 0x238022e8, 0x00ae8221, 0x00000000 },
+ { 0x00600001, 0x23a022e8, 0x00ae8231, 0x00000000 },
+ { 0x00600001, 0x23c022e8, 0x00ae8421, 0x00000000 },
+ { 0x00600001, 0x23e022e8, 0x00ae8431, 0x00000000 },
+ { 0x00600001, 0x240022e8, 0x00ae8021, 0x00000000 },
+ { 0x00600001, 0x242022e8, 0x00ae8031, 0x00000000 },
+ { 0x00800041, 0x24003ae0, 0x3e8d0380, 0x3e991687 },
+ { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0x3f1645a2 },
+ { 0x00800048, 0x24803ae8, 0x3e8d0400, 0x3de978d5 },
+ { 0x00800001, 0x24003ee0, 0x38000000, 0x43000000 },
+ { 0x00800048, 0x24003ae0, 0x3e8d0380, 0xbe2d0e56 },
+ { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0xbea978d5 },
+ { 0x00800048, 0x24c03ae8, 0x3e8d0400, 0x3eff7cee },
+ { 0x00800001, 0x24003ee0, 0x38000000, 0x43000000 },
+ { 0x00800048, 0x24003ae0, 0x3e8d0380, 0x3eff7cee },
+ { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0xbed60419 },
+ { 0x00800048, 0x25003ae8, 0x3e8d0400, 0xbda6809d },
+ { 0x00800001, 0x45403a68, 0x008d0480, 0x00000000 },
+ { 0x00800001, 0x45803a68, 0x008d04c0, 0x00000000 },
+ { 0x00800001, 0x45c03a68, 0x008d0500, 0x00000000 },
+ { 0x00800001, 0xaa201648, 0x10000000, 0x00000000 },
+ { 0x00800001, 0xac201648, 0x10000000, 0x00000000 },
+ { 0x00800001, 0xa8201648, 0x10000000, 0x00000000 },
+ { 0x00800001, 0xca212288, 0x00cf0540, 0x00000000 },
+ { 0x00800001, 0xcc212288, 0x00cf0580, 0x00000000 },
+ { 0x00800001, 0xc8212288, 0x00cf05c0, 0x00000000 },
+ { 0x00600001, 0x238022e8, 0x00ae8241, 0x00000000 },
+ { 0x00600001, 0x23a022e8, 0x00ae8251, 0x00000000 },
+ { 0x00600001, 0x23c022e8, 0x00ae8441, 0x00000000 },
+ { 0x00600001, 0x23e022e8, 0x00ae8451, 0x00000000 },
+ { 0x00600001, 0x240022e8, 0x00ae8041, 0x00000000 },
+ { 0x00600001, 0x242022e8, 0x00ae8051, 0x00000000 },
+ { 0x00800041, 0x24003ae0, 0x3e8d0380, 0x3e991687 },
+ { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0x3f1645a2 },
+ { 0x00800048, 0x24803ae8, 0x3e8d0400, 0x3de978d5 },
+ { 0x00800001, 0x24003ee0, 0x38000000, 0x43000000 },
+ { 0x00800048, 0x24003ae0, 0x3e8d0380, 0xbe2d0e56 },
+ { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0xbea978d5 },
+ { 0x00800048, 0x24c03ae8, 0x3e8d0400, 0x3eff7cee },
+ { 0x00800001, 0x24003ee0, 0x38000000, 0x43000000 },
+ { 0x00800048, 0x24003ae0, 0x3e8d0380, 0x3eff7cee },
+ { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0xbed60419 },
+ { 0x00800048, 0x25003ae8, 0x3e8d0400, 0xbda6809d },
+ { 0x00800001, 0x45403a68, 0x008d0480, 0x00000000 },
+ { 0x00800001, 0x45803a68, 0x008d04c0, 0x00000000 },
+ { 0x00800001, 0x45c03a68, 0x008d0500, 0x00000000 },
+ { 0x00800001, 0xaa401648, 0x10000000, 0x00000000 },
+ { 0x00800001, 0xac401648, 0x10000000, 0x00000000 },
+ { 0x00800001, 0xa8401648, 0x10000000, 0x00000000 },
+ { 0x00800001, 0xca412288, 0x00cf0540, 0x00000000 },
+ { 0x00800001, 0xcc412288, 0x00cf0580, 0x00000000 },
+ { 0x00800001, 0xc8412288, 0x00cf05c0, 0x00000000 },
+ { 0x00600001, 0x238022e8, 0x00ae8261, 0x00000000 },
+ { 0x00600001, 0x23a022e8, 0x00ae8271, 0x00000000 },
+ { 0x00600001, 0x23c022e8, 0x00ae8461, 0x00000000 },
+ { 0x00600001, 0x23e022e8, 0x00ae8471, 0x00000000 },
+ { 0x00600001, 0x240022e8, 0x00ae8061, 0x00000000 },
+ { 0x00600001, 0x242022e8, 0x00ae8071, 0x00000000 },
+ { 0x00800041, 0x24003ae0, 0x3e8d0380, 0x3e991687 },
+ { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0x3f1645a2 },
+ { 0x00800048, 0x24803ae8, 0x3e8d0400, 0x3de978d5 },
+ { 0x00800001, 0x24003ee0, 0x38000000, 0x43000000 },
+ { 0x00800048, 0x24003ae0, 0x3e8d0380, 0xbe2d0e56 },
+ { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0xbea978d5 },
+ { 0x00800048, 0x24c03ae8, 0x3e8d0400, 0x3eff7cee },
+ { 0x00800001, 0x24003ee0, 0x38000000, 0x43000000 },
+ { 0x00800048, 0x24003ae0, 0x3e8d0380, 0x3eff7cee },
+ { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0xbed60419 },
+ { 0x00800048, 0x25003ae8, 0x3e8d0400, 0xbda6809d },
+ { 0x00800001, 0x45403a68, 0x008d0480, 0x00000000 },
+ { 0x00800001, 0x45803a68, 0x008d04c0, 0x00000000 },
+ { 0x00800001, 0x45c03a68, 0x008d0500, 0x00000000 },
+ { 0x00800001, 0xaa601648, 0x10000000, 0x00000000 },
+ { 0x00800001, 0xac601648, 0x10000000, 0x00000000 },
+ { 0x00800001, 0xa8601648, 0x10000000, 0x00000000 },
+ { 0x00800001, 0xca612288, 0x00cf0540, 0x00000000 },
+ { 0x00800001, 0xcc612288, 0x00cf0580, 0x00000000 },
+ { 0x00800001, 0xc8612288, 0x00cf05c0, 0x00000000 },
+ { 0x00600040, 0x22001040, 0x168d0200, 0x02000200 },
+ { 0x00600001, 0x238022e8, 0x00ae8201, 0x00000000 },
+ { 0x00600001, 0x23a022e8, 0x00ae8211, 0x00000000 },
+ { 0x00600001, 0x23c022e8, 0x00ae8401, 0x00000000 },
+ { 0x00600001, 0x23e022e8, 0x00ae8411, 0x00000000 },
+ { 0x00600001, 0x240022e8, 0x00ae8001, 0x00000000 },
+ { 0x00600001, 0x242022e8, 0x00ae8011, 0x00000000 },
+ { 0x00800041, 0x24003ae0, 0x3e8d0380, 0x3e991687 },
+ { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0x3f1645a2 },
+ { 0x00800048, 0x24803ae8, 0x3e8d0400, 0x3de978d5 },
+ { 0x00800001, 0x24003ee0, 0x38000000, 0x43000000 },
+ { 0x00800048, 0x24003ae0, 0x3e8d0380, 0xbe2d0e56 },
+ { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0xbea978d5 },
+ { 0x00800048, 0x24c03ae8, 0x3e8d0400, 0x3eff7cee },
+ { 0x00800001, 0x24003ee0, 0x38000000, 0x43000000 },
+ { 0x00800048, 0x24003ae0, 0x3e8d0380, 0x3eff7cee },
+ { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0xbed60419 },
+ { 0x00800048, 0x25003ae8, 0x3e8d0400, 0xbda6809d },
+ { 0x00800001, 0x45403a68, 0x008d0480, 0x00000000 },
+ { 0x00800001, 0x45803a68, 0x008d04c0, 0x00000000 },
+ { 0x00800001, 0x45c03a68, 0x008d0500, 0x00000000 },
+ { 0x00800001, 0xaa001648, 0x10000000, 0x00000000 },
+ { 0x00800001, 0xac001648, 0x10000000, 0x00000000 },
+ { 0x00800001, 0xa8001648, 0x10000000, 0x00000000 },
+ { 0x00800001, 0xca012288, 0x00cf0540, 0x00000000 },
+ { 0x00800001, 0xcc012288, 0x00cf0580, 0x00000000 },
+ { 0x00800001, 0xc8012288, 0x00cf05c0, 0x00000000 },
+ { 0x00600001, 0x238022e8, 0x00ae8221, 0x00000000 },
+ { 0x00600001, 0x23a022e8, 0x00ae8231, 0x00000000 },
+ { 0x00600001, 0x23c022e8, 0x00ae8421, 0x00000000 },
+ { 0x00600001, 0x23e022e8, 0x00ae8431, 0x00000000 },
+ { 0x00600001, 0x240022e8, 0x00ae8021, 0x00000000 },
+ { 0x00600001, 0x242022e8, 0x00ae8031, 0x00000000 },
+ { 0x00800041, 0x24003ae0, 0x3e8d0380, 0x3e991687 },
+ { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0x3f1645a2 },
+ { 0x00800048, 0x24803ae8, 0x3e8d0400, 0x3de978d5 },
+ { 0x00800001, 0x24003ee0, 0x38000000, 0x43000000 },
+ { 0x00800048, 0x24003ae0, 0x3e8d0380, 0xbe2d0e56 },
+ { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0xbea978d5 },
+ { 0x00800048, 0x24c03ae8, 0x3e8d0400, 0x3eff7cee },
+ { 0x00800001, 0x24003ee0, 0x38000000, 0x43000000 },
+ { 0x00800048, 0x24003ae0, 0x3e8d0380, 0x3eff7cee },
+ { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0xbed60419 },
+ { 0x00800048, 0x25003ae8, 0x3e8d0400, 0xbda6809d },
+ { 0x00800001, 0x45403a68, 0x008d0480, 0x00000000 },
+ { 0x00800001, 0x45803a68, 0x008d04c0, 0x00000000 },
+ { 0x00800001, 0x45c03a68, 0x008d0500, 0x00000000 },
+ { 0x00800001, 0xaa201648, 0x10000000, 0x00000000 },
+ { 0x00800001, 0xac201648, 0x10000000, 0x00000000 },
+ { 0x00800001, 0xa8201648, 0x10000000, 0x00000000 },
+ { 0x00800001, 0xca212288, 0x00cf0540, 0x00000000 },
+ { 0x00800001, 0xcc212288, 0x00cf0580, 0x00000000 },
+ { 0x00800001, 0xc8212288, 0x00cf05c0, 0x00000000 },
+ { 0x00600001, 0x238022e8, 0x00ae8241, 0x00000000 },
+ { 0x00600001, 0x23a022e8, 0x00ae8251, 0x00000000 },
+ { 0x00600001, 0x23c022e8, 0x00ae8441, 0x00000000 },
+ { 0x00600001, 0x23e022e8, 0x00ae8451, 0x00000000 },
+ { 0x00600001, 0x240022e8, 0x00ae8041, 0x00000000 },
+ { 0x00600001, 0x242022e8, 0x00ae8051, 0x00000000 },
+ { 0x00800041, 0x24003ae0, 0x3e8d0380, 0x3e991687 },
+ { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0x3f1645a2 },
+ { 0x00800048, 0x24803ae8, 0x3e8d0400, 0x3de978d5 },
+ { 0x00800001, 0x24003ee0, 0x38000000, 0x43000000 },
+ { 0x00800048, 0x24003ae0, 0x3e8d0380, 0xbe2d0e56 },
+ { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0xbea978d5 },
+ { 0x00800048, 0x24c03ae8, 0x3e8d0400, 0x3eff7cee },
+ { 0x00800001, 0x24003ee0, 0x38000000, 0x43000000 },
+ { 0x00800048, 0x24003ae0, 0x3e8d0380, 0x3eff7cee },
+ { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0xbed60419 },
+ { 0x00800048, 0x25003ae8, 0x3e8d0400, 0xbda6809d },
+ { 0x00800001, 0x45403a68, 0x008d0480, 0x00000000 },
+ { 0x00800001, 0x45803a68, 0x008d04c0, 0x00000000 },
+ { 0x00800001, 0x45c03a68, 0x008d0500, 0x00000000 },
+ { 0x00800001, 0xaa401648, 0x10000000, 0x00000000 },
+ { 0x00800001, 0xac401648, 0x10000000, 0x00000000 },
+ { 0x00800001, 0xa8401648, 0x10000000, 0x00000000 },
+ { 0x00800001, 0xca412288, 0x00cf0540, 0x00000000 },
+ { 0x00800001, 0xcc412288, 0x00cf0580, 0x00000000 },
+ { 0x00800001, 0xc8412288, 0x00cf05c0, 0x00000000 },
+ { 0x00600001, 0x238022e8, 0x00ae8261, 0x00000000 },
+ { 0x00600001, 0x23a022e8, 0x00ae8271, 0x00000000 },
+ { 0x00600001, 0x23c022e8, 0x00ae8461, 0x00000000 },
+ { 0x00600001, 0x23e022e8, 0x00ae8471, 0x00000000 },
+ { 0x00600001, 0x240022e8, 0x00ae8061, 0x00000000 },
+ { 0x00600001, 0x242022e8, 0x00ae8071, 0x00000000 },
+ { 0x00800041, 0x24003ae0, 0x3e8d0380, 0x3e991687 },
+ { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0x3f1645a2 },
+ { 0x00800048, 0x24803ae8, 0x3e8d0400, 0x3de978d5 },
+ { 0x00800001, 0x24003ee0, 0x38000000, 0x43000000 },
+ { 0x00800048, 0x24003ae0, 0x3e8d0380, 0xbe2d0e56 },
+ { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0xbea978d5 },
+ { 0x00800048, 0x24c03ae8, 0x3e8d0400, 0x3eff7cee },
+ { 0x00800001, 0x24003ee0, 0x38000000, 0x43000000 },
+ { 0x00800048, 0x24003ae0, 0x3e8d0380, 0x3eff7cee },
+ { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0xbed60419 },
+ { 0x00800048, 0x25003ae8, 0x3e8d0400, 0xbda6809d },
+ { 0x00800001, 0x45403a68, 0x008d0480, 0x00000000 },
+ { 0x00800001, 0x45803a68, 0x008d04c0, 0x00000000 },
+ { 0x00800001, 0x45c03a68, 0x008d0500, 0x00000000 },
+ { 0x00800001, 0xaa601648, 0x10000000, 0x00000000 },
+ { 0x00800001, 0xac601648, 0x10000000, 0x00000000 },
+ { 0x00800001, 0xa8601648, 0x10000000, 0x00000000 },
+ { 0x00800001, 0xca612288, 0x00cf0540, 0x00000000 },
+ { 0x00800001, 0xcc612288, 0x00cf0580, 0x00000000 },
+ { 0x00800001, 0xc8612288, 0x00cf05c0, 0x00000000 },
+ { 0x00600040, 0x22001040, 0x168d0200, 0x02000200 },
+ { 0x00600001, 0x238022e8, 0x00ae8201, 0x00000000 },
+ { 0x00600001, 0x23a022e8, 0x00ae8211, 0x00000000 },
+ { 0x00600001, 0x23c022e8, 0x00ae8401, 0x00000000 },
+ { 0x00600001, 0x23e022e8, 0x00ae8411, 0x00000000 },
+ { 0x00600001, 0x240022e8, 0x00ae8001, 0x00000000 },
+ { 0x00600001, 0x242022e8, 0x00ae8011, 0x00000000 },
+ { 0x00800041, 0x24003ae0, 0x3e8d0380, 0x3e991687 },
+ { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0x3f1645a2 },
+ { 0x00800048, 0x24803ae8, 0x3e8d0400, 0x3de978d5 },
+ { 0x00800001, 0x24003ee0, 0x38000000, 0x43000000 },
+ { 0x00800048, 0x24003ae0, 0x3e8d0380, 0xbe2d0e56 },
+ { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0xbea978d5 },
+ { 0x00800048, 0x24c03ae8, 0x3e8d0400, 0x3eff7cee },
+ { 0x00800001, 0x24003ee0, 0x38000000, 0x43000000 },
+ { 0x00800048, 0x24003ae0, 0x3e8d0380, 0x3eff7cee },
+ { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0xbed60419 },
+ { 0x00800048, 0x25003ae8, 0x3e8d0400, 0xbda6809d },
+ { 0x00800001, 0x45403a68, 0x008d0480, 0x00000000 },
+ { 0x00800001, 0x45803a68, 0x008d04c0, 0x00000000 },
+ { 0x00800001, 0x45c03a68, 0x008d0500, 0x00000000 },
+ { 0x00800001, 0xaa001648, 0x10000000, 0x00000000 },
+ { 0x00800001, 0xac001648, 0x10000000, 0x00000000 },
+ { 0x00800001, 0xa8001648, 0x10000000, 0x00000000 },
+ { 0x00800001, 0xca012288, 0x00cf0540, 0x00000000 },
+ { 0x00800001, 0xcc012288, 0x00cf0580, 0x00000000 },
+ { 0x00800001, 0xc8012288, 0x00cf05c0, 0x00000000 },
+ { 0x00600001, 0x238022e8, 0x00ae8221, 0x00000000 },
+ { 0x00600001, 0x23a022e8, 0x00ae8231, 0x00000000 },
+ { 0x00600001, 0x23c022e8, 0x00ae8421, 0x00000000 },
+ { 0x00600001, 0x23e022e8, 0x00ae8431, 0x00000000 },
+ { 0x00600001, 0x240022e8, 0x00ae8021, 0x00000000 },
+ { 0x00600001, 0x242022e8, 0x00ae8031, 0x00000000 },
+ { 0x00800041, 0x24003ae0, 0x3e8d0380, 0x3e991687 },
+ { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0x3f1645a2 },
+ { 0x00800048, 0x24803ae8, 0x3e8d0400, 0x3de978d5 },
+ { 0x00800001, 0x24003ee0, 0x38000000, 0x43000000 },
+ { 0x00800048, 0x24003ae0, 0x3e8d0380, 0xbe2d0e56 },
+ { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0xbea978d5 },
+ { 0x00800048, 0x24c03ae8, 0x3e8d0400, 0x3eff7cee },
+ { 0x00800001, 0x24003ee0, 0x38000000, 0x43000000 },
+ { 0x00800048, 0x24003ae0, 0x3e8d0380, 0x3eff7cee },
+ { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0xbed60419 },
+ { 0x00800048, 0x25003ae8, 0x3e8d0400, 0xbda6809d },
+ { 0x00800001, 0x45403a68, 0x008d0480, 0x00000000 },
+ { 0x00800001, 0x45803a68, 0x008d04c0, 0x00000000 },
+ { 0x00800001, 0x45c03a68, 0x008d0500, 0x00000000 },
+ { 0x00800001, 0xaa201648, 0x10000000, 0x00000000 },
+ { 0x00800001, 0xac201648, 0x10000000, 0x00000000 },
+ { 0x00800001, 0xa8201648, 0x10000000, 0x00000000 },
+ { 0x00800001, 0xca212288, 0x00cf0540, 0x00000000 },
+ { 0x00800001, 0xcc212288, 0x00cf0580, 0x00000000 },
+ { 0x00800001, 0xc8212288, 0x00cf05c0, 0x00000000 },
+ { 0x00600001, 0x238022e8, 0x00ae8241, 0x00000000 },
+ { 0x00600001, 0x23a022e8, 0x00ae8251, 0x00000000 },
+ { 0x00600001, 0x23c022e8, 0x00ae8441, 0x00000000 },
+ { 0x00600001, 0x23e022e8, 0x00ae8451, 0x00000000 },
+ { 0x00600001, 0x240022e8, 0x00ae8041, 0x00000000 },
+ { 0x00600001, 0x242022e8, 0x00ae8051, 0x00000000 },
+ { 0x00800041, 0x24003ae0, 0x3e8d0380, 0x3e991687 },
+ { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0x3f1645a2 },
+ { 0x00800048, 0x24803ae8, 0x3e8d0400, 0x3de978d5 },
+ { 0x00800001, 0x24003ee0, 0x38000000, 0x43000000 },
+ { 0x00800048, 0x24003ae0, 0x3e8d0380, 0xbe2d0e56 },
+ { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0xbea978d5 },
+ { 0x00800048, 0x24c03ae8, 0x3e8d0400, 0x3eff7cee },
+ { 0x00800001, 0x24003ee0, 0x38000000, 0x43000000 },
+ { 0x00800048, 0x24003ae0, 0x3e8d0380, 0x3eff7cee },
+ { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0xbed60419 },
+ { 0x00800048, 0x25003ae8, 0x3e8d0400, 0xbda6809d },
+ { 0x00800001, 0x45403a68, 0x008d0480, 0x00000000 },
+ { 0x00800001, 0x45803a68, 0x008d04c0, 0x00000000 },
+ { 0x00800001, 0x45c03a68, 0x008d0500, 0x00000000 },
+ { 0x00800001, 0xaa401648, 0x10000000, 0x00000000 },
+ { 0x00800001, 0xac401648, 0x10000000, 0x00000000 },
+ { 0x00800001, 0xa8401648, 0x10000000, 0x00000000 },
+ { 0x00800001, 0xca412288, 0x00cf0540, 0x00000000 },
+ { 0x00800001, 0xcc412288, 0x00cf0580, 0x00000000 },
+ { 0x00800001, 0xc8412288, 0x00cf05c0, 0x00000000 },
+ { 0x00600001, 0x238022e8, 0x00ae8261, 0x00000000 },
+ { 0x00600001, 0x23a022e8, 0x00ae8271, 0x00000000 },
+ { 0x00600001, 0x23c022e8, 0x00ae8461, 0x00000000 },
+ { 0x00600001, 0x23e022e8, 0x00ae8471, 0x00000000 },
+ { 0x00600001, 0x240022e8, 0x00ae8061, 0x00000000 },
+ { 0x00600001, 0x242022e8, 0x00ae8071, 0x00000000 },
+ { 0x00800041, 0x24003ae0, 0x3e8d0380, 0x3e991687 },
+ { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0x3f1645a2 },
+ { 0x00800048, 0x24803ae8, 0x3e8d0400, 0x3de978d5 },
+ { 0x00800001, 0x24003ee0, 0x38000000, 0x43000000 },
+ { 0x00800048, 0x24003ae0, 0x3e8d0380, 0xbe2d0e56 },
+ { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0xbea978d5 },
+ { 0x00800048, 0x24c03ae8, 0x3e8d0400, 0x3eff7cee },
+ { 0x00800001, 0x24003ee0, 0x38000000, 0x43000000 },
+ { 0x00800048, 0x24003ae0, 0x3e8d0380, 0x3eff7cee },
+ { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0xbed60419 },
+ { 0x00800048, 0x25003ae8, 0x3e8d0400, 0xbda6809d },
+ { 0x00800001, 0x45403a68, 0x008d0480, 0x00000000 },
+ { 0x00800001, 0x45803a68, 0x008d04c0, 0x00000000 },
+ { 0x00800001, 0x45c03a68, 0x008d0500, 0x00000000 },
+ { 0x00800001, 0xaa601648, 0x10000000, 0x00000000 },
+ { 0x00800001, 0xac601648, 0x10000000, 0x00000000 },
+ { 0x00800001, 0xa8601648, 0x10000000, 0x00000000 },
+ { 0x00800001, 0xca612288, 0x00cf0540, 0x00000000 },
+ { 0x00800001, 0xcc612288, 0x00cf0580, 0x00000000 },
+ { 0x00800001, 0xc8612288, 0x00cf05c0, 0x00000000 },
+ { 0x00400040, 0x22001a40, 0x166902c0, 0x00000000 },
+ { 0x00400040, 0x22081a40, 0x166902c0, 0x02000200 },
+ { 0x00600001, 0x23800208, 0x008d0360, 0x00000000 },
+ { 0x00600001, 0x24a00208, 0x008d0360, 0x00000000 },
+ { 0x00200201, 0x23801a28, 0x004500e0, 0x00000000 },
+ { 0x00000201, 0x24a01a28, 0x000000e0, 0x00000000 },
+ { 0x00000608, 0x24a41a28, 0x1e0000e2, 0x00010001 },
+ { 0x00000401, 0x23880608, 0x00000000, 0x000f000f },
+ { 0x00000401, 0x24a80608, 0x00000000, 0x0007000f },
+ { 0x80800040, 0xa2001248, 0x16b18200, 0x00800080 },
+ { 0x80800040, 0xa2201248, 0x16b18220, 0x00800080 },
+ { 0x80800040, 0xa2401248, 0x16b18240, 0x00800080 },
+ { 0x80800040, 0xa2601248, 0x16b18260, 0x00800080 },
+ { 0x00600008, 0x45c01248, 0x1eae8400, 0x00010001 },
+ { 0x00600008, 0x45e01248, 0x1eae8420, 0x00010001 },
+ { 0x00600008, 0x46001248, 0x1eae8440, 0x00010001 },
+ { 0x00600008, 0x46201248, 0x1eae8460, 0x00010001 },
+ { 0x00600040, 0x45c01248, 0x12ae05c0, 0x00ae05e0 },
+ { 0x80600040, 0xc4001248, 0x16ae05c0, 0x00800080 },
+ { 0x00600040, 0x46001248, 0x12ae0600, 0x00ae0620 },
+ { 0x80600040, 0xc4401248, 0x16ae0600, 0x00800080 },
+ { 0x00600008, 0x46401248, 0x1eae8000, 0x00010001 },
+ { 0x00600008, 0x46601248, 0x1eae8020, 0x00010001 },
+ { 0x00600008, 0x46801248, 0x1eae8040, 0x00010001 },
+ { 0x00600008, 0x46a01248, 0x1eae8060, 0x00010001 },
+ { 0x00600040, 0x46401248, 0x12ae0640, 0x00ae0660 },
+ { 0x80600040, 0xc0001248, 0x16ae0640, 0x00800080 },
+ { 0x00600040, 0x46801248, 0x12ae0680, 0x00ae06a0 },
+ { 0x80600040, 0xc0401248, 0x16ae0680, 0x00800080 },
+ { 0x00400040, 0x22001a40, 0x166902c0, 0x04000400 },
+ { 0x80800040, 0xaa001248, 0x16b18a00, 0x00800080 },
+ { 0x80800040, 0xaa201248, 0x16b18a20, 0x00800080 },
+ { 0x80800040, 0xaa401248, 0x16b18a40, 0x00800080 },
+ { 0x80800040, 0xaa601248, 0x16b18a60, 0x00800080 },
+ { 0x00600008, 0x45c01248, 0x1eae8c00, 0x00010001 },
+ { 0x00600008, 0x45e01248, 0x1eae8c20, 0x00010001 },
+ { 0x00600008, 0x46001248, 0x1eae8c40, 0x00010001 },
+ { 0x00600008, 0x46201248, 0x1eae8c60, 0x00010001 },
+ { 0x00600040, 0x45c01248, 0x12ae05c0, 0x00ae05e0 },
+ { 0x80600040, 0xcc001248, 0x16ae05c0, 0x00800080 },
+ { 0x00600040, 0x46001248, 0x12ae0600, 0x00ae0620 },
+ { 0x80600040, 0xcc401248, 0x16ae0600, 0x00800080 },
+ { 0x00600008, 0x46401248, 0x1eae8800, 0x00010001 },
+ { 0x00600008, 0x46601248, 0x1eae8820, 0x00010001 },
+ { 0x00600008, 0x46801248, 0x1eae8840, 0x00010001 },
+ { 0x00600008, 0x46a01248, 0x1eae8860, 0x00010001 },
+ { 0x00600040, 0x46401248, 0x12ae0640, 0x00ae0660 },
+ { 0x80600040, 0xc8001248, 0x16ae0640, 0x00800080 },
+ { 0x00600040, 0x46801248, 0x12ae0680, 0x00ae06a0 },
+ { 0x80600040, 0xc8401248, 0x16ae0680, 0x00800080 },
+ { 0x00400040, 0x22081a40, 0x166902c0, 0x06000600 },
+ { 0x80800040, 0xa2001248, 0x16b18200, 0x00800080 },
+ { 0x80800040, 0xa2201248, 0x16b18220, 0x00800080 },
+ { 0x80800040, 0xa2401248, 0x16b18240, 0x00800080 },
+ { 0x80800040, 0xa2601248, 0x16b18260, 0x00800080 },
+ { 0x00600008, 0x45c01248, 0x1eae8400, 0x00010001 },
+ { 0x00600008, 0x45e01248, 0x1eae8420, 0x00010001 },
+ { 0x00600008, 0x46001248, 0x1eae8440, 0x00010001 },
+ { 0x00600008, 0x46201248, 0x1eae8460, 0x00010001 },
+ { 0x00600040, 0x45c01248, 0x12ae05c0, 0x00ae05e0 },
+ { 0x80600040, 0xc4001248, 0x16ae05c0, 0x00800080 },
+ { 0x00600040, 0x46001248, 0x12ae0600, 0x00ae0620 },
+ { 0x80600040, 0xc4401248, 0x16ae0600, 0x00800080 },
+ { 0x00600008, 0x46401248, 0x1eae8000, 0x00010001 },
+ { 0x00600008, 0x46601248, 0x1eae8020, 0x00010001 },
+ { 0x00600008, 0x46801248, 0x1eae8040, 0x00010001 },
+ { 0x00600008, 0x46a01248, 0x1eae8060, 0x00010001 },
+ { 0x00600040, 0x46401248, 0x12ae0640, 0x00ae0660 },
+ { 0x80600040, 0xc0001248, 0x16ae0640, 0x00800080 },
+ { 0x00600040, 0x46801248, 0x12ae0680, 0x00ae06a0 },
+ { 0x80600040, 0xc0401248, 0x16ae0680, 0x00800080 },
+ { 0x00400040, 0x22001a40, 0x166902c0, 0x04000400 },
+ { 0x80800040, 0xaa001248, 0x16b18a00, 0x00800080 },
+ { 0x80800040, 0xaa201248, 0x16b18a20, 0x00800080 },
+ { 0x80800040, 0xaa401248, 0x16b18a40, 0x00800080 },
+ { 0x80800040, 0xaa601248, 0x16b18a60, 0x00800080 },
+ { 0x00600008, 0x45c01248, 0x1eae8c00, 0x00010001 },
+ { 0x00600008, 0x45e01248, 0x1eae8c20, 0x00010001 },
+ { 0x00600008, 0x46001248, 0x1eae8c40, 0x00010001 },
+ { 0x00600008, 0x46201248, 0x1eae8c60, 0x00010001 },
+ { 0x00600040, 0x45c01248, 0x12ae05c0, 0x00ae05e0 },
+ { 0x80600040, 0xcc001248, 0x16ae05c0, 0x00800080 },
+ { 0x00600040, 0x46001248, 0x12ae0600, 0x00ae0620 },
+ { 0x80600040, 0xcc401248, 0x16ae0600, 0x00800080 },
+ { 0x00600008, 0x46401248, 0x1eae8800, 0x00010001 },
+ { 0x00600008, 0x46601248, 0x1eae8820, 0x00010001 },
+ { 0x00600008, 0x46801248, 0x1eae8840, 0x00010001 },
+ { 0x00600008, 0x46a01248, 0x1eae8860, 0x00010001 },
+ { 0x00600040, 0x46401248, 0x12ae0640, 0x00ae0660 },
+ { 0x80600040, 0xc8001248, 0x16ae0640, 0x00800080 },
+ { 0x00600040, 0x46801248, 0x12ae0680, 0x00ae06a0 },
+ { 0x80600040, 0xc8401248, 0x16ae0680, 0x00800080 },
+ { 0x00400040, 0x22081a40, 0x166902c0, 0x06000600 },
+ { 0x00400040, 0x22001a40, 0x166902c0, 0x00000000 },
+ { 0x00400040, 0x22081a40, 0x166902c0, 0x02000200 },
+ { 0x00800201, 0x23a02288, 0x00d28201, 0x00000000 },
+ { 0x00800401, 0x23b02288, 0x00d28221, 0x00000000 },
+ { 0x00800201, 0x23c02288, 0x00d28241, 0x00000000 },
+ { 0x00800401, 0x23d02288, 0x00d28261, 0x00000000 },
+ { 0x00600201, 0x44c02288, 0x00cf8401, 0x00000000 },
+ { 0x00600601, 0x44d02288, 0x00cf8441, 0x00000000 },
+ { 0x00600601, 0x44c12288, 0x00cf8001, 0x00000000 },
+ { 0x00600401, 0x44d12288, 0x00cf8041, 0x00000000 },
+ { 0x00400040, 0x22001a40, 0x166902c0, 0x04000400 },
+ { 0x00800201, 0x23e02288, 0x00d28a01, 0x00000000 },
+ { 0x00800401, 0x23f02288, 0x00d28a21, 0x00000000 },
+ { 0x00800201, 0x24002288, 0x00d28a41, 0x00000000 },
+ { 0x00800401, 0x24102288, 0x00d28a61, 0x00000000 },
+ { 0x00600201, 0x44e02288, 0x00cf8c01, 0x00000000 },
+ { 0x00600601, 0x44f02288, 0x00cf8c41, 0x00000000 },
+ { 0x00600601, 0x44e12288, 0x00cf8801, 0x00000000 },
+ { 0x00600401, 0x44f12288, 0x00cf8841, 0x00000000 },
+ { 0x00400040, 0x22081a40, 0x166902c0, 0x06000600 },
+ { 0x00800201, 0x24202288, 0x00d28201, 0x00000000 },
+ { 0x00800401, 0x24302288, 0x00d28221, 0x00000000 },
+ { 0x00800201, 0x24402288, 0x00d28241, 0x00000000 },
+ { 0x00800401, 0x24502288, 0x00d28261, 0x00000000 },
+ { 0x00600201, 0x45002288, 0x00cf8401, 0x00000000 },
+ { 0x00600601, 0x45102288, 0x00cf8441, 0x00000000 },
+ { 0x00600601, 0x45012288, 0x00cf8001, 0x00000000 },
+ { 0x00600401, 0x45112288, 0x00cf8041, 0x00000000 },
+ { 0x00800201, 0x24602288, 0x00d28a01, 0x00000000 },
+ { 0x00800401, 0x24702288, 0x00d28a21, 0x00000000 },
+ { 0x00800201, 0x24802288, 0x00d28a41, 0x00000000 },
+ { 0x00800401, 0x24902288, 0x00d28a61, 0x00000000 },
+ { 0x00600201, 0x45202288, 0x00cf8c01, 0x00000000 },
+ { 0x00600601, 0x45302288, 0x00cf8c41, 0x00000000 },
+ { 0x00600601, 0x45212288, 0x00cf8801, 0x00000000 },
+ { 0x00600401, 0x45312288, 0x00cf8841, 0x00000000 },
+ { 0x0c000031, 0x20002220, 0x06000380, 0x120a8018 },
+ { 0x0c000031, 0x20002220, 0x060004a0, 0x0a0a8019 },
+ { 0x00600001, 0x2fe00208, 0x008d0000, 0x00000000 },
+ { 0x07000031, 0x20002220, 0x0e000fe0, 0x82000010 },
diff --git a/src/shaders/post_processing/gen8/sharpening_h_blur.g8b b/src/shaders/post_processing/gen8/sharpening_h_blur.g8b
new file mode 100644
index 0000000..ffa759b
--- /dev/null
+++ b/src/shaders/post_processing/gen8/sharpening_h_blur.g8b
@@ -0,0 +1,1718 @@
+{ 0x00000001, 0x23401608, 0x00000000, 0x00000000 },
+{ 0x00600001, 0x20403ae8, 0x008d0000, 0x00000000 },
+{ 0x00000001, 0x23441608, 0x00000000, 0x00020002 },
+{ 0x00000001, 0x20480608, 0x00000000, 0x000f0007 },
+{ 0x00000001, 0x20401608, 0x00000000, 0x00000000 },
+{ 0x00000040, 0x22000200, 0x06000340, 0x02490000 },
+{ 0x00000001, 0x20443ae8, 0x00000028, 0x00000000 },
+{ 0x00000001, 0x202c1608, 0x00000000, 0x00040004 },
+{ 0x0c600031, 0x20403a08, 0x00000040, 0x00000200 },
+{ 0x00000040, 0x20240208, 0x1e000020, 0xfffcfffc },
+{ 0x06000010, 0x20000201, 0x16000024, 0x00040004 },
+{ 0x00200001, 0x23283ae8, 0x004500b0, 0x00000000 },
+{ 0x00200001, 0x23083ae8, 0x004500a0, 0x00000000 },
+{ 0x00200001, 0x22e83ae8, 0x00450090, 0x00000000 },
+{ 0x00200001, 0x22c83ae8, 0x00450080, 0x00000000 },
+{ 0x00200001, 0x22a83ae8, 0x00450070, 0x00000000 },
+{ 0x00200001, 0x22883ae8, 0x00450060, 0x00000000 },
+{ 0x00200001, 0x22683ae8, 0x00450050, 0x00000000 },
+{ 0x00200001, 0x22483ae8, 0x00450040, 0x00000000 },
+{ 0x00200001, 0x23383ae8, 0x004500b8, 0x00000000 },
+{ 0x00200001, 0x23183ae8, 0x004500a8, 0x00000000 },
+{ 0x00200001, 0x22f83ae8, 0x00450098, 0x00000000 },
+{ 0x00200001, 0x22d83ae8, 0x00450088, 0x00000000 },
+{ 0x00200001, 0x22b83ae8, 0x00450078, 0x00000000 },
+{ 0x00200001, 0x22983ae8, 0x00450068, 0x00000000 },
+{ 0x00200001, 0x22783ae8, 0x00450058, 0x00000000 },
+{ 0x00200001, 0x22583ae8, 0x00450048, 0x00000000 },
+{ 0x00010020, 0x34000005, 0x0e001400, 0x000067d0 },
+{ 0x00600001, 0x20403ae8, 0x008d0000, 0x00000000 },
+{ 0x00000040, 0x22000200, 0x06000340, 0x02490000 },
+{ 0x00200001, 0x22603ae8, 0x00450268, 0x00000000 },
+{ 0x00200001, 0x22403ae8, 0x00450248, 0x00000000 },
+{ 0x00200001, 0x22803ae8, 0x00450288, 0x00000000 },
+{ 0x00200001, 0x22c03ae8, 0x004502c8, 0x00000000 },
+{ 0x00200001, 0x22a03ae8, 0x004502a8, 0x00000000 },
+{ 0x00200001, 0x23203ae8, 0x00450328, 0x00000000 },
+{ 0x00000001, 0x20480608, 0x00000000, 0x000f0007 },
+{ 0x00200001, 0x23003ae8, 0x00450308, 0x00000000 },
+{ 0x00200001, 0x22e03ae8, 0x004502e8, 0x00000000 },
+{ 0x00000040, 0x20400208, 0x1600002c, 0x00040004 },
+{ 0x00000001, 0x20443ae8, 0x00000028, 0x00000000 },
+{ 0x0c600031, 0x20403a08, 0x00000040, 0x00000200 },
+{ 0x00200001, 0x22703ae8, 0x00450278, 0x00000000 },
+{ 0x00200001, 0x22503ae8, 0x00450258, 0x00000000 },
+{ 0x00200001, 0x22683ae8, 0x00450050, 0x00000000 },
+{ 0x00200001, 0x22483ae8, 0x00450040, 0x00000000 },
+{ 0x00200001, 0x22783ae8, 0x00450058, 0x00000000 },
+{ 0x00200001, 0x22583ae8, 0x00450048, 0x00000000 },
+{ 0x00000001, 0x204022e8, 0x00000240, 0x00000000 },
+{ 0x00200001, 0x22903ae8, 0x00450298, 0x00000000 },
+{ 0x00000001, 0x204422e8, 0x00000250, 0x00000000 },
+{ 0x00000001, 0x204822e8, 0x00000260, 0x00000000 },
+{ 0x00200001, 0x22883ae8, 0x00450060, 0x00000000 },
+{ 0x00200001, 0x22d03ae8, 0x004502d8, 0x00000000 },
+{ 0x00200001, 0x22b03ae8, 0x004502b8, 0x00000000 },
+{ 0x00200001, 0x22983ae8, 0x00450068, 0x00000000 },
+{ 0x00000001, 0x204c22e8, 0x00000270, 0x00000000 },
+{ 0x00200001, 0x22c83ae8, 0x00450080, 0x00000000 },
+{ 0x00200001, 0x22a83ae8, 0x00450070, 0x00000000 },
+{ 0x00000001, 0x205022e8, 0x00000280, 0x00000000 },
+{ 0x00200001, 0x22d83ae8, 0x00450088, 0x00000000 },
+{ 0x00200001, 0x22b83ae8, 0x00450078, 0x00000000 },
+{ 0x00200001, 0x23303ae8, 0x00450338, 0x00000000 },
+{ 0x00200001, 0x23103ae8, 0x00450318, 0x00000000 },
+{ 0x00200001, 0x22f03ae8, 0x004502f8, 0x00000000 },
+{ 0x00000001, 0x205422e8, 0x00000290, 0x00000000 },
+{ 0x00200001, 0x22e83ae8, 0x00450090, 0x00000000 },
+{ 0x00200001, 0x23083ae8, 0x004500a0, 0x00000000 },
+{ 0x00200001, 0x23283ae8, 0x004500b0, 0x00000000 },
+{ 0x00000001, 0x205822e8, 0x000002a0, 0x00000000 },
+{ 0x00000001, 0x206022e8, 0x000002c0, 0x00000000 },
+{ 0x00000001, 0x206422e8, 0x000002d0, 0x00000000 },
+{ 0x00000001, 0x205c22e8, 0x000002b0, 0x00000000 },
+{ 0x00200001, 0x23383ae8, 0x004500b8, 0x00000000 },
+{ 0x00200001, 0x23183ae8, 0x004500a8, 0x00000000 },
+{ 0x00200001, 0x22f83ae8, 0x00450098, 0x00000000 },
+{ 0x00000001, 0x206822e8, 0x000002e0, 0x00000000 },
+{ 0x00000001, 0x20a022e8, 0x000002c1, 0x00000000 },
+{ 0x00000001, 0x208022e8, 0x00000241, 0x00000000 },
+{ 0x00000001, 0x20303ee8, 0x00000000, 0x332bcc77 },
+{ 0x00000001, 0x21403ee8, 0x00000000, 0x3c1d98ad },
+{ 0x00000001, 0x206c22e8, 0x000002f0, 0x00000000 },
+{ 0x00000001, 0x20a422e8, 0x000002d1, 0x00000000 },
+{ 0x00000001, 0x208422e8, 0x00000251, 0x00000000 },
+{ 0x00000001, 0x208822e8, 0x00000261, 0x00000000 },
+{ 0x00000001, 0x20a822e8, 0x000002e1, 0x00000000 },
+{ 0x00000001, 0x20ac22e8, 0x000002f1, 0x00000000 },
+{ 0x00000001, 0x208c22e8, 0x00000271, 0x00000000 },
+{ 0x00000001, 0x209022e8, 0x00000281, 0x00000000 },
+{ 0x00000001, 0x20b022e8, 0x00000301, 0x00000000 },
+{ 0x00000001, 0x20b422e8, 0x00000311, 0x00000000 },
+{ 0x00000001, 0x209422e8, 0x00000291, 0x00000000 },
+{ 0x00000001, 0x209822e8, 0x000002a1, 0x00000000 },
+{ 0x00000001, 0x20b822e8, 0x00000321, 0x00000000 },
+{ 0x00000001, 0x20bc22e8, 0x00000331, 0x00000000 },
+{ 0x00000001, 0x209c22e8, 0x000002b1, 0x00000000 },
+{ 0x00800041, 0x20c03ae8, 0x3e8d0080, 0x3875735f },
+{ 0x00000001, 0x208022e8, 0x00000242, 0x00000000 },
+{ 0x00000001, 0x20a022e8, 0x000002c2, 0x00000000 },
+{ 0x00000001, 0x208422e8, 0x00000252, 0x00000000 },
+{ 0x00000001, 0x208822e8, 0x00000262, 0x00000000 },
+{ 0x00000001, 0x208c22e8, 0x00000272, 0x00000000 },
+{ 0x00000001, 0x209022e8, 0x00000282, 0x00000000 },
+{ 0x00000001, 0x209422e8, 0x00000292, 0x00000000 },
+{ 0x00000001, 0x209822e8, 0x000002a2, 0x00000000 },
+{ 0x0060015b, 0x081e0000, 0x392061c8, 0x00872003 },
+{ 0x00000001, 0x209c22e8, 0x000002b2, 0x00000000 },
+{ 0x00000001, 0x20a422e8, 0x000002d2, 0x00000000 },
+{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072014 },
+{ 0x00000001, 0x208022e8, 0x00000243, 0x00000000 },
+{ 0x00000001, 0x20a822e8, 0x000002e2, 0x00000000 },
+{ 0x00000001, 0x207022e8, 0x00000300, 0x00000000 },
+{ 0x00000001, 0x20ac22e8, 0x000002f2, 0x00000000 },
+{ 0x00000001, 0x208422e8, 0x00000253, 0x00000000 },
+{ 0x00000001, 0x207422e8, 0x00000310, 0x00000000 },
+{ 0x00000001, 0x208822e8, 0x00000263, 0x00000000 },
+{ 0x00000001, 0x20b022e8, 0x00000302, 0x00000000 },
+{ 0x00000001, 0x207822e8, 0x00000320, 0x00000000 },
+{ 0x00000001, 0x20b422e8, 0x00000312, 0x00000000 },
+{ 0x00000001, 0x208c22e8, 0x00000273, 0x00000000 },
+{ 0x00000001, 0x207c22e8, 0x00000330, 0x00000000 },
+{ 0x00000001, 0x209022e8, 0x00000283, 0x00000000 },
+{ 0x00000001, 0x20b822e8, 0x00000322, 0x00000000 },
+{ 0x0060015b, 0x091e0000, 0x392071c8, 0x00c72003 },
+{ 0x00000001, 0x20bc22e8, 0x00000332, 0x00000000 },
+{ 0x00000001, 0x209422e8, 0x00000293, 0x00000000 },
+{ 0x00000001, 0x209822e8, 0x000002a3, 0x00000000 },
+{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472014 },
+{ 0x00000001, 0x209c22e8, 0x000002b3, 0x00000000 },
+{ 0x00000001, 0x20a022e8, 0x000002c3, 0x00000000 },
+{ 0x00000001, 0x21503ee8, 0x00000000, 0x3e525448 },
+{ 0x00000001, 0x21603ee8, 0x00000000, 0x3f11e168 },
+{ 0x00000001, 0x20a422e8, 0x000002d3, 0x00000000 },
+{ 0x00000001, 0x20a822e8, 0x000002e3, 0x00000000 },
+{ 0x00000001, 0x20ac22e8, 0x000002f3, 0x00000000 },
+{ 0x00000001, 0x20b022e8, 0x00000303, 0x00000000 },
+{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072015 },
+{ 0x00000001, 0x20b422e8, 0x00000313, 0x00000000 },
+{ 0x00000001, 0x20b822e8, 0x00000323, 0x00000000 },
+{ 0x00000001, 0x208022e8, 0x00000244, 0x00000000 },
+{ 0x00000001, 0x208422e8, 0x00000254, 0x00000000 },
+{ 0x00000001, 0x20bc22e8, 0x00000333, 0x00000000 },
+{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472015 },
+{ 0x00000001, 0x208822e8, 0x00000264, 0x00000000 },
+{ 0x00000001, 0x208c22e8, 0x00000274, 0x00000000 },
+{ 0x00000001, 0x20a022e8, 0x000002c4, 0x00000000 },
+{ 0x00000001, 0x20a422e8, 0x000002d4, 0x00000000 },
+{ 0x00000001, 0x209022e8, 0x00000284, 0x00000000 },
+{ 0x00000001, 0x209422e8, 0x00000294, 0x00000000 },
+{ 0x00000001, 0x20a822e8, 0x000002e4, 0x00000000 },
+{ 0x00000001, 0x20ac22e8, 0x000002f4, 0x00000000 },
+{ 0x00000001, 0x209822e8, 0x000002a4, 0x00000000 },
+{ 0x00000001, 0x209c22e8, 0x000002b4, 0x00000000 },
+{ 0x00000001, 0x20b022e8, 0x00000304, 0x00000000 },
+{ 0x00000001, 0x20b422e8, 0x00000314, 0x00000000 },
+{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072016 },
+{ 0x00000001, 0x208022e8, 0x00000245, 0x00000000 },
+{ 0x00000001, 0x20b822e8, 0x00000324, 0x00000000 },
+{ 0x00000001, 0x20bc22e8, 0x00000334, 0x00000000 },
+{ 0x00000001, 0x208422e8, 0x00000255, 0x00000000 },
+{ 0x00000001, 0x208822e8, 0x00000265, 0x00000000 },
+{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472016 },
+{ 0x00000001, 0x20a022e8, 0x000002c5, 0x00000000 },
+{ 0x00000001, 0x208c22e8, 0x00000275, 0x00000000 },
+{ 0x00000001, 0x209022e8, 0x00000285, 0x00000000 },
+{ 0x00000001, 0x20a422e8, 0x000002d5, 0x00000000 },
+{ 0x00000001, 0x20a822e8, 0x000002e5, 0x00000000 },
+{ 0x00000001, 0x209422e8, 0x00000295, 0x00000000 },
+{ 0x00000001, 0x209822e8, 0x000002a5, 0x00000000 },
+{ 0x00000001, 0x20ac22e8, 0x000002f5, 0x00000000 },
+{ 0x00000001, 0x20b022e8, 0x00000305, 0x00000000 },
+{ 0x00000001, 0x209c22e8, 0x000002b5, 0x00000000 },
+{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072015 },
+{ 0x00000001, 0x20b422e8, 0x00000315, 0x00000000 },
+{ 0x00000001, 0x20b822e8, 0x00000325, 0x00000000 },
+{ 0x00000001, 0x208022e8, 0x00000246, 0x00000000 },
+{ 0x00000001, 0x208422e8, 0x00000256, 0x00000000 },
+{ 0x00000001, 0x20bc22e8, 0x00000335, 0x00000000 },
+{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472015 },
+{ 0x00000001, 0x208822e8, 0x00000266, 0x00000000 },
+{ 0x00000001, 0x208c22e8, 0x00000276, 0x00000000 },
+{ 0x00000001, 0x20a022e8, 0x000002c6, 0x00000000 },
+{ 0x00000001, 0x209022e8, 0x00000286, 0x00000000 },
+{ 0x00000001, 0x21703ee8, 0x00000000, 0x3875735f },
+{ 0x00000001, 0x20a422e8, 0x000002d6, 0x00000000 },
+{ 0x00000001, 0x20a822e8, 0x000002e6, 0x00000000 },
+{ 0x00000001, 0x20ac22e8, 0x000002f6, 0x00000000 },
+{ 0x00000001, 0x20b022e8, 0x00000306, 0x00000000 },
+{ 0x00000001, 0x209422e8, 0x00000296, 0x00000000 },
+{ 0x00000001, 0x20b422e8, 0x00000316, 0x00000000 },
+{ 0x00000001, 0x20b822e8, 0x00000326, 0x00000000 },
+{ 0x00000001, 0x209822e8, 0x000002a6, 0x00000000 },
+{ 0x00000001, 0x209c22e8, 0x000002b6, 0x00000000 },
+{ 0x00000001, 0x20bc22e8, 0x00000336, 0x00000000 },
+{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472014 },
+{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072014 },
+{ 0x00000001, 0x208022e8, 0x00000247, 0x00000000 },
+{ 0x00000001, 0x20a022e8, 0x000002c7, 0x00000000 },
+{ 0x00000001, 0x20a422e8, 0x000002d7, 0x00000000 },
+{ 0x00000001, 0x208422e8, 0x00000257, 0x00000000 },
+{ 0x00000001, 0x208822e8, 0x00000267, 0x00000000 },
+{ 0x00000001, 0x20a822e8, 0x000002e7, 0x00000000 },
+{ 0x00000001, 0x20ac22e8, 0x000002f7, 0x00000000 },
+{ 0x00000001, 0x208c22e8, 0x00000277, 0x00000000 },
+{ 0x00000001, 0x209022e8, 0x00000287, 0x00000000 },
+{ 0x00000001, 0x20b022e8, 0x00000307, 0x00000000 },
+{ 0x00000001, 0x20b422e8, 0x00000317, 0x00000000 },
+{ 0x00000001, 0x209422e8, 0x00000297, 0x00000000 },
+{ 0x00000001, 0x209822e8, 0x000002a7, 0x00000000 },
+{ 0x00000001, 0x20b822e8, 0x00000327, 0x00000000 },
+{ 0x00000001, 0x20bc22e8, 0x00000337, 0x00000000 },
+{ 0x00000001, 0x209c22e8, 0x000002b7, 0x00000000 },
+{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072017 },
+{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472017 },
+{ 0x00000001, 0x20a022e8, 0x000002c8, 0x00000000 },
+{ 0x00000001, 0x208022e8, 0x00000248, 0x00000000 },
+{ 0x00000001, 0x208422e8, 0x00000258, 0x00000000 },
+{ 0x00000001, 0x20a422e8, 0x000002d8, 0x00000000 },
+{ 0x00000001, 0x20a822e8, 0x000002e8, 0x00000000 },
+{ 0x00000001, 0x208822e8, 0x00000268, 0x00000000 },
+{ 0x00000001, 0x208c22e8, 0x00000278, 0x00000000 },
+{ 0x00000001, 0x20ac22e8, 0x000002f8, 0x00000000 },
+{ 0x00000001, 0x20b022e8, 0x00000308, 0x00000000 },
+{ 0x00000001, 0x209022e8, 0x00000288, 0x00000000 },
+{ 0x00000001, 0x209422e8, 0x00000298, 0x00000000 },
+{ 0x00000001, 0x20b422e8, 0x00000318, 0x00000000 },
+{ 0x00000001, 0x20b822e8, 0x00000328, 0x00000000 },
+{ 0x00000001, 0x209822e8, 0x000002a8, 0x00000000 },
+{ 0x00000001, 0x209c22e8, 0x000002b8, 0x00000000 },
+{ 0x00000001, 0x20bc22e8, 0x00000338, 0x00000000 },
+{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472003 },
+{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072003 },
+{ 0x00000001, 0x21803a28, 0x00000100, 0x00000000 },
+{ 0x00000001, 0x22203a28, 0x00000104, 0x00000000 },
+{ 0x00000001, 0x61a00a88, 0x00000180, 0x00000000 },
+{ 0x00000001, 0x206022e8, 0x000002c1, 0x00000000 },
+{ 0x00000001, 0x204022e8, 0x00000241, 0x00000000 },
+{ 0x00000001, 0x208022e8, 0x00000242, 0x00000000 },
+{ 0x00000001, 0x61a80a88, 0x00000220, 0x00000000 },
+{ 0x00000001, 0x22203a28, 0x00000108, 0x00000000 },
+{ 0x00000001, 0x61b00a88, 0x00000220, 0x00000000 },
+{ 0x00000001, 0x22203a28, 0x0000010c, 0x00000000 },
+{ 0x00000001, 0x61b80a88, 0x00000220, 0x00000000 },
+{ 0x00000001, 0x22203a28, 0x00000110, 0x00000000 },
+{ 0x00000001, 0x61c00a88, 0x00000220, 0x00000000 },
+{ 0x00000001, 0x22203a28, 0x00000114, 0x00000000 },
+{ 0x00000001, 0x61c80a88, 0x00000220, 0x00000000 },
+{ 0x00000001, 0x22203a28, 0x00000118, 0x00000000 },
+{ 0x00000001, 0x61d00a88, 0x00000220, 0x00000000 },
+{ 0x00000001, 0x22203a28, 0x0000011c, 0x00000000 },
+{ 0x00000001, 0x61d80a88, 0x00000220, 0x00000000 },
+{ 0x00000001, 0x22203a28, 0x00000120, 0x00000000 },
+{ 0x00000001, 0x61e00a88, 0x00000220, 0x00000000 },
+{ 0x00000001, 0x22203a28, 0x00000124, 0x00000000 },
+{ 0x00000001, 0x61e80a88, 0x00000220, 0x00000000 },
+{ 0x00000001, 0x22203a28, 0x00000128, 0x00000000 },
+{ 0x00000001, 0x61f00a88, 0x00000220, 0x00000000 },
+{ 0x00000001, 0x22203a28, 0x0000012c, 0x00000000 },
+{ 0x00000001, 0x61f80a88, 0x00000220, 0x00000000 },
+{ 0x00000001, 0x22203a28, 0x00000130, 0x00000000 },
+{ 0x00000001, 0x206422e8, 0x000002d1, 0x00000000 },
+{ 0x00000001, 0x204422e8, 0x00000251, 0x00000000 },
+{ 0x00000001, 0x62000a88, 0x00000220, 0x00000000 },
+{ 0x00000001, 0x22203a28, 0x00000134, 0x00000000 },
+{ 0x00000001, 0x204822e8, 0x00000261, 0x00000000 },
+{ 0x00000001, 0x206822e8, 0x000002e1, 0x00000000 },
+{ 0x00000001, 0x206c22e8, 0x000002f1, 0x00000000 },
+{ 0x00000001, 0x204c22e8, 0x00000271, 0x00000000 },
+{ 0x00000001, 0x62080a88, 0x00000220, 0x00000000 },
+{ 0x00000001, 0x22203a28, 0x00000138, 0x00000000 },
+{ 0x00000001, 0x205022e8, 0x00000281, 0x00000000 },
+{ 0x00000001, 0x207022e8, 0x00000301, 0x00000000 },
+{ 0x00000001, 0x208422e8, 0x00000252, 0x00000000 },
+{ 0x00000001, 0x207422e8, 0x00000311, 0x00000000 },
+{ 0x00000001, 0x205422e8, 0x00000291, 0x00000000 },
+{ 0x00000001, 0x62100a88, 0x00000220, 0x00000000 },
+{ 0x00000001, 0x208822e8, 0x00000262, 0x00000000 },
+{ 0x00000001, 0x22203a28, 0x0000013c, 0x00000000 },
+{ 0x00000001, 0x205822e8, 0x000002a1, 0x00000000 },
+{ 0x00000001, 0x207822e8, 0x00000321, 0x00000000 },
+{ 0x00000001, 0x208c22e8, 0x00000272, 0x00000000 },
+{ 0x00000001, 0x207c22e8, 0x00000331, 0x00000000 },
+{ 0x00000001, 0x205c22e8, 0x000002b1, 0x00000000 },
+{ 0x00000001, 0x62180a88, 0x00000220, 0x00000000 },
+{ 0x00000001, 0x209022e8, 0x00000282, 0x00000000 },
+{ 0x00000001, 0x20a022e8, 0x000002c2, 0x00000000 },
+{ 0x00000001, 0x209422e8, 0x00000292, 0x00000000 },
+{ 0x00000001, 0x20a422e8, 0x000002d2, 0x00000000 },
+{ 0x00000001, 0x20a822e8, 0x000002e2, 0x00000000 },
+{ 0x00000001, 0x20ac22e8, 0x000002f2, 0x00000000 },
+{ 0x00000001, 0x20b022e8, 0x00000302, 0x00000000 },
+{ 0x00000001, 0x20b422e8, 0x00000312, 0x00000000 },
+{ 0x00000001, 0x209822e8, 0x000002a2, 0x00000000 },
+{ 0x00000001, 0x20b822e8, 0x00000322, 0x00000000 },
+{ 0x00000001, 0x20bc22e8, 0x00000332, 0x00000000 },
+{ 0x00000001, 0x209c22e8, 0x000002b2, 0x00000000 },
+{ 0x00800041, 0x20c03ae8, 0x3e8d0080, 0x3875735f },
+{ 0x00000001, 0x208022e8, 0x00000243, 0x00000000 },
+{ 0x00000001, 0x20a022e8, 0x000002c3, 0x00000000 },
+{ 0x0060015b, 0x081e0000, 0x392061c8, 0x00872003 },
+{ 0x0060015b, 0x091e0000, 0x392071c8, 0x00c72003 },
+{ 0x00000001, 0x208422e8, 0x00000253, 0x00000000 },
+{ 0x00000001, 0x20a422e8, 0x000002d3, 0x00000000 },
+{ 0x00000001, 0x208822e8, 0x00000263, 0x00000000 },
+{ 0x00000001, 0x208c22e8, 0x00000273, 0x00000000 },
+{ 0x00000001, 0x20a822e8, 0x000002e3, 0x00000000 },
+{ 0x00000001, 0x20ac22e8, 0x000002f3, 0x00000000 },
+{ 0x00000001, 0x209022e8, 0x00000283, 0x00000000 },
+{ 0x00000001, 0x209422e8, 0x00000293, 0x00000000 },
+{ 0x00000001, 0x20b022e8, 0x00000303, 0x00000000 },
+{ 0x00000001, 0x20b422e8, 0x00000313, 0x00000000 },
+{ 0x00000001, 0x209822e8, 0x000002a3, 0x00000000 },
+{ 0x00000001, 0x209c22e8, 0x000002b3, 0x00000000 },
+{ 0x00000001, 0x20b822e8, 0x00000323, 0x00000000 },
+{ 0x00000001, 0x20bc22e8, 0x00000333, 0x00000000 },
+{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072014 },
+{ 0x00000001, 0x208022e8, 0x00000244, 0x00000000 },
+{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472014 },
+{ 0x00000001, 0x20a022e8, 0x000002c4, 0x00000000 },
+{ 0x00000001, 0x208422e8, 0x00000254, 0x00000000 },
+{ 0x00000001, 0x208822e8, 0x00000264, 0x00000000 },
+{ 0x00000001, 0x20a422e8, 0x000002d4, 0x00000000 },
+{ 0x00000001, 0x20a822e8, 0x000002e4, 0x00000000 },
+{ 0x00000001, 0x208c22e8, 0x00000274, 0x00000000 },
+{ 0x00000001, 0x209022e8, 0x00000284, 0x00000000 },
+{ 0x00000001, 0x20ac22e8, 0x000002f4, 0x00000000 },
+{ 0x00000001, 0x20b022e8, 0x00000304, 0x00000000 },
+{ 0x00000001, 0x209422e8, 0x00000294, 0x00000000 },
+{ 0x00000001, 0x209822e8, 0x000002a4, 0x00000000 },
+{ 0x00000001, 0x20b422e8, 0x00000314, 0x00000000 },
+{ 0x00000001, 0x20b822e8, 0x00000324, 0x00000000 },
+{ 0x00000001, 0x209c22e8, 0x000002b4, 0x00000000 },
+{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072015 },
+{ 0x00000001, 0x20bc22e8, 0x00000334, 0x00000000 },
+{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472015 },
+{ 0x00000001, 0x208022e8, 0x00000245, 0x00000000 },
+{ 0x00000001, 0x208422e8, 0x00000255, 0x00000000 },
+{ 0x00000001, 0x20a022e8, 0x000002c5, 0x00000000 },
+{ 0x00000001, 0x208822e8, 0x00000265, 0x00000000 },
+{ 0x00000001, 0x20a422e8, 0x000002d5, 0x00000000 },
+{ 0x00000001, 0x208c22e8, 0x00000275, 0x00000000 },
+{ 0x00000001, 0x20a822e8, 0x000002e5, 0x00000000 },
+{ 0x00000001, 0x20ac22e8, 0x000002f5, 0x00000000 },
+{ 0x00000001, 0x209022e8, 0x00000285, 0x00000000 },
+{ 0x00000001, 0x209422e8, 0x00000295, 0x00000000 },
+{ 0x00000001, 0x20b022e8, 0x00000305, 0x00000000 },
+{ 0x00000001, 0x20b422e8, 0x00000315, 0x00000000 },
+{ 0x00000001, 0x209822e8, 0x000002a5, 0x00000000 },
+{ 0x00000001, 0x209c22e8, 0x000002b5, 0x00000000 },
+{ 0x00000001, 0x20b822e8, 0x00000325, 0x00000000 },
+{ 0x00000001, 0x20bc22e8, 0x00000335, 0x00000000 },
+{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072016 },
+{ 0x00000001, 0x208022e8, 0x00000246, 0x00000000 },
+{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472016 },
+{ 0x00000001, 0x20a022e8, 0x000002c6, 0x00000000 },
+{ 0x00000001, 0x208422e8, 0x00000256, 0x00000000 },
+{ 0x00000001, 0x208822e8, 0x00000266, 0x00000000 },
+{ 0x00000001, 0x20a422e8, 0x000002d6, 0x00000000 },
+{ 0x00000001, 0x20a822e8, 0x000002e6, 0x00000000 },
+{ 0x00000001, 0x208c22e8, 0x00000276, 0x00000000 },
+{ 0x00000001, 0x209022e8, 0x00000286, 0x00000000 },
+{ 0x00000001, 0x20ac22e8, 0x000002f6, 0x00000000 },
+{ 0x00000001, 0x20b022e8, 0x00000306, 0x00000000 },
+{ 0x00000001, 0x209422e8, 0x00000296, 0x00000000 },
+{ 0x00000001, 0x209822e8, 0x000002a6, 0x00000000 },
+{ 0x00000001, 0x20b422e8, 0x00000316, 0x00000000 },
+{ 0x00000001, 0x20b822e8, 0x00000326, 0x00000000 },
+{ 0x00000001, 0x209c22e8, 0x000002b6, 0x00000000 },
+{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072015 },
+{ 0x00000001, 0x20bc22e8, 0x00000336, 0x00000000 },
+{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472015 },
+{ 0x00000001, 0x208022e8, 0x00000247, 0x00000000 },
+{ 0x00000001, 0x208422e8, 0x00000257, 0x00000000 },
+{ 0x00000001, 0x20a022e8, 0x000002c7, 0x00000000 },
+{ 0x00000001, 0x20a422e8, 0x000002d7, 0x00000000 },
+{ 0x00000001, 0x208822e8, 0x00000267, 0x00000000 },
+{ 0x00000001, 0x208c22e8, 0x00000277, 0x00000000 },
+{ 0x00000001, 0x20a822e8, 0x000002e7, 0x00000000 },
+{ 0x00000001, 0x20ac22e8, 0x000002f7, 0x00000000 },
+{ 0x00000001, 0x209022e8, 0x00000287, 0x00000000 },
+{ 0x00000001, 0x209422e8, 0x00000297, 0x00000000 },
+{ 0x00000001, 0x20b022e8, 0x00000307, 0x00000000 },
+{ 0x00000001, 0x20b422e8, 0x00000317, 0x00000000 },
+{ 0x00000001, 0x209822e8, 0x000002a7, 0x00000000 },
+{ 0x00000001, 0x209c22e8, 0x000002b7, 0x00000000 },
+{ 0x00000001, 0x20b822e8, 0x00000327, 0x00000000 },
+{ 0x00000001, 0x20bc22e8, 0x00000337, 0x00000000 },
+{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072014 },
+{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472014 },
+{ 0x00000001, 0x208022e8, 0x00000248, 0x00000000 },
+{ 0x00000001, 0x20a022e8, 0x000002c8, 0x00000000 },
+{ 0x00000001, 0x208422e8, 0x00000258, 0x00000000 },
+{ 0x00000001, 0x208822e8, 0x00000268, 0x00000000 },
+{ 0x00000001, 0x208c22e8, 0x00000278, 0x00000000 },
+{ 0x00000001, 0x209022e8, 0x00000288, 0x00000000 },
+{ 0x00000001, 0x209422e8, 0x00000298, 0x00000000 },
+{ 0x00000001, 0x209822e8, 0x000002a8, 0x00000000 },
+{ 0x00000001, 0x209c22e8, 0x000002b8, 0x00000000 },
+{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072017 },
+{ 0x00000001, 0x208022e8, 0x00000249, 0x00000000 },
+{ 0x00000001, 0x208422e8, 0x00000259, 0x00000000 },
+{ 0x00000001, 0x208822e8, 0x00000269, 0x00000000 },
+{ 0x00000001, 0x208c22e8, 0x00000279, 0x00000000 },
+{ 0x00000001, 0x209022e8, 0x00000289, 0x00000000 },
+{ 0x00000001, 0x209422e8, 0x00000299, 0x00000000 },
+{ 0x00000001, 0x209822e8, 0x000002a9, 0x00000000 },
+{ 0x00000001, 0x20a422e8, 0x000002d8, 0x00000000 },
+{ 0x00000001, 0x209c22e8, 0x000002b9, 0x00000000 },
+{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072003 },
+{ 0x00000001, 0x20a822e8, 0x000002e8, 0x00000000 },
+{ 0x00000001, 0x20ac22e8, 0x000002f8, 0x00000000 },
+{ 0x00000001, 0x22203a28, 0x00000100, 0x00000000 },
+{ 0x00000001, 0x61a10a88, 0x00000220, 0x00000000 },
+{ 0x00000001, 0x20b022e8, 0x00000308, 0x00000000 },
+{ 0x00000001, 0x20b422e8, 0x00000318, 0x00000000 },
+{ 0x00000001, 0x22203a28, 0x00000104, 0x00000000 },
+{ 0x00000001, 0x61a90a88, 0x00000220, 0x00000000 },
+{ 0x00000001, 0x20b822e8, 0x00000328, 0x00000000 },
+{ 0x00000001, 0x20bc22e8, 0x00000338, 0x00000000 },
+{ 0x00000001, 0x22203a28, 0x00000108, 0x00000000 },
+{ 0x00000001, 0x61b10a88, 0x00000220, 0x00000000 },
+{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472017 },
+{ 0x00000001, 0x20a022e8, 0x000002c9, 0x00000000 },
+{ 0x00000001, 0x22203a28, 0x0000010c, 0x00000000 },
+{ 0x00000001, 0x61b90a88, 0x00000220, 0x00000000 },
+{ 0x00000001, 0x20a422e8, 0x000002d9, 0x00000000 },
+{ 0x00000001, 0x20a822e8, 0x000002e9, 0x00000000 },
+{ 0x00000001, 0x22203a28, 0x00000110, 0x00000000 },
+{ 0x00000001, 0x61c10a88, 0x00000220, 0x00000000 },
+{ 0x00000001, 0x20ac22e8, 0x000002f9, 0x00000000 },
+{ 0x00000001, 0x20b022e8, 0x00000309, 0x00000000 },
+{ 0x00000001, 0x22203a28, 0x00000114, 0x00000000 },
+{ 0x00000001, 0x61c90a88, 0x00000220, 0x00000000 },
+{ 0x00000001, 0x20b422e8, 0x00000319, 0x00000000 },
+{ 0x00000001, 0x20b822e8, 0x00000329, 0x00000000 },
+{ 0x00000001, 0x22203a28, 0x00000118, 0x00000000 },
+{ 0x00000001, 0x61d10a88, 0x00000220, 0x00000000 },
+{ 0x00000001, 0x20bc22e8, 0x00000339, 0x00000000 },
+{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472003 },
+{ 0x00000001, 0x22203a28, 0x0000011c, 0x00000000 },
+{ 0x00000001, 0x61d90a88, 0x00000220, 0x00000000 },
+{ 0x00000001, 0x22203a28, 0x00000120, 0x00000000 },
+{ 0x00000001, 0x20a022e8, 0x000002c3, 0x00000000 },
+{ 0x00000001, 0x208022e8, 0x00000243, 0x00000000 },
+{ 0x00000001, 0x206022e8, 0x000002c2, 0x00000000 },
+{ 0x00000001, 0x204022e8, 0x00000242, 0x00000000 },
+{ 0x00000001, 0x61e10a88, 0x00000220, 0x00000000 },
+{ 0x00000001, 0x22203a28, 0x00000124, 0x00000000 },
+{ 0x00000001, 0x61e90a88, 0x00000220, 0x00000000 },
+{ 0x00000001, 0x22203a28, 0x00000128, 0x00000000 },
+{ 0x00000001, 0x61f10a88, 0x00000220, 0x00000000 },
+{ 0x00000001, 0x22203a28, 0x0000012c, 0x00000000 },
+{ 0x00000001, 0x20a422e8, 0x000002d3, 0x00000000 },
+{ 0x00000001, 0x208422e8, 0x00000253, 0x00000000 },
+{ 0x00000001, 0x61f90a88, 0x00000220, 0x00000000 },
+{ 0x00000001, 0x206422e8, 0x000002d2, 0x00000000 },
+{ 0x00000001, 0x20a822e8, 0x000002e3, 0x00000000 },
+{ 0x00000001, 0x208822e8, 0x00000263, 0x00000000 },
+{ 0x00000001, 0x204422e8, 0x00000252, 0x00000000 },
+{ 0x00000001, 0x22203a28, 0x00000130, 0x00000000 },
+{ 0x00000001, 0x62010a88, 0x00000220, 0x00000000 },
+{ 0x00000001, 0x204822e8, 0x00000262, 0x00000000 },
+{ 0x00000001, 0x208c22e8, 0x00000273, 0x00000000 },
+{ 0x00000001, 0x20ac22e8, 0x000002f3, 0x00000000 },
+{ 0x00000001, 0x206822e8, 0x000002e2, 0x00000000 },
+{ 0x00000001, 0x206c22e8, 0x000002f2, 0x00000000 },
+{ 0x00000001, 0x20b022e8, 0x00000303, 0x00000000 },
+{ 0x00000001, 0x209022e8, 0x00000283, 0x00000000 },
+{ 0x00000001, 0x204c22e8, 0x00000272, 0x00000000 },
+{ 0x00000001, 0x22203a28, 0x00000134, 0x00000000 },
+{ 0x00000001, 0x62090a88, 0x00000220, 0x00000000 },
+{ 0x00000001, 0x205022e8, 0x00000282, 0x00000000 },
+{ 0x00000001, 0x209422e8, 0x00000293, 0x00000000 },
+{ 0x00000001, 0x20b422e8, 0x00000313, 0x00000000 },
+{ 0x00000001, 0x207022e8, 0x00000302, 0x00000000 },
+{ 0x00000001, 0x207422e8, 0x00000312, 0x00000000 },
+{ 0x00000001, 0x20b822e8, 0x00000323, 0x00000000 },
+{ 0x00000001, 0x209822e8, 0x000002a3, 0x00000000 },
+{ 0x00000001, 0x205422e8, 0x00000292, 0x00000000 },
+{ 0x00000001, 0x22203a28, 0x00000138, 0x00000000 },
+{ 0x00000001, 0x62110a88, 0x00000220, 0x00000000 },
+{ 0x00000001, 0x205822e8, 0x000002a2, 0x00000000 },
+{ 0x00000001, 0x209c22e8, 0x000002b3, 0x00000000 },
+{ 0x00000001, 0x20bc22e8, 0x00000333, 0x00000000 },
+{ 0x00000001, 0x207822e8, 0x00000322, 0x00000000 },
+{ 0x00000001, 0x207c22e8, 0x00000332, 0x00000000 },
+{ 0x00800041, 0x20c03ae8, 0x3e8d0080, 0x3875735f },
+{ 0x00000001, 0x205c22e8, 0x000002b2, 0x00000000 },
+{ 0x00000001, 0x22203a28, 0x0000013c, 0x00000000 },
+{ 0x00000001, 0x62190a88, 0x00000220, 0x00000000 },
+{ 0x0060015b, 0x081e0000, 0x392061c8, 0x00872003 },
+{ 0x0060015b, 0x091e0000, 0x392071c8, 0x00c72003 },
+{ 0x00000001, 0x208022e8, 0x00000244, 0x00000000 },
+{ 0x00000001, 0x20a022e8, 0x000002c4, 0x00000000 },
+{ 0x00000001, 0x208422e8, 0x00000254, 0x00000000 },
+{ 0x00000001, 0x20a422e8, 0x000002d4, 0x00000000 },
+{ 0x00000001, 0x208822e8, 0x00000264, 0x00000000 },
+{ 0x00000001, 0x20a822e8, 0x000002e4, 0x00000000 },
+{ 0x00000001, 0x20ac22e8, 0x000002f4, 0x00000000 },
+{ 0x00000001, 0x208c22e8, 0x00000274, 0x00000000 },
+{ 0x00000001, 0x209022e8, 0x00000284, 0x00000000 },
+{ 0x00000001, 0x20b022e8, 0x00000304, 0x00000000 },
+{ 0x00000001, 0x20b422e8, 0x00000314, 0x00000000 },
+{ 0x00000001, 0x209422e8, 0x00000294, 0x00000000 },
+{ 0x00000001, 0x209822e8, 0x000002a4, 0x00000000 },
+{ 0x00000001, 0x20b822e8, 0x00000324, 0x00000000 },
+{ 0x00000001, 0x20bc22e8, 0x00000334, 0x00000000 },
+{ 0x00000001, 0x209c22e8, 0x000002b4, 0x00000000 },
+{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072014 },
+{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472014 },
+{ 0x00000001, 0x20a022e8, 0x000002c5, 0x00000000 },
+{ 0x00000001, 0x208022e8, 0x00000245, 0x00000000 },
+{ 0x00000001, 0x208422e8, 0x00000255, 0x00000000 },
+{ 0x00000001, 0x20a422e8, 0x000002d5, 0x00000000 },
+{ 0x00000001, 0x20a822e8, 0x000002e5, 0x00000000 },
+{ 0x00000001, 0x208822e8, 0x00000265, 0x00000000 },
+{ 0x00000001, 0x208c22e8, 0x00000275, 0x00000000 },
+{ 0x00000001, 0x20ac22e8, 0x000002f5, 0x00000000 },
+{ 0x00000001, 0x20b022e8, 0x00000305, 0x00000000 },
+{ 0x00000001, 0x209022e8, 0x00000285, 0x00000000 },
+{ 0x00000001, 0x209422e8, 0x00000295, 0x00000000 },
+{ 0x00000001, 0x20b422e8, 0x00000315, 0x00000000 },
+{ 0x00000001, 0x20b822e8, 0x00000325, 0x00000000 },
+{ 0x00000001, 0x209822e8, 0x000002a5, 0x00000000 },
+{ 0x00000001, 0x209c22e8, 0x000002b5, 0x00000000 },
+{ 0x00000001, 0x20bc22e8, 0x00000335, 0x00000000 },
+{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472015 },
+{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072015 },
+{ 0x00000001, 0x208022e8, 0x00000246, 0x00000000 },
+{ 0x00000001, 0x20a022e8, 0x000002c6, 0x00000000 },
+{ 0x00000001, 0x20a422e8, 0x000002d6, 0x00000000 },
+{ 0x00000001, 0x208422e8, 0x00000256, 0x00000000 },
+{ 0x00000001, 0x208822e8, 0x00000266, 0x00000000 },
+{ 0x00000001, 0x20a822e8, 0x000002e6, 0x00000000 },
+{ 0x00000001, 0x20ac22e8, 0x000002f6, 0x00000000 },
+{ 0x00000001, 0x208c22e8, 0x00000276, 0x00000000 },
+{ 0x00000001, 0x209022e8, 0x00000286, 0x00000000 },
+{ 0x00000001, 0x20b022e8, 0x00000306, 0x00000000 },
+{ 0x00000001, 0x20b422e8, 0x00000316, 0x00000000 },
+{ 0x00000001, 0x209422e8, 0x00000296, 0x00000000 },
+{ 0x00000001, 0x209822e8, 0x000002a6, 0x00000000 },
+{ 0x00000001, 0x20b822e8, 0x00000326, 0x00000000 },
+{ 0x00000001, 0x20bc22e8, 0x00000336, 0x00000000 },
+{ 0x00000001, 0x209c22e8, 0x000002b6, 0x00000000 },
+{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072016 },
+{ 0x00000001, 0x208022e8, 0x00000247, 0x00000000 },
+{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472016 },
+{ 0x00000001, 0x208422e8, 0x00000257, 0x00000000 },
+{ 0x00000001, 0x20a022e8, 0x000002c7, 0x00000000 },
+{ 0x00000001, 0x208822e8, 0x00000267, 0x00000000 },
+{ 0x00000001, 0x208c22e8, 0x00000277, 0x00000000 },
+{ 0x00000001, 0x20a422e8, 0x000002d7, 0x00000000 },
+{ 0x00000001, 0x20a822e8, 0x000002e7, 0x00000000 },
+{ 0x00000001, 0x209022e8, 0x00000287, 0x00000000 },
+{ 0x00000001, 0x209422e8, 0x00000297, 0x00000000 },
+{ 0x00000001, 0x20ac22e8, 0x000002f7, 0x00000000 },
+{ 0x00000001, 0x20b022e8, 0x00000307, 0x00000000 },
+{ 0x00000001, 0x209822e8, 0x000002a7, 0x00000000 },
+{ 0x00000001, 0x209c22e8, 0x000002b7, 0x00000000 },
+{ 0x00000001, 0x20b422e8, 0x00000317, 0x00000000 },
+{ 0x00000001, 0x20b822e8, 0x00000327, 0x00000000 },
+{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072015 },
+{ 0x00000001, 0x208022e8, 0x00000248, 0x00000000 },
+{ 0x00000001, 0x20bc22e8, 0x00000337, 0x00000000 },
+{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472015 },
+{ 0x00000001, 0x208422e8, 0x00000258, 0x00000000 },
+{ 0x00000001, 0x208822e8, 0x00000268, 0x00000000 },
+{ 0x00000001, 0x20a022e8, 0x000002c8, 0x00000000 },
+{ 0x00000001, 0x20a422e8, 0x000002d8, 0x00000000 },
+{ 0x00000001, 0x208c22e8, 0x00000278, 0x00000000 },
+{ 0x00000001, 0x209022e8, 0x00000288, 0x00000000 },
+{ 0x00000001, 0x20a822e8, 0x000002e8, 0x00000000 },
+{ 0x00000001, 0x20ac22e8, 0x000002f8, 0x00000000 },
+{ 0x00000001, 0x209422e8, 0x00000298, 0x00000000 },
+{ 0x00000001, 0x209822e8, 0x000002a8, 0x00000000 },
+{ 0x00000001, 0x20b022e8, 0x00000308, 0x00000000 },
+{ 0x00000001, 0x20b422e8, 0x00000318, 0x00000000 },
+{ 0x00000001, 0x209c22e8, 0x000002b8, 0x00000000 },
+{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072014 },
+{ 0x00000001, 0x20b822e8, 0x00000328, 0x00000000 },
+{ 0x00000001, 0x20bc22e8, 0x00000338, 0x00000000 },
+{ 0x00000001, 0x208022e8, 0x00000249, 0x00000000 },
+{ 0x00000001, 0x208422e8, 0x00000259, 0x00000000 },
+{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472014 },
+{ 0x00000001, 0x20a022e8, 0x000002c9, 0x00000000 },
+{ 0x00000001, 0x208822e8, 0x00000269, 0x00000000 },
+{ 0x00000001, 0x208c22e8, 0x00000279, 0x00000000 },
+{ 0x00000001, 0x20a422e8, 0x000002d9, 0x00000000 },
+{ 0x00000001, 0x20a822e8, 0x000002e9, 0x00000000 },
+{ 0x00000001, 0x209022e8, 0x00000289, 0x00000000 },
+{ 0x00000001, 0x209422e8, 0x00000299, 0x00000000 },
+{ 0x00000001, 0x20ac22e8, 0x000002f9, 0x00000000 },
+{ 0x00000001, 0x20b022e8, 0x00000309, 0x00000000 },
+{ 0x00000001, 0x209822e8, 0x000002a9, 0x00000000 },
+{ 0x00000001, 0x209c22e8, 0x000002b9, 0x00000000 },
+{ 0x00000001, 0x20b422e8, 0x00000319, 0x00000000 },
+{ 0x00000001, 0x20b822e8, 0x00000329, 0x00000000 },
+{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072017 },
+{ 0x00000001, 0x20bc22e8, 0x00000339, 0x00000000 },
+{ 0x00000001, 0x208022e8, 0x0000024a, 0x00000000 },
+{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472017 },
+{ 0x00000001, 0x20a022e8, 0x000002ca, 0x00000000 },
+{ 0x00000001, 0x208422e8, 0x0000025a, 0x00000000 },
+{ 0x00000001, 0x208822e8, 0x0000026a, 0x00000000 },
+{ 0x00000001, 0x20a422e8, 0x000002da, 0x00000000 },
+{ 0x00000001, 0x20a822e8, 0x000002ea, 0x00000000 },
+{ 0x00000001, 0x208c22e8, 0x0000027a, 0x00000000 },
+{ 0x00000001, 0x209022e8, 0x0000028a, 0x00000000 },
+{ 0x00000001, 0x20ac22e8, 0x000002fa, 0x00000000 },
+{ 0x00000001, 0x20b022e8, 0x0000030a, 0x00000000 },
+{ 0x00000001, 0x209422e8, 0x0000029a, 0x00000000 },
+{ 0x00000001, 0x209822e8, 0x000002aa, 0x00000000 },
+{ 0x00000001, 0x20b422e8, 0x0000031a, 0x00000000 },
+{ 0x00000001, 0x20b822e8, 0x0000032a, 0x00000000 },
+{ 0x00000001, 0x209c22e8, 0x000002ba, 0x00000000 },
+{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072003 },
+{ 0x00000001, 0x20bc22e8, 0x0000033a, 0x00000000 },
+{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472003 },
+{ 0x00000001, 0x22203a28, 0x00000100, 0x00000000 },
+{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 },
+{ 0x00000001, 0x22203a28, 0x00000104, 0x00000000 },
+{ 0x00000001, 0x41a22288, 0x00000024, 0x00000000 },
+{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 },
+{ 0x00000001, 0x22203a28, 0x00000108, 0x00000000 },
+{ 0x00000001, 0x41aa2288, 0x00000024, 0x00000000 },
+{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 },
+{ 0x00000001, 0x22203a28, 0x0000010c, 0x00000000 },
+{ 0x00000001, 0x41b22288, 0x00000024, 0x00000000 },
+{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 },
+{ 0x00000001, 0x22203a28, 0x00000110, 0x00000000 },
+{ 0x00000001, 0x41ba2288, 0x00000024, 0x00000000 },
+{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 },
+{ 0x00000001, 0x22203a28, 0x00000114, 0x00000000 },
+{ 0x00000001, 0x41c22288, 0x00000024, 0x00000000 },
+{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 },
+{ 0x00000001, 0x22203a28, 0x00000118, 0x00000000 },
+{ 0x00000001, 0x41ca2288, 0x00000024, 0x00000000 },
+{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 },
+{ 0x00000001, 0x22203a28, 0x0000011c, 0x00000000 },
+{ 0x00000001, 0x41d22288, 0x00000024, 0x00000000 },
+{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 },
+{ 0x00000001, 0x22203a28, 0x00000120, 0x00000000 },
+{ 0x00000001, 0x41da2288, 0x00000024, 0x00000000 },
+{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 },
+{ 0x00000001, 0x22203a28, 0x00000124, 0x00000000 },
+{ 0x00000001, 0x41e22288, 0x00000024, 0x00000000 },
+{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 },
+{ 0x00000001, 0x41ea2288, 0x00000024, 0x00000000 },
+{ 0x00000001, 0x22203a28, 0x00000128, 0x00000000 },
+{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 },
+{ 0x00000001, 0x20a022e8, 0x000002c4, 0x00000000 },
+{ 0x00000001, 0x208022e8, 0x00000244, 0x00000000 },
+{ 0x00000001, 0x204022e8, 0x00000243, 0x00000000 },
+{ 0x00000001, 0x206022e8, 0x000002c3, 0x00000000 },
+{ 0x00000001, 0x22203a28, 0x0000012c, 0x00000000 },
+{ 0x00000001, 0x41f22288, 0x00000024, 0x00000000 },
+{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 },
+{ 0x00000001, 0x20a422e8, 0x000002d4, 0x00000000 },
+{ 0x00000001, 0x208422e8, 0x00000254, 0x00000000 },
+{ 0x00000001, 0x22203a28, 0x00000130, 0x00000000 },
+{ 0x00000001, 0x41fa2288, 0x00000024, 0x00000000 },
+{ 0x00000001, 0x204422e8, 0x00000253, 0x00000000 },
+{ 0x00000001, 0x20a822e8, 0x000002e4, 0x00000000 },
+{ 0x00000001, 0x208822e8, 0x00000264, 0x00000000 },
+{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 },
+{ 0x00000001, 0x204822e8, 0x00000263, 0x00000000 },
+{ 0x00000001, 0x20ac22e8, 0x000002f4, 0x00000000 },
+{ 0x00000001, 0x208c22e8, 0x00000274, 0x00000000 },
+{ 0x00000001, 0x206422e8, 0x000002d3, 0x00000000 },
+{ 0x00000001, 0x22203a28, 0x00000134, 0x00000000 },
+{ 0x00000001, 0x42022288, 0x00000024, 0x00000000 },
+{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 },
+{ 0x00000001, 0x206822e8, 0x000002e3, 0x00000000 },
+{ 0x00000001, 0x209022e8, 0x00000284, 0x00000000 },
+{ 0x00000001, 0x20b022e8, 0x00000304, 0x00000000 },
+{ 0x00000001, 0x204c22e8, 0x00000273, 0x00000000 },
+{ 0x00000001, 0x205022e8, 0x00000283, 0x00000000 },
+{ 0x00000001, 0x20b422e8, 0x00000314, 0x00000000 },
+{ 0x00000001, 0x209422e8, 0x00000294, 0x00000000 },
+{ 0x00000001, 0x206c22e8, 0x000002f3, 0x00000000 },
+{ 0x00000001, 0x22203a28, 0x00000138, 0x00000000 },
+{ 0x00000001, 0x420a2288, 0x00000024, 0x00000000 },
+{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 },
+{ 0x00000001, 0x207022e8, 0x00000303, 0x00000000 },
+{ 0x00000001, 0x209822e8, 0x000002a4, 0x00000000 },
+{ 0x00000001, 0x20b822e8, 0x00000324, 0x00000000 },
+{ 0x00000001, 0x205422e8, 0x00000293, 0x00000000 },
+{ 0x00000001, 0x205822e8, 0x000002a3, 0x00000000 },
+{ 0x00000001, 0x20bc22e8, 0x00000334, 0x00000000 },
+{ 0x00000001, 0x209c22e8, 0x000002b4, 0x00000000 },
+{ 0x00000001, 0x207422e8, 0x00000313, 0x00000000 },
+{ 0x00000001, 0x22203a28, 0x0000013c, 0x00000000 },
+{ 0x00000001, 0x42122288, 0x00000024, 0x00000000 },
+{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 },
+{ 0x00000001, 0x207822e8, 0x00000323, 0x00000000 },
+{ 0x00800041, 0x20c03ae8, 0x3e8d0080, 0x3875735f },
+{ 0x00000001, 0x205c22e8, 0x000002b3, 0x00000000 },
+{ 0x00000001, 0x207c22e8, 0x00000333, 0x00000000 },
+{ 0x0060015b, 0x081e0000, 0x392061c8, 0x00872003 },
+{ 0x00000001, 0x421a2288, 0x00000024, 0x00000000 },
+{ 0x0060015b, 0x091e0000, 0x392071c8, 0x00c72003 },
+{ 0x00000001, 0x208022e8, 0x00000245, 0x00000000 },
+{ 0x00000001, 0x20a022e8, 0x000002c5, 0x00000000 },
+{ 0x00000001, 0x208422e8, 0x00000255, 0x00000000 },
+{ 0x00000001, 0x20a422e8, 0x000002d5, 0x00000000 },
+{ 0x00000001, 0x208822e8, 0x00000265, 0x00000000 },
+{ 0x00000001, 0x208c22e8, 0x00000275, 0x00000000 },
+{ 0x00000001, 0x20a822e8, 0x000002e5, 0x00000000 },
+{ 0x00000001, 0x20ac22e8, 0x000002f5, 0x00000000 },
+{ 0x00000001, 0x209022e8, 0x00000285, 0x00000000 },
+{ 0x00000001, 0x209422e8, 0x00000295, 0x00000000 },
+{ 0x00000001, 0x20b022e8, 0x00000305, 0x00000000 },
+{ 0x00000001, 0x20b422e8, 0x00000315, 0x00000000 },
+{ 0x00000001, 0x209822e8, 0x000002a5, 0x00000000 },
+{ 0x00000001, 0x209c22e8, 0x000002b5, 0x00000000 },
+{ 0x00000001, 0x20b822e8, 0x00000325, 0x00000000 },
+{ 0x00000001, 0x20bc22e8, 0x00000335, 0x00000000 },
+{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072014 },
+{ 0x00000001, 0x208022e8, 0x00000246, 0x00000000 },
+{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472014 },
+{ 0x00000001, 0x20a022e8, 0x000002c6, 0x00000000 },
+{ 0x00000001, 0x208422e8, 0x00000256, 0x00000000 },
+{ 0x00000001, 0x208822e8, 0x00000266, 0x00000000 },
+{ 0x00000001, 0x20a422e8, 0x000002d6, 0x00000000 },
+{ 0x00000001, 0x20a822e8, 0x000002e6, 0x00000000 },
+{ 0x00000001, 0x208c22e8, 0x00000276, 0x00000000 },
+{ 0x00000001, 0x209022e8, 0x00000286, 0x00000000 },
+{ 0x00000001, 0x20ac22e8, 0x000002f6, 0x00000000 },
+{ 0x00000001, 0x20b022e8, 0x00000306, 0x00000000 },
+{ 0x00000001, 0x209422e8, 0x00000296, 0x00000000 },
+{ 0x00000001, 0x209822e8, 0x000002a6, 0x00000000 },
+{ 0x00000001, 0x20b422e8, 0x00000316, 0x00000000 },
+{ 0x00000001, 0x20b822e8, 0x00000326, 0x00000000 },
+{ 0x00000001, 0x209c22e8, 0x000002b6, 0x00000000 },
+{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072015 },
+{ 0x00000001, 0x20bc22e8, 0x00000336, 0x00000000 },
+{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472015 },
+{ 0x00000001, 0x208022e8, 0x00000247, 0x00000000 },
+{ 0x00000001, 0x208422e8, 0x00000257, 0x00000000 },
+{ 0x00000001, 0x20a022e8, 0x000002c7, 0x00000000 },
+{ 0x00000001, 0x20a422e8, 0x000002d7, 0x00000000 },
+{ 0x00000001, 0x208822e8, 0x00000267, 0x00000000 },
+{ 0x00000001, 0x208c22e8, 0x00000277, 0x00000000 },
+{ 0x00000001, 0x20a822e8, 0x000002e7, 0x00000000 },
+{ 0x00000001, 0x20ac22e8, 0x000002f7, 0x00000000 },
+{ 0x00000001, 0x209022e8, 0x00000287, 0x00000000 },
+{ 0x00000001, 0x209422e8, 0x00000297, 0x00000000 },
+{ 0x00000001, 0x20b022e8, 0x00000307, 0x00000000 },
+{ 0x00000001, 0x20b422e8, 0x00000317, 0x00000000 },
+{ 0x00000001, 0x209822e8, 0x000002a7, 0x00000000 },
+{ 0x00000001, 0x209c22e8, 0x000002b7, 0x00000000 },
+{ 0x00000001, 0x20b822e8, 0x00000327, 0x00000000 },
+{ 0x00000001, 0x20bc22e8, 0x00000337, 0x00000000 },
+{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072016 },
+{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472016 },
+{ 0x00000001, 0x208022e8, 0x00000248, 0x00000000 },
+{ 0x00000001, 0x208422e8, 0x00000258, 0x00000000 },
+{ 0x00000001, 0x208822e8, 0x00000268, 0x00000000 },
+{ 0x00000001, 0x20a022e8, 0x000002c8, 0x00000000 },
+{ 0x00000001, 0x208c22e8, 0x00000278, 0x00000000 },
+{ 0x00000001, 0x209022e8, 0x00000288, 0x00000000 },
+{ 0x00000001, 0x20a422e8, 0x000002d8, 0x00000000 },
+{ 0x00000001, 0x20a822e8, 0x000002e8, 0x00000000 },
+{ 0x00000001, 0x209422e8, 0x00000298, 0x00000000 },
+{ 0x00000001, 0x209822e8, 0x000002a8, 0x00000000 },
+{ 0x00000001, 0x20ac22e8, 0x000002f8, 0x00000000 },
+{ 0x00000001, 0x20b022e8, 0x00000308, 0x00000000 },
+{ 0x00000001, 0x209c22e8, 0x000002b8, 0x00000000 },
+{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072015 },
+{ 0x00000001, 0x20b422e8, 0x00000318, 0x00000000 },
+{ 0x00000001, 0x20b822e8, 0x00000328, 0x00000000 },
+{ 0x00000001, 0x208022e8, 0x00000249, 0x00000000 },
+{ 0x00000001, 0x208422e8, 0x00000259, 0x00000000 },
+{ 0x00000001, 0x20bc22e8, 0x00000338, 0x00000000 },
+{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472015 },
+{ 0x00000001, 0x208822e8, 0x00000269, 0x00000000 },
+{ 0x00000001, 0x208c22e8, 0x00000279, 0x00000000 },
+{ 0x00000001, 0x20a022e8, 0x000002c9, 0x00000000 },
+{ 0x00000001, 0x20a422e8, 0x000002d9, 0x00000000 },
+{ 0x00000001, 0x209022e8, 0x00000289, 0x00000000 },
+{ 0x00000001, 0x209422e8, 0x00000299, 0x00000000 },
+{ 0x00000001, 0x20a822e8, 0x000002e9, 0x00000000 },
+{ 0x00000001, 0x20ac22e8, 0x000002f9, 0x00000000 },
+{ 0x00000001, 0x209822e8, 0x000002a9, 0x00000000 },
+{ 0x00000001, 0x209c22e8, 0x000002b9, 0x00000000 },
+{ 0x00000001, 0x20b022e8, 0x00000309, 0x00000000 },
+{ 0x00000001, 0x20b422e8, 0x00000319, 0x00000000 },
+{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072014 },
+{ 0x00000001, 0x208022e8, 0x0000024a, 0x00000000 },
+{ 0x00000001, 0x20b822e8, 0x00000329, 0x00000000 },
+{ 0x00000001, 0x20bc22e8, 0x00000339, 0x00000000 },
+{ 0x00000001, 0x208422e8, 0x0000025a, 0x00000000 },
+{ 0x00000001, 0x208822e8, 0x0000026a, 0x00000000 },
+{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472014 },
+{ 0x00000001, 0x20a022e8, 0x000002ca, 0x00000000 },
+{ 0x00000001, 0x208c22e8, 0x0000027a, 0x00000000 },
+{ 0x00000001, 0x209022e8, 0x0000028a, 0x00000000 },
+{ 0x00000001, 0x20a422e8, 0x000002da, 0x00000000 },
+{ 0x00000001, 0x20a822e8, 0x000002ea, 0x00000000 },
+{ 0x00000001, 0x209422e8, 0x0000029a, 0x00000000 },
+{ 0x00000001, 0x209822e8, 0x000002aa, 0x00000000 },
+{ 0x00000001, 0x20ac22e8, 0x000002fa, 0x00000000 },
+{ 0x00000001, 0x20b022e8, 0x0000030a, 0x00000000 },
+{ 0x00000001, 0x209c22e8, 0x000002ba, 0x00000000 },
+{ 0x00000001, 0x20b422e8, 0x0000031a, 0x00000000 },
+{ 0x00000001, 0x20b822e8, 0x0000032a, 0x00000000 },
+{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072017 },
+{ 0x00000001, 0x20bc22e8, 0x0000033a, 0x00000000 },
+{ 0x00000001, 0x208022e8, 0x0000024b, 0x00000000 },
+{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472017 },
+{ 0x00000001, 0x20a022e8, 0x000002cb, 0x00000000 },
+{ 0x00000001, 0x208422e8, 0x0000025b, 0x00000000 },
+{ 0x00000001, 0x208822e8, 0x0000026b, 0x00000000 },
+{ 0x00000001, 0x20a422e8, 0x000002db, 0x00000000 },
+{ 0x00000001, 0x20a822e8, 0x000002eb, 0x00000000 },
+{ 0x00000001, 0x208c22e8, 0x0000027b, 0x00000000 },
+{ 0x00000001, 0x209022e8, 0x0000028b, 0x00000000 },
+{ 0x00000001, 0x20ac22e8, 0x000002fb, 0x00000000 },
+{ 0x00000001, 0x20b022e8, 0x0000030b, 0x00000000 },
+{ 0x00000001, 0x209422e8, 0x0000029b, 0x00000000 },
+{ 0x00000001, 0x209822e8, 0x000002ab, 0x00000000 },
+{ 0x00000001, 0x20b422e8, 0x0000031b, 0x00000000 },
+{ 0x00000001, 0x20b822e8, 0x0000032b, 0x00000000 },
+{ 0x00000001, 0x209c22e8, 0x000002bb, 0x00000000 },
+{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072003 },
+{ 0x00000001, 0x20bc22e8, 0x0000033b, 0x00000000 },
+{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472003 },
+{ 0x00000001, 0x22203a28, 0x00000100, 0x00000000 },
+{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 },
+{ 0x00000001, 0x22203a28, 0x00000104, 0x00000000 },
+{ 0x00000001, 0x41a32288, 0x00000024, 0x00000000 },
+{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 },
+{ 0x00000001, 0x22203a28, 0x00000108, 0x00000000 },
+{ 0x00000001, 0x41ab2288, 0x00000024, 0x00000000 },
+{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 },
+{ 0x00000001, 0x22203a28, 0x0000010c, 0x00000000 },
+{ 0x00000001, 0x41b32288, 0x00000024, 0x00000000 },
+{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 },
+{ 0x00000001, 0x22203a28, 0x00000110, 0x00000000 },
+{ 0x00000001, 0x41bb2288, 0x00000024, 0x00000000 },
+{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 },
+{ 0x00000001, 0x22203a28, 0x00000114, 0x00000000 },
+{ 0x00000001, 0x41c32288, 0x00000024, 0x00000000 },
+{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 },
+{ 0x00000001, 0x22203a28, 0x00000118, 0x00000000 },
+{ 0x00000001, 0x41cb2288, 0x00000024, 0x00000000 },
+{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 },
+{ 0x00000001, 0x22203a28, 0x0000011c, 0x00000000 },
+{ 0x00000001, 0x41d32288, 0x00000024, 0x00000000 },
+{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 },
+{ 0x00000001, 0x22203a28, 0x00000120, 0x00000000 },
+{ 0x00000001, 0x41db2288, 0x00000024, 0x00000000 },
+{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 },
+{ 0x00000001, 0x22203a28, 0x00000124, 0x00000000 },
+{ 0x00000001, 0x41e32288, 0x00000024, 0x00000000 },
+{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 },
+{ 0x00000001, 0x41eb2288, 0x00000024, 0x00000000 },
+{ 0x00000001, 0x22203a28, 0x00000128, 0x00000000 },
+{ 0x00000001, 0x20a022e8, 0x000002c5, 0x00000000 },
+{ 0x00000001, 0x208022e8, 0x00000245, 0x00000000 },
+{ 0x00000001, 0x206022e8, 0x000002c4, 0x00000000 },
+{ 0x00000001, 0x204022e8, 0x00000244, 0x00000000 },
+{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 },
+{ 0x00000001, 0x22203a28, 0x0000012c, 0x00000000 },
+{ 0x00000001, 0x41f32288, 0x00000024, 0x00000000 },
+{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 },
+{ 0x00000001, 0x22203a28, 0x00000130, 0x00000000 },
+{ 0x00000001, 0x41fb2288, 0x00000024, 0x00000000 },
+{ 0x00000001, 0x20a422e8, 0x000002d5, 0x00000000 },
+{ 0x00000001, 0x208422e8, 0x00000255, 0x00000000 },
+{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 },
+{ 0x00000001, 0x20a822e8, 0x000002e5, 0x00000000 },
+{ 0x00000001, 0x208822e8, 0x00000265, 0x00000000 },
+{ 0x00000001, 0x206422e8, 0x000002d4, 0x00000000 },
+{ 0x00000001, 0x204422e8, 0x00000254, 0x00000000 },
+{ 0x00000001, 0x22203a28, 0x00000134, 0x00000000 },
+{ 0x00000001, 0x42032288, 0x00000024, 0x00000000 },
+{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 },
+{ 0x00000001, 0x204822e8, 0x00000264, 0x00000000 },
+{ 0x00000001, 0x206822e8, 0x000002e4, 0x00000000 },
+{ 0x00000001, 0x208c22e8, 0x00000275, 0x00000000 },
+{ 0x00000001, 0x20ac22e8, 0x000002f5, 0x00000000 },
+{ 0x00000001, 0x20b022e8, 0x00000305, 0x00000000 },
+{ 0x00000001, 0x209022e8, 0x00000285, 0x00000000 },
+{ 0x00000001, 0x206c22e8, 0x000002f4, 0x00000000 },
+{ 0x00000001, 0x204c22e8, 0x00000274, 0x00000000 },
+{ 0x00000001, 0x22203a28, 0x00000138, 0x00000000 },
+{ 0x00000001, 0x420b2288, 0x00000024, 0x00000000 },
+{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 },
+{ 0x00000001, 0x205022e8, 0x00000284, 0x00000000 },
+{ 0x00000001, 0x207022e8, 0x00000304, 0x00000000 },
+{ 0x00000001, 0x209422e8, 0x00000295, 0x00000000 },
+{ 0x00000001, 0x20b422e8, 0x00000315, 0x00000000 },
+{ 0x00000001, 0x20b822e8, 0x00000325, 0x00000000 },
+{ 0x00000001, 0x209822e8, 0x000002a5, 0x00000000 },
+{ 0x00000001, 0x207422e8, 0x00000314, 0x00000000 },
+{ 0x00000001, 0x205422e8, 0x00000294, 0x00000000 },
+{ 0x00000001, 0x22203a28, 0x0000013c, 0x00000000 },
+{ 0x00000001, 0x42132288, 0x00000024, 0x00000000 },
+{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 },
+{ 0x00000001, 0x205822e8, 0x000002a4, 0x00000000 },
+{ 0x00000001, 0x207822e8, 0x00000324, 0x00000000 },
+{ 0x00000001, 0x209c22e8, 0x000002b5, 0x00000000 },
+{ 0x00000001, 0x20bc22e8, 0x00000335, 0x00000000 },
+{ 0x00800041, 0x20c03ae8, 0x3e8d0080, 0x3875735f },
+{ 0x00000001, 0x207c22e8, 0x00000334, 0x00000000 },
+{ 0x00000001, 0x205c22e8, 0x000002b4, 0x00000000 },
+{ 0x00000001, 0x421b2288, 0x00000024, 0x00000000 },
+{ 0x0060015b, 0x081e0000, 0x392061c8, 0x00872003 },
+{ 0x00000001, 0x208022e8, 0x00000246, 0x00000000 },
+{ 0x00000001, 0x20a022e8, 0x000002c6, 0x00000000 },
+{ 0x0060015b, 0x091e0000, 0x392071c8, 0x00c72003 },
+{ 0x00000001, 0x208422e8, 0x00000256, 0x00000000 },
+{ 0x00000001, 0x208822e8, 0x00000266, 0x00000000 },
+{ 0x00000001, 0x20a422e8, 0x000002d6, 0x00000000 },
+{ 0x00000001, 0x208c22e8, 0x00000276, 0x00000000 },
+{ 0x00000001, 0x209022e8, 0x00000286, 0x00000000 },
+{ 0x00000001, 0x20a822e8, 0x000002e6, 0x00000000 },
+{ 0x00000001, 0x20ac22e8, 0x000002f6, 0x00000000 },
+{ 0x00000001, 0x209422e8, 0x00000296, 0x00000000 },
+{ 0x00000001, 0x209822e8, 0x000002a6, 0x00000000 },
+{ 0x00000001, 0x20b022e8, 0x00000306, 0x00000000 },
+{ 0x00000001, 0x20b422e8, 0x00000316, 0x00000000 },
+{ 0x00000001, 0x209c22e8, 0x000002b6, 0x00000000 },
+{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072014 },
+{ 0x00000001, 0x20b822e8, 0x00000326, 0x00000000 },
+{ 0x00000001, 0x20bc22e8, 0x00000336, 0x00000000 },
+{ 0x00000001, 0x208022e8, 0x00000247, 0x00000000 },
+{ 0x00000001, 0x208422e8, 0x00000257, 0x00000000 },
+{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472014 },
+{ 0x00000001, 0x20a022e8, 0x000002c7, 0x00000000 },
+{ 0x00000001, 0x208822e8, 0x00000267, 0x00000000 },
+{ 0x00000001, 0x208c22e8, 0x00000277, 0x00000000 },
+{ 0x00000001, 0x20a422e8, 0x000002d7, 0x00000000 },
+{ 0x00000001, 0x20a822e8, 0x000002e7, 0x00000000 },
+{ 0x00000001, 0x209022e8, 0x00000287, 0x00000000 },
+{ 0x00000001, 0x209422e8, 0x00000297, 0x00000000 },
+{ 0x00000001, 0x20ac22e8, 0x000002f7, 0x00000000 },
+{ 0x00000001, 0x20b022e8, 0x00000307, 0x00000000 },
+{ 0x00000001, 0x209822e8, 0x000002a7, 0x00000000 },
+{ 0x00000001, 0x209c22e8, 0x000002b7, 0x00000000 },
+{ 0x00000001, 0x20b422e8, 0x00000317, 0x00000000 },
+{ 0x00000001, 0x20b822e8, 0x00000327, 0x00000000 },
+{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072015 },
+{ 0x00000001, 0x208022e8, 0x00000248, 0x00000000 },
+{ 0x00000001, 0x20bc22e8, 0x00000337, 0x00000000 },
+{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472015 },
+{ 0x00000001, 0x208422e8, 0x00000258, 0x00000000 },
+{ 0x00000001, 0x208822e8, 0x00000268, 0x00000000 },
+{ 0x00000001, 0x20a022e8, 0x000002c8, 0x00000000 },
+{ 0x00000001, 0x20a422e8, 0x000002d8, 0x00000000 },
+{ 0x00000001, 0x208c22e8, 0x00000278, 0x00000000 },
+{ 0x00000001, 0x209022e8, 0x00000288, 0x00000000 },
+{ 0x00000001, 0x20a822e8, 0x000002e8, 0x00000000 },
+{ 0x00000001, 0x20ac22e8, 0x000002f8, 0x00000000 },
+{ 0x00000001, 0x209422e8, 0x00000298, 0x00000000 },
+{ 0x00000001, 0x209822e8, 0x000002a8, 0x00000000 },
+{ 0x00000001, 0x20b022e8, 0x00000308, 0x00000000 },
+{ 0x00000001, 0x20b422e8, 0x00000318, 0x00000000 },
+{ 0x00000001, 0x209c22e8, 0x000002b8, 0x00000000 },
+{ 0x00000001, 0x20b822e8, 0x00000328, 0x00000000 },
+{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072016 },
+{ 0x00000001, 0x20bc22e8, 0x00000338, 0x00000000 },
+{ 0x00000001, 0x208022e8, 0x00000249, 0x00000000 },
+{ 0x00000001, 0x208422e8, 0x00000259, 0x00000000 },
+{ 0x00000001, 0x208822e8, 0x00000269, 0x00000000 },
+{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472016 },
+{ 0x00000001, 0x208c22e8, 0x00000279, 0x00000000 },
+{ 0x00000001, 0x209022e8, 0x00000289, 0x00000000 },
+{ 0x00000001, 0x20a022e8, 0x000002c9, 0x00000000 },
+{ 0x00000001, 0x20a422e8, 0x000002d9, 0x00000000 },
+{ 0x00000001, 0x209422e8, 0x00000299, 0x00000000 },
+{ 0x00000001, 0x209822e8, 0x000002a9, 0x00000000 },
+{ 0x00000001, 0x20a822e8, 0x000002e9, 0x00000000 },
+{ 0x00000001, 0x20ac22e8, 0x000002f9, 0x00000000 },
+{ 0x00000001, 0x209c22e8, 0x000002b9, 0x00000000 },
+{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072015 },
+{ 0x00000001, 0x20b022e8, 0x00000309, 0x00000000 },
+{ 0x00000001, 0x20b422e8, 0x00000319, 0x00000000 },
+{ 0x00000001, 0x208022e8, 0x0000024a, 0x00000000 },
+{ 0x00000001, 0x208422e8, 0x0000025a, 0x00000000 },
+{ 0x00000001, 0x20b822e8, 0x00000329, 0x00000000 },
+{ 0x00000001, 0x20bc22e8, 0x00000339, 0x00000000 },
+{ 0x00000001, 0x208822e8, 0x0000026a, 0x00000000 },
+{ 0x00000001, 0x208c22e8, 0x0000027a, 0x00000000 },
+{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472015 },
+{ 0x00000001, 0x20a022e8, 0x000002ca, 0x00000000 },
+{ 0x00000001, 0x209022e8, 0x0000028a, 0x00000000 },
+{ 0x00000001, 0x209422e8, 0x0000029a, 0x00000000 },
+{ 0x00000001, 0x20a422e8, 0x000002da, 0x00000000 },
+{ 0x00000001, 0x20a822e8, 0x000002ea, 0x00000000 },
+{ 0x00000001, 0x209822e8, 0x000002aa, 0x00000000 },
+{ 0x00000001, 0x209c22e8, 0x000002ba, 0x00000000 },
+{ 0x00000001, 0x20ac22e8, 0x000002fa, 0x00000000 },
+{ 0x00000001, 0x20b022e8, 0x0000030a, 0x00000000 },
+{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072014 },
+{ 0x00000001, 0x208022e8, 0x0000024b, 0x00000000 },
+{ 0x00000001, 0x20b422e8, 0x0000031a, 0x00000000 },
+{ 0x00000001, 0x20b822e8, 0x0000032a, 0x00000000 },
+{ 0x00000001, 0x208422e8, 0x0000025b, 0x00000000 },
+{ 0x00000001, 0x208822e8, 0x0000026b, 0x00000000 },
+{ 0x00000001, 0x20bc22e8, 0x0000033a, 0x00000000 },
+{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472014 },
+{ 0x00000001, 0x208c22e8, 0x0000027b, 0x00000000 },
+{ 0x00000001, 0x209022e8, 0x0000028b, 0x00000000 },
+{ 0x00000001, 0x20a022e8, 0x000002cb, 0x00000000 },
+{ 0x00000001, 0x20a422e8, 0x000002db, 0x00000000 },
+{ 0x00000001, 0x209422e8, 0x0000029b, 0x00000000 },
+{ 0x00000001, 0x209822e8, 0x000002ab, 0x00000000 },
+{ 0x00000001, 0x20a822e8, 0x000002eb, 0x00000000 },
+{ 0x00000001, 0x20ac22e8, 0x000002fb, 0x00000000 },
+{ 0x00000001, 0x209c22e8, 0x000002bb, 0x00000000 },
+{ 0x00000001, 0x20b022e8, 0x0000030b, 0x00000000 },
+{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072017 },
+{ 0x00000001, 0x20b422e8, 0x0000031b, 0x00000000 },
+{ 0x00000001, 0x208022e8, 0x0000024c, 0x00000000 },
+{ 0x00000001, 0x208422e8, 0x0000025c, 0x00000000 },
+{ 0x00000001, 0x208822e8, 0x0000026c, 0x00000000 },
+{ 0x00000001, 0x208c22e8, 0x0000027c, 0x00000000 },
+{ 0x00000001, 0x209022e8, 0x0000028c, 0x00000000 },
+{ 0x00000001, 0x209422e8, 0x0000029c, 0x00000000 },
+{ 0x00000001, 0x209822e8, 0x000002ac, 0x00000000 },
+{ 0x00000001, 0x209c22e8, 0x000002bc, 0x00000000 },
+{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072003 },
+{ 0x00000001, 0x22203a28, 0x00000100, 0x00000000 },
+{ 0x00000001, 0x61a40a88, 0x00000220, 0x00000000 },
+{ 0x00000001, 0x22203a28, 0x00000104, 0x00000000 },
+{ 0x00000001, 0x61ac0a88, 0x00000220, 0x00000000 },
+{ 0x00000001, 0x20b822e8, 0x0000032b, 0x00000000 },
+{ 0x00000001, 0x22203a28, 0x00000108, 0x00000000 },
+{ 0x00000001, 0x61b40a88, 0x00000220, 0x00000000 },
+{ 0x00000001, 0x20bc22e8, 0x0000033b, 0x00000000 },
+{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472017 },
+{ 0x00000001, 0x22203a28, 0x0000010c, 0x00000000 },
+{ 0x00000001, 0x61bc0a88, 0x00000220, 0x00000000 },
+{ 0x00000001, 0x20a022e8, 0x000002cc, 0x00000000 },
+{ 0x00000001, 0x20a422e8, 0x000002dc, 0x00000000 },
+{ 0x00000001, 0x22203a28, 0x00000110, 0x00000000 },
+{ 0x00000001, 0x61c40a88, 0x00000220, 0x00000000 },
+{ 0x00000001, 0x20a822e8, 0x000002ec, 0x00000000 },
+{ 0x00000001, 0x20ac22e8, 0x000002fc, 0x00000000 },
+{ 0x00000001, 0x22203a28, 0x00000114, 0x00000000 },
+{ 0x00000001, 0x61cc0a88, 0x00000220, 0x00000000 },
+{ 0x00000001, 0x20b022e8, 0x0000030c, 0x00000000 },
+{ 0x00000001, 0x20b422e8, 0x0000031c, 0x00000000 },
+{ 0x00000001, 0x22203a28, 0x00000118, 0x00000000 },
+{ 0x00000001, 0x61d40a88, 0x00000220, 0x00000000 },
+{ 0x00000001, 0x20b822e8, 0x0000032c, 0x00000000 },
+{ 0x00000001, 0x20bc22e8, 0x0000033c, 0x00000000 },
+{ 0x00000001, 0x22203a28, 0x0000011c, 0x00000000 },
+{ 0x00000001, 0x61dc0a88, 0x00000220, 0x00000000 },
+{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472003 },
+{ 0x00000001, 0x22203a28, 0x00000120, 0x00000000 },
+{ 0x00000001, 0x61e40a88, 0x00000220, 0x00000000 },
+{ 0x00000001, 0x22203a28, 0x00000124, 0x00000000 },
+{ 0x00000001, 0x61ec0a88, 0x00000220, 0x00000000 },
+{ 0x00000001, 0x22203a28, 0x00000128, 0x00000000 },
+{ 0x00000001, 0x61f40a88, 0x00000220, 0x00000000 },
+{ 0x00000001, 0x22203a28, 0x0000012c, 0x00000000 },
+{ 0x00000001, 0x61fc0a88, 0x00000220, 0x00000000 },
+{ 0x00000001, 0x22203a28, 0x00000130, 0x00000000 },
+{ 0x00000001, 0x62040a88, 0x00000220, 0x00000000 },
+{ 0x00000001, 0x22203a28, 0x00000134, 0x00000000 },
+{ 0x00000001, 0x620c0a88, 0x00000220, 0x00000000 },
+{ 0x00000001, 0x22203a28, 0x00000138, 0x00000000 },
+{ 0x00000001, 0x20a022e8, 0x000002c6, 0x00000000 },
+{ 0x00000001, 0x208022e8, 0x00000246, 0x00000000 },
+{ 0x00000001, 0x206022e8, 0x000002c5, 0x00000000 },
+{ 0x00000001, 0x204022e8, 0x00000245, 0x00000000 },
+{ 0x00000001, 0x62140a88, 0x00000220, 0x00000000 },
+{ 0x00000001, 0x20a422e8, 0x000002d6, 0x00000000 },
+{ 0x00000001, 0x208422e8, 0x00000256, 0x00000000 },
+{ 0x00000001, 0x208822e8, 0x00000266, 0x00000000 },
+{ 0x00000001, 0x20a822e8, 0x000002e6, 0x00000000 },
+{ 0x00000001, 0x20ac22e8, 0x000002f6, 0x00000000 },
+{ 0x00000001, 0x208c22e8, 0x00000276, 0x00000000 },
+{ 0x00000001, 0x209022e8, 0x00000286, 0x00000000 },
+{ 0x00000001, 0x20b022e8, 0x00000306, 0x00000000 },
+{ 0x00000001, 0x20b422e8, 0x00000316, 0x00000000 },
+{ 0x00000001, 0x209422e8, 0x00000296, 0x00000000 },
+{ 0x00000001, 0x209822e8, 0x000002a6, 0x00000000 },
+{ 0x00000001, 0x20b822e8, 0x00000326, 0x00000000 },
+{ 0x00000001, 0x20bc22e8, 0x00000336, 0x00000000 },
+{ 0x00000001, 0x209c22e8, 0x000002b6, 0x00000000 },
+{ 0x00800041, 0x20c03ae8, 0x3e8d0080, 0x3875735f },
+{ 0x00000001, 0x206422e8, 0x000002d5, 0x00000000 },
+{ 0x00000001, 0x204422e8, 0x00000255, 0x00000000 },
+{ 0x00000001, 0x208022e8, 0x00000247, 0x00000000 },
+{ 0x00000001, 0x20a022e8, 0x000002c7, 0x00000000 },
+{ 0x00000001, 0x208422e8, 0x00000257, 0x00000000 },
+{ 0x00000001, 0x204822e8, 0x00000265, 0x00000000 },
+{ 0x00000001, 0x206822e8, 0x000002e5, 0x00000000 },
+{ 0x00000001, 0x206c22e8, 0x000002f5, 0x00000000 },
+{ 0x00000001, 0x204c22e8, 0x00000275, 0x00000000 },
+{ 0x00000001, 0x208822e8, 0x00000267, 0x00000000 },
+{ 0x00000001, 0x208c22e8, 0x00000277, 0x00000000 },
+{ 0x00000001, 0x205022e8, 0x00000285, 0x00000000 },
+{ 0x00000001, 0x207022e8, 0x00000305, 0x00000000 },
+{ 0x00000001, 0x20a422e8, 0x000002d7, 0x00000000 },
+{ 0x00000001, 0x207422e8, 0x00000315, 0x00000000 },
+{ 0x00000001, 0x205422e8, 0x00000295, 0x00000000 },
+{ 0x00000001, 0x209022e8, 0x00000287, 0x00000000 },
+{ 0x00000001, 0x20a822e8, 0x000002e7, 0x00000000 },
+{ 0x00000001, 0x209422e8, 0x00000297, 0x00000000 },
+{ 0x00000001, 0x205822e8, 0x000002a5, 0x00000000 },
+{ 0x00000001, 0x207822e8, 0x00000325, 0x00000000 },
+{ 0x00000001, 0x20ac22e8, 0x000002f7, 0x00000000 },
+{ 0x00000001, 0x22203a28, 0x0000013c, 0x00000000 },
+{ 0x00000001, 0x207c22e8, 0x00000335, 0x00000000 },
+{ 0x00000001, 0x205c22e8, 0x000002b5, 0x00000000 },
+{ 0x00000001, 0x209822e8, 0x000002a7, 0x00000000 },
+{ 0x00000001, 0x621c0a88, 0x00000220, 0x00000000 },
+{ 0x00000001, 0x20b022e8, 0x00000307, 0x00000000 },
+{ 0x00000001, 0x209c22e8, 0x000002b7, 0x00000000 },
+{ 0x0060015b, 0x081e0000, 0x392061c8, 0x00872003 },
+{ 0x0060015b, 0x091e0000, 0x392071c8, 0x00c72003 },
+{ 0x00000001, 0x20b422e8, 0x00000317, 0x00000000 },
+{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072014 },
+{ 0x00000001, 0x20b822e8, 0x00000327, 0x00000000 },
+{ 0x00000001, 0x208022e8, 0x00000248, 0x00000000 },
+{ 0x00000001, 0x208422e8, 0x00000258, 0x00000000 },
+{ 0x00000001, 0x208822e8, 0x00000268, 0x00000000 },
+{ 0x00000001, 0x20bc22e8, 0x00000337, 0x00000000 },
+{ 0x00000001, 0x208c22e8, 0x00000278, 0x00000000 },
+{ 0x00000001, 0x209022e8, 0x00000288, 0x00000000 },
+{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472014 },
+{ 0x00000001, 0x20a022e8, 0x000002c8, 0x00000000 },
+{ 0x00000001, 0x209422e8, 0x00000298, 0x00000000 },
+{ 0x00000001, 0x209822e8, 0x000002a8, 0x00000000 },
+{ 0x00000001, 0x20a422e8, 0x000002d8, 0x00000000 },
+{ 0x00000001, 0x20a822e8, 0x000002e8, 0x00000000 },
+{ 0x00000001, 0x209c22e8, 0x000002b8, 0x00000000 },
+{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072015 },
+{ 0x00000001, 0x20ac22e8, 0x000002f8, 0x00000000 },
+{ 0x00000001, 0x20b022e8, 0x00000308, 0x00000000 },
+{ 0x00000001, 0x208022e8, 0x00000249, 0x00000000 },
+{ 0x00000001, 0x208422e8, 0x00000259, 0x00000000 },
+{ 0x00000001, 0x20b422e8, 0x00000318, 0x00000000 },
+{ 0x00000001, 0x20b822e8, 0x00000328, 0x00000000 },
+{ 0x00000001, 0x208822e8, 0x00000269, 0x00000000 },
+{ 0x00000001, 0x208c22e8, 0x00000279, 0x00000000 },
+{ 0x00000001, 0x20bc22e8, 0x00000338, 0x00000000 },
+{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472015 },
+{ 0x00000001, 0x209022e8, 0x00000289, 0x00000000 },
+{ 0x00000001, 0x209422e8, 0x00000299, 0x00000000 },
+{ 0x00000001, 0x20a022e8, 0x000002c9, 0x00000000 },
+{ 0x00000001, 0x20a422e8, 0x000002d9, 0x00000000 },
+{ 0x00000001, 0x209822e8, 0x000002a9, 0x00000000 },
+{ 0x00000001, 0x209c22e8, 0x000002b9, 0x00000000 },
+{ 0x00000001, 0x20a822e8, 0x000002e9, 0x00000000 },
+{ 0x00000001, 0x20ac22e8, 0x000002f9, 0x00000000 },
+{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072016 },
+{ 0x00000001, 0x208022e8, 0x0000024a, 0x00000000 },
+{ 0x00000001, 0x20b022e8, 0x00000309, 0x00000000 },
+{ 0x00000001, 0x20b422e8, 0x00000319, 0x00000000 },
+{ 0x00000001, 0x208422e8, 0x0000025a, 0x00000000 },
+{ 0x00000001, 0x208822e8, 0x0000026a, 0x00000000 },
+{ 0x00000001, 0x20b822e8, 0x00000329, 0x00000000 },
+{ 0x00000001, 0x20bc22e8, 0x00000339, 0x00000000 },
+{ 0x00000001, 0x208c22e8, 0x0000027a, 0x00000000 },
+{ 0x00000001, 0x209022e8, 0x0000028a, 0x00000000 },
+{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472016 },
+{ 0x00000001, 0x20a022e8, 0x000002ca, 0x00000000 },
+{ 0x00000001, 0x209422e8, 0x0000029a, 0x00000000 },
+{ 0x00000001, 0x209822e8, 0x000002aa, 0x00000000 },
+{ 0x00000001, 0x20a422e8, 0x000002da, 0x00000000 },
+{ 0x00000001, 0x20a822e8, 0x000002ea, 0x00000000 },
+{ 0x00000001, 0x209c22e8, 0x000002ba, 0x00000000 },
+{ 0x00000001, 0x20ac22e8, 0x000002fa, 0x00000000 },
+{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072015 },
+{ 0x00000001, 0x20b022e8, 0x0000030a, 0x00000000 },
+{ 0x00000001, 0x208022e8, 0x0000024b, 0x00000000 },
+{ 0x00000001, 0x208422e8, 0x0000025b, 0x00000000 },
+{ 0x00000001, 0x208822e8, 0x0000026b, 0x00000000 },
+{ 0x00000001, 0x20b422e8, 0x0000031a, 0x00000000 },
+{ 0x00000001, 0x208c22e8, 0x0000027b, 0x00000000 },
+{ 0x00000001, 0x209022e8, 0x0000028b, 0x00000000 },
+{ 0x00000001, 0x20b822e8, 0x0000032a, 0x00000000 },
+{ 0x00000001, 0x20bc22e8, 0x0000033a, 0x00000000 },
+{ 0x00000001, 0x209422e8, 0x0000029b, 0x00000000 },
+{ 0x00000001, 0x209822e8, 0x000002ab, 0x00000000 },
+{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472015 },
+{ 0x00000001, 0x20a022e8, 0x000002cb, 0x00000000 },
+{ 0x00000001, 0x209c22e8, 0x000002bb, 0x00000000 },
+{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072014 },
+{ 0x00000001, 0x20a422e8, 0x000002db, 0x00000000 },
+{ 0x00000001, 0x20a822e8, 0x000002eb, 0x00000000 },
+{ 0x00000001, 0x208022e8, 0x0000024c, 0x00000000 },
+{ 0x00000001, 0x208422e8, 0x0000025c, 0x00000000 },
+{ 0x00000001, 0x20ac22e8, 0x000002fb, 0x00000000 },
+{ 0x00000001, 0x20b022e8, 0x0000030b, 0x00000000 },
+{ 0x00000001, 0x208822e8, 0x0000026c, 0x00000000 },
+{ 0x00000001, 0x208c22e8, 0x0000027c, 0x00000000 },
+{ 0x00000001, 0x20b422e8, 0x0000031b, 0x00000000 },
+{ 0x00000001, 0x20b822e8, 0x0000032b, 0x00000000 },
+{ 0x00000001, 0x209022e8, 0x0000028c, 0x00000000 },
+{ 0x00000001, 0x209422e8, 0x0000029c, 0x00000000 },
+{ 0x00000001, 0x20bc22e8, 0x0000033b, 0x00000000 },
+{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472014 },
+{ 0x00000001, 0x209822e8, 0x000002ac, 0x00000000 },
+{ 0x00000001, 0x209c22e8, 0x000002bc, 0x00000000 },
+{ 0x00000001, 0x20a022e8, 0x000002cc, 0x00000000 },
+{ 0x00000001, 0x20a422e8, 0x000002dc, 0x00000000 },
+{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072017 },
+{ 0x00000001, 0x208022e8, 0x0000024d, 0x00000000 },
+{ 0x00000001, 0x20a822e8, 0x000002ec, 0x00000000 },
+{ 0x00000001, 0x20ac22e8, 0x000002fc, 0x00000000 },
+{ 0x00000001, 0x208422e8, 0x0000025d, 0x00000000 },
+{ 0x00000001, 0x208822e8, 0x0000026d, 0x00000000 },
+{ 0x00000001, 0x20b022e8, 0x0000030c, 0x00000000 },
+{ 0x00000001, 0x20b422e8, 0x0000031c, 0x00000000 },
+{ 0x00000001, 0x208c22e8, 0x0000027d, 0x00000000 },
+{ 0x00000001, 0x209022e8, 0x0000028d, 0x00000000 },
+{ 0x00000001, 0x20b822e8, 0x0000032c, 0x00000000 },
+{ 0x00000001, 0x20bc22e8, 0x0000033c, 0x00000000 },
+{ 0x00000001, 0x209422e8, 0x0000029d, 0x00000000 },
+{ 0x00000001, 0x209822e8, 0x000002ad, 0x00000000 },
+{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472017 },
+{ 0x00000001, 0x20a022e8, 0x000002cd, 0x00000000 },
+{ 0x00000001, 0x209c22e8, 0x000002bd, 0x00000000 },
+{ 0x00000001, 0x20a422e8, 0x000002dd, 0x00000000 },
+{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072003 },
+{ 0x00000001, 0x20a822e8, 0x000002ed, 0x00000000 },
+{ 0x00000001, 0x204022e8, 0x00000246, 0x00000000 },
+{ 0x00000001, 0x206022e8, 0x000002c6, 0x00000000 },
+{ 0x00000001, 0x22203a28, 0x00000100, 0x00000000 },
+{ 0x00000001, 0x61a50a88, 0x00000220, 0x00000000 },
+{ 0x00000001, 0x22203a28, 0x00000104, 0x00000000 },
+{ 0x00000001, 0x61ad0a88, 0x00000220, 0x00000000 },
+{ 0x00000001, 0x22203a28, 0x00000108, 0x00000000 },
+{ 0x00000001, 0x61b50a88, 0x00000220, 0x00000000 },
+{ 0x00000001, 0x22203a28, 0x0000010c, 0x00000000 },
+{ 0x00000001, 0x61bd0a88, 0x00000220, 0x00000000 },
+{ 0x00000001, 0x22203a28, 0x00000110, 0x00000000 },
+{ 0x00000001, 0x61c50a88, 0x00000220, 0x00000000 },
+{ 0x00000001, 0x20ac22e8, 0x000002fd, 0x00000000 },
+{ 0x00000001, 0x22203a28, 0x00000114, 0x00000000 },
+{ 0x00000001, 0x61cd0a88, 0x00000220, 0x00000000 },
+{ 0x00000001, 0x20b022e8, 0x0000030d, 0x00000000 },
+{ 0x00000001, 0x20b422e8, 0x0000031d, 0x00000000 },
+{ 0x00000001, 0x22203a28, 0x00000118, 0x00000000 },
+{ 0x00000001, 0x61d50a88, 0x00000220, 0x00000000 },
+{ 0x00000001, 0x20b822e8, 0x0000032d, 0x00000000 },
+{ 0x00000001, 0x20bc22e8, 0x0000033d, 0x00000000 },
+{ 0x00000001, 0x22203a28, 0x0000011c, 0x00000000 },
+{ 0x00000001, 0x61dd0a88, 0x00000220, 0x00000000 },
+{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472003 },
+{ 0x00000001, 0x22203a28, 0x00000120, 0x00000000 },
+{ 0x00000001, 0x61e50a88, 0x00000220, 0x00000000 },
+{ 0x00000001, 0x22203a28, 0x00000124, 0x00000000 },
+{ 0x00000001, 0x61ed0a88, 0x00000220, 0x00000000 },
+{ 0x00000001, 0x22203a28, 0x00000128, 0x00000000 },
+{ 0x00000001, 0x61f50a88, 0x00000220, 0x00000000 },
+{ 0x00000001, 0x22203a28, 0x0000012c, 0x00000000 },
+{ 0x00000001, 0x61fd0a88, 0x00000220, 0x00000000 },
+{ 0x00000001, 0x22203a28, 0x00000130, 0x00000000 },
+{ 0x00000001, 0x204422e8, 0x00000256, 0x00000000 },
+{ 0x00000001, 0x62050a88, 0x00000220, 0x00000000 },
+{ 0x00000001, 0x22203a28, 0x00000134, 0x00000000 },
+{ 0x00000001, 0x204822e8, 0x00000266, 0x00000000 },
+{ 0x00000001, 0x204c22e8, 0x00000276, 0x00000000 },
+{ 0x00000001, 0x620d0a88, 0x00000220, 0x00000000 },
+{ 0x00000001, 0x22203a28, 0x00000138, 0x00000000 },
+{ 0x00000001, 0x205022e8, 0x00000286, 0x00000000 },
+{ 0x00000001, 0x205422e8, 0x00000296, 0x00000000 },
+{ 0x00000001, 0x62150a88, 0x00000220, 0x00000000 },
+{ 0x00000001, 0x206422e8, 0x000002d6, 0x00000000 },
+{ 0x00000001, 0x22203a28, 0x0000013c, 0x00000000 },
+{ 0x00000001, 0x205822e8, 0x000002a6, 0x00000000 },
+{ 0x00000001, 0x206822e8, 0x000002e6, 0x00000000 },
+{ 0x00000001, 0x205c22e8, 0x000002b6, 0x00000000 },
+{ 0x00000001, 0x621d0a88, 0x00000220, 0x00000000 },
+{ 0x00000001, 0x206c22e8, 0x000002f6, 0x00000000 },
+{ 0x00000001, 0x20a022e8, 0x000002c7, 0x00000000 },
+{ 0x00000001, 0x208022e8, 0x00000247, 0x00000000 },
+{ 0x00000001, 0x207022e8, 0x00000306, 0x00000000 },
+{ 0x00000001, 0x20a422e8, 0x000002d7, 0x00000000 },
+{ 0x00000001, 0x208422e8, 0x00000257, 0x00000000 },
+{ 0x00000001, 0x208822e8, 0x00000267, 0x00000000 },
+{ 0x00000001, 0x20a822e8, 0x000002e7, 0x00000000 },
+{ 0x00000001, 0x20ac22e8, 0x000002f7, 0x00000000 },
+{ 0x00000001, 0x208c22e8, 0x00000277, 0x00000000 },
+{ 0x00000001, 0x209022e8, 0x00000287, 0x00000000 },
+{ 0x00000001, 0x20b022e8, 0x00000307, 0x00000000 },
+{ 0x00000001, 0x20b422e8, 0x00000317, 0x00000000 },
+{ 0x00000001, 0x209422e8, 0x00000297, 0x00000000 },
+{ 0x00000001, 0x209822e8, 0x000002a7, 0x00000000 },
+{ 0x00000001, 0x20b822e8, 0x00000327, 0x00000000 },
+{ 0x00000001, 0x20bc22e8, 0x00000337, 0x00000000 },
+{ 0x00000001, 0x209c22e8, 0x000002b7, 0x00000000 },
+{ 0x00800041, 0x20c03ae8, 0x3e8d0080, 0x3875735f },
+{ 0x00000001, 0x208022e8, 0x00000248, 0x00000000 },
+{ 0x00000001, 0x20a022e8, 0x000002c8, 0x00000000 },
+{ 0x0060015b, 0x081e0000, 0x392061c8, 0x00872003 },
+{ 0x00000001, 0x208422e8, 0x00000258, 0x00000000 },
+{ 0x00000001, 0x208822e8, 0x00000268, 0x00000000 },
+{ 0x00000001, 0x208c22e8, 0x00000278, 0x00000000 },
+{ 0x00000001, 0x209022e8, 0x00000288, 0x00000000 },
+{ 0x00000001, 0x209422e8, 0x00000298, 0x00000000 },
+{ 0x00000001, 0x209822e8, 0x000002a8, 0x00000000 },
+{ 0x00000001, 0x20a422e8, 0x000002d8, 0x00000000 },
+{ 0x00000001, 0x209c22e8, 0x000002b8, 0x00000000 },
+{ 0x00000001, 0x20a822e8, 0x000002e8, 0x00000000 },
+{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072014 },
+{ 0x00000001, 0x208022e8, 0x00000249, 0x00000000 },
+{ 0x00000001, 0x20ac22e8, 0x000002f8, 0x00000000 },
+{ 0x00000001, 0x207422e8, 0x00000316, 0x00000000 },
+{ 0x00000001, 0x20b022e8, 0x00000308, 0x00000000 },
+{ 0x00000001, 0x208422e8, 0x00000259, 0x00000000 },
+{ 0x00000001, 0x207822e8, 0x00000326, 0x00000000 },
+{ 0x00000001, 0x208822e8, 0x00000269, 0x00000000 },
+{ 0x00000001, 0x20b422e8, 0x00000318, 0x00000000 },
+{ 0x00000001, 0x207c22e8, 0x00000336, 0x00000000 },
+{ 0x00000001, 0x20b822e8, 0x00000328, 0x00000000 },
+{ 0x00000001, 0x208c22e8, 0x00000279, 0x00000000 },
+{ 0x0060015b, 0x091e0000, 0x392071c8, 0x00c72003 },
+{ 0x00000001, 0x209022e8, 0x00000289, 0x00000000 },
+{ 0x00000001, 0x20bc22e8, 0x00000338, 0x00000000 },
+{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472014 },
+{ 0x00000001, 0x209422e8, 0x00000299, 0x00000000 },
+{ 0x00000001, 0x209822e8, 0x000002a9, 0x00000000 },
+{ 0x00000001, 0x20a022e8, 0x000002c9, 0x00000000 },
+{ 0x00000001, 0x20a422e8, 0x000002d9, 0x00000000 },
+{ 0x00000001, 0x209c22e8, 0x000002b9, 0x00000000 },
+{ 0x00000001, 0x20a822e8, 0x000002e9, 0x00000000 },
+{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072015 },
+{ 0x00000001, 0x20ac22e8, 0x000002f9, 0x00000000 },
+{ 0x00000001, 0x208022e8, 0x0000024a, 0x00000000 },
+{ 0x00000001, 0x208422e8, 0x0000025a, 0x00000000 },
+{ 0x00000001, 0x208822e8, 0x0000026a, 0x00000000 },
+{ 0x00000001, 0x20b022e8, 0x00000309, 0x00000000 },
+{ 0x00000001, 0x208c22e8, 0x0000027a, 0x00000000 },
+{ 0x00000001, 0x209022e8, 0x0000028a, 0x00000000 },
+{ 0x00000001, 0x20b422e8, 0x00000319, 0x00000000 },
+{ 0x00000001, 0x20b822e8, 0x00000329, 0x00000000 },
+{ 0x00000001, 0x209422e8, 0x0000029a, 0x00000000 },
+{ 0x00000001, 0x209822e8, 0x000002aa, 0x00000000 },
+{ 0x00000001, 0x20bc22e8, 0x00000339, 0x00000000 },
+{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472015 },
+{ 0x00000001, 0x209c22e8, 0x000002ba, 0x00000000 },
+{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072016 },
+{ 0x00000001, 0x20a022e8, 0x000002ca, 0x00000000 },
+{ 0x00000001, 0x20a422e8, 0x000002da, 0x00000000 },
+{ 0x00000001, 0x208022e8, 0x0000024b, 0x00000000 },
+{ 0x00000001, 0x208422e8, 0x0000025b, 0x00000000 },
+{ 0x00000001, 0x20a822e8, 0x000002ea, 0x00000000 },
+{ 0x00000001, 0x20ac22e8, 0x000002fa, 0x00000000 },
+{ 0x00000001, 0x208822e8, 0x0000026b, 0x00000000 },
+{ 0x00000001, 0x208c22e8, 0x0000027b, 0x00000000 },
+{ 0x00000001, 0x20b022e8, 0x0000030a, 0x00000000 },
+{ 0x00000001, 0x20b422e8, 0x0000031a, 0x00000000 },
+{ 0x00000001, 0x209022e8, 0x0000028b, 0x00000000 },
+{ 0x00000001, 0x209422e8, 0x0000029b, 0x00000000 },
+{ 0x00000001, 0x20b822e8, 0x0000032a, 0x00000000 },
+{ 0x00000001, 0x20bc22e8, 0x0000033a, 0x00000000 },
+{ 0x00000001, 0x209822e8, 0x000002ab, 0x00000000 },
+{ 0x00000001, 0x209c22e8, 0x000002bb, 0x00000000 },
+{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472016 },
+{ 0x00000001, 0x20a022e8, 0x000002cb, 0x00000000 },
+{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072015 },
+{ 0x00000001, 0x208022e8, 0x0000024c, 0x00000000 },
+{ 0x00000001, 0x20a422e8, 0x000002db, 0x00000000 },
+{ 0x00000001, 0x20a822e8, 0x000002eb, 0x00000000 },
+{ 0x00000001, 0x208422e8, 0x0000025c, 0x00000000 },
+{ 0x00000001, 0x208822e8, 0x0000026c, 0x00000000 },
+{ 0x00000001, 0x20ac22e8, 0x000002fb, 0x00000000 },
+{ 0x00000001, 0x20b022e8, 0x0000030b, 0x00000000 },
+{ 0x00000001, 0x208c22e8, 0x0000027c, 0x00000000 },
+{ 0x00000001, 0x209022e8, 0x0000028c, 0x00000000 },
+{ 0x00000001, 0x20b422e8, 0x0000031b, 0x00000000 },
+{ 0x00000001, 0x20b822e8, 0x0000032b, 0x00000000 },
+{ 0x00000001, 0x209422e8, 0x0000029c, 0x00000000 },
+{ 0x00000001, 0x209822e8, 0x000002ac, 0x00000000 },
+{ 0x00000001, 0x20bc22e8, 0x0000033b, 0x00000000 },
+{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472015 },
+{ 0x00000001, 0x209c22e8, 0x000002bc, 0x00000000 },
+{ 0x00000001, 0x20a022e8, 0x000002cc, 0x00000000 },
+{ 0x00000001, 0x20a422e8, 0x000002dc, 0x00000000 },
+{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072014 },
+{ 0x00000001, 0x20a822e8, 0x000002ec, 0x00000000 },
+{ 0x00000001, 0x20ac22e8, 0x000002fc, 0x00000000 },
+{ 0x00000001, 0x20b022e8, 0x0000030c, 0x00000000 },
+{ 0x00000001, 0x20b422e8, 0x0000031c, 0x00000000 },
+{ 0x00000001, 0x20b822e8, 0x0000032c, 0x00000000 },
+{ 0x00000001, 0x20bc22e8, 0x0000033c, 0x00000000 },
+{ 0x00000001, 0x208022e8, 0x0000024d, 0x00000000 },
+{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472014 },
+{ 0x00000001, 0x20a022e8, 0x000002cd, 0x00000000 },
+{ 0x00000001, 0x208422e8, 0x0000025d, 0x00000000 },
+{ 0x00000001, 0x208822e8, 0x0000026d, 0x00000000 },
+{ 0x00000001, 0x20a422e8, 0x000002dd, 0x00000000 },
+{ 0x00000001, 0x20a822e8, 0x000002ed, 0x00000000 },
+{ 0x00000001, 0x208c22e8, 0x0000027d, 0x00000000 },
+{ 0x00000001, 0x209022e8, 0x0000028d, 0x00000000 },
+{ 0x00000001, 0x20ac22e8, 0x000002fd, 0x00000000 },
+{ 0x00000001, 0x20b022e8, 0x0000030d, 0x00000000 },
+{ 0x00000001, 0x209422e8, 0x0000029d, 0x00000000 },
+{ 0x00000001, 0x209822e8, 0x000002ad, 0x00000000 },
+{ 0x00000001, 0x20b422e8, 0x0000031d, 0x00000000 },
+{ 0x00000001, 0x20b822e8, 0x0000032d, 0x00000000 },
+{ 0x00000001, 0x209c22e8, 0x000002bd, 0x00000000 },
+{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072017 },
+{ 0x00000001, 0x20bc22e8, 0x0000033d, 0x00000000 },
+{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472017 },
+{ 0x00000001, 0x208022e8, 0x0000024e, 0x00000000 },
+{ 0x00000001, 0x208422e8, 0x0000025e, 0x00000000 },
+{ 0x00000001, 0x20a022e8, 0x000002ce, 0x00000000 },
+{ 0x00000001, 0x20a422e8, 0x000002de, 0x00000000 },
+{ 0x00000001, 0x208822e8, 0x0000026e, 0x00000000 },
+{ 0x00000001, 0x208c22e8, 0x0000027e, 0x00000000 },
+{ 0x00000001, 0x20a822e8, 0x000002ee, 0x00000000 },
+{ 0x00000001, 0x20ac22e8, 0x000002fe, 0x00000000 },
+{ 0x00000001, 0x209022e8, 0x0000028e, 0x00000000 },
+{ 0x00000001, 0x209422e8, 0x0000029e, 0x00000000 },
+{ 0x00000001, 0x20b022e8, 0x0000030e, 0x00000000 },
+{ 0x00000001, 0x20b422e8, 0x0000031e, 0x00000000 },
+{ 0x00000001, 0x209822e8, 0x000002ae, 0x00000000 },
+{ 0x00000001, 0x209c22e8, 0x000002be, 0x00000000 },
+{ 0x00000001, 0x20b822e8, 0x0000032e, 0x00000000 },
+{ 0x00000001, 0x20bc22e8, 0x0000033e, 0x00000000 },
+{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072003 },
+{ 0x00000001, 0x22203a28, 0x00000100, 0x00000000 },
+{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472003 },
+{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 },
+{ 0x00000001, 0x22203a28, 0x00000104, 0x00000000 },
+{ 0x00000001, 0x41a62288, 0x00000024, 0x00000000 },
+{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 },
+{ 0x00000001, 0x41ae2288, 0x00000024, 0x00000000 },
+{ 0x00000001, 0x22203a28, 0x00000108, 0x00000000 },
+{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 },
+{ 0x00000001, 0x204022e8, 0x00000247, 0x00000000 },
+{ 0x00000001, 0x206022e8, 0x000002c7, 0x00000000 },
+{ 0x00000001, 0x22203a28, 0x0000010c, 0x00000000 },
+{ 0x00000001, 0x41b62288, 0x00000024, 0x00000000 },
+{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 },
+{ 0x00000001, 0x22203a28, 0x00000110, 0x00000000 },
+{ 0x00000001, 0x41be2288, 0x00000024, 0x00000000 },
+{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 },
+{ 0x00000001, 0x22203a28, 0x00000114, 0x00000000 },
+{ 0x00000001, 0x41c62288, 0x00000024, 0x00000000 },
+{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 },
+{ 0x00000001, 0x22203a28, 0x00000118, 0x00000000 },
+{ 0x00000001, 0x41ce2288, 0x00000024, 0x00000000 },
+{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 },
+{ 0x00000001, 0x22203a28, 0x0000011c, 0x00000000 },
+{ 0x00000001, 0x41d62288, 0x00000024, 0x00000000 },
+{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 },
+{ 0x00000001, 0x22203a28, 0x00000120, 0x00000000 },
+{ 0x00000001, 0x41de2288, 0x00000024, 0x00000000 },
+{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 },
+{ 0x00000001, 0x22203a28, 0x00000124, 0x00000000 },
+{ 0x00000001, 0x41e62288, 0x00000024, 0x00000000 },
+{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 },
+{ 0x00000001, 0x22203a28, 0x00000128, 0x00000000 },
+{ 0x00000001, 0x41ee2288, 0x00000024, 0x00000000 },
+{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 },
+{ 0x00000001, 0x22203a28, 0x0000012c, 0x00000000 },
+{ 0x00000001, 0x41f62288, 0x00000024, 0x00000000 },
+{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 },
+{ 0x00000001, 0x22203a28, 0x00000130, 0x00000000 },
+{ 0x00000001, 0x41fe2288, 0x00000024, 0x00000000 },
+{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 },
+{ 0x00000001, 0x204422e8, 0x00000257, 0x00000000 },
+{ 0x00000001, 0x22203a28, 0x00000134, 0x00000000 },
+{ 0x00000001, 0x42062288, 0x00000024, 0x00000000 },
+{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 },
+{ 0x00000001, 0x204822e8, 0x00000267, 0x00000000 },
+{ 0x00000001, 0x204c22e8, 0x00000277, 0x00000000 },
+{ 0x00000001, 0x22203a28, 0x00000138, 0x00000000 },
+{ 0x00000001, 0x420e2288, 0x00000024, 0x00000000 },
+{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 },
+{ 0x00000001, 0x205022e8, 0x00000287, 0x00000000 },
+{ 0x00000001, 0x205422e8, 0x00000297, 0x00000000 },
+{ 0x00000001, 0x22203a28, 0x0000013c, 0x00000000 },
+{ 0x00000001, 0x42162288, 0x00000024, 0x00000000 },
+{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 },
+{ 0x00000001, 0x205822e8, 0x000002a7, 0x00000000 },
+{ 0x00000001, 0x206422e8, 0x000002d7, 0x00000000 },
+{ 0x00000001, 0x205c22e8, 0x000002b7, 0x00000000 },
+{ 0x00000001, 0x421e2288, 0x00000024, 0x00000000 },
+{ 0x00000001, 0x206822e8, 0x000002e7, 0x00000000 },
+{ 0x00000001, 0x20a022e8, 0x000002c8, 0x00000000 },
+{ 0x00000001, 0x208022e8, 0x00000248, 0x00000000 },
+{ 0x00000001, 0x206c22e8, 0x000002f7, 0x00000000 },
+{ 0x00000001, 0x20a422e8, 0x000002d8, 0x00000000 },
+{ 0x00000001, 0x208422e8, 0x00000258, 0x00000000 },
+{ 0x00000001, 0x208822e8, 0x00000268, 0x00000000 },
+{ 0x00000001, 0x20a822e8, 0x000002e8, 0x00000000 },
+{ 0x00000001, 0x20ac22e8, 0x000002f8, 0x00000000 },
+{ 0x00000001, 0x208c22e8, 0x00000278, 0x00000000 },
+{ 0x00000001, 0x209022e8, 0x00000288, 0x00000000 },
+{ 0x00000001, 0x20b022e8, 0x00000308, 0x00000000 },
+{ 0x00000001, 0x20b422e8, 0x00000318, 0x00000000 },
+{ 0x00000001, 0x209422e8, 0x00000298, 0x00000000 },
+{ 0x00000001, 0x209822e8, 0x000002a8, 0x00000000 },
+{ 0x00000001, 0x20b822e8, 0x00000328, 0x00000000 },
+{ 0x00000001, 0x20bc22e8, 0x00000338, 0x00000000 },
+{ 0x00000001, 0x209c22e8, 0x000002b8, 0x00000000 },
+{ 0x00800041, 0x20c03ae8, 0x3e8d0080, 0x3875735f },
+{ 0x00000001, 0x208022e8, 0x00000249, 0x00000000 },
+{ 0x00000001, 0x20a022e8, 0x000002c9, 0x00000000 },
+{ 0x0060015b, 0x081e0000, 0x392061c8, 0x00872003 },
+{ 0x00000001, 0x208422e8, 0x00000259, 0x00000000 },
+{ 0x00000001, 0x208822e8, 0x00000269, 0x00000000 },
+{ 0x00000001, 0x208c22e8, 0x00000279, 0x00000000 },
+{ 0x00000001, 0x209022e8, 0x00000289, 0x00000000 },
+{ 0x00000001, 0x209422e8, 0x00000299, 0x00000000 },
+{ 0x00000001, 0x209822e8, 0x000002a9, 0x00000000 },
+{ 0x00000001, 0x209c22e8, 0x000002b9, 0x00000000 },
+{ 0x00000001, 0x20a422e8, 0x000002d9, 0x00000000 },
+{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072014 },
+{ 0x00000001, 0x208022e8, 0x0000024a, 0x00000000 },
+{ 0x00000001, 0x20a822e8, 0x000002e9, 0x00000000 },
+{ 0x00000001, 0x207022e8, 0x00000307, 0x00000000 },
+{ 0x00000001, 0x20ac22e8, 0x000002f9, 0x00000000 },
+{ 0x00000001, 0x208422e8, 0x0000025a, 0x00000000 },
+{ 0x00000001, 0x207422e8, 0x00000317, 0x00000000 },
+{ 0x00000001, 0x208822e8, 0x0000026a, 0x00000000 },
+{ 0x00000001, 0x20b022e8, 0x00000309, 0x00000000 },
+{ 0x00000001, 0x207822e8, 0x00000327, 0x00000000 },
+{ 0x00000001, 0x20b422e8, 0x00000319, 0x00000000 },
+{ 0x00000001, 0x208c22e8, 0x0000027a, 0x00000000 },
+{ 0x00000001, 0x207c22e8, 0x00000337, 0x00000000 },
+{ 0x00000001, 0x209022e8, 0x0000028a, 0x00000000 },
+{ 0x00000001, 0x20b822e8, 0x00000329, 0x00000000 },
+{ 0x0060015b, 0x091e0000, 0x392071c8, 0x00c72003 },
+{ 0x00000001, 0x20bc22e8, 0x00000339, 0x00000000 },
+{ 0x00000001, 0x209422e8, 0x0000029a, 0x00000000 },
+{ 0x00000001, 0x209822e8, 0x000002aa, 0x00000000 },
+{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472014 },
+{ 0x00000001, 0x20a022e8, 0x000002ca, 0x00000000 },
+{ 0x00000001, 0x209c22e8, 0x000002ba, 0x00000000 },
+{ 0x00000001, 0x20a422e8, 0x000002da, 0x00000000 },
+{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072015 },
+{ 0x00000001, 0x20a822e8, 0x000002ea, 0x00000000 },
+{ 0x00000001, 0x208022e8, 0x0000024b, 0x00000000 },
+{ 0x00000001, 0x20ac22e8, 0x000002fa, 0x00000000 },
+{ 0x00000001, 0x208422e8, 0x0000025b, 0x00000000 },
+{ 0x00000001, 0x208822e8, 0x0000026b, 0x00000000 },
+{ 0x00000001, 0x20b022e8, 0x0000030a, 0x00000000 },
+{ 0x00000001, 0x20b422e8, 0x0000031a, 0x00000000 },
+{ 0x00000001, 0x208c22e8, 0x0000027b, 0x00000000 },
+{ 0x00000001, 0x209022e8, 0x0000028b, 0x00000000 },
+{ 0x00000001, 0x20b822e8, 0x0000032a, 0x00000000 },
+{ 0x00000001, 0x20bc22e8, 0x0000033a, 0x00000000 },
+{ 0x00000001, 0x209422e8, 0x0000029b, 0x00000000 },
+{ 0x00000001, 0x209822e8, 0x000002ab, 0x00000000 },
+{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472015 },
+{ 0x00000001, 0x20a022e8, 0x000002cb, 0x00000000 },
+{ 0x00000001, 0x209c22e8, 0x000002bb, 0x00000000 },
+{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072016 },
+{ 0x00000001, 0x20a422e8, 0x000002db, 0x00000000 },
+{ 0x00000001, 0x20a822e8, 0x000002eb, 0x00000000 },
+{ 0x00000001, 0x208022e8, 0x0000024c, 0x00000000 },
+{ 0x00000001, 0x208422e8, 0x0000025c, 0x00000000 },
+{ 0x00000001, 0x20ac22e8, 0x000002fb, 0x00000000 },
+{ 0x00000001, 0x20b022e8, 0x0000030b, 0x00000000 },
+{ 0x00000001, 0x208822e8, 0x0000026c, 0x00000000 },
+{ 0x00000001, 0x208c22e8, 0x0000027c, 0x00000000 },
+{ 0x00000001, 0x20b422e8, 0x0000031b, 0x00000000 },
+{ 0x00000001, 0x20b822e8, 0x0000032b, 0x00000000 },
+{ 0x00000001, 0x209022e8, 0x0000028c, 0x00000000 },
+{ 0x00000001, 0x209422e8, 0x0000029c, 0x00000000 },
+{ 0x00000001, 0x20bc22e8, 0x0000033b, 0x00000000 },
+{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472016 },
+{ 0x00000001, 0x209822e8, 0x000002ac, 0x00000000 },
+{ 0x00000001, 0x209c22e8, 0x000002bc, 0x00000000 },
+{ 0x00000001, 0x20a022e8, 0x000002cc, 0x00000000 },
+{ 0x00000001, 0x20a422e8, 0x000002dc, 0x00000000 },
+{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072015 },
+{ 0x00000001, 0x208022e8, 0x0000024d, 0x00000000 },
+{ 0x00000001, 0x20a822e8, 0x000002ec, 0x00000000 },
+{ 0x00000001, 0x20ac22e8, 0x000002fc, 0x00000000 },
+{ 0x00000001, 0x208422e8, 0x0000025d, 0x00000000 },
+{ 0x00000001, 0x208822e8, 0x0000026d, 0x00000000 },
+{ 0x00000001, 0x20b022e8, 0x0000030c, 0x00000000 },
+{ 0x00000001, 0x20b422e8, 0x0000031c, 0x00000000 },
+{ 0x00000001, 0x208c22e8, 0x0000027d, 0x00000000 },
+{ 0x00000001, 0x209022e8, 0x0000028d, 0x00000000 },
+{ 0x00000001, 0x20b822e8, 0x0000032c, 0x00000000 },
+{ 0x00000001, 0x20bc22e8, 0x0000033c, 0x00000000 },
+{ 0x00000001, 0x209422e8, 0x0000029d, 0x00000000 },
+{ 0x00000001, 0x209822e8, 0x000002ad, 0x00000000 },
+{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472015 },
+{ 0x00000001, 0x209c22e8, 0x000002bd, 0x00000000 },
+{ 0x00000001, 0x20a022e8, 0x000002cd, 0x00000000 },
+{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072014 },
+{ 0x00000001, 0x20a422e8, 0x000002dd, 0x00000000 },
+{ 0x00000001, 0x20a822e8, 0x000002ed, 0x00000000 },
+{ 0x00000001, 0x20ac22e8, 0x000002fd, 0x00000000 },
+{ 0x00000001, 0x20b022e8, 0x0000030d, 0x00000000 },
+{ 0x00000001, 0x20b422e8, 0x0000031d, 0x00000000 },
+{ 0x00000001, 0x20b822e8, 0x0000032d, 0x00000000 },
+{ 0x00000001, 0x20bc22e8, 0x0000033d, 0x00000000 },
+{ 0x00000001, 0x208022e8, 0x0000024e, 0x00000000 },
+{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472014 },
+{ 0x00000001, 0x20a022e8, 0x000002ce, 0x00000000 },
+{ 0x00000001, 0x208422e8, 0x0000025e, 0x00000000 },
+{ 0x00000001, 0x208822e8, 0x0000026e, 0x00000000 },
+{ 0x00000001, 0x20a422e8, 0x000002de, 0x00000000 },
+{ 0x00000001, 0x20a822e8, 0x000002ee, 0x00000000 },
+{ 0x00000001, 0x208c22e8, 0x0000027e, 0x00000000 },
+{ 0x00000001, 0x209022e8, 0x0000028e, 0x00000000 },
+{ 0x00000001, 0x20ac22e8, 0x000002fe, 0x00000000 },
+{ 0x00000001, 0x20b022e8, 0x0000030e, 0x00000000 },
+{ 0x00000001, 0x209422e8, 0x0000029e, 0x00000000 },
+{ 0x00000001, 0x209822e8, 0x000002ae, 0x00000000 },
+{ 0x00000001, 0x20b422e8, 0x0000031e, 0x00000000 },
+{ 0x00000001, 0x20b822e8, 0x0000032e, 0x00000000 },
+{ 0x00000001, 0x209c22e8, 0x000002be, 0x00000000 },
+{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072017 },
+{ 0x00000001, 0x20bc22e8, 0x0000033e, 0x00000000 },
+{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472017 },
+{ 0x00000001, 0x208022e8, 0x0000024f, 0x00000000 },
+{ 0x00000001, 0x208422e8, 0x0000025f, 0x00000000 },
+{ 0x00000001, 0x20a022e8, 0x000002cf, 0x00000000 },
+{ 0x00000001, 0x20a422e8, 0x000002df, 0x00000000 },
+{ 0x00000001, 0x208822e8, 0x0000026f, 0x00000000 },
+{ 0x00000001, 0x208c22e8, 0x0000027f, 0x00000000 },
+{ 0x00000001, 0x20a822e8, 0x000002ef, 0x00000000 },
+{ 0x00000001, 0x20ac22e8, 0x000002ff, 0x00000000 },
+{ 0x00000001, 0x209022e8, 0x0000028f, 0x00000000 },
+{ 0x00000001, 0x209422e8, 0x0000029f, 0x00000000 },
+{ 0x00000001, 0x20b022e8, 0x0000030f, 0x00000000 },
+{ 0x00000001, 0x20b422e8, 0x0000031f, 0x00000000 },
+{ 0x00000001, 0x209822e8, 0x000002af, 0x00000000 },
+{ 0x00000001, 0x209c22e8, 0x000002bf, 0x00000000 },
+{ 0x00000001, 0x20b822e8, 0x0000032f, 0x00000000 },
+{ 0x00000001, 0x20bc22e8, 0x0000033f, 0x00000000 },
+{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072003 },
+{ 0x00000001, 0x20403a28, 0x00000100, 0x00000000 },
+{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472003 },
+{ 0x00000001, 0x60240a88, 0x00000040, 0x00000000 },
+{ 0x00000001, 0x20403a28, 0x00000104, 0x00000000 },
+{ 0x00000001, 0x41a72288, 0x00000024, 0x00000000 },
+{ 0x00000001, 0x60240a88, 0x00000040, 0x00000000 },
+{ 0x00000001, 0x41af2288, 0x00000024, 0x00000000 },
+{ 0x00000001, 0x20403a28, 0x00000108, 0x00000000 },
+{ 0x00600001, 0x21803ae8, 0x008d0000, 0x00000000 },
+{ 0x00000040, 0x22000200, 0x06000344, 0x0a0a8000 },
+{ 0x00000001, 0x60240a88, 0x00000040, 0x00000000 },
+{ 0x00000001, 0x20403a28, 0x0000010c, 0x00000000 },
+{ 0x00000001, 0x41b72288, 0x00000024, 0x00000000 },
+{ 0x00000001, 0x60240a88, 0x00000040, 0x00000000 },
+{ 0x00000001, 0x20403a28, 0x00000110, 0x00000000 },
+{ 0x00000001, 0x41bf2288, 0x00000024, 0x00000000 },
+{ 0x00000001, 0x60240a88, 0x00000040, 0x00000000 },
+{ 0x00000001, 0x20403a28, 0x00000114, 0x00000000 },
+{ 0x00000001, 0x41c72288, 0x00000024, 0x00000000 },
+{ 0x00000001, 0x60240a88, 0x00000040, 0x00000000 },
+{ 0x00000001, 0x20403a28, 0x00000118, 0x00000000 },
+{ 0x00000001, 0x41cf2288, 0x00000024, 0x00000000 },
+{ 0x00000001, 0x60240a88, 0x00000040, 0x00000000 },
+{ 0x00000001, 0x20403a28, 0x0000011c, 0x00000000 },
+{ 0x00000001, 0x41d72288, 0x00000024, 0x00000000 },
+{ 0x00000001, 0x60240a88, 0x00000040, 0x00000000 },
+{ 0x00000001, 0x20403a28, 0x00000120, 0x00000000 },
+{ 0x00000001, 0x41df2288, 0x00000024, 0x00000000 },
+{ 0x00000001, 0x60240a88, 0x00000040, 0x00000000 },
+{ 0x00000001, 0x20403a28, 0x00000124, 0x00000000 },
+{ 0x00000001, 0x41e72288, 0x00000024, 0x00000000 },
+{ 0x00000001, 0x60240a88, 0x00000040, 0x00000000 },
+{ 0x00000001, 0x20403a28, 0x00000128, 0x00000000 },
+{ 0x00000001, 0x41ef2288, 0x00000024, 0x00000000 },
+{ 0x00000001, 0x60240a88, 0x00000040, 0x00000000 },
+{ 0x00000001, 0x20403a28, 0x0000012c, 0x00000000 },
+{ 0x00000001, 0x41f72288, 0x00000024, 0x00000000 },
+{ 0x00000001, 0x60240a88, 0x00000040, 0x00000000 },
+{ 0x00000001, 0x20403a28, 0x00000130, 0x00000000 },
+{ 0x00000001, 0x41ff2288, 0x00000024, 0x00000000 },
+{ 0x00000001, 0x60240a88, 0x00000040, 0x00000000 },
+{ 0x00000001, 0x20403a28, 0x00000134, 0x00000000 },
+{ 0x00000001, 0x42072288, 0x00000024, 0x00000000 },
+{ 0x00000001, 0x60240a88, 0x00000040, 0x00000000 },
+{ 0x00000001, 0x20403a28, 0x00000138, 0x00000000 },
+{ 0x00000001, 0x420f2288, 0x00000024, 0x00000000 },
+{ 0x00000001, 0x60240a88, 0x00000040, 0x00000000 },
+{ 0x00000001, 0x20403a28, 0x0000013c, 0x00000000 },
+{ 0x00000001, 0x42172288, 0x00000024, 0x00000000 },
+{ 0x00000001, 0x21880608, 0x00000000, 0x000f0007 },
+{ 0x00000001, 0x60240a88, 0x00000040, 0x00000000 },
+{ 0x00000001, 0x21803ae8, 0x0000002c, 0x00000000 },
+{ 0x00000001, 0x421f2288, 0x00000024, 0x00000000 },
+{ 0x00000001, 0x21843ae8, 0x00000028, 0x00000000 },
+{ 0x0c600031, 0x20003a00, 0x00000180, 0x00000200 },
+{ 0x00000040, 0x202c0208, 0x1600002c, 0x00080008 },
+{ 0x00000040, 0x20240208, 0x1e000020, 0xfffcfffc },
+{ 0x05000010, 0x20000200, 0x0200002c, 0x00000024 },
+{ 0x00010020, 0x34000004, 0x0e001400, 0xffff9830 },
+{ 0x00600001, 0x20403ae8, 0x008d0000, 0x00000000 },
+{ 0x00000040, 0x22000200, 0x06000340, 0x02290000 },
+{ 0x00000001, 0x20480608, 0x00000000, 0x000f0003 },
+{ 0x00000001, 0x20401608, 0x00000000, 0x00000000 },
+{ 0x00000001, 0x20443ae8, 0x00000028, 0x00000000 },
+{ 0x0c600031, 0x20403a08, 0x00000040, 0x00000200 },
+{ 0x00800001, 0x20603ae8, 0x008d0040, 0x00000000 },
+{ 0x00000040, 0x22000200, 0x06000344, 0x060a8000 },
+{ 0x00600001, 0x20403ae8, 0x008d0000, 0x00000000 },
+{ 0x00000001, 0x20480608, 0x00000000, 0x000f0003 },
+{ 0x00000001, 0x20401608, 0x00000000, 0x00000000 },
+{ 0x00000001, 0x20443ae8, 0x00000028, 0x00000000 },
+{ 0x0c600031, 0x20003a00, 0x00000040, 0x00000200 },
+{ 0x00000040, 0x20240208, 0x1e000020, 0xfffcfffc },
+{ 0x00600001, 0x20403ae8, 0x008d0000, 0x00000000 },
+{ 0x00000040, 0x22000200, 0x06000340, 0x02290000 },
+{ 0x00000001, 0x20480608, 0x00000000, 0x000f0003 },
+{ 0x00000001, 0x20403ae8, 0x00000024, 0x00000000 },
+{ 0x00000001, 0x20443ae8, 0x00000028, 0x00000000 },
+{ 0x0c600031, 0x20403a08, 0x00000040, 0x00000200 },
+{ 0x00800001, 0x20603ae8, 0x008d0040, 0x00000000 },
+{ 0x00000040, 0x22000200, 0x06000344, 0x060a8000 },
+{ 0x00600001, 0x20403ae8, 0x008d0000, 0x00000000 },
+{ 0x00000001, 0x20480608, 0x00000000, 0x000f0003 },
+{ 0x00000001, 0x20403ae8, 0x00000024, 0x00000000 },
+{ 0x00000001, 0x20443ae8, 0x00000028, 0x00000000 },
+{ 0x0c600031, 0x20003a00, 0x00000040, 0x00000200 },
+{ 0x00600001, 0x2e003ae8, 0x008d0000, 0x00000000 },
+{ 0x07000031, 0x20003a00, 0x06000e00, 0x82000010 },
diff --git a/src/shaders/post_processing/gen8/sharpening_unmask.g8b b/src/shaders/post_processing/gen8/sharpening_unmask.g8b
new file mode 100644
index 0000000..f27a2d5
--- /dev/null
+++ b/src/shaders/post_processing/gen8/sharpening_unmask.g8b
@@ -0,0 +1,159 @@
+{ 0x00000001, 0x21281608, 0x00000000, 0x00000000 },
+{ 0x00000001, 0x202c1608, 0x00000000, 0x00000000 },
+{ 0x00000001, 0x21481608, 0x00000000, 0x00050005 },
+{ 0x00000001, 0x21681608, 0x00000000, 0x00040004 },
+{ 0x00000001, 0x21881608, 0x00000000, 0x00020002 },
+{ 0x00000001, 0x21081608, 0x00000000, 0x00010001 },
+{ 0x06000010, 0x20000202, 0x16000020, 0x00000000 },
+{ 0x00010020, 0x34000006, 0x0e001400, 0x00000530 },
+{ 0x00600001, 0x20403ae8, 0x008d0000, 0x00000000 },
+{ 0x00000040, 0x22000200, 0x06000128, 0x02190000 },
+{ 0x06000010, 0x20000201, 0x16000030, 0x00000000 },
+{ 0x00000001, 0x20480608, 0x00000000, 0x00030003 },
+{ 0x00000001, 0x20403ae8, 0x0000002c, 0x00000000 },
+{ 0x00000001, 0x20443ae8, 0x00000028, 0x00000000 },
+{ 0x0c600031, 0x20603a08, 0x00000040, 0x00000200 },
+{ 0x00000040, 0x22000200, 0x06000188, 0x02190000 },
+{ 0x00400001, 0x21a03ae8, 0x00690060, 0x00000000 },
+{ 0x0c600031, 0x20403a08, 0x00000040, 0x00000200 },
+{ 0x00400001, 0x21c03ae8, 0x00690040, 0x00000000 },
+{ 0x00010020, 0x34000005, 0x0e001400, 0x00000150 },
+{ 0x00000001, 0x21821e68, 0x00000000, 0x00800080 },
+{ 0x00400001, 0x21703ae8, 0x006901a0, 0x00000000 },
+{ 0x00400001, 0x21503ae8, 0x006901c0, 0x00000000 },
+{ 0x0080802c, 0x21600008, 0x0e490000, 0x00000460 },
+{ 0x00000001, 0x210c1e68, 0x00000000, 0x00800080 },
+{ 0x00400001, 0x21703ae8, 0x006901a0, 0x00000000 },
+{ 0x00400001, 0x21503ae8, 0x00690150, 0x00000000 },
+{ 0x0080802c, 0x21200008, 0x0e490000, 0x000004e0 },
+{ 0x00400001, 0x21703ae8, 0x00690150, 0x00000000 },
+{ 0x00000001, 0x21801e68, 0x00000000, 0x00800080 },
+{ 0x00400001, 0x21503ae8, 0x00690150, 0x00000000 },
+{ 0x0080802c, 0x21000008, 0x0e490000, 0x00000550 },
+{ 0x00000001, 0x21801e68, 0x00000000, 0x00800080 },
+{ 0x0080802c, 0x21000008, 0x0e490000, 0x00000530 },
+{ 0x00000001, 0x41800268, 0x00000030, 0x00000000 },
+{ 0x00400001, 0x21703ae8, 0x00690150, 0x00000000 },
+{ 0x00000001, 0x20241a68, 0x00000180, 0x00000000 },
+{ 0x00400001, 0x21503ae8, 0x006901a0, 0x00000000 },
+{ 0x0080802c, 0x21000008, 0x0e490000, 0x000004e0 },
+{ 0x00400001, 0x21903ae8, 0x00690150, 0x00000000 },
+{ 0x00000020, 0x34000004, 0x0e001400, 0x00000010 },
+{ 0x00400001, 0x21903ae8, 0x006901a0, 0x00000000 },
+{ 0x01000010, 0x20000200, 0x16000034, 0x00000000 },
+{ 0x00010020, 0x34000004, 0x0e001400, 0x00000160 },
+{ 0x00000001, 0x21121e68, 0x00000000, 0x00800080 },
+{ 0x00400001, 0x21703ae8, 0x006901a0, 0x00000000 },
+{ 0x00400001, 0x21503ae8, 0x006901c0, 0x00000000 },
+{ 0x0080802c, 0x21800008, 0x0e490000, 0x00000560 },
+{ 0x00000001, 0x210c1e68, 0x00000000, 0x00800080 },
+{ 0x00400001, 0x21703ae8, 0x006901a0, 0x00000000 },
+{ 0x00400001, 0x21503ae8, 0x00690150, 0x00000000 },
+{ 0x0080802c, 0x21200008, 0x0e490000, 0x00000360 },
+{ 0x00600040, 0x41502288, 0x1eae4150, 0x00ff00ff },
+{ 0x00000001, 0x21101e68, 0x00000000, 0x00800080 },
+{ 0x00600040, 0x41512288, 0x1eae4151, 0x00ff00ff },
+{ 0x00400001, 0x21703ae8, 0x00690150, 0x00000000 },
+{ 0x0080802c, 0x21400008, 0x0e490000, 0x00000590 },
+{ 0x00000001, 0x21101e68, 0x00000000, 0x00800080 },
+{ 0x0080802c, 0x21400008, 0x0e490000, 0x00000570 },
+{ 0x00000001, 0x41100268, 0x00000034, 0x00000000 },
+{ 0x00400001, 0x21703ae8, 0x00690150, 0x00000000 },
+{ 0x00000001, 0x20241a68, 0x00000110, 0x00000000 },
+{ 0x00400001, 0x21503ae8, 0x00690190, 0x00000000 },
+{ 0x0080802c, 0x21400008, 0x0e490000, 0x00000520 },
+{ 0x00400001, 0x21e03ae8, 0x00690150, 0x00000000 },
+{ 0x00000020, 0x34000004, 0x0e001400, 0x00000010 },
+{ 0x00400001, 0x21e03ae8, 0x00690190, 0x00000000 },
+{ 0x00600001, 0x20403ae8, 0x008d0000, 0x00000000 },
+{ 0x00000040, 0x22000200, 0x06000168, 0x040a8000 },
+{ 0x00400001, 0x20603ae8, 0x006901e0, 0x00000000 },
+{ 0x00000001, 0x20480608, 0x00000000, 0x00030003 },
+{ 0x00000001, 0x20403ae8, 0x0000002c, 0x00000000 },
+{ 0x00000001, 0x20443ae8, 0x00000028, 0x00000000 },
+{ 0x0c600031, 0x20003a00, 0x00000040, 0x00000200 },
+{ 0x0000000c, 0x20240208, 0x16000028, 0x00010001 },
+{ 0x00600001, 0x20403ae8, 0x008d0000, 0x00000000 },
+{ 0x00000040, 0x22000200, 0x06000108, 0x02190000 },
+{ 0x00000001, 0x20480608, 0x00000000, 0x00010003 },
+{ 0x00000001, 0x20403ae8, 0x0000002c, 0x00000000 },
+{ 0x00000001, 0x20443ae8, 0x00000024, 0x00000000 },
+{ 0x0c600031, 0x20403a08, 0x00000040, 0x00000200 },
+{ 0x00200001, 0x20603ae8, 0x00450040, 0x00000000 },
+{ 0x00000040, 0x22000200, 0x06000148, 0x040a8000 },
+{ 0x00600001, 0x20403ae8, 0x008d0000, 0x00000000 },
+{ 0x00000001, 0x20480608, 0x00000000, 0x00010003 },
+{ 0x00000001, 0x20403ae8, 0x0000002c, 0x00000000 },
+{ 0x00000001, 0x20443ae8, 0x00000024, 0x00000000 },
+{ 0x0c600031, 0x20003a00, 0x00000040, 0x00000200 },
+{ 0x00000040, 0x202c0208, 0x1600002c, 0x00040004 },
+{ 0x05000010, 0x20000203, 0x0200002c, 0x00000020 },
+{ 0x00010020, 0x34000007, 0x0e001400, 0xfffffad0 },
+{ 0x00600001, 0x2e003ae8, 0x008d0000, 0x00000000 },
+{ 0x07000031, 0x20003a00, 0x06000e00, 0x82000010 },
+{ 0x00000040, 0x20241a28, 0x1e004182, 0x00800080 },
+{ 0x00800041, 0x20802228, 0x1ab10170, 0x00000182 },
+{ 0x05600010, 0x20002260, 0x22ae0170, 0x00ae0150 },
+{ 0x00600041, 0x20600a28, 0x22000024, 0x008d0158 },
+{ 0x00600041, 0x20400a28, 0x22000024, 0x008d0150 },
+{ 0x00800040, 0x20400a28, 0x0a8d0080, 0x008d0040 },
+{ 0x0080000c, 0x40400a68, 0x1e8d0040, 0x00070007 },
+{ 0x05601010, 0x20002260, 0x22ae0171, 0x00ae0151 },
+{ 0x00800001, 0x20401a68, 0x00ae0040, 0x00000000 },
+{ 0x00610001, 0x41501a88, 0x00ae0040, 0x00000000 },
+{ 0x00611001, 0x41511a88, 0x00ae0042, 0x00000000 },
+{ 0x0080002d, 0x20000220, 0x00450160, 0x00000000 },
+{ 0x00800040, 0x20402268, 0x22b10150, 0x00b14170 },
+{ 0x00000040, 0x20241a28, 0x1e00410c, 0x00800080 },
+{ 0x05800010, 0x20001a62, 0x1eb10040, 0x00000000 },
+{ 0x00810001, 0x20401a6a, 0x00b14040, 0x00000000 },
+{ 0x00600041, 0x20800a28, 0x22000024, 0x008d0158 },
+{ 0x00600041, 0x20600a28, 0x22000024, 0x008d0150 },
+{ 0x00800041, 0x20a01a28, 0x1a8d0040, 0x0000010c },
+{ 0x00800040, 0x20400a28, 0x0a8d00a0, 0x008d0060 },
+{ 0x0080000c, 0x60400a88, 0x1e8d0040, 0x00070007 },
+{ 0x00800001, 0x21502288, 0x00cf0040, 0x00000000 },
+{ 0x0080002d, 0x20000220, 0x00450120, 0x00000000 },
+{ 0x00800040, 0x20802228, 0x1eb14170, 0x00ff00ff },
+{ 0x00800040, 0x20402228, 0x1eb14150, 0x00ff00ff },
+{ 0x00000040, 0x20241a28, 0x1e004180, 0x00800080 },
+{ 0x00600041, 0x20c00a28, 0x0a8d0040, 0x008d0080 },
+{ 0x00600041, 0x20e00a28, 0x0a8d0060, 0x008d00a0 },
+{ 0x00000001, 0x20401e28, 0x00000000, 0x00ff00ff },
+{ 0x0c600038, 0x20800a28, 0x0a8d00e0, 0x00000040 },
+{ 0x0c600038, 0x20600a28, 0x0a8d00c0, 0x00000040 },
+{ 0x00800040, 0x40400a68, 0x1e8d4060, 0x00ff00ff },
+{ 0x00800001, 0x20401a68, 0x00ae0040, 0x00000000 },
+{ 0x00600041, 0x20800a28, 0x22000024, 0x008d0158 },
+{ 0x00800041, 0x20a01a28, 0x1a8d0040, 0x00000180 },
+{ 0x00600041, 0x20600a28, 0x22000024, 0x008d0150 },
+{ 0x00800040, 0x20400a28, 0x0a8d00a0, 0x008d0060 },
+{ 0x0080000c, 0x60400a88, 0x1e8d0040, 0x00070007 },
+{ 0x00800001, 0x21502288, 0x00cf0040, 0x00000000 },
+{ 0x0080002d, 0x20000220, 0x00450100, 0x00000000 },
+{ 0x00000040, 0x20241a28, 0x1e004112, 0x00800080 },
+{ 0x00800041, 0x20802228, 0x1ab10170, 0x00000112 },
+{ 0x03600010, 0x20002261, 0x22ae0170, 0x00ae0150 },
+{ 0x00600041, 0x20600a28, 0x22000024, 0x008d0158 },
+{ 0x00600041, 0x20400a28, 0x22000024, 0x008d0150 },
+{ 0x00800040, 0x20400a28, 0x0a8d0080, 0x008d0040 },
+{ 0x0080000c, 0x40400a68, 0x1e8d0040, 0x00070007 },
+{ 0x03601010, 0x20002261, 0x22ae0171, 0x00ae0151 },
+{ 0x00800001, 0x20401a68, 0x00ae0040, 0x00000000 },
+{ 0x00610001, 0x41501a89, 0x00ae0040, 0x00000000 },
+{ 0x00611001, 0x41511a89, 0x00ae0042, 0x00000000 },
+{ 0x0080002d, 0x20000220, 0x00450180, 0x00000000 },
+{ 0x00000001, 0x20801e28, 0x00000000, 0x00ff00ff },
+{ 0x00800041, 0x20402228, 0x22b10170, 0x00b10150 },
+{ 0x00000040, 0x20241a28, 0x1e004110, 0x00800080 },
+{ 0x0c600038, 0x20c00a28, 0x0a8d0060, 0x00000080 },
+{ 0x0c600038, 0x20a00a28, 0x0a8d0040, 0x00000080 },
+{ 0x00800001, 0x40400a68, 0x008d00a0, 0x00000000 },
+{ 0x00800001, 0x20401a68, 0x00ae0040, 0x00000000 },
+{ 0x00600041, 0x20800a28, 0x22000024, 0x008d0158 },
+{ 0x00800041, 0x20a01a28, 0x1a8d0040, 0x00000110 },
+{ 0x00600041, 0x20600a28, 0x22000024, 0x008d0150 },
+{ 0x00800040, 0x20400a28, 0x0a8d00a0, 0x008d0060 },
+{ 0x0080000c, 0x60400a88, 0x1e8d0040, 0x00070007 },
+{ 0x00800001, 0x21502288, 0x00cf0040, 0x00000000 },
+{ 0x0080002d, 0x20000220, 0x00450140, 0x00000000 },
diff --git a/src/shaders/post_processing/gen8/sharpening_v_blur.g8b b/src/shaders/post_processing/gen8/sharpening_v_blur.g8b
new file mode 100644
index 0000000..a57f43d
--- /dev/null
+++ b/src/shaders/post_processing/gen8/sharpening_v_blur.g8b
@@ -0,0 +1,296 @@
+{ 0x00000001, 0x23601608, 0x00000000, 0x00000000 },
+{ 0x00600001, 0x20403ae8, 0x008d0000, 0x00000000 },
+{ 0x00000001, 0x23641608, 0x00000000, 0x00020002 },
+{ 0x00000001, 0x20480608, 0x00000000, 0x0007000f },
+{ 0x00000001, 0x20403ae8, 0x0000002c, 0x00000000 },
+{ 0x00000040, 0x22000200, 0x06000360, 0x02490000 },
+{ 0x00000001, 0x20441608, 0x00000000, 0x00000000 },
+{ 0x00000001, 0x20281608, 0x00000000, 0x00040004 },
+{ 0x0c600031, 0x22e03a08, 0x00000040, 0x00000200 },
+{ 0x00000040, 0x20200208, 0x1e000024, 0xfffcfffc },
+{ 0x06000010, 0x20000201, 0x16000020, 0x00040004 },
+{ 0x00010020, 0x34000005, 0x0e001400, 0x00000ff0 },
+{ 0x00600001, 0x20403ae8, 0x008d0000, 0x00000000 },
+{ 0x00000040, 0x22000200, 0x06000360, 0x02490000 },
+{ 0x00800001, 0x22a03ae8, 0x008d0320, 0x00000000 },
+{ 0x00800001, 0x22603ae8, 0x008d02e0, 0x00000000 },
+{ 0x00000001, 0x21403ee8, 0x00000000, 0x3e525448 },
+{ 0x00000001, 0x21603ee8, 0x00000000, 0x3875735f },
+{ 0x00000001, 0x20480608, 0x00000000, 0x0007000f },
+{ 0x00000001, 0x20403ae8, 0x0000002c, 0x00000000 },
+{ 0x00000040, 0x20440208, 0x16000028, 0x00040004 },
+{ 0x00800001, 0x208022e8, 0x00b10270, 0x00000000 },
+{ 0x0c600031, 0x22e03a08, 0x00000040, 0x00000200 },
+{ 0x00000001, 0x20203ee8, 0x00000000, 0x332bcc77 },
+{ 0x00800041, 0x20c03ae8, 0x3e8d0080, 0x3875735f },
+{ 0x00800001, 0x208022e8, 0x00b10280, 0x00000000 },
+{ 0x00800001, 0x204022e8, 0x00b10260, 0x00000000 },
+{ 0x0060015b, 0x091e0000, 0x392071c8, 0x00c72002 },
+{ 0x0060015b, 0x081e0000, 0x392061c8, 0x00872002 },
+{ 0x00000001, 0x20303ee8, 0x00000000, 0x3c1d98ad },
+{ 0x00800001, 0x204022e8, 0x00b10270, 0x00000000 },
+{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472003 },
+{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072003 },
+{ 0x00800001, 0x208022e8, 0x00b10290, 0x00000000 },
+{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472014 },
+{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072014 },
+{ 0x00000001, 0x21503ee8, 0x00000000, 0x3f11e168 },
+{ 0x00800001, 0x208022e8, 0x00b102a0, 0x00000000 },
+{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472015 },
+{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072015 },
+{ 0x00800001, 0x208022e8, 0x00b102b0, 0x00000000 },
+{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472014 },
+{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072014 },
+{ 0x00800001, 0x208022e8, 0x00b102c0, 0x00000000 },
+{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472003 },
+{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072003 },
+{ 0x00800001, 0x208022e8, 0x00b102d0, 0x00000000 },
+{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472016 },
+{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072016 },
+{ 0x00800001, 0x208022e8, 0x00b102e0, 0x00000000 },
+{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472002 },
+{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072002 },
+{ 0x00800001, 0x208022e8, 0x00b10280, 0x00000000 },
+{ 0x00800001, 0x21803a28, 0x008d0100, 0x00000000 },
+{ 0x00800041, 0x20c03ae8, 0x3e8d0080, 0x3875735f },
+{ 0x00800001, 0x61800a88, 0x008d0180, 0x00000000 },
+{ 0x00800001, 0x21a02288, 0x00cf0180, 0x00000000 },
+{ 0x0060015b, 0x081e0000, 0x392061c8, 0x00872002 },
+{ 0x0060015b, 0x091e0000, 0x392071c8, 0x00c72002 },
+{ 0x00800001, 0x208022e8, 0x00b10290, 0x00000000 },
+{ 0x00800001, 0x204022e8, 0x00b10280, 0x00000000 },
+{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472003 },
+{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072003 },
+{ 0x00800001, 0x208022e8, 0x00b102a0, 0x00000000 },
+{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472014 },
+{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072014 },
+{ 0x00800001, 0x208022e8, 0x00b102b0, 0x00000000 },
+{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472015 },
+{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072015 },
+{ 0x00800001, 0x208022e8, 0x00b102c0, 0x00000000 },
+{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472014 },
+{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072014 },
+{ 0x00800001, 0x208022e8, 0x00b102d0, 0x00000000 },
+{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472003 },
+{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072003 },
+{ 0x00800001, 0x208022e8, 0x00b102e0, 0x00000000 },
+{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472016 },
+{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072016 },
+{ 0x00800001, 0x208022e8, 0x00b102f0, 0x00000000 },
+{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472002 },
+{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072002 },
+{ 0x00800001, 0x208022e8, 0x00b10290, 0x00000000 },
+{ 0x00800001, 0x22203a28, 0x008d0100, 0x00000000 },
+{ 0x00800041, 0x20c03ae8, 0x3e8d0080, 0x3875735f },
+{ 0x00800001, 0x62200a88, 0x008d0220, 0x00000000 },
+{ 0x00800001, 0x208022e8, 0x00b102a0, 0x00000000 },
+{ 0x0060015b, 0x091e0000, 0x392071c8, 0x00c72002 },
+{ 0x0060015b, 0x081e0000, 0x392061c8, 0x00872002 },
+{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472003 },
+{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072003 },
+{ 0x00800001, 0x208022e8, 0x00b102b0, 0x00000000 },
+{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472014 },
+{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072014 },
+{ 0x00800001, 0x208022e8, 0x00b102c0, 0x00000000 },
+{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472015 },
+{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072015 },
+{ 0x00800001, 0x208022e8, 0x00b102d0, 0x00000000 },
+{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472014 },
+{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072014 },
+{ 0x00800001, 0x208022e8, 0x00b102e0, 0x00000000 },
+{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472003 },
+{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072003 },
+{ 0x00800001, 0x208022e8, 0x00b102f0, 0x00000000 },
+{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472016 },
+{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072016 },
+{ 0x00800001, 0x208022e8, 0x00b10300, 0x00000000 },
+{ 0x00800001, 0x21b02288, 0x00cf0220, 0x00000000 },
+{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472002 },
+{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072002 },
+{ 0x00800001, 0x22203a28, 0x008d0100, 0x00000000 },
+{ 0x00800001, 0x62200a88, 0x008d0220, 0x00000000 },
+{ 0x00800001, 0x208022e8, 0x00b102a0, 0x00000000 },
+{ 0x00800001, 0x204022e8, 0x00b10290, 0x00000000 },
+{ 0x00800001, 0x21c02288, 0x00cf0220, 0x00000000 },
+{ 0x00800041, 0x20c03ae8, 0x3e8d0080, 0x3875735f },
+{ 0x00800001, 0x208022e8, 0x00b102b0, 0x00000000 },
+{ 0x0060015b, 0x091e0000, 0x392071c8, 0x00c72002 },
+{ 0x0060015b, 0x081e0000, 0x392061c8, 0x00872002 },
+{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472003 },
+{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072003 },
+{ 0x00800001, 0x204022e8, 0x00b102a0, 0x00000000 },
+{ 0x00800001, 0x208022e8, 0x00b102c0, 0x00000000 },
+{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472014 },
+{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072014 },
+{ 0x00800001, 0x208022e8, 0x00b102d0, 0x00000000 },
+{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472015 },
+{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072015 },
+{ 0x00800001, 0x208022e8, 0x00b102e0, 0x00000000 },
+{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472014 },
+{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072014 },
+{ 0x00800001, 0x208022e8, 0x00b102f0, 0x00000000 },
+{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472003 },
+{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072003 },
+{ 0x00800001, 0x208022e8, 0x00b10300, 0x00000000 },
+{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472016 },
+{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072016 },
+{ 0x00800001, 0x208022e8, 0x00b10310, 0x00000000 },
+{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472002 },
+{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072002 },
+{ 0x00800001, 0x208022e8, 0x00b102b0, 0x00000000 },
+{ 0x00800001, 0x22203a28, 0x008d0100, 0x00000000 },
+{ 0x00800041, 0x20c03ae8, 0x3e8d0080, 0x3875735f },
+{ 0x00800001, 0x62200a88, 0x008d0220, 0x00000000 },
+{ 0x00800001, 0x208022e8, 0x00b102c0, 0x00000000 },
+{ 0x0060015b, 0x091e0000, 0x392071c8, 0x00c72002 },
+{ 0x0060015b, 0x081e0000, 0x392061c8, 0x00872002 },
+{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472003 },
+{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072003 },
+{ 0x00800001, 0x208022e8, 0x00b102d0, 0x00000000 },
+{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472014 },
+{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072014 },
+{ 0x00800001, 0x208022e8, 0x00b102e0, 0x00000000 },
+{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472015 },
+{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072015 },
+{ 0x00800001, 0x208022e8, 0x00b102f0, 0x00000000 },
+{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472014 },
+{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072014 },
+{ 0x00800001, 0x208022e8, 0x00b10300, 0x00000000 },
+{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472003 },
+{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072003 },
+{ 0x00800001, 0x21d02288, 0x00cf0220, 0x00000000 },
+{ 0x00800001, 0x208022e8, 0x00b10310, 0x00000000 },
+{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072016 },
+{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472016 },
+{ 0x00800001, 0x204022e8, 0x00b102b0, 0x00000000 },
+{ 0x00800001, 0x208022e8, 0x00b10320, 0x00000000 },
+{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472002 },
+{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072002 },
+{ 0x00800001, 0x208022e8, 0x00b102c0, 0x00000000 },
+{ 0x00800001, 0x22203a28, 0x008d0100, 0x00000000 },
+{ 0x00800041, 0x20c03ae8, 0x3e8d0080, 0x3875735f },
+{ 0x00800001, 0x62200a88, 0x008d0220, 0x00000000 },
+{ 0x00800001, 0x208022e8, 0x00b102d0, 0x00000000 },
+{ 0x0060015b, 0x091e0000, 0x392071c8, 0x00c72002 },
+{ 0x0060015b, 0x081e0000, 0x392061c8, 0x00872002 },
+{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472003 },
+{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072003 },
+{ 0x00800001, 0x204022e8, 0x00b102c0, 0x00000000 },
+{ 0x00800001, 0x208022e8, 0x00b102e0, 0x00000000 },
+{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472014 },
+{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072014 },
+{ 0x00800001, 0x208022e8, 0x00b102f0, 0x00000000 },
+{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472015 },
+{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072015 },
+{ 0x00800001, 0x208022e8, 0x00b10300, 0x00000000 },
+{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472014 },
+{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072014 },
+{ 0x00800001, 0x208022e8, 0x00b10310, 0x00000000 },
+{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472003 },
+{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072003 },
+{ 0x00800001, 0x208022e8, 0x00b10320, 0x00000000 },
+{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472016 },
+{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072016 },
+{ 0x00800001, 0x208022e8, 0x00b10330, 0x00000000 },
+{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472002 },
+{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072002 },
+{ 0x00800001, 0x21e02288, 0x00cf0220, 0x00000000 },
+{ 0x00800001, 0x208022e8, 0x00b102d0, 0x00000000 },
+{ 0x00800001, 0x22203a28, 0x008d0100, 0x00000000 },
+{ 0x00800041, 0x20c03ae8, 0x3e8d0080, 0x3875735f },
+{ 0x00800001, 0x62200a88, 0x008d0220, 0x00000000 },
+{ 0x00800001, 0x208022e8, 0x00b102e0, 0x00000000 },
+{ 0x0060015b, 0x091e0000, 0x392071c8, 0x00c72002 },
+{ 0x0060015b, 0x081e0000, 0x392061c8, 0x00872002 },
+{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472003 },
+{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072003 },
+{ 0x00800001, 0x208022e8, 0x00b102f0, 0x00000000 },
+{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472014 },
+{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072014 },
+{ 0x00800001, 0x208022e8, 0x00b10300, 0x00000000 },
+{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472015 },
+{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072015 },
+{ 0x00800001, 0x21f02288, 0x00cf0220, 0x00000000 },
+{ 0x00800001, 0x208022e8, 0x00b10310, 0x00000000 },
+{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072014 },
+{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472014 },
+{ 0x00800001, 0x204022e8, 0x00b102d0, 0x00000000 },
+{ 0x00600001, 0x21803ae8, 0x008d0000, 0x00000000 },
+{ 0x00000040, 0x22000200, 0x06000364, 0x0a0a8000 },
+{ 0x00800001, 0x208022e8, 0x00b10320, 0x00000000 },
+{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472003 },
+{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072003 },
+{ 0x00800001, 0x208022e8, 0x00b10330, 0x00000000 },
+{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472016 },
+{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072016 },
+{ 0x00800001, 0x208022e8, 0x00b10340, 0x00000000 },
+{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472002 },
+{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072002 },
+{ 0x00800001, 0x208022e8, 0x00b102e0, 0x00000000 },
+{ 0x00800001, 0x22203a28, 0x008d0100, 0x00000000 },
+{ 0x00800041, 0x20c03ae8, 0x3e8d0080, 0x3875735f },
+{ 0x00800001, 0x62200a88, 0x008d0220, 0x00000000 },
+{ 0x00800001, 0x208022e8, 0x00b102f0, 0x00000000 },
+{ 0x0060015b, 0x091e0000, 0x392071c8, 0x00c72002 },
+{ 0x0060015b, 0x081e0000, 0x392061c8, 0x00872002 },
+{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472003 },
+{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072003 },
+{ 0x00800001, 0x208022e8, 0x00b10300, 0x00000000 },
+{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472014 },
+{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072014 },
+{ 0x00800001, 0x208022e8, 0x00b10310, 0x00000000 },
+{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472015 },
+{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072015 },
+{ 0x00800001, 0x208022e8, 0x00b10320, 0x00000000 },
+{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472014 },
+{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072014 },
+{ 0x00800001, 0x208022e8, 0x00b10330, 0x00000000 },
+{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472003 },
+{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072003 },
+{ 0x00800001, 0x208022e8, 0x00b10340, 0x00000000 },
+{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472016 },
+{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072016 },
+{ 0x00800001, 0x208022e8, 0x00b10350, 0x00000000 },
+{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472002 },
+{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072002 },
+{ 0x00000001, 0x21880608, 0x00000000, 0x0007000f },
+{ 0x00800001, 0x20403a28, 0x008d0100, 0x00000000 },
+{ 0x00000001, 0x21803ae8, 0x0000002c, 0x00000000 },
+{ 0x00800001, 0x22002288, 0x00cf0220, 0x00000000 },
+{ 0x00800001, 0x60400a88, 0x008d0040, 0x00000000 },
+{ 0x00800001, 0x22102288, 0x00cf0040, 0x00000000 },
+{ 0x00000001, 0x21843ae8, 0x00000028, 0x00000000 },
+{ 0x00000040, 0x20280208, 0x16000028, 0x00080008 },
+{ 0x0c600031, 0x20003a00, 0x00000180, 0x00000200 },
+{ 0x00000040, 0x20200208, 0x1e000024, 0xfffcfffc },
+{ 0x05000010, 0x20000200, 0x02000028, 0x00000020 },
+{ 0x00010020, 0x34000004, 0x0e001400, 0xfffff010 },
+{ 0x00600001, 0x20403ae8, 0x008d0000, 0x00000000 },
+{ 0x00000040, 0x22000200, 0x06000360, 0x02290000 },
+{ 0x00000001, 0x20480608, 0x00000000, 0x0003000f },
+{ 0x00000001, 0x20403ae8, 0x0000002c, 0x00000000 },
+{ 0x00000001, 0x20441608, 0x00000000, 0x00000000 },
+{ 0x0c600031, 0x22603a08, 0x00000040, 0x00000200 },
+{ 0x00600001, 0x20403ae8, 0x008d0000, 0x00000000 },
+{ 0x00000040, 0x22000200, 0x06000364, 0x060a8000 },
+{ 0x00800001, 0x20603ae8, 0x008d0260, 0x00000000 },
+{ 0x00000001, 0x20480608, 0x00000000, 0x0003000f },
+{ 0x00000001, 0x20403ae8, 0x0000002c, 0x00000000 },
+{ 0x00000001, 0x20441608, 0x00000000, 0x00000000 },
+{ 0x00000040, 0x20200208, 0x1e000024, 0xfffcfffc },
+{ 0x0c600031, 0x20003a00, 0x00000040, 0x00000200 },
+{ 0x00600001, 0x20403ae8, 0x008d0000, 0x00000000 },
+{ 0x00000040, 0x22000200, 0x06000360, 0x02290000 },
+{ 0x00000001, 0x20480608, 0x00000000, 0x0003000f },
+{ 0x00000001, 0x20403ae8, 0x0000002c, 0x00000000 },
+{ 0x00000001, 0x20443ae8, 0x00000020, 0x00000000 },
+{ 0x0c600031, 0x22603a08, 0x00000040, 0x00000200 },
+{ 0x00600001, 0x20403ae8, 0x008d0000, 0x00000000 },
+{ 0x00000040, 0x22000200, 0x06000364, 0x060a8000 },
+{ 0x00800001, 0x20603ae8, 0x008d0260, 0x00000000 },
+{ 0x00000001, 0x20480608, 0x00000000, 0x0003000f },
+{ 0x00000001, 0x20403ae8, 0x0000002c, 0x00000000 },
+{ 0x00000001, 0x20443ae8, 0x00000020, 0x00000000 },
+{ 0x0c600031, 0x20003a00, 0x00000040, 0x00000200 },
+{ 0x00600001, 0x2e003ae8, 0x008d0000, 0x00000000 },
+{ 0x07000031, 0x20003a00, 0x06000e00, 0x82000010 },
diff --git a/src/shaders/render/Makefile.am b/src/shaders/render/Makefile.am
index dac58c7..e7d5e76 100644
--- a/src/shaders/render/Makefile.am
+++ b/src/shaders/render/Makefile.am
@@ -1,7 +1,11 @@
INTEL_G4I = \
exa_wm.g4i \
- exa_wm_affine.g4i
+ exa_wm_affine.g4i \
+ exa_wm_yuv_color_balance.gxa \
+ exa_yuv_rgb.gxa \
+ exa_yuv_gen4.g4i \
+ exa_yuv_gen6.g4i
INTEL_G4A = \
exa_sf.g4a \
@@ -9,6 +13,7 @@ INTEL_G4A = \
exa_wm_src_affine.g4a \
exa_wm_src_sample_argb.g4a \
exa_wm_src_sample_planar.g4a \
+ exa_wm_yuv_color_balance.g4a \
exa_wm_yuv_rgb.g4a \
exa_wm_write.g4a
@@ -20,6 +25,7 @@ INTEL_G4B = \
exa_wm_src_affine.g4b \
exa_wm_src_sample_argb.g4b \
exa_wm_src_sample_planar.g4b \
+ exa_wm_yuv_color_balance.g4b \
exa_wm_yuv_rgb.g4b \
exa_wm_write.g4b
@@ -29,14 +35,18 @@ INTEL_G4B_GEN5 = \
exa_wm_src_affine.g4b.gen5 \
exa_wm_src_sample_argb.g4b.gen5 \
exa_wm_src_sample_planar.g4b.gen5 \
+ exa_wm_yuv_color_balance.g4b.gen5 \
exa_wm_yuv_rgb.g4b.gen5 \
exa_wm_write.g4b.gen5
+INTEL_G6I = $(INTEL_G4I)
+
INTEL_G6A = \
exa_wm_src_affine.g6a \
exa_wm_src_sample_argb.g6a \
exa_wm_src_sample_planar.g6a \
exa_wm_write.g6a \
+ exa_wm_yuv_color_balance.g6a \
exa_wm_yuv_rgb.g6a
INTEL_G6S = $(INTEL_G6A:%.g6a=%.g6s)
@@ -46,13 +56,17 @@ INTEL_G6B = \
exa_wm_src_sample_argb.g6b \
exa_wm_src_sample_planar.g6b \
exa_wm_write.g6b \
+ exa_wm_yuv_color_balance.g6b \
exa_wm_yuv_rgb.g6b
+INTEL_G7I = $(INTEL_G4I)
+
INTEL_G7A = \
exa_wm_src_affine.g7a \
exa_wm_src_sample_argb.g7a \
exa_wm_src_sample_planar.g7a \
exa_wm_write.g7a \
+ exa_wm_yuv_color_balance.g7a \
exa_wm_yuv_rgb.g7a
INTEL_G7S = $(INTEL_G7A:%.g7a=%.g7s)
@@ -62,13 +76,33 @@ INTEL_G7B = \
exa_wm_src_sample_argb.g7b \
exa_wm_src_sample_planar.g7b \
exa_wm_write.g7b \
+ exa_wm_yuv_color_balance.g7b \
exa_wm_yuv_rgb.g7b
# XXX: only regenerate binary for EU code containing JMPI instructions
INTEL_G7B_HASWELL = \
exa_wm_src_sample_planar.g7b.haswell \
+ exa_wm_yuv_color_balance.g7b.haswell \
$(NULL)
+INTEL_G8A = \
+ exa_wm_src_affine.g8a \
+ exa_wm_src_sample_planar.g8a \
+ exa_wm_src_sample_argb.g8a \
+ exa_wm_yuv_color_balance.g8a \
+ exa_wm_write.g8a \
+ exa_wm_yuv_rgb.g8a
+
+INTEL_G8S = $(INTEL_G8A:%.g8a=%.g8s)
+
+INTEL_G8B = \
+ exa_wm_src_affine.g8b \
+ exa_wm_src_sample_planar.g8b \
+ exa_wm_src_sample_argb.g8b \
+ exa_wm_yuv_color_balance.g8b \
+ exa_wm_yuv_rgb.g8b \
+ exa_wm_write.g8b
+
TARGETS =
if HAVE_GEN4ASM
TARGETS += $(INTEL_G4B)
@@ -76,11 +110,12 @@ TARGETS += $(INTEL_G4B_GEN5)
TARGETS += $(INTEL_G6B)
TARGETS += $(INTEL_G7B)
TARGETS += $(INTEL_G7B_HASWELL)
+TARGETS += $(INTEL_G8B)
endif
all-local: $(TARGETS)
-SUFFIXES = .g4a .g4s .g4b .g6a .g6s .g6b .g7a .g7s .g7b .g7b.haswell
+SUFFIXES = .g4a .g4s .g4b .g4b.gen5 .g6a .g6s .g6b .g7a .g7s .g7b .g7b.haswell .g8a .g8b .g8s
if HAVE_GEN4ASM
$(INTEL_G4S): $(INTEL_G4A) $(INTEL_G4I)
@@ -104,12 +139,21 @@ $(INTEL_G7S): $(INTEL_G7A) $(INTEL_G7I)
$(AM_V_GEN)$(GEN4ASM) -g 7 -o $@ $<
.g7s.g7b.haswell:
$(AM_V_GEN)$(GEN4ASM) -g 7.5 -o $@ $<
+
+# Gen8 .g8s files are m4 output, so they also depend on the shared m4 include files.
+$(INTEL_G8S): $(INTEL_G8A) $(INTEL_G4I)
+.g8a.g8s:
+ $(AM_V_GEN)m4 $< > $@
+.g8s.g8b:
+ $(AM_V_GEN)$(GEN4ASM) -g 8 -o $@ $<
+
endif
CLEANFILES = \
$(INTEL_G4S) \
$(INTEL_G6S) \
$(INTEL_G7S) \
+ $(INTEL_G8S) \
$(NULL)
EXTRA_DIST = \
@@ -122,6 +166,8 @@ EXTRA_DIST = \
$(INTEL_G7A) \
$(INTEL_G7B) \
$(INTEL_G7B_HASWELL) \
+ $(INTEL_G8A) \
+ $(INTEL_G8B) \
$(NULL)
# Extra clean files so that maintainer-clean removes *everything*
diff --git a/src/shaders/render/exa_wm.g4i b/src/shaders/render/exa_wm.g4i
index 8163de5..e186d3a 100644
--- a/src/shaders/render/exa_wm.g4i
+++ b/src/shaders/render/exa_wm.g4i
@@ -1,5 +1,5 @@
/*
- * Copyright © 2006 Intel Corporation
+ * Copyright © 2006-2013 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -39,39 +39,43 @@ define(`screen_y0', `g1.4<0,1,0>F')
define(`interleaved_uv', `g2.0<0,1,0>UW')
/* Source transformation parameters */
-define(`src_du_dx', `g3.0<0,1,0>F')
-define(`src_du_dy', `g3.4<0,1,0>F')
-define(`src_uo', `g3.12<0,1,0>F')
-define(`src_dv_dx', `g3.16<0,1,0>F')
-define(`src_dv_dy', `g3.20<0,1,0>F')
-define(`src_vo', `g3.28<0,1,0>F')
-define(`src_dw_dx', `g4.0<0,1,0>F')
-define(`src_dw_dy', `g4.4<0,1,0>F')
-define(`src_wo', `g4.12<0,1,0>F')
-
-define(`mask_du_dx', `g5.0<0,1,0>F')
-define(`mask_du_dy', `g5.4<0,1,0>F')
-define(`mask_uo', `g5.12<0,1,0>F')
-define(`mask_dv_dx', `g5.16<0,1,0>F')
-define(`mask_dv_dy', `g5.20<0,1,0>F')
-define(`mask_vo', `g5.28<0,1,0>F')
-define(`mask_dw_dx', `g6.0<0,1,0>F')
-define(`mask_dw_dy', `g6.4<0,1,0>F')
-define(`mask_wo', `g6.12<0,1,0>F')
+define(`src_du_dx', `g6.0<0,1,0>F')
+define(`src_du_dy', `g6.4<0,1,0>F')
+define(`src_uo', `g6.12<0,1,0>F')
+define(`src_dv_dx', `g6.16<0,1,0>F')
+define(`src_dv_dy', `g6.20<0,1,0>F')
+define(`src_vo', `g6.28<0,1,0>F')
+define(`src_dw_dx', `g7.0<0,1,0>F')
+define(`src_dw_dy', `g7.4<0,1,0>F')
+define(`src_wo', `g7.12<0,1,0>F')
+
+define(`mask_du_dx', `g8.0<0,1,0>F')
+define(`mask_du_dy', `g8.4<0,1,0>F')
+define(`mask_uo', `g8.12<0,1,0>F')
+define(`mask_dv_dx', `g8.16<0,1,0>F')
+define(`mask_dv_dy', `g8.20<0,1,0>F')
+define(`mask_vo', `g8.28<0,1,0>F')
+define(`mask_dw_dx', `g9.0<0,1,0>F')
+define(`mask_dw_dy', `g9.4<0,1,0>F')
+define(`mask_wo', `g9.12<0,1,0>F')
+
+/* Attribute for snb+ */
+define(`a0_a_x',`g10.0<0,1,0>F')
+define(`a0_a_y',`g10.16<0,1,0>F')
/*
- * Local variables. Pairs must be aligned on even reg boundry
+ * Local variables. Pairs must be aligned on even reg boundary
*/
/* this holds the X dest coordinates */
-define(`dst_x', `g8')
+define(`dst_x', `g42')
define(`dst_x_0', `dst_x')
-define(`dst_x_1', `g9')
+define(`dst_x_1', `g43')
/* this holds the Y dest coordinates */
-define(`dst_y', `g10')
+define(`dst_y', `g44')
define(`dst_y_0', `dst_y')
-define(`dst_y_1', `g11')
+define(`dst_y_1', `g45')
/* When computing x * dn/dx, use this */
define(`temp_x', `g30')
@@ -142,6 +146,25 @@ define(`mask_sample_a', `g28')
define(`mask_sample_a_01', `g28')
define(`mask_sample_a_23', `g29')
+/* Color balance results: one register pair per channel; _01 names the first register, _23 the second */
+define(`color_balance_base', `g32')
+
+define(`color_balance_r', `g32')
+define(`color_balance_r_01', `g32')
+define(`color_balance_r_23', `g33')
+
+define(`color_balance_g', `g34')
+define(`color_balance_g_01', `g34')
+define(`color_balance_g_23', `g35')
+
+define(`color_balance_b', `g36')
+define(`color_balance_b_01', `g36')
+define(`color_balance_b_23', `g37')
+
+define(`color_balance_a', `g38')
+define(`color_balance_a_01', `g38')
+define(`color_balance_a_23', `g39')
+
/* data port SIMD16 send registers */
define(`data_port_msg_0', `m0')
diff --git a/src/shaders/render/exa_wm_src_affine.g4b b/src/shaders/render/exa_wm_src_affine.g4b
index d30da87..7507b72 100644
--- a/src/shaders/render/exa_wm_src_affine.g4b
+++ b/src/shaders/render/exa_wm_src_affine.g4b
@@ -1,8 +1,8 @@
- { 0x00802041, 0x23c077bd, 0x008d0100, 0x00000060 },
- { 0x00802041, 0x238077bd, 0x008d0140, 0x00000064 },
+ { 0x00802041, 0x23c077bd, 0x008d0540, 0x000000c0 },
+ { 0x00802041, 0x238077bd, 0x008d0580, 0x000000c4 },
{ 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 },
- { 0x00802040, 0x204077be, 0x008d03c0, 0x0000006c },
- { 0x00802041, 0x23c077bd, 0x008d0100, 0x00000070 },
- { 0x00802041, 0x238077bd, 0x008d0140, 0x00000074 },
+ { 0x00802040, 0x204077be, 0x008d03c0, 0x000000cc },
+ { 0x00802041, 0x23c077bd, 0x008d0540, 0x000000d0 },
+ { 0x00802041, 0x238077bd, 0x008d0580, 0x000000d4 },
{ 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 },
- { 0x00802040, 0x208077be, 0x008d03c0, 0x0000007c },
+ { 0x00802040, 0x208077be, 0x008d03c0, 0x000000dc },
diff --git a/src/shaders/render/exa_wm_src_affine.g4b.gen5 b/src/shaders/render/exa_wm_src_affine.g4b.gen5
index d30da87..7507b72 100644
--- a/src/shaders/render/exa_wm_src_affine.g4b.gen5
+++ b/src/shaders/render/exa_wm_src_affine.g4b.gen5
@@ -1,8 +1,8 @@
- { 0x00802041, 0x23c077bd, 0x008d0100, 0x00000060 },
- { 0x00802041, 0x238077bd, 0x008d0140, 0x00000064 },
+ { 0x00802041, 0x23c077bd, 0x008d0540, 0x000000c0 },
+ { 0x00802041, 0x238077bd, 0x008d0580, 0x000000c4 },
{ 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 },
- { 0x00802040, 0x204077be, 0x008d03c0, 0x0000006c },
- { 0x00802041, 0x23c077bd, 0x008d0100, 0x00000070 },
- { 0x00802041, 0x238077bd, 0x008d0140, 0x00000074 },
+ { 0x00802040, 0x204077be, 0x008d03c0, 0x000000cc },
+ { 0x00802041, 0x23c077bd, 0x008d0540, 0x000000d0 },
+ { 0x00802041, 0x238077bd, 0x008d0580, 0x000000d4 },
{ 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 },
- { 0x00802040, 0x208077be, 0x008d03c0, 0x0000007c },
+ { 0x00802040, 0x208077be, 0x008d03c0, 0x000000dc },
diff --git a/src/shaders/render/exa_wm_src_affine.g6a b/src/shaders/render/exa_wm_src_affine.g6a
index 568aef3..04358cb 100644
--- a/src/shaders/render/exa_wm_src_affine.g6a
+++ b/src/shaders/render/exa_wm_src_affine.g6a
@@ -35,9 +35,6 @@ define(`vh', `m5')
define(`bl', `g2.0<8,8,1>F')
define(`bh', `g4.0<8,8,1>F')
-define(`a0_a_x',`g7.0<0,1,0>F')
-define(`a0_a_y',`g7.16<0,1,0>F')
-
/* U */
pln (8) ul<1>F a0_a_x bl { align1 }; /* pixel 0-7 */
pln (8) uh<1>F a0_a_x bh { align1 }; /* pixel 8-15 */
diff --git a/src/shaders/render/exa_wm_src_affine.g6b b/src/shaders/render/exa_wm_src_affine.g6b
index 5d0ffcc..22c1d22 100644
--- a/src/shaders/render/exa_wm_src_affine.g6b
+++ b/src/shaders/render/exa_wm_src_affine.g6b
@@ -1,4 +1,4 @@
- { 0x0060005a, 0x204077be, 0x000000e0, 0x008d0040 },
- { 0x0060005a, 0x206077be, 0x000000e0, 0x008d0080 },
- { 0x0060005a, 0x208077be, 0x000000f0, 0x008d0040 },
- { 0x0060005a, 0x20a077be, 0x000000f0, 0x008d0080 },
+ { 0x0060005a, 0x204077be, 0x00000140, 0x008d0040 },
+ { 0x0060005a, 0x206077be, 0x00000140, 0x008d0080 },
+ { 0x0060005a, 0x208077be, 0x00000150, 0x008d0040 },
+ { 0x0060005a, 0x20a077be, 0x00000150, 0x008d0080 },
diff --git a/src/shaders/render/exa_wm_src_affine.g7a b/src/shaders/render/exa_wm_src_affine.g7a
index a786bc0..88e5ed5 100644
--- a/src/shaders/render/exa_wm_src_affine.g7a
+++ b/src/shaders/render/exa_wm_src_affine.g7a
@@ -35,8 +35,6 @@ define(`vh', `g69')
define(`bl', `g2.0<8,8,1>F')
define(`bh', `g4.0<8,8,1>F')
-define(`a0_a_x',`g7.0<0,1,0>F')
-define(`a0_a_y',`g7.16<0,1,0>F')
/* U */
pln (8) ul<1>F a0_a_x bl { align1 }; /* pixel 0-7 */
diff --git a/src/shaders/render/exa_wm_src_affine.g7b b/src/shaders/render/exa_wm_src_affine.g7b
index 5dbbf1b..a15b7b6 100644
--- a/src/shaders/render/exa_wm_src_affine.g7b
+++ b/src/shaders/render/exa_wm_src_affine.g7b
@@ -1,4 +1,4 @@
- { 0x0060005a, 0x284077bd, 0x000000e0, 0x008d0040 },
- { 0x0060005a, 0x286077bd, 0x000000e0, 0x008d0080 },
- { 0x0060005a, 0x288077bd, 0x000000f0, 0x008d0040 },
- { 0x0060005a, 0x28a077bd, 0x000000f0, 0x008d0080 },
+ { 0x0060005a, 0x284077bd, 0x00000140, 0x008d0040 },
+ { 0x0060005a, 0x286077bd, 0x00000140, 0x008d0080 },
+ { 0x0060005a, 0x288077bd, 0x00000150, 0x008d0040 },
+ { 0x0060005a, 0x28a077bd, 0x00000150, 0x008d0080 },
diff --git a/src/shaders/render/exa_wm_src_affine.g8a b/src/shaders/render/exa_wm_src_affine.g8a
new file mode 100644
index 0000000..7927c3b
--- /dev/null
+++ b/src/shaders/render/exa_wm_src_affine.g8a
@@ -0,0 +1,45 @@
+/*
+ * Copyright © 2013 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+/*
+ * Fragment to compute the src u/v values for 16 pixels, handled as two groups of 8.
+ */
+include(`exa_wm.g4i')
+/* Results: u lands in g66 (pixels 0-7) and g67 (pixels 8-15), v in g68 and g69 */
+define(`ul', `g66')
+define(`uh', `g67')
+define(`vl', `g68')
+define(`vh', `g69')
+/* Second pln source: g2 for pixels 0-7, g4 for pixels 8-15 */
+define(`bl', `g2.0<8,8,1>F')
+define(`bh', `g4.0<8,8,1>F')
+
+/* The first pln source is a scalar region defined in exa_wm.g4i; NOTE(review): presumably the interpolation coefficients for u and v - confirm */
+/* U */
+pln (8) ul<1>F a0_a_x bl { align1 }; /* pixel 0-7 */
+pln (8) uh<1>F a0_a_x bh { align1 }; /* pixel 8-15 */
+
+/* V */
+pln (8) vl<1>F a0_a_y bl { align1 }; /* pixel 0-7 */
+pln (8) vh<1>F a0_a_y bh { align1 }; /* pixel 8-15 */
diff --git a/src/shaders/render/exa_wm_src_affine.g8b b/src/shaders/render/exa_wm_src_affine.g8b
new file mode 100644
index 0000000..f5f9eca
--- /dev/null
+++ b/src/shaders/render/exa_wm_src_affine.g8b
@@ -0,0 +1,4 @@
+ { 0x0060005a, 0x28403ae8, 0x3a000140, 0x008d0040 },
+ { 0x0060005a, 0x28603ae8, 0x3a000140, 0x008d0080 },
+ { 0x0060005a, 0x28803ae8, 0x3a000150, 0x008d0040 },
+ { 0x0060005a, 0x28a03ae8, 0x3a000150, 0x008d0080 },
diff --git a/src/shaders/render/exa_wm_src_sample_argb.g4a b/src/shaders/render/exa_wm_src_sample_argb.g4a
index 8cc693e..c657655 100644
--- a/src/shaders/render/exa_wm_src_sample_argb.g4a
+++ b/src/shaders/render/exa_wm_src_sample_argb.g4a
@@ -48,5 +48,5 @@ send (16) src_msg_ind /* msg reg index */
/* here(src->dst) we should use src_sampler and src_surface */
mlen 5 rlen 8 { align1 }; /* required message len 5, readback len 8 */
-mul (8) src_sample_a_01<1>f src_sample_a_01<1>f global_alpha { align1 };
-mul (8) src_sample_a_23<1>f src_sample_a_23<1>f global_alpha { align1 };
+mul (8) src_sample_a_01<1>f src_sample_a_01<8,8,1>f global_alpha { align1 };
+mul (8) src_sample_a_23<1>f src_sample_a_23<8,8,1>f global_alpha { align1 };
diff --git a/src/shaders/render/exa_wm_src_sample_argb.g4b b/src/shaders/render/exa_wm_src_sample_argb.g4b
index 963c121..42e4a68 100644
--- a/src/shaders/render/exa_wm_src_sample_argb.g4b
+++ b/src/shaders/render/exa_wm_src_sample_argb.g4b
@@ -1,4 +1,4 @@
{ 0x00000201, 0x20080061, 0x00000000, 0x00000000 },
{ 0x01800031, 0x21c01d29, 0x008d0000, 0x02580001 },
- { 0x00600041, 0x228077bd, 0x00200280, 0x00000040 },
- { 0x00600041, 0x22a077bd, 0x002002a0, 0x00000040 },
+ { 0x00600041, 0x228077bd, 0x008d0280, 0x00000040 },
+ { 0x00600041, 0x22a077bd, 0x008d02a0, 0x00000040 },
diff --git a/src/shaders/render/exa_wm_src_sample_argb.g4b.gen5 b/src/shaders/render/exa_wm_src_sample_argb.g4b.gen5
index 45b3641..2012f89 100644
--- a/src/shaders/render/exa_wm_src_sample_argb.g4b.gen5
+++ b/src/shaders/render/exa_wm_src_sample_argb.g4b.gen5
@@ -1,4 +1,4 @@
{ 0x00000201, 0x20080061, 0x00000000, 0x00000000 },
{ 0x01800031, 0x21c01d29, 0x208d0000, 0x0a8a0001 },
- { 0x00600041, 0x228077bd, 0x00200280, 0x00000040 },
- { 0x00600041, 0x22a077bd, 0x002002a0, 0x00000040 },
+ { 0x00600041, 0x228077bd, 0x008d0280, 0x00000040 },
+ { 0x00600041, 0x22a077bd, 0x008d02a0, 0x00000040 },
diff --git a/src/shaders/render/exa_wm_src_sample_argb.g6a b/src/shaders/render/exa_wm_src_sample_argb.g6a
index 48e79f7..c30b209 100644
--- a/src/shaders/render/exa_wm_src_sample_argb.g6a
+++ b/src/shaders/render/exa_wm_src_sample_argb.g6a
@@ -50,6 +50,6 @@ send (16) src_msg_ind /* msg reg index */
/* here(src->dst) we should use src_sampler and src_surface */
mlen 5 rlen 8 { align1 }; /* required message len 5, readback len 8 */
-mul (8) src_sample_a_01<1>f src_sample_a_01<1>f global_alpha { align1 };
-mul (8) src_sample_a_23<1>f src_sample_a_23<1>f global_alpha { align1 };
+mul (8) src_sample_a_01<1>f src_sample_a_01<8,8,1>f global_alpha { align1 };
+mul (8) src_sample_a_23<1>f src_sample_a_23<8,8,1>f global_alpha { align1 }; /* same explicit <8,8,1> region as the line before, matching the gen4 and gen7 variants */
diff --git a/src/shaders/render/exa_wm_src_sample_argb.g6b b/src/shaders/render/exa_wm_src_sample_argb.g6b
index 8964e45..53c6248 100644
--- a/src/shaders/render/exa_wm_src_sample_argb.g6b
+++ b/src/shaders/render/exa_wm_src_sample_argb.g6b
@@ -1,5 +1,5 @@
{ 0x00000201, 0x20080061, 0x00000000, 0x00000000 },
{ 0x00600201, 0x20200022, 0x008d0000, 0x00000000 },
{ 0x02800031, 0x21c01cc9, 0x00000020, 0x0a8a0001 },
- { 0x00600041, 0x228077bd, 0x00200280, 0x000000c0 },
- { 0x00600041, 0x22a077bd, 0x002002a0, 0x000000c0 },
+ { 0x00600041, 0x228077bd, 0x008d0280, 0x000000c0 },
+ { 0x00600041, 0x22a077bd, 0x008d02a0, 0x000000c0 },
diff --git a/src/shaders/render/exa_wm_src_sample_argb.g7a b/src/shaders/render/exa_wm_src_sample_argb.g7a
index 620e0e7..0165f7b 100644
--- a/src/shaders/render/exa_wm_src_sample_argb.g7a
+++ b/src/shaders/render/exa_wm_src_sample_argb.g7a
@@ -54,6 +54,6 @@ send (16) src_msg_ind_gen7 /* msg reg index */
/* here(src->dst) we should use src_sampler and src_surface */
mlen 5 rlen 8 { align1 }; /* required message len 5, readback len 8 */
-mul (8) src_sample_a_01<1>f src_sample_a_01<1>f global_alpha { align1 mask_disable };
-mul (8) src_sample_a_23<1>f src_sample_a_23<1>f global_alpha { align1 mask_disable };
+mul (8) src_sample_a_01<1>f src_sample_a_01<8,8,1>f global_alpha { align1 mask_disable };
+mul (8) src_sample_a_23<1>f src_sample_a_23<8,8,1>f global_alpha { align1 mask_disable };
diff --git a/src/shaders/render/exa_wm_src_sample_argb.g7b b/src/shaders/render/exa_wm_src_sample_argb.g7b
index 674fc74..0708bc0 100644
--- a/src/shaders/render/exa_wm_src_sample_argb.g7b
+++ b/src/shaders/render/exa_wm_src_sample_argb.g7b
@@ -1,5 +1,5 @@
{ 0x00000201, 0x20080061, 0x00000000, 0x00000000 },
{ 0x00600201, 0x28200021, 0x008d0000, 0x00000000 },
{ 0x02800031, 0x21c01ca9, 0x00000820, 0x0a8c0001 },
- { 0x00600241, 0x228077bd, 0x00200280, 0x000000c0 },
- { 0x00600241, 0x22a077bd, 0x002002a0, 0x000000c0 },
+ { 0x00600241, 0x228077bd, 0x008d0280, 0x000000c0 },
+ { 0x00600241, 0x22a077bd, 0x008d02a0, 0x000000c0 },
diff --git a/src/shaders/render/exa_wm_src_sample_argb.g8a b/src/shaders/render/exa_wm_src_sample_argb.g8a
new file mode 100644
index 0000000..3a4e99f
--- /dev/null
+++ b/src/shaders/render/exa_wm_src_sample_argb.g8a
@@ -0,0 +1,59 @@
+/*
+ * Copyright © 2013 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Wang Zhenyu <zhenyu.z.wang@intel.com>
+ * Keith Packard <keithp@keithp.com>
+ */
+
+/* Sample the src surface */
+
+include(`exa_wm.g4i')
+
+/* As on Ivybridge, the SEND message payload is staged in a GRF (g65 here) */
+define(`src_msg_gen8', `g65')
+define(`src_msg_ind_gen8',`65')
+
+/* subpicture global alpha: one float at r6.0, replicated to every channel */
+define(`global_alpha', `r6.0<0,1,0>f')
+
+/* prepare sampler read back gX register, which would be written back to output */
+
+/* use simd16 sampler, param 0 is u, param 1 is v. */
+/* 'payload' loading, assuming tex coord start from g4 */
+
+/* load argb: clear g0.8, then copy g0 into the message register */
+mov (1) g0.8<1>UD 0x00000000UD { align1 mask_disable };
+mov (8) src_msg_gen8<1>UD g0<8,8,1>UD { align1 mask_disable };
+
+/* the message register now holds a copy of g0, which contains the send desc */
+/* emit sampler 'send' cmd */
+send (16) src_msg_ind_gen8 /* msg reg index */
+ src_sample_base<1>UW /* readback */
+ null
+ sampler (1,0,F) /* sampler message description: (binding_table,sampler_index,datatype) */
+ /* here(src->dst) we should use src_sampler and src_surface */
+ mlen 5 rlen 8 { align1 }; /* required message len 5, readback len 8 */
+
+mul (8) src_sample_a_01<1>f src_sample_a_01<8,8,1>f global_alpha { align1 mask_disable }; /* scale the sampled alpha by the global alpha */
+mul (8) src_sample_a_23<1>f src_sample_a_23<8,8,1>f global_alpha { align1 mask_disable }; /* same for the second alpha register */
+
diff --git a/src/shaders/render/exa_wm_src_sample_argb.g8b b/src/shaders/render/exa_wm_src_sample_argb.g8b
new file mode 100644
index 0000000..2b04637
--- /dev/null
+++ b/src/shaders/render/exa_wm_src_sample_argb.g8b
@@ -0,0 +1,5 @@
+ { 0x00000001, 0x2008060c, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x2820020c, 0x008d0000, 0x00000000 },
+ { 0x02800031, 0x21c00a48, 0x0e000820, 0x0a8c0001 },
+ { 0x00600041, 0x22803aec, 0x3a8d0280, 0x000000c0 },
+ { 0x00600041, 0x22a03aec, 0x3a8d02a0, 0x000000c0 },
diff --git a/src/shaders/render/exa_wm_src_sample_planar.g8a b/src/shaders/render/exa_wm_src_sample_planar.g8a
new file mode 100644
index 0000000..7684491
--- /dev/null
+++ b/src/shaders/render/exa_wm_src_sample_planar.g8a
@@ -0,0 +1,106 @@
+/*
+ * Copyright © 2013 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Wang Zhenyu <zhenyu.z.wang@intel.com>
+ * Keith Packard <keithp@keithp.com>
+ * Zhao Yakui <yakui.zhao@intel.com>
+ */
+
+/* Sample the src surface in planar format */
+
+include(`exa_wm.g4i')
+
+/* As on Ivybridge, the SEND message payload is staged in a GRF (g65 here) */
+define(`src_msg_gen8', `g65')
+define(`src_msg_ind_gen8',`65')
+/* UV flag at g6.0: 1 = interleaved UV surface, 2 = constant UV, anything else = separate U and V surfaces */
+define(`uv_flag', `g6.0<0,1,0>UW')
+
+/* prepare sampler read back gX register, which would be written back to output */
+
+/* use simd16 sampler, param 0 is u, param 1 is v. */
+/* 'payload' loading, assuming tex coord start from g4 */
+cmp.e.f0.0 (1) null uv_flag 0x1UW {align1};
+(f0.0) jmpi INTERLEAVED_UV;
+
+cmp.e.f0.0 (1) null uv_flag 0x2UW {align1};
+(f0.0) jmpi CONSTANT_UV;
+
+/* separate U and V surfaces: set g0.8 to 0xe000 (presumably a channel mask keeping one channel, cf. rlen 2 -- TODO confirm) and copy g0 into the message register */
+mov (1) g0.8<1>UD 0x0000e000UD { align1 mask_disable };
+mov (8) src_msg_gen8<1>UD g0<8,8,1>UD { align1 mask_disable };
+
+/* emit sampler 'send' cmd */
+
+/* sample U (Cb) */
+send (16) src_msg_ind_gen8 /* msg reg index */
+ src_sample_g<1>UW /* readback */
+ null
+ sampler (3,2,F) /* sampler message description: (binding_table,sampler_index,datatype) */
+ /* here(src->dst) we should use src_sampler and src_surface */
+ mlen 5 rlen 2 { align1 }; /* required message len 5, readback len 2 */
+
+/* sample V (Cr) */
+mov (1) g0.8<1>UD 0x0000e000UD { align1 mask_disable };
+mov (8) src_msg_gen8<1>UD g0<8,8,1>UD { align1 mask_disable };
+
+send (16) src_msg_ind_gen8 /* msg reg index */
+ src_sample_b<1>UW /* readback */
+ null
+ sampler (5,4,F) /* sampler message description: (binding_table,sampler_index,datatype) */
+ /* here(src->dst) we should use src_sampler and src_surface */
+ mlen 5 rlen 2 { align1 }; /* required message len 5, readback len 2 */
+
+jmpi SAMPLE_Y;
+
+CONSTANT_UV:
+mov (16) src_sample_g<1>f 0.5f { compr align1 mask_disable }; /* constant chroma: fill with 0.5 instead of sampling */
+mov (16) src_sample_b<1>f 0.5f { compr align1 mask_disable };
+
+jmpi SAMPLE_Y;
+
+INTERLEAVED_UV:
+mov (1) g0.8<1>UD 0x0000c000UD { align1 mask_disable }; /* 0xc000 here rather than 0xe000; the send below reads back 4 registers */
+mov (8) src_msg_gen8<1>UD g0<8,8,1>UD { align1 mask_disable };
+
+/* sample interleaved UV (CbCr) with a single send */
+send (16) src_msg_ind_gen8 /* msg reg index */
+ src_sample_g<1>UW /* readback */
+ null
+ sampler (3,2,F) /* sampler message description: (binding_table,sampler_index,datatype) */
+ /* here(src->dst) we should use src_sampler and src_surface */
+ mlen 5 rlen 4 { align1 }; /* required message len 5, readback len 4 */
+
+
+SAMPLE_Y:
+mov (1) g0.8<1>UD 0x0000e000UD { align1 mask_disable };
+mov (8) src_msg_gen8<1>UD g0<8,8,1>UD { align1 mask_disable };
+
+/* sample Y */
+send (16) src_msg_ind_gen8 /* msg reg index */
+ src_sample_r<1>UW /* readback */
+ null
+ sampler (1,0,F) /* sampler message description: (binding_table,sampler_index,datatype) */
+ /* here(src->dst) we should use src_sampler and src_surface */
+ mlen 5 rlen 2 { align1 }; /* required message len 5, readback len 2 */
+
diff --git a/src/shaders/render/exa_wm_src_sample_planar.g8b b/src/shaders/render/exa_wm_src_sample_planar.g8b
new file mode 100644
index 0000000..f29cfe4
--- /dev/null
+++ b/src/shaders/render/exa_wm_src_sample_planar.g8b
@@ -0,0 +1,20 @@
+ { 0x01000010, 0x200012e0, 0x160000c0, 0x00010001 },
+ { 0x00010020, 0x34000000, 0x0e001400, 0x000000c0 },
+ { 0x01000010, 0x200012e0, 0x160000c0, 0x00020002 },
+ { 0x00010020, 0x34000000, 0x0e001400, 0x00000070 },
+ { 0x00000001, 0x2008060c, 0x00000000, 0x0000e000 },
+ { 0x00600001, 0x2820020c, 0x008d0000, 0x00000000 },
+ { 0x02800031, 0x22000a48, 0x0e000820, 0x0a2c0203 },
+ { 0x00000001, 0x2008060c, 0x00000000, 0x0000e000 },
+ { 0x00600001, 0x2820020c, 0x008d0000, 0x00000000 },
+ { 0x02800031, 0x22400a48, 0x0e000820, 0x0a2c0405 },
+ { 0x00000020, 0x34000000, 0x0e001400, 0x00000060 },
+ { 0x00800001, 0x22003eec, 0x38000000, 0x3f000000 },
+ { 0x00800001, 0x22403eec, 0x38000000, 0x3f000000 },
+ { 0x00000020, 0x34000000, 0x0e001400, 0x00000030 },
+ { 0x00000001, 0x2008060c, 0x00000000, 0x0000c000 },
+ { 0x00600001, 0x2820020c, 0x008d0000, 0x00000000 },
+ { 0x02800031, 0x22000a48, 0x0e000820, 0x0a4c0203 },
+ { 0x00000001, 0x2008060c, 0x00000000, 0x0000e000 },
+ { 0x00600001, 0x2820020c, 0x008d0000, 0x00000000 },
+ { 0x02800031, 0x21c00a48, 0x0e000820, 0x0a2c0001 },
diff --git a/src/shaders/render/exa_wm_write.g7a b/src/shaders/render/exa_wm_write.g7a
index a2fb447..4b17929 100644
--- a/src/shaders/render/exa_wm_write.g7a
+++ b/src/shaders/render/exa_wm_write.g7a
@@ -45,17 +45,17 @@ define(`slot_b_01', `g71')
define(`slot_a_00', `g72')
define(`slot_a_01', `g73')
-mov (8) slot_r_00<1>F src_sample_r_01<1>F { align1 mask_disable };
-mov (8) slot_r_01<1>F src_sample_r_23<1>F { align1 mask_disable };
+mov (8) slot_r_00<1>F src_sample_r_01<8,8,1>F { align1 mask_disable };
+mov (8) slot_r_01<1>F src_sample_r_23<8,8,1>F { align1 mask_disable };
-mov (8) slot_g_00<1>F src_sample_g_01<1>F { align1 mask_disable };
-mov (8) slot_g_01<1>F src_sample_g_23<1>F { align1 mask_disable };
+mov (8) slot_g_00<1>F src_sample_g_01<8,8,1>F { align1 mask_disable };
+mov (8) slot_g_01<1>F src_sample_g_23<8,8,1>F { align1 mask_disable };
-mov (8) slot_b_00<1>F src_sample_b_01<1>F { align1 mask_disable };
-mov (8) slot_b_01<1>F src_sample_b_23<1>F { align1 mask_disable };
+mov (8) slot_b_00<1>F src_sample_b_01<8,8,1>F { align1 mask_disable };
+mov (8) slot_b_01<1>F src_sample_b_23<8,8,1>F { align1 mask_disable };
-mov (8) slot_a_00<1>F src_sample_a_01<1>F { align1 mask_disable };
-mov (8) slot_a_01<1>F src_sample_a_23<1>F { align1 mask_disable };
+mov (8) slot_a_00<1>F src_sample_a_01<8,8,1>F { align1 mask_disable };
+mov (8) slot_a_01<1>F src_sample_a_23<8,8,1>F { align1 mask_disable };
send (16)
data_port_msg_2_ind
diff --git a/src/shaders/render/exa_wm_write.g7b b/src/shaders/render/exa_wm_write.g7b
index 05e1801..4f347cb 100644
--- a/src/shaders/render/exa_wm_write.g7b
+++ b/src/shaders/render/exa_wm_write.g7b
@@ -1,13 +1,13 @@
{ 0x00600201, 0x28000021, 0x008d0000, 0x00000000 },
{ 0x00600201, 0x28200021, 0x008d0020, 0x00000000 },
- { 0x00600201, 0x284003bd, 0x002001c0, 0x00000000 },
- { 0x00600201, 0x286003bd, 0x002001e0, 0x00000000 },
- { 0x00600201, 0x288003bd, 0x00200200, 0x00000000 },
- { 0x00600201, 0x28a003bd, 0x00200220, 0x00000000 },
- { 0x00600201, 0x28c003bd, 0x00200240, 0x00000000 },
- { 0x00600201, 0x28e003bd, 0x00200260, 0x00000000 },
- { 0x00600201, 0x290003bd, 0x00200280, 0x00000000 },
- { 0x00600201, 0x292003bd, 0x002002a0, 0x00000000 },
+ { 0x00600201, 0x284003bd, 0x008d01c0, 0x00000000 },
+ { 0x00600201, 0x286003bd, 0x008d01e0, 0x00000000 },
+ { 0x00600201, 0x288003bd, 0x008d0200, 0x00000000 },
+ { 0x00600201, 0x28a003bd, 0x008d0220, 0x00000000 },
+ { 0x00600201, 0x28c003bd, 0x008d0240, 0x00000000 },
+ { 0x00600201, 0x28e003bd, 0x008d0260, 0x00000000 },
+ { 0x00600201, 0x290003bd, 0x008d0280, 0x00000000 },
+ { 0x00600201, 0x292003bd, 0x008d02a0, 0x00000000 },
{ 0x05800031, 0x20001ca8, 0x00000800, 0x940b1000 },
{ 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
{ 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
diff --git a/src/shaders/render/exa_wm_write.g8a b/src/shaders/render/exa_wm_write.g8a
new file mode 100644
index 0000000..e6da9b6
--- /dev/null
+++ b/src/shaders/render/exa_wm_write.g8a
@@ -0,0 +1,83 @@
+/*
+ * Copyright © 2013 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+include(`exa_wm.g4i')
+
+/* message header: two registers (g64-g65), filled from g0 and g1 below */
+define(`data_port_msg_2_0', `g64')
+define(`data_port_msg_2_1', `g65')
+define(`data_port_msg_2_ind', `64')
+
+mov (8) data_port_msg_2_0<1>UD g0<8,8,1>UD {align1 mask_disable};
+mov (8) data_port_msg_2_1<1>UD g1<8,8,1>UD {align1 mask_disable};
+
+/*
+ * Prepare data in g66-g67 for Red channel, g68-g69 for Green channel,
+ * g70-g71 for Blue and g72-g73 for Alpha channel
+ */
+define(`slot_r_00', `g66')
+define(`slot_r_01', `g67')
+define(`slot_g_00', `g68')
+define(`slot_g_01', `g69')
+define(`slot_b_00', `g70')
+define(`slot_b_01', `g71')
+define(`slot_a_00', `g72')
+define(`slot_a_01', `g73')
+
+mov (8) slot_r_00<1>F src_sample_r_01<8,8,1>F { align1 mask_disable }; /* red: two registers */
+mov (8) slot_r_01<1>F src_sample_r_23<8,8,1>F { align1 mask_disable };
+
+mov (8) slot_g_00<1>F src_sample_g_01<8,8,1>F { align1 mask_disable }; /* green */
+mov (8) slot_g_01<1>F src_sample_g_23<8,8,1>F { align1 mask_disable };
+
+mov (8) slot_b_00<1>F src_sample_b_01<8,8,1>F { align1 mask_disable }; /* blue */
+mov (8) slot_b_01<1>F src_sample_b_23<8,8,1>F { align1 mask_disable };
+
+mov (8) slot_a_00<1>F src_sample_a_01<8,8,1>F { align1 mask_disable }; /* alpha */
+mov (8) slot_a_01<1>F src_sample_a_23<8,8,1>F { align1 mask_disable };
+
+send (16)
+ data_port_msg_2_ind
+ null<1>UW
+ null
+ write (
+ 0, /* binding table index */
+ 16, /* last render target(1) + slots 15:0(0) + msg type simd16 single source(000) */
+ 12, /* render target write */
+ 0, /* ignored on Ivybridge; presumably also on Gen8 -- TODO confirm */
+ 1 /* header present */
+ )
+ mlen 10 /* 2 header registers + 8 color registers (g64-g73) */
+ rlen 0 /* nothing is read back */
+ { align1 EOT };
+
+nop;
+nop;
+nop;
+nop;
+nop;
+nop;
+nop;
+nop;
+
diff --git a/src/shaders/render/exa_wm_write.g8b b/src/shaders/render/exa_wm_write.g8b
new file mode 100644
index 0000000..822578d
--- /dev/null
+++ b/src/shaders/render/exa_wm_write.g8b
@@ -0,0 +1,19 @@
+ { 0x00600001, 0x2800020c, 0x008d0000, 0x00000000 },
+ { 0x00600001, 0x2820020c, 0x008d0020, 0x00000000 },
+ { 0x00600001, 0x28403aec, 0x008d01c0, 0x00000000 },
+ { 0x00600001, 0x28603aec, 0x008d01e0, 0x00000000 },
+ { 0x00600001, 0x28803aec, 0x008d0200, 0x00000000 },
+ { 0x00600001, 0x28a03aec, 0x008d0220, 0x00000000 },
+ { 0x00600001, 0x28c03aec, 0x008d0240, 0x00000000 },
+ { 0x00600001, 0x28e03aec, 0x008d0260, 0x00000000 },
+ { 0x00600001, 0x29003aec, 0x008d0280, 0x00000000 },
+ { 0x00600001, 0x29203aec, 0x008d02a0, 0x00000000 },
+ { 0x05800031, 0x20000a40, 0x0e000800, 0x940b1000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
diff --git a/src/shaders/render/exa_wm_xy.g4b b/src/shaders/render/exa_wm_xy.g4b
index 327fc29..2b3b235 100644
--- a/src/shaders/render/exa_wm_xy.g4b
+++ b/src/shaders/render/exa_wm_xy.g4b
@@ -1,4 +1,4 @@
{ 0x00800040, 0x23c06d29, 0x00480028, 0x10101010 },
{ 0x00800040, 0x23806d29, 0x0048002a, 0x11001100 },
- { 0x00802040, 0x2100753d, 0x008d03c0, 0x00004020 },
- { 0x00802040, 0x2140753d, 0x008d0380, 0x00004024 },
+ { 0x00802040, 0x2540753d, 0x008d03c0, 0x00004020 },
+ { 0x00802040, 0x2580753d, 0x008d0380, 0x00004024 },
diff --git a/src/shaders/render/exa_wm_xy.g4b.gen5 b/src/shaders/render/exa_wm_xy.g4b.gen5
index 327fc29..2b3b235 100644
--- a/src/shaders/render/exa_wm_xy.g4b.gen5
+++ b/src/shaders/render/exa_wm_xy.g4b.gen5
@@ -1,4 +1,4 @@
{ 0x00800040, 0x23c06d29, 0x00480028, 0x10101010 },
{ 0x00800040, 0x23806d29, 0x0048002a, 0x11001100 },
- { 0x00802040, 0x2100753d, 0x008d03c0, 0x00004020 },
- { 0x00802040, 0x2140753d, 0x008d0380, 0x00004024 },
+ { 0x00802040, 0x2540753d, 0x008d03c0, 0x00004020 },
+ { 0x00802040, 0x2580753d, 0x008d0380, 0x00004024 },
diff --git a/src/shaders/render/exa_wm_yuv_color_balance.g4a b/src/shaders/render/exa_wm_yuv_color_balance.g4a
new file mode 100644
index 0000000..33ba67a
--- /dev/null
+++ b/src/shaders/render/exa_wm_yuv_color_balance.g4a
@@ -0,0 +1,38 @@
+/*
+ * Copyright © 2013 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Haihao Xiang <haihao.xiang@intel.com>
+ *
+ */
+
+include(`exa_wm.g4i')
+
+/* Color balance parameters: scalars read from g2, used by the shared .gxa code included below */
+define(`skip_color_balance', `g2.2<0,1,0>uw') /* the shared code jumps past the color balance when this equals 1 */
+define(`contrast', `g2.16<0,1,0>f')
+define(`brightness', `g2.20<0,1,0>f')
+define(`cos_c_s', `g2.24<0,1,0>f')
+define(`sin_c_s', `g2.28<0,1,0>f')
+define(`sin_c_s_t', `g2.28') /* same location as the previous entry, without region and type */
+
+include(`exa_wm_yuv_color_balance.gxa')
diff --git a/src/shaders/render/exa_wm_yuv_color_balance.g4b b/src/shaders/render/exa_wm_yuv_color_balance.g4b
new file mode 100644
index 0000000..cba9aca
--- /dev/null
+++ b/src/shaders/render/exa_wm_yuv_color_balance.g4b
@@ -0,0 +1,15 @@
+ { 0x01000010, 0x20002d3c, 0x00000042, 0x00010001 },
+ { 0x00010020, 0x34001c00, 0x00001400, 0x0000000d },
+ { 0x00802040, 0x24007fbd, 0x008d01c0, 0xbd808081 },
+ { 0x00802041, 0x240077bd, 0x008d0400, 0x00000050 },
+ { 0x00802040, 0x240077bd, 0x008d0400, 0x00000054 },
+ { 0x00802040, 0x21c07fbd, 0x008d0400, 0x3d808081 },
+ { 0x00802040, 0x24807fbd, 0x008d0200, 0xbf008084 },
+ { 0x00802040, 0x24407fbd, 0x008d0240, 0xbf008084 },
+ { 0x00802001, 0x240003fc, 0x00000000, 0x3f008084 },
+ { 0x00802048, 0x240077bc, 0x008d0440, 0x0000005c },
+ { 0x00802048, 0x220077bd, 0x008d0480, 0x00000058 },
+ { 0x00000041, 0x205c7fbd, 0x0000005c, 0xbf800000 },
+ { 0x00802001, 0x240003fc, 0x00000000, 0x3f008084 },
+ { 0x00802048, 0x240077bc, 0x008d0480, 0x0000005c },
+ { 0x00802048, 0x224077bd, 0x008d0440, 0x00000058 },
diff --git a/src/shaders/render/exa_wm_yuv_color_balance.g4b.gen5 b/src/shaders/render/exa_wm_yuv_color_balance.g4b.gen5
new file mode 100644
index 0000000..5a24a0e
--- /dev/null
+++ b/src/shaders/render/exa_wm_yuv_color_balance.g4b.gen5
@@ -0,0 +1,15 @@
+ { 0x01000010, 0x20002d3c, 0x00000042, 0x00010001 },
+ { 0x00010020, 0x34001c00, 0x00001400, 0x0000001a },
+ { 0x00802040, 0x24007fbd, 0x008d01c0, 0xbd808081 },
+ { 0x00802041, 0x240077bd, 0x008d0400, 0x00000050 },
+ { 0x00802040, 0x240077bd, 0x008d0400, 0x00000054 },
+ { 0x00802040, 0x21c07fbd, 0x008d0400, 0x3d808081 },
+ { 0x00802040, 0x24807fbd, 0x008d0200, 0xbf008084 },
+ { 0x00802040, 0x24407fbd, 0x008d0240, 0xbf008084 },
+ { 0x00802001, 0x240003fc, 0x00000000, 0x3f008084 },
+ { 0x00802048, 0x240077bc, 0x008d0440, 0x0000005c },
+ { 0x00802048, 0x220077bd, 0x008d0480, 0x00000058 },
+ { 0x00000041, 0x205c7fbd, 0x0000005c, 0xbf800000 },
+ { 0x00802001, 0x240003fc, 0x00000000, 0x3f008084 },
+ { 0x00802048, 0x240077bc, 0x008d0480, 0x0000005c },
+ { 0x00802048, 0x224077bd, 0x008d0440, 0x00000058 },
diff --git a/src/shaders/render/exa_wm_yuv_color_balance.g6a b/src/shaders/render/exa_wm_yuv_color_balance.g6a
new file mode 100644
index 0000000..6906357
--- /dev/null
+++ b/src/shaders/render/exa_wm_yuv_color_balance.g6a
@@ -0,0 +1,38 @@
+/*
+ * Copyright © 2013 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Haihao Xiang <haihao.xiang@intel.com>
+ *
+ */
+
+include(`exa_wm.g4i')
+
+/* Color balance parameters: scalars read from g6, used by the shared .gxa code included below */
+define(`skip_color_balance', `g6.2<0,1,0>uw') /* the shared code jumps past the color balance when this equals 1 */
+define(`contrast', `g6.16<0,1,0>f')
+define(`brightness', `g6.20<0,1,0>f')
+define(`cos_c_s', `g6.24<0,1,0>f')
+define(`sin_c_s', `g6.28<0,1,0>f')
+define(`sin_c_s_t', `g6.28') /* same location as the previous entry, without region and type */
+
+include(`exa_wm_yuv_color_balance.gxa')
diff --git a/src/shaders/render/exa_wm_yuv_color_balance.g6b b/src/shaders/render/exa_wm_yuv_color_balance.g6b
new file mode 100644
index 0000000..0a9e6b9
--- /dev/null
+++ b/src/shaders/render/exa_wm_yuv_color_balance.g6b
@@ -0,0 +1,15 @@
+ { 0x01000010, 0x20002d3c, 0x000000c2, 0x00010001 },
+ { 0x00010020, 0x34001c00, 0x00001400, 0x0000001a },
+ { 0x00800040, 0x24007fbd, 0x008d01c0, 0xbd808081 },
+ { 0x00800041, 0x240077bd, 0x008d0400, 0x000000d0 },
+ { 0x00800040, 0x240077bd, 0x008d0400, 0x000000d4 },
+ { 0x00800040, 0x21c07fbd, 0x008d0400, 0x3d808081 },
+ { 0x00800040, 0x24807fbd, 0x008d0200, 0xbf008084 },
+ { 0x00800040, 0x24407fbd, 0x008d0240, 0xbf008084 },
+ { 0x00800001, 0x240003fc, 0x00000000, 0x3f008084 },
+ { 0x00800048, 0x240077bc, 0x008d0440, 0x000000dc },
+ { 0x00800048, 0x220077bd, 0x008d0480, 0x000000d8 },
+ { 0x00000041, 0x20dc7fbd, 0x000000dc, 0xbf800000 },
+ { 0x00800001, 0x240003fc, 0x00000000, 0x3f008084 },
+ { 0x00800048, 0x240077bc, 0x008d0480, 0x000000dc },
+ { 0x00800048, 0x224077bd, 0x008d0440, 0x000000d8 },
diff --git a/src/shaders/render/exa_wm_yuv_color_balance.g7a b/src/shaders/render/exa_wm_yuv_color_balance.g7a
new file mode 100644
index 0000000..6906357
--- /dev/null
+++ b/src/shaders/render/exa_wm_yuv_color_balance.g7a
@@ -0,0 +1,38 @@
+/*
+ * Copyright © 2013 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Haihao Xiang <haihao.xiang@intel.com>
+ *
+ */
+
+include(`exa_wm.g4i')
+
+/* Color balance parameters: scalars read from g6, used by the shared .gxa code included below */
+define(`skip_color_balance', `g6.2<0,1,0>uw') /* the shared code jumps past the color balance when this equals 1 */
+define(`contrast', `g6.16<0,1,0>f')
+define(`brightness', `g6.20<0,1,0>f')
+define(`cos_c_s', `g6.24<0,1,0>f')
+define(`sin_c_s', `g6.28<0,1,0>f')
+define(`sin_c_s_t', `g6.28') /* same location as the previous entry, without region and type */
+
+include(`exa_wm_yuv_color_balance.gxa')
diff --git a/src/shaders/render/exa_wm_yuv_color_balance.g7b b/src/shaders/render/exa_wm_yuv_color_balance.g7b
new file mode 100644
index 0000000..0a9e6b9
--- /dev/null
+++ b/src/shaders/render/exa_wm_yuv_color_balance.g7b
@@ -0,0 +1,15 @@
+ { 0x01000010, 0x20002d3c, 0x000000c2, 0x00010001 },
+ { 0x00010020, 0x34001c00, 0x00001400, 0x0000001a },
+ { 0x00800040, 0x24007fbd, 0x008d01c0, 0xbd808081 },
+ { 0x00800041, 0x240077bd, 0x008d0400, 0x000000d0 },
+ { 0x00800040, 0x240077bd, 0x008d0400, 0x000000d4 },
+ { 0x00800040, 0x21c07fbd, 0x008d0400, 0x3d808081 },
+ { 0x00800040, 0x24807fbd, 0x008d0200, 0xbf008084 },
+ { 0x00800040, 0x24407fbd, 0x008d0240, 0xbf008084 },
+ { 0x00800001, 0x240003fc, 0x00000000, 0x3f008084 },
+ { 0x00800048, 0x240077bc, 0x008d0440, 0x000000dc },
+ { 0x00800048, 0x220077bd, 0x008d0480, 0x000000d8 },
+ { 0x00000041, 0x20dc7fbd, 0x000000dc, 0xbf800000 },
+ { 0x00800001, 0x240003fc, 0x00000000, 0x3f008084 },
+ { 0x00800048, 0x240077bc, 0x008d0480, 0x000000dc },
+ { 0x00800048, 0x224077bd, 0x008d0440, 0x000000d8 },
diff --git a/src/shaders/render/exa_wm_yuv_color_balance.g7b.haswell b/src/shaders/render/exa_wm_yuv_color_balance.g7b.haswell
new file mode 100644
index 0000000..2780c08
--- /dev/null
+++ b/src/shaders/render/exa_wm_yuv_color_balance.g7b.haswell
@@ -0,0 +1,15 @@
+ { 0x01000010, 0x20002d3c, 0x000000c2, 0x00010001 },
+ { 0x00010020, 0x34001c00, 0x00001400, 0x000000d0 },
+ { 0x00800040, 0x24007fbd, 0x008d01c0, 0xbd808081 },
+ { 0x00800041, 0x240077bd, 0x008d0400, 0x000000d0 },
+ { 0x00800040, 0x240077bd, 0x008d0400, 0x000000d4 },
+ { 0x00800040, 0x21c07fbd, 0x008d0400, 0x3d808081 },
+ { 0x00800040, 0x24807fbd, 0x008d0200, 0xbf008084 },
+ { 0x00800040, 0x24407fbd, 0x008d0240, 0xbf008084 },
+ { 0x00800001, 0x240003fc, 0x00000000, 0x3f008084 },
+ { 0x00800048, 0x240077bc, 0x008d0440, 0x000000dc },
+ { 0x00800048, 0x220077bd, 0x008d0480, 0x000000d8 },
+ { 0x00000041, 0x20dc7fbd, 0x000000dc, 0xbf800000 },
+ { 0x00800001, 0x240003fc, 0x00000000, 0x3f008084 },
+ { 0x00800048, 0x240077bc, 0x008d0480, 0x000000dc },
+ { 0x00800048, 0x224077bd, 0x008d0440, 0x000000d8 },
diff --git a/src/shaders/render/exa_wm_yuv_color_balance.g8a b/src/shaders/render/exa_wm_yuv_color_balance.g8a
new file mode 100644
index 0000000..f3cc28f
--- /dev/null
+++ b/src/shaders/render/exa_wm_yuv_color_balance.g8a
@@ -0,0 +1,39 @@
+/*
+ * Copyright © 2013 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Haihao Xiang <haihao.xiang@intel.com>
+ * Zhao Yakui <yakui.zhao@intel.com>
+ *
+ */
+
+include(`exa_wm.g4i')
+
+/* Color balance parameters: scalars read from g6, used by the shared .gxa code included below */
+define(`skip_color_balance', `g6.2<0,1,0>uw') /* the shared code jumps past the color balance when this equals 1 */
+define(`contrast', `g6.16<0,1,0>f')
+define(`brightness', `g6.20<0,1,0>f')
+define(`cos_c_s', `g6.24<0,1,0>f')
+define(`sin_c_s', `g6.28<0,1,0>f')
+define(`sin_c_s_t', `g6.28') /* same location as the previous entry, without region and type */
+
+include(`exa_wm_yuv_color_balance.gxa')
diff --git a/src/shaders/render/exa_wm_yuv_color_balance.g8b b/src/shaders/render/exa_wm_yuv_color_balance.g8b
new file mode 100644
index 0000000..5dc2c8b
--- /dev/null
+++ b/src/shaders/render/exa_wm_yuv_color_balance.g8b
@@ -0,0 +1,15 @@
+ { 0x01000010, 0x200012e0, 0x160000c2, 0x00010001 },
+ { 0x00010020, 0x34000000, 0x0e001400, 0x000000d0 },
+ { 0x00800040, 0x24003ae8, 0x3e8d01c0, 0xbd808081 },
+ { 0x00800041, 0x24003ae8, 0x3a8d0400, 0x000000d0 },
+ { 0x00800040, 0x24003ae8, 0x3a8d0400, 0x000000d4 },
+ { 0x00800040, 0x21c03ae8, 0x3e8d0400, 0x3d808081 },
+ { 0x00800040, 0x24803ae8, 0x3e8d0200, 0xbf008084 },
+ { 0x00800040, 0x24403ae8, 0x3e8d0240, 0xbf008084 },
+ { 0x00800001, 0x24003ee0, 0x38000000, 0x3f008084 },
+ { 0x00800048, 0x24003ae0, 0x3a8d0440, 0x000000dc },
+ { 0x00800048, 0x22003ae8, 0x3a8d0480, 0x000000d8 },
+ { 0x00000041, 0x20dc3ae8, 0x3e0000dc, 0xbf800000 },
+ { 0x00800001, 0x24003ee0, 0x38000000, 0x3f008084 },
+ { 0x00800048, 0x24003ae0, 0x3a8d0480, 0x000000dc },
+ { 0x00800048, 0x22403ae8, 0x3a8d0440, 0x000000d8 },
diff --git a/src/shaders/render/exa_wm_yuv_color_balance.gxa b/src/shaders/render/exa_wm_yuv_color_balance.gxa
new file mode 100644
index 0000000..948067c
--- /dev/null
+++ b/src/shaders/render/exa_wm_yuv_color_balance.gxa
@@ -0,0 +1,75 @@
+/*
+ * Copyright © 2013 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Haihao Xiang <haihao.xiang@intel.com>
+ *
+ */
+
+define(`Cr', `src_sample_b') /* inputs: Cr, Y and Cb live in the b, r and g source sample registers */
+define(`Cr_01', `src_sample_b_01')
+define(`Cr_23', `src_sample_b_23')
+
+define(`Y', `src_sample_r')
+define(`Y_01', `src_sample_r_01')
+define(`Y_23', `src_sample_r_23')
+
+define(`Cb', `src_sample_g')
+define(`Cb_01', `src_sample_g_01')
+define(`Cb_23', `src_sample_g_23')
+
+define(`Crn', `color_balance_g') /* temporaries: Crn, Yn and Cbn receive the offset-removed values */
+define(`Crn_01', `color_balance_g_01')
+define(`Crn_23', `color_balance_g_23')
+
+define(`Yn', `color_balance_r')
+define(`Yn_01', `color_balance_r_01')
+define(`Yn_23', `color_balance_r_23')
+
+define(`Cbn', `color_balance_b')
+define(`Cbn_01', `color_balance_b_01')
+define(`Cbn_23', `color_balance_b_23')
+
+cmp.e.f0.0 (1) null skip_color_balance 0x1uw {align1}; /* f0.0 is set when the skip flag equals 1 */
+(f0.0) jmpi _DONE_COLOR_BALANCE; /* predicated jump past every write below */
+
+/* Yout = (Yin - 16 / 255) * contrast + brightness + 16 / 255 */
+add (16) Yn<1>F Y<8,8,1>F -0.0627451F { compr align1 }; /* -0.0627451 is -16/255 */
+mul (16) Yn<1>F Yn<8,8,1>F contrast { compr align1 };
+add (16) Yn<1>F Yn<8,8,1>F brightness { compr align1 };
+add (16) Y<1>F Yn<8,8,1>F 0.0627451F { compr align1 }; /* result is written back over the luma input */
+
+/* Uout = (Uin - 128 / 255) * cos_c_s + (Vin - 128 / 255) * sin_c_s + 128 / 255 */
+/* Vout = (Vin - 128 / 255) * cos_c_s - (Uin - 128 / 255) * sin_c_s + 128 / 255 */
+add (16) Cbn<1>F Cb<8,8,1>F -0.501961F { compr align1 }; /* -0.501961 is -128/255 */
+add (16) Crn<1>F Cr<8,8,1>F -0.501961F { compr align1 }; /* both chroma copies are made before either output is overwritten */
+
+mov (16) acc0<1>F 0.501961F { compr align1 }; /* seed the accumulator with the 128/255 offset */
+mac (16) acc0<1>F Crn<8,8,1>F sin_c_s { compr align1 }; /* accumulate the cross-channel term of Uout */
+mac (16) Cb<1>F Cbn<8,8,1>F cos_c_s { compr align1 }; /* add the same-channel term and store Uout over the Cb input */
+
+mul (1) sin_c_s_t<1>F sin_c_s -1.0F { align1}; /* flip the sign of the sine factor for the Vout formula; presumes sin_c_s_t aliases sin_c_s as the g8a wrapper defines it */
+mov (16) acc0<1>F 0.501961F { compr align1 }; /* re-seed the accumulator with the 128/255 offset */
+mac (16) acc0<1>F Cbn<8,8,1>F sin_c_s { compr align1 }; /* accumulate the cross-channel term using the sign-flipped factor */
+mac (16) Cr<1>F Crn<8,8,1>F cos_c_s { compr align1 }; /* add the same-channel term and store Vout over the Cr input */
+
+_DONE_COLOR_BALANCE:
diff --git a/src/shaders/render/exa_wm_yuv_rgb.g4a b/src/shaders/render/exa_wm_yuv_rgb.g4a
index b3abe4b..e3d2464 100644
--- a/src/shaders/render/exa_wm_yuv_rgb.g4a
+++ b/src/shaders/render/exa_wm_yuv_rgb.g4a
@@ -1,5 +1,5 @@
/*
- * Copyright © 2006 Intel Corporation
+ * Copyright © 2006-2013 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -27,72 +27,6 @@
*/
include(`exa_wm.g4i')
+include(`exa_yuv_gen4.g4i')
+include(`exa_yuv_rgb.gxa')
-define(`YCbCr_base', `src_sample_base')
-
-define(`Cr', `src_sample_b')
-define(`Cr_01', `src_sample_b_01')
-define(`Cr_23', `src_sample_b_23')
-
-define(`Y', `src_sample_r')
-define(`Y_01', `src_sample_r_01')
-define(`Y_23', `src_sample_r_23')
-
-define(`Cb', `src_sample_g')
-define(`Cb_01', `src_sample_g_01')
-define(`Cb_23', `src_sample_g_23')
-
-define(`Crn', `mask_sample_g')
-define(`Crn_01', `mask_sample_g_01')
-define(`Crn_23', `mask_sample_g_23')
-
-define(`Yn', `mask_sample_r')
-define(`Yn_01', `mask_sample_r_01')
-define(`Yn_23', `mask_sample_r_23')
-
-define(`Cbn', `mask_sample_b')
-define(`Cbn_01', `mask_sample_b_01')
-define(`Cbn_23', `mask_sample_b_23')
-
- /* color space conversion function:
- * R = Clamp ( 1.164(Y-16/255) + 1.596(Cr-128/255), 0, 1)
- * G = Clamp ( 1.164(Y-16/255) - 0.813(Cr-128/255) - 0.392(Cb-128/255), 0, 1)
- * B = Clamp ( 1.164(Y-16/255) + 2.017(Cb-128/255), 0, 1)
- */
-
- /* Normalize Y, Cb and Cr:
- *
- * Yn = (Y - 16/255) * 1.164
- * Crn = Cr - 128 / 255
- * Cbn = Cb - 128 / 255
- */
-add (16) Yn<1>F Y<8,8,1>F -0.0627451F { compr align1 };
-mul (16) Yn<1>F Yn<8,8,1>F 1.164F { compr align1 };
-
-add (16) Crn<1>F Cr<8,8,1>F -0.501961F { compr align1 };
-
-add (16) Cbn<1>F Cb<8,8,1>F -0.501961F { compr align1 };
-
- /*
- * R = Y + Cr * 1.596
- */
-mov (16) acc0<1>F Yn<8,8,1>F { compr align1 };
-mac.sat(16) src_sample_r<1>F Crn<8,8,1>F 1.596F { compr align1 };
-
- /*
- * G = Crn * -0.813 + Cbn * -0.392 + Y
- */
-mov (16) acc0<1>F Yn<8,8,1>F { compr align1 };
-mac (16) acc0<1>F Crn<8,8,1>F -0.813F { compr align1 };
-mac.sat(16) src_sample_g<1>F Cbn<8,8,1>F -0.392F { compr align1 };
-
- /*
- * B = Cbn * 2.017 + Y
- */
-mov (16) acc0<1>F Yn<8,8,1>F { compr align1 };
-mac.sat(16) src_sample_b<1>F Cbn<8,8,1>F 2.017F { compr align1 };
-
- /*
- * A = 1.0
- */
-mov (16) src_sample_a<1>F 1.0F { compr align1 };
diff --git a/src/shaders/render/exa_wm_yuv_rgb.g4b b/src/shaders/render/exa_wm_yuv_rgb.g4b
index 6b99838..b116ece 100644
--- a/src/shaders/render/exa_wm_yuv_rgb.g4b
+++ b/src/shaders/render/exa_wm_yuv_rgb.g4b
@@ -1,12 +1,13 @@
- { 0x00802040, 0x22c07fbd, 0x008d01c0, 0xbd808081 },
- { 0x00802041, 0x22c07fbd, 0x008d02c0, 0x3f94fdf4 },
- { 0x00802040, 0x23007fbd, 0x008d0240, 0xbf008084 },
- { 0x00802040, 0x23407fbd, 0x008d0200, 0xbf008084 },
- { 0x00802001, 0x240003bc, 0x008d02c0, 0x00000000 },
- { 0x80802048, 0x21c07fbd, 0x008d0300, 0x3fcc49ba },
- { 0x00802001, 0x240003bc, 0x008d02c0, 0x00000000 },
- { 0x00802048, 0x24007fbc, 0x008d0300, 0xbf5020c5 },
- { 0x80802048, 0x22007fbd, 0x008d0340, 0xbec8b439 },
- { 0x00802001, 0x240003bc, 0x008d02c0, 0x00000000 },
- { 0x80802048, 0x22407fbd, 0x008d0340, 0x40011687 },
+ { 0x00802040, 0x22c077bd, 0x008d01c0, 0x0000006c },
+ { 0x00802040, 0x230077bd, 0x008d0200, 0x0000007c },
+ { 0x00802040, 0x234077bd, 0x008d0240, 0x0000008c },
+ { 0x00802041, 0x240077bc, 0x008d02c0, 0x00000060 },
+ { 0x00802048, 0x240077bc, 0x008d0300, 0x00000064 },
+ { 0x80802048, 0x21c077bd, 0x008d0340, 0x00000068 },
+ { 0x00802041, 0x240077bc, 0x008d02c0, 0x00000070 },
+ { 0x00802048, 0x240077bc, 0x008d0300, 0x00000074 },
+ { 0x80802048, 0x220077bd, 0x008d0340, 0x00000078 },
+ { 0x00802041, 0x240077bc, 0x008d02c0, 0x00000080 },
+ { 0x00802048, 0x240077bc, 0x008d0300, 0x00000084 },
+ { 0x80802048, 0x224077bd, 0x008d0340, 0x00000088 },
{ 0x00802001, 0x228003fd, 0x00000000, 0x3f800000 },
diff --git a/src/shaders/render/exa_wm_yuv_rgb.g4b.gen5 b/src/shaders/render/exa_wm_yuv_rgb.g4b.gen5
index 6b99838..b116ece 100644
--- a/src/shaders/render/exa_wm_yuv_rgb.g4b.gen5
+++ b/src/shaders/render/exa_wm_yuv_rgb.g4b.gen5
@@ -1,12 +1,13 @@
- { 0x00802040, 0x22c07fbd, 0x008d01c0, 0xbd808081 },
- { 0x00802041, 0x22c07fbd, 0x008d02c0, 0x3f94fdf4 },
- { 0x00802040, 0x23007fbd, 0x008d0240, 0xbf008084 },
- { 0x00802040, 0x23407fbd, 0x008d0200, 0xbf008084 },
- { 0x00802001, 0x240003bc, 0x008d02c0, 0x00000000 },
- { 0x80802048, 0x21c07fbd, 0x008d0300, 0x3fcc49ba },
- { 0x00802001, 0x240003bc, 0x008d02c0, 0x00000000 },
- { 0x00802048, 0x24007fbc, 0x008d0300, 0xbf5020c5 },
- { 0x80802048, 0x22007fbd, 0x008d0340, 0xbec8b439 },
- { 0x00802001, 0x240003bc, 0x008d02c0, 0x00000000 },
- { 0x80802048, 0x22407fbd, 0x008d0340, 0x40011687 },
+ { 0x00802040, 0x22c077bd, 0x008d01c0, 0x0000006c },
+ { 0x00802040, 0x230077bd, 0x008d0200, 0x0000007c },
+ { 0x00802040, 0x234077bd, 0x008d0240, 0x0000008c },
+ { 0x00802041, 0x240077bc, 0x008d02c0, 0x00000060 },
+ { 0x00802048, 0x240077bc, 0x008d0300, 0x00000064 },
+ { 0x80802048, 0x21c077bd, 0x008d0340, 0x00000068 },
+ { 0x00802041, 0x240077bc, 0x008d02c0, 0x00000070 },
+ { 0x00802048, 0x240077bc, 0x008d0300, 0x00000074 },
+ { 0x80802048, 0x220077bd, 0x008d0340, 0x00000078 },
+ { 0x00802041, 0x240077bc, 0x008d02c0, 0x00000080 },
+ { 0x00802048, 0x240077bc, 0x008d0300, 0x00000084 },
+ { 0x80802048, 0x224077bd, 0x008d0340, 0x00000088 },
{ 0x00802001, 0x228003fd, 0x00000000, 0x3f800000 },
diff --git a/src/shaders/render/exa_wm_yuv_rgb.g6a b/src/shaders/render/exa_wm_yuv_rgb.g6a
index b3abe4b..ede0298 100644
--- a/src/shaders/render/exa_wm_yuv_rgb.g6a
+++ b/src/shaders/render/exa_wm_yuv_rgb.g6a
@@ -1,5 +1,5 @@
/*
- * Copyright © 2006 Intel Corporation
+ * Copyright © 2006-2013 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -27,72 +27,5 @@
*/
include(`exa_wm.g4i')
-
-define(`YCbCr_base', `src_sample_base')
-
-define(`Cr', `src_sample_b')
-define(`Cr_01', `src_sample_b_01')
-define(`Cr_23', `src_sample_b_23')
-
-define(`Y', `src_sample_r')
-define(`Y_01', `src_sample_r_01')
-define(`Y_23', `src_sample_r_23')
-
-define(`Cb', `src_sample_g')
-define(`Cb_01', `src_sample_g_01')
-define(`Cb_23', `src_sample_g_23')
-
-define(`Crn', `mask_sample_g')
-define(`Crn_01', `mask_sample_g_01')
-define(`Crn_23', `mask_sample_g_23')
-
-define(`Yn', `mask_sample_r')
-define(`Yn_01', `mask_sample_r_01')
-define(`Yn_23', `mask_sample_r_23')
-
-define(`Cbn', `mask_sample_b')
-define(`Cbn_01', `mask_sample_b_01')
-define(`Cbn_23', `mask_sample_b_23')
-
- /* color space conversion function:
- * R = Clamp ( 1.164(Y-16/255) + 1.596(Cr-128/255), 0, 1)
- * G = Clamp ( 1.164(Y-16/255) - 0.813(Cr-128/255) - 0.392(Cb-128/255), 0, 1)
- * B = Clamp ( 1.164(Y-16/255) + 2.017(Cb-128/255), 0, 1)
- */
-
- /* Normalize Y, Cb and Cr:
- *
- * Yn = (Y - 16/255) * 1.164
- * Crn = Cr - 128 / 255
- * Cbn = Cb - 128 / 255
- */
-add (16) Yn<1>F Y<8,8,1>F -0.0627451F { compr align1 };
-mul (16) Yn<1>F Yn<8,8,1>F 1.164F { compr align1 };
-
-add (16) Crn<1>F Cr<8,8,1>F -0.501961F { compr align1 };
-
-add (16) Cbn<1>F Cb<8,8,1>F -0.501961F { compr align1 };
-
- /*
- * R = Y + Cr * 1.596
- */
-mov (16) acc0<1>F Yn<8,8,1>F { compr align1 };
-mac.sat(16) src_sample_r<1>F Crn<8,8,1>F 1.596F { compr align1 };
-
- /*
- * G = Crn * -0.813 + Cbn * -0.392 + Y
- */
-mov (16) acc0<1>F Yn<8,8,1>F { compr align1 };
-mac (16) acc0<1>F Crn<8,8,1>F -0.813F { compr align1 };
-mac.sat(16) src_sample_g<1>F Cbn<8,8,1>F -0.392F { compr align1 };
-
- /*
- * B = Cbn * 2.017 + Y
- */
-mov (16) acc0<1>F Yn<8,8,1>F { compr align1 };
-mac.sat(16) src_sample_b<1>F Cbn<8,8,1>F 2.017F { compr align1 };
-
- /*
- * A = 1.0
- */
-mov (16) src_sample_a<1>F 1.0F { compr align1 };
+include(`exa_yuv_gen6.g4i')
+include(`exa_yuv_rgb.gxa')
diff --git a/src/shaders/render/exa_wm_yuv_rgb.g6b b/src/shaders/render/exa_wm_yuv_rgb.g6b
index 6c8c724..d09ae00 100644
--- a/src/shaders/render/exa_wm_yuv_rgb.g6b
+++ b/src/shaders/render/exa_wm_yuv_rgb.g6b
@@ -1,12 +1,13 @@
- { 0x00800040, 0x22c07fbd, 0x008d01c0, 0xbd808081 },
- { 0x00800041, 0x22c07fbd, 0x008d02c0, 0x3f94fdf4 },
- { 0x00800040, 0x23007fbd, 0x008d0240, 0xbf008084 },
- { 0x00800040, 0x23407fbd, 0x008d0200, 0xbf008084 },
- { 0x00800001, 0x240003bc, 0x008d02c0, 0x00000000 },
- { 0x80800048, 0x21c07fbd, 0x008d0300, 0x3fcc49ba },
- { 0x00800001, 0x240003bc, 0x008d02c0, 0x00000000 },
- { 0x00800048, 0x24007fbc, 0x008d0300, 0xbf5020c5 },
- { 0x80800048, 0x22007fbd, 0x008d0340, 0xbec8b439 },
- { 0x00800001, 0x240003bc, 0x008d02c0, 0x00000000 },
- { 0x80800048, 0x22407fbd, 0x008d0340, 0x40011687 },
+ { 0x00800040, 0x22c077bd, 0x008d01c0, 0x000000ec },
+ { 0x00800040, 0x230077bd, 0x008d0200, 0x000000fc },
+ { 0x00800040, 0x234077bd, 0x008d0240, 0x0000010c },
+ { 0x00800041, 0x240077bc, 0x008d02c0, 0x000000e0 },
+ { 0x00800048, 0x240077bc, 0x008d0300, 0x000000e4 },
+ { 0x80800048, 0x21c077bd, 0x008d0340, 0x000000e8 },
+ { 0x00800041, 0x240077bc, 0x008d02c0, 0x000000f0 },
+ { 0x00800048, 0x240077bc, 0x008d0300, 0x000000f4 },
+ { 0x80800048, 0x220077bd, 0x008d0340, 0x000000f8 },
+ { 0x00800041, 0x240077bc, 0x008d02c0, 0x00000100 },
+ { 0x00800048, 0x240077bc, 0x008d0300, 0x00000104 },
+ { 0x80800048, 0x224077bd, 0x008d0340, 0x00000108 },
{ 0x00800001, 0x228003fd, 0x00000000, 0x3f800000 },
diff --git a/src/shaders/render/exa_wm_yuv_rgb.g7a b/src/shaders/render/exa_wm_yuv_rgb.g7a
index 5cd33e2..ede0298 100644
--- a/src/shaders/render/exa_wm_yuv_rgb.g7a
+++ b/src/shaders/render/exa_wm_yuv_rgb.g7a
@@ -1,5 +1,5 @@
/*
- * Copyright © 2006 Intel Corporation
+ * Copyright © 2006-2013 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -27,72 +27,5 @@
*/
include(`exa_wm.g4i')
-
-define(`YCbCr_base', `src_sample_base')
-
-define(`Cr', `src_sample_b')
-define(`Cr_01', `src_sample_b_01')
-define(`Cr_23', `src_sample_b_23')
-
-define(`Y', `src_sample_r')
-define(`Y_01', `src_sample_r_01')
-define(`Y_23', `src_sample_r_23')
-
-define(`Cb', `src_sample_g')
-define(`Cb_01', `src_sample_g_01')
-define(`Cb_23', `src_sample_g_23')
-
-define(`Crn', `mask_sample_g')
-define(`Crn_01', `mask_sample_g_01')
-define(`Crn_23', `mask_sample_g_23')
-
-define(`Yn', `mask_sample_r')
-define(`Yn_01', `mask_sample_r_01')
-define(`Yn_23', `mask_sample_r_23')
-
-define(`Cbn', `mask_sample_b')
-define(`Cbn_01', `mask_sample_b_01')
-define(`Cbn_23', `mask_sample_b_23')
-
- /* color space conversion function:
- * R = Clamp ( 1.164(Y-16/255) + 1.596(Cr-128/255), 0, 1)
- * G = Clamp ( 1.164(Y-16/255) - 0.813(Cr-128/255) - 0.392(Cb-128/255), 0, 1)
- * B = Clamp ( 1.164(Y-16/255) + 2.017(Cb-128/255), 0, 1)
- */
-
- /* Normalize Y, Cb and Cr:
- *
- * Yn = (Y - 16/255) * 1.164
- * Crn = Cr - 128 / 255
- * Cbn = Cb - 128 / 255
- */
-add (16) Yn<1>F Y<8,8,1>F -0.0627451F { compr align1 };
-mul (16) Yn<1>F Yn<8,8,1>F 1.164F { compr align1 };
-
-add (16) Crn<1>F Cr<8,8,1>F -0.501961F { compr align1 };
-
-add (16) Cbn<1>F Cb<8,8,1>F -0.501961F { compr align1 };
-
- /*
- * R = Y + Cr * 1.596
- */
-mov (16) acc0<1>F Yn<8,8,1>F { compr align1 };
-mac.sat(16) src_sample_r<1>F Crn<8,8,1>F 1.596F { compr align1 };
-
- /*
- * G = Crn * -0.813 + Cbn * -0.392 + Y
- */
-mov (16) acc0<1>F Yn<8,8,1>F { compr align1 };
-mac (16) acc0<1>F Crn<8,8,1>F -0.813F { compr align1 };
-mac.sat(16) src_sample_g<1>F Cbn<8,8,1>F -0.392F { compr align1 };
-
- /*
- * B = Cbn * 2.017 + Y
- */
-mov (16) acc0<1>F Yn<8,8,1>F { compr align1 };
-mac.sat(16) src_sample_b<1>F Cbn<8,8,1>F 2.017F { compr align1 };
-
- /*
- * A = 1.0
- */
-mov (16) src_sample_a<1>F 1.0F { compr align1 };
+include(`exa_yuv_gen6.g4i')
+include(`exa_yuv_rgb.gxa')
diff --git a/src/shaders/render/exa_wm_yuv_rgb.g7b b/src/shaders/render/exa_wm_yuv_rgb.g7b
index 6c8c724..d09ae00 100644
--- a/src/shaders/render/exa_wm_yuv_rgb.g7b
+++ b/src/shaders/render/exa_wm_yuv_rgb.g7b
@@ -1,12 +1,13 @@
- { 0x00800040, 0x22c07fbd, 0x008d01c0, 0xbd808081 },
- { 0x00800041, 0x22c07fbd, 0x008d02c0, 0x3f94fdf4 },
- { 0x00800040, 0x23007fbd, 0x008d0240, 0xbf008084 },
- { 0x00800040, 0x23407fbd, 0x008d0200, 0xbf008084 },
- { 0x00800001, 0x240003bc, 0x008d02c0, 0x00000000 },
- { 0x80800048, 0x21c07fbd, 0x008d0300, 0x3fcc49ba },
- { 0x00800001, 0x240003bc, 0x008d02c0, 0x00000000 },
- { 0x00800048, 0x24007fbc, 0x008d0300, 0xbf5020c5 },
- { 0x80800048, 0x22007fbd, 0x008d0340, 0xbec8b439 },
- { 0x00800001, 0x240003bc, 0x008d02c0, 0x00000000 },
- { 0x80800048, 0x22407fbd, 0x008d0340, 0x40011687 },
+ { 0x00800040, 0x22c077bd, 0x008d01c0, 0x000000ec },
+ { 0x00800040, 0x230077bd, 0x008d0200, 0x000000fc },
+ { 0x00800040, 0x234077bd, 0x008d0240, 0x0000010c },
+ { 0x00800041, 0x240077bc, 0x008d02c0, 0x000000e0 },
+ { 0x00800048, 0x240077bc, 0x008d0300, 0x000000e4 },
+ { 0x80800048, 0x21c077bd, 0x008d0340, 0x000000e8 },
+ { 0x00800041, 0x240077bc, 0x008d02c0, 0x000000f0 },
+ { 0x00800048, 0x240077bc, 0x008d0300, 0x000000f4 },
+ { 0x80800048, 0x220077bd, 0x008d0340, 0x000000f8 },
+ { 0x00800041, 0x240077bc, 0x008d02c0, 0x00000100 },
+ { 0x00800048, 0x240077bc, 0x008d0300, 0x00000104 },
+ { 0x80800048, 0x224077bd, 0x008d0340, 0x00000108 },
{ 0x00800001, 0x228003fd, 0x00000000, 0x3f800000 },
diff --git a/src/shaders/render/exa_wm_yuv_rgb.g8a b/src/shaders/render/exa_wm_yuv_rgb.g8a
new file mode 100644
index 0000000..9da53c8
--- /dev/null
+++ b/src/shaders/render/exa_wm_yuv_rgb.g8a
@@ -0,0 +1,32 @@
+/*
+ * Copyright © 2013 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Keith Packard <keithp@keithp.com>
+ * Eric Anholt <eric@anholt.net>
+ * Zhao Yakui <yakui.zhao@intel.com>
+ *
+ */
+
+include(`exa_wm.g4i')
+include(`exa_yuv_gen6.g4i')
+include(`exa_yuv_rgb.gxa')
diff --git a/src/shaders/render/exa_wm_yuv_rgb.g8b b/src/shaders/render/exa_wm_yuv_rgb.g8b
new file mode 100644
index 0000000..6b6b4d1
--- /dev/null
+++ b/src/shaders/render/exa_wm_yuv_rgb.g8b
@@ -0,0 +1,13 @@
+ { 0x00800040, 0x22c03ae8, 0x3a8d01c0, 0x000000ec },
+ { 0x00800040, 0x23003ae8, 0x3a8d0200, 0x000000fc },
+ { 0x00800040, 0x23403ae8, 0x3a8d0240, 0x0000010c },
+ { 0x00800041, 0x24003ae0, 0x3a8d02c0, 0x000000e0 },
+ { 0x00800048, 0x24003ae0, 0x3a8d0300, 0x000000e4 },
+ { 0x80800048, 0x21c03ae8, 0x3a8d0340, 0x000000e8 },
+ { 0x00800041, 0x24003ae0, 0x3a8d02c0, 0x000000f0 },
+ { 0x00800048, 0x24003ae0, 0x3a8d0300, 0x000000f4 },
+ { 0x80800048, 0x22003ae8, 0x3a8d0340, 0x000000f8 },
+ { 0x00800041, 0x24003ae0, 0x3a8d02c0, 0x00000100 },
+ { 0x00800048, 0x24003ae0, 0x3a8d0300, 0x00000104 },
+ { 0x80800048, 0x22403ae8, 0x3a8d0340, 0x00000108 },
+ { 0x00800001, 0x22803ee8, 0x38000000, 0x3f800000 },
diff --git a/src/shaders/render/exa_yuv_gen4.g4i b/src/shaders/render/exa_yuv_gen4.g4i
new file mode 100644
index 0000000..5a66616
--- /dev/null
+++ b/src/shaders/render/exa_yuv_gen4.g4i
@@ -0,0 +1,42 @@
+/*
+ * Copyright © 2013 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Zhao Yakui <yakui.zhao@intel.com>
+ */
+
+/* YUV to RGB matrix coefficients and input offsets, read from registers g3 and g4 */
+
+define(`coef_ry', `g3.0<0,1,0>F') /* multiplies the offset luma in the R row */
+define(`coef_ru', `g3.4<0,1,0>F') /* multiplies the offset Cb in the R row */
+define(`coef_rv', `g3.8<0,1,0>F') /* multiplies the offset Cr in the R row */
+define(`coef_yd', `g3.12<0,1,0>F') /* offset added to the luma input */
+
+define(`coef_gy', `g3.16<0,1,0>F') /* multiplies the offset luma in the G row */
+define(`coef_gu', `g3.20<0,1,0>F') /* multiplies the offset Cb in the G row */
+define(`coef_gv', `g3.24<0,1,0>F') /* multiplies the offset Cr in the G row */
+define(`coef_ud', `g3.28<0,1,0>F') /* offset added to the Cb input */
+
+define(`coef_by', `g4.0<0,1,0>F') /* multiplies the offset luma in the B row */
+define(`coef_bu', `g4.4<0,1,0>F') /* multiplies the offset Cb in the B row */
+define(`coef_bv', `g4.8<0,1,0>F') /* multiplies the offset Cr in the B row */
+define(`coef_vd', `g4.12<0,1,0>F') /* offset added to the Cr input */
diff --git a/src/shaders/render/exa_yuv_gen6.g4i b/src/shaders/render/exa_yuv_gen6.g4i
new file mode 100644
index 0000000..a8d69ee
--- /dev/null
+++ b/src/shaders/render/exa_yuv_gen6.g4i
@@ -0,0 +1,42 @@
+/*
+ * Copyright © 2013 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Zhao Yakui <yakui.zhao@intel.com>
+ */
+/* YUV to RGB matrix coefficients and input offsets, read from registers g7 and g8 */
+
+
+define(`coef_ry', `g7.0<0,1,0>F') /* multiplies the offset luma in the R row */
+define(`coef_ru', `g7.4<0,1,0>F') /* multiplies the offset Cb in the R row */
+define(`coef_rv', `g7.8<0,1,0>F') /* multiplies the offset Cr in the R row */
+define(`coef_yd', `g7.12<0,1,0>F') /* offset added to the luma input */
+
+define(`coef_gy', `g7.16<0,1,0>F') /* multiplies the offset luma in the G row */
+define(`coef_gu', `g7.20<0,1,0>F') /* multiplies the offset Cb in the G row */
+define(`coef_gv', `g7.24<0,1,0>F') /* multiplies the offset Cr in the G row */
+define(`coef_ud', `g7.28<0,1,0>F') /* offset added to the Cb input */
+
+define(`coef_by', `g8.0<0,1,0>F') /* multiplies the offset luma in the B row */
+define(`coef_bu', `g8.4<0,1,0>F') /* multiplies the offset Cb in the B row */
+define(`coef_bv', `g8.8<0,1,0>F') /* multiplies the offset Cr in the B row */
+define(`coef_vd', `g8.12<0,1,0>F') /* offset added to the Cr input */
diff --git a/src/shaders/render/exa_yuv_rgb.gxa b/src/shaders/render/exa_yuv_rgb.gxa
new file mode 100644
index 0000000..656ae73
--- /dev/null
+++ b/src/shaders/render/exa_yuv_rgb.gxa
@@ -0,0 +1,74 @@
+/*
+ * Copyright © 2013 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Zhao Yakui <yakui.zhao@intel.com>
+ */
+
+define(`YCbCr_base', `src_sample_base')
+
+define(`Cr', `src_sample_b') /* inputs: Cr, Y and Cb live in the b, r and g source sample registers */
+define(`Cr_01', `src_sample_b_01')
+define(`Cr_23', `src_sample_b_23')
+
+define(`Y', `src_sample_r')
+define(`Y_01', `src_sample_r_01')
+define(`Y_23', `src_sample_r_23')
+
+define(`Cb', `src_sample_g')
+define(`Cb_01', `src_sample_g_01')
+define(`Cb_23', `src_sample_g_23')
+
+define(`Crn', `mask_sample_b') /* temporaries: the offset inputs are kept in the mask sample registers */
+define(`Crn_01', `mask_sample_b_01')
+define(`Crn_23', `mask_sample_b_23')
+
+define(`Yn', `mask_sample_r')
+define(`Yn_01', `mask_sample_r_01')
+define(`Yn_23', `mask_sample_r_23')
+
+define(`Cbn', `mask_sample_g')
+define(`Cbn_01', `mask_sample_g_01')
+define(`Cbn_23', `mask_sample_g_23')
+
+add (16) Yn<1>F Y<8,8,1>F coef_yd { compr align1 }; /* luma plus its offset coefficient, kept in a temporary */
+
+add (16) Cbn<1>F Cb<8,8,1>F coef_ud { compr align1 }; /* Cb plus its offset coefficient, kept in a temporary */
+
+add (16) Crn<1>F Cr<8,8,1>F coef_vd { compr align1 }; /* Cr plus its offset coefficient, kept in a temporary */
+
+mul (16) acc0<1>F Yn<8,8,1>F coef_ry { compr align1 }; /* R row: start the accumulator with the luma product */
+mac (16) acc0<1>F Cbn<8,8,1>F coef_ru { compr align1 }; /* R row: accumulate the Cb product */
+mac.sat (16) src_sample_r<1>F Crn<8,8,1>F coef_rv { compr align1 }; /* R row: add the Cr product, saturate, store over the luma input; later rows read only the temporaries */
+
+mul (16) acc0<1>F Yn<8,8,1>F coef_gy { compr align1 }; /* G row: start the accumulator with the luma product */
+mac (16) acc0<1>F Cbn<8,8,1>F coef_gu { compr align1 }; /* G row: accumulate the Cb product */
+mac.sat(16) src_sample_g<1>F Crn<8,8,1>F coef_gv { compr align1 }; /* G row: add the Cr product, saturate, store over the Cb input */
+
+mul (16) acc0<1>F Yn<8,8,1>F coef_by { compr align1 }; /* B row: start the accumulator with the luma product */
+mac (16) acc0<1>F Cbn<8,8,1>F coef_bu { compr align1 }; /* B row: accumulate the Cb product */
+mac.sat(16) src_sample_b<1>F Crn<8,8,1>F coef_bv { compr align1 }; /* B row: add the Cr product, saturate, store over the Cr input */
+
+ /*
+ * A = 1.0
+ */
+mov (16) src_sample_a<1>F 1.0F { compr align1 };
diff --git a/src/shaders/utils/Makefile.am b/src/shaders/utils/Makefile.am
index b8f3121..dd19d62 100644
--- a/src/shaders/utils/Makefile.am
+++ b/src/shaders/utils/Makefile.am
@@ -6,6 +6,9 @@ MFC_CORE_AVC = \
mfc_batchbuffer_avc_intra.asm \
mfc_batchbuffer_avc_inter.asm
+MFC_CORE_HSW = \
+ mfc_batchbuffer_hsw.asm
+
INTEL_G6B = mfc_batchbuffer_avc_intra.g6b mfc_batchbuffer_avc_inter.g6b
INTEL_G6A = mfc_batchbuffer_avc_intra.g6a mfc_batchbuffer_avc_inter.g6a
INTEL_GEN6_INC = mfc_batchbuffer.inc
@@ -16,15 +19,21 @@ INTEL_G7A = mfc_batchbuffer_avc_intra.g7a mfc_batchbuffer_avc_inter.g7a
INTEL_GEN7_INC = mfc_batchbuffer.inc
INTEL_GEN7_ASM = $(INTEL_G7A:%.g7a=%.gen7.asm)
+INTEL_G75B = mfc_batchbuffer_hsw.g75b
+INTEL_G75A = mfc_batchbuffer_hsw.g75a
+INTEL_GEN75_INC = mfc_batchbuffer_hsw.inc
+INTEL_GEN75_ASM = $(INTEL_G75A:%.g75a=%.gen75.asm)
+
TARGETS =
if HAVE_GEN4ASM
TARGETS += $(INTEL_G6B)
TARGETS += $(INTEL_G7B)
+TARGETS += $(INTEL_G75B)
endif
all-local: $(TARGETS)
-SUFFIXES = .g6a .g6b .g7a .g7b .gen6.asm .gen7.asm
+SUFFIXES = .g6a .g6b .g7a .g7b .gen6.asm .gen7.asm .g75a .g75b .gen75.asm
if HAVE_GEN4ASM
$(INTEL_GEN6_ASM): $(MFC_CORE) $(MFC_CORE_AVC) $(INTEL_GEN6_INC)
@@ -42,19 +51,31 @@ $(INTEL_GEN7_ASM): $(MFC_CORE) $(MFC_CORE_AVC) $(INTEL_GEN7_INC)
rm _mfc0.$@
.gen7.asm.g7b:
$(AM_V_GEN)$(GEN4ASM) -g 7 -o $@ $<
+
+$(INTEL_GEN75_ASM): $(MFC_CORE_HSW) $(INTEL_GEN75_INC)
+.g75a.gen75.asm:
+ $(AM_V_GEN)cpp -P $< > _mfc0.$@ && \
+ m4 _mfc0.$@ > $@ && \
+ rm _mfc0.$@
+.gen75.asm.g75b:
+ $(AM_V_GEN)$(GEN4ASM) -g 7.5 -o $@ $<
endif
-CLEANFILES = $(INTEL_GEN6_ASM) $(INTEL_GEN7_ASM)
+CLEANFILES = $(INTEL_GEN6_ASM) $(INTEL_GEN7_ASM) $(INTEL_GEN75_ASM)
EXTRA_DIST = \
$(INTEL_G6A) \
$(INTEL_G6B) \
$(INTEL_G7A) \
$(INTEL_G7B) \
+ $(INTEL_G75A) \
+ $(INTEL_G75B) \
$(INTEL_GEN6_INC) \
$(INTEL_GEN7_INC) \
+ $(INTEL_GEN75_INC) \
$(MFC_CORE) \
$(MFC_CORE_AVC) \
+ $(MFC_CORE_HSW) \
$(NULL)
# Extra clean files so that maintainer-clean removes *everything*
diff --git a/src/shaders/utils/mfc_batchbuffer.inc b/src/shaders/utils/mfc_batchbuffer.inc
index c83d5d4..c3a0fec 100644
--- a/src/shaders/utils/mfc_batchbuffer.inc
+++ b/src/shaders/utils/mfc_batchbuffer.inc
@@ -139,6 +139,8 @@ define(`mb_y', `inline_reg0.17') /* :ub, */
define(`mb_xy', `inline_reg0.16') /* :uw, */
define(`width_in_mb', `inline_reg0.20') /* :uw, the picture width in macroblocks */
define(`qp', `inline_reg0.22') /* :ub, */
+define(`ref_idx0', `inline_reg0.24') /* :ud, copied into PAK object DW8 by the AVC inter kernel */
+define(`ref_idx1', `inline_reg0.28') /* :ud, copied into PAK object DW9 by the AVC inter kernel */
/*
* GRF 8~15 -- temporary registers
diff --git a/src/shaders/utils/mfc_batchbuffer_avc_inter.asm b/src/shaders/utils/mfc_batchbuffer_avc_inter.asm
index 59152b8..549f021 100644
--- a/src/shaders/utils/mfc_batchbuffer_avc_inter.asm
+++ b/src/shaders/utils/mfc_batchbuffer_avc_inter.asm
@@ -103,6 +103,12 @@ __FILL_INTER_PAK_COMMAND:
/* DW7 */
mov (1) pak_object7_ud<1>:ud ob_read_wb0.4<0,1,0>:ud {align1} ;
+ /* DW8 */
+ mov (1) pak_object8_ud<1>:ud ref_idx0<0,1,0>:ud {align1} ;
+
+ /* DW9 */
+ mov (1) pak_object9_ud<1>:ud ref_idx1<0,1,0>:ud {align1} ;
+
jmpi (1) __OUTPUT_PAK_COMMAND ;
__FILL_INTRA_PAK_COMMAND:
diff --git a/src/shaders/utils/mfc_batchbuffer_avc_inter.g6b b/src/shaders/utils/mfc_batchbuffer_avc_inter.g6b
index 2e1703e..24b268f 100644
--- a/src/shaders/utils/mfc_batchbuffer_avc_inter.g6b
+++ b/src/shaders/utils/mfc_batchbuffer_avc_inter.g6b
@@ -24,7 +24,7 @@
{ 0x00800001, 0x23400061, 0x00000000, 0x00000000 },
{ 0x01000005, 0x20002d28, 0x020000ac, 0x00020002 },
{ 0x01000005, 0x20000c20, 0x00000200, 0x00002000 },
- { 0x00110020, 0x34001c00, 0x00001400, 0x00000022 },
+ { 0x00110020, 0x34001c00, 0x00001400, 0x00000026 },
{ 0x00000001, 0x23400061, 0x00000000, 0x71490009 },
{ 0x00000041, 0x23480c21, 0x000001e0, 0x000000a0 },
{ 0x00000001, 0x23540061, 0x00000000, 0x000f000f },
@@ -41,6 +41,8 @@
{ 0x00110001, 0x23580061, 0x00000000, 0x00000000 },
{ 0x00000040, 0x23584421, 0x00000358, 0x000000b6 },
{ 0x00000001, 0x235c0021, 0x00000204, 0x00000000 },
+ { 0x00000001, 0x23600021, 0x000000b8, 0x00000000 },
+ { 0x00000001, 0x23640021, 0x000000bc, 0x00000000 },
{ 0x00000020, 0x34001c00, 0x00001400, 0x00000022 },
{ 0x00000001, 0x23400061, 0x00000000, 0x71490009 },
{ 0x00000001, 0x23540061, 0x00000000, 0x000f000f },
@@ -67,7 +69,7 @@
{ 0x00000040, 0x21480c21, 0x00000148, 0x00000004 },
{ 0x00000040, 0x21e00c21, 0x000001e0, 0x00000001 },
{ 0x01000040, 0x20ae3dad, 0x000000ae, 0xffffffff },
- { 0x00110020, 0x34001c00, 0x00001400, 0xffffff9e },
+ { 0x00110020, 0x34001c00, 0x00001400, 0xffffff9a },
{ 0x00010020, 0x34001c00, 0x02001400, 0x0000001e },
{ 0x00600001, 0x20000022, 0x008d0120, 0x00000000 },
{ 0x05800031, 0x22001cc9, 0x00000000, 0x021a0001 },
diff --git a/src/shaders/utils/mfc_batchbuffer_avc_inter.g7b b/src/shaders/utils/mfc_batchbuffer_avc_inter.g7b
index 1664010..f0e2012 100644
--- a/src/shaders/utils/mfc_batchbuffer_avc_inter.g7b
+++ b/src/shaders/utils/mfc_batchbuffer_avc_inter.g7b
@@ -24,7 +24,7 @@
{ 0x00800001, 0x23400061, 0x00000000, 0x00000000 },
{ 0x01000005, 0x20002d28, 0x020000ac, 0x00020002 },
{ 0x01000005, 0x20000c20, 0x00000200, 0x00002000 },
- { 0x00110020, 0x34001c00, 0x00001400, 0x00000022 },
+ { 0x00110020, 0x34001c00, 0x00001400, 0x00000026 },
{ 0x00000001, 0x23400061, 0x00000000, 0x71490009 },
{ 0x00000041, 0x23480c21, 0x000001e0, 0x000000a0 },
{ 0x00000001, 0x23540061, 0x00000000, 0x000f000f },
@@ -41,6 +41,8 @@
{ 0x00110001, 0x23580061, 0x00000000, 0x00000000 },
{ 0x00000040, 0x23584421, 0x00000358, 0x000000b6 },
{ 0x00000001, 0x235c0021, 0x00000204, 0x00000000 },
+ { 0x00000001, 0x23600021, 0x000000b8, 0x00000000 },
+ { 0x00000001, 0x23640021, 0x000000bc, 0x00000000 },
{ 0x00000020, 0x34001c00, 0x00001400, 0x00000022 },
{ 0x00000001, 0x23400061, 0x00000000, 0x71490009 },
{ 0x00000001, 0x23540061, 0x00000000, 0x000f000f },
@@ -67,7 +69,7 @@
{ 0x00000040, 0x21480c21, 0x00000148, 0x00000004 },
{ 0x00000040, 0x21e00c21, 0x000001e0, 0x00000001 },
{ 0x01000040, 0x20ae3dad, 0x000000ae, 0xffffffff },
- { 0x00110020, 0x34001c00, 0x00001400, 0xffffff9e },
+ { 0x00110020, 0x34001c00, 0x00001400, 0xffffff9a },
{ 0x00010020, 0x34001c00, 0x02001400, 0x0000001e },
{ 0x00600001, 0x28000021, 0x008d0120, 0x00000000 },
{ 0x0a800031, 0x22001ca9, 0x00000800, 0x02180001 },
diff --git a/src/shaders/utils/mfc_batchbuffer_hsw.asm b/src/shaders/utils/mfc_batchbuffer_hsw.asm
new file mode 100644
index 0000000..c34e934
--- /dev/null
+++ b/src/shaders/utils/mfc_batchbuffer_hsw.asm
@@ -0,0 +1,296 @@
+/*
+ * Copyright © 2010-2013 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Zhao Yakui <yakui.zhao@intel.com>
+ */
+
+START:
+ mov (16) pak_object_reg0.0<1>:ud 0x0:ud {align1}; /* 16 dwords: clears pak_object_reg0 and pak_object_reg1 */
+ mov (8) obw_m0.0<1>:ud 0x0:ud {align1};
+ mov (8) mb_cur_msg.0<1>:ud 0x0:ud {align1};
+ mov (16) mb_temp.0<1>:ud 0x0:ud {align1}; /* 16 dwords: clears mb_temp and mb_end, so cur_loop_count starts at 0 */
+ mov (1) cur_mb_x<1>:uw mb_x<0,1,0>:ub {align1};
+ mov (1) cur_mb_y<1>:uw mb_y<0,1,0>:ub {align1};
+ mov (1) end_mb_x<1>:uw slice_end_x<0,1,0>:ub {align1};
+ mov (1) end_mb_y<1>:uw slice_end_y<0,1,0>:ub {align1};
+ mov (1) end_loop_count<1>:uw total_mbs<0,1,0>:uw {align1};
+ mov (1) vme_len<1>:ud 2:ud {align1}; /* VME record length per MB, in owords: 2 by default */
+ and.z.f0.0 (1) null:uw mb_flag<0,1,0>:ub INTRA_SLICE:uw {align1}; /* f0.0 is set when the INTRA_SLICE bit is clear */
+ (f0.0) mov (1) vme_len<1>:ud 24:ud {align1}; /* 24 owords per MB when the slice is not intra */
+/* obw_m0: header of the batchbuffer writes; dword 2 holds the destination offset in owords */
+ mov (1) obw_m0.8<1>:UD buffer_offset<0,1,0>:ud {align1};
+ mov (1) obw_m0.20<1>:UB thread_id_ub {align1}; /* dispatch id */
+/* mb_cur_msg: header of the VME output reads; offset = (cur_mb_y * width_in_mbs + cur_mb_x) * vme_len */
+ mul (1) mb_cur_msg.8<1>:UD width_in_mbs<0,1,0>:UW cur_mb_y<0,1,0>:UW {align1};
+ add (1) mb_cur_msg.8<1>:UD mb_cur_msg.8<0,1,0>:UD cur_mb_x<0,1,0>:uw {align1};
+ mul (1) mb_cur_msg.8<1>:UD mb_cur_msg.8<0,1,0>:UD vme_len<0,1,0>:UD {align1};
+ mov (1) mb_cur_msg.20<1>:UB thread_id_ub {align1}; /* dispatch id */
+ mov (1) pak_object0_ud<1>:ud MFC_AVC_PAK_OBJECT_DW0:ud {align1}; /* DW0, DW5, DW10 and the low byte of DW6 are set once here */
+ mov (1) pak_object5_ud<1>:ud MFC_AVC_PAK_OBJECT_DW5:ud {align1};
+ mov (1) pak_object10_ud<1>:ud MFC_AVC_PAK_OBJECT_DW10:ud {align1};
+ mov (1) pak_object6_ud<1>:ub qp_flag<0,1,0>:ub {align1}; /* low byte of DW6 comes from qp_flag */
+
+pak_object_loop:
+ mov (8) mb_msg0.0<1>:ud mb_cur_msg.0<8,8,1>:ud {align1}; /* fresh copy of the read header for this MB */
+ mov (1) pak_object4_ud<1>:ud MFC_AVC_PAK_OBJECT_DW4:ud {align1};
+ mov (1) tmp_reg0.0<1>:ub cur_mb_x<0,1,0>:ub {align1};
+ mov (1) tmp_reg0.1<1>:ub cur_mb_y<0,1,0>:ub {align1};
+ mov (1) pak_object4_ud<1>:uw tmp_reg0.0<0,1,0>:uw {align1}; /* DW4 low word: byte 0 = cur_mb_x, byte 1 = cur_mb_y */
+ /* upper word of pak_object6_ud: cleared here, set to MFC_AVC_PAK_LAST_MB for the last MB of the slice */
+ mov (1) pak_object_reg0.26<1>:uw 0x0:uw {align1};
+ /* the last MB is the one with cur_mb_x == end_mb_x and cur_mb_y == end_mb_y */
+ cmp.e.f0.0 (1) null:uw cur_mb_x<0,1,0>:uw end_mb_x<0,1,0>:uw {align1};
+ (-f0.0) jmpi (1) start_mb_flag;
+ cmp.e.f0.0 (1) null:uw cur_mb_y<0,1,0>:uw end_mb_y<0,1,0>:uw {align1};
+ (f0.0) mov (1) pak_object_reg0.26<1>:uw MFC_AVC_PAK_LAST_MB:uw {align1};
+start_mb_flag:
+ and.z.f0.0 (1) null:uw mb_flag<0,1,0>:ub INTRA_SLICE:uw {align1}; /* INTRA_SLICE bit clear: take the inter path */
+ (f0.0) jmpi (1) inter_frame_start;
+
+/* Intra slice: read the 2-oword (32 bytes) VME record of this MB into mb_intra_wb; bind index 0 (MV_BIND_IDX), msg type: 0 (OWord Block Read) */
+send (16)
+ mb_ind
+ mb_wb.0<1>:ud
+ null
+ data_port(
+ OBR_CACHE_TYPE,
+ OBR_MESSAGE_TYPE,
+ OBR_CONTROL_2,
+ MV_BIND_IDX,
+ OBR_WRITE_COMMIT_CATEGORY,
+ OBR_HEADER_PRESENT
+ )
+ mlen 1
+ rlen 1
+ {align1};
+ jmpi (1) intra_pak_command; /* intra slices always emit an intra PAK command */
+
+nop;
+nop;
+inter_frame_start:
+/* Read 4 owords (64 bytes) into mb_intra_wb and mb_inter_wb; bind index 0 (MV_BIND_IDX), msg type: 0 (OWord Block Read) */
+send (16)
+ mb_ind
+ mb_wb.0<1>:ud
+ null
+ data_port(
+ OBR_CACHE_TYPE,
+ OBR_MESSAGE_TYPE,
+ OBR_CONTROL_4,
+ MV_BIND_IDX,
+ OBR_WRITE_COMMIT_CATEGORY,
+ OBR_HEADER_PRESENT
+ )
+ mlen 1
+ rlen 2
+ {align1};
+/* Take the intra path when the word at mb_intra_wb.16 is below the word at mb_inter_wb.8 (presumably the intra and inter costs -- confirm against the VME output layout) */
+/* TODO: RefID is required after multi-references are added */
+cmp.l.f0.0 (1) null:w mb_intra_wb.16<0,1,0>:uw mb_inter_wb.8<0,1,0>:uw {align1};
+(f0.0) jmpi (1) intra_pak_command;
+
+/* MV len (pak_object1_ud) and MV mode bits (pak_object3_ud) */
+ and (1) pak_object3_ud<1>:ud mb_inter_wb.0<0,1,0>:ud MFC_AVC_INTER_MASK_DW3:ud {align1};
+ add (1) pak_object3_ud<1>:ud pak_object3_ud<0,1,0>:ud MFC_AVC_PAK_CBP:ud {align1};
+ and (1) tmp_reg0.0<1>:uw mb_inter_wb.0<0,1,0>:uw INTER_MASK:uw {align1}; /* low two bits select the partition mode */
+ mov (1) pak_object1_ud<1>:ud 32:ud {align1}; /* default MV len: 32 */
+ cmp.e.f0.0 (1) null:uw tmp_reg0.0<0,1,0>:uw INTER_8X8MODE:uw {align1};
+ (-f0.0) add (1) pak_object3_ud<1>:ud pak_object3_ud<0,1,0>:ud INTER_MV8:ud {align1}; /* not 8x8: INTER_MV8 */
+ (-f0.0) jmpi (1) inter_mv_check;
+ and.nz.f0.0 (1) null:ud mb_inter_wb.4<0,1,0>:uw SUBSHAPE_MASK:uw {align1}; /* 8x8: any sub-shape bits set? */
+ (f0.0) mov (1) pak_object1_ud<1>:ud 128:ud {align1}; /* 8x8 with sub-shapes: MV len 128 */
+ (f0.0) add (1) pak_object3_ud<1>:ud pak_object3_ud<0,1,0>:ud INTER_MV32:ud {align1};
+ (f0.0) jmpi (1) mv_check_end; /* the MVs are used as stored, no repacking */
+ /* 8x8 without sub-shapes: INTER_MV8, like the other modes */
+ add (1) pak_object3_ud<1>:ud pak_object3_ud<0,1,0>:ud INTER_MV8:ud {align1};
+/* 16x16 skips the MV repacking; every other mode reaching this point falls through to it */
+inter_mv_check:
+ and (1) tmp_reg0.0<1>:uw mb_inter_wb.0<0,1,0>:uw INTER_MASK:uw {align1};
+ cmp.e.f0.0 (1) null:uw tmp_reg0.0<0,1,0>:uw INTER_16X16MODE:uw {align1};
+ (f0.0) jmpi (1) mv_check_end;
+
+add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:ud 3:ud {align1}; /* the MVs start 3 owords into the MB record */
+/* Read MV for MB A into mb_mv0..mb_mv3 */
+/* bind index 0, read 8 oword (128bytes), msg type: 0(OWord Block Read) */
+send (16)
+ mb_ind
+ mb_mv0.0<1>:ud
+ null
+ data_port(
+ OBR_CACHE_TYPE,
+ OBR_MESSAGE_TYPE,
+ OBR_CONTROL_8,
+ MV_BIND_IDX,
+ OBR_WRITE_COMMIT_CATEGORY,
+ OBR_HEADER_PRESENT
+ )
+ mlen 1
+ rlen 4
+ {align1};
+/* TODO: RefID is required after multi-references are added */
+/* Pack the first two dwords of mb_mv1, mb_mv2 and mb_mv3 behind the first two dwords of mb_mv0 */
+ mov (2) mb_mv0.8<1>:ud mb_mv1.0<2,2,1>:ud {align1};
+ mov (2) mb_mv0.16<1>:ud mb_mv2.0<2,2,1>:ud {align1};
+ mov (2) mb_mv0.24<1>:ud mb_mv3.0<2,2,1>:ud {align1};
+/* Payload: the read header (same offset the MVs were read from) followed by the packed register */
+ mov (8) msg_reg0.0<1>:ud mb_msg0.0<8,8,1>:ud {align1} ;
+ mov (8) msg_reg1.0<1>:ud mb_mv0.0<8,8,1>:ud {align1} ;
+/* Write MV for MB A */
+/* bind index 0, write 2 oword (32bytes), msg type: 8(OWord Block Write) */
+send (16)
+ msg_ind
+ obw_wb
+ null
+ data_port(
+ OBW_CACHE_TYPE,
+ OBW_MESSAGE_TYPE,
+ OBW_CONTROL_2,
+ MV_BIND_IDX,
+ OBW_WRITE_COMMIT_CATEGORY,
+ OBW_HEADER_PRESENT
+ )
+ mlen 2
+ rlen obw_wb_length
+ {align1};
+
+mv_check_end:
+
+/* ref list: DW8 and DW9 take the forward and backward reference lists from the inline data */
+ mov (1) pak_object8_ud<1>:ud fwd_ref<0,1,0>:ud {align1};
+ mov (1) pak_object9_ud<1>:ud bwd_ref<0,1,0>:ud {align1};
+/* inter_mode. pak_object7_ud: bytes 0 and 1 come from bytes 5 and 6 of mb_inter_wb, the rest is zero */
+ mov (1) pak_object7_ud<1>:ud 0x0:ud {align1};
+ mov (1) pak_object_reg0.28<1>:ub mb_inter_wb.5<0,1,0>:ub {align1};
+ mov (1) pak_object_reg0.29<1>:ub mb_inter_wb.6<0,1,0>:ub {align1};
+
+/* mv start address: (oword offset of the MB record + 3) * 16 */
+ add (1) tmp_reg0.4<1>:ud mb_cur_msg.8<0,1,0>:ud 3:ud {align1};
+ mul (1) pak_object2_ud<1>:ud tmp_reg0.4<0,1,0>:ud 16:ud {align1};
+
+ jmpi (1) write_pak_command; /* skip the intra set-up */
+
+intra_pak_command:
+ /* objects 1 and 2 (MV len and MV start address on the inter path) are set to zero */
+ mov (2) pak_object1_ud<1>:ud 0x0:ud {align1};
+ /* object 7/8 intra mode: dwords 1 and 2 of the intra write-back */
+ mov (1) pak_object7_ud<1>:ud mb_intra_wb.4<0,1,0>:ud {align1};
+ mov (1) pak_object8_ud<1>:ud mb_intra_wb.8<0,1,0>:ud {align1};
+ /* object 9 Intra structure: only the low byte is taken from the write-back */
+ mov (1) pak_object9_ud<1>:ud 0x0:ud {align1};
+ mov (1) pak_object9_ud<1>:ub mb_intra_wb.12<0,1,0>:ub {align1};
+ /* object 3: masked bits of write-back dword 0 plus the intra flag and the CBP bits */
+ and (1) pak_object3_ud<1>:ud mb_intra_wb.0<0,1,0>:ud MFC_AVC_INTRA_MASK_DW3:ud {align1};
+ add (1) pak_object3_ud<1>:ud pak_object3_ud<0,1,0>:ud MFC_AVC_INTRA_FLAG + MFC_AVC_PAK_CBP:ud {align1};
+ /* move write-back byte 2 into bits 8..15, keep the AVC_INTRA_MASK bits and add them to object 3 */
+ mov (1) tmp_reg0.0<1>:ud 0:ud {align1};
+ mov (1) tmp_reg0.1<1>:ub mb_intra_wb.2<0,1,0>:ub {align1};
+ and (1) tmp_reg0.0<1>:uw tmp_reg0.0<0,1,0>:uw AVC_INTRA_MASK:uw {align1};
+ add (1) pak_object3_ud<1>:ud pak_object3_ud<0,1,0>:ud tmp_reg0.0<0,1,0>:ud {align1};
+
+/* Write the pak command (12 dwords) into the batchbuffer as a 2-oword write followed by a 1-oword write */
+write_pak_command:
+ mov (8) msg_reg0.0<1>:ud obw_m0.0<8,8,1>:ud {align1} ;
+ mov (8) msg_reg1.0<1>:ud pak_object_reg0.0<8,8,1>:ud {align1} ;
+
+/* bind index 2 (MFC_BIND_IDX), write 2 oword (32bytes), msg type: 8(OWord Block Write) */
+send (16)
+ msg_ind
+ obw_wb
+ null
+ data_port(
+ OBW_CACHE_TYPE,
+ OBW_MESSAGE_TYPE,
+ OBW_CONTROL_2,
+ MFC_BIND_IDX,
+ OBW_WRITE_COMMIT_CATEGORY,
+ OBW_HEADER_PRESENT
+ )
+ mlen 2
+ rlen obw_wb_length
+ {align1};
+
+ add (1) msg_reg0.8<1>:ud msg_reg0.8<0,1,0>:ud 2:ud {align1}; /* step past the 2 owords just written */
+ mov (8) msg_reg1.0<1>:ud pak_object_reg1.0<8,8,1>:ud {align1};
+
+/* bind index 2 (MFC_BIND_IDX), write 1 oword (16bytes), msg type: 8(OWord Block Write) */
+send (16)
+ msg_ind
+ obw_wb
+ null
+ data_port(
+ OBW_CACHE_TYPE,
+ OBW_MESSAGE_TYPE,
+ OBW_CONTROL_0,
+ MFC_BIND_IDX,
+ OBW_WRITE_COMMIT_CATEGORY,
+ OBW_HEADER_PRESENT
+ )
+ mlen 2
+ rlen obw_wb_length
+ {align1};
+
+
+/* Check the next mb: count the command just written and stop after total_mbs of them. NOTE(review): the count is tested only after the first MB is emitted, so total_mbs == 0 is not handled here -- confirm the caller never passes 0 */
+add (1) cur_loop_count<1>:uw cur_loop_count<0,1,0>:uw 1:uw {align1};
+cmp.e.f0.0 (1) null:uw cur_loop_count<0,1,0>:uw end_loop_count<0,1,0>:uw {align1};
+(f0.0) jmpi (1) pak_loop_end;
+/* the buffer offset for next block: 3 owords per PAK command, vme_len owords per VME record */
+add (1) obw_m0.8<1>:ud obw_m0.8<0,1,0>:ud 3:uw {align1};
+add (1) mb_cur_msg.8<1>:ud mb_cur_msg.8<0,1,0>:ud vme_len<0,1,0>:ud {align1};
+add (1) cur_mb_x<1>:uw cur_mb_x<0,1,0>:uw 1:uw {align1};
+/* Check whether it is already equal to width in mbs; if so wrap to x = 0 of the next MB row */
+cmp.e.f0.0 (1) null:uw cur_mb_x<0,1,0>:uw width_in_mbs<0,1,0>:uw {align1};
+(f0.0) add (1) cur_mb_y<1>:uw cur_mb_y<0,1,0>:uw 1:uw {align1};
+(f0.0) mov (1) cur_mb_x<1>:uw 0:uw {align1};
+
+/* continue the pak command for next mb */
+jmpi (1) pak_object_loop;
+nop;
+nop;
+pak_loop_end:
+/* Issue message fence (OBR_MESSAGE_FENCE with OBR_MF_COMMIT) so that the previous write message is committed; the reply is returned in mb_wb */
+send (16)
+ msg_ind
+ mb_wb.0<1>:ud
+ null
+ data_port(
+ OBR_CACHE_TYPE,
+ OBR_MESSAGE_FENCE,
+ OBR_MF_COMMIT,
+ MFC_BIND_IDX,
+ OBR_WRITE_COMMIT_CATEGORY,
+ OBR_HEADER_PRESENT
+ )
+ mlen 1
+ rlen 1
+ {align1};
+
+__EXIT:
+/*
+ * kill thread: copy the r0 header into the thread spawner payload and send it with EOT
+ */
+mov (8) ts_msg_reg0<1>:UD r0<8,8,1>:UD {align1};
+send (1) ts_msg_ind acc0<1>UW null thread_spawner(0, 0, 1) mlen 1 rlen 0 {align1 EOT};
+
+nop;
+
diff --git a/src/shaders/utils/mfc_batchbuffer_hsw.g75a b/src/shaders/utils/mfc_batchbuffer_hsw.g75a
new file mode 100644
index 0000000..4a96754
--- /dev/null
+++ b/src/shaders/utils/mfc_batchbuffer_hsw.g75a
@@ -0,0 +1,29 @@
+/*
+ * Copyright © 2010-2013 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Zhao Yakui <yakui.zhao@intel.com>
+ */
+
+#include "mfc_batchbuffer_hsw.inc"
+#include "mfc_batchbuffer_hsw.asm"
+
diff --git a/src/shaders/utils/mfc_batchbuffer_hsw.g75b b/src/shaders/utils/mfc_batchbuffer_hsw.g75b
new file mode 100644
index 0000000..2f42643
--- /dev/null
+++ b/src/shaders/utils/mfc_batchbuffer_hsw.g75b
@@ -0,0 +1,105 @@
+ { 0x00800001, 0x23400061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21e00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x2b000061, 0x00000000, 0x00000000 },
+ { 0x00800001, 0x2ac00061, 0x00000000, 0x00000000 },
+ { 0x00000001, 0x2ac00229, 0x000000a8, 0x00000000 },
+ { 0x00000001, 0x2ac20229, 0x000000a9, 0x00000000 },
+ { 0x00000001, 0x2ae00229, 0x000000b0, 0x00000000 },
+ { 0x00000001, 0x2ae20229, 0x000000b1, 0x00000000 },
+ { 0x00000001, 0x2ae40129, 0x000000ac, 0x00000000 },
+ { 0x00000001, 0x2ae80061, 0x00000000, 0x00000002 },
+ { 0x01000005, 0x20002e28, 0x000000a4, 0x00010001 },
+ { 0x00010001, 0x2ae80061, 0x00000000, 0x00000018 },
+ { 0x00000001, 0x21e80021, 0x000000a0, 0x00000000 },
+ { 0x00000001, 0x21f40231, 0x00000014, 0x00000000 },
+ { 0x00000041, 0x2b082521, 0x000000aa, 0x00000ac2 },
+ { 0x00000040, 0x2b082421, 0x00000b08, 0x00000ac0 },
+ { 0x00000041, 0x2b080421, 0x00000b08, 0x00000ae8 },
+ { 0x00000001, 0x2b140231, 0x00000014, 0x00000000 },
+ { 0x00000001, 0x23400061, 0x00000000, 0x7149000a },
+ { 0x00000001, 0x23540061, 0x00000000, 0x000f000f },
+ { 0x00000001, 0x23680061, 0x00000000, 0x00000000 },
+ { 0x00000001, 0x23580231, 0x000000a6, 0x00000000 },
+ { 0x00600001, 0x2b400021, 0x008d0b00, 0x00000000 },
+ { 0x00000001, 0x23500061, 0x00000000, 0xffff0000 },
+ { 0x00000001, 0x21000231, 0x00000ac0, 0x00000000 },
+ { 0x00000001, 0x21010231, 0x00000ac2, 0x00000000 },
+ { 0x00000001, 0x23500129, 0x00000100, 0x00000000 },
+ { 0x00000001, 0x235a0169, 0x00000000, 0x00000000 },
+ { 0x01000010, 0x20002528, 0x00000ac0, 0x00000ae0 },
+ { 0x00110020, 0x34001c00, 0x00001400, 0x00000020 },
+ { 0x01000010, 0x20002528, 0x00000ac2, 0x00000ae2 },
+ { 0x00010001, 0x235a0169, 0x00000000, 0x04000400 },
+ { 0x01000005, 0x20002e28, 0x000000a4, 0x00010001 },
+ { 0x00010020, 0x34001c00, 0x00001400, 0x00000040 },
+ { 0x0a800031, 0x2b601ca1, 0x00000b40, 0x02180200 },
+ { 0x00000020, 0x34001c00, 0x00001400, 0x00000240 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0a800031, 0x2b601ca1, 0x00000b40, 0x02280300 },
+ { 0x05000010, 0x2000252c, 0x00000b70, 0x00000b88 },
+ { 0x00010020, 0x34001c00, 0x00001400, 0x000001f0 },
+ { 0x00000005, 0x234c0c21, 0x00000b80, 0x1f00ffff },
+ { 0x00000040, 0x234c0c21, 0x0000034c, 0x000e0000 },
+ { 0x00000005, 0x21002d29, 0x00000b80, 0x00030003 },
+ { 0x00000001, 0x23440061, 0x00000000, 0x00000020 },
+ { 0x01000010, 0x20002d28, 0x00000100, 0x00030003 },
+ { 0x00110040, 0x234c0c21, 0x0000034c, 0x00400000 },
+ { 0x00110020, 0x34001c00, 0x00001400, 0x00000050 },
+ { 0x02000005, 0x20002d20, 0x00000b84, 0xff00ff00 },
+ { 0x00010001, 0x23440061, 0x00000000, 0x00000080 },
+ { 0x00010040, 0x234c0c21, 0x0000034c, 0x00600000 },
+ { 0x00010020, 0x34001c00, 0x00001400, 0x000000c0 },
+ { 0x00000040, 0x234c0c21, 0x0000034c, 0x00400000 },
+ { 0x00000005, 0x21002d29, 0x00000b80, 0x00030003 },
+ { 0x01000010, 0x20002d28, 0x00000100, 0x00000000 },
+ { 0x00010020, 0x34001c00, 0x00001400, 0x00000080 },
+ { 0x00000040, 0x2b480c21, 0x00000b48, 0x00000003 },
+ { 0x0a800031, 0x2ba01ca1, 0x00000b40, 0x02480400 },
+ { 0x00200001, 0x2ba80021, 0x00450bc0, 0x00000000 },
+ { 0x00200001, 0x2bb00021, 0x00450be0, 0x00000000 },
+ { 0x00200001, 0x2bb80021, 0x00450c00, 0x00000000 },
+ { 0x00600001, 0x28000021, 0x008d0b40, 0x00000000 },
+ { 0x00600001, 0x28200021, 0x008d0ba0, 0x00000000 },
+ { 0x0a800031, 0x20001cac, 0x00000800, 0x040a0200 },
+ { 0x00000001, 0x23600021, 0x000000b4, 0x00000000 },
+ { 0x00000001, 0x23640021, 0x000000b8, 0x00000000 },
+ { 0x00000001, 0x235c0061, 0x00000000, 0x00000000 },
+ { 0x00000001, 0x235c0231, 0x00000b85, 0x00000000 },
+ { 0x00000001, 0x235d0231, 0x00000b86, 0x00000000 },
+ { 0x00000040, 0x21040c21, 0x00000b08, 0x00000003 },
+ { 0x00000041, 0x23480c21, 0x00000104, 0x00000010 },
+ { 0x00000020, 0x34001c00, 0x00001400, 0x000000b0 },
+ { 0x00200001, 0x23440061, 0x00000000, 0x00000000 },
+ { 0x00000001, 0x235c0021, 0x00000b64, 0x00000000 },
+ { 0x00000001, 0x23600021, 0x00000b68, 0x00000000 },
+ { 0x00000001, 0x23640061, 0x00000000, 0x00000000 },
+ { 0x00000001, 0x23640231, 0x00000b6c, 0x00000000 },
+ { 0x00000005, 0x234c0c21, 0x00000b60, 0x0000c0ff },
+ { 0x00000040, 0x234c0c21, 0x0000034c, 0x000e2000 },
+ { 0x00000001, 0x21000061, 0x00000000, 0x00000000 },
+ { 0x00000001, 0x21010231, 0x00000b62, 0x00000000 },
+ { 0x00000005, 0x21002d29, 0x00000100, 0x1f001f00 },
+ { 0x00000040, 0x234c0421, 0x0000034c, 0x00000100 },
+ { 0x00600001, 0x28000021, 0x008d01e0, 0x00000000 },
+ { 0x00600001, 0x28200021, 0x008d0340, 0x00000000 },
+ { 0x0a800031, 0x20001cac, 0x00000800, 0x040a0202 },
+ { 0x00000040, 0x28080c21, 0x00000808, 0x00000002 },
+ { 0x00600001, 0x28200021, 0x008d0360, 0x00000000 },
+ { 0x0a800031, 0x20001cac, 0x00000800, 0x040a0002 },
+ { 0x00000040, 0x2ac42d29, 0x00000ac4, 0x00010001 },
+ { 0x01000010, 0x20002528, 0x00000ac4, 0x00000ae4 },
+ { 0x00010020, 0x34001c00, 0x00001400, 0x00000090 },
+ { 0x00000040, 0x21e82c21, 0x000001e8, 0x00030003 },
+ { 0x00000040, 0x2b080421, 0x00000b08, 0x00000ae8 },
+ { 0x00000040, 0x2ac02d29, 0x00000ac0, 0x00010001 },
+ { 0x01000010, 0x20002528, 0x00000ac0, 0x000000aa },
+ { 0x00010040, 0x2ac22d29, 0x00000ac2, 0x00010001 },
+ { 0x00010001, 0x2ac00169, 0x00000000, 0x00000000 },
+ { 0x00000020, 0x34001c00, 0x00001400, 0xfffffb30 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0a800031, 0x2b601ca1, 0x00000800, 0x0219e002 },
+ { 0x00600001, 0x2e000021, 0x008d0000, 0x00000000 },
+ { 0x07000031, 0x24001ca8, 0x00000e00, 0x82000010 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
diff --git a/src/shaders/utils/mfc_batchbuffer_hsw.inc b/src/shaders/utils/mfc_batchbuffer_hsw.inc
new file mode 100644
index 0000000..588006e
--- /dev/null
+++ b/src/shaders/utils/mfc_batchbuffer_hsw.inc
@@ -0,0 +1,195 @@
+/*
+ * Copyright © 2010-2013 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Zhao Yakui <yakui.zhao@intel.com>
+ */
+
+/* GRF registers
+ * r0 header
+ * r1~r4 constant buffer (reserved)
+ * r5 inline data
+ * r6~r7 reserved
+ * r8~r15 temporary registers
+ * r16 write back of Oword Block Write (not used here: obw_wb is defined as null)
+ */
+
+/*
+ * GRF 0 -- header
+ */
+define(`thread_id_ub', `r0.20<0,1,0>:UB') /* thread id in payload, copied into every message header as the dispatch id */
+
+define(`inline_reg0', `r5') /* the inline data of the media object */
+define(`buffer_offset', `inline_reg0.0') /* :ud, in units of Owords; start offset of the batchbuffer writes */
+/* :ub,
+ * bit0 (INTRA_SLICE) indicates the frame type. 1 is the I-frame. 0 is P-B frame
+ */
+define(`mb_flag', `inline_reg0.4')
+define(`qp_flag', `inline_reg0.6') /* :ub, copied into the low byte of PAK object DW6 */
+
+define(`mb_x', `inline_reg0.8') /* :ub, x of the first macroblock to process */
+define(`mb_y', `inline_reg0.9') /* :ub, y of the first macroblock to process */
+define(`mb_xy', `inline_reg0.8') /* :uw, mb_x and mb_y as one word */
+/* :uw, the picture width in macroblocks */
+define(`width_in_mbs', `inline_reg0.10')
+/* :uw, the number of macroblock commands being processed by the kernel */
+define(`total_mbs', `inline_reg0.12')
+/* :ub, the mb x/y of the last mb in slice */
+define(`slice_end_x', `inline_reg0.16')
+define(`slice_end_y', `inline_reg0.17')
+
+/* :ud the forward reference picture list */
+define(`fwd_ref', `inline_reg0.20')
+/* :ud the backward reference picture list */
+define(`bwd_ref', `inline_reg0.24')
+
+/*
+ * GRF 8~15 -- temporary registers
+ */
+define(`tmp_reg0', `r8')
+define(`tmp_reg1', `r9')
+define(`tmp_reg2', `r10')
+define(`tmp_reg3', `r11')
+define(`tmp_reg4', `r12')
+define(`tmp_reg5', `r13')
+define(`tmp_reg6', `r14')
+define(`tmp_reg7', `r15')
+
+define(`obw_m0', `tmp_reg7') /* header of the OWord Block Writes into the batchbuffer */
+
+define(`obw_wb', `null<1>:W') /* the block writes request no write-back: null destination */
+define(`obw_wb_length', `0') /* and a response length of 0 */
+
+/*
+ * GRF 26~27 -- the 12 dwords of the PAK object command being assembled
+ */
+define(`pak_object_reg0', `r26')
+define(`pak_object0_ud', `r26.0')
+define(`pak_object1_ud', `r26.4')
+define(`pak_object2_ud', `r26.8')
+define(`pak_object3_ud', `r26.12')
+define(`pak_object4_ud', `r26.16')
+define(`pak_object5_ud', `r26.20')
+define(`pak_object6_ud', `r26.24')
+define(`pak_object7_ud', `r26.28')
+
+define(`pak_object_reg1', `r27')
+define(`pak_object8_ud', `r27.0')
+define(`pak_object9_ud', `r27.4')
+define(`pak_object10_ud', `r27.8')
+define(`pak_object11_ud', `r27.12')
+
+/*
+ * Message Payload registers (msg_ind is the register number passed to send)
+ */
+define(`msg_ind', `64')
+define(`msg_reg0', `g64')
+define(`msg_reg1', `g65')
+define(`msg_reg2', `g66')
+define(`msg_reg3', `g67')
+define(`msg_reg4', `g68')
+define(`msg_reg5', `g69')
+define(`msg_reg6', `g70')
+define(`msg_reg7', `g71')
+define(`msg_reg8', `g72')
+
+define(`MV_BIND_IDX', `0') /* binding table index used for the VME output reads and the MV rewrite */
+define(`MFC_BIND_IDX', `2') /* binding table index used for the PAK command writes and the final fence */
+
+define(`ts_msg_ind', `112') /* payload register of the end-of-thread message */
+define(`ts_msg_reg0', `r112')
+
+
+define(`MFC_AVC_PAK_OBJECT_DW0', `0x7149000a') /* first dword of every PAK object command; presumably opcode plus dword length -- confirm against the PRM */
+define(`MFC_AVC_PAK_OBJECT_DW4', `0xFFFF0000') /* CBP for Y; the low word is overwritten with the MB x/y */
+define(`MFC_AVC_PAK_OBJECT_DW5', `0x000F000F')
+define(`MFC_AVC_PAK_OBJECT_DW10', `0x0000000')
+
+define(`OBR_MESSAGE_TYPE', `0') /* OWord Block Read */
+define(`OBR_CACHE_TYPE', `10')
+
+define(`OBR_MESSAGE_FENCE', `7') /* message type of the fence issued at pak_loop_end */
+define(`OBR_MF_NOCOMMIT', `0')
+define(`OBR_MF_COMMIT', `0x20') /* fence control value used at pak_loop_end */
+
+define(`OBR_CONTROL_0', `0') /* 1 OWord, low 128 bits */
+define(`OBR_CONTROL_1', `1') /* 1 OWord, high 128 bits */
+define(`OBR_CONTROL_2', `2') /* 2 OWords */
+define(`OBR_CONTROL_4', `3') /* 4 OWords */
+define(`OBR_CONTROL_8', `4') /* 8 OWords */
+
+define(`OBR_HEADER_PRESENT', `1')
+define(`OBR_WRITE_COMMIT_CATEGORY', `0') /* category on Ivybridge */
+
+define(`OBW_WRITE_COMMIT_CATEGORY', `0') /* category on Ivybridge */
+
+define(`OBW_CACHE_TYPE', `10')
+
+
+define(`OBW_MESSAGE_TYPE', `8') /* OWord Block Write */
+
+define(`OBW_CONTROL_0', `0') /* 1 OWord, low 128 bits */
+define(`OBW_CONTROL_1', `1') /* 1 OWord, high 128 bits */
+define(`OBW_CONTROL_2', `2') /* 2 OWords */
+define(`OBW_CONTROL_4', `3') /* 4 OWords */
+define(`OBW_CONTROL_8', `4') /* 8 OWords */
+define(`OBW_HEADER_PRESENT', `1')
+
+define(`INTER_MASK', `0x03') /* low two bits of the inter write-back select the partition mode */
+define(`INTER_16X16MODE', `0x0')
+define(`INTER_16X8MODE', `0x01')
+define(`INTER_8X16MODE', `0x02')
+define(`INTER_8X8MODE', `0x03')
+define(`SUBSHAPE_MASK', `0xFF00') /* applied to the word at mb_inter_wb.4; non-zero selects INTER_MV32 for 8x8 */
+
+define(`mb_ind', `90') /* register number of the read header passed to send */
+define(`mb_msg0', `r90') /* per-MB copy of the read header */
+define(`mb_wb', `r91') /* destination of the VME output reads */
+define(`mb_intra_wb', `r91')
+define(`mb_inter_wb', `r92')
+define(`mb_mv0', `r93') /* r93~r96 receive the 8-oword MV read */
+define(`mb_mv1', `r94')
+define(`mb_mv2', `r95')
+define(`mb_mv3', `r96')
+
+define(`mb_temp', `r86') /* loop state */
+define(`cur_mb_x', `mb_temp.0') /* :uw, */
+define(`cur_mb_y', `mb_temp.2') /* :uw, */
+define(`cur_loop_count', `mb_temp.4') /* :uw, number of PAK commands emitted so far */
+define(`mb_end', `r87') /* loop limits */
+define(`end_mb_x', `mb_end.0') /* :uw, */
+define(`end_mb_y', `mb_end.2') /* :uw, */
+define(`end_loop_count', `mb_end.4') /* :uw, copy of total_mbs */
+/* :ud the length of VME predict result for every mb. Units in owords */
+define(`vme_len', `mb_end.8')
+define(`mb_cur_msg', `r88') /* read header pointing at the VME record of the current MB */
+
+define(`INTRA_SLICE', `0x0001') /* bit tested in mb_flag */
+define(`MFC_AVC_PAK_LAST_MB', `0x0400') /* written to the upper word of PAK object DW6 for the last MB of the slice */
+
+define(`MFC_AVC_INTER_MASK_DW3', `0x1F00FFFF') /* bits of write-back dword 0 kept in DW3 on the inter path */
+define(`MFC_AVC_INTRA_MASK_DW3', `0x0000C0FF') /* bits of write-back dword 0 kept in DW3 on the intra path */
+define(`INTER_MV8', `0x00400000') /* added to DW3 unless the MB is 8x8 with sub-shapes */
+define(`INTER_MV32', `0x00600000') /* added to DW3 for 8x8 with sub-shapes */
+define(`MFC_AVC_PAK_CBP', `0x000E0000') /* added to DW3 on both paths */
+define(`MFC_AVC_INTRA_FLAG', `0x00002000') /* added to DW3 on the intra path */
+define(`AVC_INTRA_MASK', `0x1F00') /* bits kept from the shifted intra write-back byte */
diff --git a/src/shaders/vme/Makefile.am b/src/shaders/vme/Makefile.am
index e3c401d..d89b689 100644
--- a/src/shaders/vme/Makefile.am
+++ b/src/shaders/vme/Makefile.am
@@ -1,32 +1,41 @@
VME_CORE = batchbuffer.asm intra_frame.asm inter_frame.asm
-VME7_CORE = batchbuffer.asm intra_frame_ivb.asm inter_frame_ivb.asm inter_bframe_ivb..asm
-VME75_CORE = batchbuffer.asm intra_frame_haswell.asm inter_frame_haswell.asm inter_bframe_haswell.asm
+VME7_CORE = batchbuffer.asm intra_frame_ivb.asm inter_frame_ivb.asm inter_bframe_ivb.asm mpeg2_inter_ivb.asm
+VME75_CORE = batchbuffer.asm intra_frame_haswell.asm inter_frame_haswell.asm inter_bframe_haswell.asm mpeg2_inter_haswell.asm
+VME8_CORE = intra_frame_gen8.asm inter_frame_gen8.asm inter_bframe_gen8.asm mpeg2_inter_gen8.asm
INTEL_G6B = batchbuffer.g6b intra_frame.g6b inter_frame.g6b
INTEL_G6A = batchbuffer.g6a intra_frame.g6a inter_frame.g6a
INTEL_GEN6_INC = batchbuffer.inc vme.inc
INTEL_GEN6_ASM = $(INTEL_G6A:%.g6a=%.gen6.asm)
-INTEL_G7B = batchbuffer.g7b intra_frame.g7b inter_frame.g7b mpeg2_inter_frame.g7b intra_frame_ivb.g7b inter_frame_ivb.g7b inter_bframe_ivb.g7b
-INTEL_G7A = batchbuffer.g7a intra_frame.g7a inter_frame.g7a mpeg2_inter_frame.g7a intra_frame_ivb.g7a inter_frame_ivb.g7a inter_bframe_ivb.g7a
+INTEL_G7B = batchbuffer.g7b intra_frame.g7b inter_frame.g7b intra_frame_ivb.g7b inter_frame_ivb.g7b inter_bframe_ivb.g7b mpeg2_inter_ivb.g7b
+INTEL_G7A = batchbuffer.g7a intra_frame.g7a inter_frame.g7a intra_frame_ivb.g7a inter_frame_ivb.g7a inter_bframe_ivb.g7a mpeg2_inter_ivb.g7a
INTEL_GEN7_INC = batchbuffer.inc vme.inc vme7_mpeg2.inc vme7.inc
INTEL_GEN7_ASM = $(INTEL_G7A:%.g7a=%.gen7.asm)
-INTEL_G75B = batchbuffer.g75b intra_frame_haswell.g75b inter_frame_haswell.g75b mpeg2_inter_frame_haswell.g75b inter_bframe_haswell.g75b
-INTEL_G75A = batchbuffer.g75a intra_frame_haswell.g75a inter_frame_haswell.g75a mpeg2_inter_frame_haswell.g75a inter_bframe_haswell.g75a
+INTEL_G75B = batchbuffer.g75b intra_frame_haswell.g75b inter_frame_haswell.g75b inter_bframe_haswell.g75b mpeg2_inter_haswell.g75b
+INTEL_G75A = batchbuffer.g75a intra_frame_haswell.g75a inter_frame_haswell.g75a inter_bframe_haswell.g75a mpeg2_inter_haswell.g75a
INTEL_GEN75_INC = batchbuffer.inc vme75.inc vme75_mpeg2.inc
INTEL_GEN75_ASM = $(INTEL_G75A:%.g75a=%.gen75.asm)
+# Gen8 VME kernels: assembled .g8b outputs, .g8a wrapper sources, shared includes, and the intermediate .gen8.asm files named after the .g8a files
+INTEL_G8B = intra_frame_gen8.g8b inter_frame_gen8.g8b inter_bframe_gen8.g8b mpeg2_inter_gen8.g8b
+INTEL_G8A = intra_frame_gen8.g8a inter_frame_gen8.g8a inter_bframe_gen8.g8a mpeg2_inter_gen8.g8a
+INTEL_GEN8_INC = vme8.inc vme75_mpeg2.inc
+INTEL_GEN8_ASM = $(INTEL_G8A:%.g8a=%.gen8.asm)
+
+
TARGETS =
if HAVE_GEN4ASM
TARGETS += $(INTEL_G6B)
TARGETS += $(INTEL_G7B)
TARGETS += $(INTEL_G75B)
+TARGETS += $(INTEL_G8B)
endif
all-local: $(TARGETS)
-SUFFIXES = .g6a .g6b .g7a .g7b .gen6.asm .gen7.asm .g75a .g75b .gen75.asm
+SUFFIXES = .g6a .g6b .g7a .g7b .gen6.asm .gen7.asm .g75a .g75b .gen75.asm .g8a .g8b .gen8.asm
if HAVE_GEN4ASM
$(INTEL_GEN6_ASM): $(VME_CORE) $(INTEL_GEN6_INC)
@@ -37,7 +46,7 @@ $(INTEL_GEN6_ASM): $(VME_CORE) $(INTEL_GEN6_INC)
.gen6.asm.g6b:
$(AM_V_GEN)$(GEN4ASM) -g 6 -o $@ $<
-$(INTEL_GEN7_ASM): $(VME_CORE) $(INTEL_GEN7_INC)
+$(INTEL_GEN7_ASM): $(VME7_CORE) $(INTEL_GEN7_INC)
.g7a.gen7.asm:
$(AM_V_GEN)cpp -P -DDEV_IVB $< > _vme0.$@ && \
m4 _vme0.$@ > $@ && \
@@ -53,9 +62,18 @@ $(INTEL_GEN75_ASM): $(VME75_CORE) $(INTEL_GEN75_INC)
rm _vme0.$@
.gen75.asm.g75b:
$(AM_V_GEN)$(GEN4ASM) -g 7.5 -o $@ $<
+
+$(INTEL_GEN8_ASM): $(VME8_CORE) $(INTEL_GEN8_INC)
+.g8a.gen8.asm:
+ $(AM_V_GEN)cpp -P $< > _vme0.$@ && \
+ m4 _vme0.$@ > $@ && \
+ rm _vme0.$@
+.gen8.asm.g8b:
+ $(AM_V_GEN)$(GEN4ASM) -g 8 -o $@ $<
+
endif
-CLEANFILES = $(INTEL_GEN6_ASM) $(INTEL_GEN7_ASM) $(INTEL_GEN75_ASM)
+CLEANFILES = $(INTEL_GEN6_ASM) $(INTEL_GEN7_ASM) $(INTEL_GEN75_ASM) $(INTEL_GEN8_ASM)
EXTRA_DIST = \
$(INTEL_G6A) \
@@ -64,13 +82,15 @@ EXTRA_DIST = \
$(INTEL_G75B) \
$(INTEL_G7A) \
$(INTEL_G7B) \
- $(INTEL_GEN6_ASM) \
+ $(INTEL_G8A) \
+ $(INTEL_G8B) \
$(INTEL_GEN6_INC) \
- $(INTEL_GEN75_ASM) \
$(INTEL_GEN75_INC) \
- $(INTEL_GEN7_ASM) \
$(INTEL_GEN7_INC) \
+ $(INTEL_GEN8_INC) \
$(VME75_CORE) \
+ $(VME7_CORE) \
+ $(VME8_CORE) \
$(VME_CORE) \
$(NULL)
diff --git a/src/shaders/vme/inter_bframe_gen8.asm b/src/shaders/vme/inter_bframe_gen8.asm
new file mode 100644
index 0000000..240dc61
--- /dev/null
+++ b/src/shaders/vme/inter_bframe_gen8.asm
@@ -0,0 +1,875 @@
+/*
+ * Copyright © <2010>, Intel Corporation.
+ *
+ * This program is licensed under the terms and conditions of the
+ * Eclipse Public License (EPL), version 1.0. The full text of the EPL is at
+ * http://www.opensource.org/licenses/eclipse-1.0.php.
+ * Authors: Zhao Yakui <yakui.zhao@intel.com>
+ */
+// Module name: inter_bframe_gen8.asm
+//
+// Perform inter prediction estimation for B-frames
+//
+
+//
+// Now, begin source code....
+//
+
+#define SAVE_RET add (1) RETURN_REG<1>:ud ip:ud 32:ud
+#define RETURN mov (1) ip:ud RETURN_REG<0,1,0>:ud
+
+/*
+ * __START
+ */
+__INTER_START:
+mov (16) tmp_reg0.0<1>:UD 0x0:UD {align1};
+mov (16) tmp_reg2.0<1>:UD 0x0:UD {align1};
+mov (16) tmp_reg4.0<1>:UD 0x0:UD {align1} ;
+mov (16) tmp_reg6.0<1>:UD 0x0:UD {align1} ;
+
+shl (2) read0_header.0<1>:D orig_xy_ub<2,2,1>:UB 4:UW {align1}; /* (x, y) * 16 */
+add (1) read0_header.0<1>:D read0_header.0<0,1,0>:D -8:W {align1}; /* X offset */
+add (1) read0_header.4<1>:D read0_header.4<0,1,0>:D -1:W {align1}; /* Y offset */
+mov (1) read0_header.8<1>:UD BLOCK_32X1 {align1};
+mov (1) read0_header.20<1>:UB thread_id_ub {align1}; /* dispatch id */
+
+shl (2) read1_header.0<1>:D orig_xy_ub<2,2,1>:UB 4:UW {align1}; /* (x, y) * 16 */
+add (1) read1_header.0<1>:D read1_header.0<0,1,0>:D -4:W {align1}; /* X offset */
+mov (1) read1_header.8<1>:UD BLOCK_4X16 {align1};
+mov (1) read1_header.20<1>:UB thread_id_ub {align1}; /* dispatch id */
+
+shl (2) vme_m0.8<1>:UW orig_xy_ub<2,2,1>:UB 4:UW {align1}; /* (x, y) * 16 */
+mov (1) vme_m0.20<1>:UB thread_id_ub {align1}; /* dispatch id */
+
+mul (1) obw_m0.8<1>:UD w_in_mb_uw<0,1,0>:UW orig_y_ub<0,1,0>:UB {align1};
+add (1) obw_m0.8<1>:UD obw_m0.8<0,1,0>:UD orig_x_ub<0,1,0>:UB {align1};
+mul (1) obw_m0.8<1>:UD obw_m0.8<0,1,0>:UD 24:UD {align1};
+mov (1) obw_m0.20<1>:UB thread_id_ub {align1}; /* dispatch id */
+
+/*
+ * Media Read Message -- fetch Luma neighbor edge pixels
+ */
+/* ROW */
+mov (8) msg_reg0.0<1>:UD read0_header.0<8,8,1>:UD {align1};
+send (8) msg_ind INEP_ROW<1>:UB null read(BIND_IDX_INEP, 0, 0, 4) mlen 1 rlen 1 {align1};
+
+/* COL */
+mov (8) msg_reg0.0<1>:UD read1_header.0<8,8,1>:UD {align1};
+send (8) msg_ind INEP_COL0<1>:UB null read(BIND_IDX_INEP, 0, 0, 4) mlen 1 rlen 2 {align1};
+
+/*
+ * Media Read Message -- fetch Chroma neighbor edge pixels
+ */
+/* ROW */
+shl (2) read0_header.0<1>:D orig_xy_ub<2,2,1>:UB 3:UW {align1}; /* x * 16 , y * 8 */
+mul (1) read0_header.0<1>:D read0_header.0<0,1,0>:D 2:W {align1};
+add (1) read0_header.0<1>:D read0_header.0<0,1,0>:D -8:W {align1}; /* X offset */
+add (1) read0_header.4<1>:D read0_header.4<0,1,0>:D -1:W {align1}; /* Y offset */
+mov (8) msg_reg0.0<1>:UD read0_header.0<8,8,1>:UD {align1};
+send (8) msg_ind CHROMA_ROW<1>:UB null read(BIND_IDX_CBCR, 0, 0, 4) mlen 1 rlen 1 {align1};
+
+/* COL */
+shl (2) read1_header.0<1>:D orig_xy_ub<2,2,1>:UB 3:UW {align1}; /* x * 16, y * 8 */
+mul (1) read1_header.0<1>:D read1_header.0<0,1,0>:D 2:W {align1};
+add (1) read1_header.0<1>:D read1_header.0<0,1,0>:D -4:W {align1}; /* X offset */
+mov (1) read1_header.8<1>:UD BLOCK_8X4 {align1};
+mov (8) msg_reg0.0<1>:UD read1_header.0<8,8,1>:UD {align1};
+send (8) msg_ind CHROMA_COL<1>:UB null read(BIND_IDX_CBCR, 0, 0, 4) mlen 1 rlen 1 {align1};
+
+mov (8) vme_m1.0<1>:ud 0:ud {align1};
+mov (8) mb_mvp_ref.0<1>:ud 0:ud {align1};
+mov (8) mb_ref_win.0<1>:ud 0:ud {align1};
+and.z.f0.0 (1) null:uw mb_hwdep<0,1,0>:uw 0x04:uw {align1};
+(f0.0) jmpi (1) __mb_hwdep_end;
+
+/* read back the data for MB A */
+/* The layout of each MB result is: rX.0 (available flag), rX.4 (forward MV), rX.8 (backward MV),
+ * rX.16 (Pred_L0 flag), rX.18 (Pred_L1 flag), rX.20 (forward reference ID), rX.22 (backward reference ID)
+ */
+mov (8) mba_result.0<1>:ud 0x0:ud {align1};
+mov (8) mbb_result.0<1>:ud 0x0:ud {align1};
+mov (8) mbc_result.0<1>:ud 0x0:ud {align1};
+mba_start:
+mov (8) mb_msg0.0<1>:ud 0:ud {align1};
+and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_AE:uw {align1};
+/* MB A doesn't exist. Zero MV. mba_flag is zero and ref ID = -1 */
+(f0.0) mov (2) mba_result.20<1>:w -1:w {align1};
+(f0.0) jmpi (1) mbb_start;
+mov (1) mba_result.0<1>:d MB_AVAIL {align1};
+mov (2) tmp_reg0.0<1>:UW orig_xy_ub<2,2,1>:UB {align1};
+add (1) tmp_reg0.0<1>:w tmp_reg0.0<0,1,0>:w -1:w {align1};
+mul (1) mb_msg0.8<1>:UD w_in_mb_uw<0,1,0>:UW tmp_reg0.2<0,1,0>:UW {align1};
+add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD tmp_reg0.0<0,1,0>:uw {align1};
+mul (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD 24:UD {align1};
+mov (1) mb_msg0.20<1>:UB thread_id_ub {align1}; /* dispatch id */
+
+/* bind index 3, read 4 oword (64bytes), msg type: 0(OWord Block Read) */
+send (16)
+ mb_ind
+ mb_wb.0<1>:ud
+ NULL
+ data_port(
+ OBR_CACHE_TYPE,
+ OBR_MESSAGE_TYPE,
+ OBR_CONTROL_4,
+ OBR_BIND_IDX,
+ OBR_WRITE_COMMIT_CATEGORY,
+ OBR_HEADER_PRESENT
+ )
+ mlen 1
+ rlen 2
+ {align1};
+
+/* TODO: RefID is required after multi-references are added */
+cmp.l.f0.0 (1) null:w mb_intra_wb.16<0,1,0>:uw mb_inter_wb.8<0,1,0>:uw {align1};
+(f0.0) mov (2) mba_result.20<1>:w -1:w {align1};
+(f0.0) jmpi (1) mbb_start;
+
+add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:ud 3:ud {align1};
+/* Read MV for MB A */
+/* bind index 3, read 8 oword (128bytes), msg type: 0(OWord Block Read) */
+send (16)
+ mb_ind
+ mb_mv0.0<1>:ud
+ NULL
+ data_port(
+ OBR_CACHE_TYPE,
+ OBR_MESSAGE_TYPE,
+ OBR_CONTROL_8,
+ OBR_BIND_IDX,
+ OBR_WRITE_COMMIT_CATEGORY,
+ OBR_HEADER_PRESENT
+ )
+ mlen 1
+ rlen 4
+ {align1};
+/* TODO: RefID is required after multi-references are added */
+/* MV */
+mov (2) mba_result.20<1>:w -1:w {align1};
+mov (1) INPUT_ARG0.0<1>:ud mb_inter_wb.4<0,1,0>:ud {align1};
+mov (1) INPUT_ARG0.4<1>:ud mb_inter_wb.0<0,1,0>:ud {align1};
+mov (1) INPUT_ARG0.8<1>:ud INTER_BLOCK1:ud {align1};
+SAVE_RET {align1};
+jmpi (1) mb_pred_func;
+mov (1) mb_pred_mode.0<1>:uw RET_ARG<0,1,0>:uw {align1};
+cmp.e.f0.0 (1) null:uw mb_pred_mode.0<0,1,0>:uw PRED_L0 {align1};
+(f0.0) mov (1) mba_result.16<1>:uw MB_PRED_FLAG {align1};
+(f0.0) mov (1) mba_result.20<1>:w 0:w {align1};
+(f0.0) mov (1) mba_result.4<1>:ud mb_mv1.8<0,1,0>:ud {align1};
+(f0.0) jmpi (1) mbb_start;
+cmp.e.f0.0 (1) null:uw mb_pred_mode.0<0,1,0>:uw PRED_L1 {align1};
+(f0.0) mov (1) mba_result.18<1>:uw MB_PRED_FLAG {align1};
+(f0.0) mov (1) mba_result.22<1>:w 0:w {align1};
+(f0.0) mov (1) mba_result.8<1>:ud mb_mv1.12<0,1,0>:ud {align1};
+(f0.0) jmpi (1) mbb_start;
+mov (2) mba_result.4<1>:ud mb_mv1.8<2,2,1>:ud {align1};
+mov (2) mba_result.16<1>:uw MB_PRED_FLAG {align1};
+mov (2) mba_result.20<1>:w 0:w {align1};
+
+mbb_start:
+mov (8) mb_msg0.0<1>:ud 0:ud {align1};
+and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_B:uw {align1};
+/* MB B doesn't exist. Zero MV. The MB B available flag stays zero and both ref IDs = -1 */
+/* If MB B doesn't exist, neither MB C nor D exists */
+(f0.0) mov (2) mbb_result.20<1>:w -1:w {align1};
+(f0.0) mov (2) mbc_result.20<1>:w -1:w {align1};
+(f0.0) jmpi (1) mb_mvp_start;
+mov (1) mbb_result.0<1>:d MB_AVAIL {align1};
+mov (2) tmp_reg0.0<1>:UW orig_xy_ub<2,2,1>:UB {align1};
+add (1) tmp_reg0.2<1>:w tmp_reg0.2<0,1,0>:w -1:w {align1};
+mul (1) mb_msg0.8<1>:UD w_in_mb_uw<0,1,0>:UW tmp_reg0.2<0,1,0>:UW {align1};
+add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD tmp_reg0.0<0,1,0>:uw {align1};
+mul (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD 24:UD {align1};
+mov (1) mb_msg0.20<1>:UB thread_id_ub {align1}; /* dispatch id */
+
+/* bind index 3, read 4 oword (64bytes), msg type: 0(OWord Block Read) */
+send (16)
+ mb_ind
+ mb_wb.0<1>:ud
+ NULL
+ data_port(
+ OBR_CACHE_TYPE,
+ OBR_MESSAGE_TYPE,
+ OBR_CONTROL_4,
+ OBR_BIND_IDX,
+ OBR_WRITE_COMMIT_CATEGORY,
+ OBR_HEADER_PRESENT
+ )
+ mlen 1
+ rlen 2
+ {align1};
+
+/* TODO: RefID is required after multi-references are added */
+cmp.l.f0.0 (1) null:w mb_intra_wb.16<0,1,0>:uw mb_inter_wb.8<0,1,0>:uw {align1};
+(f0.0) mov (2) mbb_result.20<1>:w -1:w {align1};
+(f0.0) jmpi (1) mbc_start;
+add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:ud 3:ud {align1};
+/* Read MV for MB B */
+/* bind index 3, read 8 oword (128bytes), msg type: 0(OWord Block Read) */
+send (16)
+ mb_ind
+ mb_mv0.0<1>:ud
+ NULL
+ data_port(
+ OBR_CACHE_TYPE,
+ OBR_MESSAGE_TYPE,
+ OBR_CONTROL_8,
+ OBR_BIND_IDX,
+ OBR_WRITE_COMMIT_CATEGORY,
+ OBR_HEADER_PRESENT
+ )
+ mlen 1
+ rlen 4
+ {align1};
+/* TODO: RefID is required after multi-references are added */
+mov (2) mbb_result.20<1>:w -1:w {align1};
+mov (1) INPUT_ARG0.0<1>:ud mb_inter_wb.4<0,1,0>:ud {align1};
+mov (1) INPUT_ARG0.4<1>:ud mb_inter_wb.0<0,1,0>:ud {align1};
+mov (1) INPUT_ARG0.8<1>:ud INTER_BLOCK2:ud {align1};
+SAVE_RET {align1};
+jmpi (1) mb_pred_func;
+mov (1) mb_pred_mode.0<1>:uw RET_ARG<0,1,0>:uw {align1};
+cmp.e.f0.0 (1) null:uw mb_pred_mode.0<0,1,0>:uw PRED_L0 {align1};
+(f0.0) mov (1) mbb_result.16<1>:uw MB_PRED_FLAG {align1};
+(f0.0) mov (1) mbb_result.20<1>:w 0:w {align1};
+(f0.0) mov (1) mbb_result.4<1>:ud mb_mv2.16<0,1,0>:ud {align1};
+(f0.0) jmpi (1) mbc_start;
+cmp.e.f0.0 (1) null:uw mb_pred_mode.0<0,1,0>:uw PRED_L1 {align1};
+(f0.0) mov (1) mbb_result.18<1>:uw MB_PRED_FLAG {align1};
+(f0.0) mov (1) mbb_result.22<1>:w 0:w {align1};
+(f0.0) mov (1) mbb_result.8<1>:ud mb_mv2.20<0,1,0>:ud {align1};
+(f0.0) jmpi (1) mbc_start;
+mov (2) mbb_result.16<1>:uw MB_PRED_FLAG {align1};
+mov (2) mbb_result.20<1>:w 0:w {align1};
+mov (2) mbb_result.4<1>:ud mb_mv2.16<2,2,1>:ud {align1};
+
+mbc_start:
+mov (8) mb_msg0.0<1>:ud 0:ud {align1};
+and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_C:uw {align1};
+/* MB C doesn't exist. Its result keeps a zero MV and a zero available flag for now */
+/* Based on the H.264 spec, MB D replaces MB C when MB C doesn't exist */
+(f0.0) jmpi (1) mbd_start;
+mov (1) mbc_result.0<1>:d MB_AVAIL {align1};
+mov (2) tmp_reg0.0<1>:UW orig_xy_ub<2,2,1>:UB {align1};
+add (1) tmp_reg0.2<1>:w tmp_reg0.2<0,1,0>:w -1:w {align1};
+add (1) tmp_reg0.0<1>:w tmp_reg0.0<0,1,0>:w 1:w {align1};
+mul (1) mb_msg0.8<1>:UD w_in_mb_uw<0,1,0>:UW tmp_reg0.2<0,1,0>:UW {align1};
+add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD tmp_reg0.0<0,1,0>:uw {align1};
+mul (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD 24:UD {align1};
+mov (1) mb_msg0.20<1>:UB thread_id_ub {align1}; /* dispatch id */
+
+/* bind index 3, read 4 oword (64bytes), msg type: 0(OWord Block Read) */
+send (16)
+ mb_ind
+ mb_wb.0<1>:ud
+ NULL
+ data_port(
+ OBR_CACHE_TYPE,
+ OBR_MESSAGE_TYPE,
+ OBR_CONTROL_4,
+ OBR_BIND_IDX,
+ OBR_WRITE_COMMIT_CATEGORY,
+ OBR_HEADER_PRESENT
+ )
+ mlen 1
+ rlen 2
+ {align1};
+
+/* TODO: RefID is required after multi-references are added */
+cmp.l.f0.0 (1) null:w mb_intra_wb.16<0,1,0>:uw mb_inter_wb.8<0,1,0>:uw {align1};
+(f0.0) mov (2) mbc_result.20<1>:w -1:w {align1};
+(f0.0) jmpi (1) mb_mvp_start;
+add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:ud 3:ud {align1};
+/* Read MV for MB C */
+/* bind index 3, read 8 oword (128bytes), msg type: 0(OWord Block Read) */
+send (16)
+ mb_ind
+ mb_mv0.0<1>:ud
+ NULL
+ data_port(
+ OBR_CACHE_TYPE,
+ OBR_MESSAGE_TYPE,
+ OBR_CONTROL_8,
+ OBR_BIND_IDX,
+ OBR_WRITE_COMMIT_CATEGORY,
+ OBR_HEADER_PRESENT
+ )
+ mlen 1
+ rlen 4
+ {align1};
+/* TODO: RefID is required after multi-references are added */
+/* Forward MV */
+mov (2) mbc_result.20<1>:w -1:w {align1};
+mov (1) INPUT_ARG0.0<1>:ud mb_inter_wb.4<0,1,0>:ud {align1};
+mov (1) INPUT_ARG0.4<1>:ud mb_inter_wb.0<0,1,0>:ud {align1};
+mov (1) INPUT_ARG0.8<1>:ud INTER_BLOCK2:ud {align1};
+SAVE_RET {align1};
+jmpi (1) mb_pred_func;
+mov (1) mb_pred_mode.0<1>:uw RET_ARG<0,1,0>:uw {align1};
+cmp.e.f0.0 (1) null:uw mb_pred_mode.0<0,1,0>:uw PRED_L0 {align1};
+(f0.0) mov (1) mbc_result.16<1>:uw MB_PRED_FLAG {align1};
+(f0.0) mov (1) mbc_result.20<1>:w 0:w {align1};
+(f0.0) mov (1) mbc_result.4<1>:ud mb_mv2.16<0,1,0>:ud {align1};
+(f0.0) jmpi (1) mb_mvp_start;
+cmp.e.f0.0 (1) null:uw mb_pred_mode.0<0,1,0>:uw PRED_L1 {align1};
+(f0.0) mov (1) mbc_result.18<1>:uw MB_PRED_FLAG {align1};
+(f0.0) mov (1) mbc_result.22<1>:w 0:w {align1};
+(f0.0) mov (1) mbc_result.8<1>:ud mb_mv2.20<0,1,0>:ud {align1};
+(f0.0) jmpi (1) mb_mvp_start;
+mov (2) mbc_result.16<1>:uw MB_PRED_FLAG {align1};
+mov (2) mbc_result.20<1>:w 0:w {align1};
+mov (2) mbc_result.4<1>:ud mb_mv2.16<2,2,1>:ud {align1};
+
+jmpi (1) mb_mvp_start;
+mbd_start: /* Reached only when the MB C availability bit is clear: try MB D at (x-1, y-1) and store its data in mbc_result */
+mov (8) mb_msg0.0<1>:ud 0:ud {align1}; /* clear the read message header */
+and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_D:uw {align1}; /* f0.0 set when the MB D availability bit is clear */
+(f0.0) mov (2) mbc_result.20<1>:w -1:w {align1}; /* no MB D either: forward and backward ref IDs = -1 */
+(f0.0) jmpi (1) mb_mvp_start;
+mov (1) mbc_result.0<1>:d MB_AVAIL {align1}; /* mark the C/D slot as available */
+mov (2) tmp_reg0.0<1>:UW orig_xy_ub<2,2,1>:UB {align1}; /* tmp_reg0.0 = x, tmp_reg0.2 = y (in MB units) */
+add (2) tmp_reg0.0<1>:w tmp_reg0.0<2,2,1>:w -1:w {align1}; /* (x - 1, y - 1) */
+mul (1) mb_msg0.8<1>:UD w_in_mb_uw<0,1,0>:UW tmp_reg0.2<0,1,0>:UW {align1}; /* w_in_mb * (y - 1) */
+add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD tmp_reg0.0<0,1,0>:uw {align1}; /* + (x - 1) = linear MB index */
+mul (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD 24:UD {align1}; /* * 24 = block offset of the MB D record (same scaling as obw_m0.8) */
+mov (1) mb_msg0.20<1>:UB thread_id_ub {align1}; /* dispatch id */
+
+/* bind index 3, read 4 oword (64bytes), msg type: 0(OWord Block Read) */
+send (16)
+ mb_ind
+ mb_wb.0<1>:ud
+ NULL
+ data_port(
+ OBR_CACHE_TYPE,
+ OBR_MESSAGE_TYPE,
+ OBR_CONTROL_4,
+ OBR_BIND_IDX,
+ OBR_WRITE_COMMIT_CATEGORY,
+ OBR_HEADER_PRESENT
+ )
+ mlen 1
+ rlen 2
+ {align1};
+
+cmp.l.f0.0 (1) null:w mb_intra_wb.16<0,1,0>:uw mb_inter_wb.8<0,1,0>:uw {align1}; /* NOTE(review): presumably intra cost < inter cost, i.e. MB D was coded as intra - confirm against the record layout */
+(f0.0) mov (2) mbc_result.20<1>:w -1:w {align1}; /* then MB D contributes no MV: ref IDs = -1 */
+(f0.0) jmpi (1) mb_mvp_start;
+
+add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:ud 3:ud {align1}; /* advance the block offset by 3 to reach the MV data of the record */
+/* Read MV for MB D */
+/* bind index 3, read 8 oword (128bytes), msg type: 0(OWord Block Read) */
+send (16)
+ mb_ind
+ mb_mv0.0<1>:ub /* NOTE(review): the MB A/B/C MV reads type this destination as :ud; :ub here looks like a typo - confirm it does not change the encoding */
+ NULL
+ data_port(
+ OBR_CACHE_TYPE,
+ OBR_MESSAGE_TYPE,
+ OBR_CONTROL_8,
+ OBR_BIND_IDX,
+ OBR_WRITE_COMMIT_CATEGORY,
+ OBR_HEADER_PRESENT
+ )
+ mlen 1
+ rlen 4
+ {align1};
+
+/* TODO: RefID is required after multi-references are added */
+
+/* Default both ref IDs to -1, then query the prediction mode of block 3 of MB D */
+mov (2) mbc_result.20<1>:w -1:w {align1};
+mov (1) INPUT_ARG0.0<1>:ud mb_inter_wb.4<0,1,0>:ud {align1}; /* arg 0: dword 1 of the inter record (byte 2 is read by mb_pred_func) */
+mov (1) INPUT_ARG0.4<1>:ud mb_inter_wb.0<0,1,0>:ud {align1}; /* arg 1: dword 0 of the inter record (masked with INTER_MASK by mb_pred_func) */
+mov (1) INPUT_ARG0.8<1>:ud INTER_BLOCK3:ud {align1}; /* arg 2: block selector */
+SAVE_RET {align1}; /* RETURN_REG = ip + 32, the return address used by RETURN */
+jmpi (1) mb_pred_func;
+mov (1) mb_pred_mode.0<1>:uw RET_ARG<0,1,0>:uw {align1}; /* prediction mode of the selected block */
+cmp.e.f0.0 (1) null:uw mb_pred_mode.0<0,1,0>:uw PRED_L0 {align1}; /* L0-only prediction? */
+(f0.0) mov (1) mbc_result.16<1>:uw MB_PRED_FLAG {align1}; /* set the Pred_L0 flag */
+(f0.0) mov (1) mbc_result.20<1>:w 0:w {align1}; /* forward ref ID = 0 */
+(f0.0) mov (1) mbc_result.4<1>:ud mb_mv3.24<0,1,0>:ud {align1}; /* forward MV of block 3 */
+(f0.0) jmpi (1) mb_mvp_start;
+cmp.e.f0.0 (1) null:uw mb_pred_mode.0<0,1,0>:uw PRED_L1 {align1}; /* L1-only prediction? */
+(f0.0) mov (1) mbc_result.18<1>:uw MB_PRED_FLAG {align1}; /* set the Pred_L1 flag */
+(f0.0) mov (1) mbc_result.22<1>:w 0:w {align1}; /* backward ref ID = 0 */
+(f0.0) mov (1) mbc_result.8<1>:ud mb_mv3.28<0,1,0>:ud {align1}; /* backward MV of block 3 */
+(f0.0) jmpi (1) mb_mvp_start;
+mov (2) mbc_result.16<1>:uw MB_PRED_FLAG {align1}; /* any other mode: set both prediction flags */
+mov (2) mbc_result.20<1>:w 0:w {align1}; /* both ref IDs = 0 */
+mov (2) mbc_result.4<1>:ud mb_mv3.24<2,2,1>:ud {align1}; /* copy forward and backward MVs */
+
+mb_mvp_start: /* Build the MV predictors in mb_mvp_ref from the neighbour results (mba/mbb/mbc_result) */
+/* TODO: Add the skip prediction */
+/* Check whether both MB B and C are unavailable (sum of their available flags is zero) */
+add (1) tmp_reg0.0<1>:d mbb_result.0<0,1,0>:d mbc_result.0<0,1,0>:d {align1};
+cmp.z.f0.0 (1) null:d tmp_reg0.0<0,1,0>:d 0:d {align1};
+(-f0.0) jmpi (1) mb_median_start; /* at least one of B/C is available: use the median path */
+cmp.nz.f0.0 (1) null:d mba_result.0<0,1,0>:d 0:d {align1}; /* f0.0 set when MB A is available */
+(f0.0) mov (2) mbb_result.4<1>:ud mba_result.4<2,2,1>:ud {align1}; /* B := A (forward and backward MV) */
+(f0.0) mov (2) mbc_result.4<1>:ud mba_result.4<2,2,1>:ud {align1}; /* C := A (forward and backward MV) */
+(f0.0) mov (2) mbb_result.20<1>:uw mba_result.20<2,2,1>:uw {align1}; /* B ref IDs := A ref IDs */
+(f0.0) mov (2) mbc_result.20<1>:uw mba_result.20<2,2,1>:uw {align1}; /* C ref IDs := A ref IDs */
+(f0.0) mov (2) mb_mvp_ref.0<1>:ud mba_result.4<2,2,1>:ud {align1}; /* predictors = MVs of MB A */
+(-f0.0) mov (2) mb_mvp_ref.0<1>:ud 0:ud {align1}; /* no neighbour at all: zero predictors */
+jmpi (1) __mb_hwdep_end;
+
+mb_median_start: /* Forward predictor: written to mb_mvp_ref.0 (x) and mb_mvp_ref.2 (y) */
+/* forward_MVP */
+/* Count the neighbours whose forward ref ID is 0; if exactly one matches, its MV is the predictor */
+mov (8) tmp_reg0.0<1>:ud 0:ud {align1}; /* tmp_reg0.0 = match count, tmp_reg0.4 = last matching MV */
+cmp.z.f0.0 (1) null:d mba_result.20<0,1,0>:w 0:w {align1}; /* MB A forward ref ID == 0? */
+(f0.0) add (1) tmp_reg0.0<1>:w tmp_reg0.0<0,1,0>:w 1:w {align1};
+(f0.0) mov (1) tmp_reg0.4<1>:ud mba_result.4<0,1,0>:ud {align1};
+cmp.z.f0.0 (1) null:d mbb_result.20<0,1,0>:w 0:w {align1}; /* MB B forward ref ID == 0? */
+(f0.0) add (1) tmp_reg0.0<1>:w tmp_reg0.0<0,1,0>:w 1:w {align1};
+(f0.0) mov (1) tmp_reg0.4<1>:ud mbb_result.4<0,1,0>:ud {align1};
+cmp.z.f0.0 (1) null:d mbc_result.20<0,1,0>:w 0:w {align1}; /* MB C (or D) forward ref ID == 0? */
+(f0.0) add (1) tmp_reg0.0<1>:w tmp_reg0.0<0,1,0>:w 1:w {align1};
+(f0.0) mov (1) tmp_reg0.4<1>:ud mbc_result.4<0,1,0>:ud {align1};
+cmp.e.f0.0 (1) null:d tmp_reg0.0<0,1,0>:w 1:w {align1}; /* exactly one match? */
+(f0.0) mov (1) mb_mvp_ref.0<1>:ud tmp_reg0.4<0,1,0>:ud {align1}; /* take that MV (x and y) directly */
+(f0.0) jmpi (1) mvp_backward;
+
+mov (1) INPUT_ARG0.0<1>:w mba_result.4<0,1,0>:w {align1}; /* otherwise: median of the three forward MV x components */
+mov (1) INPUT_ARG0.4<1>:w mbb_result.4<0,1,0>:w {align1};
+mov (1) INPUT_ARG0.8<1>:w mbc_result.4<0,1,0>:w {align1};
+SAVE_RET {align1}; /* RETURN_REG = ip + 32, the return address used by RETURN */
+ jmpi (1) word_imedian;
+mov (1) mb_mvp_ref.0<1>:w RET_ARG<0,1,0>:w {align1}; /* forward predictor x */
+mov (1) INPUT_ARG0.0<1>:w mba_result.6<0,1,0>:w {align1}; /* median of the three forward MV y components */
+mov (1) INPUT_ARG0.4<1>:w mbb_result.6<0,1,0>:w {align1};
+mov (1) INPUT_ARG0.8<1>:w mbc_result.6<0,1,0>:w {align1};
+SAVE_RET {align1};
+jmpi (1) word_imedian;
+mov (1) mb_mvp_ref.2<1>:w RET_ARG<0,1,0>:w {align1}; /* forward predictor y */
+
+
+mvp_backward: /* Backward predictor: written to mb_mvp_ref.4 (x) and mb_mvp_ref.6 (y) */
+/* Count the neighbours whose backward ref ID is 0; if exactly one matches, its MV is the predictor */
+mov (8) tmp_reg0.0<1>:ud 0:ud {align1}; /* tmp_reg0.0 = match count, tmp_reg0.4 = last matching MV */
+cmp.z.f0.0 (1) null:d mba_result.22<0,1,0>:w 0:w {align1}; /* MB A backward ref ID == 0? */
+(f0.0) add (1) tmp_reg0.0<1>:w tmp_reg0.0<0,1,0>:w 1:w {align1};
+(f0.0) mov (1) tmp_reg0.4<1>:ud mba_result.8<0,1,0>:ud {align1};
+cmp.z.f0.0 (1) null:d mbb_result.22<0,1,0>:w 0:w {align1}; /* MB B backward ref ID == 0? */
+(f0.0) add (1) tmp_reg0.0<1>:w tmp_reg0.0<0,1,0>:w 1:w {align1};
+(f0.0) mov (1) tmp_reg0.4<1>:ud mbb_result.8<0,1,0>:ud {align1};
+cmp.z.f0.0 (1) null:d mbc_result.22<0,1,0>:w 0:w {align1}; /* MB C (or D) backward ref ID == 0? */
+(f0.0) add (1) tmp_reg0.0<1>:w tmp_reg0.0<0,1,0>:w 1:w {align1};
+(f0.0) mov (1) tmp_reg0.4<1>:ud mbc_result.8<0,1,0>:ud {align1};
+cmp.e.f0.0 (1) null:d tmp_reg0.0<0,1,0>:w 1:w {align1}; /* exactly one match? */
+(f0.0) mov (1) mb_mvp_ref.4<1>:ud tmp_reg0.4<0,1,0>:ud {align1}; /* take that MV (x and y) directly */
+(f0.0) jmpi (1) __mb_hwdep_end;
+
+mov (1) INPUT_ARG0.0<1>:w mba_result.8<0,1,0>:w {align1}; /* otherwise: median of the three backward MV x components */
+mov (1) INPUT_ARG0.4<1>:w mbb_result.8<0,1,0>:w {align1};
+mov (1) INPUT_ARG0.8<1>:w mbc_result.8<0,1,0>:w {align1};
+SAVE_RET {align1}; /* RETURN_REG = ip + 32, the return address used by RETURN */
+ jmpi (1) word_imedian;
+mov (1) mb_mvp_ref.4<1>:w RET_ARG<0,1,0>:w {align1}; /* backward predictor x */
+mov (1) INPUT_ARG0.0<1>:w mba_result.10<0,1,0>:w {align1}; /* median of the three backward MV y components */
+mov (1) INPUT_ARG0.4<1>:w mbb_result.10<0,1,0>:w {align1};
+mov (1) INPUT_ARG0.8<1>:w mbc_result.10<0,1,0>:w {align1};
+SAVE_RET {align1};
+jmpi (1) word_imedian;
+mov (1) mb_mvp_ref.6<1>:w RET_ARG<0,1,0>:w {align1}; /* backward predictor y; falls through to __mb_hwdep_end */
+
+__mb_hwdep_end:
+asr (4) mb_ref_win.0<1>:w mb_mvp_ref.0<4,4,1>:w 2:w {align1};
+add (4) mb_ref_win.8<1>:w mb_ref_win.0<4,4,1>:w 3:w {align1};
+and (4) mb_ref_win.16<1>:uw mb_ref_win.8<4,4,1>:uw 0xFFFC:uw {align1};
+/* m2, get the MV/Mb cost passed from constant buffer when
+spawning thread by MEDIA_OBJECT */
+mov (8) vme_m2<1>:UD r1.0<8,8,1>:UD {align1};
+
+mov (8) vme_msg_2<1>:UD vme_m2.0<8,8,1>:UD {align1};
+/* m3 cost center */
+mov (8) vme_m3.0<1>:ud 0x0:ud {align1};
+mov (8) vme_msg_3<1>:UD vme_m3.0<8,8,1>:UD {align1};
+
+/* m4. skip center */
+mov (8) vme_msg_4<1>:ud 0x0:ud {align1};
+
+/* m5 */
+mov (1) INEP_ROW.0<1>:UD 0x0:UD {align1};
+and (1) INEP_ROW.4<1>:UD INEP_ROW.4<0,1,0>:UD 0xFF000000:UD {align1};
+mov (8) vme_msg_5<1>:UD INEP_ROW.0<8,8,1>:UD {align1};
+/* Use the Luma mode */
+mov (1) tmp_reg0.0<1>:UW LUMA_INTRA_MODE:UW {align1};
+mov (1) vme_msg_5.5<1>:UB tmp_reg0.0<0,1,0>:UB {align1};
+
+/* m6 */
+mov (8) vme_msg_6<1>:UD 0x0:UD {align1};
+mov (16) vme_msg_6.0<1>:UB INEP_COL0.3<32,8,4>:UB {align1};
+mov (1) vme_msg_6.16<1>:UD INTRA_PREDICTORE_MODE {align1};
+
+/* the penalty for Intra mode */
+mov (1) vme_msg_6.28<1>:UD 0x010101:UD {align1};
+mov (1) vme_msg_6.20<1>:UW CHROMA_ROW.6<0,1,0>:UW {align1};
+
+
+/* m7 */
+
+mov (4) vme_msg_7.16<1>:UD CHROMA_ROW.8<4,4,1>:UD {align1};
+mov (8) vme_msg_7.0<1>:UW CHROMA_COL.2<16,8,2>:UW {align1};
+
+/*
+ * SIC VME message
+ */
+/* m1 */
+mov (1) intra_flag<1>:UW 0x0:UW {align1} ;
+and.z.f0.0 (1) null<1>:UW transform_8x8_ub<0,1,0>:UB 1:UW {align1};
+(f0.0) mov (1) intra_part_mask_ub<1>:UB LUMA_INTRA_8x8_DISABLE {align1};
+
+/* assign MB intra struct from the thread payload*/
+mov (1) mb_intra_struct_ub<1>:UB input_mb_intra_ub<0,1,0>:UB {align1};
+
+/* Disable the DC Haar component when calculating the Haar SATD of a block */
+mov (1) tmp_reg0.0<1>:UW DC_HARR_DISABLE:UW {align1};
+mov (1) vme_m1.30<1>:UB tmp_reg0.0<0,1,0>:UB {align1};
+mov (8) vme_msg_1<1>:UD vme_m1.0<8,8,1>:UD {align1};
+
+/* m0 */
+mov (1) vme_m0.12<1>:UD INTRA_SAD_HAAR:UD {align1}; /* 16x16 Source, Intra_harr */
+mov (8) vme_msg_0.0<1>:UD vme_m0.0<8,8,1>:UD {align1};
+
+/* after verification it will be passed by using payload */
+send (8)
+ vme_msg_ind
+ vme_wb<1>:UD
+ null
+ cre(
+ BIND_IDX_VME,
+ VME_SIC_MESSAGE_TYPE
+ )
+ mlen sic_vme_msg_length
+ rlen vme_wb_length
+ {align1};
+/*
+ * Oword Block Write message
+ */
+mov (8) msg_reg0.0<1>:UD obw_m0<8,8,1>:UD {align1};
+
+mov (1) msg_reg1.0<1>:UD vme_wb.0<0,1,0>:UD {align1};
+mov (1) msg_reg1.4<1>:UD vme_wb.16<0,1,0>:UD {align1};
+mov (1) msg_reg1.8<1>:UD vme_wb.20<0,1,0>:UD {align1};
+mov (1) msg_reg1.12<1>:UD vme_wb.24<0,1,0>:UD {align1};
+
+/* Distortion, Intra (17-16), */
+mov (1) msg_reg1.16<1>:UW vme_wb.12<0,1,0>:UW {align1};
+
+mov (1) msg_reg1.20<1>:UD vme_wb.8<0,1,0>:UD {align1};
+/* VME clock counts */
+mov (1) msg_reg1.24<1>:UD vme_wb.28<0,1,0>:UD {align1};
+
+mov (1) msg_reg1.28<1>:UD obw_m0.8<0,1,0>:UD {align1};
+
+/* bind index 3, write 2 oword (32bytes), msg type: 8(OWord Block Write) */
+send (16)
+ msg_ind
+ obw_wb
+ null
+ data_port(
+ OBW_CACHE_TYPE,
+ OBW_MESSAGE_TYPE,
+ OBW_CONTROL_2,
+ OBW_BIND_IDX,
+ OBW_WRITE_COMMIT_CATEGORY,
+ OBW_HEADER_PRESENT
+ )
+ mlen 2
+ rlen obw_wb_length
+ {align1};
+
+/* IME search: build message register m0 (search control and the two reference window origins) */
+mov (1) vme_m0.12<1>:UD SEARCH_CTRL_DUAL_REFERENCE + INTER_PART_MASK + INTER_SAD_HAAR:UD {align1}; /* 16x16 Source, harr */
+mov (1) vme_m0.22<1>:UW DREF_REGION_SIZE {align1};
+/* Dual Reference Width&Height,32x32 */
+
+mov (1) vme_m0.0<1>:UD vme_m0.8<0,1,0>:UD {align1}; /* copy the source (x, y) * 16 written at vme_m0.8 */
+
+/* Reference = (x-8,y-8)-(x+8,y+8) */
+add (1) vme_m0.0<1>:W vme_m0.0<0,1,0>:W -8:W {align1};
+add (1) vme_m0.2<1>:W vme_m0.2<0,1,0>:W -8:W {align1};
+/* NOTE(review): the two movs below overwrite both words written by the copy and the adds above, so the origin is the constant (-8, -8); presumably it is relative to the source MB - confirm */
+mov (1) vme_m0.0<1>:W -8:W {align1};
+mov (1) vme_m0.2<1>:W -8:W {align1};
+
+mov (1) vme_m0.4<1>:UD vme_m0.0<0,1,0>:UD {align1}; /* second reference window starts at the same origin */
+and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_AE:uw {align1}; /* f0.0 set when the MB A availability bit is clear */
+(f0.0) add (1) vme_m0.0<1>:w vme_m0.0<0,1,0>:w 4:w {align1}; /* then shift x of window 0 by +4 */
+(f0.0) add (1) vme_m0.4<1>:w vme_m0.4<0,1,0>:w 4:w {align1}; /* and x of window 1 by +4 */
+and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_B:uw {align1}; /* f0.0 set when the MB B availability bit is clear */
+(f0.0) add (1) vme_m0.2<1>:w vme_m0.2<0,1,0>:w 4:w {align1}; /* then shift y of window 0 by +4 */
+(f0.0) add (1) vme_m0.6<1>:w vme_m0.6<0,1,0>:w 4:w {align1}; /* and y of window 1 by +4 */
+
+add (2) vme_m0.0<1>:w vme_m0.0<2,2,1>:w mb_ref_win.16<2,2,1>:w {align1}; /* window 0 += forward offset from mb_ref_win.16 (predictor >> 2, rounded up to a multiple of 4) */
+add (2) vme_m0.4<1>:w vme_m0.4<2,2,1>:w mb_ref_win.20<2,2,1>:w {align1}; /* window 1 += backward offset from mb_ref_win.20 */
+
+mov (8) vme_msg_0.0<1>:UD vme_m0.0<8,8,1>:UD {align1}; /* stage m0 into the message payload */
+
+mov (1) vme_m1.0<1>:UD ADAPTIVE_SEARCH_ENABLE:ud {align1} ;
+/* the Max MV number is passed by constant buffer */
+mov (1) vme_m1.4<1>:UB r4.28<0,1,0>:UB {align1};
+mov (1) vme_m1.8<1>:UD DSTART_CENTER + DSEARCH_PATH_LEN:UD {align1};
+mov (8) vme_msg_1.0<1>:UD vme_m1.0<8,8,1>:UD {align1};
+
+mov (8) vme_msg_2<1>:UD vme_m2.0<8,8,1>:UD {align1};
+
+
+/* Setup the Cost center */
+/* currently four 8x8 share the same cost center */
+mov (4) vme_m3.0<2>:ud mb_mvp_ref.0<0,1,0>:ud {align1};
+mov (4) vme_m3.4<2>:ud mb_mvp_ref.4<0,1,0>:ud {align1};
+
+/* M4/M5 search path */
+
+mov (1) vme_msg_4.0<1>:UD 0x10010101:UD {align1};
+mov (1) vme_msg_4.4<1>:UD 0x100F0F0F:UD {align1};
+mov (1) vme_msg_4.8<1>:UD 0x10010101:UD {align1};
+mov (1) vme_msg_4.12<1>:UD 0x000F0F0F:UD {align1};
+
+mov (4) vme_msg_4.16<1>:UD 0x0:UD {align1};
+mov (8) vme_msg_5.16<1>:UD 0x0:UD {align1};
+
+send (8)
+ vme_msg_ind
+ vme_wb<1>:UD
+ null
+ vme(
+ BIND_IDX_VME,
+ 0,
+ 0,
+ VME_IME_MESSAGE_TYPE
+ )
+ mlen ime_vme_msg_length
+ rlen vme_wb_length {align1};
+
+/* Set Macroblock-shape/mode for FBR */
+
+mov (1) vme_m2.20<1>:UD 0x0:UD {align1};
+mov (1) vme_m2.21<1>:UB vme_wb.25<0,1,0>:UB {align1};
+mov (1) vme_m2.22<1>:UB vme_wb.26<0,1,0>:UB {align1};
+
+and (1) tmp_reg0.0<1>:UW vme_wb.0<0,1,0>:UW 0x03:UW {align1};
+mov (1) vme_m2.20<1>:UB tmp_reg0.0<0,1,0>:UB {align1};
+
+/* Send FBR message into CRE */
+
+mov (8) vme_msg_4.0<1>:UD vme_wb1.0<8,8,1>:UD {align1};
+mov (8) vme_msg_5.0<1>:ud vme_wb2.0<8,8,1>:ud {align1};
+mov (8) vme_msg_6.0<1>:ud vme_wb3.0<8,8,1>:ud {align1};
+mov (8) vme_msg_7.0<1>:ud vme_wb4.0<8,8,1>:ud {align1};
+
+ /* 16x16 Source, 1/4 pixel, harr, BME ENABLE */
+mov (1) vme_m0.12<1>:UD INTER_SAD_HAAR + SUB_PEL_MODE_QUARTER + FBR_BME_ENABLE:UD {align1};
+
+mov (8) vme_msg_0.0<1>:UD vme_m0.0<8,8,1>:UD {align1};
+
+mov (1) tmp_reg0.0<1>:uw BI_WEIGHT {align1};
+mov (1) vme_m1.6<1>:UB tmp_reg0.0<0,1,0>:ub {align1};
+mov (8) vme_msg_1.0<1>:UD vme_m1.0<8,8,1>:UD {align1};
+
+mov (8) vme_msg_2.0<1>:UD vme_m2.0<8,8,1>:UD {align1};
+mov (8) vme_msg_3.0<1>:UD vme_m3.0<8,8,1>:UD {align1};
+
+/* after verification it will be passed by using payload */
+send (8)
+ vme_msg_ind
+ vme_wb<1>:UD
+ null
+ cre(
+ BIND_IDX_VME,
+ VME_FBR_MESSAGE_TYPE
+ )
+ mlen fbr_vme_msg_length
+ rlen vme_wb_length
+ {align1};
+
+add (1) obw_m0.8<1>:UD obw_m0.8<0,1,0>:UD 0x02:UD {align1};
+mov (8) msg_reg0.0<1>:UD obw_m0<8,8,1>:UD {align1};
+/* write FME info */
+mov (1) msg_reg1.0<1>:UD vme_wb.0<0,1,0>:UD {align1};
+
+mov (1) msg_reg1.4<1>:UD vme_wb.24<0,1,0>:UD {align1};
+/* Inter distortion of FME */
+mov (1) msg_reg1.8<1>:UD vme_wb.8<0,1,0>:UD {align1};
+
+mov (1) msg_reg1.12<1>:UD vme_m2.20<0,1,0>:UD {align1};
+
+/* bind index 3, write oword (16bytes), msg type: 8(OWord Block Write) */
+send (16)
+ msg_ind
+ obw_wb
+ null
+ data_port(
+ OBW_CACHE_TYPE,
+ OBW_MESSAGE_TYPE,
+ OBW_CONTROL_0,
+ OBW_BIND_IDX,
+ OBW_WRITE_COMMIT_CATEGORY,
+ OBW_HEADER_PRESENT
+ )
+ mlen 2
+ rlen obw_wb_length
+ {align1};
+
+/* Write FME/BME MV */
+add (1) obw_m0.8<1>:UD obw_m0.8<0,1,0>:UD 0x01:UD {align1};
+mov (8) msg_reg0.0<1>:UD obw_m0.0<8,8,1>:UD {align1};
+
+
+mov (8) msg_reg1.0<1>:UD vme_wb1.0<8,8,1>:UD {align1};
+mov (8) msg_reg2.0<1>:ud vme_wb2.0<8,8,1>:ud {align1};
+mov (8) msg_reg3.0<1>:ud vme_wb3.0<8,8,1>:ud {align1};
+mov (8) msg_reg4.0<1>:ud vme_wb4.0<8,8,1>:ud {align1};
+/* bind index 3, write 8 oword (128 bytes), msg type: 8(OWord Block Write) */
+send (16)
+ msg_ind
+ obw_wb
+ null
+ data_port(
+ OBW_CACHE_TYPE,
+ OBW_MESSAGE_TYPE,
+ OBW_CONTROL_8,
+ OBW_BIND_IDX,
+ OBW_WRITE_COMMIT_CATEGORY,
+ OBW_HEADER_PRESENT
+ )
+ mlen 5
+ rlen obw_wb_length
+ {align1};
+
+/* Write FME/BME RefID */
+add (1) obw_m0.8<1>:UD obw_m0.8<0,1,0>:UD 0x08:UD {align1};
+mov (8) msg_reg0.0<1>:UD obw_m0<8,8,1>:UD {align1};
+
+mov (8) msg_reg1.0<1>:UD vme_wb6.0<8,8,1>:UD {align1};
+
+/* bind index 3, write 2 oword (32bytes), msg type: 8(OWord Block Write) */
+send (16)
+ msg_ind
+ obw_wb
+ null
+ data_port(
+ OBW_CACHE_TYPE,
+ OBW_MESSAGE_TYPE,
+ OBW_CONTROL_2,
+ OBW_BIND_IDX,
+ OBW_WRITE_COMMIT_CATEGORY,
+ OBW_HEADER_PRESENT
+ )
+ mlen 2
+ rlen obw_wb_length
+ {align1};
+
+
+/* Issue message fence so that the previous write message is committed */
+send (16)
+ mb_ind
+ mb_wb.0<1>:ud
+ NULL
+ data_port(
+ OBR_CACHE_TYPE,
+ OBR_MESSAGE_FENCE,
+ OBR_MF_COMMIT,
+ OBR_BIND_IDX,
+ OBR_WRITE_COMMIT_CATEGORY,
+ OBR_HEADER_PRESENT
+ )
+ mlen 1
+ rlen 1
+ {align1};
+
+__EXIT:
+/*
+ * kill thread
+ */
+mov (8) ts_msg_reg0<1>:UD r0<8,8,1>:UD {align1};
+send (16) ts_msg_ind acc0<1>UW null thread_spawner(0, 0, 1) mlen 1 rlen 0 {align1 EOT};
+
+
+ nop ;
+ nop ;
+/* word_imin: RET_ARG = signed minimum of the words at INPUT_ARG0.0, .4 and .8; clobbers TEMP_VAR0.0 and f0.0; no jmpi in this file targets it */
+word_imin:
+ cmp.le.f0.0 (1) null:w INPUT_ARG0.0<0,1,0>:w INPUT_ARG0.4<0,1,0>:w {align1}; /* a <= b ? */
+ (f0.0) mov (1) TEMP_VAR0.0<1>:w INPUT_ARG0.0<0,1,0>:w {align1}; /* yes: tmp = a */
+ (-f0.0) mov (1) TEMP_VAR0.0<1>:w INPUT_ARG0.4<0,1,0>:w {align1}; /* no: tmp = b */
+ cmp.le.f0.0 (1) null:w TEMP_VAR0.0<0,1,0>:w INPUT_ARG0.8<0,1,0>:w {align1}; /* tmp <= c ? */
+ (f0.0) mov (1) RET_ARG<1>:w TEMP_VAR0.0<0,1,0>:w {align1}; /* yes: result = tmp */
+ (-f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.8<0,1,0>:w {align1}; /* no: result = c */
+ RETURN {align1}; /* ip = RETURN_REG, set by the caller with SAVE_RET */
+
+/* word_imax: RET_ARG = signed maximum of the words at INPUT_ARG0.0, .4 and .8; clobbers TEMP_VAR0.0 and f0.0; no jmpi in this file targets it */
+word_imax:
+ cmp.ge.f0.0 (1) null:w INPUT_ARG0.0<0,1,0>:w INPUT_ARG0.4<0,1,0>:w {align1}; /* a >= b ? */
+ (f0.0) mov (1) TEMP_VAR0.0<1>:w INPUT_ARG0.0<0,1,0>:w {align1}; /* yes: tmp = a */
+ (-f0.0) mov (1) TEMP_VAR0.0<1>:w INPUT_ARG0.4<0,1,0>:w {align1}; /* no: tmp = b */
+ cmp.ge.f0.0 (1) null:w TEMP_VAR0.0<0,1,0>:w INPUT_ARG0.8<0,1,0>:w {align1}; /* tmp >= c ? */
+ (f0.0) mov (1) RET_ARG<1>:w TEMP_VAR0.0<0,1,0>:w {align1}; /* yes: result = tmp */
+ (-f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.8<0,1,0>:w {align1}; /* no: result = c */
+ RETURN {align1}; /* ip = RETURN_REG, set by the caller with SAVE_RET */
+
+word_imedian: /* RET_ARG = signed median of the words a = INPUT_ARG0.0, b = INPUT_ARG0.4, c = INPUT_ARG0.8; clobbers f0.0 */
+ cmp.ge.f0.0 (1) null:w INPUT_ARG0.0<0,1,0>:w INPUT_ARG0.4<0,1,0>:w {align1}; /* a >= b ? */
+ (f0.0) jmpi (1) cmp_a_ge_b;
+ cmp.ge.f0.0 (1) null:w INPUT_ARG0.0<0,1,0>:w INPUT_ARG0.8<0,1,0>:w {align1}; /* here a < b: a >= c ? */
+ (f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.0<0,1,0>:w {align1}; /* c <= a < b: median = a */
+ (f0.0) jmpi (1) cmp_end;
+ cmp.ge.f0.0 (1) null:w INPUT_ARG0.4<0,1,0>:w INPUT_ARG0.8<0,1,0>:w {align1}; /* here a is the smallest: b >= c ? */
+ (f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.8<0,1,0>:w {align1}; /* a < c <= b: median = c */
+ (-f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.4<0,1,0>:w {align1}; /* a < b < c: median = b */
+ jmpi (1) cmp_end;
+cmp_a_ge_b:
+ cmp.ge.f0.0 (1) null:w INPUT_ARG0.4<0,1,0>:w INPUT_ARG0.8<0,1,0>:w {align1}; /* here a >= b: b >= c ? */
+ (f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.4<0,1,0>:w {align1}; /* a >= b >= c: median = b */
+ (f0.0) jmpi (1) cmp_end;
+ cmp.ge.f0.0 (1) null:w INPUT_ARG0.0<0,1,0>:w INPUT_ARG0.8<0,1,0>:w {align1}; /* here b is the smallest: a >= c ? */
+ (f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.8<0,1,0>:w {align1}; /* a >= c > b: median = c */
+ (-f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.0<0,1,0>:w {align1}; /* c > a >= b: median = a */
+cmp_end:
+ RETURN {align1}; /* ip = RETURN_REG, set by the caller with SAVE_RET */
+
+mb_pred_func: /* RET_ARG = prediction mode bits of one block of a neighbour MB. In: INPUT_ARG0.0/.4 = inter record dwords (callers pass mb_inter_wb.4 and mb_inter_wb.0), INPUT_ARG0.8 = block selector. Clobbers TEMP_VAR0 and f0.0 */
+ mov (8) TEMP_VAR0.0<1>:ud 0:ud {align1}; /* clear the scratch register */
+ mov (1) TEMP_VAR0.0<1>:ub INPUT_ARG0.2<0,1,0>:ub {align1}; /* byte 2 of the first argument holds the packed prediction modes */
+ and (1) TEMP_VAR0.4<1>:uw INPUT_ARG0.4<0,1,0>:uw INTER_MASK:uw {align1}; /* isolate the inter MB mode field */
+ /* INTER16x16 mode. Bits 1-0 are the prediction mode of the whole MB */
+ cmp.e.f0.0 (1) null:uw TEMP_VAR0.4<0,1,0>:uw INTER_16X16MODE:uw {align1};
+ (f0.0) and (1) RET_ARG<1>:uw TEMP_VAR0.0<0,1,0>:uw PRED_MASK {align1};
+ (f0.0) jmpi (1) end_mb_pred;
+ /* Check whether it is INTER8x8 mode. */
+ cmp.e.f0.0 (1) null:uw TEMP_VAR0.4<0,1,0>:uw INTER_8X8MODE:uw {align1};
+ (f0.0) jmpi (1) mb_pred_func_8;
+
+ /* Check whether it is INTER16x8 mode. */
+ cmp.e.f0.0 (1) null:uw TEMP_VAR0.4<0,1,0>:uw INTER_16X8MODE:uw {align1};
+ (f0.0) jmpi (1) mb_pred_func_168;
+mb_pred_func_816: /* reached by fall-through for any remaining mode value */
+ /* Block 0/2 uses the bit1-0. Block 1/3 uses the bit3-2 */
+ mov (1) TEMP_VAR0.8<1>:uw INPUT_ARG0.8<0,1,0>:uw {align1}; /* block selector */
+ and.z.f0.0 (1) null:uw TEMP_VAR0.8<0,1,0>:uw INTER_BLOCK1:uw {align1}; /* f0.0 set when (selector & INTER_BLOCK1) == 0 */
+ (f0.0) and (1) RET_ARG<1>:uw TEMP_VAR0.0<0,1,0>:uw PRED_MASK {align1}; /* use bits 1-0 */
+ (f0.0) jmpi (1) end_mb_pred;
+ shr (1) TEMP_VAR0.16<1>:uw TEMP_VAR0.0<0,1,0>:uw 2:uw {align1}; /* otherwise use bits 3-2 */
+ and (1) RET_ARG<1>:uw TEMP_VAR0.16<0,1,0>:uw PRED_MASK {align1};
+ jmpi (1) end_mb_pred;
+
+mb_pred_func_168:
+ /* Block 0/1 uses the bit1-0. Block 2/3 uses the bit3-2 */
+ mov (1) TEMP_VAR0.8<1>:uw INPUT_ARG0.8<0,1,0>:uw {align1}; /* block selector */
+ cmp.l.f0.0 (1) null:uw TEMP_VAR0.8<0,1,0>:uw INTER_BLOCK2:uw {align1}; /* selector < INTER_BLOCK2 ? */
+ (f0.0) and (1) RET_ARG<1>:uw TEMP_VAR0.0<0,1,0>:uw PRED_MASK {align1}; /* use bits 1-0 */
+ (f0.0) jmpi (1) end_mb_pred;
+ shr (1) TEMP_VAR0.16<1>:uw TEMP_VAR0.0<0,1,0>:uw 2:uw {align1}; /* otherwise use bits 3-2 */
+ and (1) RET_ARG<1>:uw TEMP_VAR0.16<0,1,0>:uw PRED_MASK {align1};
+ jmpi (1) end_mb_pred;
+
+mb_pred_func_8:
+ /* 8X8 mode. Every block uses two bits as the prediction mode. */
+ mul (1) TEMP_VAR0.8<1>:uw INPUT_ARG0.8<0,1,0>:uw 2:uw {align1}; /* shift amount = selector * 2 */
+ shr (1) TEMP_VAR0.16<1>:uw TEMP_VAR0.0<0,1,0>:uw TEMP_VAR0.8<0,1,0>:uw {align1}; /* move the two bits of that block down to bits 1-0 */
+ and (1) RET_ARG<1>:uw TEMP_VAR0.16<0,1,0>:uw PRED_MASK {align1};
+end_mb_pred:
+ RETURN {align1}; /* ip = RETURN_REG, set by the caller with SAVE_RET */
+
diff --git a/src/shaders/vme/inter_bframe_gen8.g8a b/src/shaders/vme/inter_bframe_gen8.g8a
new file mode 100644
index 0000000..8aff32e
--- /dev/null
+++ b/src/shaders/vme/inter_bframe_gen8.g8a
@@ -0,0 +1,2 @@
+#include "vme8.inc"
+#include "inter_bframe_gen8.asm"
diff --git a/src/shaders/vme/inter_bframe_gen8.g8b b/src/shaders/vme/inter_bframe_gen8.g8b
new file mode 100644
index 0000000..77daf5a
--- /dev/null
+++ b/src/shaders/vme/inter_bframe_gen8.g8b
@@ -0,0 +1,423 @@
+ { 0x00800001, 0x24000608, 0x00000000, 0x00000000 },
+ { 0x00800001, 0x24400608, 0x00000000, 0x00000000 },
+ { 0x00800001, 0x24800608, 0x00000000, 0x00000000 },
+ { 0x00800001, 0x24c00608, 0x00000000, 0x00000000 },
+ { 0x00200009, 0x24002228, 0x164500a0, 0x00040004 },
+ { 0x00000040, 0x24000a28, 0x1e000400, 0xfff8fff8 },
+ { 0x00000040, 0x24040a28, 0x1e000404, 0xffffffff },
+ { 0x00000001, 0x24080e08, 0x08000000, 0x0000001f },
+ { 0x00000001, 0x24142288, 0x00000014, 0x00000000 },
+ { 0x00200009, 0x24202228, 0x164500a0, 0x00040004 },
+ { 0x00000040, 0x24200a28, 0x1e000420, 0xfffcfffc },
+ { 0x00000001, 0x24280e08, 0x08000000, 0x000f0003 },
+ { 0x00000001, 0x24342288, 0x00000014, 0x00000000 },
+ { 0x00200009, 0x24482248, 0x164500a0, 0x00040004 },
+ { 0x00000001, 0x24542288, 0x00000014, 0x00000000 },
+ { 0x00000041, 0x24881208, 0x220000a2, 0x000000a1 },
+ { 0x00000040, 0x24880208, 0x22000488, 0x000000a0 },
+ { 0x00000041, 0x24880208, 0x06000488, 0x00000018 },
+ { 0x00000001, 0x24942288, 0x00000014, 0x00000000 },
+ { 0x00600001, 0x28000208, 0x008d0400, 0x00000000 },
+ { 0x04600031, 0x23800a88, 0x0e000800, 0x02190004 },
+ { 0x00600001, 0x28000208, 0x008d0420, 0x00000000 },
+ { 0x04600031, 0x23a00a88, 0x0e000800, 0x02290004 },
+ { 0x00200009, 0x24002228, 0x164500a0, 0x00030003 },
+ { 0x00000041, 0x24000a28, 0x1e000400, 0x00020002 },
+ { 0x00000040, 0x24000a28, 0x1e000400, 0xfff8fff8 },
+ { 0x00000040, 0x24040a28, 0x1e000404, 0xffffffff },
+ { 0x00600001, 0x28000208, 0x008d0400, 0x00000000 },
+ { 0x04600031, 0x26000a88, 0x0e000800, 0x02190006 },
+ { 0x00200009, 0x24202228, 0x164500a0, 0x00030003 },
+ { 0x00000041, 0x24200a28, 0x1e000420, 0x00020002 },
+ { 0x00000040, 0x24200a28, 0x1e000420, 0xfffcfffc },
+ { 0x00000001, 0x24280e08, 0x08000000, 0x00070003 },
+ { 0x00600001, 0x28000208, 0x008d0420, 0x00000000 },
+ { 0x04600031, 0x26200a88, 0x0e000800, 0x02190006 },
+ { 0x00600001, 0x24600608, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x2ac00608, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x2a800608, 0x00000000, 0x00000000 },
+ { 0x01000005, 0x20001240, 0x160000a6, 0x00040004 },
+ { 0x00010020, 0x34000000, 0x0e001400, 0x00000d60 },
+ { 0x00600001, 0x2ae00608, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x2b000608, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x2b200608, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x2b400608, 0x00000000, 0x00000000 },
+ { 0x01000005, 0x20002240, 0x160000a5, 0x00600060 },
+ { 0x00210001, 0x2af41e68, 0x18000000, 0xffffffff },
+ { 0x00010020, 0x34000000, 0x0e001400, 0x00000210 },
+ { 0x00000001, 0x2ae00e28, 0x08000000, 0x00000001 },
+ { 0x00200001, 0x24002248, 0x004500a0, 0x00000000 },
+ { 0x00000040, 0x24001a68, 0x1e000400, 0xffffffff },
+ { 0x00000041, 0x2b481208, 0x120000a2, 0x00000402 },
+ { 0x00000040, 0x2b480208, 0x12000b48, 0x00000400 },
+ { 0x00000041, 0x2b480208, 0x06000b48, 0x00000018 },
+ { 0x00000001, 0x2b542288, 0x00000014, 0x00000000 },
+ { 0x0a800031, 0x2b600a08, 0x0e000b40, 0x02280303 },
+ { 0x05000010, 0x20001260, 0x12000b70, 0x00000b88 },
+ { 0x00210001, 0x2af41e68, 0x18000000, 0xffffffff },
+ { 0x00010020, 0x34000000, 0x0e001400, 0x00000160 },
+ { 0x00000040, 0x2b480208, 0x06000b48, 0x00000003 },
+ { 0x0a800031, 0x2ba00a08, 0x0e000b40, 0x02480403 },
+ { 0x00200001, 0x2af41e68, 0x18000000, 0xffffffff },
+ { 0x00000001, 0x2fa00208, 0x00000b84, 0x00000000 },
+ { 0x00000001, 0x2fa40208, 0x00000b80, 0x00000000 },
+ { 0x00000001, 0x2fa80608, 0x00000000, 0x00000001 },
+ { 0x00000040, 0x2fe00008, 0x06001400, 0x00000020 },
+ { 0x00000020, 0x34000000, 0x0e001400, 0x00001490 },
+ { 0x00000001, 0x2aa01248, 0x00000fe4, 0x00000000 },
+ { 0x01000010, 0x20001240, 0x16000aa0, 0x00000000 },
+ { 0x00010001, 0x2af01e48, 0x18000000, 0x00010001 },
+ { 0x00010001, 0x2af41e68, 0x18000000, 0x00000000 },
+ { 0x00010001, 0x2ae40208, 0x00000bc8, 0x00000000 },
+ { 0x00010020, 0x34000000, 0x0e001400, 0x00000080 },
+ { 0x01000010, 0x20001240, 0x16000aa0, 0x00010001 },
+ { 0x00010001, 0x2af21e48, 0x18000000, 0x00010001 },
+ { 0x00010001, 0x2af61e68, 0x18000000, 0x00000000 },
+ { 0x00010001, 0x2ae80208, 0x00000bcc, 0x00000000 },
+ { 0x00010020, 0x34000000, 0x0e001400, 0x00000030 },
+ { 0x00200001, 0x2ae40208, 0x00450bc8, 0x00000000 },
+ { 0x00200001, 0x2af01e48, 0x18000000, 0x00010001 },
+ { 0x00200001, 0x2af41e68, 0x18000000, 0x00000000 },
+ { 0x00600001, 0x2b400608, 0x00000000, 0x00000000 },
+ { 0x01000005, 0x20002240, 0x160000a5, 0x00100010 },
+ { 0x00210001, 0x2b141e68, 0x18000000, 0xffffffff },
+ { 0x00210001, 0x2b341e68, 0x18000000, 0xffffffff },
+ { 0x00010020, 0x34000000, 0x0e001400, 0x000006c0 },
+ { 0x00000001, 0x2b000e28, 0x08000000, 0x00000001 },
+ { 0x00200001, 0x24002248, 0x004500a0, 0x00000000 },
+ { 0x00000040, 0x24021a68, 0x1e000402, 0xffffffff },
+ { 0x00000041, 0x2b481208, 0x120000a2, 0x00000402 },
+ { 0x00000040, 0x2b480208, 0x12000b48, 0x00000400 },
+ { 0x00000041, 0x2b480208, 0x06000b48, 0x00000018 },
+ { 0x00000001, 0x2b542288, 0x00000014, 0x00000000 },
+ { 0x0a800031, 0x2b600a08, 0x0e000b40, 0x02280303 },
+ { 0x05000010, 0x20001260, 0x12000b70, 0x00000b88 },
+ { 0x00210001, 0x2b141e68, 0x18000000, 0xffffffff },
+ { 0x00010020, 0x34000000, 0x0e001400, 0x00000160 },
+ { 0x00000040, 0x2b480208, 0x06000b48, 0x00000003 },
+ { 0x0a800031, 0x2ba00a08, 0x0e000b40, 0x02480403 },
+ { 0x00200001, 0x2b141e68, 0x18000000, 0xffffffff },
+ { 0x00000001, 0x2fa00208, 0x00000b84, 0x00000000 },
+ { 0x00000001, 0x2fa40208, 0x00000b80, 0x00000000 },
+ { 0x00000001, 0x2fa80608, 0x00000000, 0x00000002 },
+ { 0x00000040, 0x2fe00008, 0x06001400, 0x00000020 },
+ { 0x00000020, 0x34000000, 0x0e001400, 0x00001230 },
+ { 0x00000001, 0x2aa01248, 0x00000fe4, 0x00000000 },
+ { 0x01000010, 0x20001240, 0x16000aa0, 0x00000000 },
+ { 0x00010001, 0x2b101e48, 0x18000000, 0x00010001 },
+ { 0x00010001, 0x2b141e68, 0x18000000, 0x00000000 },
+ { 0x00010001, 0x2b040208, 0x00000bf0, 0x00000000 },
+ { 0x00010020, 0x34000000, 0x0e001400, 0x00000080 },
+ { 0x01000010, 0x20001240, 0x16000aa0, 0x00010001 },
+ { 0x00010001, 0x2b121e48, 0x18000000, 0x00010001 },
+ { 0x00010001, 0x2b161e68, 0x18000000, 0x00000000 },
+ { 0x00010001, 0x2b080208, 0x00000bf4, 0x00000000 },
+ { 0x00010020, 0x34000000, 0x0e001400, 0x00000030 },
+ { 0x00200001, 0x2b101e48, 0x18000000, 0x00010001 },
+ { 0x00200001, 0x2b141e68, 0x18000000, 0x00000000 },
+ { 0x00200001, 0x2b040208, 0x00450bf0, 0x00000000 },
+ { 0x00600001, 0x2b400608, 0x00000000, 0x00000000 },
+ { 0x01000005, 0x20002240, 0x160000a5, 0x00080008 },
+ { 0x00010020, 0x34000000, 0x0e001400, 0x00000230 },
+ { 0x00000001, 0x2b200e28, 0x08000000, 0x00000001 },
+ { 0x00200001, 0x24002248, 0x004500a0, 0x00000000 },
+ { 0x00000040, 0x24021a68, 0x1e000402, 0xffffffff },
+ { 0x00000040, 0x24001a68, 0x1e000400, 0x00010001 },
+ { 0x00000041, 0x2b481208, 0x120000a2, 0x00000402 },
+ { 0x00000040, 0x2b480208, 0x12000b48, 0x00000400 },
+ { 0x00000041, 0x2b480208, 0x06000b48, 0x00000018 },
+ { 0x00000001, 0x2b542288, 0x00000014, 0x00000000 },
+ { 0x0a800031, 0x2b600a08, 0x0e000b40, 0x02280303 },
+ { 0x05000010, 0x20001260, 0x12000b70, 0x00000b88 },
+ { 0x00210001, 0x2b341e68, 0x18000000, 0xffffffff },
+ { 0x00010020, 0x34000000, 0x0e001400, 0x000003c0 },
+ { 0x00000040, 0x2b480208, 0x06000b48, 0x00000003 },
+ { 0x0a800031, 0x2ba00a08, 0x0e000b40, 0x02480403 },
+ { 0x00200001, 0x2b341e68, 0x18000000, 0xffffffff },
+ { 0x00000001, 0x2fa00208, 0x00000b84, 0x00000000 },
+ { 0x00000001, 0x2fa40208, 0x00000b80, 0x00000000 },
+ { 0x00000001, 0x2fa80608, 0x00000000, 0x00000002 },
+ { 0x00000040, 0x2fe00008, 0x06001400, 0x00000020 },
+ { 0x00000020, 0x34000000, 0x0e001400, 0x00000fe0 },
+ { 0x00000001, 0x2aa01248, 0x00000fe4, 0x00000000 },
+ { 0x01000010, 0x20001240, 0x16000aa0, 0x00000000 },
+ { 0x00010001, 0x2b301e48, 0x18000000, 0x00010001 },
+ { 0x00010001, 0x2b341e68, 0x18000000, 0x00000000 },
+ { 0x00010001, 0x2b240208, 0x00000bf0, 0x00000000 },
+ { 0x00010020, 0x34000000, 0x0e001400, 0x000002e0 },
+ { 0x01000010, 0x20001240, 0x16000aa0, 0x00010001 },
+ { 0x00010001, 0x2b321e48, 0x18000000, 0x00010001 },
+ { 0x00010001, 0x2b361e68, 0x18000000, 0x00000000 },
+ { 0x00010001, 0x2b280208, 0x00000bf4, 0x00000000 },
+ { 0x00010020, 0x34000000, 0x0e001400, 0x00000290 },
+ { 0x00200001, 0x2b301e48, 0x18000000, 0x00010001 },
+ { 0x00200001, 0x2b341e68, 0x18000000, 0x00000000 },
+ { 0x00200001, 0x2b240208, 0x00450bf0, 0x00000000 },
+ { 0x00000020, 0x34000000, 0x0e001400, 0x00000250 },
+ { 0x00600001, 0x2b400608, 0x00000000, 0x00000000 },
+ { 0x01000005, 0x20002240, 0x160000a5, 0x00040004 },
+ { 0x00210001, 0x2b341e68, 0x18000000, 0xffffffff },
+ { 0x00010020, 0x34000000, 0x0e001400, 0x00000210 },
+ { 0x00000001, 0x2b200e28, 0x08000000, 0x00000001 },
+ { 0x00200001, 0x24002248, 0x004500a0, 0x00000000 },
+ { 0x00200040, 0x24001a68, 0x1e450400, 0xffffffff },
+ { 0x00000041, 0x2b481208, 0x120000a2, 0x00000402 },
+ { 0x00000040, 0x2b480208, 0x12000b48, 0x00000400 },
+ { 0x00000041, 0x2b480208, 0x06000b48, 0x00000018 },
+ { 0x00000001, 0x2b542288, 0x00000014, 0x00000000 },
+ { 0x0a800031, 0x2b600a08, 0x0e000b40, 0x02280303 },
+ { 0x05000010, 0x20001260, 0x12000b70, 0x00000b88 },
+ { 0x00210001, 0x2b341e68, 0x18000000, 0xffffffff },
+ { 0x00010020, 0x34000000, 0x0e001400, 0x00000160 },
+ { 0x00000040, 0x2b480208, 0x06000b48, 0x00000003 },
+ { 0x0a800031, 0x2ba00a88, 0x0e000b40, 0x02480403 },
+ { 0x00200001, 0x2b341e68, 0x18000000, 0xffffffff },
+ { 0x00000001, 0x2fa00208, 0x00000b84, 0x00000000 },
+ { 0x00000001, 0x2fa40208, 0x00000b80, 0x00000000 },
+ { 0x00000001, 0x2fa80608, 0x00000000, 0x00000003 },
+ { 0x00000040, 0x2fe00008, 0x06001400, 0x00000020 },
+ { 0x00000020, 0x34000000, 0x0e001400, 0x00000d80 },
+ { 0x00000001, 0x2aa01248, 0x00000fe4, 0x00000000 },
+ { 0x01000010, 0x20001240, 0x16000aa0, 0x00000000 },
+ { 0x00010001, 0x2b301e48, 0x18000000, 0x00010001 },
+ { 0x00010001, 0x2b341e68, 0x18000000, 0x00000000 },
+ { 0x00010001, 0x2b240208, 0x00000c18, 0x00000000 },
+ { 0x00010020, 0x34000000, 0x0e001400, 0x00000080 },
+ { 0x01000010, 0x20001240, 0x16000aa0, 0x00010001 },
+ { 0x00010001, 0x2b321e48, 0x18000000, 0x00010001 },
+ { 0x00010001, 0x2b361e68, 0x18000000, 0x00000000 },
+ { 0x00010001, 0x2b280208, 0x00000c1c, 0x00000000 },
+ { 0x00010020, 0x34000000, 0x0e001400, 0x00000030 },
+ { 0x00200001, 0x2b301e48, 0x18000000, 0x00010001 },
+ { 0x00200001, 0x2b341e68, 0x18000000, 0x00000000 },
+ { 0x00200001, 0x2b240208, 0x00450c18, 0x00000000 },
+ { 0x00000040, 0x24000a28, 0x0a000b00, 0x00000b20 },
+ { 0x01000010, 0x20000a20, 0x0e000400, 0x00000000 },
+ { 0x00110020, 0x34000000, 0x0e001400, 0x00000080 },
+ { 0x02000010, 0x20000a20, 0x0e000ae0, 0x00000000 },
+ { 0x00210001, 0x2b040208, 0x00450ae4, 0x00000000 },
+ { 0x00210001, 0x2b240208, 0x00450ae4, 0x00000000 },
+ { 0x00210001, 0x2b141248, 0x00450af4, 0x00000000 },
+ { 0x00210001, 0x2b341248, 0x00450af4, 0x00000000 },
+ { 0x00210001, 0x2ac00208, 0x00450ae4, 0x00000000 },
+ { 0x00310001, 0x2ac00608, 0x00000000, 0x00000000 },
+ { 0x00000020, 0x34000000, 0x0e001400, 0x00000320 },
+ { 0x00600001, 0x24000608, 0x00000000, 0x00000000 },
+ { 0x01000010, 0x20001a20, 0x1e000af4, 0x00000000 },
+ { 0x00010040, 0x24001a68, 0x1e000400, 0x00010001 },
+ { 0x00010001, 0x24040208, 0x00000ae4, 0x00000000 },
+ { 0x01000010, 0x20001a20, 0x1e000b14, 0x00000000 },
+ { 0x00010040, 0x24001a68, 0x1e000400, 0x00010001 },
+ { 0x00010001, 0x24040208, 0x00000b04, 0x00000000 },
+ { 0x01000010, 0x20001a20, 0x1e000b34, 0x00000000 },
+ { 0x00010040, 0x24001a68, 0x1e000400, 0x00010001 },
+ { 0x00010001, 0x24040208, 0x00000b24, 0x00000000 },
+ { 0x01000010, 0x20001a20, 0x1e000400, 0x00010001 },
+ { 0x00010001, 0x2ac00208, 0x00000404, 0x00000000 },
+ { 0x00010020, 0x34000000, 0x0e001400, 0x000000c0 },
+ { 0x00000001, 0x2fa01a68, 0x00000ae4, 0x00000000 },
+ { 0x00000001, 0x2fa41a68, 0x00000b04, 0x00000000 },
+ { 0x00000001, 0x2fa81a68, 0x00000b24, 0x00000000 },
+ { 0x00000040, 0x2fe00008, 0x06001400, 0x00000020 },
+ { 0x00000020, 0x34000000, 0x0e001400, 0x000009d0 },
+ { 0x00000001, 0x2ac01a68, 0x00000fe4, 0x00000000 },
+ { 0x00000001, 0x2fa01a68, 0x00000ae6, 0x00000000 },
+ { 0x00000001, 0x2fa41a68, 0x00000b06, 0x00000000 },
+ { 0x00000001, 0x2fa81a68, 0x00000b26, 0x00000000 },
+ { 0x00000040, 0x2fe00008, 0x06001400, 0x00000020 },
+ { 0x00000020, 0x34000000, 0x0e001400, 0x00000970 },
+ { 0x00000001, 0x2ac21a68, 0x00000fe4, 0x00000000 },
+ { 0x00600001, 0x24000608, 0x00000000, 0x00000000 },
+ { 0x01000010, 0x20001a20, 0x1e000af6, 0x00000000 },
+ { 0x00010040, 0x24001a68, 0x1e000400, 0x00010001 },
+ { 0x00010001, 0x24040208, 0x00000ae8, 0x00000000 },
+ { 0x01000010, 0x20001a20, 0x1e000b16, 0x00000000 },
+ { 0x00010040, 0x24001a68, 0x1e000400, 0x00010001 },
+ { 0x00010001, 0x24040208, 0x00000b08, 0x00000000 },
+ { 0x01000010, 0x20001a20, 0x1e000b36, 0x00000000 },
+ { 0x00010040, 0x24001a68, 0x1e000400, 0x00010001 },
+ { 0x00010001, 0x24040208, 0x00000b28, 0x00000000 },
+ { 0x01000010, 0x20001a20, 0x1e000400, 0x00010001 },
+ { 0x00010001, 0x2ac40208, 0x00000404, 0x00000000 },
+ { 0x00010020, 0x34000000, 0x0e001400, 0x000000c0 },
+ { 0x00000001, 0x2fa01a68, 0x00000ae8, 0x00000000 },
+ { 0x00000001, 0x2fa41a68, 0x00000b08, 0x00000000 },
+ { 0x00000001, 0x2fa81a68, 0x00000b28, 0x00000000 },
+ { 0x00000040, 0x2fe00008, 0x06001400, 0x00000020 },
+ { 0x00000020, 0x34000000, 0x0e001400, 0x00000840 },
+ { 0x00000001, 0x2ac41a68, 0x00000fe4, 0x00000000 },
+ { 0x00000001, 0x2fa01a68, 0x00000aea, 0x00000000 },
+ { 0x00000001, 0x2fa41a68, 0x00000b0a, 0x00000000 },
+ { 0x00000001, 0x2fa81a68, 0x00000b2a, 0x00000000 },
+ { 0x00000040, 0x2fe00008, 0x06001400, 0x00000020 },
+ { 0x00000020, 0x34000000, 0x0e001400, 0x000007e0 },
+ { 0x00000001, 0x2ac61a68, 0x00000fe4, 0x00000000 },
+ { 0x0040000c, 0x2a801a68, 0x1e690ac0, 0x00020002 },
+ { 0x00400040, 0x2a881a68, 0x1e690a80, 0x00030003 },
+ { 0x00400005, 0x2a901248, 0x16690a88, 0xfffcfffc },
+ { 0x00600001, 0x25600208, 0x008d0020, 0x00000000 },
+ { 0x00600001, 0x28400208, 0x008d0560, 0x00000000 },
+ { 0x00600001, 0x25800608, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x28600208, 0x008d0580, 0x00000000 },
+ { 0x00600001, 0x28800608, 0x00000000, 0x00000000 },
+ { 0x00000001, 0x23800608, 0x00000000, 0x00000000 },
+ { 0x00000005, 0x23840208, 0x06000384, 0xff000000 },
+ { 0x00600001, 0x28a00208, 0x008d0380, 0x00000000 },
+ { 0x00000001, 0x24001648, 0x10000000, 0x00010001 },
+ { 0x00000001, 0x28a52288, 0x00000400, 0x00000000 },
+ { 0x00600001, 0x28c00608, 0x00000000, 0x00000000 },
+ { 0x00800001, 0x28c02288, 0x00cf03a3, 0x00000000 },
+ { 0x00000001, 0x28d00608, 0x00000000, 0x11111111 },
+ { 0x00000001, 0x28dc0608, 0x00000000, 0x00010101 },
+ { 0x00000001, 0x28d41248, 0x00000606, 0x00000000 },
+ { 0x00400001, 0x28f00208, 0x00690608, 0x00000000 },
+ { 0x00600001, 0x28e01248, 0x00ae0622, 0x00000000 },
+ { 0x00000001, 0x247c1648, 0x10000000, 0x00000000 },
+ { 0x01000005, 0x20002240, 0x160000a4, 0x00010001 },
+ { 0x00010001, 0x247c0e88, 0x08000000, 0x00000002 },
+ { 0x00000001, 0x247d2288, 0x000000a5, 0x00000000 },
+ { 0x00000001, 0x24001648, 0x10000000, 0x00200020 },
+ { 0x00000001, 0x247e2288, 0x00000400, 0x00000000 },
+ { 0x00600001, 0x28200208, 0x008d0460, 0x00000000 },
+ { 0x00000001, 0x244c0608, 0x00000000, 0x00800000 },
+ { 0x00600001, 0x28000208, 0x008d0440, 0x00000000 },
+ { 0x0d600031, 0x21800a08, 0x0e000800, 0x10782000 },
+ { 0x00600001, 0x28000208, 0x008d0480, 0x00000000 },
+ { 0x00000001, 0x28200208, 0x00000180, 0x00000000 },
+ { 0x00000001, 0x28240208, 0x00000190, 0x00000000 },
+ { 0x00000001, 0x28280208, 0x00000194, 0x00000000 },
+ { 0x00000001, 0x282c0208, 0x00000198, 0x00000000 },
+ { 0x00000001, 0x28301248, 0x0000018c, 0x00000000 },
+ { 0x00000001, 0x28340208, 0x00000188, 0x00000000 },
+ { 0x00000001, 0x28380208, 0x0000019c, 0x00000000 },
+ { 0x00000001, 0x283c0208, 0x00000488, 0x00000000 },
+ { 0x0a800031, 0x20000a60, 0x0e000800, 0x040a0203 },
+ { 0x00000001, 0x244c0608, 0x00000000, 0x00200700 },
+ { 0x00000001, 0x24561648, 0x10000000, 0x20202020 },
+ { 0x00000001, 0x24400208, 0x00000448, 0x00000000 },
+ { 0x00000040, 0x24401a68, 0x1e000440, 0xfff8fff8 },
+ { 0x00000040, 0x24421a68, 0x1e000442, 0xfff8fff8 },
+ { 0x00000001, 0x24401e68, 0x18000000, 0xfff8fff8 },
+ { 0x00000001, 0x24421e68, 0x18000000, 0xfff8fff8 },
+ { 0x00000001, 0x24440208, 0x00000440, 0x00000000 },
+ { 0x01000005, 0x20002240, 0x160000a5, 0x00600060 },
+ { 0x00010040, 0x24401a68, 0x1e000440, 0x00040004 },
+ { 0x00010040, 0x24441a68, 0x1e000444, 0x00040004 },
+ { 0x01000005, 0x20002240, 0x160000a5, 0x00100010 },
+ { 0x00010040, 0x24421a68, 0x1e000442, 0x00040004 },
+ { 0x00010040, 0x24461a68, 0x1e000446, 0x00040004 },
+ { 0x00200040, 0x24401a68, 0x1a450440, 0x00450a90 },
+ { 0x00200040, 0x24441a68, 0x1a450444, 0x00450a94 },
+ { 0x00600001, 0x28000208, 0x008d0440, 0x00000000 },
+ { 0x00000001, 0x24600608, 0x00000000, 0x00000002 },
+ { 0x00000001, 0x24642288, 0x0000009c, 0x00000000 },
+ { 0x00000001, 0x24680608, 0x00000000, 0x00001212 },
+ { 0x00600001, 0x28200208, 0x008d0460, 0x00000000 },
+ { 0x00600001, 0x28400208, 0x008d0560, 0x00000000 },
+ { 0x00400001, 0x45800208, 0x00000ac0, 0x00000000 },
+ { 0x00400001, 0x45840208, 0x00000ac4, 0x00000000 },
+ { 0x00000001, 0x28800608, 0x00000000, 0x10010101 },
+ { 0x00000001, 0x28840608, 0x00000000, 0x100f0f0f },
+ { 0x00000001, 0x28880608, 0x00000000, 0x10010101 },
+ { 0x00000001, 0x288c0608, 0x00000000, 0x000f0f0f },
+ { 0x00400001, 0x28900608, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x28b00608, 0x00000000, 0x00000000 },
+ { 0x08600031, 0x21800a08, 0x0e000800, 0x0c784000 },
+ { 0x00000001, 0x25740608, 0x00000000, 0x00000000 },
+ { 0x00000001, 0x25752288, 0x00000199, 0x00000000 },
+ { 0x00000001, 0x25762288, 0x0000019a, 0x00000000 },
+ { 0x00000005, 0x24001248, 0x16000180, 0x00030003 },
+ { 0x00000001, 0x25742288, 0x00000400, 0x00000000 },
+ { 0x00600001, 0x28800208, 0x008d01a0, 0x00000000 },
+ { 0x00600001, 0x28a00208, 0x008d01c0, 0x00000000 },
+ { 0x00600001, 0x28c00208, 0x008d01e0, 0x00000000 },
+ { 0x00600001, 0x28e00208, 0x008d0200, 0x00000000 },
+ { 0x00000001, 0x244c0608, 0x00000000, 0x00203000 },
+ { 0x00600001, 0x28000208, 0x008d0440, 0x00000000 },
+ { 0x00000001, 0x24001648, 0x10000000, 0x00200020 },
+ { 0x00000001, 0x24662288, 0x00000400, 0x00000000 },
+ { 0x00600001, 0x28200208, 0x008d0460, 0x00000000 },
+ { 0x00600001, 0x28400208, 0x008d0560, 0x00000000 },
+ { 0x00600001, 0x28600208, 0x008d0580, 0x00000000 },
+ { 0x0d600031, 0x21800a08, 0x0e000800, 0x10786000 },
+ { 0x00000040, 0x24880208, 0x06000488, 0x00000002 },
+ { 0x00600001, 0x28000208, 0x008d0480, 0x00000000 },
+ { 0x00000001, 0x28200208, 0x00000180, 0x00000000 },
+ { 0x00000001, 0x28240208, 0x00000198, 0x00000000 },
+ { 0x00000001, 0x28280208, 0x00000188, 0x00000000 },
+ { 0x00000001, 0x282c0208, 0x00000574, 0x00000000 },
+ { 0x0a800031, 0x20000a60, 0x0e000800, 0x040a0003 },
+ { 0x00000040, 0x24880208, 0x06000488, 0x00000001 },
+ { 0x00600001, 0x28000208, 0x008d0480, 0x00000000 },
+ { 0x00600001, 0x28200208, 0x008d01a0, 0x00000000 },
+ { 0x00600001, 0x28400208, 0x008d01c0, 0x00000000 },
+ { 0x00600001, 0x28600208, 0x008d01e0, 0x00000000 },
+ { 0x00600001, 0x28800208, 0x008d0200, 0x00000000 },
+ { 0x0a800031, 0x20000a60, 0x0e000800, 0x0a0a0403 },
+ { 0x00000040, 0x24880208, 0x06000488, 0x00000008 },
+ { 0x00600001, 0x28000208, 0x008d0480, 0x00000000 },
+ { 0x00600001, 0x28200208, 0x008d0240, 0x00000000 },
+ { 0x0a800031, 0x20000a60, 0x0e000800, 0x040a0203 },
+ { 0x0a800031, 0x2b600a08, 0x0e000b40, 0x0219e003 },
+ { 0x00600001, 0x2e000208, 0x008d0000, 0x00000000 },
+ { 0x07800031, 0x24000a40, 0x0e000e00, 0x82000010 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x06000010, 0x20001a60, 0x1a000fa0, 0x00000fa4 },
+ { 0x00010001, 0x2f601a68, 0x00000fa0, 0x00000000 },
+ { 0x00110001, 0x2f601a68, 0x00000fa4, 0x00000000 },
+ { 0x06000010, 0x20001a60, 0x1a000f60, 0x00000fa8 },
+ { 0x00010001, 0x2fe41a68, 0x00000f60, 0x00000000 },
+ { 0x00110001, 0x2fe41a68, 0x00000fa8, 0x00000000 },
+ { 0x00000001, 0x34000200, 0x00000fe0, 0x00000000 },
+ { 0x04000010, 0x20001a60, 0x1a000fa0, 0x00000fa4 },
+ { 0x00010001, 0x2f601a68, 0x00000fa0, 0x00000000 },
+ { 0x00110001, 0x2f601a68, 0x00000fa4, 0x00000000 },
+ { 0x04000010, 0x20001a60, 0x1a000f60, 0x00000fa8 },
+ { 0x00010001, 0x2fe41a68, 0x00000f60, 0x00000000 },
+ { 0x00110001, 0x2fe41a68, 0x00000fa8, 0x00000000 },
+ { 0x00000001, 0x34000200, 0x00000fe0, 0x00000000 },
+ { 0x04000010, 0x20001a60, 0x1a000fa0, 0x00000fa4 },
+ { 0x00010020, 0x34000000, 0x0e001400, 0x00000070 },
+ { 0x04000010, 0x20001a60, 0x1a000fa0, 0x00000fa8 },
+ { 0x00010001, 0x2fe41a68, 0x00000fa0, 0x00000000 },
+ { 0x00010020, 0x34000000, 0x0e001400, 0x000000a0 },
+ { 0x04000010, 0x20001a60, 0x1a000fa4, 0x00000fa8 },
+ { 0x00010001, 0x2fe41a68, 0x00000fa8, 0x00000000 },
+ { 0x00110001, 0x2fe41a68, 0x00000fa4, 0x00000000 },
+ { 0x00000020, 0x34000000, 0x0e001400, 0x00000060 },
+ { 0x04000010, 0x20001a60, 0x1a000fa4, 0x00000fa8 },
+ { 0x00010001, 0x2fe41a68, 0x00000fa4, 0x00000000 },
+ { 0x00010020, 0x34000000, 0x0e001400, 0x00000030 },
+ { 0x04000010, 0x20001a60, 0x1a000fa0, 0x00000fa8 },
+ { 0x00010001, 0x2fe41a68, 0x00000fa8, 0x00000000 },
+ { 0x00110001, 0x2fe41a68, 0x00000fa0, 0x00000000 },
+ { 0x00000001, 0x34000200, 0x00000fe0, 0x00000000 },
+ { 0x00600001, 0x2f600608, 0x00000000, 0x00000000 },
+ { 0x00000001, 0x2f602288, 0x00000fa2, 0x00000000 },
+ { 0x00000005, 0x2f641248, 0x16000fa4, 0x00030003 },
+ { 0x01000010, 0x20001240, 0x16000f64, 0x00000000 },
+ { 0x00010005, 0x2fe41248, 0x16000f60, 0x00030003 },
+ { 0x00010020, 0x34000000, 0x0e001400, 0x00000150 },
+ { 0x01000010, 0x20001240, 0x16000f64, 0x00030003 },
+ { 0x00010020, 0x34000000, 0x0e001400, 0x00000100 },
+ { 0x01000010, 0x20001240, 0x16000f64, 0x00010001 },
+ { 0x00010020, 0x34000000, 0x0e001400, 0x00000070 },
+ { 0x00000001, 0x2f681248, 0x00000fa8, 0x00000000 },
+ { 0x01000005, 0x20001240, 0x16000f68, 0x00010001 },
+ { 0x00010005, 0x2fe41248, 0x16000f60, 0x00030003 },
+ { 0x00010020, 0x34000000, 0x0e001400, 0x000000d0 },
+ { 0x00000008, 0x2f701248, 0x16000f60, 0x00020002 },
+ { 0x00000005, 0x2fe41248, 0x16000f70, 0x00030003 },
+ { 0x00000020, 0x34000000, 0x0e001400, 0x000000a0 },
+ { 0x00000001, 0x2f681248, 0x00000fa8, 0x00000000 },
+ { 0x05000010, 0x20001240, 0x16000f68, 0x00020002 },
+ { 0x00010005, 0x2fe41248, 0x16000f60, 0x00030003 },
+ { 0x00010020, 0x34000000, 0x0e001400, 0x00000060 },
+ { 0x00000008, 0x2f701248, 0x16000f60, 0x00020002 },
+ { 0x00000005, 0x2fe41248, 0x16000f70, 0x00030003 },
+ { 0x00000020, 0x34000000, 0x0e001400, 0x00000030 },
+ { 0x00000041, 0x2f681248, 0x16000fa8, 0x00020002 },
+ { 0x00000008, 0x2f701248, 0x12000f60, 0x00000f68 },
+ { 0x00000005, 0x2fe41248, 0x16000f70, 0x00030003 },
+ { 0x00000001, 0x34000200, 0x00000fe0, 0x00000000 },
diff --git a/src/shaders/vme/inter_bframe_haswell.asm b/src/shaders/vme/inter_bframe_haswell.asm
index 9e54b9d..f8ff0af 100644
--- a/src/shaders/vme/inter_bframe_haswell.asm
+++ b/src/shaders/vme/inter_bframe_haswell.asm
@@ -396,7 +396,7 @@ mb_mvp_start:
add (1) tmp_reg0.0<1>:d mbb_result.0<0,1,0>:d mbc_result.0<0,1,0>:d {align1};
cmp.z.f0.0 (1) null:d tmp_reg0.0<0,1,0>:d 0:d {align1};
(-f0.0) jmpi (1) mb_median_start;
-cmp.nz.f0.0 (1) null:d mba_result.0<0,1,0>:d 1:d {align1};
+cmp.nz.f0.0 (1) null:d mba_result.0<0,1,0>:d 0:d {align1};
(f0.0) mov (2) mbb_result.4<1>:ud mba_result.4<2,2,1>:ud {align1};
(f0.0) mov (2) mbc_result.4<1>:ud mba_result.4<2,2,1>:ud {align1};
(f0.0) mov (2) mbb_result.20<1>:uw mba_result.20<2,2,1>:uw {align1};
diff --git a/src/shaders/vme/inter_bframe_haswell.g75b b/src/shaders/vme/inter_bframe_haswell.g75b
index 03da639..cabef20 100644
--- a/src/shaders/vme/inter_bframe_haswell.g75b
+++ b/src/shaders/vme/inter_bframe_haswell.g75b
@@ -186,7 +186,7 @@
{ 0x00000040, 0x240014a5, 0x00000b00, 0x00000b20 },
{ 0x01000010, 0x20001ca4, 0x00000400, 0x00000000 },
{ 0x00110020, 0x34001c00, 0x00001400, 0x00000080 },
- { 0x02000010, 0x20001ca4, 0x00000ae0, 0x00000001 },
+ { 0x02000010, 0x20001ca4, 0x00000ae0, 0x00000000 },
{ 0x00210001, 0x2b040021, 0x00450ae4, 0x00000000 },
{ 0x00210001, 0x2b240021, 0x00450ae4, 0x00000000 },
{ 0x00210001, 0x2b140129, 0x00450af4, 0x00000000 },
diff --git a/src/shaders/vme/inter_bframe_ivb.asm b/src/shaders/vme/inter_bframe_ivb.asm
index 577895c..8a75962 100644
--- a/src/shaders/vme/inter_bframe_ivb.asm
+++ b/src/shaders/vme/inter_bframe_ivb.asm
@@ -388,7 +388,7 @@ mb_mvp_start:
add (1) tmp_reg0.0<1>:d mbb_result.0<0,1,0>:d mbc_result.0<0,1,0>:d {align1};
cmp.z.f0.0 (1) null:d tmp_reg0.0<0,1,0>:d 0:d {align1};
(-f0.0) jmpi (1) mb_median_start;
-cmp.nz.f0.0 (1) null:d mba_result.0<0,1,0>:d 1:d {align1};
+cmp.nz.f0.0 (1) null:d mba_result.0<0,1,0>:d 0:d {align1};
(f0.0) mov (2) mbb_result.4<1>:ud mba_result.4<2,2,1>:ud {align1};
(f0.0) mov (2) mbc_result.4<1>:ud mba_result.4<2,2,1>:ud {align1};
(f0.0) mov (2) mbb_result.20<1>:uw mba_result.20<2,2,1>:uw {align1};
diff --git a/src/shaders/vme/inter_bframe_ivb.g7b b/src/shaders/vme/inter_bframe_ivb.g7b
index fe6f98d..adcb390 100644
--- a/src/shaders/vme/inter_bframe_ivb.g7b
+++ b/src/shaders/vme/inter_bframe_ivb.g7b
@@ -180,7 +180,7 @@
{ 0x00000040, 0x240014a5, 0x00000b00, 0x00000b20 },
{ 0x01000010, 0x20001ca4, 0x00000400, 0x00000000 },
{ 0x00110020, 0x34001c00, 0x00001400, 0x00000010 },
- { 0x02000010, 0x20001ca4, 0x00000ae0, 0x00000001 },
+ { 0x02000010, 0x20001ca4, 0x00000ae0, 0x00000000 },
{ 0x00210001, 0x2b040021, 0x00450ae4, 0x00000000 },
{ 0x00210001, 0x2b240021, 0x00450ae4, 0x00000000 },
{ 0x00210001, 0x2b140129, 0x00450af4, 0x00000000 },
diff --git a/src/shaders/vme/inter_frame.asm b/src/shaders/vme/inter_frame.asm
index e1b6e68..7c5cfd4 100644
--- a/src/shaders/vme/inter_frame.asm
+++ b/src/shaders/vme/inter_frame.asm
@@ -35,7 +35,11 @@ mov (1) read1_header.8<1>:UD BLOCK_4X16 {align1};
mov (1) read1_header.20<1>:UB thread_id_ub {align1}; /* dispatch id */
shl (2) vme_m0.8<1>:UW orig_xy_ub<2,2,1>:UB 4:UW {align1}; /* Source = (x, y) * 16 */
-
+
+cmp.z.f0.0 (1) null<1>:uw quality_level_ub<0,1,0>:ub LOW_QUALITY_LEVEL:uw {align1};
+(f0.0) jmpi (1) __low_quality_search;
+
+__high_quality_search:
#ifdef DEV_SNB
shl (2) vme_m0.0<1>:UW orig_xy_ub<2,2,1>:UB 4:UW {align1};
add (1) vme_m0.0<1>:W vme_m0.0<0,1,0>:W -16:W {align1}; /* Reference = (x-16,y-12)-(x+32,y+24) */
@@ -47,8 +51,25 @@ mov (1) vme_m0.2<1>:W -12:W {align1} ;
mov (1) vme_m0.12<1>:UD SEARCH_CTRL_SINGLE + INTER_PART_MASK + INTER_SAD_HAAR + SUB_PEL_MODE_QUARTER:UD {align1}; /* 16x16 Source, 1/4 pixel, harr */
mov (1) vme_m0.20<1>:UB thread_id_ub {align1}; /* dispatch id */
-mov (1) vme_m0.22<1>:UW REF_REGION_SIZE {align1}; /* Reference Width&Height, 32x32 */
+mov (1) vme_m0.22<1>:UW REF_REGION_SIZE {align1}; /* Reference Width&Height, 48x40 */
+jmpi __vme_msg1;
+
+
+__low_quality_search:
+#ifdef DEV_SNB
+shl (2) vme_m0.0<1>:UW orig_xy_ub<2,2,1>:UB 4:UW {align1};
+add (1) vme_m0.0<1>:W vme_m0.0<0,1,0>:W -8:W {align1};
+add (1) vme_m0.2<1>:W vme_m0.2<0,1,0>:W -8:W {align1};
+#else
+mov (1) vme_m0.0<1>:W -8:W {align1} ;
+mov (1) vme_m0.2<1>:W -8:W {align1} ;
+#endif
+
+mov (1) vme_m0.12<1>:UD SEARCH_CTRL_SINGLE + INTER_PART_MASK + INTER_SAD_HAAR + SUB_PEL_MODE_HALF:UD {align1}; /* 16x16 Source, 1/2 pixel, harr */
+mov (1) vme_m0.20<1>:UB thread_id_ub {align1}; /* dispatch id */
+mov (1) vme_m0.22<1>:UW MIN_REF_REGION_SIZE {align1}; /* Reference Width&Height, 32x32 */
+__vme_msg1:
mov (1) vme_m1.0<1>:UD ADAPTIVE_SEARCH_ENABLE:ud {align1} ;
mov (1) vme_m1.4<1>:UD FB_PRUNING_ENABLE:UD {align1};
/* MV num is passed by constant buffer. R4.28 */
diff --git a/src/shaders/vme/inter_frame.g6b b/src/shaders/vme/inter_frame.g6b
index ca251bb..bc7cd43 100644
--- a/src/shaders/vme/inter_frame.g6b
+++ b/src/shaders/vme/inter_frame.g6b
@@ -11,12 +11,21 @@
{ 0x00000001, 0x242800e1, 0x00000000, 0x000f0003 },
{ 0x00000001, 0x24340231, 0x00000014, 0x00000000 },
{ 0x00200009, 0x24482e29, 0x004500a0, 0x00040004 },
+ { 0x01000010, 0x20002e28, 0x000000a8, 0x00020002 },
+ { 0x00010020, 0x34001c00, 0x00001400, 0x0000000e },
{ 0x00200009, 0x24402e29, 0x004500a0, 0x00040004 },
{ 0x00000040, 0x24403dad, 0x00000440, 0xfff0fff0 },
{ 0x00000040, 0x24423dad, 0x00000442, 0xfff4fff4 },
{ 0x00000001, 0x244c0061, 0x00000000, 0x00203000 },
{ 0x00000001, 0x24540231, 0x00000014, 0x00000000 },
{ 0x00000001, 0x24560169, 0x00000000, 0x28302830 },
+ { 0x00000020, 0x34001c00, 0x00001400, 0x0000000c },
+ { 0x00200009, 0x24402e29, 0x004500a0, 0x00040004 },
+ { 0x00000040, 0x24403dad, 0x00000440, 0xfff8fff8 },
+ { 0x00000040, 0x24423dad, 0x00000442, 0xfff8fff8 },
+ { 0x00000001, 0x244c0061, 0x00000000, 0x00201000 },
+ { 0x00000001, 0x24540231, 0x00000014, 0x00000000 },
+ { 0x00000001, 0x24560169, 0x00000000, 0x20202020 },
{ 0x00000001, 0x24600061, 0x00000000, 0x00000002 },
{ 0x00000001, 0x24640061, 0x00000000, 0x40000000 },
{ 0x00000001, 0x24640231, 0x0000009c, 0x00000000 },
diff --git a/src/shaders/vme/inter_frame.g7b b/src/shaders/vme/inter_frame.g7b
index 5273200..2a34927 100644
--- a/src/shaders/vme/inter_frame.g7b
+++ b/src/shaders/vme/inter_frame.g7b
@@ -11,11 +11,19 @@
{ 0x00000001, 0x242800e1, 0x00000000, 0x000f0003 },
{ 0x00000001, 0x24340231, 0x00000014, 0x00000000 },
{ 0x00200009, 0x24482e29, 0x004500a0, 0x00040004 },
+ { 0x01000010, 0x20002e28, 0x000000a8, 0x00020002 },
+ { 0x00010020, 0x34001c00, 0x00001400, 0x0000000c },
{ 0x00000001, 0x244001ed, 0x00000000, 0xfff0fff0 },
{ 0x00000001, 0x244201ed, 0x00000000, 0xfff4fff4 },
{ 0x00000001, 0x244c0061, 0x00000000, 0x00203000 },
{ 0x00000001, 0x24540231, 0x00000014, 0x00000000 },
{ 0x00000001, 0x24560169, 0x00000000, 0x28302830 },
+ { 0x00000020, 0x34001c00, 0x00001400, 0x0000000a },
+ { 0x00000001, 0x244001ed, 0x00000000, 0xfff8fff8 },
+ { 0x00000001, 0x244201ed, 0x00000000, 0xfff8fff8 },
+ { 0x00000001, 0x244c0061, 0x00000000, 0x00201000 },
+ { 0x00000001, 0x24540231, 0x00000014, 0x00000000 },
+ { 0x00000001, 0x24560169, 0x00000000, 0x20202020 },
{ 0x00000001, 0x24600061, 0x00000000, 0x00000002 },
{ 0x00000001, 0x24640061, 0x00000000, 0x40000000 },
{ 0x00000001, 0x24640231, 0x0000009c, 0x00000000 },
diff --git a/src/shaders/vme/inter_frame_gen8.asm b/src/shaders/vme/inter_frame_gen8.asm
new file mode 100644
index 0000000..991d903
--- /dev/null
+++ b/src/shaders/vme/inter_frame_gen8.asm
@@ -0,0 +1,760 @@
+/*
+ * Copyright © <2013>, Intel Corporation.
+ *
+ * This program is licensed under the terms and conditions of the
+ * Eclipse Public License (EPL), version 1.0. The full text of the EPL is at
+ * http://www.opensource.org/licenses/eclipse-1.0.php.
+ *
+ */
+// Module name: Inter_frame_gen8.asm
+//
+// Make inter prediction estimation for Inter-frame on gen8
+//
+
+//
+// Now, begin source code....
+// SAVE_RET stores ip + 32 in RETURN_REG; RETURN copies RETURN_REG back to ip.
+
+#define SAVE_RET add (1) RETURN_REG<1>:ud ip:ud 32:ud
+#define RETURN mov (1) ip:ud RETURN_REG<0,1,0>:ud
+
+/*
+ * __INTER_START: clear the temporaries and fill in the message headers
+ */
+__INTER_START:
+mov (16) tmp_reg0.0<1>:UD 0x0:UD {align1};
+mov (16) tmp_reg2.0<1>:UD 0x0:UD {align1};
+mov (16) tmp_reg4.0<1>:UD 0x0:UD {align1} ;
+mov (16) tmp_reg6.0<1>:UD 0x0:UD {align1} ;
+
+shl (2) read0_header.0<1>:D orig_xy_ub<2,2,1>:UB 4:UW {align1}; /* (x, y) * 16 */
+add (1) read0_header.0<1>:D read0_header.0<0,1,0>:D -8:W {align1}; /* X offset */
+add (1) read0_header.4<1>:D read0_header.4<0,1,0>:D -1:W {align1}; /* Y offset */
+mov (1) read0_header.8<1>:UD BLOCK_32X1 {align1};
+mov (1) read0_header.20<1>:UB thread_id_ub {align1}; /* dispatch id */
+
+shl (2) read1_header.0<1>:D orig_xy_ub<2,2,1>:UB 4:UW {align1}; /* (x, y) * 16 */
+add (1) read1_header.0<1>:D read1_header.0<0,1,0>:D -4:W {align1}; /* X offset */
+mov (1) read1_header.8<1>:UD BLOCK_4X16 {align1};
+mov (1) read1_header.20<1>:UB thread_id_ub {align1}; /* dispatch id */
+
+shl (2) vme_m0.8<1>:UW orig_xy_ub<2,2,1>:UB 4:UW {align1}; /* (x, y) * 16 */
+mov (1) vme_m0.20<1>:UB thread_id_ub {align1}; /* dispatch id */
+
+mul (1) obw_m0.8<1>:UD w_in_mb_uw<0,1,0>:UW orig_y_ub<0,1,0>:UB {align1}; /* w_in_mb * y */
+add (1) obw_m0.8<1>:UD obw_m0.8<0,1,0>:UD orig_x_ub<0,1,0>:UB {align1}; /* + x */
+mul (1) obw_m0.8<1>:UD obw_m0.8<0,1,0>:UD 24:UD {align1}; /* obw_m0.8 = (w_in_mb * y + x) * 24 */
+mov (1) obw_m0.20<1>:UB thread_id_ub {align1}; /* dispatch id */
+
+/*
+ * Media Read Message -- fetch Luma neighbor edge pixels
+ */
+/* ROW */
+mov (8) msg_reg0.0<1>:UD read0_header.0<8,8,1>:UD {align1};
+send (8) msg_ind INEP_ROW<1>:UB null read(BIND_IDX_INEP, 0, 0, 4) mlen 1 rlen 1 {align1};
+
+/* COL */
+mov (8) msg_reg0.0<1>:UD read1_header.0<8,8,1>:UD {align1};
+send (8) msg_ind INEP_COL0<1>:UB null read(BIND_IDX_INEP, 0, 0, 4) mlen 1 rlen 2 {align1};
+
+/*
+ * Media Read Message -- fetch Chroma neighbor edge pixels
+ */
+/* ROW */
+shl (2) read0_header.0<1>:D orig_xy_ub<2,2,1>:UB 3:UW {align1}; /* (x, y) * 8; x is doubled by the next mul => x * 16, y * 8 */
+mul (1) read0_header.0<1>:D read0_header.0<0,1,0>:D 2:W {align1};
+add (1) read0_header.0<1>:D read0_header.0<0,1,0>:D -8:W {align1}; /* X offset */
+add (1) read0_header.4<1>:D read0_header.4<0,1,0>:D -1:W {align1}; /* Y offset */
+mov (8) msg_reg0.0<1>:UD read0_header.0<8,8,1>:UD {align1};
+send (8) msg_ind CHROMA_ROW<1>:UB null read(BIND_IDX_CBCR, 0, 0, 4) mlen 1 rlen 1 {align1};
+
+/* COL */
+shl (2) read1_header.0<1>:D orig_xy_ub<2,2,1>:UB 3:UW {align1}; /* (x, y) * 8; x is doubled by the next mul => x * 16, y * 8 */
+mul (1) read1_header.0<1>:D read1_header.0<0,1,0>:D 2:W {align1};
+add (1) read1_header.0<1>:D read1_header.0<0,1,0>:D -4:W {align1}; /* X offset */
+mov (1) read1_header.8<1>:UD BLOCK_8X4 {align1};
+mov (8) msg_reg0.0<1>:UD read1_header.0<8,8,1>:UD {align1};
+send (8) msg_ind CHROMA_COL<1>:UB null read(BIND_IDX_CBCR, 0, 0, 4) mlen 1 rlen 1 {align1};
+
+mov (8) mb_mvp_ref.0<1>:ud 0:ud {align1}; /* predicted MV defaults to zero */
+mov (8) mb_ref_win.0<1>:ud 0:ud {align1};
+and.z.f0.0 (1) null:uw mb_hwdep<0,1,0>:uw 0x04:uw {align1};
+(f0.0) jmpi (1) __mb_hwdep_end; /* bit 0x04 of mb_hwdep is clear: skip the neighbour MV lookup */
+/* read back the data for MB A */
+/* the layout of MB result is: rx.0(Available). rx.4(MVa), rX.8(MVb), rX.16(Pred_L0 flag),
+* rX.18 (Pred_L1 flag), rX.20(Forward reference ID), rX.22(Backward reference ID)
+*/
+mov (8) mba_result.0<1>:ud 0x0:ud {align1};
+mov (8) mbb_result.0<1>:ud 0x0:ud {align1};
+mov (8) mbc_result.0<1>:ud 0x0:ud {align1};
+mba_start:
+mov (8) mb_msg0.0<1>:ud 0:ud {align1}; /* MB A: fill mba_result from the neighbour at (x - 1, y) */
+and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_AE:uw {align1};
+/* MB A doesn't exist. MV and available flag (mba_result.0) stay zero and ref ID = -1 */
+(f0.0) mov (2) mba_result.20<1>:w -1:w {align1};
+(f0.0) jmpi (1) mbb_start;
+mov (1) mba_result.0<1>:d MB_AVAIL {align1};
+mov (2) tmp_reg0.0<1>:UW orig_xy_ub<2,2,1>:UB {align1};
+add (1) tmp_reg0.0<1>:w tmp_reg0.0<0,1,0>:w -1:w {align1}; /* x - 1 */
+mul (1) mb_msg0.8<1>:UD w_in_mb_uw<0,1,0>:UW tmp_reg0.2<0,1,0>:UW {align1};
+add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD tmp_reg0.0<0,1,0>:uw {align1};
+mul (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD 24:UD {align1}; /* mb_msg0.8 = (w_in_mb * y + (x - 1)) * 24 */
+mov (1) mb_msg0.20<1>:UB thread_id_ub {align1}; /* dispatch id */
+
+/* bind index 3, read 4 oword (64bytes), msg type: 0(OWord Block Read) */
+send (16)
+ mb_ind
+ mb_wb.0<1>:ud
+ NULL
+ data_port(
+ OBR_CACHE_TYPE,
+ OBR_MESSAGE_TYPE,
+ OBR_CONTROL_4,
+ OBR_BIND_IDX,
+ OBR_WRITE_COMMIT_CATEGORY,
+ OBR_HEADER_PRESENT
+ )
+ mlen 1
+ rlen 2
+ {align1};
+
+/* TODO: RefID is required after multi-references are added */
+cmp.l.f0.0 (1) null:w mb_intra_wb.16<0,1,0>:uw mb_inter_wb.8<0,1,0>:uw {align1}; /* f0.0 = mb_intra_wb.16 < mb_inter_wb.8; presumably MB A was coded intra -- TODO confirm */
+(f0.0) mov (2) mba_result.20<1>:w -1:w {align1};
+(f0.0) jmpi (1) mbb_start;
+
+add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:ud 3:ud {align1}; /* move the read offset forward by 3 for the MV read */
+/* Read MV for MB A */
+/* bind index 3, read 8 oword (128bytes), msg type: 0(OWord Block Read) */
+send (16)
+ mb_ind
+ mb_mv0.0<1>:ud
+ NULL
+ data_port(
+ OBR_CACHE_TYPE,
+ OBR_MESSAGE_TYPE,
+ OBR_CONTROL_8,
+ OBR_BIND_IDX,
+ OBR_WRITE_COMMIT_CATEGORY,
+ OBR_HEADER_PRESENT
+ )
+ mlen 1
+ rlen 4
+ {align1};
+/* TODO: RefID is required after multi-references are added */
+/* MV: two dwords taken from mb_mv1.8 */
+mov (2) mba_result.4<1>:ud mb_mv1.8<2,2,1>:ud {align1};
+mov (1) mba_result.16<1>:w MB_PRED_FLAG {align1};
+
+mbb_start:
+mov (8) mb_msg0.0<1>:ud 0:ud {align1}; /* MB B: fill mbb_result from the neighbour at (x, y - 1) */
+and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_B:uw {align1};
+/* MB B doesn't exist. MV and available flag (mbb_result.0) stay zero and ref ID = -1 */
+/* If MB B doesn't exist, neither MB C nor D exists, so MB C gets ref ID = -1 too */
+(f0.0) mov (2) mbb_result.20<1>:w -1:w {align1};
+(f0.0) mov (2) mbc_result.20<1>:w -1:w {align1};
+(f0.0) jmpi (1) mb_mvp_start;
+mov (1) mbb_result.0<1>:d MB_AVAIL {align1};
+mov (2) tmp_reg0.0<1>:UW orig_xy_ub<2,2,1>:UB {align1};
+add (1) tmp_reg0.2<1>:w tmp_reg0.2<0,1,0>:w -1:w {align1}; /* y - 1 */
+mul (1) mb_msg0.8<1>:UD w_in_mb_uw<0,1,0>:UW tmp_reg0.2<0,1,0>:UW {align1};
+add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD tmp_reg0.0<0,1,0>:uw {align1};
+mul (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD 24:UD {align1}; /* mb_msg0.8 = (w_in_mb * (y - 1) + x) * 24 */
+mov (1) mb_msg0.20<1>:UB thread_id_ub {align1}; /* dispatch id */
+
+/* bind index 3, read 4 oword (64bytes), msg type: 0(OWord Block Read) */
+send (16)
+ mb_ind
+ mb_wb.0<1>:ud
+ NULL
+ data_port(
+ OBR_CACHE_TYPE,
+ OBR_MESSAGE_TYPE,
+ OBR_CONTROL_4,
+ OBR_BIND_IDX,
+ OBR_WRITE_COMMIT_CATEGORY,
+ OBR_HEADER_PRESENT
+ )
+ mlen 1
+ rlen 2
+ {align1};
+
+/* TODO: RefID is required after multi-references are added */
+cmp.l.f0.0 (1) null:w mb_intra_wb.16<0,1,0>:uw mb_inter_wb.8<0,1,0>:uw {align1}; /* f0.0 = mb_intra_wb.16 < mb_inter_wb.8; presumably MB B was coded intra -- TODO confirm */
+(f0.0) mov (2) mbb_result.20<1>:w -1:w {align1};
+(f0.0) jmpi (1) mbc_start;
+add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:ud 3:ud {align1}; /* move the read offset forward by 3 for the MV read */
+/* Read MV for MB B */
+/* bind index 3, read 8 oword (128bytes), msg type: 0(OWord Block Read) */
+send (16)
+ mb_ind
+ mb_mv0.0<1>:ud
+ NULL
+ data_port(
+ OBR_CACHE_TYPE,
+ OBR_MESSAGE_TYPE,
+ OBR_CONTROL_8,
+ OBR_BIND_IDX,
+ OBR_WRITE_COMMIT_CATEGORY,
+ OBR_HEADER_PRESENT
+ )
+ mlen 1
+ rlen 4
+ {align1};
+/* TODO: RefID is required after multi-references are added */
+mov (2) mbb_result.4<1>:ud mb_mv2.16<2,2,1>:ud {align1}; /* MV: two dwords taken from mb_mv2.16 */
+mov (1) mbb_result.16<1>:w MB_PRED_FLAG {align1};
+
+mbc_start:
+mov (8) mb_msg0.0<1>:ud 0:ud {align1}; /* MB C: fill mbc_result from the neighbour at (x + 1, y - 1) */
+and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_C:uw {align1};
+/* MB C doesn't exist: fall back to MB D (handled at mbd_start) */
+/* Based on h264 spec MB D is used in place of MB C if MB C doesn't exist */
+(f0.0) jmpi (1) mbd_start;
+mov (1) mbc_result.0<1>:d MB_AVAIL {align1};
+mov (2) tmp_reg0.0<1>:UW orig_xy_ub<2,2,1>:UB {align1};
+add (1) tmp_reg0.2<1>:w tmp_reg0.2<0,1,0>:w -1:w {align1}; /* y - 1 */
+add (1) tmp_reg0.0<1>:w tmp_reg0.0<0,1,0>:w 1:w {align1}; /* x + 1 */
+mul (1) mb_msg0.8<1>:UD w_in_mb_uw<0,1,0>:UW tmp_reg0.2<0,1,0>:UW {align1};
+add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD tmp_reg0.0<0,1,0>:uw {align1};
+mul (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD 24:UD {align1}; /* mb_msg0.8 = (w_in_mb * (y - 1) + (x + 1)) * 24 */
+mov (1) mb_msg0.20<1>:UB thread_id_ub {align1}; /* dispatch id */
+
+/* bind index 3, read 4 oword (64bytes), msg type: 0(OWord Block Read) */
+send (16)
+ mb_ind
+ mb_wb.0<1>:ud
+ NULL
+ data_port(
+ OBR_CACHE_TYPE,
+ OBR_MESSAGE_TYPE,
+ OBR_CONTROL_4,
+ OBR_BIND_IDX,
+ OBR_WRITE_COMMIT_CATEGORY,
+ OBR_HEADER_PRESENT
+ )
+ mlen 1
+ rlen 2
+ {align1};
+
+/* TODO: RefID is required after multi-references are added */
+cmp.l.f0.0 (1) null:w mb_intra_wb.16<0,1,0>:uw mb_inter_wb.8<0,1,0>:uw {align1}; /* f0.0 = mb_intra_wb.16 < mb_inter_wb.8; presumably MB C was coded intra -- TODO confirm */
+(f0.0) mov (2) mbc_result.20<1>:w -1:w {align1};
+(f0.0) jmpi (1) mb_mvp_start;
+add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:ud 3:ud {align1}; /* move the read offset forward by 3 for the MV read */
+/* Read MV for MB C */
+/* bind index 3, read 8 oword (128bytes), msg type: 0(OWord Block Read) */
+send (16)
+ mb_ind
+ mb_mv0.0<1>:ud
+ NULL
+ data_port(
+ OBR_CACHE_TYPE,
+ OBR_MESSAGE_TYPE,
+ OBR_CONTROL_8,
+ OBR_BIND_IDX,
+ OBR_WRITE_COMMIT_CATEGORY,
+ OBR_HEADER_PRESENT
+ )
+ mlen 1
+ rlen 4
+ {align1};
+/* TODO: RefID is required after multi-references are added */
+/* Forward MV: two dwords taken from mb_mv2.16 */
+mov (2) mbc_result.4<1>:ud mb_mv2.16<2,2,1>:ud {align1};
+mov (1) mbc_result.16<1>:w MB_PRED_FLAG {align1};
+
+jmpi (1) mb_mvp_start; /* MB C was handled: skip the MB D fallback */
+mbd_start:
+mov (8) mb_msg0.0<1>:ud 0:ud {align1}; /* MB D: fill mbc_result from the neighbour at (x - 1, y - 1) */
+and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_D:uw {align1};
+(f0.0) mov (2) mbc_result.20<1>:w -1:w {align1}; /* MB D doesn't exist either: ref ID = -1 */
+(f0.0) jmpi (1) mb_mvp_start;
+mov (1) mbc_result.0<1>:d MB_AVAIL {align1};
+mov (2) tmp_reg0.0<1>:UW orig_xy_ub<2,2,1>:UB {align1};
+add (2) tmp_reg0.0<1>:w tmp_reg0.0<2,2,1>:w -1:w {align1}; /* (x - 1, y - 1) */
+mul (1) mb_msg0.8<1>:UD w_in_mb_uw<0,1,0>:UW tmp_reg0.2<0,1,0>:UW {align1};
+add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD tmp_reg0.0<0,1,0>:uw {align1};
+mul (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD 24:UD {align1}; /* mb_msg0.8 = (w_in_mb * (y - 1) + (x - 1)) * 24 */
+mov (1) mb_msg0.20<1>:UB thread_id_ub {align1}; /* dispatch id */
+
+/* bind index 3, read 4 oword (64bytes), msg type: 0(OWord Block Read) */
+send (16)
+ mb_ind
+ mb_wb.0<1>:ud
+ NULL
+ data_port(
+ OBR_CACHE_TYPE,
+ OBR_MESSAGE_TYPE,
+ OBR_CONTROL_4,
+ OBR_BIND_IDX,
+ OBR_WRITE_COMMIT_CATEGORY,
+ OBR_HEADER_PRESENT
+ )
+ mlen 1
+ rlen 2
+ {align1};
+
+cmp.l.f0.0 (1) null:w mb_intra_wb.16<0,1,0>:uw mb_inter_wb.8<0,1,0>:uw {align1}; /* f0.0 = mb_intra_wb.16 < mb_inter_wb.8; presumably MB D was coded intra -- TODO confirm */
+(f0.0) mov (2) mbc_result.20<1>:w -1:w {align1};
+(f0.0) jmpi (1) mb_mvp_start;
+
+add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:ud 3:ud {align1}; /* move the read offset forward by 3 for the MV read */
+/* Read MV for MB D */
+/* bind index 3, read 8 oword (128bytes), msg type: 0(OWord Block Read) */
+send (16)
+ mb_ind
+ mb_mv0.0<1>:ub
+ NULL
+ data_port(
+ OBR_CACHE_TYPE,
+ OBR_MESSAGE_TYPE,
+ OBR_CONTROL_8,
+ OBR_BIND_IDX,
+ OBR_WRITE_COMMIT_CATEGORY,
+ OBR_HEADER_PRESENT
+ )
+ mlen 1
+ rlen 4
+ {align1};
+
+/* TODO: RefID is required after multi-references are added */
+/* NOTE(review): the MV read above types its destination :ub; the MB A/B/C MV reads use :ud -- confirm */
+/* Forward MV: two dwords taken from mb_mv3.24, stored into mbc_result (MB D stands in for MB C) */
+mov (2) mbc_result.4<1>:ud mb_mv3.24<2,2,1>:ud {align1};
+mov (1) mbc_result.16<1>:w MB_PRED_FLAG {align1};
+
+mb_mvp_start:
+/*TODO: Add the skip prediction */
+/* Check whether both MB B and C are unavailable (sum of their available flags is zero) */
+add (1) tmp_reg0.0<1>:d mbb_result.0<0,1,0>:d mbc_result.0<0,1,0>:d {align1};
+cmp.z.f0.0 (1) null:d tmp_reg0.0<0,1,0>:d 0:d {align1};
+(-f0.0) jmpi (1) mb_median_start; /* B or C is available: go to the median path */
+cmp.nz.f0.0 (1) null:d mba_result.0<0,1,0>:d 0:d {align1}; /* f0.0 = MB A is available */
+(f0.0) mov (1) mbb_result.4<1>:ud mba_result.4<0,1,0>:ud {align1}; /* only A is available: B and C inherit A's MV ... */
+(f0.0) mov (1) mbc_result.4<1>:ud mba_result.4<0,1,0>:ud {align1};
+(f0.0) mov (1) mbb_result.20<1>:uw mba_result.20<0,1,0>:uw {align1}; /* ... and A's ref ID */
+(f0.0) mov (1) mbc_result.20<1>:uw mba_result.20<0,1,0>:uw {align1};
+(f0.0) mov (1) mb_mvp_ref.0<1>:ud mba_result.4<0,1,0>:ud {align1}; /* predicted MV = A's MV */
+(-f0.0) mov (1) mb_mvp_ref.0<1>:ud 0:ud {align1}; /* no neighbour at all: predicted MV = 0 */
+jmpi (1) __mb_hwdep_end;
+
+mb_median_start:
+/* check whether only one neighbour MB has ref ID 0; tmp_reg0.0 counts them, tmp_reg0.4 keeps the last matching MV */
+mov (8) tmp_reg0.0<1>:ud 0:ud {align1};
+cmp.z.f0.0 (1) null:d mba_result.20<0,1,0>:w 0:w {align1};
+(f0.0) add (1) tmp_reg0.0<1>:w tmp_reg0.0<0,1,0>:w 1:w {align1};
+(f0.0) mov (1) tmp_reg0.4<1>:ud mba_result.4<0,1,0>:ud {align1};
+cmp.z.f0.0 (1) null:d mbb_result.20<0,1,0>:w 0:w {align1};
+(f0.0) add (1) tmp_reg0.0<1>:w tmp_reg0.0<0,1,0>:w 1:w {align1};
+(f0.0) mov (1) tmp_reg0.4<1>:ud mbb_result.4<0,1,0>:ud {align1};
+cmp.z.f0.0 (1) null:d mbc_result.20<0,1,0>:w 0:w {align1};
+(f0.0) add (1) tmp_reg0.0<1>:w tmp_reg0.0<0,1,0>:w 1:w {align1};
+(f0.0) mov (1) tmp_reg0.4<1>:ud mbc_result.4<0,1,0>:ud {align1};
+cmp.e.f0.0 (1) null:d tmp_reg0.0<0,1,0>:w 1:w {align1};
+(f0.0) mov (1) mb_mvp_ref.0<1>:ud tmp_reg0.4<0,1,0>:ud {align1}; /* exactly one match: use its MV as the predictor */
+(f0.0) jmpi (1) __mb_hwdep_end;
+
+mov (1) INPUT_ARG0.0<1>:w mba_result.4<0,1,0>:w {align1}; /* median of the first MV word (presumably x) of A, B, C */
+mov (1) INPUT_ARG0.4<1>:w mbb_result.4<0,1,0>:w {align1};
+mov (1) INPUT_ARG0.8<1>:w mbc_result.4<0,1,0>:w {align1};
+SAVE_RET {align1};
+ jmpi (1) word_imedian;
+mov (1) mb_mvp_ref.0<1>:w RET_ARG<0,1,0>:w {align1};
+mov (1) INPUT_ARG0.0<1>:w mba_result.6<0,1,0>:w {align1}; /* median of the second MV word (presumably y) of A, B, C */
+mov (1) INPUT_ARG0.4<1>:w mbb_result.6<0,1,0>:w {align1};
+mov (1) INPUT_ARG0.8<1>:w mbc_result.6<0,1,0>:w {align1};
+SAVE_RET {align1};
+jmpi (1) word_imedian;
+mov (1) mb_mvp_ref.2<1>:w RET_ARG<0,1,0>:w {align1};
+
+__mb_hwdep_end:
+asr (2) mb_ref_win.0<1>:w mb_mvp_ref.0<2,2,1>:w 2:w {align1}; /* predicted MV >> 2 */
+add (2) mb_ref_win.8<1>:w mb_ref_win.0<2,2,1>:w 3:w {align1}; /* + 3 */
+and (2) mb_ref_win.16<1>:uw mb_ref_win.8<2,2,1>:uw 0xFFFC:uw {align1}; /* clear the low two bits: (MV >> 2) rounded up to a multiple of 4 */
+/* m2, get the MV/Mb cost passed from constant buffer when
+spawning thread by MEDIA_OBJECT */
+mov (8) vme_m2<1>:UD r1.0<8,8,1>:UD {align1};
+
+mov (8) vme_msg_2<1>:UD vme_m2.0<8,8,1>:UD {align1};
+
+/* m3 FWD/BWD cost center*/
+mov (8) vme_msg_3<1>:UD 0x0:UD {align1};
+
+/* m4 skip center*/
+mov (8) vme_msg_4<1>:UD 0x0:UD {align1};
+
+/* m5 */
+mov (1) INEP_ROW.0<1>:UD 0x0:UD {align1};
+and (1) INEP_ROW.4<1>:UD INEP_ROW.4<0,1,0>:UD 0xFF000000:UD {align1}; /* keep only the top byte of dword 1 */
+mov (8) vme_msg_5<1>:UD INEP_ROW.0<8,8,1>:UD {align1};
+
+
+/* Use the Luma mode */
+mov (1) tmp_reg0.0<1>:UW LUMA_INTRA_MODE:UW {align1};
+mov (1) vme_msg_5.5<1>:UB tmp_reg0.0<0,1,0>:UB {align1};
+
+/* m6 */
+mov (8) vme_msg_6<1>:UD 0x0:UD {align1};
+mov (16) vme_msg_6.0<1>:UB INEP_COL0.3<32,8,4>:UB {align1}; /* gather every 4th byte of INEP_COL0, starting at byte 3 */
+mov (1) vme_msg_6.16<1>:UD INTRA_PREDICTORE_MODE {align1};
+
+/* the penalty for Intra mode */
+mov (1) vme_msg_6.28<1>:UD 0x010101:UD {align1};
+mov (1) vme_msg_6.20<1>:UW CHROMA_ROW.6<0,1,0>:UW {align1};
+
+
+/* m7 */
+
+mov (4) vme_msg_7.16<1>:UD CHROMA_ROW.8<4,4,1>:UD {align1};
+mov (8) vme_msg_7.0<1>:UW CHROMA_COL.2<16,8,2>:UW {align1}; /* gather every 2nd word of CHROMA_COL, starting at byte 2 */
+
+/*
+ * SIC VME message
+ */
+
+/* m1 */
+mov (1) intra_flag<1>:UW 0x0:UW {align1} ;
+and.z.f0.0 (1) null<1>:UW transform_8x8_ub<0,1,0>:UB 1:UW {align1};
+(f0.0) mov (1) intra_part_mask_ub<1>:UB LUMA_INTRA_8x8_DISABLE {align1}; /* bit 0 of transform_8x8_ub is clear */
+
+/* assign MB intra struct from the thread payload*/
+mov (1) mb_intra_struct_ub<1>:UB input_mb_intra_ub<0,1,0>:UB {align1};
+
+/* Disable DC HAAR component when calculating HAAR SATD block */
+mov (1) tmp_reg0.0<1>:UW DC_HARR_DISABLE:UW {align1};
+mov (1) vme_m1.30<1>:UB tmp_reg0.0<0,1,0>:UB {align1};
+mov (8) vme_msg_1<1>:UD vme_m1.0<8,8,1>:UD {align1};
+
+/* m0 */
+mov (1) vme_m0.12<1>:UD INTRA_SAD_HAAR:UD {align1}; /* 16x16 Source, Intra_haar */
+mov (8) vme_msg_0.0<1>:UD vme_m0.0<8,8,1>:UD {align1};
+
+/* after verification it will be passed by using payload */
+send (8)
+ vme_msg_ind
+ vme_wb<1>:UD
+ null
+ cre(
+ BIND_IDX_VME,
+ VME_SIC_MESSAGE_TYPE
+ )
+ mlen sic_vme_msg_length
+ rlen vme_wb_length
+ {align1};
+/*
+ * Oword Block Write message: store selected dwords of the SIC writeback
+ */
+mov (8) msg_reg0.0<1>:UD obw_m0<8,8,1>:UD {align1};
+
+mov (1) msg_reg1.0<1>:UD vme_wb.0<0,1,0>:UD {align1};
+mov (1) msg_reg1.4<1>:UD vme_wb.16<0,1,0>:UD {align1};
+mov (1) msg_reg1.8<1>:UD vme_wb.20<0,1,0>:UD {align1};
+mov (1) msg_reg1.12<1>:UD vme_wb.24<0,1,0>:UD {align1};
+
+/* Intra distortion: the word at vme_wb.12 goes to bytes 17-16 of the payload */
+mov (1) msg_reg1.16<1>:UW vme_wb.12<0,1,0>:UW {align1};
+
+mov (1) msg_reg1.20<1>:UD vme_wb.8<0,1,0>:UD {align1};
+/* VME clock counts */
+mov (1) msg_reg1.24<1>:UD vme_wb.28<0,1,0>:UD {align1};
+
+mov (1) msg_reg1.28<1>:UD obw_m0.8<0,1,0>:UD {align1}; /* copy of the write offset used by this message */
+
+/* bind index 3, write 2 oword (32bytes), msg type: 8(OWord Block Write) */
+send (16)
+ msg_ind
+ obw_wb
+ null
+ data_port(
+ OBW_CACHE_TYPE,
+ OBW_MESSAGE_TYPE,
+ OBW_CONTROL_2,
+ OBW_BIND_IDX,
+ OBW_WRITE_COMMIT_CATEGORY,
+ OBW_HEADER_PRESENT
+ )
+ mlen 2
+ rlen obw_wb_length
+ {align1};
+
+/* IME search: pick the search window setup according to quality_level_ub */
+cmp.z.f0.0 (1) null<1>:uw quality_level_ub<0,1,0>:ub LOW_QUALITY_LEVEL:uw {align1};
+(f0.0) jmpi (1) __low_quality_search;
+
+__high_quality_search:
+/* M3/M4 search path. NOTE(review): vme_msg_3 and vme_msg_4 are written again at __vme_msg before the IME send */
+mov (1) vme_msg_3.0<1>:UD 0x01010101:UD {align1};
+mov (1) vme_msg_3.4<1>:UD 0x10010101:UD {align1};
+mov (1) vme_msg_3.8<1>:UD 0x0F0F0F0F:UD {align1};
+mov (1) vme_msg_3.12<1>:UD 0x100F0F0F:UD {align1};
+mov (1) vme_msg_3.16<1>:UD 0x01010101:UD {align1};
+mov (1) vme_msg_3.20<1>:UD 0x10010101:UD {align1};
+mov (1) vme_msg_3.24<1>:UD 0x0F0F0F0F:UD {align1};
+mov (1) vme_msg_3.28<1>:UD 0x100F0F0F:UD {align1};
+mov (1) vme_msg_4.0<1>:UD 0x01010101:UD {align1};
+mov (1) vme_msg_4.4<1>:UD 0x10010101:UD {align1};
+mov (1) vme_msg_4.8<1>:UD 0x0F0F0F0F:UD {align1};
+mov (1) vme_msg_4.12<1>:UD 0x000F0F0F:UD {align1};
+mov (4) vme_msg_4.16<1>:UD 0x0:UD {align1};
+
+mov (1) vme_m0.12<1>:UD SEARCH_CTRL_SINGLE + INTER_PART_MASK + INTER_SAD_HAAR:UD {align1}; /* 16x16 Source, haar */
+mov (1) vme_m0.22<1>:UW REF_REGION_SIZE {align1}; /* Reference Width&Height, 48x40 */
+mov (1) vme_m0.0<1>:W -16:W {align1}; /* window origin x = -16 */
+mov (1) vme_m0.2<1>:W -12:W {align1}; /* window origin y = -12 */
+
+and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_AE:uw {align1};
+(f0.0) add (1) vme_m0.0<1>:w vme_m0.0<0,1,0>:w 12:w {align1}; /* AE flag clear: origin x += 12 */
+and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_B:uw {align1};
+(f0.0) add (1) vme_m0.2<1>:w vme_m0.2<0,1,0>:w 8:w {align1}; /* B flag clear: origin y += 8 */
+jmpi (1) __vme_msg;
+
+__low_quality_search:
+/* M3/M4 search path. NOTE(review): vme_msg_3 and vme_msg_4 are written again at __vme_msg before the IME send */
+mov (1) vme_msg_3.0<1>:UD 0x10010101:UD {align1};
+mov (1) vme_msg_3.4<1>:UD 0x100F0F0F:UD {align1};
+mov (1) vme_msg_3.8<1>:UD 0x10010101:UD {align1};
+mov (1) vme_msg_3.12<1>:UD 0x000F0F0F:UD {align1};
+mov (4) vme_msg_3.16<1>:UD 0x0:UD {align1};
+mov (8) vme_msg_4.16<1>:UD 0x0:UD {align1}; /* NOTE(review): 8 dwords starting at byte 16 (the high-quality path uses mov (4) at .16) -- confirm the intended span */
+
+mov (1) vme_m0.12<1>:UD SEARCH_CTRL_SINGLE + INTER_PART_MASK + INTER_SAD_HAAR:UD {align1}; /* 16x16 Source, haar */
+mov (1) vme_m0.22<1>:UW MIN_REF_REGION_SIZE {align1}; /* Reference Width&Height, 32x32 */
+mov (1) vme_m0.0<1>:W -8:W {align1}; /* window origin x = -8 */
+mov (1) vme_m0.2<1>:W -8:W {align1}; /* window origin y = -8 */
+
+and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_AE:uw {align1};
+(f0.0) add (1) vme_m0.0<1>:w vme_m0.0<0,1,0>:w 4:w {align1}; /* AE flag clear: origin x += 4 */
+and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_B:uw {align1};
+(f0.0) add (1) vme_m0.2<1>:w vme_m0.2<0,1,0>:w 4:w {align1}; /* B flag clear: origin y += 4 */
+
+__vme_msg:
+mov (1) vme_m0.4<1>:UD vme_m0.0<0,1,0>:UD {align1}; /* duplicate the window origin into vme_m0.4 */
+add (2) vme_m0.0<1>:w vme_m0.0<2,2,1>:w mb_ref_win.16<2,2,1>:w {align1}; /* shift both origins by the aligned predictor in mb_ref_win.16 */
+add (2) vme_m0.4<1>:w vme_m0.4<2,2,1>:w mb_ref_win.16<2,2,1>:w {align1};
+mov (8) vme_msg_0.0<1>:UD vme_m0.0<8,8,1>:UD {align1};
+
+mov (1) vme_m1.0<1>:UD ADAPTIVE_SEARCH_ENABLE:ud {align1} ;
+/* the Max MV number is passed by constant buffer */
+mov (1) vme_m1.4<1>:UB r4.28<0,1,0>:UB {align1};
+mov (1) vme_m1.8<1>:UD START_CENTER + SEARCH_PATH_LEN:UD {align1};
+mov (8) vme_msg_1.0<1>:UD vme_m1.0<8,8,1>:UD {align1};
+
+/* Setup the Cost center */
+/* currently four 8x8 share the same cost center */
+mov (4) vme_m3.0<2>:ud mb_mvp_ref.0<0,1,0>:ud {align1}; /* even dwords of vme_m3 = predicted MV */
+mov (4) vme_m3.4<2>:ud mb_mvp_ref.0<0,1,0>:ud {align1}; /* odd dwords of vme_m3 = predicted MV */
+
+mov (8) vme_msg_3<1>:UD vme_m3.0<8,8,1>:UD {align1};
+mov (8) vme_msg_2<1>:UD vme_m2.0<8,8,1>:UD {align1};
+
+/* M4/M5 search path */
+mov (1) vme_msg_4.0<1>:UD 0x01010101:UD {align1};
+mov (1) vme_msg_4.4<1>:UD 0x10010101:UD {align1};
+mov (1) vme_msg_4.8<1>:UD 0x0F0F0F0F:UD {align1};
+mov (1) vme_msg_4.12<1>:UD 0x100F0F0F:UD {align1};
+mov (1) vme_msg_4.16<1>:UD 0x01010101:UD {align1};
+mov (1) vme_msg_4.20<1>:UD 0x10010101:UD {align1};
+mov (1) vme_msg_4.24<1>:UD 0x0F0F0F0F:UD {align1};
+mov (1) vme_msg_4.28<1>:UD 0x100F0F0F:UD {align1};
+
+mov (1) vme_msg_5.0<1>:UD 0x01010101:UD {align1};
+mov (1) vme_msg_5.4<1>:UD 0x10010101:UD {align1};
+mov (1) vme_msg_5.8<1>:UD 0x0F0F0F0F:UD {align1};
+mov (1) vme_msg_5.12<1>:UD 0x000F0F0F:UD {align1};
+
+mov (4) vme_msg_5.16<1>:UD 0x0:UD {align1};
+
+send (8)
+ vme_msg_ind
+ vme_wb<1>:UD
+ null
+ vme(
+ BIND_IDX_VME,
+ 0,
+ 0,
+ VME_IME_MESSAGE_TYPE
+ )
+ mlen ime_vme_msg_length
+ rlen vme_wb_length {align1};
+
+/* Set Macroblock-shape/mode for FBR: vme_m2.20 dword is rebuilt from the IME writeback */
+
+mov (1) vme_m2.20<1>:UD 0x0:UD {align1};
+mov (1) vme_m2.21<1>:UB vme_wb.25<0,1,0>:UB {align1};
+mov (1) vme_m2.22<1>:UB vme_wb.26<0,1,0>:UB {align1};
+
+and (1) tmp_reg0.0<1>:UW vme_wb.0<0,1,0>:UW 0x03:UW {align1}; /* low two bits of vme_wb.0 */
+mov (1) vme_m2.20<1>:UB tmp_reg0.0<0,1,0>:UB {align1};
+
+/* Send FBR message into CRE; m4..m7 carry vme_wb1..vme_wb4 from the IME writeback */
+
+mov (8) vme_msg_4.0<1>:UD vme_wb1.0<8,8,1>:UD {align1};
+mov (8) vme_msg_5.0<1>:ud vme_wb2.0<8,8,1>:ud {align1};
+mov (8) vme_msg_6.0<1>:ud vme_wb3.0<8,8,1>:ud {align1};
+mov (8) vme_msg_7.0<1>:ud vme_wb4.0<8,8,1>:ud {align1};
+
+mov (1) vme_m0.12<1>:UD INTER_SAD_HAAR + SUB_PEL_MODE_QUARTER + FBR_BME_DISABLE:UD {align1}; /* 16x16 Source, 1/4 pixel, haar, BME disable */
+mov (8) vme_msg_0.0<1>:UD vme_m0.0<8,8,1>:UD {align1};
+mov (8) vme_msg_1.0<1>:UD vme_m1.0<8,8,1>:UD {align1};
+
+mov (8) vme_msg_2.0<1>:UD vme_m2.0<8,8,1>:UD {align1};
+mov (8) vme_msg_3.0<1>:UD vme_m3.0<8,8,1>:UD {align1};
+
+/* after verification it will be passed by using payload */
+send (8)
+ vme_msg_ind
+ vme_wb<1>:UD
+ null
+ cre(
+ BIND_IDX_VME,
+ VME_FBR_MESSAGE_TYPE
+ )
+ mlen fbr_vme_msg_length
+ rlen vme_wb_length
+ {align1};
+
+add (1) obw_m0.8<1>:UD obw_m0.8<0,1,0>:UD 0x02:UD {align1}; /* write offset += 2 */
+mov (8) msg_reg0.0<1>:UD obw_m0<8,8,1>:UD {align1};
+/* write FME info */
+mov (1) msg_reg1.0<1>:UD vme_wb.0<0,1,0>:UD {align1};
+
+mov (1) msg_reg1.4<1>:UD vme_wb.24<0,1,0>:UD {align1};
+/* Inter distortion of FME */
+mov (1) msg_reg1.8<1>:UD vme_wb.8<0,1,0>:UD {align1};
+
+mov (1) msg_reg1.12<1>:UD vme_m2.20<0,1,0>:UD {align1}; /* the shape/mode dword built for the FBR message */
+
+/* bind index 3, write oword (16bytes), msg type: 8(OWord Block Write) */
+send (16)
+ msg_ind
+ obw_wb
+ null
+ data_port(
+ OBW_CACHE_TYPE,
+ OBW_MESSAGE_TYPE,
+ OBW_CONTROL_0,
+ OBW_BIND_IDX,
+ OBW_WRITE_COMMIT_CATEGORY,
+ OBW_HEADER_PRESENT
+ )
+ mlen 2
+ rlen obw_wb_length
+ {align1};
+
+/* Write FME/BME MV */
+add (1) obw_m0.8<1>:UD obw_m0.8<0,1,0>:UD 0x01:UD {align1}; /* write offset += 1 */
+mov (8) msg_reg0.0<1>:UD obw_m0.0<8,8,1>:UD {align1};
+
+
+mov (8) msg_reg1.0<1>:UD vme_wb1.0<8,8,1>:UD {align1};
+mov (8) msg_reg2.0<1>:ud vme_wb2.0<8,8,1>:ud {align1};
+mov (8) msg_reg3.0<1>:ud vme_wb3.0<8,8,1>:ud {align1};
+mov (8) msg_reg4.0<1>:ud vme_wb4.0<8,8,1>:ud {align1};
+/* bind index 3, write 8 oword (128 bytes), msg type: 8(OWord Block Write) */
+send (16)
+ msg_ind
+ obw_wb
+ null
+ data_port(
+ OBW_CACHE_TYPE,
+ OBW_MESSAGE_TYPE,
+ OBW_CONTROL_8,
+ OBW_BIND_IDX,
+ OBW_WRITE_COMMIT_CATEGORY,
+ OBW_HEADER_PRESENT
+ )
+ mlen 5
+ rlen obw_wb_length
+ {align1};
+
+/* Write FME/BME RefID */
+add (1) obw_m0.8<1>:UD obw_m0.8<0,1,0>:UD 0x08:UD {align1}; /* write offset += 8 */
+mov (8) msg_reg0.0<1>:UD obw_m0<8,8,1>:UD {align1};
+
+mov (8) msg_reg1.0<1>:UD vme_wb6.0<8,8,1>:UD {align1};
+
+/* bind index 3, write 2 oword (32bytes), msg type: 8(OWord Block Write) */
+send (16)
+ msg_ind
+ obw_wb
+ null
+ data_port(
+ OBW_CACHE_TYPE,
+ OBW_MESSAGE_TYPE,
+ OBW_CONTROL_2,
+ OBW_BIND_IDX,
+ OBW_WRITE_COMMIT_CATEGORY,
+ OBW_HEADER_PRESENT
+ )
+ mlen 2
+ rlen obw_wb_length
+ {align1};
+
+/* Issue message fence so that the previous write message is committed */
+send (16)
+ mb_ind
+ mb_wb.0<1>:ud
+ NULL
+ data_port(
+ OBR_CACHE_TYPE,
+ OBR_MESSAGE_FENCE,
+ OBR_MF_COMMIT,
+ OBR_BIND_IDX,
+ OBR_WRITE_COMMIT_CATEGORY,
+ OBR_HEADER_PRESENT
+ )
+ mlen 1
+ rlen 1
+ {align1};
+
+__EXIT:
+/*
+ * kill thread: copy r0 into the message register and send it to the thread spawner with EOT
+ */
+mov (8) ts_msg_reg0<1>:UD r0<8,8,1>:UD {align1};
+send (16) ts_msg_ind acc0<1>UW null thread_spawner(0, 0, 1) mlen 1 rlen 0 {align1 EOT};
+
+
+ nop ;
+ nop ;
+/* word_imin: RET_ARG = min of the signed words INPUT_ARG0.0/.4/.8; writes TEMP_VAR0.0 and f0.0, exits via RETURN; not referenced elsewhere in this file */
+word_imin:
+ cmp.le.f0.0 (1) null:w INPUT_ARG0.0<0,1,0>:w INPUT_ARG0.4<0,1,0>:w {align1};
+ (f0.0) mov (1) TEMP_VAR0.0<1>:w INPUT_ARG0.0<0,1,0>:w {align1}; /* TEMP_VAR0.0 = min(arg0, arg1) */
+ (-f0.0) mov (1) TEMP_VAR0.0<1>:w INPUT_ARG0.4<0,1,0>:w {align1};
+ cmp.le.f0.0 (1) null:w TEMP_VAR0.0<0,1,0>:w INPUT_ARG0.8<0,1,0>:w {align1};
+ (f0.0) mov (1) RET_ARG<1>:w TEMP_VAR0.0<0,1,0>:w {align1}; /* RET_ARG = min(TEMP_VAR0.0, arg2) */
+ (-f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.8<0,1,0>:w {align1};
+ RETURN {align1};
+
+/* word_imax: RET_ARG = max of the signed words INPUT_ARG0.0/.4/.8; writes TEMP_VAR0.0 and f0.0, exits via RETURN; not referenced elsewhere in this file */
+word_imax:
+ cmp.ge.f0.0 (1) null:w INPUT_ARG0.0<0,1,0>:w INPUT_ARG0.4<0,1,0>:w {align1};
+ (f0.0) mov (1) TEMP_VAR0.0<1>:w INPUT_ARG0.0<0,1,0>:w {align1}; /* TEMP_VAR0.0 = max(arg0, arg1) */
+ (-f0.0) mov (1) TEMP_VAR0.0<1>:w INPUT_ARG0.4<0,1,0>:w {align1};
+ cmp.ge.f0.0 (1) null:w TEMP_VAR0.0<0,1,0>:w INPUT_ARG0.8<0,1,0>:w {align1};
+ (f0.0) mov (1) RET_ARG<1>:w TEMP_VAR0.0<0,1,0>:w {align1}; /* RET_ARG = max(TEMP_VAR0.0, arg2) */
+ (-f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.8<0,1,0>:w {align1};
+ RETURN {align1};
+/* word_imedian: RET_ARG = median of the signed words a = INPUT_ARG0.0, b = INPUT_ARG0.4, c = INPUT_ARG0.8; writes f0.0, exits via RETURN */
+word_imedian:
+ cmp.ge.f0.0 (1) null:w INPUT_ARG0.0<0,1,0>:w INPUT_ARG0.4<0,1,0>:w {align1};
+ (f0.0) jmpi (1) cmp_a_ge_b;
+ cmp.ge.f0.0 (1) null:w INPUT_ARG0.0<0,1,0>:w INPUT_ARG0.8<0,1,0>:w {align1}; /* here a < b */
+ (f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.0<0,1,0>:w {align1}; /* c <= a < b: median is a */
+ (f0.0) jmpi (1) cmp_end;
+ cmp.ge.f0.0 (1) null:w INPUT_ARG0.4<0,1,0>:w INPUT_ARG0.8<0,1,0>:w {align1}; /* here a < b and a < c */
+ (f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.8<0,1,0>:w {align1}; /* a < c <= b: median is c */
+ (-f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.4<0,1,0>:w {align1}; /* a < b < c: median is b */
+ jmpi (1) cmp_end;
+cmp_a_ge_b:
+ cmp.ge.f0.0 (1) null:w INPUT_ARG0.4<0,1,0>:w INPUT_ARG0.8<0,1,0>:w {align1}; /* here a >= b */
+ (f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.4<0,1,0>:w {align1}; /* a >= b >= c: median is b */
+ (f0.0) jmpi (1) cmp_end;
+ cmp.ge.f0.0 (1) null:w INPUT_ARG0.0<0,1,0>:w INPUT_ARG0.8<0,1,0>:w {align1}; /* here a >= b and b < c */
+ (f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.8<0,1,0>:w {align1}; /* a >= c > b: median is c */
+ (-f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.0<0,1,0>:w {align1}; /* c > a >= b: median is a */
+cmp_end:
+ RETURN {align1};
+
diff --git a/src/shaders/vme/inter_frame_gen8.g8a b/src/shaders/vme/inter_frame_gen8.g8a
new file mode 100644
index 0000000..f514dd3
--- /dev/null
+++ b/src/shaders/vme/inter_frame_gen8.g8a
@@ -0,0 +1,2 @@
+#include "vme8.inc"
+#include "inter_frame_gen8.asm"
diff --git a/src/shaders/vme/inter_frame_gen8.g8b b/src/shaders/vme/inter_frame_gen8.g8b
new file mode 100644
index 0000000..d0cc25d
--- /dev/null
+++ b/src/shaders/vme/inter_frame_gen8.g8b
@@ -0,0 +1,327 @@
+ { 0x00800001, 0x24000608, 0x00000000, 0x00000000 },
+ { 0x00800001, 0x24400608, 0x00000000, 0x00000000 },
+ { 0x00800001, 0x24800608, 0x00000000, 0x00000000 },
+ { 0x00800001, 0x24c00608, 0x00000000, 0x00000000 },
+ { 0x00200009, 0x24002228, 0x164500a0, 0x00040004 },
+ { 0x00000040, 0x24000a28, 0x1e000400, 0xfff8fff8 },
+ { 0x00000040, 0x24040a28, 0x1e000404, 0xffffffff },
+ { 0x00000001, 0x24080e08, 0x08000000, 0x0000001f },
+ { 0x00000001, 0x24142288, 0x00000014, 0x00000000 },
+ { 0x00200009, 0x24202228, 0x164500a0, 0x00040004 },
+ { 0x00000040, 0x24200a28, 0x1e000420, 0xfffcfffc },
+ { 0x00000001, 0x24280e08, 0x08000000, 0x000f0003 },
+ { 0x00000001, 0x24342288, 0x00000014, 0x00000000 },
+ { 0x00200009, 0x24482248, 0x164500a0, 0x00040004 },
+ { 0x00000001, 0x24542288, 0x00000014, 0x00000000 },
+ { 0x00000041, 0x24881208, 0x220000a2, 0x000000a1 },
+ { 0x00000040, 0x24880208, 0x22000488, 0x000000a0 },
+ { 0x00000041, 0x24880208, 0x06000488, 0x00000018 },
+ { 0x00000001, 0x24942288, 0x00000014, 0x00000000 },
+ { 0x00600001, 0x28000208, 0x008d0400, 0x00000000 },
+ { 0x04600031, 0x23800a88, 0x0e000800, 0x02190004 },
+ { 0x00600001, 0x28000208, 0x008d0420, 0x00000000 },
+ { 0x04600031, 0x23a00a88, 0x0e000800, 0x02290004 },
+ { 0x00200009, 0x24002228, 0x164500a0, 0x00030003 },
+ { 0x00000041, 0x24000a28, 0x1e000400, 0x00020002 },
+ { 0x00000040, 0x24000a28, 0x1e000400, 0xfff8fff8 },
+ { 0x00000040, 0x24040a28, 0x1e000404, 0xffffffff },
+ { 0x00600001, 0x28000208, 0x008d0400, 0x00000000 },
+ { 0x04600031, 0x26000a88, 0x0e000800, 0x02190006 },
+ { 0x00200009, 0x24202228, 0x164500a0, 0x00030003 },
+ { 0x00000041, 0x24200a28, 0x1e000420, 0x00020002 },
+ { 0x00000040, 0x24200a28, 0x1e000420, 0xfffcfffc },
+ { 0x00000001, 0x24280e08, 0x08000000, 0x00070003 },
+ { 0x00600001, 0x28000208, 0x008d0420, 0x00000000 },
+ { 0x04600031, 0x26200a88, 0x0e000800, 0x02190006 },
+ { 0x00600001, 0x2ac00608, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x2a800608, 0x00000000, 0x00000000 },
+ { 0x01000005, 0x20001240, 0x160000a6, 0x00040004 },
+ { 0x00010020, 0x34000000, 0x0e001400, 0x00000750 },
+ { 0x00600001, 0x2ae00608, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x2b000608, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x2b200608, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x2b400608, 0x00000000, 0x00000000 },
+ { 0x01000005, 0x20002240, 0x160000a5, 0x00600060 },
+ { 0x00210001, 0x2af41e68, 0x18000000, 0xffffffff },
+ { 0x00010020, 0x34000000, 0x0e001400, 0x000000f0 },
+ { 0x00000001, 0x2ae00e28, 0x08000000, 0x00000001 },
+ { 0x00200001, 0x24002248, 0x004500a0, 0x00000000 },
+ { 0x00000040, 0x24001a68, 0x1e000400, 0xffffffff },
+ { 0x00000041, 0x2b481208, 0x120000a2, 0x00000402 },
+ { 0x00000040, 0x2b480208, 0x12000b48, 0x00000400 },
+ { 0x00000041, 0x2b480208, 0x06000b48, 0x00000018 },
+ { 0x00000001, 0x2b542288, 0x00000014, 0x00000000 },
+ { 0x0a800031, 0x2b600a08, 0x0e000b40, 0x02280303 },
+ { 0x05000010, 0x20001260, 0x12000b70, 0x00000b88 },
+ { 0x00210001, 0x2af41e68, 0x18000000, 0xffffffff },
+ { 0x00010020, 0x34000000, 0x0e001400, 0x00000040 },
+ { 0x00000040, 0x2b480208, 0x06000b48, 0x00000003 },
+ { 0x0a800031, 0x2ba00a08, 0x0e000b40, 0x02480403 },
+ { 0x00200001, 0x2ae40208, 0x00450bc8, 0x00000000 },
+ { 0x00000001, 0x2af01e68, 0x18000000, 0x00010001 },
+ { 0x00600001, 0x2b400608, 0x00000000, 0x00000000 },
+ { 0x01000005, 0x20002240, 0x160000a5, 0x00100010 },
+ { 0x00210001, 0x2b141e68, 0x18000000, 0xffffffff },
+ { 0x00210001, 0x2b341e68, 0x18000000, 0xffffffff },
+ { 0x00010020, 0x34000000, 0x0e001400, 0x00000360 },
+ { 0x00000001, 0x2b000e28, 0x08000000, 0x00000001 },
+ { 0x00200001, 0x24002248, 0x004500a0, 0x00000000 },
+ { 0x00000040, 0x24021a68, 0x1e000402, 0xffffffff },
+ { 0x00000041, 0x2b481208, 0x120000a2, 0x00000402 },
+ { 0x00000040, 0x2b480208, 0x12000b48, 0x00000400 },
+ { 0x00000041, 0x2b480208, 0x06000b48, 0x00000018 },
+ { 0x00000001, 0x2b542288, 0x00000014, 0x00000000 },
+ { 0x0a800031, 0x2b600a08, 0x0e000b40, 0x02280303 },
+ { 0x05000010, 0x20001260, 0x12000b70, 0x00000b88 },
+ { 0x00210001, 0x2b141e68, 0x18000000, 0xffffffff },
+ { 0x00010020, 0x34000000, 0x0e001400, 0x00000040 },
+ { 0x00000040, 0x2b480208, 0x06000b48, 0x00000003 },
+ { 0x0a800031, 0x2ba00a08, 0x0e000b40, 0x02480403 },
+ { 0x00200001, 0x2b040208, 0x00450bf0, 0x00000000 },
+ { 0x00000001, 0x2b101e68, 0x18000000, 0x00010001 },
+ { 0x00600001, 0x2b400608, 0x00000000, 0x00000000 },
+ { 0x01000005, 0x20002240, 0x160000a5, 0x00080008 },
+ { 0x00010020, 0x34000000, 0x0e001400, 0x00000110 },
+ { 0x00000001, 0x2b200e28, 0x08000000, 0x00000001 },
+ { 0x00200001, 0x24002248, 0x004500a0, 0x00000000 },
+ { 0x00000040, 0x24021a68, 0x1e000402, 0xffffffff },
+ { 0x00000040, 0x24001a68, 0x1e000400, 0x00010001 },
+ { 0x00000041, 0x2b481208, 0x120000a2, 0x00000402 },
+ { 0x00000040, 0x2b480208, 0x12000b48, 0x00000400 },
+ { 0x00000041, 0x2b480208, 0x06000b48, 0x00000018 },
+ { 0x00000001, 0x2b542288, 0x00000014, 0x00000000 },
+ { 0x0a800031, 0x2b600a08, 0x0e000b40, 0x02280303 },
+ { 0x05000010, 0x20001260, 0x12000b70, 0x00000b88 },
+ { 0x00210001, 0x2b341e68, 0x18000000, 0xffffffff },
+ { 0x00010020, 0x34000000, 0x0e001400, 0x00000180 },
+ { 0x00000040, 0x2b480208, 0x06000b48, 0x00000003 },
+ { 0x0a800031, 0x2ba00a08, 0x0e000b40, 0x02480403 },
+ { 0x00200001, 0x2b240208, 0x00450bf0, 0x00000000 },
+ { 0x00000001, 0x2b301e68, 0x18000000, 0x00010001 },
+ { 0x00000020, 0x34000000, 0x0e001400, 0x00000130 },
+ { 0x00600001, 0x2b400608, 0x00000000, 0x00000000 },
+ { 0x01000005, 0x20002240, 0x160000a5, 0x00040004 },
+ { 0x00210001, 0x2b341e68, 0x18000000, 0xffffffff },
+ { 0x00010020, 0x34000000, 0x0e001400, 0x000000f0 },
+ { 0x00000001, 0x2b200e28, 0x08000000, 0x00000001 },
+ { 0x00200001, 0x24002248, 0x004500a0, 0x00000000 },
+ { 0x00200040, 0x24001a68, 0x1e450400, 0xffffffff },
+ { 0x00000041, 0x2b481208, 0x120000a2, 0x00000402 },
+ { 0x00000040, 0x2b480208, 0x12000b48, 0x00000400 },
+ { 0x00000041, 0x2b480208, 0x06000b48, 0x00000018 },
+ { 0x00000001, 0x2b542288, 0x00000014, 0x00000000 },
+ { 0x0a800031, 0x2b600a08, 0x0e000b40, 0x02280303 },
+ { 0x05000010, 0x20001260, 0x12000b70, 0x00000b88 },
+ { 0x00210001, 0x2b341e68, 0x18000000, 0xffffffff },
+ { 0x00010020, 0x34000000, 0x0e001400, 0x00000040 },
+ { 0x00000040, 0x2b480208, 0x06000b48, 0x00000003 },
+ { 0x0a800031, 0x2ba00a88, 0x0e000b40, 0x02480403 },
+ { 0x00200001, 0x2b240208, 0x00450c18, 0x00000000 },
+ { 0x00000001, 0x2b301e68, 0x18000000, 0x00010001 },
+ { 0x00000040, 0x24000a28, 0x0a000b00, 0x00000b20 },
+ { 0x01000010, 0x20000a20, 0x0e000400, 0x00000000 },
+ { 0x00110020, 0x34000000, 0x0e001400, 0x00000080 },
+ { 0x02000010, 0x20000a20, 0x0e000ae0, 0x00000000 },
+ { 0x00010001, 0x2b040208, 0x00000ae4, 0x00000000 },
+ { 0x00010001, 0x2b240208, 0x00000ae4, 0x00000000 },
+ { 0x00010001, 0x2b141248, 0x00000af4, 0x00000000 },
+ { 0x00010001, 0x2b341248, 0x00000af4, 0x00000000 },
+ { 0x00010001, 0x2ac00208, 0x00000ae4, 0x00000000 },
+ { 0x00110001, 0x2ac00608, 0x00000000, 0x00000000 },
+ { 0x00000020, 0x34000000, 0x0e001400, 0x00000190 },
+ { 0x00600001, 0x24000608, 0x00000000, 0x00000000 },
+ { 0x01000010, 0x20001a20, 0x1e000af4, 0x00000000 },
+ { 0x00010040, 0x24001a68, 0x1e000400, 0x00010001 },
+ { 0x00010001, 0x24040208, 0x00000ae4, 0x00000000 },
+ { 0x01000010, 0x20001a20, 0x1e000b14, 0x00000000 },
+ { 0x00010040, 0x24001a68, 0x1e000400, 0x00010001 },
+ { 0x00010001, 0x24040208, 0x00000b04, 0x00000000 },
+ { 0x01000010, 0x20001a20, 0x1e000b34, 0x00000000 },
+ { 0x00010040, 0x24001a68, 0x1e000400, 0x00010001 },
+ { 0x00010001, 0x24040208, 0x00000b24, 0x00000000 },
+ { 0x01000010, 0x20001a20, 0x1e000400, 0x00010001 },
+ { 0x00010001, 0x2ac00208, 0x00000404, 0x00000000 },
+ { 0x00010020, 0x34000000, 0x0e001400, 0x000000c0 },
+ { 0x00000001, 0x2fa01a68, 0x00000ae4, 0x00000000 },
+ { 0x00000001, 0x2fa41a68, 0x00000b04, 0x00000000 },
+ { 0x00000001, 0x2fa81a68, 0x00000b24, 0x00000000 },
+ { 0x00000040, 0x2fe00008, 0x06001400, 0x00000020 },
+ { 0x00000020, 0x34000000, 0x0e001400, 0x00000a20 },
+ { 0x00000001, 0x2ac01a68, 0x00000fe4, 0x00000000 },
+ { 0x00000001, 0x2fa01a68, 0x00000ae6, 0x00000000 },
+ { 0x00000001, 0x2fa41a68, 0x00000b06, 0x00000000 },
+ { 0x00000001, 0x2fa81a68, 0x00000b26, 0x00000000 },
+ { 0x00000040, 0x2fe00008, 0x06001400, 0x00000020 },
+ { 0x00000020, 0x34000000, 0x0e001400, 0x000009c0 },
+ { 0x00000001, 0x2ac21a68, 0x00000fe4, 0x00000000 },
+ { 0x0020000c, 0x2a801a68, 0x1e450ac0, 0x00020002 },
+ { 0x00200040, 0x2a881a68, 0x1e450a80, 0x00030003 },
+ { 0x00200005, 0x2a901248, 0x16450a88, 0xfffcfffc },
+ { 0x00600001, 0x25600208, 0x008d0020, 0x00000000 },
+ { 0x00600001, 0x28400208, 0x008d0560, 0x00000000 },
+ { 0x00600001, 0x28600608, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x28800608, 0x00000000, 0x00000000 },
+ { 0x00000001, 0x23800608, 0x00000000, 0x00000000 },
+ { 0x00000005, 0x23840208, 0x06000384, 0xff000000 },
+ { 0x00600001, 0x28a00208, 0x008d0380, 0x00000000 },
+ { 0x00000001, 0x24001648, 0x10000000, 0x00010001 },
+ { 0x00000001, 0x28a52288, 0x00000400, 0x00000000 },
+ { 0x00600001, 0x28c00608, 0x00000000, 0x00000000 },
+ { 0x00800001, 0x28c02288, 0x00cf03a3, 0x00000000 },
+ { 0x00000001, 0x28d00608, 0x00000000, 0x11111111 },
+ { 0x00000001, 0x28dc0608, 0x00000000, 0x00010101 },
+ { 0x00000001, 0x28d41248, 0x00000606, 0x00000000 },
+ { 0x00400001, 0x28f00208, 0x00690608, 0x00000000 },
+ { 0x00600001, 0x28e01248, 0x00ae0622, 0x00000000 },
+ { 0x00000001, 0x247c1648, 0x10000000, 0x00000000 },
+ { 0x01000005, 0x20002240, 0x160000a4, 0x00010001 },
+ { 0x00010001, 0x247c0e88, 0x08000000, 0x00000002 },
+ { 0x00000001, 0x247d2288, 0x000000a5, 0x00000000 },
+ { 0x00000001, 0x24001648, 0x10000000, 0x00200020 },
+ { 0x00000001, 0x247e2288, 0x00000400, 0x00000000 },
+ { 0x00600001, 0x28200208, 0x008d0460, 0x00000000 },
+ { 0x00000001, 0x244c0608, 0x00000000, 0x00800000 },
+ { 0x00600001, 0x28000208, 0x008d0440, 0x00000000 },
+ { 0x0d600031, 0x21800a08, 0x0e000800, 0x10782000 },
+ { 0x00600001, 0x28000208, 0x008d0480, 0x00000000 },
+ { 0x00000001, 0x28200208, 0x00000180, 0x00000000 },
+ { 0x00000001, 0x28240208, 0x00000190, 0x00000000 },
+ { 0x00000001, 0x28280208, 0x00000194, 0x00000000 },
+ { 0x00000001, 0x282c0208, 0x00000198, 0x00000000 },
+ { 0x00000001, 0x28301248, 0x0000018c, 0x00000000 },
+ { 0x00000001, 0x28340208, 0x00000188, 0x00000000 },
+ { 0x00000001, 0x28380208, 0x0000019c, 0x00000000 },
+ { 0x00000001, 0x283c0208, 0x00000488, 0x00000000 },
+ { 0x0a800031, 0x20000a60, 0x0e000800, 0x040a0203 },
+ { 0x01000010, 0x20002240, 0x160000a7, 0x00020002 },
+ { 0x00010020, 0x34000000, 0x0e001400, 0x00000160 },
+ { 0x00000001, 0x28600608, 0x00000000, 0x01010101 },
+ { 0x00000001, 0x28640608, 0x00000000, 0x10010101 },
+ { 0x00000001, 0x28680608, 0x00000000, 0x0f0f0f0f },
+ { 0x00000001, 0x286c0608, 0x00000000, 0x100f0f0f },
+ { 0x00000001, 0x28700608, 0x00000000, 0x01010101 },
+ { 0x00000001, 0x28740608, 0x00000000, 0x10010101 },
+ { 0x00000001, 0x28780608, 0x00000000, 0x0f0f0f0f },
+ { 0x00000001, 0x287c0608, 0x00000000, 0x100f0f0f },
+ { 0x00000001, 0x28800608, 0x00000000, 0x01010101 },
+ { 0x00000001, 0x28840608, 0x00000000, 0x10010101 },
+ { 0x00000001, 0x28880608, 0x00000000, 0x0f0f0f0f },
+ { 0x00000001, 0x288c0608, 0x00000000, 0x000f0f0f },
+ { 0x00400001, 0x28900608, 0x00000000, 0x00000000 },
+ { 0x00000001, 0x244c0608, 0x00000000, 0x00200000 },
+ { 0x00000001, 0x24561648, 0x10000000, 0x28302830 },
+ { 0x00000001, 0x24401e68, 0x18000000, 0xfff0fff0 },
+ { 0x00000001, 0x24421e68, 0x18000000, 0xfff4fff4 },
+ { 0x01000005, 0x20002240, 0x160000a5, 0x00600060 },
+ { 0x00010040, 0x24401a68, 0x1e000440, 0x000c000c },
+ { 0x01000005, 0x20002240, 0x160000a5, 0x00100010 },
+ { 0x00010040, 0x24421a68, 0x1e000442, 0x00080008 },
+ { 0x00000020, 0x34000000, 0x0e001400, 0x000000e0 },
+ { 0x00000001, 0x28600608, 0x00000000, 0x10010101 },
+ { 0x00000001, 0x28640608, 0x00000000, 0x100f0f0f },
+ { 0x00000001, 0x28680608, 0x00000000, 0x10010101 },
+ { 0x00000001, 0x286c0608, 0x00000000, 0x000f0f0f },
+ { 0x00400001, 0x28700608, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x28900608, 0x00000000, 0x00000000 },
+ { 0x00000001, 0x244c0608, 0x00000000, 0x00200000 },
+ { 0x00000001, 0x24561648, 0x10000000, 0x20202020 },
+ { 0x00000001, 0x24401e68, 0x18000000, 0xfff8fff8 },
+ { 0x00000001, 0x24421e68, 0x18000000, 0xfff8fff8 },
+ { 0x01000005, 0x20002240, 0x160000a5, 0x00600060 },
+ { 0x00010040, 0x24401a68, 0x1e000440, 0x00040004 },
+ { 0x01000005, 0x20002240, 0x160000a5, 0x00100010 },
+ { 0x00010040, 0x24421a68, 0x1e000442, 0x00040004 },
+ { 0x00000001, 0x24440208, 0x00000440, 0x00000000 },
+ { 0x00200040, 0x24401a68, 0x1a450440, 0x00450a90 },
+ { 0x00200040, 0x24441a68, 0x1a450444, 0x00450a90 },
+ { 0x00600001, 0x28000208, 0x008d0440, 0x00000000 },
+ { 0x00000001, 0x24600608, 0x00000000, 0x00000002 },
+ { 0x00000001, 0x24642288, 0x0000009c, 0x00000000 },
+ { 0x00000001, 0x24680608, 0x00000000, 0x30003030 },
+ { 0x00600001, 0x28200208, 0x008d0460, 0x00000000 },
+ { 0x00400001, 0x45800208, 0x00000ac0, 0x00000000 },
+ { 0x00400001, 0x45840208, 0x00000ac0, 0x00000000 },
+ { 0x00600001, 0x28600208, 0x008d0580, 0x00000000 },
+ { 0x00600001, 0x28400208, 0x008d0560, 0x00000000 },
+ { 0x00000001, 0x28800608, 0x00000000, 0x01010101 },
+ { 0x00000001, 0x28840608, 0x00000000, 0x10010101 },
+ { 0x00000001, 0x28880608, 0x00000000, 0x0f0f0f0f },
+ { 0x00000001, 0x288c0608, 0x00000000, 0x100f0f0f },
+ { 0x00000001, 0x28900608, 0x00000000, 0x01010101 },
+ { 0x00000001, 0x28940608, 0x00000000, 0x10010101 },
+ { 0x00000001, 0x28980608, 0x00000000, 0x0f0f0f0f },
+ { 0x00000001, 0x289c0608, 0x00000000, 0x100f0f0f },
+ { 0x00000001, 0x28a00608, 0x00000000, 0x01010101 },
+ { 0x00000001, 0x28a40608, 0x00000000, 0x10010101 },
+ { 0x00000001, 0x28a80608, 0x00000000, 0x0f0f0f0f },
+ { 0x00000001, 0x28ac0608, 0x00000000, 0x000f0f0f },
+ { 0x00400001, 0x28b00608, 0x00000000, 0x00000000 },
+ { 0x08600031, 0x21800a08, 0x0e000800, 0x0c784000 },
+ { 0x00000001, 0x25740608, 0x00000000, 0x00000000 },
+ { 0x00000001, 0x25752288, 0x00000199, 0x00000000 },
+ { 0x00000001, 0x25762288, 0x0000019a, 0x00000000 },
+ { 0x00000005, 0x24001248, 0x16000180, 0x00030003 },
+ { 0x00000001, 0x25742288, 0x00000400, 0x00000000 },
+ { 0x00600001, 0x28800208, 0x008d01a0, 0x00000000 },
+ { 0x00600001, 0x28a00208, 0x008d01c0, 0x00000000 },
+ { 0x00600001, 0x28c00208, 0x008d01e0, 0x00000000 },
+ { 0x00600001, 0x28e00208, 0x008d0200, 0x00000000 },
+ { 0x00000001, 0x244c0608, 0x00000000, 0x00243000 },
+ { 0x00600001, 0x28000208, 0x008d0440, 0x00000000 },
+ { 0x00600001, 0x28200208, 0x008d0460, 0x00000000 },
+ { 0x00600001, 0x28400208, 0x008d0560, 0x00000000 },
+ { 0x00600001, 0x28600208, 0x008d0580, 0x00000000 },
+ { 0x0d600031, 0x21800a08, 0x0e000800, 0x10786000 },
+ { 0x00000040, 0x24880208, 0x06000488, 0x00000002 },
+ { 0x00600001, 0x28000208, 0x008d0480, 0x00000000 },
+ { 0x00000001, 0x28200208, 0x00000180, 0x00000000 },
+ { 0x00000001, 0x28240208, 0x00000198, 0x00000000 },
+ { 0x00000001, 0x28280208, 0x00000188, 0x00000000 },
+ { 0x00000001, 0x282c0208, 0x00000574, 0x00000000 },
+ { 0x0a800031, 0x20000a60, 0x0e000800, 0x040a0003 },
+ { 0x00000040, 0x24880208, 0x06000488, 0x00000001 },
+ { 0x00600001, 0x28000208, 0x008d0480, 0x00000000 },
+ { 0x00600001, 0x28200208, 0x008d01a0, 0x00000000 },
+ { 0x00600001, 0x28400208, 0x008d01c0, 0x00000000 },
+ { 0x00600001, 0x28600208, 0x008d01e0, 0x00000000 },
+ { 0x00600001, 0x28800208, 0x008d0200, 0x00000000 },
+ { 0x0a800031, 0x20000a60, 0x0e000800, 0x0a0a0403 },
+ { 0x00000040, 0x24880208, 0x06000488, 0x00000008 },
+ { 0x00600001, 0x28000208, 0x008d0480, 0x00000000 },
+ { 0x00600001, 0x28200208, 0x008d0240, 0x00000000 },
+ { 0x0a800031, 0x20000a60, 0x0e000800, 0x040a0203 },
+ { 0x0a800031, 0x2b600a08, 0x0e000b40, 0x0219e003 },
+ { 0x00600001, 0x2e000208, 0x008d0000, 0x00000000 },
+ { 0x07800031, 0x24000a40, 0x0e000e00, 0x82000010 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x06000010, 0x20001a60, 0x1a000fa0, 0x00000fa4 },
+ { 0x00010001, 0x2f601a68, 0x00000fa0, 0x00000000 },
+ { 0x00110001, 0x2f601a68, 0x00000fa4, 0x00000000 },
+ { 0x06000010, 0x20001a60, 0x1a000f60, 0x00000fa8 },
+ { 0x00010001, 0x2fe41a68, 0x00000f60, 0x00000000 },
+ { 0x00110001, 0x2fe41a68, 0x00000fa8, 0x00000000 },
+ { 0x00000001, 0x34000200, 0x00000fe0, 0x00000000 },
+ { 0x04000010, 0x20001a60, 0x1a000fa0, 0x00000fa4 },
+ { 0x00010001, 0x2f601a68, 0x00000fa0, 0x00000000 },
+ { 0x00110001, 0x2f601a68, 0x00000fa4, 0x00000000 },
+ { 0x04000010, 0x20001a60, 0x1a000f60, 0x00000fa8 },
+ { 0x00010001, 0x2fe41a68, 0x00000f60, 0x00000000 },
+ { 0x00110001, 0x2fe41a68, 0x00000fa8, 0x00000000 },
+ { 0x00000001, 0x34000200, 0x00000fe0, 0x00000000 },
+ { 0x04000010, 0x20001a60, 0x1a000fa0, 0x00000fa4 },
+ { 0x00010020, 0x34000000, 0x0e001400, 0x00000070 },
+ { 0x04000010, 0x20001a60, 0x1a000fa0, 0x00000fa8 },
+ { 0x00010001, 0x2fe41a68, 0x00000fa0, 0x00000000 },
+ { 0x00010020, 0x34000000, 0x0e001400, 0x000000a0 },
+ { 0x04000010, 0x20001a60, 0x1a000fa4, 0x00000fa8 },
+ { 0x00010001, 0x2fe41a68, 0x00000fa8, 0x00000000 },
+ { 0x00110001, 0x2fe41a68, 0x00000fa4, 0x00000000 },
+ { 0x00000020, 0x34000000, 0x0e001400, 0x00000060 },
+ { 0x04000010, 0x20001a60, 0x1a000fa4, 0x00000fa8 },
+ { 0x00010001, 0x2fe41a68, 0x00000fa4, 0x00000000 },
+ { 0x00010020, 0x34000000, 0x0e001400, 0x00000030 },
+ { 0x04000010, 0x20001a60, 0x1a000fa0, 0x00000fa8 },
+ { 0x00010001, 0x2fe41a68, 0x00000fa8, 0x00000000 },
+ { 0x00110001, 0x2fe41a68, 0x00000fa0, 0x00000000 },
+ { 0x00000001, 0x34000200, 0x00000fe0, 0x00000000 },
diff --git a/src/shaders/vme/inter_frame_haswell.asm b/src/shaders/vme/inter_frame_haswell.asm
index 6305c3c..399125a 100644
--- a/src/shaders/vme/inter_frame_haswell.asm
+++ b/src/shaders/vme/inter_frame_haswell.asm
@@ -329,7 +329,7 @@ mb_mvp_start:
add (1) tmp_reg0.0<1>:d mbb_result.0<0,1,0>:d mbc_result.0<0,1,0>:d {align1};
cmp.z.f0.0 (1) null:d tmp_reg0.0<0,1,0>:d 0:d {align1};
(-f0.0) jmpi (1) mb_median_start;
-cmp.nz.f0.0 (1) null:d mba_result.0<0,1,0>:d 1:d {align1};
+cmp.nz.f0.0 (1) null:d mba_result.0<0,1,0>:d 0:d {align1};
(f0.0) mov (1) mbb_result.4<1>:ud mba_result.4<0,1,0>:ud {align1};
(f0.0) mov (1) mbc_result.4<1>:ud mba_result.4<0,1,0>:ud {align1};
(f0.0) mov (1) mbb_result.20<1>:uw mba_result.20<0,1,0>:uw {align1};
@@ -475,24 +475,58 @@ send (16)
{align1};
/* IME search */
-mov (1) vme_m0.12<1>:UD SEARCH_CTRL_SINGLE + INTER_PART_MASK + INTER_SAD_HAAR:UD {align1}; /* 16x16 Source, harr */
-mov (1) vme_m0.22<1>:UW REF_REGION_SIZE {align1}; /* Reference Width&Height, 48x40 */
-
-mov (1) vme_m0.0<1>:UD vme_m0.8<0,1,0>:UD {align1};
+cmp.z.f0.0 (1) null<1>:uw quality_level_ub<0,1,0>:ub LOW_QUALITY_LEVEL:uw {align1};
+(f0.0) jmpi (1) __low_quality_search;
-add (1) vme_m0.0<1>:W vme_m0.0<0,1,0>:W -16:W {align1}; /* Reference = (x-16,y-12)-(x+32,y+28) */
-add (1) vme_m0.2<1>:W vme_m0.2<0,1,0>:W -12:W {align1};
+__high_quality_search:
+/* M3/M4 search path */
+mov (1) vme_msg_3.0<1>:UD 0x01010101:UD {align1};
+mov (1) vme_msg_3.4<1>:UD 0x10010101:UD {align1};
+mov (1) vme_msg_3.8<1>:UD 0x0F0F0F0F:UD {align1};
+mov (1) vme_msg_3.12<1>:UD 0x100F0F0F:UD {align1};
+mov (1) vme_msg_3.16<1>:UD 0x01010101:UD {align1};
+mov (1) vme_msg_3.20<1>:UD 0x10010101:UD {align1};
+mov (1) vme_msg_3.24<1>:UD 0x0F0F0F0F:UD {align1};
+mov (1) vme_msg_3.28<1>:UD 0x100F0F0F:UD {align1};
+mov (1) vme_msg_4.0<1>:UD 0x01010101:UD {align1};
+mov (1) vme_msg_4.4<1>:UD 0x10010101:UD {align1};
+mov (1) vme_msg_4.8<1>:UD 0x0F0F0F0F:UD {align1};
+mov (1) vme_msg_4.12<1>:UD 0x000F0F0F:UD {align1};
+mov (4) vme_msg_4.16<1>:UD 0x0:UD {align1};
+mov (1) vme_m0.12<1>:UD SEARCH_CTRL_SINGLE + INTER_PART_MASK + INTER_SAD_HAAR:UD {align1}; /* 16x16 Source, harr */
+mov (1) vme_m0.22<1>:UW REF_REGION_SIZE {align1}; /* Reference Width&Height, 48x40 */
mov (1) vme_m0.0<1>:W -16:W {align1};
mov (1) vme_m0.2<1>:W -12:W {align1};
-mov (1) vme_m0.4<1>:UD vme_m0.0<0,1,0>:UD {align1};
-
and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_AE:uw {align1};
(f0.0) add (1) vme_m0.0<1>:w vme_m0.0<0,1,0>:w 12:w {align1};
and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_B:uw {align1};
(f0.0) add (1) vme_m0.2<1>:w vme_m0.2<0,1,0>:w 8:w {align1};
-
+
+jmpi (1) __vme_msg;
+
+__low_quality_search:
+/* M3/M4 search path */
+mov (1) vme_msg_3.0<1>:UD 0x10010101:UD {align1};
+mov (1) vme_msg_3.4<1>:UD 0x100F0F0F:UD {align1};
+mov (1) vme_msg_3.8<1>:UD 0x10010101:UD {align1};
+mov (1) vme_msg_3.12<1>:UD 0x000F0F0F:UD {align1};
+mov (4) vme_msg_3.16<1>:UD 0x0:UD {align1};
+mov (8) vme_msg_4.16<1>:UD 0x0:UD {align1};
+
+mov (1) vme_m0.12<1>:UD SEARCH_CTRL_SINGLE + INTER_PART_MASK + INTER_SAD_HAAR:UD {align1}; /* 16x16 Source, harr */
+mov (1) vme_m0.22<1>:UW MIN_REF_REGION_SIZE {align1}; /* Reference Width&Height, 32x32 */
+mov (1) vme_m0.0<1>:W -8:W {align1};
+mov (1) vme_m0.2<1>:W -8:W {align1};
+
+and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_AE:uw {align1};
+(f0.0) add (1) vme_m0.0<1>:w vme_m0.0<0,1,0>:w 4:w {align1};
+and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_B:uw {align1};
+(f0.0) add (1) vme_m0.2<1>:w vme_m0.2<0,1,0>:w 4:w {align1};
+
+__vme_msg:
+mov (1) vme_m0.4<1>:UD vme_m0.0<0,1,0>:UD {align1};
add (2) vme_m0.0<1>:w vme_m0.0<2,2,1>:w mb_ref_win.16<2,2,1>:w {align1};
add (2) vme_m0.4<1>:w vme_m0.4<2,2,1>:w mb_ref_win.16<2,2,1>:w {align1};
mov (8) vme_msg_0.0<1>:UD vme_m0.0<8,8,1>:UD {align1};
@@ -507,23 +541,6 @@ mov (1) vme_m1.20<1>:ud mb_mvp_ref.0<0,1,0>:ud {align1};
mov (8) vme_msg_1.0<1>:UD vme_m1.0<8,8,1>:UD {align1};
mov (8) vme_msg_2<1>:UD vme_m2.0<8,8,1>:UD {align1};
-/* M3/M4 search path */
-
-mov (1) vme_msg_3.0<1>:UD 0x01010101:UD {align1};
-mov (1) vme_msg_3.4<1>:UD 0x10010101:UD {align1};
-mov (1) vme_msg_3.8<1>:UD 0x0F0F0F0F:UD {align1};
-mov (1) vme_msg_3.12<1>:UD 0x100F0F0F:UD {align1};
-mov (1) vme_msg_3.16<1>:UD 0x01010101:UD {align1};
-mov (1) vme_msg_3.20<1>:UD 0x10010101:UD {align1};
-mov (1) vme_msg_3.24<1>:UD 0x0F0F0F0F:UD {align1};
-mov (1) vme_msg_3.28<1>:UD 0x100F0F0F:UD {align1};
-
-mov (1) vme_msg_4.0<1>:UD 0x01010101:UD {align1};
-mov (1) vme_msg_4.4<1>:UD 0x10010101:UD {align1};
-mov (1) vme_msg_4.8<1>:UD 0x0F0F0F0F:UD {align1};
-mov (1) vme_msg_4.12<1>:UD 0x000F0F0F:UD {align1};
-
-mov (4) vme_msg_4.16<1>:UD 0x0:UD {align1};
send (8)
vme_msg_ind
diff --git a/src/shaders/vme/inter_frame_haswell.g75b b/src/shaders/vme/inter_frame_haswell.g75b
index d9d791d..1a60c51 100644
--- a/src/shaders/vme/inter_frame_haswell.g75b
+++ b/src/shaders/vme/inter_frame_haswell.g75b
@@ -120,7 +120,7 @@
{ 0x00000040, 0x240014a5, 0x00000b00, 0x00000b20 },
{ 0x01000010, 0x20001ca4, 0x00000400, 0x00000000 },
{ 0x00110020, 0x34001c00, 0x00001400, 0x00000080 },
- { 0x02000010, 0x20001ca4, 0x00000ae0, 0x00000001 },
+ { 0x02000010, 0x20001ca4, 0x00000ae0, 0x00000000 },
{ 0x00010001, 0x2b040021, 0x00000ae4, 0x00000000 },
{ 0x00010001, 0x2b240021, 0x00000ae4, 0x00000000 },
{ 0x00010001, 0x2b140129, 0x00000af4, 0x00000000 },
@@ -145,13 +145,13 @@
{ 0x00000001, 0x2fa401ad, 0x00000b04, 0x00000000 },
{ 0x00000001, 0x2fa801ad, 0x00000b24, 0x00000000 },
{ 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 },
- { 0x00000020, 0x34001c00, 0x00001400, 0x00000850 },
+ { 0x00000020, 0x34001c00, 0x00001400, 0x00000930 },
{ 0x00000001, 0x2ac001ad, 0x00000fe4, 0x00000000 },
{ 0x00000001, 0x2fa001ad, 0x00000ae6, 0x00000000 },
{ 0x00000001, 0x2fa401ad, 0x00000b06, 0x00000000 },
{ 0x00000001, 0x2fa801ad, 0x00000b26, 0x00000000 },
{ 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 },
- { 0x00000020, 0x34001c00, 0x00001400, 0x000007f0 },
+ { 0x00000020, 0x34001c00, 0x00001400, 0x000008d0 },
{ 0x00000001, 0x2ac201ad, 0x00000fe4, 0x00000000 },
{ 0x0020000c, 0x2a803dad, 0x00450ac0, 0x00020002 },
{ 0x00200040, 0x2a883dad, 0x00450a80, 0x00030003 },
@@ -192,18 +192,45 @@
{ 0x00000001, 0x28380021, 0x0000019c, 0x00000000 },
{ 0x00000001, 0x283c0021, 0x00000488, 0x00000000 },
{ 0x0a800031, 0x20001cac, 0x00000800, 0x040a0203 },
+ { 0x01000010, 0x20002e28, 0x000000a7, 0x00020002 },
+ { 0x00010020, 0x34001c00, 0x00001400, 0x00000160 },
+ { 0x00000001, 0x28600061, 0x00000000, 0x01010101 },
+ { 0x00000001, 0x28640061, 0x00000000, 0x10010101 },
+ { 0x00000001, 0x28680061, 0x00000000, 0x0f0f0f0f },
+ { 0x00000001, 0x286c0061, 0x00000000, 0x100f0f0f },
+ { 0x00000001, 0x28700061, 0x00000000, 0x01010101 },
+ { 0x00000001, 0x28740061, 0x00000000, 0x10010101 },
+ { 0x00000001, 0x28780061, 0x00000000, 0x0f0f0f0f },
+ { 0x00000001, 0x287c0061, 0x00000000, 0x100f0f0f },
+ { 0x00000001, 0x28800061, 0x00000000, 0x01010101 },
+ { 0x00000001, 0x28840061, 0x00000000, 0x10010101 },
+ { 0x00000001, 0x28880061, 0x00000000, 0x0f0f0f0f },
+ { 0x00000001, 0x288c0061, 0x00000000, 0x000f0f0f },
+ { 0x00400001, 0x28900061, 0x00000000, 0x00000000 },
{ 0x00000001, 0x244c0061, 0x00000000, 0x00200000 },
{ 0x00000001, 0x24560169, 0x00000000, 0x28302830 },
- { 0x00000001, 0x24400021, 0x00000448, 0x00000000 },
- { 0x00000040, 0x24403dad, 0x00000440, 0xfff0fff0 },
- { 0x00000040, 0x24423dad, 0x00000442, 0xfff4fff4 },
{ 0x00000001, 0x244001ed, 0x00000000, 0xfff0fff0 },
{ 0x00000001, 0x244201ed, 0x00000000, 0xfff4fff4 },
- { 0x00000001, 0x24440021, 0x00000440, 0x00000000 },
{ 0x01000005, 0x20002e28, 0x000000a5, 0x00600060 },
{ 0x00010040, 0x24403dad, 0x00000440, 0x000c000c },
{ 0x01000005, 0x20002e28, 0x000000a5, 0x00100010 },
{ 0x00010040, 0x24423dad, 0x00000442, 0x00080008 },
+ { 0x00000020, 0x34001c00, 0x00001400, 0x000000e0 },
+ { 0x00000001, 0x28600061, 0x00000000, 0x10010101 },
+ { 0x00000001, 0x28640061, 0x00000000, 0x100f0f0f },
+ { 0x00000001, 0x28680061, 0x00000000, 0x10010101 },
+ { 0x00000001, 0x286c0061, 0x00000000, 0x000f0f0f },
+ { 0x00400001, 0x28700061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x28900061, 0x00000000, 0x00000000 },
+ { 0x00000001, 0x244c0061, 0x00000000, 0x00200000 },
+ { 0x00000001, 0x24560169, 0x00000000, 0x20202020 },
+ { 0x00000001, 0x244001ed, 0x00000000, 0xfff8fff8 },
+ { 0x00000001, 0x244201ed, 0x00000000, 0xfff8fff8 },
+ { 0x01000005, 0x20002e28, 0x000000a5, 0x00600060 },
+ { 0x00010040, 0x24403dad, 0x00000440, 0x00040004 },
+ { 0x01000005, 0x20002e28, 0x000000a5, 0x00100010 },
+ { 0x00010040, 0x24423dad, 0x00000442, 0x00040004 },
+ { 0x00000001, 0x24440021, 0x00000440, 0x00000000 },
{ 0x00200040, 0x244035ad, 0x00450440, 0x00450a90 },
{ 0x00200040, 0x244435ad, 0x00450444, 0x00450a90 },
{ 0x00600001, 0x28000021, 0x008d0440, 0x00000000 },
@@ -214,19 +241,6 @@
{ 0x00000001, 0x24740021, 0x00000ac0, 0x00000000 },
{ 0x00600001, 0x28200021, 0x008d0460, 0x00000000 },
{ 0x00600001, 0x28400021, 0x008d0560, 0x00000000 },
- { 0x00000001, 0x28600061, 0x00000000, 0x01010101 },
- { 0x00000001, 0x28640061, 0x00000000, 0x10010101 },
- { 0x00000001, 0x28680061, 0x00000000, 0x0f0f0f0f },
- { 0x00000001, 0x286c0061, 0x00000000, 0x100f0f0f },
- { 0x00000001, 0x28700061, 0x00000000, 0x01010101 },
- { 0x00000001, 0x28740061, 0x00000000, 0x10010101 },
- { 0x00000001, 0x28780061, 0x00000000, 0x0f0f0f0f },
- { 0x00000001, 0x287c0061, 0x00000000, 0x100f0f0f },
- { 0x00000001, 0x28800061, 0x00000000, 0x01010101 },
- { 0x00000001, 0x28840061, 0x00000000, 0x10010101 },
- { 0x00000001, 0x28880061, 0x00000000, 0x0f0f0f0f },
- { 0x00000001, 0x288c0061, 0x00000000, 0x000f0f0f },
- { 0x00400001, 0x28900061, 0x00000000, 0x00000000 },
{ 0x08600031, 0x21801ca1, 0x00000800, 0x0a784000 },
{ 0x00000001, 0x25740061, 0x00000000, 0x00000000 },
{ 0x00000001, 0x25750231, 0x00000199, 0x00000000 },
diff --git a/src/shaders/vme/inter_frame_ivb.asm b/src/shaders/vme/inter_frame_ivb.asm
index b5cafdd..46f2b4b 100644
--- a/src/shaders/vme/inter_frame_ivb.asm
+++ b/src/shaders/vme/inter_frame_ivb.asm
@@ -323,7 +323,7 @@ mb_mvp_start:
add (1) tmp_reg0.0<1>:d mbb_result.0<0,1,0>:d mbc_result.0<0,1,0>:d {align1};
cmp.z.f0.0 (1) null:d tmp_reg0.0<0,1,0>:d 0:d {align1};
(-f0.0) jmpi (1) mb_median_start;
-cmp.nz.f0.0 (1) null:d mba_result.0<0,1,0>:d 1:d {align1};
+cmp.nz.f0.0 (1) null:d mba_result.0<0,1,0>:d 0:d {align1};
(f0.0) mov (1) mbb_result.4<1>:ud mba_result.4<0,1,0>:ud {align1};
(f0.0) mov (1) mbc_result.4<1>:ud mba_result.4<0,1,0>:ud {align1};
(f0.0) mov (1) mbb_result.20<1>:uw mba_result.20<0,1,0>:uw {align1};
@@ -391,12 +391,14 @@ mov (1) mb_intra_struct_ub<1>:UB input_mb_intra_ub<0,1,0>:UB {align1};
/* M0 */
/* IME search */
+cmp.z.f0.0 (1) null<1>:uw quality_level_ub<0,1,0>:ub LOW_QUALITY_LEVEL:uw {align1};
+(f0.0) jmpi (1) __low_quality_search;
+
+__high_quality_search:
mov (1) vme_m0.12<1>:UD SEARCH_CTRL_SINGLE + INTER_PART_MASK + INTER_SAD_HAAR + SUB_PEL_MODE_QUARTER:UD {align1};
/* 16x16 Source, 1/4 pixel, harr */
mov (1) vme_m0.22<1>:UW REF_REGION_SIZE {align1}; /* Reference Width&Height, 48x40 */
-mov (1) vme_m0.0<1>:UD vme_m0.8<0,1,0>:UD {align1};
-
mov (1) vme_m0.0<1>:W -16:W {align1};
mov (1) vme_m0.2<1>:W -12:W {align1};
@@ -405,6 +407,22 @@ and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_AE:uw
and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_B:uw {align1};
(f0.0) add (1) vme_m0.2<1>:w vme_m0.2<0,1,0>:w 8:w {align1};
+jmpi __vme_msg;
+
+__low_quality_search:
+mov (1) vme_m0.12<1>:UD SEARCH_CTRL_SINGLE + INTER_PART_MASK + INTER_SAD_HAAR + SUB_PEL_MODE_HALF:UD {align1};
+/* 16x16 Source, 1/2 pixel, Haar */
+mov (1) vme_m0.22<1>:UW MIN_REF_REGION_SIZE {align1}; /* Reference Width&Height, 32x32 */
+
+mov (1) vme_m0.0<1>:W -8:W {align1};
+mov (1) vme_m0.2<1>:W -8:W {align1};
+
+and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_AE:uw {align1};
+(f0.0) add (1) vme_m0.0<1>:w vme_m0.0<0,1,0>:w 4:w {align1};
+and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_B:uw {align1};
+(f0.0) add (1) vme_m0.2<1>:w vme_m0.2<0,1,0>:w 4:w {align1};
+
+__vme_msg:
mov (1) vme_m0.4<1>:UD vme_m0.0<0,1,0>:UD {align1};
add (2) vme_m0.0<1>:w vme_m0.0<2,2,1>:w mb_ref_win.16<2,2,1>:w {align1};
add (2) vme_m0.4<1>:w vme_m0.4<2,2,1>:w mb_ref_win.16<2,2,1>:w {align1};
diff --git a/src/shaders/vme/inter_frame_ivb.g7b b/src/shaders/vme/inter_frame_ivb.g7b
index 1bb41b2..7ed38c5 100644
--- a/src/shaders/vme/inter_frame_ivb.g7b
+++ b/src/shaders/vme/inter_frame_ivb.g7b
@@ -116,7 +116,7 @@
{ 0x00000040, 0x240014a5, 0x00000b00, 0x00000b20 },
{ 0x01000010, 0x20001ca4, 0x00000400, 0x00000000 },
{ 0x00110020, 0x34001c00, 0x00001400, 0x00000010 },
- { 0x02000010, 0x20001ca4, 0x00000ae0, 0x00000001 },
+ { 0x02000010, 0x20001ca4, 0x00000ae0, 0x00000000 },
{ 0x00010001, 0x2b040021, 0x00000ae4, 0x00000000 },
{ 0x00010001, 0x2b240021, 0x00000ae4, 0x00000000 },
{ 0x00010001, 0x2b140129, 0x00000af4, 0x00000000 },
@@ -141,13 +141,13 @@
{ 0x00000001, 0x2fa401ad, 0x00000b04, 0x00000000 },
{ 0x00000001, 0x2fa801ad, 0x00000b24, 0x00000000 },
{ 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 },
- { 0x00000020, 0x34001c00, 0x00001400, 0x000000bc },
+ { 0x00000020, 0x34001c00, 0x00001400, 0x000000d0 },
{ 0x00000001, 0x2ac001ad, 0x00000fe4, 0x00000000 },
{ 0x00000001, 0x2fa001ad, 0x00000ae6, 0x00000000 },
{ 0x00000001, 0x2fa401ad, 0x00000b06, 0x00000000 },
{ 0x00000001, 0x2fa801ad, 0x00000b26, 0x00000000 },
{ 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 },
- { 0x00000020, 0x34001c00, 0x00001400, 0x000000b0 },
+ { 0x00000020, 0x34001c00, 0x00001400, 0x000000c4 },
{ 0x00000001, 0x2ac201ad, 0x00000fe4, 0x00000000 },
{ 0x0020000c, 0x2a803dad, 0x00450ac0, 0x00020002 },
{ 0x00200040, 0x2a883dad, 0x00450a80, 0x00030003 },
@@ -163,15 +163,25 @@
{ 0x01000005, 0x20002e28, 0x000000a4, 0x00010001 },
{ 0x00010001, 0x247c0171, 0x00000000, 0x00020002 },
{ 0x00000001, 0x247d0231, 0x000000a5, 0x00000000 },
+ { 0x01000010, 0x20002e28, 0x000000a7, 0x00020002 },
+ { 0x00010020, 0x34001c00, 0x00001400, 0x00000012 },
{ 0x00000001, 0x244c0061, 0x00000000, 0x00203000 },
{ 0x00000001, 0x24560169, 0x00000000, 0x28302830 },
- { 0x00000001, 0x24400021, 0x00000448, 0x00000000 },
{ 0x00000001, 0x244001ed, 0x00000000, 0xfff0fff0 },
{ 0x00000001, 0x244201ed, 0x00000000, 0xfff4fff4 },
{ 0x01000005, 0x20002e28, 0x000000a5, 0x00600060 },
{ 0x00010040, 0x24403dad, 0x00000440, 0x000c000c },
{ 0x01000005, 0x20002e28, 0x000000a5, 0x00100010 },
{ 0x00010040, 0x24423dad, 0x00000442, 0x00080008 },
+ { 0x00000020, 0x34001c00, 0x00001400, 0x00000010 },
+ { 0x00000001, 0x244c0061, 0x00000000, 0x00201000 },
+ { 0x00000001, 0x24560169, 0x00000000, 0x20202020 },
+ { 0x00000001, 0x244001ed, 0x00000000, 0xfff8fff8 },
+ { 0x00000001, 0x244201ed, 0x00000000, 0xfff8fff8 },
+ { 0x01000005, 0x20002e28, 0x000000a5, 0x00600060 },
+ { 0x00010040, 0x24403dad, 0x00000440, 0x00040004 },
+ { 0x01000005, 0x20002e28, 0x000000a5, 0x00100010 },
+ { 0x00010040, 0x24423dad, 0x00000442, 0x00040004 },
{ 0x00000001, 0x24440021, 0x00000440, 0x00000000 },
{ 0x00200040, 0x244035ad, 0x00450440, 0x00450a90 },
{ 0x00200040, 0x244435ad, 0x00450444, 0x00450a90 },
diff --git a/src/shaders/vme/intra_frame_gen8.asm b/src/shaders/vme/intra_frame_gen8.asm
new file mode 100644
index 0000000..682d146
--- /dev/null
+++ b/src/shaders/vme/intra_frame_gen8.asm
@@ -0,0 +1,185 @@
+/*
+ * Copyright © <2010>, Intel Corporation.
+ *
+ * This program is licensed under the terms and conditions of the
+ * Eclipse Public License (EPL), version 1.0. The full text of the EPL is at
+ * http://www.opensource.org/licenses/eclipse-1.0.php.
+ *
+ */
+// Module name: IntraFrame_gen8.asm
+//
+// Make intra prediction estimation for Intra frame on Gen8
+//
+
+//
+// Now, begin source code....
+//
+
+/*
+ * __START
+ */
+__INTRA_START: /* setup for the MB at orig_xy: clear scratch registers, then fill the read / VME / write headers */
+mov (16) tmp_reg0.0<1>:UD 0x0:UD {align1}; /* zero 16 dwords starting at tmp_reg0 */
+mov (16) tmp_reg2.0<1>:UD 0x0:UD {align1}; /* same, starting at tmp_reg2 */
+mov (16) tmp_reg4.0<1>:UD 0x0:UD {align1} ; /* same, starting at tmp_reg4 */
+mov (16) tmp_reg6.0<1>:UD 0x0:UD {align1} ; /* same, starting at tmp_reg6 */
+
+shl (2) read0_header.0<1>:D orig_xy_ub<2,2,1>:UB 4:UW {align1}; /* (x, y) * 16 */
+add (1) read0_header.0<1>:D read0_header.0<0,1,0>:D -8:W {align1}; /* X offset */
+add (1) read0_header.4<1>:D read0_header.4<0,1,0>:D -1:W {align1}; /* Y offset */
+mov (1) read0_header.8<1>:UD BLOCK_32X1 {align1}; /* BLOCK_32X1 into dword 2 of the row-read header (constant defined outside this file) */
+mov (1) read0_header.20<1>:UB thread_id_ub {align1}; /* dispatch id */
+
+shl (2) read1_header.0<1>:D orig_xy_ub<2,2,1>:UB 4:UW {align1}; /* (x, y) * 16 */
+add (1) read1_header.0<1>:D read1_header.0<0,1,0>:D -4:W {align1}; /* X offset */
+mov (1) read1_header.8<1>:UD BLOCK_4X16 {align1}; /* BLOCK_4X16 into dword 2 of the column-read header */
+mov (1) read1_header.20<1>:UB thread_id_ub {align1}; /* dispatch id */
+
+shl (2) vme_m0.8<1>:UW orig_xy_ub<2,2,1>:UB 4:UW {align1}; /* (x, y) * 16 */
+mov (1) vme_m0.20<1>:UB thread_id_ub {align1}; /* dispatch id */
+
+mul (1) obw_m0.8<1>:UD w_in_mb_uw<0,1,0>:UW orig_y_ub<0,1,0>:UB {align1}; /* w_in_mb * y */
+add (1) obw_m0.8<1>:UD obw_m0.8<0,1,0>:UD orig_x_ub<0,1,0>:UB {align1}; /* + x: linear index of this MB */
+mul (1) obw_m0.8<1>:UD obw_m0.8<0,1,0>:UD 0x02:UD {align1}; /* index * 2; presumably the oword offset of this MB's 2-oword output record - TODO confirm */
+mov (1) obw_m0.20<1>:UB thread_id_ub {align1}; /* dispatch id */
+
+/*
+ * Media Read Message -- fetch Luma neighbor edge pixels
+ */
+/* ROW */
+mov (8) msg_reg0.0<1>:UD read0_header.0<8,8,1>:UD {align1}; /* message header = read0_header */
+send (8) msg_ind INEP_ROW<1>:UB null read(BIND_IDX_INEP, 0, 0, 4) mlen 1 rlen 1 {align1}; /* one register returned in INEP_ROW */
+
+/* COL */
+mov (8) msg_reg0.0<1>:UD read1_header.0<8,8,1>:UD {align1}; /* message header = read1_header */
+send (8) msg_ind INEP_COL0<1>:UB null read(BIND_IDX_INEP, 0, 0, 4) mlen 1 rlen 2 {align1}; /* two registers returned starting at INEP_COL0 */
+
+/*
+ * Media Read Message -- fetch Chroma neighbor edge pixels
+ */
+/* ROW */
+shl (2) read0_header.0<1>:D orig_xy_ub<2,2,1>:UB 3:UW {align1}; /* (x, y) * 8; the mul below turns x into x * 16 */
+mul (1) read0_header.0<1>:D read0_header.0<0,1,0>:D 2:W {align1}; /* x * 16 (y stays y * 8) */
+add (1) read0_header.0<1>:D read0_header.0<0,1,0>:D -8:W {align1}; /* X offset */
+add (1) read0_header.4<1>:D read0_header.4<0,1,0>:D -1:W {align1}; /* Y offset */
+mov (8) msg_reg0.0<1>:UD read0_header.0<8,8,1>:UD {align1}; /* read0_header.8 is not rewritten here, so it still holds BLOCK_32X1 */
+send (8) msg_ind CHROMA_ROW<1>:UB null read(BIND_IDX_CBCR, 0, 0, 4) mlen 1 rlen 1 {align1}; /* one register returned in CHROMA_ROW */
+
+/* COL */
+shl (2) read1_header.0<1>:D orig_xy_ub<2,2,1>:UB 3:UW {align1}; /* (x, y) * 8; the mul below turns x into x * 16 */
+mul (1) read1_header.0<1>:D read1_header.0<0,1,0>:D 2:W {align1}; /* x * 16 (y stays y * 8) */
+add (1) read1_header.0<1>:D read1_header.0<0,1,0>:D -4:W {align1}; /* X offset */
+mov (1) read1_header.8<1>:UD BLOCK_8X4 {align1}; /* replace the luma BLOCK_4X16 setting with BLOCK_8X4 for the chroma column */
+mov (8) msg_reg0.0<1>:UD read1_header.0<8,8,1>:UD {align1}; /* message header = read1_header */
+send (8) msg_ind CHROMA_COL<1>:UB null read(BIND_IDX_CBCR, 0, 0, 4) mlen 1 rlen 1 {align1}; /* one register returned in CHROMA_COL */
+
+/* m2, get the MV/MB cost passed through the constant buffer
+when the EU thread is created by MEDIA_OBJECT */
+mov (8) vme_msg_2<1>:UD r1.0<8,8,1>:UD {align1}; /* copy all 8 dwords of r1 */
+
+/* m3. This is changed for FWD/BWD cost center */
+mov (8) vme_msg_3<1>:UD 0x0:UD {align1}; /* all zero in this kernel */
+
+/* m4. */
+mov (8) vme_msg_4<1>:ud 0x0:ud {align1}; /* all zero in this kernel */
+
+/* m5: the luma row read into INEP_ROW, with its leading bytes masked */
+mov (1) INEP_ROW.0<1>:UD 0x0:UD {align1}; /* clear bytes 0-3 */
+and (1) INEP_ROW.4<1>:UD INEP_ROW.4<0,1,0>:UD 0xFF000000:UD {align1}; /* keep only byte 7; with the -8/-1 read offsets that is the pixel at (x-1, y-1) */
+mov (8) vme_msg_5<1>:UD INEP_ROW.0<8,8,1>:UD {align1};
+
+mov (1) tmp_reg0.0<1>:UW LUMA_CHROMA_MODE:UW {align1}; /* staged through tmp_reg0 so a single byte can be copied below */
+/* Write the low byte of LUMA_CHROMA_MODE into byte 5 of m5 */
+mov (1) vme_msg_5.5<1>:UB tmp_reg0.0<0,1,0>:UB {align1};
+
+/* m6 */
+mov (8) vme_msg_6<1>:UD 0x0:UD {align1};
+mov (16) vme_msg_6.0<1>:UB INEP_COL0.3<32,8,4>:UB {align1}; /* gather byte 3 of each 4-byte group: 16 bytes, the last column of the block read into INEP_COL0 */
+mov (1) vme_msg_6.16<1>:UD INTRA_PREDICTORE_MODE {align1};
+
+/* the penalty for Intra mode */
+mov (1) vme_msg_6.28<1>:UD 0x010101:UD {align1};
+mov (1) vme_msg_6.20<1>:UW CHROMA_ROW.6<0,1,0>:UW {align1}; /* bytes 6-7 of the chroma row read */
+
+
+/* m7: chroma neighbours */
+
+mov (4) vme_msg_7.16<1>:UD CHROMA_ROW.8<4,4,1>:UD {align1}; /* 16 bytes of the chroma row, starting at byte 8 */
+mov (8) vme_msg_7.0<1>:UW CHROMA_COL.2<16,8,2>:UW {align1}; /* every second word of CHROMA_COL starting at byte 2: the last word of each 4-byte group */
+
+/*
+ * VME message
+ */
+
+/* m1 */
+mov (1) intra_flag<1>:UW 0x0:UW {align1} ;
+and.z.f0.0 (1) null<1>:UW transform_8x8_ub<0,1,0>:UB 1:UW {align1}; /* f0.0 is set when bit 0 of transform_8x8_ub is clear */
+(f0.0) mov (1) intra_part_mask_ub<1>:UB LUMA_INTRA_8x8_DISABLE {align1}; /* in that case mask out the 8x8 intra partition */
+
+/* assign MB intra struct from the thread payload */
+mov (1) mb_intra_struct_ub<1>:UB input_mb_intra_ub<0,1,0>:UB {align1};
+
+/* Disable DC HAAR component when calculating HAAR SATD block */
+mov (1) tmp_reg0.0<1>:UW DC_HARR_DISABLE:UW {align1}; /* staged through tmp_reg0 so a single byte can be copied below */
+mov (1) vme_m1.30<1>:UB tmp_reg0.0<0,1,0>:UB {align1};
+
+mov (8) vme_msg_1<1>:UD vme_m1.0<8,8,1>:UD {align1}; /* commit m1 to the message payload */
+/* m0 */
+/* 16x16 Source, Intra HAAR */
+add (1) vme_m0.12<1>:UD vme_m0.12<0,1,0>:ud INTRA_SAD_HAAR:UD {align1}; /* added to, not overwriting, the existing value of vme_m0.12 */
+mov (8) vme_msg_0<1>:UD vme_m0.0<8,8,1>:UD {align1}; /* commit m0 to the message payload */
+
+/* after verification it will be passed by using payload */
+send (8)
+ vme_msg_ind
+ vme_wb<1>:UD
+ null
+ cre(
+ BIND_IDX_VME,
+ VME_SIC_MESSAGE_TYPE
+ )
+ mlen sic_vme_msg_length
+ rlen vme_wb_length
+ {align1}; /* result is returned in vme_wb */
+/*
+ * Oword Block Write message: header from obw_m0, one data register repacked from vme_wb
+ */
+mov (8) msg_reg0.0<1>:UD obw_m0<8,8,1>:UD {align1}; /* message header (offset in dword 2, dispatch id in byte 20) */
+
+mov (1) msg_reg1.0<1>:UD vme_wb.0<0,1,0>:UD {align1}; /* vme_wb dword 0 -> output dword 0 */
+mov (1) msg_reg1.4<1>:UD vme_wb.16<0,1,0>:UD {align1}; /* vme_wb dword 4 -> output dword 1 */
+mov (1) msg_reg1.8<1>:UD vme_wb.20<0,1,0>:UD {align1}; /* vme_wb dword 5 -> output dword 2 */
+mov (1) msg_reg1.12<1>:UD vme_wb.24<0,1,0>:UD {align1}; /* vme_wb dword 6 -> output dword 3 */
+
+/* Distortion, Intra (17-16), */
+mov (1) msg_reg1.16<1>:UW vme_wb.12<0,1,0>:UW {align1}; /* low word of vme_wb dword 3 */
+
+mov (1) msg_reg1.20<1>:UD vme_wb.8<0,1,0>:UD {align1}; /* vme_wb dword 2 -> output dword 5 */
+/* VME clock counts */
+mov (1) msg_reg1.24<1>:UD vme_wb.28<0,1,0>:UD {align1};
+
+mov (1) msg_reg1.28<1>:UD obw_m0.8<0,1,0>:UD {align1}; /* last dword records the write offset used for this MB */
+
+/* bind index 3, write 2 oword (32bytes), msg type: 8(OWord Block Write) */
+send (16)
+ msg_ind
+ obw_wb
+ null
+ data_port(
+ OBW_CACHE_TYPE,
+ OBW_MESSAGE_TYPE,
+ OBW_CONTROL_2,
+ OBW_BIND_IDX,
+ OBW_WRITE_COMMIT_CATEGORY,
+ OBW_HEADER_PRESENT
+ )
+ mlen 2
+ rlen obw_wb_length
+ {align1}; /* mlen 2 = header register + one data register */
+
+__EXIT: /* end of kernel: hand the thread back via the thread spawner */
+/*
+ * kill thread
+ */
+mov (8) ts_msg_reg0<1>:UD r0<8,8,1>:UD {align1}; /* message payload is a copy of r0 */
+send (16) ts_msg_ind acc0<1>UW null thread_spawner(0, 0, 1) mlen 1 rlen 0 {align1 EOT}; /* EOT marks this send as the end of the thread; no response (rlen 0) */
diff --git a/src/shaders/vme/intra_frame_gen8.g8a b/src/shaders/vme/intra_frame_gen8.g8a
new file mode 100644
index 0000000..859c72c
--- /dev/null
+++ b/src/shaders/vme/intra_frame_gen8.g8a
@@ -0,0 +1,2 @@
+#include "vme8.inc"
+#include "intra_frame_gen8.asm"
diff --git a/src/shaders/vme/intra_frame_gen8.g8b b/src/shaders/vme/intra_frame_gen8.g8b
new file mode 100644
index 0000000..56c7283
--- /dev/null
+++ b/src/shaders/vme/intra_frame_gen8.g8b
@@ -0,0 +1,72 @@
+ { 0x00800001, 0x24000608, 0x00000000, 0x00000000 },
+ { 0x00800001, 0x24400608, 0x00000000, 0x00000000 },
+ { 0x00800001, 0x24800608, 0x00000000, 0x00000000 },
+ { 0x00800001, 0x24c00608, 0x00000000, 0x00000000 },
+ { 0x00200009, 0x24002228, 0x164500a0, 0x00040004 },
+ { 0x00000040, 0x24000a28, 0x1e000400, 0xfff8fff8 },
+ { 0x00000040, 0x24040a28, 0x1e000404, 0xffffffff },
+ { 0x00000001, 0x24080e08, 0x08000000, 0x0000001f },
+ { 0x00000001, 0x24142288, 0x00000014, 0x00000000 },
+ { 0x00200009, 0x24202228, 0x164500a0, 0x00040004 },
+ { 0x00000040, 0x24200a28, 0x1e000420, 0xfffcfffc },
+ { 0x00000001, 0x24280e08, 0x08000000, 0x000f0003 },
+ { 0x00000001, 0x24342288, 0x00000014, 0x00000000 },
+ { 0x00200009, 0x24482248, 0x164500a0, 0x00040004 },
+ { 0x00000001, 0x24542288, 0x00000014, 0x00000000 },
+ { 0x00000041, 0x24881208, 0x220000a2, 0x000000a1 },
+ { 0x00000040, 0x24880208, 0x22000488, 0x000000a0 },
+ { 0x00000041, 0x24880208, 0x06000488, 0x00000002 },
+ { 0x00000001, 0x24942288, 0x00000014, 0x00000000 },
+ { 0x00600001, 0x28000208, 0x008d0400, 0x00000000 },
+ { 0x04600031, 0x23800a88, 0x0e000800, 0x02190004 },
+ { 0x00600001, 0x28000208, 0x008d0420, 0x00000000 },
+ { 0x04600031, 0x23a00a88, 0x0e000800, 0x02290004 },
+ { 0x00200009, 0x24002228, 0x164500a0, 0x00030003 },
+ { 0x00000041, 0x24000a28, 0x1e000400, 0x00020002 },
+ { 0x00000040, 0x24000a28, 0x1e000400, 0xfff8fff8 },
+ { 0x00000040, 0x24040a28, 0x1e000404, 0xffffffff },
+ { 0x00600001, 0x28000208, 0x008d0400, 0x00000000 },
+ { 0x04600031, 0x26000a88, 0x0e000800, 0x02190006 },
+ { 0x00200009, 0x24202228, 0x164500a0, 0x00030003 },
+ { 0x00000041, 0x24200a28, 0x1e000420, 0x00020002 },
+ { 0x00000040, 0x24200a28, 0x1e000420, 0xfffcfffc },
+ { 0x00000001, 0x24280e08, 0x08000000, 0x00070003 },
+ { 0x00600001, 0x28000208, 0x008d0420, 0x00000000 },
+ { 0x04600031, 0x26200a88, 0x0e000800, 0x02190006 },
+ { 0x00600001, 0x28400208, 0x008d0020, 0x00000000 },
+ { 0x00600001, 0x28600608, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x28800608, 0x00000000, 0x00000000 },
+ { 0x00000001, 0x23800608, 0x00000000, 0x00000000 },
+ { 0x00000005, 0x23840208, 0x06000384, 0xff000000 },
+ { 0x00600001, 0x28a00208, 0x008d0380, 0x00000000 },
+ { 0x00000001, 0x24001648, 0x10000000, 0x00000000 },
+ { 0x00000001, 0x28a52288, 0x00000400, 0x00000000 },
+ { 0x00600001, 0x28c00608, 0x00000000, 0x00000000 },
+ { 0x00800001, 0x28c02288, 0x00cf03a3, 0x00000000 },
+ { 0x00000001, 0x28d00608, 0x00000000, 0x11111111 },
+ { 0x00000001, 0x28dc0608, 0x00000000, 0x00010101 },
+ { 0x00000001, 0x28d41248, 0x00000606, 0x00000000 },
+ { 0x00400001, 0x28f00208, 0x00690608, 0x00000000 },
+ { 0x00600001, 0x28e01248, 0x00ae0622, 0x00000000 },
+ { 0x00000001, 0x247c1648, 0x10000000, 0x00000000 },
+ { 0x01000005, 0x20002240, 0x160000a4, 0x00010001 },
+ { 0x00010001, 0x247c0e88, 0x08000000, 0x00000002 },
+ { 0x00000001, 0x247d2288, 0x000000a5, 0x00000000 },
+ { 0x00000001, 0x24001648, 0x10000000, 0x00200020 },
+ { 0x00000001, 0x247e2288, 0x00000400, 0x00000000 },
+ { 0x00600001, 0x28200208, 0x008d0460, 0x00000000 },
+ { 0x00000040, 0x244c0208, 0x0600044c, 0x00800000 },
+ { 0x00600001, 0x28000208, 0x008d0440, 0x00000000 },
+ { 0x0d600031, 0x21800a08, 0x0e000800, 0x10782000 },
+ { 0x00600001, 0x28000208, 0x008d0480, 0x00000000 },
+ { 0x00000001, 0x28200208, 0x00000180, 0x00000000 },
+ { 0x00000001, 0x28240208, 0x00000190, 0x00000000 },
+ { 0x00000001, 0x28280208, 0x00000194, 0x00000000 },
+ { 0x00000001, 0x282c0208, 0x00000198, 0x00000000 },
+ { 0x00000001, 0x28301248, 0x0000018c, 0x00000000 },
+ { 0x00000001, 0x28340208, 0x00000188, 0x00000000 },
+ { 0x00000001, 0x28380208, 0x0000019c, 0x00000000 },
+ { 0x00000001, 0x283c0208, 0x00000488, 0x00000000 },
+ { 0x0a800031, 0x20000a60, 0x0e000800, 0x040a0203 },
+ { 0x00600001, 0x2e000208, 0x008d0000, 0x00000000 },
+ { 0x07800031, 0x24000a40, 0x0e000e00, 0x82000010 },
diff --git a/src/shaders/vme/mpeg2_inter_frame.g7a b/src/shaders/vme/mpeg2_inter_frame.g7a
deleted file mode 100644
index 937ea9b..0000000
--- a/src/shaders/vme/mpeg2_inter_frame.g7a
+++ /dev/null
@@ -1,3 +0,0 @@
-#include "vme.inc"
-#include "vme7_mpeg2.inc"
-#include "inter_frame.asm"
diff --git a/src/shaders/vme/mpeg2_inter_frame.g7b b/src/shaders/vme/mpeg2_inter_frame.g7b
deleted file mode 100644
index 40aeb3f..0000000
--- a/src/shaders/vme/mpeg2_inter_frame.g7b
+++ /dev/null
@@ -1,105 +0,0 @@
- { 0x00800001, 0x24000061, 0x00000000, 0x00000000 },
- { 0x00800001, 0x24400061, 0x00000000, 0x00000000 },
- { 0x00800001, 0x24600061, 0x00000000, 0x00000000 },
- { 0x00200009, 0x24002e25, 0x004500a0, 0x00040004 },
- { 0x00000040, 0x24003ca5, 0x00000400, 0xfff8fff8 },
- { 0x00000040, 0x24043ca5, 0x00000404, 0xffffffff },
- { 0x00000001, 0x240800e1, 0x00000000, 0x0000001f },
- { 0x00000001, 0x24140231, 0x00000014, 0x00000000 },
- { 0x00200009, 0x24202e25, 0x004500a0, 0x00040004 },
- { 0x00000040, 0x24203ca5, 0x00000420, 0xfffcfffc },
- { 0x00000001, 0x242800e1, 0x00000000, 0x000f0003 },
- { 0x00000001, 0x24340231, 0x00000014, 0x00000000 },
- { 0x00200009, 0x24482e29, 0x004500a0, 0x00040004 },
- { 0x00000001, 0x244001ed, 0x00000000, 0xfff0fff0 },
- { 0x00000001, 0x244201ed, 0x00000000, 0xfff4fff4 },
- { 0x00000001, 0x244c0061, 0x00000000, 0x7e203000 },
- { 0x00000001, 0x24540231, 0x00000014, 0x00000000 },
- { 0x00000001, 0x24560169, 0x00000000, 0x28302830 },
- { 0x00000001, 0x24600061, 0x00000000, 0x00000002 },
- { 0x00000001, 0x24640061, 0x00000000, 0x40000000 },
- { 0x00000001, 0x24640231, 0x0000009c, 0x00000000 },
- { 0x00000001, 0x24680061, 0x00000000, 0x30003030 },
- { 0x00000041, 0x24884521, 0x000000a2, 0x000000a1 },
- { 0x00000040, 0x24884421, 0x00000488, 0x000000a0 },
- { 0x00000041, 0x24880c21, 0x00000488, 0x0000000a },
- { 0x00000001, 0x24940231, 0x00000014, 0x00000000 },
- { 0x00600001, 0x28000021, 0x008d0400, 0x00000000 },
- { 0x04600031, 0x22401cb1, 0x00000800, 0x02190004 },
- { 0x00600001, 0x28000021, 0x008d0420, 0x00000000 },
- { 0x04600031, 0x22801cb1, 0x00000800, 0x02290004 },
- { 0x00600001, 0x28000021, 0x008d0440, 0x00000000 },
- { 0x00000001, 0x247c0169, 0x00000000, 0x00000000 },
- { 0x01000005, 0x20002e28, 0x000000a4, 0x00010001 },
- { 0x00010001, 0x247c00f1, 0x00000000, 0x00000002 },
- { 0x02000010, 0x20002e28, 0x000000a0, 0x00000000 },
- { 0x00010040, 0x247d1e31, 0x0000047d, 0x00000060 },
- { 0x02000010, 0x20002e28, 0x000000a1, 0x00000000 },
- { 0x00010040, 0x247d1e31, 0x0000047d, 0x00000010 },
- { 0x02000041, 0x20004628, 0x000000a0, 0x000000a1 },
- { 0x00010040, 0x247d1e31, 0x0000047d, 0x00000004 },
- { 0x00000040, 0x25202e2d, 0x000000a0, 0x00010001 },
- { 0x00000040, 0x2520352d, 0x000000a2, 0x00004520 },
- { 0x02000041, 0x200045a0, 0x00000520, 0x000000a1 },
- { 0x00010040, 0x247d1e31, 0x0000047d, 0x00000008 },
- { 0x02000005, 0x20002e28, 0x000000a4, 0x00020002 },
- { 0x00010005, 0x247d1e31, 0x0000047d, 0x000000e0 },
- { 0x00600001, 0x28200021, 0x008d0460, 0x00000000 },
- { 0x00600001, 0x28400061, 0x00000000, 0x00000000 },
- { 0x00000001, 0x22400061, 0x00000000, 0x00000000 },
- { 0x00000005, 0x22440c21, 0x00000244, 0xff000000 },
- { 0x00600001, 0x28600021, 0x008d0240, 0x00000000 },
- { 0x00600001, 0x288000e1, 0x00000000, 0x00000000 },
- { 0x00800001, 0x28800231, 0x00cf0283, 0x00000000 },
- { 0x00000001, 0x28900061, 0x00000000, 0x11111111 },
- { 0x08600031, 0x21801cbd, 0x00000800, 0x0a686000 },
- { 0x00600001, 0x28000021, 0x008d0480, 0x00000000 },
- { 0x00600001, 0x24a00021, 0x008d01a0, 0x00000000 },
- { 0x00600001, 0x24c00021, 0x008d01c0, 0x00000000 },
- { 0x00600001, 0x24e00021, 0x008d01e0, 0x00000000 },
- { 0x00600001, 0x25000021, 0x008d0200, 0x00000000 },
- { 0x00600001, 0x28200021, 0x008d04a0, 0x00000000 },
- { 0x00600001, 0x28400021, 0x008d04c0, 0x00000000 },
- { 0x00600001, 0x28600021, 0x008d04e0, 0x00000000 },
- { 0x00600001, 0x28800021, 0x008d0500, 0x00000000 },
- { 0x0a800031, 0x20001cac, 0x00000800, 0x0a0a0403 },
- { 0x00000040, 0x28080c21, 0x00000488, 0x00000008 },
- { 0x01000005, 0x20000c20, 0x00000180, 0x00002000 },
- { 0x00110020, 0x34001c00, 0x00001400, 0x0000001c },
- { 0x00000001, 0x25420169, 0x00000000, 0x00000000 },
- { 0x00000001, 0x25440061, 0x00000000, 0x00000000 },
- { 0x00010005, 0x25422d29, 0x00000182, 0x00200020 },
- { 0x00010008, 0x25422d29, 0x00200542, 0x00050005 },
- { 0x00010041, 0x25442d21, 0x00000542, 0x00600060 },
- { 0x00010040, 0x25442c21, 0x00000544, 0x00200020 },
- { 0x00010009, 0x25422d29, 0x00000542, 0x00050005 },
- { 0x00010040, 0x25422d29, 0x00000542, 0x00400040 },
- { 0x00000040, 0x25422d29, 0x00000542, 0x000e000e },
- { 0x00000001, 0x28200129, 0x00000180, 0x00000000 },
- { 0x00000001, 0x28220129, 0x00000542, 0x00000000 },
- { 0x00000001, 0x28240021, 0x0000019c, 0x00000000 },
- { 0x00000001, 0x28280021, 0x00000544, 0x00000000 },
- { 0x00000020, 0x34001c00, 0x00001400, 0x00000008 },
- { 0x00000001, 0x28200021, 0x00000180, 0x00000000 },
- { 0x00000001, 0x28240021, 0x00000190, 0x00000000 },
- { 0x00000001, 0x28280021, 0x00000194, 0x00000000 },
- { 0x00000001, 0x282c0021, 0x00000198, 0x00000000 },
- { 0x0a800031, 0x20001cac, 0x00000800, 0x040a0003 },
- { 0x00000040, 0x20a02e31, 0x000000a0, 0x00010001 },
- { 0x00000040, 0x24482d29, 0x00000448, 0x00100010 },
- { 0x01000010, 0x20004528, 0x000000a2, 0x000000a0 },
- { 0x00010001, 0x20a00171, 0x00000000, 0x00000000 },
- { 0x00010040, 0x20a12e31, 0x000000a1, 0x00010001 },
- { 0x00010001, 0x24480169, 0x00000000, 0x00000000 },
- { 0x00010040, 0x244a2d29, 0x0000044a, 0x00100010 },
- { 0x00200009, 0x24002e25, 0x004500a0, 0x00040004 },
- { 0x00000040, 0x24003ca5, 0x00000400, 0xfff8fff8 },
- { 0x00000040, 0x24043ca5, 0x00000404, 0xffffffff },
- { 0x00200009, 0x24202e25, 0x004500a0, 0x00040004 },
- { 0x00000040, 0x24203ca5, 0x00000420, 0xfffcfffc },
- { 0x00200009, 0x24482e29, 0x004500a0, 0x00040004 },
- { 0x00000040, 0x24882c21, 0x00000488, 0x000a000a },
- { 0x01000040, 0x20a63dad, 0x020000a6, 0xffffffff },
- { 0x00110020, 0x34001c00, 0x02001400, 0xffffff66 },
- { 0x00600001, 0x28000021, 0x008d0000, 0x00000000 },
- { 0x07800031, 0x24001ca8, 0x00000800, 0x82000010 },
diff --git a/src/shaders/vme/mpeg2_inter_gen8.asm b/src/shaders/vme/mpeg2_inter_gen8.asm
new file mode 100644
index 0000000..6dd8599
--- /dev/null
+++ b/src/shaders/vme/mpeg2_inter_gen8.asm
@@ -0,0 +1,868 @@
+/*
+ * Copyright © <2013>, Intel Corporation.
+ *
+ * This program is licensed under the terms and conditions of the
+ * Eclipse Public License (EPL), version 1.0. The full text of the EPL is at
+ * http://www.opensource.org/licenses/eclipse-1.0.php.
+ *
+ */
+// Module name: mpeg2_inter_gen8.asm
+//
+// Make inter prediction estimation for MPEG2 Inter-frame on gen8
+//
+
+//
+// Now, begin source code....
+//
+
+#define SAVE_RET add (1) RETURN_REG<1>:ud ip:ud 32:ud /* RETURN_REG = ip + 32; always followed by a jmpi, so presumably the address just past that jmpi - TODO confirm ip semantics */
+#define RETURN mov (1) ip:ud RETURN_REG<0,1,0>:ud /* jump back by loading ip from RETURN_REG */
+
+/*
+ * __START
+ */
+__INTER_START: /* setup for the MB at orig_xy: clear scratch and result registers, fill the VME / write headers */
+mov (16) tmp_reg0.0<1>:UD 0x0:UD {align1}; /* zero 16 dwords starting at tmp_reg0 */
+mov (16) tmp_reg2.0<1>:UD 0x0:UD {align1}; /* same, starting at tmp_reg2 */
+mov (16) tmp_reg4.0<1>:UD 0x0:UD {align1} ; /* same, starting at tmp_reg4 */
+mov (16) tmp_reg6.0<1>:UD 0x0:UD {align1} ; /* same, starting at tmp_reg6 */
+
+
+shl (2) vme_m0.8<1>:UW orig_xy_ub<2,2,1>:UB 4:UW {align1}; /* (x, y) * 16 */
+mov (1) vme_m0.20<1>:UB thread_id_ub {align1}; /* dispatch id */
+
+mul (1) obw_m0.8<1>:UD w_in_mb_uw<0,1,0>:UW orig_y_ub<0,1,0>:UB {align1}; /* w_in_mb * y */
+add (1) obw_m0.8<1>:UD obw_m0.8<0,1,0>:UD orig_x_ub<0,1,0>:UB {align1}; /* + x: linear index of this MB */
+mul (1) obw_m0.8<1>:UD obw_m0.8<0,1,0>:UD 24:UD {align1}; /* index * 24: offset of this MB's output record (units presumably owords - TODO confirm) */
+mov (1) obw_m0.20<1>:UB thread_id_ub {align1}; /* dispatch id */
+
+
+shl (2) pic_ref.0<1>:uw r4.24<2,2,1>:uw 4:uw {align1}; /* two words from r4.24, each * 16 */
+mov (2) pic_ref.16<1>:uw r4.20<2,2,1>:uw {align1}; /* two words from r4.20, copied unchanged */
+mov (8) mb_mvp_ref.0<1>:ud 0:ud {align1}; /* predicted MV defaults to zero */
+mov (8) mb_ref_win.0<1>:ud 0:ud {align1};
+mov (8) mba_result.0<1>:ud 0x0:ud {align1}; /* neighbour A/B/C results start as all zero */
+mov (8) mbb_result.0<1>:ud 0x0:ud {align1};
+mov (8) mbc_result.0<1>:ud 0x0:ud {align1};
+
+and.z.f0.0 (1) null:uw mb_hwdep<0,1,0>:uw 0x04:uw {align1}; /* f0.0 is set when bit 2 of mb_hwdep is clear */
+(f0.0) jmpi (1) __mb_hwdep_end; /* in that case skip the neighbour MB reads and MV prediction */
+/* read back the data for MB A */
+/* the layout of MB result is: rX.0(Available), rX.4(MVa), rX.8(MVb), rX.16(Pred_L0 flag),
+* rX.18 (Pred_L1 flag), rX.20(Forward reference ID), rX.22(Backward reference ID)
+*/
+mba_start: /* neighbour A: the MB at (x - 1, y) */
+mov (8) mb_msg0.0<1>:ud 0:ud {align1}; /* start from a zeroed read header */
+and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_AE:uw {align1}; /* f0.0 is set when the AE availability bit is clear */
+/* MB A doesn't exist. Zero MV. mba_result.0 (available) stays zero and ref ID = -1 */
+(f0.0) mov (2) mba_result.20<1>:w -1:w {align1};
+(f0.0) jmpi (1) mbb_start;
+mov (1) mba_result.0<1>:d MB_AVAIL {align1}; /* mark MB A as available */
+mov (2) tmp_reg0.0<1>:UW orig_xy_ub<2,2,1>:UB {align1}; /* tmp_reg0.0 = x, tmp_reg0.2 = y */
+add (1) tmp_reg0.0<1>:w tmp_reg0.0<0,1,0>:w -1:w {align1}; /* x - 1 */
+mul (1) mb_msg0.8<1>:UD w_in_mb_uw<0,1,0>:UW tmp_reg0.2<0,1,0>:UW {align1}; /* w_in_mb * y */
+add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD tmp_reg0.0<0,1,0>:uw {align1}; /* + (x - 1): linear index of MB A */
+mul (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD 24:UD {align1}; /* index * 24, the same record stride used for obw_m0.8 */
+mov (1) mb_msg0.20<1>:UB thread_id_ub {align1}; /* dispatch id */
+
+/* bind index 3, read 4 oword (64bytes), msg type: 0(OWord Block Read) */
+send (16)
+ mb_ind
+ mb_wb.0<1>:ud
+ NULL
+ data_port(
+ OBR_CACHE_TYPE,
+ OBR_MESSAGE_TYPE,
+ OBR_CONTROL_4,
+ OBR_BIND_IDX,
+ OBR_WRITE_COMMIT_CATEGORY,
+ OBR_HEADER_PRESENT
+ )
+ mlen 1
+ rlen 2
+ {align1};
+
+/* TODO: RefID is required after multi-references are added */
+cmp.l.f0.0 (1) null:w mb_intra_wb.16<0,1,0>:uw mb_inter_wb.8<0,1,0>:uw {align1}; /* f0.0 when the intra word is below the inter word; presumably MB A was then coded intra - TODO confirm */
+(f0.0) mov (2) mba_result.20<1>:w -1:w {align1}; /* no usable MV from MB A: ref IDs = -1 */
+(f0.0) jmpi (1) mbb_start;
+
+add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:ud 3:ud {align1}; /* advance the offset by 3 within the same MB record to reach its MV data */
+/* Read MV for MB A */
+/* bind index 3, read 2 oword (32 bytes), msg type: 0(OWord Block Read) */
+send (16)
+ mb_ind
+ mb_mv0.0<1>:ud
+ NULL
+ data_port(
+ OBR_CACHE_TYPE,
+ OBR_MESSAGE_TYPE,
+ OBR_CONTROL_2,
+ OBR_BIND_IDX,
+ OBR_WRITE_COMMIT_CATEGORY,
+ OBR_HEADER_PRESENT
+ )
+ mlen 1
+ rlen 1
+ {align1};
+/* TODO: RefID is required after multi-references are added */
+/* MV */
+mov (2) mba_result.4<1>:ud mb_mv0.0<2,2,1>:ud {align1}; /* first two dwords of the MV read -> mba_result.4 and .8 */
+mov (1) mba_result.16<1>:w MB_PRED_FLAG {align1}; /* set the prediction flag at offset 16 */
+
+mbb_start: /* neighbour B: the MB at (x, y - 1) */
+mov (8) mb_msg0.0<1>:ud 0:ud {align1}; /* start from a zeroed read header */
+and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_B:uw {align1}; /* f0.0 is set when the B availability bit is clear */
+/* MB B doesn't exist. Zero MV. mbb_result.0 (available) stays zero */
+/* If MB B doesn't exist, neither MB C nor D exists */
+(f0.0) mov (2) mbb_result.20<1>:w -1:w {align1}; /* ref IDs of B = -1 */
+(f0.0) mov (2) mbc_result.20<1>:w -1:w {align1}; /* ref IDs of C = -1 as well */
+(f0.0) jmpi (1) mb_mvp_start;
+mov (1) mbb_result.0<1>:d MB_AVAIL {align1}; /* mark MB B as available */
+mov (2) tmp_reg0.0<1>:UW orig_xy_ub<2,2,1>:UB {align1}; /* tmp_reg0.0 = x, tmp_reg0.2 = y */
+add (1) tmp_reg0.2<1>:w tmp_reg0.2<0,1,0>:w -1:w {align1}; /* y - 1 */
+mul (1) mb_msg0.8<1>:UD w_in_mb_uw<0,1,0>:UW tmp_reg0.2<0,1,0>:UW {align1}; /* w_in_mb * (y - 1) */
+add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD tmp_reg0.0<0,1,0>:uw {align1}; /* + x: linear index of MB B */
+mul (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD 24:UD {align1}; /* index * 24, the same record stride used for obw_m0.8 */
+mov (1) mb_msg0.20<1>:UB thread_id_ub {align1}; /* dispatch id */
+
+/* bind index 3, read 4 oword (64bytes), msg type: 0(OWord Block Read) */
+send (16)
+ mb_ind
+ mb_wb.0<1>:ud
+ NULL
+ data_port(
+ OBR_CACHE_TYPE,
+ OBR_MESSAGE_TYPE,
+ OBR_CONTROL_4,
+ OBR_BIND_IDX,
+ OBR_WRITE_COMMIT_CATEGORY,
+ OBR_HEADER_PRESENT
+ )
+ mlen 1
+ rlen 2
+ {align1};
+
+/* TODO: RefID is required after multi-references are added */
+cmp.l.f0.0 (1) null:w mb_intra_wb.16<0,1,0>:uw mb_inter_wb.8<0,1,0>:uw {align1}; /* f0.0 when the intra word is below the inter word; presumably MB B was then coded intra - TODO confirm */
+(f0.0) mov (2) mbb_result.20<1>:w -1:w {align1}; /* no usable MV from MB B: ref IDs = -1 */
+(f0.0) jmpi (1) mbc_start;
+add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:ud 3:ud {align1}; /* advance the offset by 3 within the same MB record to reach its MV data */
+/* Read MV for MB B */
+/* bind index 3, read 2 oword (32bytes), msg type: 0(OWord Block Read) */
+send (16)
+ mb_ind
+ mb_mv0.0<1>:ud
+ NULL
+ data_port(
+ OBR_CACHE_TYPE,
+ OBR_MESSAGE_TYPE,
+ OBR_CONTROL_2,
+ OBR_BIND_IDX,
+ OBR_WRITE_COMMIT_CATEGORY,
+ OBR_HEADER_PRESENT
+ )
+ mlen 1
+ rlen 1
+ {align1};
+/* TODO: RefID is required after multi-references are added */
+mov (2) mbb_result.4<1>:ud mb_mv0.0<2,2,1>:ud {align1}; /* first two dwords of the MV read -> mbb_result.4 and .8 */
+mov (1) mbb_result.16<1>:w MB_PRED_FLAG {align1}; /* set the prediction flag at offset 16 */
+
+mbc_start: /* neighbour C: the MB at (x + 1, y - 1) */
+mov (8) mb_msg0.0<1>:ud 0:ud {align1}; /* start from a zeroed read header */
+and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_C:uw {align1}; /* f0.0 is set when the C availability bit is clear */
+/* MB C doesn't exist. Zero MV. mbc_result.0 (available) stays zero */
+/* Based on the h264 spec, MB D is used in place of MB C if MB C doesn't exist */
+(f0.0) jmpi (1) mbd_start;
+mov (1) mbc_result.0<1>:d MB_AVAIL {align1}; /* mark MB C as available */
+mov (2) tmp_reg0.0<1>:UW orig_xy_ub<2,2,1>:UB {align1}; /* tmp_reg0.0 = x, tmp_reg0.2 = y */
+add (1) tmp_reg0.2<1>:w tmp_reg0.2<0,1,0>:w -1:w {align1}; /* y - 1 */
+add (1) tmp_reg0.0<1>:w tmp_reg0.0<0,1,0>:w 1:w {align1}; /* x + 1 */
+mul (1) mb_msg0.8<1>:UD w_in_mb_uw<0,1,0>:UW tmp_reg0.2<0,1,0>:UW {align1}; /* w_in_mb * (y - 1) */
+add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD tmp_reg0.0<0,1,0>:uw {align1}; /* + (x + 1): linear index of MB C */
+mul (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD 24:UD {align1}; /* index * 24, the same record stride used for obw_m0.8 */
+mov (1) mb_msg0.20<1>:UB thread_id_ub {align1}; /* dispatch id */
+
+/* bind index 3, read 4 oword (64bytes), msg type: 0(OWord Block Read) */
+send (16)
+ mb_ind
+ mb_wb.0<1>:ud
+ NULL
+ data_port(
+ OBR_CACHE_TYPE,
+ OBR_MESSAGE_TYPE,
+ OBR_CONTROL_4,
+ OBR_BIND_IDX,
+ OBR_WRITE_COMMIT_CATEGORY,
+ OBR_HEADER_PRESENT
+ )
+ mlen 1
+ rlen 2
+ {align1};
+
+/* TODO: RefID is required after multi-references are added */
+cmp.l.f0.0 (1) null:w mb_intra_wb.16<0,1,0>:uw mb_inter_wb.8<0,1,0>:uw {align1}; /* f0.0 when the intra word is below the inter word; presumably MB C was then coded intra - TODO confirm */
+(f0.0) mov (2) mbc_result.20<1>:w -1:w {align1}; /* no usable MV from MB C: ref IDs = -1 */
+(f0.0) jmpi (1) mb_mvp_start;
+add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:ud 3:ud {align1}; /* advance the offset by 3 within the same MB record to reach its MV data */
+/* Read MV for MB C */
+/* bind index 3, read 2 oword (32bytes), msg type: 0(OWord Block Read) */
+send (16)
+ mb_ind
+ mb_mv0.0<1>:ud
+ NULL
+ data_port(
+ OBR_CACHE_TYPE,
+ OBR_MESSAGE_TYPE,
+ OBR_CONTROL_2,
+ OBR_BIND_IDX,
+ OBR_WRITE_COMMIT_CATEGORY,
+ OBR_HEADER_PRESENT
+ )
+ mlen 1
+ rlen 1
+ {align1};
+/* TODO: RefID is required after multi-references are added */
+/* Forward MV */
+mov (2) mbc_result.4<1>:ud mb_mv0.0<2,2,1>:ud {align1}; /* first two dwords of the MV read -> mbc_result.4 and .8 */
+mov (1) mbc_result.16<1>:w MB_PRED_FLAG {align1}; /* set the prediction flag at offset 16 */
+
+jmpi (1) mb_mvp_start; /* MB C was used, so skip the MB D fallback */
+mbd_start: /* fallback when MB C is unavailable: MB D at (x - 1, y - 1); results are stored in mbc_result */
+mov (8) mb_msg0.0<1>:ud 0:ud {align1}; /* start from a zeroed read header */
+and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_D:uw {align1}; /* f0.0 is set when the D availability bit is clear */
+(f0.0) mov (2) mbc_result.20<1>:w -1:w {align1}; /* neither C nor D: ref IDs = -1 */
+(f0.0) jmpi (1) mb_mvp_start;
+mov (1) mbc_result.0<1>:d MB_AVAIL {align1}; /* mark the C slot as available (filled from MB D) */
+mov (2) tmp_reg0.0<1>:UW orig_xy_ub<2,2,1>:UB {align1}; /* tmp_reg0.0 = x, tmp_reg0.2 = y */
+add (2) tmp_reg0.0<1>:w tmp_reg0.0<2,2,1>:w -1:w {align1}; /* (x - 1, y - 1) */
+mul (1) mb_msg0.8<1>:UD w_in_mb_uw<0,1,0>:UW tmp_reg0.2<0,1,0>:UW {align1}; /* w_in_mb * (y - 1) */
+add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD tmp_reg0.0<0,1,0>:uw {align1}; /* + (x - 1): linear index of MB D */
+mul (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD 24:UD {align1}; /* index * 24, the same record stride used for obw_m0.8 */
+mov (1) mb_msg0.20<1>:UB thread_id_ub {align1}; /* dispatch id */
+
+/* bind index 3, read 4 oword (64bytes), msg type: 0(OWord Block Read) */
+send (16)
+ mb_ind
+ mb_wb.0<1>:ud
+ NULL
+ data_port(
+ OBR_CACHE_TYPE,
+ OBR_MESSAGE_TYPE,
+ OBR_CONTROL_4,
+ OBR_BIND_IDX,
+ OBR_WRITE_COMMIT_CATEGORY,
+ OBR_HEADER_PRESENT
+ )
+ mlen 1
+ rlen 2
+ {align1};
+
+cmp.l.f0.0 (1) null:w mb_intra_wb.16<0,1,0>:uw mb_inter_wb.8<0,1,0>:uw {align1}; /* f0.0 when the intra word is below the inter word; presumably MB D was then coded intra - TODO confirm */
+(f0.0) mov (2) mbc_result.20<1>:w -1:w {align1}; /* no usable MV from MB D: ref IDs = -1 */
+(f0.0) jmpi (1) mb_mvp_start;
+
+add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:ud 3:ud {align1}; /* advance the offset by 3 within the same MB record to reach its MV data */
+/* Read MV for MB D */
+/* bind index 3, read 2 oword (32bytes), msg type: 0(OWord Block Read) */
+send (16)
+ mb_ind
+ mb_mv0.0<1>:ub
+ NULL
+ data_port(
+ OBR_CACHE_TYPE,
+ OBR_MESSAGE_TYPE,
+ OBR_CONTROL_2,
+ OBR_BIND_IDX,
+ OBR_WRITE_COMMIT_CATEGORY,
+ OBR_HEADER_PRESENT
+ )
+ mlen 1
+ rlen 1
+ {align1};
+
+/* TODO: RefID is required after multi-references are added */
+/* NOTE(review): the destination above is typed :ub, while the MB A/B/C MV reads use :ud - confirm this is intentional */
+/* Forward MV */
+mov (2) mbc_result.4<1>:ud mb_mv0.0<2,2,1>:ud {align1}; /* first two dwords of the MV read -> mbc_result.4 and .8 */
+mov (1) mbc_result.16<1>:w MB_PRED_FLAG {align1}; /* set the prediction flag at offset 16 */
+
+mb_mvp_start: /* derive the predicted MV (mb_mvp_ref) when only MB A can contribute */
+/*TODO: Add the skip prediction */
+/* Check whether both MB B and C are unavailable */
+add (1) tmp_reg0.0<1>:d mbb_result.0<0,1,0>:d mbc_result.0<0,1,0>:d {align1}; /* sum of the two available words */
+cmp.z.f0.0 (1) null:d tmp_reg0.0<0,1,0>:d 0:d {align1}; /* f0.0 when the sum is zero, i.e. neither B nor C is available */
+(-f0.0) jmpi (1) mb_median_start; /* at least one of B/C is available: go to the median path */
+cmp.nz.f0.0 (1) null:d mba_result.0<0,1,0>:d 0:d {align1}; /* f0.0 when MB A is available */
+(f0.0) mov (1) mbb_result.4<1>:ud mba_result.4<0,1,0>:ud {align1}; /* A available: B takes A's MV */
+(f0.0) mov (1) mbc_result.4<1>:ud mba_result.4<0,1,0>:ud {align1}; /* ... and so does C */
+(f0.0) mov (1) mbb_result.20<1>:uw mba_result.20<0,1,0>:uw {align1}; /* B takes A's forward ref ID */
+(f0.0) mov (1) mbc_result.20<1>:uw mba_result.20<0,1,0>:uw {align1}; /* ... and so does C */
+(f0.0) mov (1) mb_mvp_ref.0<1>:ud mba_result.4<0,1,0>:ud {align1}; /* predicted MV = A's MV */
+(-f0.0) mov (1) mb_mvp_ref.0<1>:ud 0:ud {align1}; /* no neighbour at all: predicted MV = 0 */
+jmpi (1) __mb_hwdep_end;
+
+mb_median_start: /* predicted MV from neighbours A, B, C: single match on ref ID 0, else per-component median */
+/* check whether only one neighbour MB has the same ref ID as the current MB */
+mov (8) tmp_reg0.0<1>:ud 0:ud {align1}; /* tmp_reg0.0 = match count, tmp_reg0.4 = MV of the last match */
+cmp.z.f0.0 (1) null:d mba_result.20<0,1,0>:w 0:w {align1}; /* does A have forward ref ID 0? */
+(f0.0) add (1) tmp_reg0.0<1>:w tmp_reg0.0<0,1,0>:w 1:w {align1};
+(f0.0) mov (1) tmp_reg0.4<1>:ud mba_result.4<0,1,0>:ud {align1};
+cmp.z.f0.0 (1) null:d mbb_result.20<0,1,0>:w 0:w {align1}; /* does B have forward ref ID 0? */
+(f0.0) add (1) tmp_reg0.0<1>:w tmp_reg0.0<0,1,0>:w 1:w {align1};
+(f0.0) mov (1) tmp_reg0.4<1>:ud mbb_result.4<0,1,0>:ud {align1};
+cmp.z.f0.0 (1) null:d mbc_result.20<0,1,0>:w 0:w {align1}; /* does C have forward ref ID 0? */
+(f0.0) add (1) tmp_reg0.0<1>:w tmp_reg0.0<0,1,0>:w 1:w {align1};
+(f0.0) mov (1) tmp_reg0.4<1>:ud mbc_result.4<0,1,0>:ud {align1};
+cmp.e.f0.0 (1) null:d tmp_reg0.0<0,1,0>:w 1:w {align1}; /* exactly one match? */
+(f0.0) mov (1) mb_mvp_ref.0<1>:ud tmp_reg0.4<0,1,0>:ud {align1}; /* then that neighbour's MV is the prediction */
+(f0.0) jmpi (1) __mb_hwdep_end;
+
+mov (1) INPUT_ARG0.0<1>:w mba_result.4<0,1,0>:w {align1}; /* first MV word (presumably x) of A, B, C as the three arguments */
+mov (1) INPUT_ARG0.4<1>:w mbb_result.4<0,1,0>:w {align1};
+mov (1) INPUT_ARG0.8<1>:w mbc_result.4<0,1,0>:w {align1};
+SAVE_RET {align1}; /* record the return address, then call word_imedian (defined outside this view) */
+ jmpi (1) word_imedian;
+mov (1) mb_mvp_ref.0<1>:w RET_ARG<0,1,0>:w {align1}; /* result -> first word of the predicted MV */
+mov (1) INPUT_ARG0.0<1>:w mba_result.6<0,1,0>:w {align1}; /* second MV word (presumably y) of A, B, C */
+mov (1) INPUT_ARG0.4<1>:w mbb_result.6<0,1,0>:w {align1};
+mov (1) INPUT_ARG0.8<1>:w mbc_result.6<0,1,0>:w {align1};
+SAVE_RET {align1}; /* second call to word_imedian */
+jmpi (1) word_imedian;
+mov (1) mb_mvp_ref.2<1>:w RET_ARG<0,1,0>:w {align1}; /* result -> second word of the predicted MV */
+
+__mb_hwdep_end:
+
+mov (2) mv_cc_ref.0<1>:w mba_result.4<2,2,1>:w {align1};
+
+/* Calibrate the ref window for MPEG2 */
+mov (1) vme_m0.0<1>:W -16:W {align1};
+mov (1) vme_m0.2<1>:W -12:W {align1};
+
+mov (1) INPUT_ARG0.0<1>:ud vme_m0.0<0,1,0>:ud {align1};
+mov (1) INPUT_ARG0.8<1>:ud vme_m0.8<0,1,0>:ud {align1};
+mov (8) INPUT_ARG1.0<1>:ud pic_ref.0<8,8,1>:ud {align1};
+
+SAVE_RET {align1};
+jmpi (1) ref_boundary_check;
+mov (2) vme_m0.0<1>:w RET_ARG<2,2,1>:w {align1};
+
+/* m2, get the MV/Mb cost passed from constant buffer when
+spawning thread by MEDIA_OBJECT */
+mov (8) vme_m2<1>:UD r1.0<8,8,1>:UD {align1};
+
+mov (8) vme_msg_2<1>:UD vme_m2.0<8,8,1>:UD {align1};
+
+/* m3 FWD/BWD cost center*/
+mov (8) vme_msg_3<1>:UD 0x0:UD {align1};
+
+/* m4 skip center*/
+mov (8) vme_msg_4<1>:UD 0x0:UD {align1};
+
+/* m5 */
+mov (8) vme_msg_5<1>:UD 0x0:UD {align1};
+
+
+/* Use the Luma mode */
+mov (1) tmp_reg0.0<1>:UW LUMA_INTRA_MODE:UW {align1};
+mov (1) vme_msg_5.5<1>:UB tmp_reg0.0<0,1,0>:UB {align1};
+
+mov (1) tmp_reg0.0<1>:UW INTRA16_DC_PRED:UW {align1};
+mov (1) vme_msg_5.4<1>:ub tmp_reg0.0<0,1,0>:UB {align1};
+
+/* m6 */
+mov (8) vme_msg_6<1>:UD 0x0:UD {align1};
+mov (1) vme_msg_6.16<1>:UD INTRA_PREDICTORE_MODE {align1};
+
+/* the penalty for Intra mode */
+mov (1) vme_msg_6.28<1>:UD 0x010101:UD {align1};
+
+
+/* m7 */
+
+mov (8) vme_msg_7.0<1>:ud 0x0:ud {align1};
+
+/*
+ * SIC VME message
+ */
+
+/* Disable Intra8x8/Intra4x4 Intra-prediction */
+/* m1 */
+mov (8) vme_m1.0<1>:ud 0x0:UD {align1};
+
+mov (1) intra_flag<1>:UW 0x0:UW {align1} ;
+mov (1) tmp_reg0.0<1>:uw LUMA_INTRA_8x8_DISABLE:uw {align1};
+add (1) tmp_reg0.0<1>:uw tmp_reg0.0<0,1,0>:uw LUMA_INTRA_4x4_DISABLE:uw {align1};
+mov (1) intra_part_mask_ub<1>:UB tmp_reg0.0<0,1,0>:ub {align1};
+
+/* assign MB intra struct from the thread payload*/
+mov (1) mb_intra_struct_ub<1>:UB input_mb_intra_ub<0,1,0>:UB {align1};
+
+/* Enable DC HAAR component when calculating HAAR SATD block */
+mov (1) tmp_reg0.0<1>:UW DC_HARR_ENABLE:UW {align1};
+mov (1) vme_m1.30<1>:UB tmp_reg0.0<0,1,0>:UB {align1};
+mov (8) vme_msg_1<1>:UD vme_m1.0<8,8,1>:UD {align1};
+
+/* m0 */
+mov (1) vme_m0.12<1>:UD INTRA_SAD_HAAR:UD {align1}; /* 16x16 Source, Intra_harr */
+mov (8) vme_msg_0.0<1>:UD vme_m0.0<8,8,1>:UD {align1};
+
+/* after verification it will be passed by using payload */
+send (8)
+ vme_msg_ind
+ vme_wb<1>:UD
+ null
+ cre(
+ BIND_IDX_VME,
+ VME_SIC_MESSAGE_TYPE
+ )
+ mlen sic_vme_msg_length
+ rlen vme_wb_length
+ {align1};
+/*
+ * Oword Block Write message
+ */
+mov (8) msg_reg0.0<1>:UD obw_m0<8,8,1>:UD {align1};
+
+mov (1) msg_reg1.0<1>:UD vme_wb.0<0,1,0>:UD {align1};
+mov (1) msg_reg1.4<1>:UD vme_wb.16<0,1,0>:UD {align1};
+mov (1) msg_reg1.8<1>:UD vme_wb.20<0,1,0>:UD {align1};
+mov (1) msg_reg1.12<1>:UD vme_wb.24<0,1,0>:UD {align1};
+
+/* Distortion, Intra (17-16), */
+mov (1) msg_reg1.16<1>:UW vme_wb.12<0,1,0>:UW {align1};
+
+mov (1) msg_reg1.20<1>:UD vme_wb.8<0,1,0>:UD {align1};
+/* VME clock counts */
+mov (1) msg_reg1.24<1>:UD vme_wb.28<0,1,0>:UD {align1};
+
+mov (1) msg_reg1.28<1>:UD obw_m0.8<0,1,0>:UD {align1};
+
+/* bind index 3, write 2 oword (32bytes), msg type: 8(OWord Block Write) */
+send (16)
+ msg_ind
+ obw_wb
+ null
+ data_port(
+ OBW_CACHE_TYPE,
+ OBW_MESSAGE_TYPE,
+ OBW_CONTROL_2,
+ OBW_BIND_IDX,
+ OBW_WRITE_COMMIT_CATEGORY,
+ OBW_HEADER_PRESENT
+ )
+ mlen 2
+ rlen obw_wb_length
+ {align1};
+
+/* IME search */
+mov (1) vme_m0.12<1>:UD SEARCH_CTRL_SINGLE + INTER_PART_MASK + INTER_SAD_HAAR:UD {align1}; /* 16x16 Source, harr */
+mov (1) vme_m0.22<1>:UW REF_REGION_SIZE {align1}; /* Reference Width&Height, 48x40 */
+
+mov (1) vme_m0.4<1>:UD vme_m0.0<0,1,0>:UD {align1};
+
+mov (8) vme_msg_0.0<1>:UD vme_m0.0<8,8,1>:UD {align1};
+
+mov (1) vme_m1.0<1>:UD ADAPTIVE_SEARCH_ENABLE:ud {align1} ;
+/* the Max MV number is passed by constant buffer */
+mov (1) vme_m1.4<1>:UB r4.28<0,1,0>:UB {align1};
+mov (1) vme_m1.8<1>:UD START_CENTER + SEARCH_PATH_LEN:UD {align1};
+mov (8) vme_msg_1.0<1>:UD vme_m1.0<8,8,1>:UD {align1};
+
+/* Setup the Cost center */
+/* currently four 8x8 share the same cost center */
+mov (4) vme_m3.0<2>:ud mv_cc_ref.0<0,1,0>:ud {align1};
+mov (4) vme_m3.4<2>:ud mv_cc_ref.0<0,1,0>:ud {align1};
+
+mov (8) vme_msg_3<1>:UD vme_m3.0<8,8,1>:UD {align1};
+mov (8) vme_msg_2<1>:UD vme_m2.0<8,8,1>:UD {align1};
+
+/* M4/M5 search path */
+mov (1) vme_msg_4.0<1>:UD 0x01010101:UD {align1};
+mov (1) vme_msg_4.4<1>:UD 0x10010101:UD {align1};
+mov (1) vme_msg_4.8<1>:UD 0x0F0F0F0F:UD {align1};
+mov (1) vme_msg_4.12<1>:UD 0x100F0F0F:UD {align1};
+mov (1) vme_msg_4.16<1>:UD 0x01010101:UD {align1};
+mov (1) vme_msg_4.20<1>:UD 0x10010101:UD {align1};
+mov (1) vme_msg_4.24<1>:UD 0x0F0F0F0F:UD {align1};
+mov (1) vme_msg_4.28<1>:UD 0x100F0F0F:UD {align1};
+
+mov (1) vme_msg_5.0<1>:UD 0x01010101:UD {align1};
+mov (1) vme_msg_5.4<1>:UD 0x10010101:UD {align1};
+mov (1) vme_msg_5.8<1>:UD 0x0F0F0F0F:UD {align1};
+mov (1) vme_msg_5.12<1>:UD 0x000F0F0F:UD {align1};
+
+mov (4) vme_msg_5.16<1>:UD 0x0:UD {align1};
+
+send (8)
+ vme_msg_ind
+ vme_wb<1>:UD
+ null
+ vme(
+ BIND_IDX_VME,
+ 0,
+ 0,
+ VME_IME_MESSAGE_TYPE
+ )
+ mlen ime_vme_msg_length
+ rlen vme_wb_length {align1};
+
+/* Set Macroblock-shape/mode for FBR */
+
+mov (1) vme_m2.20<1>:UD 0x0:UD {align1};
+mov (1) vme_m2.21<1>:UB vme_wb.25<0,1,0>:UB {align1};
+mov (1) vme_m2.22<1>:UB vme_wb.26<0,1,0>:UB {align1};
+
+and (1) tmp_reg0.0<1>:UW vme_wb.0<0,1,0>:UW 0x03:UW {align1};
+mov (1) vme_m2.20<1>:UB tmp_reg0.0<0,1,0>:UB {align1};
+
+/* Send FBR message into CRE */
+
+mov (8) vme_msg_4.0<1>:UD vme_wb1.0<8,8,1>:UD {align1};
+mov (8) vme_msg_5.0<1>:ud vme_wb2.0<8,8,1>:ud {align1};
+mov (8) vme_msg_6.0<1>:ud vme_wb3.0<8,8,1>:ud {align1};
+mov (8) vme_msg_7.0<1>:ud vme_wb4.0<8,8,1>:ud {align1};
+
+mov (1) vme_m0.12<1>:UD INTER_SAD_HAAR + SUB_PEL_MODE_HALF + FBR_BME_DISABLE:UD {align1}; /* 16x16 Source, 1/2 pixel, harr, BME disable */
+/* Bilinear filter */
+mov (1) tmp_reg0.0<1>:uw 0x04:uw {align1};
+add (1) vme_m1.30<1>:ub vme_m1.30<0,1,0>:ub tmp_reg0.0<0,1,0>:ub {align1};
+
+mov (8) vme_msg_0.0<1>:UD vme_m0.0<8,8,1>:UD {align1};
+mov (8) vme_msg_1.0<1>:UD vme_m1.0<8,8,1>:UD {align1};
+
+mov (8) vme_msg_2.0<1>:UD vme_m2.0<8,8,1>:UD {align1};
+mov (8) vme_msg_3.0<1>:UD vme_m3.0<8,8,1>:UD {align1};
+
+/* after verification it will be passed by using payload */
+send (8)
+ vme_msg_ind
+ vme_wb<1>:UD
+ null
+ cre(
+ BIND_IDX_VME,
+ VME_FBR_MESSAGE_TYPE
+ )
+ mlen fbr_vme_msg_length
+ rlen vme_wb_length
+ {align1};
+
+and.z.f0.0 (1) null:uw mb_hwdep<0,1,0>:uw 0x04:uw {align1};
+(-f0.0) jmpi (1) vme_run_again;
+nop;
+vme_mv_output:
+/* Emit the search results with three OWord block writes (FME summary, MV payload, RefID payload) and finish with a commit fence. obw_m0 is the message header template; obw_m0.8 is advanced before each write. */
+add (1) obw_m0.8<1>:UD obw_m0.8<0,1,0>:UD 0x02:UD {align1}; /* advance the header offset field by 2 */
+mov (8) msg_reg0.0<1>:UD obw_m0<8,8,1>:UD {align1}; /* msg_reg0 = header for this write */
+/* write FME info */
+mov (1) msg_reg1.0<1>:UD vme_wb.0<0,1,0>:UD {align1};
+
+mov (1) msg_reg1.4<1>:UD vme_wb.24<0,1,0>:UD {align1};
+/* Inter distortion of FME */
+mov (1) msg_reg1.8<1>:UD vme_wb.8<0,1,0>:UD {align1};
+
+mov (1) msg_reg1.12<1>:UD vme_m2.20<0,1,0>:UD {align1}; /* dword that was filled in as the MB shape/mode for the FBR message */
+
+/* bind index 3, write oword (16bytes), msg type: 8(OWord Block Write) */
+send (16)
+ msg_ind
+ obw_wb
+ null
+ data_port(
+ OBW_CACHE_TYPE,
+ OBW_MESSAGE_TYPE,
+ OBW_CONTROL_0,
+ OBW_BIND_IDX,
+ OBW_WRITE_COMMIT_CATEGORY,
+ OBW_HEADER_PRESENT
+ )
+ mlen 2
+ rlen obw_wb_length
+ {align1};
+
+/* Write FME/BME MV */
+add (1) obw_m0.8<1>:UD obw_m0.8<0,1,0>:UD 0x01:UD {align1}; /* advance the header offset field by 1 */
+mov (8) msg_reg0.0<1>:UD obw_m0.0<8,8,1>:UD {align1};
+
+/* NOTE(review): msg_reg1..msg_reg4 are all filled below, but the send uses mlen 2 and OBW_CONTROL_2, so presumably only the header plus msg_reg1 is transmitted (assuming msg_ind addresses msg_reg0) - confirm this is intended. */
+mov (8) msg_reg1.0<1>:UD vme_wb1.0<8,8,1>:UD {align1};
+mov (8) msg_reg2.0<1>:ud vme_wb2.0<8,8,1>:ud {align1};
+mov (8) msg_reg3.0<1>:ud vme_wb3.0<8,8,1>:ud {align1};
+mov (8) msg_reg4.0<1>:ud vme_wb4.0<8,8,1>:ud {align1};
+/* bind index 3, write 2 oword (32 bytes), msg type: 8(OWord Block Write) */
+send (16)
+ msg_ind
+ obw_wb
+ null
+ data_port(
+ OBW_CACHE_TYPE,
+ OBW_MESSAGE_TYPE,
+ OBW_CONTROL_2,
+ OBW_BIND_IDX,
+ OBW_WRITE_COMMIT_CATEGORY,
+ OBW_HEADER_PRESENT
+ )
+ mlen 2
+ rlen obw_wb_length
+ {align1};
+
+/* Write FME/BME RefID */
+add (1) obw_m0.8<1>:UD obw_m0.8<0,1,0>:UD 0x08:UD {align1}; /* advance the header offset field by 8 */
+mov (8) msg_reg0.0<1>:UD obw_m0<8,8,1>:UD {align1};
+
+mov (8) msg_reg1.0<1>:UD vme_wb6.0<8,8,1>:UD {align1}; /* payload comes from writeback register vme_wb6 */
+
+/* bind index 3, write 2 oword (32bytes), msg type: 8(OWord Block Write) */
+send (16)
+ msg_ind
+ obw_wb
+ null
+ data_port(
+ OBW_CACHE_TYPE,
+ OBW_MESSAGE_TYPE,
+ OBW_CONTROL_2,
+ OBW_BIND_IDX,
+ OBW_WRITE_COMMIT_CATEGORY,
+ OBW_HEADER_PRESENT
+ )
+ mlen 2
+ rlen obw_wb_length
+ {align1};
+
+/* Issue message fence so that the previous write message is committed */
+send (16)
+ mb_ind
+ mb_wb.0<1>:ud
+ NULL
+ data_port(
+ OBR_CACHE_TYPE,
+ OBR_MESSAGE_FENCE,
+ OBR_MF_COMMIT,
+ OBR_BIND_IDX,
+ OBR_WRITE_COMMIT_CATEGORY,
+ OBR_HEADER_PRESENT
+ )
+ mlen 1
+ rlen 1
+ {align1};
+
+__EXIT:
+/*
+ * Terminate the thread: copy r0 into the thread spawner message register and send a one register message flagged EOT that expects no response (rlen 0)
+ */
+mov (8) ts_msg_reg0<1>:UD r0<8,8,1>:UD {align1};
+send (16) ts_msg_ind acc0<1>UW null thread_spawner(0, 0, 1) mlen 1 rlen 0 {align1 EOT};
+
+
+ nop ;
+ nop ;
+
+word_imedian: /* Subroutine: RET_ARG (word) = median of the signed words a = INPUT_ARG0.0, b = INPUT_ARG0.4, c = INPUT_ARG0.8; leaves through the RETURN macro */
+ cmp.ge.f0.0 (1) null:w INPUT_ARG0.0<0,1,0>:w INPUT_ARG0.4<0,1,0>:w {align1}; /* a >= b ? */
+ (f0.0) jmpi (1) cmp_a_ge_b;
+ cmp.ge.f0.0 (1) null:w INPUT_ARG0.0<0,1,0>:w INPUT_ARG0.8<0,1,0>:w {align1}; /* here a < b; a >= c ? */
+ (f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.0<0,1,0>:w {align1}; /* c <= a < b: median is a */
+ (f0.0) jmpi (1) cmp_end;
+ cmp.ge.f0.0 (1) null:w INPUT_ARG0.4<0,1,0>:w INPUT_ARG0.8<0,1,0>:w {align1}; /* here a < b and a < c; b >= c ? */
+ (f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.8<0,1,0>:w {align1}; /* a < c <= b: median is c */
+ (-f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.4<0,1,0>:w {align1}; /* a < b < c: median is b */
+ jmpi (1) cmp_end;
+cmp_a_ge_b:
+ cmp.ge.f0.0 (1) null:w INPUT_ARG0.4<0,1,0>:w INPUT_ARG0.8<0,1,0>:w {align1}; /* here a >= b; b >= c ? */
+ (f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.4<0,1,0>:w {align1}; /* c <= b <= a: median is b */
+ (f0.0) jmpi (1) cmp_end;
+ cmp.ge.f0.0 (1) null:w INPUT_ARG0.0<0,1,0>:w INPUT_ARG0.8<0,1,0>:w {align1}; /* here b < c; a >= c ? */
+ (f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.8<0,1,0>:w {align1}; /* b < c <= a: median is c */
+ (-f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.0<0,1,0>:w {align1}; /* b <= a < c: median is a */
+cmp_end:
+ RETURN {align1};
+
+nop;
+nop;
+ref_boundary_check:
+/* Subroutine: clamp a 48x40 reference window. In: INPUT_ARG0.0/.2 = window (x, y) offset relative to the MB, INPUT_ARG0.8/.10 = MB (x, y) origin, INPUT_ARG1.0/.2 = right/bottom limits (presumably picture width/height in pixels - confirm at the caller), INPUT_ARG1.16/.18 = MV range. Out: RET_ARG = clamped (x, y) offset relative to the MB origin. */
+/* The left/up coordinate of reference window */
+add (2) TEMP_VAR0.0<1>:w INPUT_ARG0.8<2,2,1>:w INPUT_ARG0.0<2,2,1>:w {align1};
+/* The right/bottom coordinate of reference window */
+add (1) TEMP_VAR0.16<1>:w TEMP_VAR0.0<0,1,0>:w 48:w {align1};
+add (1) TEMP_VAR0.18<1>:w TEMP_VAR0.2<0,1,0>:w 40:w {align1};
+
+/* Firstly the MV range is checked */
+mul (2) TEMP_VAR1.16<1>:w INPUT_ARG1.16<2,2,1>:w -1:w {align1}; /* TEMP_VAR1.16/.18 = -range */
+add (2) TEMP_VAR1.0<1>:w INPUT_ARG0.8<2,2,1>:w TEMP_VAR1.16<2,2,1>:w {align1}; /* TEMP_VAR1.0/.2 = MB origin - range (lower bound) */
+add (2) TEMP_VAR1.4<1>:w INPUT_ARG0.8<2,2,1>:w INPUT_ARG1.16<2,2,1>:w {align1}; /* TEMP_VAR1.4/.6 = MB origin + range (upper bound) */
+
+cmp.l.f0.0 (1) null:w TEMP_VAR0.0<0,1,0>:w TEMP_VAR1.0<0,1,0>:w {align1}; /* left edge below the lower x bound ? */
+(f0.0) mov (1) TEMP_VAR0.0<1>:w TEMP_VAR1.0<0,1,0>:w {align1}; /* then left = lower x bound */
+cmp.g.f0.0 (1) null:w TEMP_VAR0.16<0,1,0>:w TEMP_VAR1.4<0,1,0>:w {align1}; /* right edge above the upper x bound ? */
+(f0.0) add (1) TEMP_VAR0.0<1>:w TEMP_VAR1.4<0,1,0>:w -48:w {align1}; /* then left = upper x bound - 48 */
+cmp.l.f0.0 (1) null:w TEMP_VAR0.2<0,1,0>:w TEMP_VAR1.2<0,1,0>:w {align1}; /* top edge below the lower y bound ? */
+(f0.0) mov (1) TEMP_VAR0.2<1>:w TEMP_VAR1.2<0,1,0>:w {align1}; /* then top = lower y bound */
+cmp.g.f0.0 (1) null:w TEMP_VAR0.18<0,1,0>:w TEMP_VAR1.6<0,1,0>:w {align1}; /* bottom edge above the upper y bound ? */
+(f0.0) add (1) TEMP_VAR0.2<1>:w TEMP_VAR1.6<0,1,0>:w -40:w {align1}; /* then top = upper y bound - 40 */
+/* NOTE(review): TEMP_VAR0.16/.18 are not recomputed after the clamps above, so the edge tests below still use the right/bottom values computed before the MV range clamp - confirm this is intended. */
+x_left_cmp:
+ cmp.l.f0.0 (1) null:w TEMP_VAR0.0<0,1,0>:w 0:w {align1}; /* left edge negative ? */
+ (-f0.0) jmpi (1) x_right_cmp;
+ (f0.0) mov (1) TEMP_VAR0.0<1>:w 0:w {align1}; /* clamp left to 0 and skip the right edge test */
+ jmpi (1) y_top_cmp;
+x_right_cmp:
+ cmp.g.f0.0 (1) null:w TEMP_VAR0.16<0,1,0>:w INPUT_ARG1.0<0,1,0>:w {align1}; /* right edge beyond INPUT_ARG1.0 ? */
+ (-f0.0) jmpi (1) y_top_cmp;
+ (f0.0) add (1) TEMP_VAR0.0<1>:w INPUT_ARG1.0<0,1,0>:w -48:w {align1}; /* left = INPUT_ARG1.0 - 48 */
+y_top_cmp:
+ cmp.l.f0.0 (1) null:w TEMP_VAR0.2<0,1,0>:w 0:w {align1}; /* top edge negative ? */
+ (-f0.0) jmpi (1) y_bottom_cmp;
+ (f0.0) mov (1) TEMP_VAR0.2<1>:w 0:w {align1}; /* clamp top to 0 and skip the bottom edge test */
+ jmpi (1) y_bottom_end;
+y_bottom_cmp:
+ cmp.g.f0.0 (1) null:w TEMP_VAR0.18<0,1,0>:w INPUT_ARG1.2<0,1,0>:w {align1}; /* bottom edge beyond INPUT_ARG1.2 ? */
+ (f0.0) add (1) TEMP_VAR0.2<1>:w INPUT_ARG1.2<0,1,0>:w -40:w {align1}; /* top = INPUT_ARG1.2 - 40 */
+
+y_bottom_end:
+mul (2) TEMP_VAR1.0<1>:w INPUT_ARG0.8<2,2,1>:w -1:w {align1}; /* TEMP_VAR1.0/.2 = -(MB origin) */
+add (2) RET_ARG<1>:w TEMP_VAR0.0<2,2,1>:w TEMP_VAR1.0<2,2,1>:w {align1}; /* RET_ARG = clamped left/top - MB origin */
+ RETURN {align1};
+nop;
+nop;
+
+vme_run_again:
+/* Optional second search pass, entered when bit 0x04 of mb_hwdep is set. The reference window is shifted by the value in mb_mvp_ref and the IME + FBR messages are re-issued; the better of the two results is kept before jumping back to vme_mv_output. */
+asr (2) mb_ref_win.0<1>:w mb_mvp_ref.0<2,2,1>:w 2:w {align1}; /* mb_ref_win.0/.2 = mb_mvp_ref >> 2 (presumably quarter-pel to integer pel - confirm) */
+mov (2) tmp_reg0.0<1>:w mb_ref_win.0<2,2,1>:w {align1}; /* working copy used for the magnitude test below */
+add (2) mb_ref_win.8<1>:w mb_ref_win.0<2,2,1>:w 3:w {align1};
+and (2) mb_ref_win.16<1>:uw mb_ref_win.8<2,2,1>:uw 0xFFFC:uw {align1}; /* mb_ref_win.16/.18 = (shift + 3) with the low two bits cleared */
+
+cmp.l.f0.0 (1) null:w tmp_reg0.0<0,1,0>:w 0:w {align1}; /* tmp_reg0.0 = abs(x shift) */
+(f0.0) mul (1) tmp_reg0.0<1>:w tmp_reg0.0<0,1,0>:w -1:w {align1};
+cmp.l.f0.0 (1) null:w tmp_reg0.2<0,1,0>:w 0:w {align1}; /* tmp_reg0.2 = abs(y shift) */
+(f0.0) mul (1) tmp_reg0.2<1>:w tmp_reg0.2<0,1,0>:w -1:w {align1};
+
+cmp.ge.f0.0 (1) null:w tmp_reg0.0<0,1,0>:w 4:w {align1}; /* rerun only when abs(x) >= 4 or abs(y) >= 4 */
+(f0.0) jmpi (1) vme_start;
+cmp.ge.f0.0 (1) null:w tmp_reg0.2<0,1,0>:w 4:w {align1};
+(f0.0) jmpi (1) vme_start;
+
+jmpi (1) vme_done; /* shift too small: keep the first pass result */
+
+vme_start:
+ mov (8) tmp_vme_wb0.0<1>:ud vme_wb0.0<8,8,1>:ud {align1}; /* save the first pass writeback registers 0 and 1 */
+ mov (8) tmp_vme_wb1.0<1>:ud vme_wb1.0<8,8,1>:ud {align1};
+
+/* Calibrate the ref window for MPEG2 */
+mov (1) vme_m0.0<1>:W -16:W {align1};
+mov (1) vme_m0.2<1>:W -12:W {align1};
+mov (1) INPUT_ARG0.8<1>:ud vme_m0.8<0,1,0>:ud {align1};
+add (2) INPUT_ARG0.0<1>:w vme_m0.0<2,2,1>:w mb_ref_win.16<2,2,1>:w {align1}; /* window offset = (-16, -12) + shift */
+mov (8) INPUT_ARG1.0<1>:ud pic_ref.0<8,8,1>:ud {align1};
+
+SAVE_RET {align1};
+jmpi (1) ref_boundary_check;
+mov (2) vme_m0.0<1>:w RET_ARG<2,2,1>:w {align1}; /* execution resumes here after RETURN; take the clamped window offset */
+
+/* IME search */
+mov (1) vme_m0.12<1>:UD SEARCH_CTRL_SINGLE + INTER_PART_MASK + INTER_SAD_HAAR:UD {align1}; /* 16x16 Source, HAAR */
+mov (1) vme_m0.22<1>:UW REF_REGION_SIZE {align1}; /* Reference Width&Height, 48x40 */
+
+mov (1) vme_m0.4<1>:UD vme_m0.0<0,1,0>:UD {align1};
+
+mov (8) vme_msg_0.0<1>:UD vme_m0.0<8,8,1>:UD {align1};
+
+mov (8) vme_m1.0<1>:ud 0x0:UD {align1};
+
+mov (1) vme_m1.0<1>:UD ADAPTIVE_SEARCH_ENABLE:ud {align1} ;
+/* the Max MV number is passed by constant buffer */
+mov (1) vme_m1.4<1>:UB r4.28<0,1,0>:UB {align1};
+mov (1) vme_m1.8<1>:UD START_CENTER + SEARCH_PATH_LEN:UD {align1};
+mov (8) vme_msg_1.0<1>:UD vme_m1.0<8,8,1>:UD {align1};
+
+/* Setup the Cost center */
+/* currently four 8x8 share the same cost center */
+mov (4) vme_m3.0<2>:ud mv_cc_ref.0<0,1,0>:ud {align1};
+mov (4) vme_m3.4<2>:ud mv_cc_ref.0<0,1,0>:ud {align1};
+
+mov (8) vme_msg_3<1>:UD vme_m3.0<8,8,1>:UD {align1};
+mov (8) vme_msg_2<1>:UD vme_m2.0<8,8,1>:UD {align1};
+
+/* M4/M5 search path */
+mov (1) vme_msg_4.0<1>:UD 0x01010101:UD {align1};
+mov (1) vme_msg_4.4<1>:UD 0x10010101:UD {align1};
+mov (1) vme_msg_4.8<1>:UD 0x0F0F0F0F:UD {align1};
+mov (1) vme_msg_4.12<1>:UD 0x100F0F0F:UD {align1};
+mov (1) vme_msg_4.16<1>:UD 0x01010101:UD {align1};
+mov (1) vme_msg_4.20<1>:UD 0x10010101:UD {align1};
+mov (1) vme_msg_4.24<1>:UD 0x0F0F0F0F:UD {align1};
+mov (1) vme_msg_4.28<1>:UD 0x100F0F0F:UD {align1};
+
+mov (1) vme_msg_5.0<1>:UD 0x01010101:UD {align1};
+mov (1) vme_msg_5.4<1>:UD 0x10010101:UD {align1};
+mov (1) vme_msg_5.8<1>:UD 0x0F0F0F0F:UD {align1};
+mov (1) vme_msg_5.12<1>:UD 0x000F0F0F:UD {align1};
+
+mov (4) vme_msg_5.16<1>:UD 0x0:UD {align1};
+
+send (8)
+ vme_msg_ind
+ vme_wb<1>:UD
+ null
+ vme(
+ BIND_IDX_VME,
+ 0,
+ 0,
+ VME_IME_MESSAGE_TYPE
+ )
+ mlen ime_vme_msg_length
+ rlen vme_wb_length {align1};
+
+/* Set Macroblock-shape/mode for FBR */
+
+mov (1) vme_m2.20<1>:UD 0x0:UD {align1};
+mov (1) vme_m2.21<1>:UB vme_wb.25<0,1,0>:UB {align1};
+mov (1) vme_m2.22<1>:UB vme_wb.26<0,1,0>:UB {align1};
+
+and (1) tmp_reg0.0<1>:UW vme_wb.0<0,1,0>:UW 0x03:UW {align1}; /* keep the low two bits of the first writeback word */
+mov (1) vme_m2.20<1>:UB tmp_reg0.0<0,1,0>:UB {align1};
+
+/* Send FBR message into CRE */
+
+mov (8) vme_msg_4.0<1>:UD vme_wb1.0<8,8,1>:UD {align1}; /* feed the IME writeback registers 1..4 into the FBR payload */
+mov (8) vme_msg_5.0<1>:ud vme_wb2.0<8,8,1>:ud {align1};
+mov (8) vme_msg_6.0<1>:ud vme_wb3.0<8,8,1>:ud {align1};
+mov (8) vme_msg_7.0<1>:ud vme_wb4.0<8,8,1>:ud {align1};
+
+mov (1) vme_m0.12<1>:UD INTER_SAD_HAAR + SUB_PEL_MODE_HALF + FBR_BME_DISABLE:UD {align1}; /* 16x16 Source, 1/2 pixel, HAAR, BME disable */
+/* Bilinear filter */
+mov (1) tmp_reg0.0<1>:uw 0x04:uw {align1};
+add (1) vme_m1.30<1>:ub vme_m1.30<0,1,0>:ub tmp_reg0.0<0,1,0>:ub {align1};
+
+mov (8) vme_msg_0.0<1>:UD vme_m0.0<8,8,1>:UD {align1};
+mov (8) vme_msg_1.0<1>:UD vme_m1.0<8,8,1>:UD {align1};
+
+mov (8) vme_msg_2.0<1>:UD vme_m2.0<8,8,1>:UD {align1};
+mov (8) vme_msg_3.0<1>:UD vme_m3.0<8,8,1>:UD {align1};
+
+/* after verification it will be passed by using payload */
+send (8)
+ vme_msg_ind
+ vme_wb<1>:UD
+ null
+ cre(
+ BIND_IDX_VME,
+ VME_FBR_MESSAGE_TYPE
+ )
+ mlen fbr_vme_msg_length
+ rlen vme_wb_length
+ {align1};
+/* Keep the new result only if its word at vme_wb0.8 (presumably the inter distortion - confirm) is lower than the saved one. NOTE(review): on fallback only vme_wb0 and vme_wb1 are restored; vme_wb2..4, vme_wb6 and vme_m2.20 keep second pass values - confirm this is intended. */
+cmp.l.f0.0 (1) null:uw vme_wb0.8<0,1,0>:uw tmp_vme_wb0.8<0,1,0>:uw {align1};
+(f0.0) jmpi (1) vme_done;
+mov (8) vme_wb0.0<1>:ud tmp_vme_wb0.0<8,8,1>:ud {align1}; /* otherwise restore the first pass result */
+mov (8) vme_wb1.0<1>:ud tmp_vme_wb1.0<8,8,1>:ud {align1};
+
+vme_done:
+ jmpi (1) vme_mv_output; /* rejoin the common output path */
+nop;
+nop;
+nop;
+
diff --git a/src/shaders/vme/mpeg2_inter_gen8.g8a b/src/shaders/vme/mpeg2_inter_gen8.g8a
new file mode 100644
index 0000000..26f94a7
--- /dev/null
+++ b/src/shaders/vme/mpeg2_inter_gen8.g8a
@@ -0,0 +1,3 @@
+#include "vme8.inc"
+#include "vme75_mpeg2.inc"
+#include "mpeg2_inter_gen8.asm"
diff --git a/src/shaders/vme/mpeg2_inter_gen8.g8b b/src/shaders/vme/mpeg2_inter_gen8.g8b
new file mode 100644
index 0000000..6686c9f
--- /dev/null
+++ b/src/shaders/vme/mpeg2_inter_gen8.g8b
@@ -0,0 +1,371 @@
+ { 0x00800001, 0x24000608, 0x00000000, 0x00000000 },
+ { 0x00800001, 0x24400608, 0x00000000, 0x00000000 },
+ { 0x00800001, 0x24800608, 0x00000000, 0x00000000 },
+ { 0x00800001, 0x24c00608, 0x00000000, 0x00000000 },
+ { 0x00200009, 0x24482248, 0x164500a0, 0x00040004 },
+ { 0x00000001, 0x24542288, 0x00000014, 0x00000000 },
+ { 0x00000041, 0x24881208, 0x220000a2, 0x000000a1 },
+ { 0x00000040, 0x24880208, 0x22000488, 0x000000a0 },
+ { 0x00000041, 0x24880208, 0x06000488, 0x00000018 },
+ { 0x00000001, 0x24942288, 0x00000014, 0x00000000 },
+ { 0x00200009, 0x2a401248, 0x16450098, 0x00040004 },
+ { 0x00200001, 0x2a501248, 0x00450094, 0x00000000 },
+ { 0x00600001, 0x2ac00608, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x2a800608, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x2ae00608, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x2b000608, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x2b200608, 0x00000000, 0x00000000 },
+ { 0x01000005, 0x20001240, 0x160000a6, 0x00040004 },
+ { 0x00010020, 0x34000000, 0x0e001400, 0x00000720 },
+ { 0x00600001, 0x2b400608, 0x00000000, 0x00000000 },
+ { 0x01000005, 0x20002240, 0x160000a5, 0x00600060 },
+ { 0x00210001, 0x2af41e68, 0x18000000, 0xffffffff },
+ { 0x00010020, 0x34000000, 0x0e001400, 0x000000f0 },
+ { 0x00000001, 0x2ae00e28, 0x08000000, 0x00000001 },
+ { 0x00200001, 0x24002248, 0x004500a0, 0x00000000 },
+ { 0x00000040, 0x24001a68, 0x1e000400, 0xffffffff },
+ { 0x00000041, 0x2b481208, 0x120000a2, 0x00000402 },
+ { 0x00000040, 0x2b480208, 0x12000b48, 0x00000400 },
+ { 0x00000041, 0x2b480208, 0x06000b48, 0x00000018 },
+ { 0x00000001, 0x2b542288, 0x00000014, 0x00000000 },
+ { 0x0a800031, 0x2b600a08, 0x0e000b40, 0x02280303 },
+ { 0x05000010, 0x20001260, 0x12000b70, 0x00000b88 },
+ { 0x00210001, 0x2af41e68, 0x18000000, 0xffffffff },
+ { 0x00010020, 0x34000000, 0x0e001400, 0x00000040 },
+ { 0x00000040, 0x2b480208, 0x06000b48, 0x00000003 },
+ { 0x0a800031, 0x2ba00a08, 0x0e000b40, 0x02180203 },
+ { 0x00200001, 0x2ae40208, 0x00450ba0, 0x00000000 },
+ { 0x00000001, 0x2af01e68, 0x18000000, 0x00010001 },
+ { 0x00600001, 0x2b400608, 0x00000000, 0x00000000 },
+ { 0x01000005, 0x20002240, 0x160000a5, 0x00100010 },
+ { 0x00210001, 0x2b141e68, 0x18000000, 0xffffffff },
+ { 0x00210001, 0x2b341e68, 0x18000000, 0xffffffff },
+ { 0x00010020, 0x34000000, 0x0e001400, 0x00000360 },
+ { 0x00000001, 0x2b000e28, 0x08000000, 0x00000001 },
+ { 0x00200001, 0x24002248, 0x004500a0, 0x00000000 },
+ { 0x00000040, 0x24021a68, 0x1e000402, 0xffffffff },
+ { 0x00000041, 0x2b481208, 0x120000a2, 0x00000402 },
+ { 0x00000040, 0x2b480208, 0x12000b48, 0x00000400 },
+ { 0x00000041, 0x2b480208, 0x06000b48, 0x00000018 },
+ { 0x00000001, 0x2b542288, 0x00000014, 0x00000000 },
+ { 0x0a800031, 0x2b600a08, 0x0e000b40, 0x02280303 },
+ { 0x05000010, 0x20001260, 0x12000b70, 0x00000b88 },
+ { 0x00210001, 0x2b141e68, 0x18000000, 0xffffffff },
+ { 0x00010020, 0x34000000, 0x0e001400, 0x00000040 },
+ { 0x00000040, 0x2b480208, 0x06000b48, 0x00000003 },
+ { 0x0a800031, 0x2ba00a08, 0x0e000b40, 0x02180203 },
+ { 0x00200001, 0x2b040208, 0x00450ba0, 0x00000000 },
+ { 0x00000001, 0x2b101e68, 0x18000000, 0x00010001 },
+ { 0x00600001, 0x2b400608, 0x00000000, 0x00000000 },
+ { 0x01000005, 0x20002240, 0x160000a5, 0x00080008 },
+ { 0x00010020, 0x34000000, 0x0e001400, 0x00000110 },
+ { 0x00000001, 0x2b200e28, 0x08000000, 0x00000001 },
+ { 0x00200001, 0x24002248, 0x004500a0, 0x00000000 },
+ { 0x00000040, 0x24021a68, 0x1e000402, 0xffffffff },
+ { 0x00000040, 0x24001a68, 0x1e000400, 0x00010001 },
+ { 0x00000041, 0x2b481208, 0x120000a2, 0x00000402 },
+ { 0x00000040, 0x2b480208, 0x12000b48, 0x00000400 },
+ { 0x00000041, 0x2b480208, 0x06000b48, 0x00000018 },
+ { 0x00000001, 0x2b542288, 0x00000014, 0x00000000 },
+ { 0x0a800031, 0x2b600a08, 0x0e000b40, 0x02280303 },
+ { 0x05000010, 0x20001260, 0x12000b70, 0x00000b88 },
+ { 0x00210001, 0x2b341e68, 0x18000000, 0xffffffff },
+ { 0x00010020, 0x34000000, 0x0e001400, 0x00000180 },
+ { 0x00000040, 0x2b480208, 0x06000b48, 0x00000003 },
+ { 0x0a800031, 0x2ba00a08, 0x0e000b40, 0x02180203 },
+ { 0x00200001, 0x2b240208, 0x00450ba0, 0x00000000 },
+ { 0x00000001, 0x2b301e68, 0x18000000, 0x00010001 },
+ { 0x00000020, 0x34000000, 0x0e001400, 0x00000130 },
+ { 0x00600001, 0x2b400608, 0x00000000, 0x00000000 },
+ { 0x01000005, 0x20002240, 0x160000a5, 0x00040004 },
+ { 0x00210001, 0x2b341e68, 0x18000000, 0xffffffff },
+ { 0x00010020, 0x34000000, 0x0e001400, 0x000000f0 },
+ { 0x00000001, 0x2b200e28, 0x08000000, 0x00000001 },
+ { 0x00200001, 0x24002248, 0x004500a0, 0x00000000 },
+ { 0x00200040, 0x24001a68, 0x1e450400, 0xffffffff },
+ { 0x00000041, 0x2b481208, 0x120000a2, 0x00000402 },
+ { 0x00000040, 0x2b480208, 0x12000b48, 0x00000400 },
+ { 0x00000041, 0x2b480208, 0x06000b48, 0x00000018 },
+ { 0x00000001, 0x2b542288, 0x00000014, 0x00000000 },
+ { 0x0a800031, 0x2b600a08, 0x0e000b40, 0x02280303 },
+ { 0x05000010, 0x20001260, 0x12000b70, 0x00000b88 },
+ { 0x00210001, 0x2b341e68, 0x18000000, 0xffffffff },
+ { 0x00010020, 0x34000000, 0x0e001400, 0x00000040 },
+ { 0x00000040, 0x2b480208, 0x06000b48, 0x00000003 },
+ { 0x0a800031, 0x2ba00a88, 0x0e000b40, 0x02180203 },
+ { 0x00200001, 0x2b240208, 0x00450ba0, 0x00000000 },
+ { 0x00000001, 0x2b301e68, 0x18000000, 0x00010001 },
+ { 0x00000040, 0x24000a28, 0x0a000b00, 0x00000b20 },
+ { 0x01000010, 0x20000a20, 0x0e000400, 0x00000000 },
+ { 0x00110020, 0x34000000, 0x0e001400, 0x00000080 },
+ { 0x02000010, 0x20000a20, 0x0e000ae0, 0x00000000 },
+ { 0x00010001, 0x2b040208, 0x00000ae4, 0x00000000 },
+ { 0x00010001, 0x2b240208, 0x00000ae4, 0x00000000 },
+ { 0x00010001, 0x2b141248, 0x00000af4, 0x00000000 },
+ { 0x00010001, 0x2b341248, 0x00000af4, 0x00000000 },
+ { 0x00010001, 0x2ac00208, 0x00000ae4, 0x00000000 },
+ { 0x00110001, 0x2ac00608, 0x00000000, 0x00000000 },
+ { 0x00000020, 0x34000000, 0x0e001400, 0x00000190 },
+ { 0x00600001, 0x24000608, 0x00000000, 0x00000000 },
+ { 0x01000010, 0x20001a20, 0x1e000af4, 0x00000000 },
+ { 0x00010040, 0x24001a68, 0x1e000400, 0x00010001 },
+ { 0x00010001, 0x24040208, 0x00000ae4, 0x00000000 },
+ { 0x01000010, 0x20001a20, 0x1e000b14, 0x00000000 },
+ { 0x00010040, 0x24001a68, 0x1e000400, 0x00010001 },
+ { 0x00010001, 0x24040208, 0x00000b04, 0x00000000 },
+ { 0x01000010, 0x20001a20, 0x1e000b34, 0x00000000 },
+ { 0x00010040, 0x24001a68, 0x1e000400, 0x00010001 },
+ { 0x00010001, 0x24040208, 0x00000b24, 0x00000000 },
+ { 0x01000010, 0x20001a20, 0x1e000400, 0x00010001 },
+ { 0x00010001, 0x2ac00208, 0x00000404, 0x00000000 },
+ { 0x00010020, 0x34000000, 0x0e001400, 0x000000c0 },
+ { 0x00000001, 0x2fa01a68, 0x00000ae4, 0x00000000 },
+ { 0x00000001, 0x2fa41a68, 0x00000b04, 0x00000000 },
+ { 0x00000001, 0x2fa81a68, 0x00000b24, 0x00000000 },
+ { 0x00000040, 0x2fe00008, 0x06001400, 0x00000020 },
+ { 0x00000020, 0x34000000, 0x0e001400, 0x00000780 },
+ { 0x00000001, 0x2ac01a68, 0x00000fe4, 0x00000000 },
+ { 0x00000001, 0x2fa01a68, 0x00000ae6, 0x00000000 },
+ { 0x00000001, 0x2fa41a68, 0x00000b06, 0x00000000 },
+ { 0x00000001, 0x2fa81a68, 0x00000b26, 0x00000000 },
+ { 0x00000040, 0x2fe00008, 0x06001400, 0x00000020 },
+ { 0x00000020, 0x34000000, 0x0e001400, 0x00000720 },
+ { 0x00000001, 0x2ac21a68, 0x00000fe4, 0x00000000 },
+ { 0x00200001, 0x2a201a68, 0x00450ae4, 0x00000000 },
+ { 0x00000001, 0x24401e68, 0x18000000, 0xfff0fff0 },
+ { 0x00000001, 0x24421e68, 0x18000000, 0xfff4fff4 },
+ { 0x00000001, 0x2fa00208, 0x00000440, 0x00000000 },
+ { 0x00000001, 0x2fa80208, 0x00000448, 0x00000000 },
+ { 0x00600001, 0x2fc00208, 0x008d0a40, 0x00000000 },
+ { 0x00000040, 0x2fe00008, 0x06001400, 0x00000020 },
+ { 0x00000020, 0x34000000, 0x0e001400, 0x000007b0 },
+ { 0x00200001, 0x24401a68, 0x00450fe4, 0x00000000 },
+ { 0x00600001, 0x25600208, 0x008d0020, 0x00000000 },
+ { 0x00600001, 0x28400208, 0x008d0560, 0x00000000 },
+ { 0x00600001, 0x28600608, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x28800608, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x28a00608, 0x00000000, 0x00000000 },
+ { 0x00000001, 0x24001648, 0x10000000, 0x00010001 },
+ { 0x00000001, 0x28a52288, 0x00000400, 0x00000000 },
+ { 0x00000001, 0x24001648, 0x10000000, 0x00bb00bb },
+ { 0x00000001, 0x28a42288, 0x00000400, 0x00000000 },
+ { 0x00600001, 0x28c00608, 0x00000000, 0x00000000 },
+ { 0x00000001, 0x28d00608, 0x00000000, 0x11111111 },
+ { 0x00000001, 0x28dc0608, 0x00000000, 0x00010101 },
+ { 0x00600001, 0x28e00608, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x24600608, 0x00000000, 0x00000000 },
+ { 0x00000001, 0x247c1648, 0x10000000, 0x00000000 },
+ { 0x00000001, 0x24001648, 0x10000000, 0x00020002 },
+ { 0x00000040, 0x24001248, 0x16000400, 0x00040004 },
+ { 0x00000001, 0x247c2288, 0x00000400, 0x00000000 },
+ { 0x00000001, 0x247d2288, 0x000000a5, 0x00000000 },
+ { 0x00000001, 0x24001648, 0x10000000, 0x00000000 },
+ { 0x00000001, 0x247e2288, 0x00000400, 0x00000000 },
+ { 0x00600001, 0x28200208, 0x008d0460, 0x00000000 },
+ { 0x00000001, 0x244c0608, 0x00000000, 0x00800000 },
+ { 0x00600001, 0x28000208, 0x008d0440, 0x00000000 },
+ { 0x0d600031, 0x21800a08, 0x0e000800, 0x10782000 },
+ { 0x00600001, 0x28000208, 0x008d0480, 0x00000000 },
+ { 0x00000001, 0x28200208, 0x00000180, 0x00000000 },
+ { 0x00000001, 0x28240208, 0x00000190, 0x00000000 },
+ { 0x00000001, 0x28280208, 0x00000194, 0x00000000 },
+ { 0x00000001, 0x282c0208, 0x00000198, 0x00000000 },
+ { 0x00000001, 0x28301248, 0x0000018c, 0x00000000 },
+ { 0x00000001, 0x28340208, 0x00000188, 0x00000000 },
+ { 0x00000001, 0x28380208, 0x0000019c, 0x00000000 },
+ { 0x00000001, 0x283c0208, 0x00000488, 0x00000000 },
+ { 0x0a800031, 0x20000a60, 0x0e000800, 0x040a0203 },
+ { 0x00000001, 0x244c0608, 0x00000000, 0x7e200000 },
+ { 0x00000001, 0x24561648, 0x10000000, 0x28302830 },
+ { 0x00000001, 0x24440208, 0x00000440, 0x00000000 },
+ { 0x00600001, 0x28000208, 0x008d0440, 0x00000000 },
+ { 0x00000001, 0x24600608, 0x00000000, 0x00000002 },
+ { 0x00000001, 0x24642288, 0x0000009c, 0x00000000 },
+ { 0x00000001, 0x24680608, 0x00000000, 0x30003030 },
+ { 0x00600001, 0x28200208, 0x008d0460, 0x00000000 },
+ { 0x00400001, 0x45800208, 0x00000a20, 0x00000000 },
+ { 0x00400001, 0x45840208, 0x00000a20, 0x00000000 },
+ { 0x00600001, 0x28600208, 0x008d0580, 0x00000000 },
+ { 0x00600001, 0x28400208, 0x008d0560, 0x00000000 },
+ { 0x00000001, 0x28800608, 0x00000000, 0x01010101 },
+ { 0x00000001, 0x28840608, 0x00000000, 0x10010101 },
+ { 0x00000001, 0x28880608, 0x00000000, 0x0f0f0f0f },
+ { 0x00000001, 0x288c0608, 0x00000000, 0x100f0f0f },
+ { 0x00000001, 0x28900608, 0x00000000, 0x01010101 },
+ { 0x00000001, 0x28940608, 0x00000000, 0x10010101 },
+ { 0x00000001, 0x28980608, 0x00000000, 0x0f0f0f0f },
+ { 0x00000001, 0x289c0608, 0x00000000, 0x100f0f0f },
+ { 0x00000001, 0x28a00608, 0x00000000, 0x01010101 },
+ { 0x00000001, 0x28a40608, 0x00000000, 0x10010101 },
+ { 0x00000001, 0x28a80608, 0x00000000, 0x0f0f0f0f },
+ { 0x00000001, 0x28ac0608, 0x00000000, 0x000f0f0f },
+ { 0x00400001, 0x28b00608, 0x00000000, 0x00000000 },
+ { 0x08600031, 0x21800a08, 0x0e000800, 0x0c784000 },
+ { 0x00000001, 0x25740608, 0x00000000, 0x00000000 },
+ { 0x00000001, 0x25752288, 0x00000199, 0x00000000 },
+ { 0x00000001, 0x25762288, 0x0000019a, 0x00000000 },
+ { 0x00000005, 0x24001248, 0x16000180, 0x00030003 },
+ { 0x00000001, 0x25742288, 0x00000400, 0x00000000 },
+ { 0x00600001, 0x28800208, 0x008d01a0, 0x00000000 },
+ { 0x00600001, 0x28a00208, 0x008d01c0, 0x00000000 },
+ { 0x00600001, 0x28c00208, 0x008d01e0, 0x00000000 },
+ { 0x00600001, 0x28e00208, 0x008d0200, 0x00000000 },
+ { 0x00000001, 0x244c0608, 0x00000000, 0x00241000 },
+ { 0x00000001, 0x24001648, 0x10000000, 0x00040004 },
+ { 0x00000040, 0x247e2288, 0x2200047e, 0x00000400 },
+ { 0x00600001, 0x28000208, 0x008d0440, 0x00000000 },
+ { 0x00600001, 0x28200208, 0x008d0460, 0x00000000 },
+ { 0x00600001, 0x28400208, 0x008d0560, 0x00000000 },
+ { 0x00600001, 0x28600208, 0x008d0580, 0x00000000 },
+ { 0x0d600031, 0x21800a08, 0x0e000800, 0x10786000 },
+ { 0x01000005, 0x20001240, 0x160000a6, 0x00040004 },
+ { 0x00110020, 0x34000000, 0x0e001400, 0x000004a0 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000040, 0x24880208, 0x06000488, 0x00000002 },
+ { 0x00600001, 0x28000208, 0x008d0480, 0x00000000 },
+ { 0x00000001, 0x28200208, 0x00000180, 0x00000000 },
+ { 0x00000001, 0x28240208, 0x00000198, 0x00000000 },
+ { 0x00000001, 0x28280208, 0x00000188, 0x00000000 },
+ { 0x00000001, 0x282c0208, 0x00000574, 0x00000000 },
+ { 0x0a800031, 0x20000a60, 0x0e000800, 0x040a0003 },
+ { 0x00000040, 0x24880208, 0x06000488, 0x00000001 },
+ { 0x00600001, 0x28000208, 0x008d0480, 0x00000000 },
+ { 0x00600001, 0x28200208, 0x008d01a0, 0x00000000 },
+ { 0x00600001, 0x28400208, 0x008d01c0, 0x00000000 },
+ { 0x00600001, 0x28600208, 0x008d01e0, 0x00000000 },
+ { 0x00600001, 0x28800208, 0x008d0200, 0x00000000 },
+ { 0x0a800031, 0x20000a60, 0x0e000800, 0x040a0203 },
+ { 0x00000040, 0x24880208, 0x06000488, 0x00000008 },
+ { 0x00600001, 0x28000208, 0x008d0480, 0x00000000 },
+ { 0x00600001, 0x28200208, 0x008d0240, 0x00000000 },
+ { 0x0a800031, 0x20000a60, 0x0e000800, 0x040a0203 },
+ { 0x0a800031, 0x2b600a08, 0x0e000b40, 0x0219e003 },
+ { 0x00600001, 0x2e000208, 0x008d0000, 0x00000000 },
+ { 0x07800031, 0x24000a40, 0x0e000e00, 0x82000010 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x04000010, 0x20001a60, 0x1a000fa0, 0x00000fa4 },
+ { 0x00010020, 0x34000000, 0x0e001400, 0x00000070 },
+ { 0x04000010, 0x20001a60, 0x1a000fa0, 0x00000fa8 },
+ { 0x00010001, 0x2fe41a68, 0x00000fa0, 0x00000000 },
+ { 0x00010020, 0x34000000, 0x0e001400, 0x000000a0 },
+ { 0x04000010, 0x20001a60, 0x1a000fa4, 0x00000fa8 },
+ { 0x00010001, 0x2fe41a68, 0x00000fa8, 0x00000000 },
+ { 0x00110001, 0x2fe41a68, 0x00000fa4, 0x00000000 },
+ { 0x00000020, 0x34000000, 0x0e001400, 0x00000060 },
+ { 0x04000010, 0x20001a60, 0x1a000fa4, 0x00000fa8 },
+ { 0x00010001, 0x2fe41a68, 0x00000fa4, 0x00000000 },
+ { 0x00010020, 0x34000000, 0x0e001400, 0x00000030 },
+ { 0x04000010, 0x20001a60, 0x1a000fa0, 0x00000fa8 },
+ { 0x00010001, 0x2fe41a68, 0x00000fa8, 0x00000000 },
+ { 0x00110001, 0x2fe41a68, 0x00000fa0, 0x00000000 },
+ { 0x00000001, 0x34000200, 0x00000fe0, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00200040, 0x2f601a68, 0x1a450fa8, 0x00450fa0 },
+ { 0x00000040, 0x2f701a68, 0x1e000f60, 0x00300030 },
+ { 0x00000040, 0x2f721a68, 0x1e000f62, 0x00280028 },
+ { 0x00200041, 0x2f901a68, 0x1e450fd0, 0xffffffff },
+ { 0x00200040, 0x2f801a68, 0x1a450fa8, 0x00450f90 },
+ { 0x00200040, 0x2f841a68, 0x1a450fa8, 0x00450fd0 },
+ { 0x05000010, 0x20001a60, 0x1a000f60, 0x00000f80 },
+ { 0x00010001, 0x2f601a68, 0x00000f80, 0x00000000 },
+ { 0x03000010, 0x20001a60, 0x1a000f70, 0x00000f84 },
+ { 0x00010040, 0x2f601a68, 0x1e000f84, 0xffd0ffd0 },
+ { 0x05000010, 0x20001a60, 0x1a000f62, 0x00000f82 },
+ { 0x00010001, 0x2f621a68, 0x00000f82, 0x00000000 },
+ { 0x03000010, 0x20001a60, 0x1a000f72, 0x00000f86 },
+ { 0x00010040, 0x2f621a68, 0x1e000f86, 0xffd8ffd8 },
+ { 0x05000010, 0x20001a60, 0x1e000f60, 0x00000000 },
+ { 0x00110020, 0x34000000, 0x0e001400, 0x00000020 },
+ { 0x00010001, 0x2f601e68, 0x18000000, 0x00000000 },
+ { 0x00000020, 0x34000000, 0x0e001400, 0x00000030 },
+ { 0x03000010, 0x20001a60, 0x1a000f70, 0x00000fc0 },
+ { 0x00110020, 0x34000000, 0x0e001400, 0x00000010 },
+ { 0x00010040, 0x2f601a68, 0x1e000fc0, 0xffd0ffd0 },
+ { 0x05000010, 0x20001a60, 0x1e000f62, 0x00000000 },
+ { 0x00110020, 0x34000000, 0x0e001400, 0x00000020 },
+ { 0x00010001, 0x2f621e68, 0x18000000, 0x00000000 },
+ { 0x00000020, 0x34000000, 0x0e001400, 0x00000020 },
+ { 0x03000010, 0x20001a60, 0x1a000f72, 0x00000fc2 },
+ { 0x00010040, 0x2f621a68, 0x1e000fc2, 0xffd8ffd8 },
+ { 0x00200041, 0x2f801a68, 0x1e450fa8, 0xffffffff },
+ { 0x00200040, 0x2fe41a68, 0x1a450f60, 0x00450f80 },
+ { 0x00000001, 0x34000200, 0x00000fe0, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0020000c, 0x2a801a68, 0x1e450ac0, 0x00020002 },
+ { 0x00200001, 0x24001a68, 0x00450a80, 0x00000000 },
+ { 0x00200040, 0x2a881a68, 0x1e450a80, 0x00030003 },
+ { 0x00200005, 0x2a901248, 0x16450a88, 0xfffcfffc },
+ { 0x05000010, 0x20001a60, 0x1e000400, 0x00000000 },
+ { 0x00010041, 0x24001a68, 0x1e000400, 0xffffffff },
+ { 0x05000010, 0x20001a60, 0x1e000402, 0x00000000 },
+ { 0x00010041, 0x24021a68, 0x1e000402, 0xffffffff },
+ { 0x04000010, 0x20001a60, 0x1e000400, 0x00040004 },
+ { 0x00010020, 0x34000000, 0x0e001400, 0x00000030 },
+ { 0x04000010, 0x20001a60, 0x1e000402, 0x00040004 },
+ { 0x00010020, 0x34000000, 0x0e001400, 0x00000010 },
+ { 0x00000020, 0x34000000, 0x0e001400, 0x000003a0 },
+ { 0x00600001, 0x2c800208, 0x008d0180, 0x00000000 },
+ { 0x00600001, 0x2ca00208, 0x008d01a0, 0x00000000 },
+ { 0x00000001, 0x24401e68, 0x18000000, 0xfff0fff0 },
+ { 0x00000001, 0x24421e68, 0x18000000, 0xfff4fff4 },
+ { 0x00000001, 0x2fa80208, 0x00000448, 0x00000000 },
+ { 0x00200040, 0x2fa01a68, 0x1a450440, 0x00450a90 },
+ { 0x00600001, 0x2fc00208, 0x008d0a40, 0x00000000 },
+ { 0x00000040, 0x2fe00008, 0x06001400, 0x00000020 },
+ { 0x00000020, 0x34000000, 0x0e001400, 0xfffffca0 },
+ { 0x00200001, 0x24401a68, 0x00450fe4, 0x00000000 },
+ { 0x00000001, 0x244c0608, 0x00000000, 0x7e200000 },
+ { 0x00000001, 0x24561648, 0x10000000, 0x28302830 },
+ { 0x00000001, 0x24440208, 0x00000440, 0x00000000 },
+ { 0x00600001, 0x28000208, 0x008d0440, 0x00000000 },
+ { 0x00600001, 0x24600608, 0x00000000, 0x00000000 },
+ { 0x00000001, 0x24600608, 0x00000000, 0x00000002 },
+ { 0x00000001, 0x24642288, 0x0000009c, 0x00000000 },
+ { 0x00000001, 0x24680608, 0x00000000, 0x30003030 },
+ { 0x00600001, 0x28200208, 0x008d0460, 0x00000000 },
+ { 0x00400001, 0x45800208, 0x00000a20, 0x00000000 },
+ { 0x00400001, 0x45840208, 0x00000a20, 0x00000000 },
+ { 0x00600001, 0x28600208, 0x008d0580, 0x00000000 },
+ { 0x00600001, 0x28400208, 0x008d0560, 0x00000000 },
+ { 0x00000001, 0x28800608, 0x00000000, 0x01010101 },
+ { 0x00000001, 0x28840608, 0x00000000, 0x10010101 },
+ { 0x00000001, 0x28880608, 0x00000000, 0x0f0f0f0f },
+ { 0x00000001, 0x288c0608, 0x00000000, 0x100f0f0f },
+ { 0x00000001, 0x28900608, 0x00000000, 0x01010101 },
+ { 0x00000001, 0x28940608, 0x00000000, 0x10010101 },
+ { 0x00000001, 0x28980608, 0x00000000, 0x0f0f0f0f },
+ { 0x00000001, 0x289c0608, 0x00000000, 0x100f0f0f },
+ { 0x00000001, 0x28a00608, 0x00000000, 0x01010101 },
+ { 0x00000001, 0x28a40608, 0x00000000, 0x10010101 },
+ { 0x00000001, 0x28a80608, 0x00000000, 0x0f0f0f0f },
+ { 0x00000001, 0x28ac0608, 0x00000000, 0x000f0f0f },
+ { 0x00400001, 0x28b00608, 0x00000000, 0x00000000 },
+ { 0x08600031, 0x21800a08, 0x0e000800, 0x0c784000 },
+ { 0x00000001, 0x25740608, 0x00000000, 0x00000000 },
+ { 0x00000001, 0x25752288, 0x00000199, 0x00000000 },
+ { 0x00000001, 0x25762288, 0x0000019a, 0x00000000 },
+ { 0x00000005, 0x24001248, 0x16000180, 0x00030003 },
+ { 0x00000001, 0x25742288, 0x00000400, 0x00000000 },
+ { 0x00600001, 0x28800208, 0x008d01a0, 0x00000000 },
+ { 0x00600001, 0x28a00208, 0x008d01c0, 0x00000000 },
+ { 0x00600001, 0x28c00208, 0x008d01e0, 0x00000000 },
+ { 0x00600001, 0x28e00208, 0x008d0200, 0x00000000 },
+ { 0x00000001, 0x244c0608, 0x00000000, 0x00241000 },
+ { 0x00000001, 0x24001648, 0x10000000, 0x00040004 },
+ { 0x00000040, 0x247e2288, 0x2200047e, 0x00000400 },
+ { 0x00600001, 0x28000208, 0x008d0440, 0x00000000 },
+ { 0x00600001, 0x28200208, 0x008d0460, 0x00000000 },
+ { 0x00600001, 0x28400208, 0x008d0560, 0x00000000 },
+ { 0x00600001, 0x28600208, 0x008d0580, 0x00000000 },
+ { 0x0d600031, 0x21800a08, 0x0e000800, 0x10786000 },
+ { 0x05000010, 0x20001240, 0x12000188, 0x00000c88 },
+ { 0x00010020, 0x34000000, 0x0e001400, 0x00000020 },
+ { 0x00600001, 0x21800208, 0x008d0c80, 0x00000000 },
+ { 0x00600001, 0x21a00208, 0x008d0ca0, 0x00000000 },
+ { 0x00000020, 0x34000000, 0x0e001400, 0xfffff6f0 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
diff --git a/src/shaders/vme/mpeg2_inter_haswell.asm b/src/shaders/vme/mpeg2_inter_haswell.asm
new file mode 100644
index 0000000..c224cf0
--- /dev/null
+++ b/src/shaders/vme/mpeg2_inter_haswell.asm
@@ -0,0 +1,860 @@
+/*
+ * Copyright © <2010>, Intel Corporation.
+ *
+ * This program is licensed under the terms and conditions of the
+ * Eclipse Public License (EPL), version 1.0. The full text of the EPL is at
+ * http://www.opensource.org/licenses/eclipse-1.0.php.
+ * Author : Zhao Yakui <yakui.zhao@intel.com>
+ */
+// Module name: mpeg2_inter_haswell.asm
+//
+// Perform MPEG2 inter prediction estimation for inter frames on Haswell
+//
+
+//
+// Now, begin source code....
+//
+
+#define SAVE_RET add (1) RETURN_REG<1>:ud ip:ud 32:ud
+#define RETURN mov (1) ip:ud RETURN_REG<0,1,0>:ud
+
+/*
+ * __INTER_START
+ */
+__INTER_START:
+mov (16) tmp_reg0.0<1>:UD 0x0:UD {align1};
+mov (16) tmp_reg2.0<1>:UD 0x0:UD {align1};
+mov (16) tmp_reg4.0<1>:UD 0x0:UD {align1} ;
+mov (16) tmp_reg6.0<1>:UD 0x0:UD {align1} ;
+
+shl (2) vme_m0.8<1>:UW orig_xy_ub<2,2,1>:UB 4:UW {align1}; /* (x, y) * 16 */
+mov (1) vme_m0.20<1>:UB thread_id_ub {align1}; /* dispatch id */
+
+mul (1) obw_m0.8<1>:UD w_in_mb_uw<0,1,0>:UW orig_y_ub<0,1,0>:UB {align1};
+add (1) obw_m0.8<1>:UD obw_m0.8<0,1,0>:UD orig_x_ub<0,1,0>:UB {align1};
+mul (1) obw_m0.8<1>:UD obw_m0.8<0,1,0>:UD 24:UD {align1};
+mov (1) obw_m0.20<1>:UB thread_id_ub {align1}; /* dispatch id */
+
+shl (2) pic_ref.0<1>:uw r4.24<2,2,1>:uw 4:uw {align1};
+mov (2) pic_ref.16<1>:uw r4.20<2,2,1>:uw {align1};
+mov (8) mb_mvp_ref.0<1>:ud 0:ud {align1};
+mov (8) mb_ref_win.0<1>:ud 0:ud {align1};
+mov (8) mba_result.0<1>:ud 0x0:ud {align1};
+mov (8) mbb_result.0<1>:ud 0x0:ud {align1};
+mov (8) mbc_result.0<1>:ud 0x0:ud {align1};
+
+and.z.f0.0 (1) null:uw mb_hwdep<0,1,0>:uw 0x04:uw {align1};
+(f0.0) jmpi (1) __mb_hwdep_end;
+/* read back the data for MB A */
+/* the layout of MB result is: rX.0(Available), rX.4(MVa), rX.8(MVb), rX.16(Pred_L0 flag),
+* rX.18(Pred_L1 flag), rX.20(Forward reference ID), rX.22(Backward reference ID)
+*/
+mba_start:
+mov (8) mb_msg0.0<1>:ud 0:ud {align1};
+and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_AE:uw {align1};
+/* MB A doesn't exist. Zero MV. mba_flag is zero and ref ID = -1 */
+(f0.0) mov (2) mba_result.20<1>:w -1:w {align1};
+(f0.0) jmpi (1) mbb_start;
+mov (1) mba_result.0<1>:d MB_AVAIL {align1};
+mov (2) tmp_reg0.0<1>:UW orig_xy_ub<2,2,1>:UB {align1};
+add (1) tmp_reg0.0<1>:w tmp_reg0.0<0,1,0>:w -1:w {align1};
+mul (1) mb_msg0.8<1>:UD w_in_mb_uw<0,1,0>:UW tmp_reg0.2<0,1,0>:UW {align1};
+add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD tmp_reg0.0<0,1,0>:uw {align1};
+mul (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD 24:UD {align1};
+mov (1) mb_msg0.20<1>:UB thread_id_ub {align1}; /* dispatch id */
+
+/* bind index 3, read 4 oword (64bytes), msg type: 0(OWord Block Read) */
+send (16)
+ mb_ind
+ mb_wb.0<1>:ud
+ NULL
+ data_port(
+ OBR_CACHE_TYPE,
+ OBR_MESSAGE_TYPE,
+ OBR_CONTROL_4,
+ OBR_BIND_IDX,
+ OBR_WRITE_COMMIT_CATEGORY,
+ OBR_HEADER_PRESENT
+ )
+ mlen 1
+ rlen 2
+ {align1};
+
+/* TODO: RefID is required after multi-references are added */
+cmp.l.f0.0 (1) null:w mb_intra_wb.16<0,1,0>:uw mb_inter_wb.8<0,1,0>:uw {align1};
+(f0.0) mov (2) mba_result.20<1>:w -1:w {align1};
+(f0.0) jmpi (1) mbb_start;
+
+add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:ud 3:ud {align1};
+/* Read MV for MB A */
+/* bind index 3, read 2 oword (32bytes), msg type: 0(OWord Block Read) */
+send (16)
+ mb_ind
+ mb_mv0.0<1>:ud
+ NULL
+ data_port(
+ OBR_CACHE_TYPE,
+ OBR_MESSAGE_TYPE,
+ OBR_CONTROL_2,
+ OBR_BIND_IDX,
+ OBR_WRITE_COMMIT_CATEGORY,
+ OBR_HEADER_PRESENT
+ )
+ mlen 1
+ rlen 1
+ {align1};
+/* TODO: RefID is required after multi-references are added */
+/* MV */
+mov (2) mba_result.4<1>:ud mb_mv0.0<2,2,1>:ud {align1};
+mov (1) mba_result.16<1>:w MB_PRED_FLAG {align1};
+
+mbb_start:
+mov (8) mb_msg0.0<1>:ud 0:ud {align1};
+and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_B:uw {align1};
+/* MB B doesn't exist. Zero MV. The mbb_result available flag is zero */
+/* If MB B doesn't exist, neither MB C nor D exists */
+(f0.0) mov (2) mbb_result.20<1>:w -1:w {align1};
+(f0.0) mov (2) mbc_result.20<1>:w -1:w {align1};
+(f0.0) jmpi (1) mb_mvp_start;
+mov (1) mbb_result.0<1>:d MB_AVAIL {align1};
+mov (2) tmp_reg0.0<1>:UW orig_xy_ub<2,2,1>:UB {align1};
+add (1) tmp_reg0.2<1>:w tmp_reg0.2<0,1,0>:w -1:w {align1};
+mul (1) mb_msg0.8<1>:UD w_in_mb_uw<0,1,0>:UW tmp_reg0.2<0,1,0>:UW {align1};
+add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD tmp_reg0.0<0,1,0>:uw {align1};
+mul (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD 24:UD {align1};
+mov (1) mb_msg0.20<1>:UB thread_id_ub {align1}; /* dispatch id */
+
+/* bind index 3, read 4 oword (64bytes), msg type: 0(OWord Block Read) */
+send (16)
+ mb_ind
+ mb_wb.0<1>:ud
+ NULL
+ data_port(
+ OBR_CACHE_TYPE,
+ OBR_MESSAGE_TYPE,
+ OBR_CONTROL_4,
+ OBR_BIND_IDX,
+ OBR_WRITE_COMMIT_CATEGORY,
+ OBR_HEADER_PRESENT
+ )
+ mlen 1
+ rlen 2
+ {align1};
+
+/* TODO: RefID is required after multi-references are added */
+cmp.l.f0.0 (1) null:w mb_intra_wb.16<0,1,0>:uw mb_inter_wb.8<0,1,0>:uw {align1};
+(f0.0) mov (2) mbb_result.20<1>:w -1:w {align1};
+(f0.0) jmpi (1) mbc_start;
+add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:ud 3:ud {align1};
+/* Read MV for MB B */
+/* bind index 3, read 2 oword (32bytes), msg type: 0(OWord Block Read) */
+send (16)
+ mb_ind
+ mb_mv0.0<1>:ud
+ NULL
+ data_port(
+ OBR_CACHE_TYPE,
+ OBR_MESSAGE_TYPE,
+ OBR_CONTROL_2,
+ OBR_BIND_IDX,
+ OBR_WRITE_COMMIT_CATEGORY,
+ OBR_HEADER_PRESENT
+ )
+ mlen 1
+ rlen 1
+ {align1};
+/* TODO: RefID is required after multi-references are added */
+mov (2) mbb_result.4<1>:ud mb_mv0.0<2,2,1>:ud {align1};
+mov (1) mbb_result.16<1>:w MB_PRED_FLAG {align1};
+
+mbc_start:
+mov (8) mb_msg0.0<1>:ud 0:ud {align1};
+and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_C:uw {align1};
+/* MB C doesn't exist. Zero MV. The mbc_result available flag is zero */
+/* Based on h264 spec MB D is used in place of MB C if MB C doesn't exist */
+(f0.0) jmpi (1) mbd_start;
+mov (1) mbc_result.0<1>:d MB_AVAIL {align1};
+mov (2) tmp_reg0.0<1>:UW orig_xy_ub<2,2,1>:UB {align1};
+add (1) tmp_reg0.2<1>:w tmp_reg0.2<0,1,0>:w -1:w {align1};
+add (1) tmp_reg0.0<1>:w tmp_reg0.0<0,1,0>:w 1:w {align1};
+mul (1) mb_msg0.8<1>:UD w_in_mb_uw<0,1,0>:UW tmp_reg0.2<0,1,0>:UW {align1};
+add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD tmp_reg0.0<0,1,0>:uw {align1};
+mul (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD 24:UD {align1};
+mov (1) mb_msg0.20<1>:UB thread_id_ub {align1}; /* dispatch id */
+
+/* bind index 3, read 4 oword (64bytes), msg type: 0(OWord Block Read) */
+send (16)
+ mb_ind
+ mb_wb.0<1>:ud
+ NULL
+ data_port(
+ OBR_CACHE_TYPE,
+ OBR_MESSAGE_TYPE,
+ OBR_CONTROL_4,
+ OBR_BIND_IDX,
+ OBR_WRITE_COMMIT_CATEGORY,
+ OBR_HEADER_PRESENT
+ )
+ mlen 1
+ rlen 2
+ {align1};
+
+/* TODO: RefID is required after multi-references are added */
+cmp.l.f0.0 (1) null:w mb_intra_wb.16<0,1,0>:uw mb_inter_wb.8<0,1,0>:uw {align1};
+(f0.0) mov (2) mbc_result.20<1>:w -1:w {align1};
+(f0.0) jmpi (1) mb_mvp_start;
+add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:ud 3:ud {align1};
+/* Read MV for MB C */
+/* bind index 3, read 2 oword (32bytes), msg type: 0(OWord Block Read) */
+send (16)
+ mb_ind
+ mb_mv0.0<1>:ud
+ NULL
+ data_port(
+ OBR_CACHE_TYPE,
+ OBR_MESSAGE_TYPE,
+ OBR_CONTROL_2,
+ OBR_BIND_IDX,
+ OBR_WRITE_COMMIT_CATEGORY,
+ OBR_HEADER_PRESENT
+ )
+ mlen 1
+ rlen 1
+ {align1};
+/* TODO: RefID is required after multi-references are added */
+/* Forward MV */
+mov (2) mbc_result.4<1>:ud mb_mv0.0<2,2,1>:ud {align1};
+mov (1) mbc_result.16<1>:w MB_PRED_FLAG {align1};
+
+jmpi (1) mb_mvp_start;
+mbd_start:
+mov (8) mb_msg0.0<1>:ud 0:ud {align1};
+and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_D:uw {align1};
+(f0.0) jmpi (1) mb_mvp_start;
+mov (1) mbc_result.0<1>:d MB_AVAIL {align1};
+mov (2) tmp_reg0.0<1>:UW orig_xy_ub<2,2,1>:UB {align1};
+add (2) tmp_reg0.0<1>:w tmp_reg0.0<2,2,1>:w -1:w {align1};
+mul (1) mb_msg0.8<1>:UD w_in_mb_uw<0,1,0>:UW tmp_reg0.2<0,1,0>:UW {align1};
+add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD tmp_reg0.0<0,1,0>:uw {align1};
+mul (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD 24:UD {align1};
+mov (1) mb_msg0.20<1>:UB thread_id_ub {align1}; /* dispatch id */
+
+/* bind index 3, read 4 oword (64bytes), msg type: 0(OWord Block Read) */
+send (16)
+ mb_ind
+ mb_wb.0<1>:ud
+ NULL
+ data_port(
+ OBR_CACHE_TYPE,
+ OBR_MESSAGE_TYPE,
+ OBR_CONTROL_4,
+ OBR_BIND_IDX,
+ OBR_WRITE_COMMIT_CATEGORY,
+ OBR_HEADER_PRESENT
+ )
+ mlen 1
+ rlen 2
+ {align1};
+
+cmp.l.f0.0 (1) null:w mb_intra_wb.16<0,1,0>:uw mb_inter_wb.8<0,1,0>:uw {align1};
+(f0.0) mov (2) mbc_result.20<1>:w -1:w {align1};
+(f0.0) jmpi (1) mb_mvp_start;
+
+add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:ud 3:ud {align1};
+/* Read MV for MB D */
+/* bind index 3, read 2 oword (32bytes), msg type: 0(OWord Block Read) */
+send (16)
+ mb_ind
+ mb_mv0.0<1>:ub
+ NULL
+ data_port(
+ OBR_CACHE_TYPE,
+ OBR_MESSAGE_TYPE,
+ OBR_CONTROL_2,
+ OBR_BIND_IDX,
+ OBR_WRITE_COMMIT_CATEGORY,
+ OBR_HEADER_PRESENT
+ )
+ mlen 1
+ rlen 1
+ {align1};
+
+/* TODO: RefID is required after multi-references are added */
+
+/* Forward MV */
+mov (2) mbc_result.4<1>:ud mb_mv0.0<2,2,1>:ud {align1};
+mov (1) mbc_result.16<1>:w MB_PRED_FLAG {align1};
+
+mb_mvp_start:
+/*TODO: Add the skip prediction */
+/* Check whether both MB B and C are unavailable */
+add (1) tmp_reg0.0<1>:d mbb_result.0<0,1,0>:d mbc_result.0<0,1,0>:d {align1};
+cmp.z.f0.0 (1) null:d tmp_reg0.0<0,1,0>:d 0:d {align1};
+(-f0.0) jmpi (1) mb_median_start;
+cmp.nz.f0.0 (1) null:d mba_result.0<0,1,0>:d 0:d {align1};
+(f0.0) mov (1) mbb_result.4<1>:ud mba_result.4<0,1,0>:ud {align1};
+(f0.0) mov (1) mbc_result.4<1>:ud mba_result.4<0,1,0>:ud {align1};
+(f0.0) mov (1) mbb_result.20<1>:uw mba_result.20<0,1,0>:uw {align1};
+(f0.0) mov (1) mbc_result.20<1>:uw mba_result.20<0,1,0>:uw {align1};
+(f0.0) mov (1) mb_mvp_ref.0<1>:ud mba_result.4<0,1,0>:ud {align1};
+(-f0.0) mov (1) mb_mvp_ref.0<1>:ud 0:ud {align1};
+jmpi (1) __mb_hwdep_end;
+
+mb_median_start:
+/* check whether only one neighbour MB has the same ref ID as the current MB */
+mov (8) tmp_reg0.0<1>:ud 0:ud {align1};
+cmp.z.f0.0 (1) null:d mba_result.20<1>:w 0:w {align1};
+(f0.0) add (1) tmp_reg0.0<1>:w tmp_reg0.0<1>:w 1:w {align1};
+(f0.0) mov (1) tmp_reg0.4<1>:ud mba_result.4<0,1,0>:ud {align1};
+cmp.z.f0.0 (1) null:d mbb_result.20<1>:w 0:w {align1};
+(f0.0) add (1) tmp_reg0.0<1>:w tmp_reg0.0<1>:w 1:w {align1};
+(f0.0) mov (1) tmp_reg0.4<1>:ud mbb_result.4<0,1,0>:ud {align1};
+cmp.z.f0.0 (1) null:d mbc_result.20<1>:w 0:w {align1};
+(f0.0) add (1) tmp_reg0.0<1>:w tmp_reg0.0<1>:w 1:w {align1};
+(f0.0) mov (1) tmp_reg0.4<1>:ud mbc_result.4<0,1,0>:ud {align1};
+cmp.e.f0.0 (1) null:d tmp_reg0.0<1>:w 1:w {align1};
+(f0.0) mov (1) mb_mvp_ref.0<1>:ud tmp_reg0.4<0,1,0>:ud {align1};
+(f0.0) jmpi (1) __mb_hwdep_end;
+
+mov (1) INPUT_ARG0.0<1>:w mba_result.4<0,1,0>:w {align1};
+mov (1) INPUT_ARG0.4<1>:w mbb_result.4<0,1,0>:w {align1};
+mov (1) INPUT_ARG0.8<1>:w mbc_result.4<0,1,0>:w {align1};
+SAVE_RET {align1};
+ jmpi (1) word_imedian;
+mov (1) mb_mvp_ref.0<1>:w RET_ARG<0,1,0>:w {align1};
+mov (1) INPUT_ARG0.0<1>:w mba_result.6<0,1,0>:w {align1};
+mov (1) INPUT_ARG0.4<1>:w mbb_result.6<0,1,0>:w {align1};
+mov (1) INPUT_ARG0.8<1>:w mbc_result.6<0,1,0>:w {align1};
+SAVE_RET {align1};
+jmpi (1) word_imedian;
+mov (1) mb_mvp_ref.2<1>:w RET_ARG<0,1,0>:w {align1};
+
+__mb_hwdep_end:
+
+mov (2) mv_cc_ref.0<1>:w mba_result.4<2,2,1>:w {align1};
+
+/* Calibrate the ref window for MPEG2 */
+mov (1) vme_m0.0<1>:W -16:W {align1};
+mov (1) vme_m0.2<1>:W -12:W {align1};
+
+mov (1) INPUT_ARG0.0<1>:ud vme_m0.0<0,1,0>:ud {align1};
+mov (1) INPUT_ARG0.8<1>:ud vme_m0.8<0,1,0>:ud {align1};
+mov (8) INPUT_ARG1.0<1>:ud pic_ref.0<8,8,1>:ud {align1};
+
+SAVE_RET {align1};
+jmpi (1) ref_boundary_check;
+mov (2) vme_m0.0<1>:w RET_ARG<2,2,1>:w {align1};
+
+/* m2, get the MV/Mb cost passed from constant buffer when
+spawning thread by MEDIA_OBJECT */
+mov (8) vme_m2<1>:UD r1.0<8,8,1>:UD {align1};
+
+mov (8) vme_msg_2<1>:UD vme_m2.0<8,8,1>:UD {align1};
+
+/* m3 */
+mov (8) vme_msg_3<1>:UD 0x0:UD {align1};
+
+/* the neighbour pixel is zero for MPEG2 Intra-prediction */
+
+/* m4 */
+mov (8) vme_msg_4<1>:UD 0:UD {align1};
+mov (1) tmp_reg0.0<1>:UW LUMA_INTRA_MODE:UW {align1};
+/* Use the Luma mode */
+mov (1) vme_msg_4.5<1>:UB tmp_reg0.0<0,1,0>:UB {align1};
+mov (1) tmp_reg0.0<1>:UW INTRA16_DC_PRED:UW {align1};
+mov (1) vme_msg_4.4<1>:ub tmp_reg0.0<0,1,0>:UB {align1};
+
+/* m5 */
+mov (8) vme_msg_5<1>:UD 0x0:UD {align1};
+mov (1) vme_msg_5.16<1>:UD INTRA_PREDICTORE_MODE {align1};
+
+/* the penalty for Intra mode */
+mov (1) vme_msg_5.28<1>:UD 0x010101:UD {align1};
+
+
+/* m6 */
+mov (8) vme_msg_6.0<1>:UD 0:Ud {align1};
+
+/*
+ * SIC VME message
+ */
+/* m0 */
+mov (8) vme_msg_0.0<1>:UD vme_m0.0<8,8,1>:UD {align1};
+
+/* Disable Intra8x8/Intra4x4 Intra-prediction */
+/* m1 */
+mov (8) vme_m1.0<1>:ud 0x0:UD {align1};
+
+mov (1) intra_flag<1>:UW 0x0:UW {align1} ;
+mov (1) tmp_reg0.0<1>:uw LUMA_INTRA_8x8_DISABLE:uw {align1};
+add (1) tmp_reg0.0<1>:uw tmp_reg0.0<0,1,0>:uw LUMA_INTRA_4x4_DISABLE:uw {align1};
+mov (1) intra_part_mask_ub<1>:UB tmp_reg0.0<0,1,0>:ub {align1};
+
+/* assign MB intra struct from the thread payload*/
+mov (1) mb_intra_struct_ub<1>:UB input_mb_intra_ub<0,1,0>:UB {align1};
+
+/* Enable DC HAAR component when calculating HAAR SATD block */
+mov (1) tmp_reg0.0<1>:UW DC_HARR_ENABLE:UW {align1};
+mov (1) vme_m1.30<1>:UB tmp_reg0.0<0,1,0>:UB {align1};
+
+mov (1) vme_m0.12<1>:UD INTRA_SAD_HAAR:UD {align1}; /* 16x16 Source, Intra_harr */
+/* m0 */
+mov (8) vme_msg_0.0<1>:UD vme_m0.0<8,8,1>:UD {align1};
+mov (8) vme_msg_1<1>:UD vme_m1.0<8,8,1>:UD {align1};
+
+/* after verification it will be passed by using payload */
+send (8)
+ vme_msg_ind
+ vme_wb<1>:UD
+ null
+ cre(
+ BIND_IDX_VME,
+ VME_SIC_MESSAGE_TYPE
+ )
+ mlen sic_vme_msg_length
+ rlen vme_wb_length
+ {align1};
+
+/*
+ * Oword Block Write message
+ */
+mov (8) msg_reg0.0<1>:UD obw_m0<8,8,1>:UD {align1};
+
+mov (1) msg_reg1.0<1>:UD vme_wb.0<0,1,0>:UD {align1};
+mov (1) msg_reg1.4<1>:UD vme_wb.16<0,1,0>:UD {align1};
+mov (1) msg_reg1.8<1>:UD vme_wb.20<0,1,0>:UD {align1};
+mov (1) msg_reg1.12<1>:UD vme_wb.24<0,1,0>:UD {align1};
+
+/* Distortion, Intra (17-16), */
+mov (1) msg_reg1.16<1>:UW vme_wb.12<0,1,0>:UW {align1};
+
+mov (1) msg_reg1.20<1>:UD vme_wb.8<0,1,0>:UD {align1};
+/* VME clock counts */
+mov (1) msg_reg1.24<1>:UD vme_wb.28<0,1,0>:UD {align1};
+
+mov (1) msg_reg1.28<1>:UD obw_m0.8<0,1,0>:UD {align1};
+
+/* bind index 3, write 2 oword (32bytes), msg type: 8(OWord Block Write) */
+send (16)
+ msg_ind
+ obw_wb
+ null
+ data_port(
+ OBW_CACHE_TYPE,
+ OBW_MESSAGE_TYPE,
+ OBW_CONTROL_2,
+ OBW_BIND_IDX,
+ OBW_WRITE_COMMIT_CATEGORY,
+ OBW_HEADER_PRESENT
+ )
+ mlen 2
+ rlen obw_wb_length
+ {align1};
+
+/* IME search */
+mov (1) vme_m0.12<1>:UD SEARCH_CTRL_SINGLE + INTER_PART_MASK + INTER_SAD_HAAR:UD {align1}; /* 16x16 Source, harr */
+mov (1) vme_m0.22<1>:UW REF_REGION_SIZE {align1}; /* Reference Width&Height, 48x40 */
+
+mov (1) vme_m0.4<1>:UD vme_m0.0<0,1,0>:UD {align1};
+
+mov (8) vme_msg_0.0<1>:UD vme_m0.0<8,8,1>:UD {align1};
+
+mov (1) vme_m1.0<1>:UD ADAPTIVE_SEARCH_ENABLE:ud {align1} ;
+/* the Max MV number is passed by constant buffer */
+mov (1) vme_m1.4<1>:UB r4.28<0,1,0>:UB {align1};
+mov (1) vme_m1.8<1>:UD START_CENTER + SEARCH_PATH_LEN:UD {align1};
+/* Set the MV cost center */
+mov (1) vme_m1.16<1>:ud mv_cc_ref.0<0,1,0>:ud {align1};
+mov (1) vme_m1.20<1>:ud mv_cc_ref.0<0,1,0>:ud {align1};
+
+mov (8) vme_msg_1.0<1>:UD vme_m1.0<8,8,1>:UD {align1};
+
+mov (8) vme_msg_2<1>:UD vme_m2.0<8,8,1>:UD {align1};
+/* M3/M4 search path */
+
+mov (1) vme_msg_3.0<1>:UD 0x01010101:UD {align1};
+mov (1) vme_msg_3.4<1>:UD 0x10010101:UD {align1};
+mov (1) vme_msg_3.8<1>:UD 0x0F0F0F0F:UD {align1};
+mov (1) vme_msg_3.12<1>:UD 0x100F0F0F:UD {align1};
+mov (1) vme_msg_3.16<1>:UD 0x01010101:UD {align1};
+mov (1) vme_msg_3.20<1>:UD 0x10010101:UD {align1};
+mov (1) vme_msg_3.24<1>:UD 0x0F0F0F0F:UD {align1};
+mov (1) vme_msg_3.28<1>:UD 0x100F0F0F:UD {align1};
+
+mov (1) vme_msg_4.0<1>:UD 0x01010101:UD {align1};
+mov (1) vme_msg_4.4<1>:UD 0x10010101:UD {align1};
+mov (1) vme_msg_4.8<1>:UD 0x0F0F0F0F:UD {align1};
+mov (1) vme_msg_4.12<1>:UD 0x000F0F0F:UD {align1};
+
+mov (4) vme_msg_4.16<1>:UD 0x0:UD {align1};
+
+send (8)
+ vme_msg_ind
+ vme_wb<1>:UD
+ null
+ vme(
+ BIND_IDX_VME,
+ 0,
+ 0,
+ VME_IME_MESSAGE_TYPE
+ )
+ mlen ime_vme_msg_length
+ rlen vme_wb_length {align1};
+
+/* Set Macroblock-shape/mode for FBR */
+
+mov (1) vme_m2.20<1>:UD 0x0:UD {align1};
+mov (1) vme_m2.21<1>:UB vme_wb.25<0,1,0>:UB {align1};
+mov (1) vme_m2.22<1>:UB vme_wb.26<0,1,0>:UB {align1};
+
+and (1) tmp_reg0.0<1>:UW vme_wb.0<0,1,0>:UW 0x03:UW {align1};
+mov (1) vme_m2.20<1>:UB tmp_reg0.0<0,1,0>:UB {align1};
+
+/* Send FBR message into CRE */
+
+mov (8) vme_msg_3.0<1>:UD vme_wb1.0<8,8,1>:UD {align1};
+mov (8) vme_msg_4.0<1>:ud vme_wb2.0<8,8,1>:ud {align1};
+mov (8) vme_msg_5.0<1>:ud vme_wb3.0<8,8,1>:ud {align1};
+mov (8) vme_msg_6.0<1>:ud vme_wb4.0<8,8,1>:ud {align1};
+
+mov (1) vme_m0.12<1>:UD INTER_SAD_HAAR + SUB_PEL_MODE_HALF + FBR_BME_DISABLE:UD {align1}; /* 16x16 Source, 1/2 pixel, harr, BME disable */
+
+/* Bilinear filter */
+mov (1) tmp_reg0.0<1>:uw 0x04:uw {align1};
+add (1) vme_m1.30<1>:ub vme_m1.30<0,1,0>:ub tmp_reg0.0<0,1,0>:ub {align1};
+
+mov (8) vme_msg_0.0<1>:UD vme_m0.0<8,8,1>:UD {align1};
+mov (8) vme_msg_1.0<1>:UD vme_m1.0<8,8,1>:UD {align1};
+
+mov (8) vme_msg_2.0<1>:UD vme_m2.0<8,8,1>:UD {align1};
+
+/* after verification it will be passed by using payload */
+send (8)
+ vme_msg_ind
+ vme_wb<1>:UD
+ null
+ cre(
+ BIND_IDX_VME,
+ VME_FBR_MESSAGE_TYPE
+ )
+ mlen fbr_vme_msg_length
+ rlen vme_wb_length
+ {align1};
+
+and.z.f0.0 (1) null:uw mb_hwdep<0,1,0>:uw 0x04:uw {align1};
+(-f0.0) jmpi (1) vme_run_again;
+nop;
+vme_mv_output:
+
+add (1) obw_m0.8<1>:UD obw_m0.8<0,1,0>:UD 0x02:UD {align1};
+mov (8) msg_reg0.0<1>:UD obw_m0<8,8,1>:UD {align1};
+/* write FME info */
+mov (1) msg_reg1.0<1>:UD vme_wb.0<0,1,0>:UD {align1};
+
+mov (1) msg_reg1.4<1>:UD vme_wb.24<0,1,0>:UD {align1};
+/* Inter distortion of FME */
+mov (1) msg_reg1.8<1>:UD vme_wb.8<0,1,0>:UD {align1};
+
+mov (1) msg_reg1.12<1>:UD vme_m2.20<0,1,0>:UD {align1};
+
+/* bind index 3, write oword (16bytes), msg type: 8(OWord Block Write) */
+send (16)
+ msg_ind
+ obw_wb
+ null
+ data_port(
+ OBW_CACHE_TYPE,
+ OBW_MESSAGE_TYPE,
+ OBW_CONTROL_0,
+ OBW_BIND_IDX,
+ OBW_WRITE_COMMIT_CATEGORY,
+ OBW_HEADER_PRESENT
+ )
+ mlen 2
+ rlen obw_wb_length
+ {align1};
+
+
+/* Write FME/BME MV */
+add (1) obw_m0.8<1>:UD obw_m0.8<0,1,0>:UD 0x01:UD {align1};
+mov (8) msg_reg0.0<1>:UD obw_m0.0<8,8,1>:UD {align1};
+
+
+mov (8) msg_reg1.0<1>:UD vme_wb1.0<8,8,1>:UD {align1};
+mov (8) msg_reg2.0<1>:ud vme_wb2.0<8,8,1>:ud {align1};
+mov (8) msg_reg3.0<1>:ud vme_wb3.0<8,8,1>:ud {align1};
+mov (8) msg_reg4.0<1>:ud vme_wb4.0<8,8,1>:ud {align1};
+/* bind index 3, write 2 oword (32 bytes), msg type: 8(OWord Block Write) */
+send (16)
+ msg_ind
+ obw_wb
+ null
+ data_port(
+ OBW_CACHE_TYPE,
+ OBW_MESSAGE_TYPE,
+ OBW_CONTROL_2,
+ OBW_BIND_IDX,
+ OBW_WRITE_COMMIT_CATEGORY,
+ OBW_HEADER_PRESENT
+ )
+ mlen 2
+ rlen obw_wb_length
+ {align1};
+
+/* Write FME/BME RefID */
+add (1) obw_m0.8<1>:UD obw_m0.8<0,1,0>:UD 0x08:UD {align1};
+mov (8) msg_reg0.0<1>:UD obw_m0<8,8,1>:UD {align1};
+
+mov (8) msg_reg1.0<1>:UD vme_wb6.0<8,8,1>:UD {align1};
+
+/* bind index 3, write 2 oword (32bytes), msg type: 8(OWord Block Write) */
+send (16)
+ msg_ind
+ obw_wb
+ null
+ data_port(
+ OBW_CACHE_TYPE,
+ OBW_MESSAGE_TYPE,
+ OBW_CONTROL_2,
+ OBW_BIND_IDX,
+ OBW_WRITE_COMMIT_CATEGORY,
+ OBW_HEADER_PRESENT
+ )
+ mlen 2
+ rlen obw_wb_length
+ {align1};
+
+
+/* Issue message fence so that the previous write message is committed */
+send (16)
+ mb_ind
+ mb_wb.0<1>:ud
+ NULL
+ data_port(
+ OBR_CACHE_TYPE,
+ OBR_MESSAGE_FENCE,
+ OBR_MF_COMMIT,
+ OBR_BIND_IDX,
+ OBR_WRITE_COMMIT_CATEGORY,
+ OBR_HEADER_PRESENT
+ )
+ mlen 1
+ rlen 1
+ {align1};
+
+__EXIT:
+/*
+ * kill thread
+ */
+mov (8) ts_msg_reg0<1>:UD r0<8,8,1>:UD {align1};
+send (16) ts_msg_ind acc0<1>UW null thread_spawner(0, 0, 1) mlen 1 rlen 0 {align1 EOT};
+
+ nop ;
+ nop ;
+
+word_imedian:
+ cmp.ge.f0.0 (1) null:w INPUT_ARG0.0<0,1,0>:w INPUT_ARG0.4<0,1,0>:w {align1};
+ (f0.0) jmpi (1) cmp_a_ge_b;
+ cmp.ge.f0.0 (1) null:w INPUT_ARG0.0<0,1,0>:w INPUT_ARG0.8<0,1,0>:w {align1};
+ (f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.0<0,1,0>:w {align1};
+ (f0.0) jmpi (1) cmp_end;
+ cmp.ge.f0.0 (1) null:w INPUT_ARG0.4<0,1,0>:w INPUT_ARG0.8<0,1,0>:w {align1};
+ (f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.8<0,1,0>:w {align1};
+ (-f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.4<0,1,0>:w {align1};
+ jmpi (1) cmp_end;
+cmp_a_ge_b:
+ cmp.ge.f0.0 (1) null:w INPUT_ARG0.4<0,1,0>:w INPUT_ARG0.8<0,1,0>:w {align1};
+ (f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.4<0,1,0>:w {align1};
+ (f0.0) jmpi (1) cmp_end;
+ cmp.ge.f0.0 (1) null:w INPUT_ARG0.0<0,1,0>:w INPUT_ARG0.8<0,1,0>:w {align1};
+ (f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.8<0,1,0>:w {align1};
+ (-f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.0<0,1,0>:w {align1};
+cmp_end:
+ RETURN {align1};
+
+nop;
+nop;
+
+ref_boundary_check:
+
+/* The left/up coordinate of reference window */
+add (2) TEMP_VAR0.0<1>:w INPUT_ARG0.8<2,2,1>:w INPUT_ARG0.0<2,2,1>:w {align1};
+/* The right/bottom coordinate of reference window */
+add (1) TEMP_VAR0.16<1>:w TEMP_VAR0.0<0,1,0>:w 48:w {align1};
+add (1) TEMP_VAR0.18<1>:w TEMP_VAR0.2<0,1,0>:w 40:w {align1};
+
+/* Firstly the MV range is checked */
+mul (2) TEMP_VAR1.16<1>:w INPUT_ARG1.16<2,2,1>:w -1:w {align1};
+add (2) TEMP_VAR1.0<1>:w INPUT_ARG0.8<2,2,1>:w TEMP_VAR1.16<2,2,1>:w {align1};
+add (2) TEMP_VAR1.4<1>:w INPUT_ARG0.8<2,2,1>:w INPUT_ARG1.16<2,2,1>:w {align1};
+
+cmp.l.f0.0 (1) null:w TEMP_VAR0.0<0,1,0>:w TEMP_VAR1.0<0,1,0>:w {align1};
+(f0.0) mov (1) TEMP_VAR0.0<1>:w TEMP_VAR1.0<0,1,0>:w {align1};
+cmp.g.f0.0 (1) null:w TEMP_VAR0.16<0,1,0>:w TEMP_VAR1.4<0,1,0>:w {align1};
+(f0.0) add (1) TEMP_VAR0.0<1>:w TEMP_VAR1.4<0,1,0>:w -48:w {align1};
+cmp.l.f0.0 (1) null:w TEMP_VAR0.2<0,1,0>:w TEMP_VAR1.2<0,1,0>:w {align1};
+(f0.0) mov (1) TEMP_VAR0.2<1>:w TEMP_VAR1.2<0,1,0>:w {align1};
+cmp.g.f0.0 (1) null:w TEMP_VAR0.18<0,1,0>:w TEMP_VAR1.6<0,1,0>:w {align1};
+(f0.0) add (1) TEMP_VAR0.2<1>:w TEMP_VAR1.6<0,1,0>:w -40:w {align1};
+
+x_left_cmp:
+ cmp.l.f0.0 (1) null:w TEMP_VAR0.0<0,1,0>:w 0:w {align1};
+ (-f0.0) jmpi (1) x_right_cmp;
+ (f0.0) mov (1) TEMP_VAR0.0<1>:w 0:w {align1};
+ jmpi (1) y_top_cmp;
+x_right_cmp:
+ cmp.g.f0.0 (1) null:w TEMP_VAR0.16<0,1,0>:w INPUT_ARG1.0<0,1,0>:w {align1};
+ (-f0.0) jmpi (1) y_top_cmp;
+ (f0.0) add (1) TEMP_VAR0.0<1>:w INPUT_ARG1.0<0,1,0>:w -48:w {align1};
+y_top_cmp:
+ cmp.l.f0.0 (1) null:w TEMP_VAR0.2<0,1,0>:w 0:w {align1};
+ (-f0.0) jmpi (1) y_bottom_cmp;
+ (f0.0) mov (1) TEMP_VAR0.2<1>:w 0:w {align1};
+ jmpi (1) y_bottom_end;
+y_bottom_cmp:
+ cmp.g.f0.0 (1) null:w TEMP_VAR0.18<0,1,0>:w INPUT_ARG1.2<0,1,0>:w {align1};
+ (f0.0) add (1) TEMP_VAR0.2<1>:w INPUT_ARG1.2<0,1,0>:w -40:w {align1};
+
+y_bottom_end:
+mul (2) TEMP_VAR1.0<1>:w INPUT_ARG0.8<2,2,1>:w -1:w {align1};
+add (2) RET_ARG<1>:w TEMP_VAR0.0<2,2,1>:w TEMP_VAR1.0<2,2,1>:w {align1};
+ RETURN {align1};
+nop;
+nop;
+
+vme_run_again:
+
+asr (2) mb_ref_win.0<1>:w mb_mvp_ref.0<2,2,1>:w 2:w {align1};
+mov (2) tmp_reg0.0<1>:w mb_ref_win.0<2,2,1>:w {align1};
+add (2) mb_ref_win.8<1>:w mb_ref_win.0<2,2,1>:w 3:w {align1};
+and (2) mb_ref_win.16<1>:uw mb_ref_win.8<2,2,1>:uw 0xFFFC:uw {align1};
+
+cmp.l.f0.0 (1) null:w tmp_reg0.0<0,1,0>:w 0:w {align1};
+(f0.0) mul (1) tmp_reg0.0<1>:w tmp_reg0.0<0,1,0>:w -1:w {align1};
+cmp.l.f0.0 (1) null:w tmp_reg0.2<0,1,0>:w 0:w {align1};
+(f0.0) mul (1) tmp_reg0.2<1>:w tmp_reg0.2<0,1,0>:w -1:w {align1};
+
+cmp.ge.f0.0 (1) null:w tmp_reg0.0<0,1,0>:w 4:w {align1};
+(f0.0) jmpi (1) vme_start;
+cmp.ge.f0.0 (1) null:w tmp_reg0.2<0,1,0>:w 4:w {align1};
+(f0.0) jmpi (1) vme_start;
+
+jmpi (1) vme_done;
+
+vme_start:
+ mov (8) tmp_vme_wb0.0<1>:ud vme_wb0.0<8,8,1>:ud {align1};
+ mov (8) tmp_vme_wb1.0<1>:ud vme_wb1.0<8,8,1>:ud {align1};
+
+/* Calibrate the ref window for MPEG2 */
+mov (1) vme_m0.0<1>:W -16:W {align1};
+mov (1) vme_m0.2<1>:W -12:W {align1};
+mov (4) INPUT_ARG0.0<1>:ud vme_m0.0<4,4,1>:ud {align1};
+add (2) INPUT_ARG0.0<1>:w INPUT_ARG0.0<2,2,1>:w mb_ref_win.16<2,2,1>:w {align1};
+mov (8) INPUT_ARG1.0<1>:ud pic_ref.0<8,8,1>:ud {align1};
+
+SAVE_RET {align1};
+jmpi (1) ref_boundary_check;
+mov (2) vme_m0.0<1>:w RET_ARG<2,2,1>:w {align1};
+
+/* IME search */
+mov (1) vme_m0.12<1>:UD SEARCH_CTRL_SINGLE + INTER_PART_MASK + INTER_SAD_HAAR:UD {align1}; /* 16x16 Source, harr */
+mov (1) vme_m0.22<1>:UW REF_REGION_SIZE {align1}; /* Reference Width&Height, 48x40 */
+
+mov (1) vme_m0.4<1>:UD vme_m0.0<0,1,0>:UD {align1};
+
+mov (8) vme_msg_0.0<1>:UD vme_m0.0<8,8,1>:UD {align1};
+
+mov (8) vme_m1.0<1>:ud 0x0:UD {align1};
+
+mov (1) vme_m1.0<1>:UD ADAPTIVE_SEARCH_ENABLE:ud {align1} ;
+/* the Max MV number is passed by constant buffer */
+mov (1) vme_m1.4<1>:UB r4.28<0,1,0>:UB {align1};
+mov (1) vme_m1.8<1>:UD START_CENTER + SEARCH_PATH_LEN:UD {align1};
+/* Set the MV cost center */
+mov (1) vme_m1.16<1>:ud mv_cc_ref.0<0,1,0>:ud {align1};
+mov (1) vme_m1.20<1>:ud mv_cc_ref.0<0,1,0>:ud {align1};
+mov (8) vme_msg_1.0<1>:UD vme_m1.0<8,8,1>:UD {align1};
+
+mov (8) vme_msg_2<1>:UD vme_m2.0<8,8,1>:UD {align1};
+/* M3/M4 search path */
+
+mov (1) vme_msg_3.0<1>:UD 0x01010101:UD {align1};
+mov (1) vme_msg_3.4<1>:UD 0x10010101:UD {align1};
+mov (1) vme_msg_3.8<1>:UD 0x0F0F0F0F:UD {align1};
+mov (1) vme_msg_3.12<1>:UD 0x100F0F0F:UD {align1};
+mov (1) vme_msg_3.16<1>:UD 0x01010101:UD {align1};
+mov (1) vme_msg_3.20<1>:UD 0x10010101:UD {align1};
+mov (1) vme_msg_3.24<1>:UD 0x0F0F0F0F:UD {align1};
+mov (1) vme_msg_3.28<1>:UD 0x100F0F0F:UD {align1};
+
+mov (1) vme_msg_4.0<1>:UD 0x01010101:UD {align1};
+mov (1) vme_msg_4.4<1>:UD 0x10010101:UD {align1};
+mov (1) vme_msg_4.8<1>:UD 0x0F0F0F0F:UD {align1};
+mov (1) vme_msg_4.12<1>:UD 0x000F0F0F:UD {align1};
+
+mov (4) vme_msg_4.16<1>:UD 0x0:UD {align1};
+
+send (8)
+ vme_msg_ind
+ vme_wb<1>:UD
+ null
+ vme(
+ BIND_IDX_VME,
+ 0,
+ 0,
+ VME_IME_MESSAGE_TYPE
+ )
+ mlen ime_vme_msg_length
+ rlen vme_wb_length {align1};
+
+/* Set Macroblock-shape/mode for FBR */
+
+mov (1) vme_m2.20<1>:UD 0x0:UD {align1};
+mov (1) vme_m2.21<1>:UB vme_wb.25<0,1,0>:UB {align1};
+mov (1) vme_m2.22<1>:UB vme_wb.26<0,1,0>:UB {align1};
+
+and (1) tmp_reg0.0<1>:UW vme_wb.0<0,1,0>:UW 0x03:UW {align1};
+mov (1) vme_m2.20<1>:UB tmp_reg0.0<0,1,0>:UB {align1};
+
+/* Send FBR message into CRE */
+
+mov (8) vme_msg_3.0<1>:UD vme_wb1.0<8,8,1>:UD {align1};
+mov (8) vme_msg_4.0<1>:ud vme_wb2.0<8,8,1>:ud {align1};
+mov (8) vme_msg_5.0<1>:ud vme_wb3.0<8,8,1>:ud {align1};
+mov (8) vme_msg_6.0<1>:ud vme_wb4.0<8,8,1>:ud {align1};
+
+mov (1) vme_m0.12<1>:UD INTER_SAD_HAAR + SUB_PEL_MODE_HALF + FBR_BME_DISABLE:UD {align1}; /* 16x16 Source, 1/2 pixel, harr, BME disable */
+
+/* Bilinear filter */
+mov (1) tmp_reg0.0<1>:uw 0x04:uw {align1};
+add (1) vme_m1.30<1>:ub vme_m1.30<0,1,0>:ub tmp_reg0.0<0,1,0>:ub {align1};
+
+mov (8) vme_msg_0.0<1>:UD vme_m0.0<8,8,1>:UD {align1};
+mov (8) vme_msg_1.0<1>:UD vme_m1.0<8,8,1>:UD {align1};
+
+mov (8) vme_msg_2.0<1>:UD vme_m2.0<8,8,1>:UD {align1};
+
+/* after verification it will be passed by using payload */
+send (8)
+ vme_msg_ind
+ vme_wb<1>:UD
+ null
+ cre(
+ BIND_IDX_VME,
+ VME_FBR_MESSAGE_TYPE
+ )
+ mlen fbr_vme_msg_length
+ rlen vme_wb_length
+ {align1};
+
+cmp.l.f0.0 (1) null:uw vme_wb0.8<0,1,0>:uw tmp_vme_wb0.8<0,1,0>:uw {align1};
+(f0.0) jmpi (1) vme_done;
+mov (8) vme_wb0.0<1>:ud tmp_vme_wb0.0<8,8,1>:ud {align1};
+mov (8) vme_wb1.0<1>:ud tmp_vme_wb1.0<8,8,1>:ud {align1};
+
+vme_done:
+ jmpi (1) vme_mv_output;
+nop;
+nop;
+nop;
+
diff --git a/src/shaders/vme/mpeg2_inter_frame_haswell.g75a b/src/shaders/vme/mpeg2_inter_haswell.g75a
index 662c76f..355812c 100644
--- a/src/shaders/vme/mpeg2_inter_frame_haswell.g75a
+++ b/src/shaders/vme/mpeg2_inter_haswell.g75a
@@ -1,3 +1,3 @@
#include "vme75.inc"
#include "vme75_mpeg2.inc"
-#include "inter_frame_haswell.asm"
+#include "mpeg2_inter_haswell.asm"
diff --git a/src/shaders/vme/mpeg2_inter_frame_haswell.g75b b/src/shaders/vme/mpeg2_inter_haswell.g75b
index 35175c7..25c629d 100644
--- a/src/shaders/vme/mpeg2_inter_frame_haswell.g75b
+++ b/src/shaders/vme/mpeg2_inter_haswell.g75b
@@ -2,44 +2,21 @@
{ 0x00800001, 0x24400061, 0x00000000, 0x00000000 },
{ 0x00800001, 0x24800061, 0x00000000, 0x00000000 },
{ 0x00800001, 0x24c00061, 0x00000000, 0x00000000 },
- { 0x00200009, 0x24002e25, 0x004500a0, 0x00040004 },
- { 0x00000040, 0x24003ca5, 0x00000400, 0xfff8fff8 },
- { 0x00000040, 0x24043ca5, 0x00000404, 0xffffffff },
- { 0x00000001, 0x240800e1, 0x00000000, 0x0000001f },
- { 0x00000001, 0x24140231, 0x00000014, 0x00000000 },
- { 0x00200009, 0x24202e25, 0x004500a0, 0x00040004 },
- { 0x00000040, 0x24203ca5, 0x00000420, 0xfffcfffc },
- { 0x00000001, 0x242800e1, 0x00000000, 0x000f0003 },
- { 0x00000001, 0x24340231, 0x00000014, 0x00000000 },
{ 0x00200009, 0x24482e29, 0x004500a0, 0x00040004 },
{ 0x00000001, 0x24540231, 0x00000014, 0x00000000 },
{ 0x00000041, 0x24884521, 0x000000a2, 0x000000a1 },
{ 0x00000040, 0x24884421, 0x00000488, 0x000000a0 },
{ 0x00000041, 0x24880c21, 0x00000488, 0x00000018 },
{ 0x00000001, 0x24940231, 0x00000014, 0x00000000 },
- { 0x00600001, 0x28000021, 0x008d0400, 0x00000000 },
- { 0x04600031, 0x23801cb1, 0x00000800, 0x02190004 },
- { 0x00600001, 0x28000021, 0x008d0420, 0x00000000 },
- { 0x04600031, 0x23a01cb1, 0x00000800, 0x02290004 },
- { 0x00200009, 0x24002e25, 0x004500a0, 0x00030003 },
- { 0x00000041, 0x24003ca5, 0x00000400, 0x00020002 },
- { 0x00000040, 0x24003ca5, 0x00000400, 0xfff8fff8 },
- { 0x00000040, 0x24043ca5, 0x00000404, 0xffffffff },
- { 0x00600001, 0x28000021, 0x008d0400, 0x00000000 },
- { 0x04600031, 0x26001cb1, 0x00000800, 0x02190006 },
- { 0x00200009, 0x24202e25, 0x004500a0, 0x00030003 },
- { 0x00000041, 0x24203ca5, 0x00000420, 0x00020002 },
- { 0x00000040, 0x24203ca5, 0x00000420, 0xfffcfffc },
- { 0x00000001, 0x242800e1, 0x00000000, 0x00070003 },
- { 0x00600001, 0x28000021, 0x008d0420, 0x00000000 },
- { 0x04600031, 0x26201cb1, 0x00000800, 0x02190006 },
+ { 0x00200009, 0x2a402d29, 0x00450098, 0x00040004 },
+ { 0x00200001, 0x2a500129, 0x00450094, 0x00000000 },
{ 0x00600001, 0x2ac00061, 0x00000000, 0x00000000 },
{ 0x00600001, 0x2a800061, 0x00000000, 0x00000000 },
- { 0x01000005, 0x20002d28, 0x000000a6, 0x00040004 },
- { 0x00010020, 0x34001c00, 0x00001400, 0x00000740 },
{ 0x00600001, 0x2ae00061, 0x00000000, 0x00000000 },
{ 0x00600001, 0x2b000061, 0x00000000, 0x00000000 },
{ 0x00600001, 0x2b200061, 0x00000000, 0x00000000 },
+ { 0x01000005, 0x20002d28, 0x000000a6, 0x00040004 },
+ { 0x00010020, 0x34001c00, 0x00001400, 0x00000710 },
{ 0x00600001, 0x2b400061, 0x00000000, 0x00000000 },
{ 0x01000005, 0x20002e28, 0x000000a5, 0x00600060 },
{ 0x00210001, 0x2af401ed, 0x00000000, 0xffffffff },
@@ -56,8 +33,8 @@
{ 0x00210001, 0x2af401ed, 0x00000000, 0xffffffff },
{ 0x00010020, 0x34001c00, 0x00001400, 0x00000040 },
{ 0x00000040, 0x2b480c21, 0x00000b48, 0x00000003 },
- { 0x0a800031, 0x2ba01ca1, 0x00000b40, 0x02480403 },
- { 0x00200001, 0x2ae40021, 0x00450bc8, 0x00000000 },
+ { 0x0a800031, 0x2ba01ca1, 0x00000b40, 0x02180203 },
+ { 0x00200001, 0x2ae40021, 0x00450ba0, 0x00000000 },
{ 0x00000001, 0x2af001ed, 0x00000000, 0x00010001 },
{ 0x00600001, 0x2b400061, 0x00000000, 0x00000000 },
{ 0x01000005, 0x20002e28, 0x000000a5, 0x00100010 },
@@ -76,8 +53,8 @@
{ 0x00210001, 0x2b1401ed, 0x00000000, 0xffffffff },
{ 0x00010020, 0x34001c00, 0x00001400, 0x00000040 },
{ 0x00000040, 0x2b480c21, 0x00000b48, 0x00000003 },
- { 0x0a800031, 0x2ba01ca1, 0x00000b40, 0x02480403 },
- { 0x00200001, 0x2b040021, 0x00450bf0, 0x00000000 },
+ { 0x0a800031, 0x2ba01ca1, 0x00000b40, 0x02180203 },
+ { 0x00200001, 0x2b040021, 0x00450ba0, 0x00000000 },
{ 0x00000001, 0x2b1001ed, 0x00000000, 0x00010001 },
{ 0x00600001, 0x2b400061, 0x00000000, 0x00000000 },
{ 0x01000005, 0x20002e28, 0x000000a5, 0x00080008 },
@@ -95,8 +72,8 @@
{ 0x00210001, 0x2b3401ed, 0x00000000, 0xffffffff },
{ 0x00010020, 0x34001c00, 0x00001400, 0x00000170 },
{ 0x00000040, 0x2b480c21, 0x00000b48, 0x00000003 },
- { 0x0a800031, 0x2ba01ca1, 0x00000b40, 0x02480403 },
- { 0x00200001, 0x2b240021, 0x00450bf0, 0x00000000 },
+ { 0x0a800031, 0x2ba01ca1, 0x00000b40, 0x02180203 },
+ { 0x00200001, 0x2b240021, 0x00450ba0, 0x00000000 },
{ 0x00000001, 0x2b3001ed, 0x00000000, 0x00010001 },
{ 0x00000020, 0x34001c00, 0x00001400, 0x00000120 },
{ 0x00600001, 0x2b400061, 0x00000000, 0x00000000 },
@@ -114,13 +91,13 @@
{ 0x00210001, 0x2b3401ed, 0x00000000, 0xffffffff },
{ 0x00010020, 0x34001c00, 0x00001400, 0x00000040 },
{ 0x00000040, 0x2b480c21, 0x00000b48, 0x00000003 },
- { 0x0a800031, 0x2ba01cb1, 0x00000b40, 0x02480403 },
- { 0x00200001, 0x2b240021, 0x00450c18, 0x00000000 },
+ { 0x0a800031, 0x2ba01cb1, 0x00000b40, 0x02180203 },
+ { 0x00200001, 0x2b240021, 0x00450ba0, 0x00000000 },
{ 0x00000001, 0x2b3001ed, 0x00000000, 0x00010001 },
{ 0x00000040, 0x240014a5, 0x00000b00, 0x00000b20 },
{ 0x01000010, 0x20001ca4, 0x00000400, 0x00000000 },
{ 0x00110020, 0x34001c00, 0x00001400, 0x00000080 },
- { 0x02000010, 0x20001ca4, 0x00000ae0, 0x00000001 },
+ { 0x02000010, 0x20001ca4, 0x00000ae0, 0x00000000 },
{ 0x00010001, 0x2b040021, 0x00000ae4, 0x00000000 },
{ 0x00010001, 0x2b240021, 0x00000ae4, 0x00000000 },
{ 0x00010001, 0x2b140129, 0x00000af4, 0x00000000 },
@@ -145,38 +122,43 @@
{ 0x00000001, 0x2fa401ad, 0x00000b04, 0x00000000 },
{ 0x00000001, 0x2fa801ad, 0x00000b24, 0x00000000 },
{ 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 },
- { 0x00000020, 0x34001c00, 0x00001400, 0x00000850 },
+ { 0x00000020, 0x34001c00, 0x00001400, 0x00000760 },
{ 0x00000001, 0x2ac001ad, 0x00000fe4, 0x00000000 },
{ 0x00000001, 0x2fa001ad, 0x00000ae6, 0x00000000 },
{ 0x00000001, 0x2fa401ad, 0x00000b06, 0x00000000 },
{ 0x00000001, 0x2fa801ad, 0x00000b26, 0x00000000 },
{ 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 },
- { 0x00000020, 0x34001c00, 0x00001400, 0x000007f0 },
+ { 0x00000020, 0x34001c00, 0x00001400, 0x00000700 },
{ 0x00000001, 0x2ac201ad, 0x00000fe4, 0x00000000 },
- { 0x0020000c, 0x2a803dad, 0x00450ac0, 0x00020002 },
- { 0x00200040, 0x2a883dad, 0x00450a80, 0x00030003 },
- { 0x00200005, 0x2a902d29, 0x00450a88, 0xfffcfffc },
+ { 0x00200001, 0x2a2001ad, 0x00450ae4, 0x00000000 },
+ { 0x00000001, 0x244001ed, 0x00000000, 0xfff0fff0 },
+ { 0x00000001, 0x244201ed, 0x00000000, 0xfff4fff4 },
+ { 0x00000001, 0x2fa00021, 0x00000440, 0x00000000 },
+ { 0x00000001, 0x2fa80021, 0x00000448, 0x00000000 },
+ { 0x00600001, 0x2fc00021, 0x008d0a40, 0x00000000 },
+ { 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 },
+ { 0x00000020, 0x34001c00, 0x00001400, 0x00000790 },
+ { 0x00200001, 0x244001ad, 0x00450fe4, 0x00000000 },
{ 0x00600001, 0x25600021, 0x008d0020, 0x00000000 },
{ 0x00600001, 0x28400021, 0x008d0560, 0x00000000 },
{ 0x00600001, 0x28600061, 0x00000000, 0x00000000 },
- { 0x00000001, 0x23800061, 0x00000000, 0x00000000 },
- { 0x00000005, 0x23840c21, 0x00000384, 0xff000000 },
- { 0x00600001, 0x28800021, 0x008d0380, 0x00000000 },
+ { 0x00600001, 0x28800061, 0x00000000, 0x00000000 },
+ { 0x00000001, 0x24000169, 0x00000000, 0x00010001 },
+ { 0x00000001, 0x28850231, 0x00000400, 0x00000000 },
+ { 0x00000001, 0x24000169, 0x00000000, 0x00bb00bb },
+ { 0x00000001, 0x28840231, 0x00000400, 0x00000000 },
{ 0x00600001, 0x28a00061, 0x00000000, 0x00000000 },
- { 0x00800001, 0x28a00231, 0x00cf03a3, 0x00000000 },
{ 0x00000001, 0x28b00061, 0x00000000, 0x11111111 },
{ 0x00000001, 0x28bc0061, 0x00000000, 0x00010101 },
- { 0x00000001, 0x28b40129, 0x00000606, 0x00000000 },
- { 0x00400001, 0x28d00021, 0x00690608, 0x00000000 },
- { 0x00600001, 0x28c00129, 0x00ae0622, 0x00000000 },
+ { 0x00600001, 0x28c00061, 0x00000000, 0x00000000 },
{ 0x00600001, 0x28000021, 0x008d0440, 0x00000000 },
- { 0x00000001, 0x24000169, 0x00000000, 0x00010001 },
- { 0x00000001, 0x28850231, 0x00000400, 0x00000000 },
+ { 0x00600001, 0x24600061, 0x00000000, 0x00000000 },
{ 0x00000001, 0x247c0169, 0x00000000, 0x00000000 },
- { 0x01000005, 0x20002e28, 0x000000a4, 0x00010001 },
- { 0x00010001, 0x247c00f1, 0x00000000, 0x00000002 },
+ { 0x00000001, 0x24000169, 0x00000000, 0x00020002 },
+ { 0x00000040, 0x24002d29, 0x00000400, 0x00040004 },
+ { 0x00000001, 0x247c0231, 0x00000400, 0x00000000 },
{ 0x00000001, 0x247d0231, 0x000000a5, 0x00000000 },
- { 0x00000001, 0x24000169, 0x00000000, 0x00200020 },
+ { 0x00000001, 0x24000169, 0x00000000, 0x00000000 },
{ 0x00000001, 0x247e0231, 0x00000400, 0x00000000 },
{ 0x00000001, 0x244c0061, 0x00000000, 0x00800000 },
{ 0x00600001, 0x28000021, 0x008d0440, 0x00000000 },
@@ -194,24 +176,13 @@
{ 0x0a800031, 0x20001cac, 0x00000800, 0x040a0203 },
{ 0x00000001, 0x244c0061, 0x00000000, 0x7e200000 },
{ 0x00000001, 0x24560169, 0x00000000, 0x28302830 },
- { 0x00000001, 0x24400021, 0x00000448, 0x00000000 },
- { 0x00000040, 0x24403dad, 0x00000440, 0xfff0fff0 },
- { 0x00000040, 0x24423dad, 0x00000442, 0xfff4fff4 },
- { 0x00000001, 0x244001ed, 0x00000000, 0xfff0fff0 },
- { 0x00000001, 0x244201ed, 0x00000000, 0xfff4fff4 },
{ 0x00000001, 0x24440021, 0x00000440, 0x00000000 },
- { 0x01000005, 0x20002e28, 0x000000a5, 0x00600060 },
- { 0x00010040, 0x24403dad, 0x00000440, 0x000c000c },
- { 0x01000005, 0x20002e28, 0x000000a5, 0x00100010 },
- { 0x00010040, 0x24423dad, 0x00000442, 0x00080008 },
- { 0x00200040, 0x244035ad, 0x00450440, 0x00450a90 },
- { 0x00200040, 0x244435ad, 0x00450444, 0x00450a90 },
{ 0x00600001, 0x28000021, 0x008d0440, 0x00000000 },
{ 0x00000001, 0x24600061, 0x00000000, 0x00000002 },
{ 0x00000001, 0x24640231, 0x0000009c, 0x00000000 },
{ 0x00000001, 0x24680061, 0x00000000, 0x30003030 },
- { 0x00000001, 0x24700021, 0x00000ac0, 0x00000000 },
- { 0x00000001, 0x24740021, 0x00000ac0, 0x00000000 },
+ { 0x00000001, 0x24700021, 0x00000a20, 0x00000000 },
+ { 0x00000001, 0x24740021, 0x00000a20, 0x00000000 },
{ 0x00600001, 0x28200021, 0x008d0460, 0x00000000 },
{ 0x00600001, 0x28400021, 0x008d0560, 0x00000000 },
{ 0x00000001, 0x28600061, 0x00000000, 0x01010101 },
@@ -237,11 +208,16 @@
{ 0x00600001, 0x28800021, 0x008d01c0, 0x00000000 },
{ 0x00600001, 0x28a00021, 0x008d01e0, 0x00000000 },
{ 0x00600001, 0x28c00021, 0x008d0200, 0x00000000 },
- { 0x00000001, 0x244c0061, 0x00000000, 0x00243000 },
+ { 0x00000001, 0x244c0061, 0x00000000, 0x00241000 },
+ { 0x00000001, 0x24000169, 0x00000000, 0x00040004 },
+ { 0x00000040, 0x247e4631, 0x0000047e, 0x00000400 },
{ 0x00600001, 0x28000021, 0x008d0440, 0x00000000 },
{ 0x00600001, 0x28200021, 0x008d0460, 0x00000000 },
{ 0x00600001, 0x28400021, 0x008d0560, 0x00000000 },
{ 0x0d600031, 0x21801ca1, 0x00000800, 0x0e786000 },
+ { 0x01000005, 0x20002d28, 0x000000a6, 0x00040004 },
+ { 0x00110020, 0x34001c00, 0x00001400, 0x000004a0 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
{ 0x00000040, 0x24880c21, 0x00000488, 0x00000002 },
{ 0x00600001, 0x28000021, 0x008d0480, 0x00000000 },
{ 0x00000001, 0x28200021, 0x00000180, 0x00000000 },
@@ -255,7 +231,7 @@
{ 0x00600001, 0x28400021, 0x008d01c0, 0x00000000 },
{ 0x00600001, 0x28600021, 0x008d01e0, 0x00000000 },
{ 0x00600001, 0x28800021, 0x008d0200, 0x00000000 },
- { 0x0a800031, 0x20001cac, 0x00000800, 0x0a0a0403 },
+ { 0x0a800031, 0x20001cac, 0x00000800, 0x040a0203 },
{ 0x00000040, 0x24880c21, 0x00000488, 0x00000008 },
{ 0x00600001, 0x28000021, 0x008d0480, 0x00000000 },
{ 0x00600001, 0x28200021, 0x008d0240, 0x00000000 },
@@ -265,20 +241,6 @@
{ 0x07800031, 0x24001ca8, 0x00000e00, 0x82000010 },
{ 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
{ 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
- { 0x06000010, 0x200035ac, 0x00000fa0, 0x00000fa4 },
- { 0x00010001, 0x2f6001ad, 0x00000fa0, 0x00000000 },
- { 0x00110001, 0x2f6001ad, 0x00000fa4, 0x00000000 },
- { 0x06000010, 0x200035ac, 0x00000f60, 0x00000fa8 },
- { 0x00010001, 0x2fe401ad, 0x00000f60, 0x00000000 },
- { 0x00110001, 0x2fe401ad, 0x00000fa8, 0x00000000 },
- { 0x00000001, 0x34000020, 0x00000fe0, 0x00000000 },
- { 0x04000010, 0x200035ac, 0x00000fa0, 0x00000fa4 },
- { 0x00010001, 0x2f6001ad, 0x00000fa0, 0x00000000 },
- { 0x00110001, 0x2f6001ad, 0x00000fa4, 0x00000000 },
- { 0x04000010, 0x200035ac, 0x00000f60, 0x00000fa8 },
- { 0x00010001, 0x2fe401ad, 0x00000f60, 0x00000000 },
- { 0x00110001, 0x2fe401ad, 0x00000fa8, 0x00000000 },
- { 0x00000001, 0x34000020, 0x00000fe0, 0x00000000 },
{ 0x04000010, 0x200035ac, 0x00000fa0, 0x00000fa4 },
{ 0x00010020, 0x34001c00, 0x00001400, 0x00000070 },
{ 0x04000010, 0x200035ac, 0x00000fa0, 0x00000fa8 },
@@ -295,3 +257,110 @@
{ 0x00010001, 0x2fe401ad, 0x00000fa8, 0x00000000 },
{ 0x00110001, 0x2fe401ad, 0x00000fa0, 0x00000000 },
{ 0x00000001, 0x34000020, 0x00000fe0, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00200040, 0x2f6035ad, 0x00450fa8, 0x00450fa0 },
+ { 0x00000040, 0x2f703dad, 0x00000f60, 0x00300030 },
+ { 0x00000040, 0x2f723dad, 0x00000f62, 0x00280028 },
+ { 0x00200041, 0x2f903dad, 0x00450fd0, 0xffffffff },
+ { 0x00200040, 0x2f8035ad, 0x00450fa8, 0x00450f90 },
+ { 0x00200040, 0x2f8435ad, 0x00450fa8, 0x00450fd0 },
+ { 0x05000010, 0x200035ac, 0x00000f60, 0x00000f80 },
+ { 0x00010001, 0x2f6001ad, 0x00000f80, 0x00000000 },
+ { 0x03000010, 0x200035ac, 0x00000f70, 0x00000f84 },
+ { 0x00010040, 0x2f603dad, 0x00000f84, 0xffd0ffd0 },
+ { 0x05000010, 0x200035ac, 0x00000f62, 0x00000f82 },
+ { 0x00010001, 0x2f6201ad, 0x00000f82, 0x00000000 },
+ { 0x03000010, 0x200035ac, 0x00000f72, 0x00000f86 },
+ { 0x00010040, 0x2f623dad, 0x00000f86, 0xffd8ffd8 },
+ { 0x05000010, 0x20003dac, 0x00000f60, 0x00000000 },
+ { 0x00110020, 0x34001c00, 0x00001400, 0x00000020 },
+ { 0x00010001, 0x2f6001ed, 0x00000000, 0x00000000 },
+ { 0x00000020, 0x34001c00, 0x00001400, 0x00000030 },
+ { 0x03000010, 0x200035ac, 0x00000f70, 0x00000fc0 },
+ { 0x00110020, 0x34001c00, 0x00001400, 0x00000010 },
+ { 0x00010040, 0x2f603dad, 0x00000fc0, 0xffd0ffd0 },
+ { 0x05000010, 0x20003dac, 0x00000f62, 0x00000000 },
+ { 0x00110020, 0x34001c00, 0x00001400, 0x00000020 },
+ { 0x00010001, 0x2f6201ed, 0x00000000, 0x00000000 },
+ { 0x00000020, 0x34001c00, 0x00001400, 0x00000020 },
+ { 0x03000010, 0x200035ac, 0x00000f72, 0x00000fc2 },
+ { 0x00010040, 0x2f623dad, 0x00000fc2, 0xffd8ffd8 },
+ { 0x00200041, 0x2f803dad, 0x00450fa8, 0xffffffff },
+ { 0x00200040, 0x2fe435ad, 0x00450f60, 0x00450f80 },
+ { 0x00000001, 0x34000020, 0x00000fe0, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0020000c, 0x2a803dad, 0x00450ac0, 0x00020002 },
+ { 0x00200001, 0x240001ad, 0x00450a80, 0x00000000 },
+ { 0x00200040, 0x2a883dad, 0x00450a80, 0x00030003 },
+ { 0x00200005, 0x2a902d29, 0x00450a88, 0xfffcfffc },
+ { 0x05000010, 0x20003dac, 0x00000400, 0x00000000 },
+ { 0x00010041, 0x24003dad, 0x00000400, 0xffffffff },
+ { 0x05000010, 0x20003dac, 0x00000402, 0x00000000 },
+ { 0x00010041, 0x24023dad, 0x00000402, 0xffffffff },
+ { 0x04000010, 0x20003dac, 0x00000400, 0x00040004 },
+ { 0x00010020, 0x34001c00, 0x00001400, 0x00000030 },
+ { 0x04000010, 0x20003dac, 0x00000402, 0x00040004 },
+ { 0x00010020, 0x34001c00, 0x00001400, 0x00000010 },
+ { 0x00000020, 0x34001c00, 0x00001400, 0x00000380 },
+ { 0x00600001, 0x2c800021, 0x008d0180, 0x00000000 },
+ { 0x00600001, 0x2ca00021, 0x008d01a0, 0x00000000 },
+ { 0x00000001, 0x244001ed, 0x00000000, 0xfff0fff0 },
+ { 0x00000001, 0x244201ed, 0x00000000, 0xfff4fff4 },
+ { 0x00400001, 0x2fa00021, 0x00690440, 0x00000000 },
+ { 0x00200040, 0x2fa035ad, 0x00450fa0, 0x00450a90 },
+ { 0x00600001, 0x2fc00021, 0x008d0a40, 0x00000000 },
+ { 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 },
+ { 0x00000020, 0x34001c00, 0x00001400, 0xfffffca0 },
+ { 0x00200001, 0x244001ad, 0x00450fe4, 0x00000000 },
+ { 0x00000001, 0x244c0061, 0x00000000, 0x7e200000 },
+ { 0x00000001, 0x24560169, 0x00000000, 0x28302830 },
+ { 0x00000001, 0x24440021, 0x00000440, 0x00000000 },
+ { 0x00600001, 0x28000021, 0x008d0440, 0x00000000 },
+ { 0x00600001, 0x24600061, 0x00000000, 0x00000000 },
+ { 0x00000001, 0x24600061, 0x00000000, 0x00000002 },
+ { 0x00000001, 0x24640231, 0x0000009c, 0x00000000 },
+ { 0x00000001, 0x24680061, 0x00000000, 0x30003030 },
+ { 0x00000001, 0x24700021, 0x00000a20, 0x00000000 },
+ { 0x00000001, 0x24740021, 0x00000a20, 0x00000000 },
+ { 0x00600001, 0x28200021, 0x008d0460, 0x00000000 },
+ { 0x00600001, 0x28400021, 0x008d0560, 0x00000000 },
+ { 0x00000001, 0x28600061, 0x00000000, 0x01010101 },
+ { 0x00000001, 0x28640061, 0x00000000, 0x10010101 },
+ { 0x00000001, 0x28680061, 0x00000000, 0x0f0f0f0f },
+ { 0x00000001, 0x286c0061, 0x00000000, 0x100f0f0f },
+ { 0x00000001, 0x28700061, 0x00000000, 0x01010101 },
+ { 0x00000001, 0x28740061, 0x00000000, 0x10010101 },
+ { 0x00000001, 0x28780061, 0x00000000, 0x0f0f0f0f },
+ { 0x00000001, 0x287c0061, 0x00000000, 0x100f0f0f },
+ { 0x00000001, 0x28800061, 0x00000000, 0x01010101 },
+ { 0x00000001, 0x28840061, 0x00000000, 0x10010101 },
+ { 0x00000001, 0x28880061, 0x00000000, 0x0f0f0f0f },
+ { 0x00000001, 0x288c0061, 0x00000000, 0x000f0f0f },
+ { 0x00400001, 0x28900061, 0x00000000, 0x00000000 },
+ { 0x08600031, 0x21801ca1, 0x00000800, 0x0a784000 },
+ { 0x00000001, 0x25740061, 0x00000000, 0x00000000 },
+ { 0x00000001, 0x25750231, 0x00000199, 0x00000000 },
+ { 0x00000001, 0x25760231, 0x0000019a, 0x00000000 },
+ { 0x00000005, 0x24002d29, 0x00000180, 0x00030003 },
+ { 0x00000001, 0x25740231, 0x00000400, 0x00000000 },
+ { 0x00600001, 0x28600021, 0x008d01a0, 0x00000000 },
+ { 0x00600001, 0x28800021, 0x008d01c0, 0x00000000 },
+ { 0x00600001, 0x28a00021, 0x008d01e0, 0x00000000 },
+ { 0x00600001, 0x28c00021, 0x008d0200, 0x00000000 },
+ { 0x00000001, 0x244c0061, 0x00000000, 0x00241000 },
+ { 0x00000001, 0x24000169, 0x00000000, 0x00040004 },
+ { 0x00000040, 0x247e4631, 0x0000047e, 0x00000400 },
+ { 0x00600001, 0x28000021, 0x008d0440, 0x00000000 },
+ { 0x00600001, 0x28200021, 0x008d0460, 0x00000000 },
+ { 0x00600001, 0x28400021, 0x008d0560, 0x00000000 },
+ { 0x0d600031, 0x21801ca1, 0x00000800, 0x0e786000 },
+ { 0x05000010, 0x20002528, 0x00000188, 0x00000c88 },
+ { 0x00010020, 0x34001c00, 0x00001400, 0x00000020 },
+ { 0x00600001, 0x21800021, 0x008d0c80, 0x00000000 },
+ { 0x00600001, 0x21a00021, 0x008d0ca0, 0x00000000 },
+ { 0x00000020, 0x34001c00, 0x00001400, 0xfffff710 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
diff --git a/src/shaders/vme/mpeg2_inter_ivb.asm b/src/shaders/vme/mpeg2_inter_ivb.asm
new file mode 100644
index 0000000..dde9643
--- /dev/null
+++ b/src/shaders/vme/mpeg2_inter_ivb.asm
@@ -0,0 +1,705 @@
+/*
+ * Copyright © <2010>, Intel Corporation.
+ *
+ * This program is licensed under the terms and conditions of the
+ * Eclipse Public License (EPL), version 1.0. The full text of the EPL is at
+ * http://www.opensource.org/licenses/eclipse-1.0.php.
+ * Authors: Zhao Yakui <yakui.zhao@intel.com>
+ *
+ */
+// Module name: mpeg2_inter_ivb.asm
+//
+// Perform inter prediction estimation for MPEG-2 inter frames on Ivy Bridge
+//
+
+//
+// Now, begin source code. SAVE_RET stores ip + 32 in RETURN_REG and RETURN copies RETURN_REG back into ip; every SAVE_RET here is followed directly by the jmpi to the subroutine.
+//
+
+#define SAVE_RET add (1) RETURN_REG<1>:ud ip:ud 32:ud
+#define RETURN mov (1) ip:ud RETURN_REG<0,1,0>:ud
+
+/*
+ * __START
+ */
+__INTER_START:
+mov (16) tmp_reg0.0<1>:UD 0x0:UD {align1}; /* clear the scratch registers */
+mov (16) tmp_reg2.0<1>:UD 0x0:UD {align1};
+mov (16) tmp_reg4.0<1>:UD 0x0:UD {align1} ;
+mov (16) tmp_reg6.0<1>:UD 0x0:UD {align1} ;
+
+shl (2) vme_m0.8<1>:UW orig_xy_ub<2,2,1>:UB 4:UW {align1}; /* (x, y) * 16 */
+mov (1) vme_m0.20<1>:UB thread_id_ub {align1}; /* dispatch id */
+
+mul (1) obw_m0.8<1>:UD w_in_mb_uw<0,1,0>:UW orig_y_ub<0,1,0>:UB {align1}; /* y * w_in_mb */
+add (1) obw_m0.8<1>:UD obw_m0.8<0,1,0>:UD orig_x_ub<0,1,0>:UB {align1}; /* + x */
+mul (1) obw_m0.8<1>:UD obw_m0.8<0,1,0>:UD INTER_VME_OUTPUT_IN_OWS:UD {align1}; /* * INTER_VME_OUTPUT_IN_OWS: output offset of this MB */
+mov (1) obw_m0.20<1>:UB thread_id_ub {align1}; /* dispatch id */
+
+shl (2) pic_ref.0<1>:uw r4.24<2,2,1>:uw 4:uw {align1}; /* r4.24, r4.26 scaled by 16; passed to ref_boundary_check as INPUT_ARG1.0 */
+mov (2) pic_ref.16<1>:uw r4.20<2,2,1>:uw {align1}; /* r4.20, r4.22; used as the MV range by ref_boundary_check */
+
+mov (8) mb_mvp_ref.0<1>:ud 0:ud {align1}; /* predicted MV defaults to zero */
+mov (8) mb_ref_win.0<1>:ud 0:ud {align1};
+mov (8) mba_result.0<1>:ud 0x0:ud {align1}; /* neighbour A/B/C records start out all zero */
+mov (8) mbb_result.0<1>:ud 0x0:ud {align1};
+mov (8) mbc_result.0<1>:ud 0x0:ud {align1};
+and.z.f0.0 (1) null:uw mb_hwdep<0,1,0>:uw 0x04:uw {align1}; /* f0.0 is set when bit 2 of mb_hwdep is clear */
+(f0.0) jmpi (1) __mb_hwdep_end; /* in that case skip the neighbour MV prediction */
+/* read back the data for MB A */
+/* the layout of MB result is: rx.0(Available). rx.4(MVa), rX.8(MVb), rX.16(Pred_L0 flag),
+* rX.18 (Pred_L1 flag), rX.20(Forward reference ID), rX.22(Backward reference ID)
+*/
+mba_start:
+mov (8) mb_msg0.0<1>:ud 0:ud {align1};
+and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_AE:uw {align1};
+/* MB A doesn't exist. Zero MV. mba_flag is zero and ref ID = -1 */
+(f0.0) mov (2) mba_result.20<1>:w -1:w {align1};
+(f0.0) jmpi (1) mbb_start;
+mov (1) mba_result.0<1>:d MB_AVAIL {align1};
+mov (2) tmp_reg0.0<1>:UW orig_xy_ub<2,2,1>:UB {align1};
+add (1) tmp_reg0.0<1>:w tmp_reg0.0<0,1,0>:w -1:w {align1}; /* x - 1 */
+mul (1) mb_msg0.8<1>:UD w_in_mb_uw<0,1,0>:UW tmp_reg0.2<0,1,0>:UW {align1}; /* same offset computation as for obw_m0.8, applied to the neighbour */
+add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD tmp_reg0.0<0,1,0>:uw {align1};
+mul (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD INTER_VME_OUTPUT_IN_OWS:UD {align1};
+mov (1) mb_msg0.20<1>:UB thread_id_ub {align1}; /* dispatch id */
+mov (1) mb_msg_tmp.8<1>:ud mb_msg0.8<0,1,0>:ud {align1}; /* keep the start offset of the neighbour's output */
+
+add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD INTER_VME_OUTPUT_MV_IN_OWS:UD {align1}; /* the info record is written at this offset by __OUTPUT_INFO */
+/* bind index 3, read 1 oword (16bytes), msg type: 0(OWord Block Read) */
+send (16)
+ mb_ind
+ mb_wb.0<1>:ud
+ NULL
+ data_port(
+ OBR_CACHE_TYPE,
+ OBR_MESSAGE_TYPE,
+ OBR_CONTROL_0,
+ OBR_BIND_IDX,
+ OBR_WRITE_COMMIT_CATEGORY,
+ OBR_HEADER_PRESENT
+ )
+ mlen 1
+ rlen 1
+ {align1};
+
+/* TODO: RefID is required after multi-references are added */
+and.z.f0.0 (1) null<1>:ud mb_mode_wb.0<0,1,0>:ud INTRAMBFLAG_MASK:ud {align1} ;
+(-f0.0) mov (2) mba_result.20<1>:w -1:w {align1}; /* INTRAMBFLAG_MASK bit set: ref ID = -1, MV is not read */
+(-f0.0) jmpi (1) mbb_start;
+
+mov (1) mb_msg0.8<1>:UD mb_msg_tmp.8<0,1,0>:ud {align1}; /* back to the start offset, where __INTER_INFO writes the MVs */
+/* Read MV for MB A */
+/* bind index 3, read 2 oword (32 bytes), msg type: 0(OWord Block Read) */
+send (16)
+ mb_ind
+ mb_mv0.0<1>:ud
+ NULL
+ data_port(
+ OBR_CACHE_TYPE,
+ OBR_MESSAGE_TYPE,
+ OBR_CONTROL_2,
+ OBR_BIND_IDX,
+ OBR_WRITE_COMMIT_CATEGORY,
+ OBR_HEADER_PRESENT
+ )
+ mlen 1
+ rlen 1
+ {align1};
+/* TODO: RefID is required after multi-references are added */
+/* MV */
+mov (2) mba_result.4<1>:ud mb_mv0.0<2,2,1>:ud {align1};
+mov (1) mba_result.16<1>:w MB_PRED_FLAG {align1};
+
+mbb_start:
+mov (8) mb_msg0.0<1>:ud 0:ud {align1};
+and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_B:uw {align1};
+/* MB B doesn't exist. Zero MV. Its availability stays zero and ref ID = -1 */
+/* If MB B doesn't exist, neither MB C nor D exists */
+(f0.0) mov (2) mbb_result.20<1>:w -1:w {align1};
+(f0.0) mov (2) mbc_result.20<1>:w -1:w {align1};
+(f0.0) jmpi (1) mb_mvp_start;
+mov (1) mbb_result.0<1>:d MB_AVAIL {align1};
+mov (2) tmp_reg0.0<1>:UW orig_xy_ub<2,2,1>:UB {align1};
+add (1) tmp_reg0.2<1>:w tmp_reg0.2<0,1,0>:w -1:w {align1}; /* y - 1 */
+mul (1) mb_msg0.8<1>:UD w_in_mb_uw<0,1,0>:UW tmp_reg0.2<0,1,0>:UW {align1};
+add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD tmp_reg0.0<0,1,0>:uw {align1};
+mul (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD INTER_VME_OUTPUT_IN_OWS:UD {align1};
+mov (1) mb_msg0.20<1>:UB thread_id_ub {align1}; /* dispatch id */
+mov (1) mb_msg_tmp.8<1>:ud mb_msg0.8<0,1,0>:ud {align1}; /* keep the start offset of the neighbour's output */
+
+add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD INTER_VME_OUTPUT_MV_IN_OWS:UD {align1}; /* the info record is written at this offset by __OUTPUT_INFO */
+
+/* bind index 3, read 1 oword (16bytes), msg type: 0(OWord Block Read) */
+send (16)
+ mb_ind
+ mb_wb.0<1>:ud
+ NULL
+ data_port(
+ OBR_CACHE_TYPE,
+ OBR_MESSAGE_TYPE,
+ OBR_CONTROL_0,
+ OBR_BIND_IDX,
+ OBR_WRITE_COMMIT_CATEGORY,
+ OBR_HEADER_PRESENT
+ )
+ mlen 1
+ rlen 1
+ {align1};
+
+/* TODO: RefID is required after multi-references are added */
+and.z.f0.0 (1) null<1>:ud mb_mode_wb.0<0,1,0>:ud INTRAMBFLAG_MASK:ud {align1} ;
+(-f0.0) mov (2) mbb_result.20<1>:w -1:w {align1}; /* INTRAMBFLAG_MASK bit set: ref ID = -1, MV is not read */
+(-f0.0) jmpi (1) mbc_start;
+
+mov (1) mb_msg0.8<1>:UD mb_msg_tmp.8<0,1,0>:ud {align1}; /* back to the start offset, where __INTER_INFO writes the MVs */
+/* Read MV for MB B */
+/* bind index 3, read 2 oword (32 bytes), msg type: 0(OWord Block Read) */
+send (16)
+ mb_ind
+ mb_mv0.0<1>:ud
+ NULL
+ data_port(
+ OBR_CACHE_TYPE,
+ OBR_MESSAGE_TYPE,
+ OBR_CONTROL_2,
+ OBR_BIND_IDX,
+ OBR_WRITE_COMMIT_CATEGORY,
+ OBR_HEADER_PRESENT
+ )
+ mlen 1
+ rlen 1
+ {align1};
+/* TODO: RefID is required after multi-references are added */
+mov (2) mbb_result.4<1>:ud mb_mv0.0<2,2,1>:ud {align1};
+mov (1) mbb_result.16<1>:w MB_PRED_FLAG {align1};
+
+mbc_start:
+mov (8) mb_msg0.0<1>:ud 0:ud {align1};
+and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_C:uw {align1};
+/* MB C doesn't exist: its record is left untouched here */
+/* Following the H.264 spec, MB D is used in place of MB C when MB C doesn't exist */
+(f0.0) jmpi (1) mbd_start;
+mov (1) mbc_result.0<1>:d MB_AVAIL {align1};
+mov (2) tmp_reg0.0<1>:UW orig_xy_ub<2,2,1>:UB {align1};
+add (1) tmp_reg0.2<1>:w tmp_reg0.2<0,1,0>:w -1:w {align1}; /* y - 1 */
+add (1) tmp_reg0.0<1>:w tmp_reg0.0<0,1,0>:w 1:w {align1}; /* x + 1 */
+mul (1) mb_msg0.8<1>:UD w_in_mb_uw<0,1,0>:UW tmp_reg0.2<0,1,0>:UW {align1};
+add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD tmp_reg0.0<0,1,0>:uw {align1};
+mul (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD INTER_VME_OUTPUT_IN_OWS:UD {align1};
+mov (1) mb_msg0.20<1>:UB thread_id_ub {align1}; /* dispatch id */
+
+mov (1) mb_msg_tmp.8<1>:ud mb_msg0.8<0,1,0>:ud {align1}; /* keep the start offset of the neighbour's output */
+
+add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD INTER_VME_OUTPUT_MV_IN_OWS:UD {align1}; /* the info record is written at this offset by __OUTPUT_INFO */
+/* bind index 3, read 1 oword (16bytes), msg type: 0(OWord Block Read) */
+send (16)
+ mb_ind
+ mb_wb.0<1>:ud
+ NULL
+ data_port(
+ OBR_CACHE_TYPE,
+ OBR_MESSAGE_TYPE,
+ OBR_CONTROL_0,
+ OBR_BIND_IDX,
+ OBR_WRITE_COMMIT_CATEGORY,
+ OBR_HEADER_PRESENT
+ )
+ mlen 1
+ rlen 1
+ {align1};
+
+/* TODO: RefID is required after multi-references are added */
+and.z.f0.0 (1) null<1>:ud mb_mode_wb.0<0,1,0>:ud INTRAMBFLAG_MASK:ud {align1} ;
+(-f0.0) mov (2) mbc_result.20<1>:w -1:w {align1}; /* INTRAMBFLAG_MASK bit set: ref ID = -1, MV is not read */
+(-f0.0) jmpi (1) mb_mvp_start;
+mov (1) mb_msg0.8<1>:UD mb_msg_tmp.8<0,1,0>:ud {align1}; /* back to the start offset, where __INTER_INFO writes the MVs */
+/* Read MV for MB C */
+/* bind index 3, read 2 oword (32 bytes), msg type: 0(OWord Block Read) */
+send (16)
+ mb_ind
+ mb_mv0.0<1>:ud
+ NULL
+ data_port(
+ OBR_CACHE_TYPE,
+ OBR_MESSAGE_TYPE,
+ OBR_CONTROL_2,
+ OBR_BIND_IDX,
+ OBR_WRITE_COMMIT_CATEGORY,
+ OBR_HEADER_PRESENT
+ )
+ mlen 1
+ rlen 1
+ {align1};
+/* TODO: RefID is required after multi-references are added */
+/* Forward MV */
+mov (2) mbc_result.4<1>:ud mb_mv0.0<2,2,1>:ud {align1};
+mov (1) mbc_result.16<1>:w MB_PRED_FLAG {align1};
+
+jmpi (1) mb_mvp_start; /* MB C was used, so skip the MB D fallback */
+mbd_start:
+mov (8) mb_msg0.0<1>:ud 0:ud {align1};
+and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_D:uw {align1};
+(f0.0) jmpi (1) mb_mvp_start;
+mov (1) mbc_result.0<1>:d MB_AVAIL {align1}; /* MB D stands in for MB C, so it fills the mbc_result record */
+mov (2) tmp_reg0.0<1>:UW orig_xy_ub<2,2,1>:UB {align1};
+add (2) tmp_reg0.0<1>:w tmp_reg0.0<2,2,1>:w -1:w {align1}; /* (x - 1, y - 1) */
+mul (1) mb_msg0.8<1>:UD w_in_mb_uw<0,1,0>:UW tmp_reg0.2<0,1,0>:UW {align1};
+add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD tmp_reg0.0<0,1,0>:uw {align1};
+
+mul (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD INTER_VME_OUTPUT_IN_OWS:UD {align1};
+mov (1) mb_msg0.20<1>:UB thread_id_ub {align1}; /* dispatch id */
+mov (1) mb_msg_tmp.8<1>:ud mb_msg0.8<0,1,0>:ud {align1}; /* keep the start offset of the neighbour's output */
+
+add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD INTER_VME_OUTPUT_MV_IN_OWS:UD {align1}; /* the info record is written at this offset by __OUTPUT_INFO */
+/* bind index 3, read 1 oword (16bytes), msg type: 0(OWord Block Read) */
+send (16)
+ mb_ind
+ mb_wb.0<1>:ud
+ NULL
+ data_port(
+ OBR_CACHE_TYPE,
+ OBR_MESSAGE_TYPE,
+ OBR_CONTROL_0,
+ OBR_BIND_IDX,
+ OBR_WRITE_COMMIT_CATEGORY,
+ OBR_HEADER_PRESENT
+ )
+ mlen 1
+ rlen 1
+ {align1};
+
+and.z.f0.0 (1) null<1>:ud mb_mode_wb.0<0,1,0>:ud INTRAMBFLAG_MASK:ud {align1} ;
+(-f0.0) mov (2) mbc_result.20<1>:w -1:w {align1}; /* INTRAMBFLAG_MASK bit set: ref ID = -1, MV is not read */
+(-f0.0) jmpi (1) mb_mvp_start;
+
+mov (1) mb_msg0.8<1>:UD mb_msg_tmp.8<0,1,0>:ud {align1}; /* back to the start offset, where __INTER_INFO writes the MVs */
+/* Read MV for MB D */
+/* bind index 3, read 2 oword (32 bytes), msg type: 0(OWord Block Read) */
+send (16)
+ mb_ind
+ mb_mv0.0<1>:ub /* NOTE(review): the MB A/B/C reads use :ud here - confirm :ub is intended */
+ NULL
+ data_port(
+ OBR_CACHE_TYPE,
+ OBR_MESSAGE_TYPE,
+ OBR_CONTROL_2,
+ OBR_BIND_IDX,
+ OBR_WRITE_COMMIT_CATEGORY,
+ OBR_HEADER_PRESENT
+ )
+ mlen 1
+ rlen 1
+ {align1};
+
+/* TODO: RefID is required after multi-references are added */
+
+/* Forward MV */
+mov (2) mbc_result.4<1>:ud mb_mv0.0<2,2,1>:ud {align1};
+mov (1) mbc_result.16<1>:w MB_PRED_FLAG {align1};
+
+mb_mvp_start:
+/*TODO: Add the skip prediction */
+/* Check whether both MB B and C are unavailable */
+add (1) tmp_reg0.0<1>:d mbb_result.0<0,1,0>:d mbc_result.0<0,1,0>:d {align1};
+cmp.z.f0.0 (1) null:d tmp_reg0.0<0,1,0>:d 0:d {align1};
+(-f0.0) jmpi (1) mb_median_start; /* at least one of B/C is available */
+cmp.nz.f0.0 (1) null:d mba_result.0<0,1,0>:d 0:d {align1}; /* f0.0 set when MB A is available */
+(f0.0) mov (1) mbb_result.4<1>:ud mba_result.4<0,1,0>:ud {align1}; /* B and C inherit the MV and ref ID of A */
+(f0.0) mov (1) mbc_result.4<1>:ud mba_result.4<0,1,0>:ud {align1};
+(f0.0) mov (1) mbb_result.20<1>:uw mba_result.20<0,1,0>:uw {align1};
+(f0.0) mov (1) mbc_result.20<1>:uw mba_result.20<0,1,0>:uw {align1};
+(f0.0) mov (1) mb_mvp_ref.0<1>:ud mba_result.4<0,1,0>:ud {align1}; /* predicted MV = MV of A */
+(-f0.0) mov (1) mb_mvp_ref.0<1>:ud 0:ud {align1}; /* no neighbour at all: predicted MV = 0 */
+jmpi (1) __mb_hwdep_end;
+
+mb_median_start:
+/* check whether only one neighbour MB has the same ref ID with the current MB */
+mov (8) tmp_reg0.0<1>:ud 0:ud {align1}; /* tmp_reg0.0 = match counter, tmp_reg0.4 = MV of the last match */
+cmp.z.f0.0 (1) null:d mba_result.20<1>:w 0:w {align1}; /* a neighbour matches when its ref ID word is 0 */
+(f0.0) add (1) tmp_reg0.0<1>:w tmp_reg0.0<1>:w 1:w {align1};
+(f0.0) mov (1) tmp_reg0.4<1>:ud mba_result.4<0,1,0>:ud {align1};
+cmp.z.f0.0 (1) null:d mbb_result.20<1>:w 0:w {align1};
+(f0.0) add (1) tmp_reg0.0<1>:w tmp_reg0.0<1>:w 1:w {align1};
+(f0.0) mov (1) tmp_reg0.4<1>:ud mbb_result.4<0,1,0>:ud {align1};
+cmp.z.f0.0 (1) null:d mbc_result.20<1>:w 0:w {align1};
+(f0.0) add (1) tmp_reg0.0<1>:w tmp_reg0.0<1>:w 1:w {align1};
+(f0.0) mov (1) tmp_reg0.4<1>:ud mbc_result.4<0,1,0>:ud {align1};
+cmp.e.f0.0 (1) null:d tmp_reg0.0<1>:w 1:w {align1};
+(f0.0) mov (1) mb_mvp_ref.0<1>:ud tmp_reg0.4<0,1,0>:ud {align1}; /* exactly one match: use its MV as the prediction */
+(f0.0) jmpi (1) __mb_hwdep_end;
+
+mov (1) INPUT_ARG0.0<1>:w mba_result.4<0,1,0>:w {align1}; /* otherwise: median of the low MV words of A, B, C */
+mov (1) INPUT_ARG0.4<1>:w mbb_result.4<0,1,0>:w {align1};
+mov (1) INPUT_ARG0.8<1>:w mbc_result.4<0,1,0>:w {align1};
+SAVE_RET {align1};
+ jmpi (1) word_imedian;
+mov (1) mb_mvp_ref.0<1>:w RET_ARG<0,1,0>:w {align1};
+mov (1) INPUT_ARG0.0<1>:w mba_result.6<0,1,0>:w {align1}; /* then median of the high MV words of A, B, C */
+mov (1) INPUT_ARG0.4<1>:w mbb_result.6<0,1,0>:w {align1};
+mov (1) INPUT_ARG0.8<1>:w mbc_result.6<0,1,0>:w {align1};
+SAVE_RET {align1};
+jmpi (1) word_imedian;
+mov (1) mb_mvp_ref.2<1>:w RET_ARG<0,1,0>:w {align1};
+
+__mb_hwdep_end:
+
+mov (2) mv_cc_ref.0<1>:w mba_result.4<2,2,1>:w {align1}; /* MV of MB A; written to vme_m1.16/.20 as the MV cost center below */
+
+/* Calibrate the ref window for MPEG2 */
+mov (1) vme_m0.0<1>:W -16:W {align1}; /* window origin offset (-16, -12): centres the 48x40 window on the 16x16 MB */
+mov (1) vme_m0.2<1>:W -12:W {align1};
+mov (1) INPUT_ARG0.0<1>:ud vme_m0.0<0,1,0>:ud {align1}; /* arg: window offset */
+mov (1) INPUT_ARG0.8<1>:ud vme_m0.8<0,1,0>:ud {align1}; /* arg: (x, y) * 16 of the current MB */
+mov (8) INPUT_ARG1.0<1>:ud pic_ref.0<8,8,1>:ud {align1}; /* arg: pic_ref register set up in __INTER_START */
+
+SAVE_RET {align1};
+jmpi (1) ref_boundary_check;
+mov (2) vme_m0.0<1>:w RET_ARG<2,2,1>:w {align1}; /* take the two words returned by ref_boundary_check */
+
+/* m2 */
+mov (8) vme_msg_2<1>:UD 0x0:UD {align1};
+
+/* m3 */
+mov (8) vme_msg_3<1>:UD 0x0:UD {align1};
+/* Use the Luma mode */
+mov (1) tmp_reg0.0<1>:UW INTRA16_DC_PRED:UW {align1};
+mov (1) vme_msg_3.4<1>:ub tmp_reg0.0<0,1,0>:UB {align1};
+
+/* m4 */
+mov (8) vme_msg_4<1>:UD 0x0:UD {align1};
+mov (1) vme_msg_4.16<1>:UD INTRA_PREDICTORE_MODE {align1};
+
+
+/* m1 */
+mov (8) vme_m1.0<1>:ud 0x0:ud {align1};
+mov (1) intra_flag<1>:UW 0x0:UW {align1} ;
+mov (1) tmp_reg0.0<1>:uw LUMA_INTRA_8x8_DISABLE:uw {align1};
+add (1) tmp_reg0.0<1>:uw tmp_reg0.0<0,1,0>:uw LUMA_INTRA_4x4_DISABLE:uw {align1};
+mov (1) intra_part_mask_ub<1>:UB tmp_reg0.0<0,1,0>:ub {align1}; /* both 8x8 and 4x4 luma intra disable values combined */
+/* m1 */
+/* assign MB intra struct from the thread payload*/
+mov (1) mb_intra_struct_ub<1>:UB input_mb_intra_ub<0,1,0>:UB {align1};
+
+
+/* M0 */
+/* IME search */
+mov (1) vme_m0.12<1>:UD SEARCH_CTRL_SINGLE + INTER_PART_MASK + INTER_SAD_HAAR + SUB_PEL_MODE_HALF:UD {align1};
+/* 16x16 Source, 1/2 pixel, Haar */
+mov (1) vme_m0.22<1>:UW REF_REGION_SIZE {align1}; /* Reference Width&Height, 48x40 */
+
+mov (1) vme_m0.4<1>:UD vme_m0.0<0,1,0>:UD {align1}; /* vme_m0.4 gets the same window origin as vme_m0.0 */
+add (2) vme_m0.0<1>:w vme_m0.0<2,2,1>:w mb_ref_win.16<2,2,1>:w {align1}; /* NOTE(review): mb_ref_win is only zeroed in the code visible here - confirm where .16 is set */
+add (2) vme_m0.4<1>:w vme_m0.4<2,2,1>:w mb_ref_win.16<2,2,1>:w {align1};
+mov (8) vme_msg_0.0<1>:UD vme_m0.0<8,8,1>:UD {align1};
+
+/* m1 */
+
+mov (1) vme_m1.0<1>:UD ADAPTIVE_SEARCH_ENABLE:ud {align1} ;
+/* MV num is passed by constant buffer. R4.28 */
+mov (1) vme_m1.4<1>:UB r4.28<0,1,0>:UB {align1};
+add (1) vme_m1.4<1>:UD vme_m1.4<0,1,0>:UD FB_PRUNING_DISABLE:UD {align1};
+mov (1) vme_m1.8<1>:UD START_CENTER + SEARCH_PATH_LEN:UD {align1};
+
+/* Bilinear filter */
+mov (1) tmp_reg0.0<1>:uw 0x04:uw {align1};
+add (1) vme_m1.30<1>:ub vme_m1.30<0,1,0>:ub tmp_reg0.0<0,1,0>:ub {align1};
+
+/* Set the MV cost center */
+mov (1) vme_m1.16<1>:ud mv_cc_ref.0<0,1,0>:ud {align1};
+mov (1) vme_m1.20<1>:ud mv_cc_ref.0<0,1,0>:ud {align1};
+mov (8) vme_msg_1.0<1>:UD vme_m1.0<8,8,1>:UD {align1};
+
+
+send (8)
+ vme_msg_ind
+ vme_wb
+ null
+ vme(
+ BIND_IDX_VME,
+ 0,
+ 0,
+ VME_MESSAGE_TYPE_MIXED
+ )
+ mlen vme_msg_length
+ rlen vme_inter_wb_length
+ {align1};
+
+and.z.f0.0 (1) null:uw mb_hwdep<0,1,0>:uw 0x04:uw {align1};
+(-f0.0) jmpi (1) vme_run_again; /* bit 2 of mb_hwdep set: continue at vme_run_again (defined further down the file) */
+
+vme_mv_output:
+
+and.z.f0.0 (1) null<1>:ud vme_wb0.0<0,1,0>:ud INTRAMBFLAG_MASK:ud {align1} ; /* test the INTRAMBFLAG_MASK bits of the VME result */
+
+(-f0.0)jmpi (1) __INTRA_INFO ; /* bits set: write the intra record; otherwise fall through to __INTER_INFO */
+
+__INTER_INFO:
+/* Write MV pairs */
+mov (8) msg_reg0.0<1>:UD obw_m0.0<8,8,1>:UD {align1}; /* message header */
+
+mov (8) msg_reg1.0<1>:UD vme_wb1.0<8,8,1>:UD {align1}; /* payload: one register copied from vme_wb1 */
+
+/* bind index 3, write 2 oword (32 bytes), msg type: 8(OWord Block Write) */
+send (16)
+ msg_ind
+ obw_wb
+ null
+ data_port(
+ OBW_CACHE_TYPE,
+ OBW_MESSAGE_TYPE,
+ OBW_CONTROL_2,
+ OBW_BIND_IDX,
+ OBW_WRITE_COMMIT_CATEGORY,
+ OBW_HEADER_PRESENT
+ )
+ mlen 2
+ rlen obw_wb_length
+ {align1};
+
+
+mov (1) msg_reg1.0<1>:ud vme_wb0.0<0,1,0>:ud {align1} ; /* build the info record that __OUTPUT_INFO writes */
+mov (1) msg_reg1.4<1>:UD vme_wb0.28<0,1,0>:UD {align1};
+mov (1) msg_reg1.8<1>:ud tmp_ud1<0,1,0>:ud {align1} ;
+mov (1) msg_reg1.12<1>:ud vme_wb0.0<0,1,0>:ud {align1} ;
+mov (1) msg_reg1.16<1>:ud 0x25:ud {align1} ; /* inter records carry 0x25 here, intra records 0x35 */
+jmpi (1) __OUTPUT_INFO;
+
+__INTRA_INFO:
+mov (1) msg_reg1.0<1>:UD vme_wb.0<0,1,0>:UD {align1}; /* build the info record that __OUTPUT_INFO writes */
+mov (1) msg_reg1.4<1>:UD vme_wb.16<0,1,0>:UD {align1};
+mov (1) msg_reg1.8<1>:UD vme_wb.20<0,1,0>:UD {align1};
+mov (1) msg_reg1.12<1>:UD vme_wb.24<0,1,0>:UD {align1};
+mov (1) msg_reg1.16<1>:ud 0x35:ud {align1} ; /* intra records carry 0x35 here, inter records 0x25 */
+
+__OUTPUT_INFO:
+
+mov (1) msg_reg1.20<1>:ud obw_m0.8<0,1,0>:ud {align1}; /* record the start offset of this MB's output */
+add (1) obw_m0.8<1>:UD obw_m0.8<0,1,0>:UD INTER_VME_OUTPUT_MV_IN_OWS:UD {align1}; /* the info record goes at this offset; neighbours read it back from there */
+mov (8) msg_reg0.0<1>:ud obw_m0.0<8,8,1>:ud {align1}; /* message header */
+
+
+/* bind index 3, write 2 oword (32 bytes), msg type: 8(OWord Block Write) */
+send (16)
+ msg_ind
+ obw_wb
+ null
+ data_port(
+ OBW_CACHE_TYPE,
+ OBW_MESSAGE_TYPE,
+ OBW_CONTROL_2,
+ OBW_BIND_IDX,
+ OBW_WRITE_COMMIT_CATEGORY,
+ OBW_HEADER_PRESENT
+ )
+ mlen 2
+ rlen obw_wb_length
+ {align1};
+
+/* Issue message fence so that the previous write message is committed */
+send (16)
+ mb_ind
+ mb_wb.0<1>:ud
+ NULL
+ data_port(
+ OBR_CACHE_TYPE,
+ OBR_MESSAGE_FENCE,
+ OBR_MF_COMMIT,
+ OBR_BIND_IDX,
+ OBR_WRITE_COMMIT_CATEGORY,
+ OBR_HEADER_PRESENT
+ )
+ mlen 1
+ rlen 1
+ {align1};
+
+__EXIT:
+/*
+ * kill thread
+ */
+mov (8) ts_msg_reg0<1>:UD r0<8,8,1>:UD {align1}; /* the end-of-thread message carries a copy of r0 */
+send (16) ts_msg_ind acc0<1>UW null thread_spawner(0, 0, 1) mlen 1 rlen 0 {align1 EOT};
+
+
+ nop ;
+ nop ;
+
+word_imedian: /* RET_ARG = median of the signed words a = INPUT_ARG0.0, b = INPUT_ARG0.4, c = INPUT_ARG0.8 */
+ cmp.ge.f0.0 (1) null:w INPUT_ARG0.0<0,1,0>:w INPUT_ARG0.4<0,1,0>:w {align1};
+ (f0.0) jmpi (1) cmp_a_ge_b;
+ cmp.ge.f0.0 (1) null:w INPUT_ARG0.0<0,1,0>:w INPUT_ARG0.8<0,1,0>:w {align1}; /* here a < b */
+ (f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.0<0,1,0>:w {align1}; /* c <= a < b: median is a */
+ (f0.0) jmpi (1) cmp_end;
+ cmp.ge.f0.0 (1) null:w INPUT_ARG0.4<0,1,0>:w INPUT_ARG0.8<0,1,0>:w {align1}; /* here a < b and a < c */
+ (f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.8<0,1,0>:w {align1}; /* a < c <= b: median is c */
+ (-f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.4<0,1,0>:w {align1}; /* a < b < c: median is b */
+ jmpi (1) cmp_end;
+cmp_a_ge_b:
+ cmp.ge.f0.0 (1) null:w INPUT_ARG0.4<0,1,0>:w INPUT_ARG0.8<0,1,0>:w {align1}; /* here a >= b */
+ (f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.4<0,1,0>:w {align1}; /* a >= b >= c: median is b */
+ (f0.0) jmpi (1) cmp_end;
+ cmp.ge.f0.0 (1) null:w INPUT_ARG0.0<0,1,0>:w INPUT_ARG0.8<0,1,0>:w {align1}; /* here a >= b and b < c */
+ (f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.8<0,1,0>:w {align1}; /* a >= c > b: median is c */
+ (-f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.0<0,1,0>:w {align1}; /* c > a >= b: median is a */
+cmp_end:
+ RETURN {align1};
+
+nop;
+nop;
+
+ref_boundary_check:
+
+/* The left/up coordinate of reference window */
+add (2) TEMP_VAR0.0<1>:w INPUT_ARG0.8<2,2,1>:w INPUT_ARG0.0<2,2,1>:w {align1}; /* ref_boundary_check: clamps a 48x40 reference window and returns its left/top in RET_ARG as an offset from INPUT_ARG0.8/.10. Here: left/top = INPUT_ARG0.8/.10 + INPUT_ARG0.0/.2 */
+/* The right/bottom coordinate of reference window */
+add (1) TEMP_VAR0.16<1>:w TEMP_VAR0.0<0,1,0>:w 48:w {align1}; /* right = left + 48. NOTE(review): TEMP_VAR0.16 and TEMP_VAR0.18 are written only here and on the next line; later clamps of left/top do not refresh them - confirm intended */
+add (1) TEMP_VAR0.18<1>:w TEMP_VAR0.2<0,1,0>:w 40:w {align1}; /* bottom = top + 40 */
+
+/* Firstly the MV range is checked */
+mul (2) TEMP_VAR1.16<1>:w INPUT_ARG1.16<2,2,1>:w -1:w {align1}; /* TEMP_VAR1.16/.18 = negated x/y range taken from INPUT_ARG1.16/.18 */
+add (2) TEMP_VAR1.0<1>:w INPUT_ARG0.8<2,2,1>:w TEMP_VAR1.16<2,2,1>:w {align1}; /* lower x/y bounds = INPUT_ARG0.8/.10 - range */
+add (2) TEMP_VAR1.4<1>:w INPUT_ARG0.8<2,2,1>:w INPUT_ARG1.16<2,2,1>:w {align1}; /* upper x/y bounds = INPUT_ARG0.8/.10 + range */
+
+cmp.l.f0.0 (1) null:w TEMP_VAR0.0<0,1,0>:w TEMP_VAR1.0<0,1,0>:w {align1}; /* left below the lower x bound ? */
+(f0.0) mov (1) TEMP_VAR0.0<1>:w TEMP_VAR1.0<0,1,0>:w {align1}; /* then left = lower x bound */
+cmp.g.f0.0 (1) null:w TEMP_VAR0.16<0,1,0>:w TEMP_VAR1.4<0,1,0>:w {align1}; /* right above the upper x bound ? */
+(f0.0) add (1) TEMP_VAR0.0<1>:w TEMP_VAR1.4<0,1,0>:w -48:w {align1}; /* then left = upper x bound - 48 */
+cmp.l.f0.0 (1) null:w TEMP_VAR0.2<0,1,0>:w TEMP_VAR1.2<0,1,0>:w {align1}; /* top below the lower y bound ? */
+(f0.0) mov (1) TEMP_VAR0.2<1>:w TEMP_VAR1.2<0,1,0>:w {align1}; /* then top = lower y bound */
+cmp.g.f0.0 (1) null:w TEMP_VAR0.18<0,1,0>:w TEMP_VAR1.6<0,1,0>:w {align1}; /* bottom above the upper y bound ? */
+(f0.0) add (1) TEMP_VAR0.2<1>:w TEMP_VAR1.6<0,1,0>:w -40:w {align1}; /* then top = upper y bound - 40 */
+
+
+x_left_cmp:
+ cmp.l.f0.0 (1) null:w TEMP_VAR0.0<0,1,0>:w 0:w {align1}; /* second stage: clamp against 0 and the limits in INPUT_ARG1.0/.2, presumably the picture size - verify against caller. left < 0 ? */
+ (-f0.0) jmpi (1) x_right_cmp; /* left is not negative: go check the right edge */
+ (f0.0) mov (1) TEMP_VAR0.0<1>:w 0:w {align1}; /* left = 0 */
+ jmpi (1) y_top_cmp; /* the right-edge check is skipped once left was clamped to 0 */
+x_right_cmp:
+ cmp.g.f0.0 (1) null:w TEMP_VAR0.16<0,1,0>:w INPUT_ARG1.0<0,1,0>:w {align1}; /* right > INPUT_ARG1.0 ? */
+ (-f0.0) jmpi (1) y_top_cmp;
+ (f0.0) add (1) TEMP_VAR0.0<1>:w INPUT_ARG1.0<0,1,0>:w -48:w {align1}; /* left = INPUT_ARG1.0 - 48 */
+y_top_cmp:
+ cmp.l.f0.0 (1) null:w TEMP_VAR0.2<0,1,0>:w 0:w {align1}; /* top < 0 ? */
+ (-f0.0) jmpi (1) y_bottom_cmp; /* top is not negative: go check the bottom edge */
+ (f0.0) mov (1) TEMP_VAR0.2<1>:w 0:w {align1}; /* top = 0 */
+ jmpi (1) y_bottom_end; /* the bottom-edge check is skipped once top was clamped to 0 */
+y_bottom_cmp:
+ cmp.g.f0.0 (1) null:w TEMP_VAR0.18<0,1,0>:w INPUT_ARG1.2<0,1,0>:w {align1}; /* bottom > INPUT_ARG1.2 ? */
+ (f0.0) add (1) TEMP_VAR0.2<1>:w INPUT_ARG1.2<0,1,0>:w -40:w {align1}; /* top = INPUT_ARG1.2 - 40 */
+
+y_bottom_end:
+mul (2) TEMP_VAR1.0<1>:w INPUT_ARG0.8<2,2,1>:w -1:w {align1}; /* TEMP_VAR1.0/.2 = negated INPUT_ARG0.8/.10 */
+add (2) RET_ARG<1>:w TEMP_VAR0.0<2,2,1>:w TEMP_VAR1.0<2,2,1>:w {align1}; /* RET_ARG x/y = clamped left/top - INPUT_ARG0.8/.10 */
+ RETURN {align1}; /* macro defined outside this chunk; presumably resumes at the address recorded by the caller - TODO confirm */
+nop;
+nop;
+
+vme_run_again:
+
+asr (2) mb_ref_win.0<1>:w mb_mvp_ref.0<2,2,1>:w 2:w {align1}; /* vme_run_again: optionally repeats the inter search with the window shifted by mb_mvp_ref and keeps the new writeback only when its vme_wb0.6 is smaller than the saved one. Here: mb_ref_win.0/.2 = mb_mvp_ref.0/.2 >> 2, presumably quarter-pel to integer-pel - TODO confirm */
+mov (2) tmp_reg0.0<1>:w mb_ref_win.0<2,2,1>:w {align1}; /* working copy used for the magnitude test below */
+add (2) mb_ref_win.8<1>:w mb_ref_win.0<2,2,1>:w 3:w {align1}; /* add 3 ... */
+and (2) mb_ref_win.16<1>:uw mb_ref_win.8<2,2,1>:uw 0xFFFC:uw {align1}; /* ... then clear the two low bits: mb_ref_win.16/.18 = shift rounded up to a multiple of 4 */
+
+cmp.l.f0.0 (1) null:w tmp_reg0.0<0,1,0>:w 0:w {align1}; /* x shift negative ? */
+(f0.0) mul (1) tmp_reg0.0<1>:w tmp_reg0.0<0,1,0>:w -1:w {align1}; /* then negate it, leaving the magnitude in tmp_reg0.0 */
+cmp.l.f0.0 (1) null:w tmp_reg0.2<0,1,0>:w 0:w {align1}; /* y shift negative ? */
+(f0.0) mul (1) tmp_reg0.2<1>:w tmp_reg0.2<0,1,0>:w -1:w {align1}; /* then negate it, leaving the magnitude in tmp_reg0.2 */
+
+cmp.ge.f0.0 (1) null:w tmp_reg0.0<0,1,0>:w 4:w {align1}; /* x magnitude >= 4 ? */
+(f0.0) jmpi (1) vme_start; /* yes: run the second search */
+cmp.ge.f0.0 (1) null:w tmp_reg0.2<0,1,0>:w 4:w {align1}; /* y magnitude >= 4 ? */
+(f0.0) jmpi (1) vme_start; /* yes: run the second search */
+
+jmpi (1) vme_done; /* both magnitudes are below 4: skip the second search */
+
+vme_start:
+ mov (8) tmp_vme_wb0.0<1>:ud vme_wb0.0<8,8,1>:ud {align1}; /* save the current writeback registers so they can be restored after the new search */
+ mov (8) tmp_vme_wb1.0<1>:ud vme_wb1.0<8,8,1>:ud {align1};
+
+/* Calibrate the ref window for MPEG2 */
+mov (1) vme_m0.0<1>:W -16:W {align1}; /* base x offset; 16 = (48 - 16) / 2, presumably centering the 48-wide window on a 16-wide block - TODO confirm */
+mov (1) vme_m0.2<1>:W -12:W {align1}; /* base y offset; 12 = (40 - 16) / 2, presumably centering the 40-high window on a 16-high block - TODO confirm */
+mov (1) INPUT_ARG0.8<1>:ud vme_m0.8<0,1,0>:ud {align1}; /* argument: the dword at vme_m0.8, read by ref_boundary_check as the words INPUT_ARG0.8/.10 */
+add (2) INPUT_ARG0.0<1>:w vme_m0.0<2,2,1>:w mb_ref_win.16<2,2,1>:w {align1}; /* argument: requested window offset = base offset + rounded shift */
+mov (8) INPUT_ARG1.0<1>:ud pic_ref.0<8,8,1>:ud {align1}; /* argument: full 8-dword copy of pic_ref */
+
+SAVE_RET {align1}; /* macro defined outside this chunk; presumably records the return address for the routine entered next - TODO confirm */
+jmpi (1) ref_boundary_check; /* subroutine call; execution continues on the next line after its RETURN */
+mov (2) vme_m0.0<1>:w RET_ARG<2,2,1>:w {align1}; /* clamped window offset goes back into vme_m0.0/.2 */
+
+/* m2 */
+mov (8) vme_msg_2<1>:UD 0x0:UD {align1}; /* message registers 2..4 are sent as all zeros */
+
+/* m3 */
+mov (8) vme_msg_3<1>:UD 0x0:UD {align1};
+
+/* m4 */
+mov (8) vme_msg_4<1>:UD 0x0:UD {align1};
+
+
+/* m1 */
+mov (8) vme_m1.0<1>:ud 0x0:ud {align1}; /* start from a cleared m1 */
+mov (1) intra_flag<1>:UW 0x0:UW {align1} ;
+mov (1) tmp_reg0.0<1>:uw LUMA_INTRA_8x8_DISABLE:uw {align1}; /* build the mask in a scratch word ... */
+add (1) tmp_reg0.0<1>:uw tmp_reg0.0<0,1,0>:uw LUMA_INTRA_4x4_DISABLE:uw {align1}; /* ... 8x8 disable + 4x4 disable ... */
+mov (1) intra_part_mask_ub<1>:UB tmp_reg0.0<0,1,0>:ub {align1}; /* ... and store its low byte as the intra partition mask */
+/* m1 */
+/* assign MB intra struct from the thread payload*/
+mov (1) mb_intra_struct_ub<1>:UB input_mb_intra_ub<0,1,0>:UB {align1};
+
+
+/* M0 */
+/* IME search */
+mov (1) vme_m0.12<1>:UD SEARCH_CTRL_SINGLE + INTER_PART_MASK + INTER_SAD_HAAR + SUB_PEL_MODE_HALF:UD {align1}; /* single search control, MPEG2 partition mask, Haar inter SAD, half-pel */
+/* 16x16 Source, 1/2 pixel, harr */
+mov (1) vme_m0.22<1>:UW REF_REGION_SIZE {align1}; /* Reference Width&Height, 48x40 */
+
+mov (1) vme_m0.4<1>:UD vme_m0.0<0,1,0>:UD {align1}; /* duplicate the x/y offset dword at vme_m0.0 into vme_m0.4 */
+mov (8) vme_msg_0.0<1>:UD vme_m0.0<8,8,1>:UD {align1}; /* stage m0 into the outgoing message */
+
+/* m1 */
+
+mov (1) vme_m1.0<1>:UD ADAPTIVE_SEARCH_ENABLE:ud {align1} ;
+/* MV num is passed by constant buffer. R4.28 */
+mov (1) vme_m1.4<1>:UB r4.28<0,1,0>:UB {align1};
+add (1) vme_m1.4<1>:UD vme_m1.4<0,1,0>:UD FB_PRUNING_DISABLE:UD {align1}; /* add the pruning setting into the same dword that holds the MV count byte */
+mov (1) vme_m1.8<1>:UD START_CENTER + SEARCH_PATH_LEN:UD {align1};
+
+/* Bilinear filter */
+mov (1) tmp_reg0.0<1>:uw 0x04:uw {align1};
+add (1) vme_m1.30<1>:ub vme_m1.30<0,1,0>:ub tmp_reg0.0<0,1,0>:ub {align1}; /* add 0x04 into byte vme_m1.30 */
+
+/* Set the MV cost center */
+mov (1) vme_m1.16<1>:ud mv_cc_ref.0<0,1,0>:ud {align1}; /* the same mv_cc_ref.0 dword is written to both vme_m1.16 ... */
+mov (1) vme_m1.20<1>:ud mv_cc_ref.0<0,1,0>:ud {align1}; /* ... and vme_m1.20 */
+mov (8) vme_msg_1.0<1>:UD vme_m1.0<8,8,1>:UD {align1}; /* stage m1 into the outgoing message */
+
+
+send (8)
+ vme_msg_ind
+ vme_wb
+ null
+ vme(
+ BIND_IDX_VME,
+ 0,
+ 0,
+ VME_MESSAGE_TYPE_INTER
+ )
+ mlen vme_msg_length
+ rlen vme_inter_wb_length
+ {align1}; /* inter-type VME message on BIND_IDX_VME; the response is written starting at vme_wb */
+
+
+cmp.l.f0.0 (1) null:uw vme_wb0.6<0,1,0>:uw tmp_vme_wb0.6<0,1,0>:uw {align1}; /* new vme_wb0.6 below the saved one ? presumably this word is the search distortion - TODO confirm */
+(f0.0) jmpi (1) vme_done; /* yes: keep the new writeback */
+mov (8) vme_wb0.0<1>:ud tmp_vme_wb0.0<8,8,1>:ud {align1}; /* no, or equal: restore the saved writeback */
+mov (8) vme_wb1.0<1>:ud tmp_vme_wb1.0<8,8,1>:ud {align1};
+
+vme_done:
+ jmpi (1) vme_mv_output; /* continue at vme_mv_output, a label defined outside this chunk */
+nop;
+nop;
+nop;
+
diff --git a/src/shaders/vme/mpeg2_inter_ivb.g7a b/src/shaders/vme/mpeg2_inter_ivb.g7a
new file mode 100644
index 0000000..bf0cdb3
--- /dev/null
+++ b/src/shaders/vme/mpeg2_inter_ivb.g7a
@@ -0,0 +1,3 @@
+#include "vme7.inc"
+#include "vme7_mpeg2.inc"
+#include "mpeg2_inter_ivb.asm"
diff --git a/src/shaders/vme/mpeg2_inter_ivb.g7b b/src/shaders/vme/mpeg2_inter_ivb.g7b
new file mode 100644
index 0000000..1ffcdd5
--- /dev/null
+++ b/src/shaders/vme/mpeg2_inter_ivb.g7b
@@ -0,0 +1,308 @@
+ { 0x00800001, 0x24000061, 0x00000000, 0x00000000 },
+ { 0x00800001, 0x24400061, 0x00000000, 0x00000000 },
+ { 0x00800001, 0x24800061, 0x00000000, 0x00000000 },
+ { 0x00800001, 0x24c00061, 0x00000000, 0x00000000 },
+ { 0x00200009, 0x24482e29, 0x004500a0, 0x00040004 },
+ { 0x00000001, 0x24540231, 0x00000014, 0x00000000 },
+ { 0x00000041, 0x24884521, 0x000000a2, 0x000000a1 },
+ { 0x00000040, 0x24884421, 0x00000488, 0x000000a0 },
+ { 0x00000041, 0x24880c21, 0x00000488, 0x0000000a },
+ { 0x00000001, 0x24940231, 0x00000014, 0x00000000 },
+ { 0x00200009, 0x2a402d29, 0x00450098, 0x00040004 },
+ { 0x00200001, 0x2a500129, 0x00450094, 0x00000000 },
+ { 0x00600001, 0x2ac00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x2a800061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x2ae00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x2b000061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x2b200061, 0x00000000, 0x00000000 },
+ { 0x01000005, 0x20002d28, 0x000000a6, 0x00040004 },
+ { 0x00010020, 0x34001c00, 0x00001400, 0x000000f2 },
+ { 0x00600001, 0x2b400061, 0x00000000, 0x00000000 },
+ { 0x01000005, 0x20002e28, 0x000000a5, 0x00600060 },
+ { 0x00210001, 0x2af401ed, 0x00000000, 0xffffffff },
+ { 0x00010020, 0x34001c00, 0x00001400, 0x00000022 },
+ { 0x00000001, 0x2ae000e5, 0x00000000, 0x00000001 },
+ { 0x00200001, 0x24000229, 0x004500a0, 0x00000000 },
+ { 0x00000040, 0x24003dad, 0x00000400, 0xffffffff },
+ { 0x00000041, 0x2b482521, 0x000000a2, 0x00000402 },
+ { 0x00000040, 0x2b482421, 0x00000b48, 0x00000400 },
+ { 0x00000041, 0x2b480c21, 0x00000b48, 0x0000000a },
+ { 0x00000001, 0x2b540231, 0x00000014, 0x00000000 },
+ { 0x00000001, 0x2b680021, 0x00000b48, 0x00000000 },
+ { 0x00000040, 0x2b480c21, 0x00000b48, 0x00000008 },
+ { 0x0a800031, 0x2b801ca1, 0x00000b40, 0x02180003 },
+ { 0x01000005, 0x20000c20, 0x00000b80, 0x00002000 },
+ { 0x00310001, 0x2af401ed, 0x00000000, 0xffffffff },
+ { 0x00110020, 0x34001c00, 0x00001400, 0x00000008 },
+ { 0x00000001, 0x2b480021, 0x00000b68, 0x00000000 },
+ { 0x0a800031, 0x2ba01ca1, 0x00000b40, 0x02180203 },
+ { 0x00200001, 0x2ae40021, 0x00450ba0, 0x00000000 },
+ { 0x00000001, 0x2af001ed, 0x00000000, 0x00010001 },
+ { 0x00600001, 0x2b400061, 0x00000000, 0x00000000 },
+ { 0x01000005, 0x20002e28, 0x000000a5, 0x00100010 },
+ { 0x00210001, 0x2b1401ed, 0x00000000, 0xffffffff },
+ { 0x00210001, 0x2b3401ed, 0x00000000, 0xffffffff },
+ { 0x00010020, 0x34001c00, 0x00001400, 0x00000076 },
+ { 0x00000001, 0x2b0000e5, 0x00000000, 0x00000001 },
+ { 0x00200001, 0x24000229, 0x004500a0, 0x00000000 },
+ { 0x00000040, 0x24023dad, 0x00000402, 0xffffffff },
+ { 0x00000041, 0x2b482521, 0x000000a2, 0x00000402 },
+ { 0x00000040, 0x2b482421, 0x00000b48, 0x00000400 },
+ { 0x00000041, 0x2b480c21, 0x00000b48, 0x0000000a },
+ { 0x00000001, 0x2b540231, 0x00000014, 0x00000000 },
+ { 0x00000001, 0x2b680021, 0x00000b48, 0x00000000 },
+ { 0x00000040, 0x2b480c21, 0x00000b48, 0x00000008 },
+ { 0x0a800031, 0x2b801ca1, 0x00000b40, 0x02180003 },
+ { 0x01000005, 0x20000c20, 0x00000b80, 0x00002000 },
+ { 0x00310001, 0x2b1401ed, 0x00000000, 0xffffffff },
+ { 0x00110020, 0x34001c00, 0x00001400, 0x00000008 },
+ { 0x00000001, 0x2b480021, 0x00000b68, 0x00000000 },
+ { 0x0a800031, 0x2ba01ca1, 0x00000b40, 0x02180203 },
+ { 0x00200001, 0x2b040021, 0x00450ba0, 0x00000000 },
+ { 0x00000001, 0x2b1001ed, 0x00000000, 0x00010001 },
+ { 0x00600001, 0x2b400061, 0x00000000, 0x00000000 },
+ { 0x01000005, 0x20002e28, 0x000000a5, 0x00080008 },
+ { 0x00010020, 0x34001c00, 0x00001400, 0x00000026 },
+ { 0x00000001, 0x2b2000e5, 0x00000000, 0x00000001 },
+ { 0x00200001, 0x24000229, 0x004500a0, 0x00000000 },
+ { 0x00000040, 0x24023dad, 0x00000402, 0xffffffff },
+ { 0x00000040, 0x24003dad, 0x00000400, 0x00010001 },
+ { 0x00000041, 0x2b482521, 0x000000a2, 0x00000402 },
+ { 0x00000040, 0x2b482421, 0x00000b48, 0x00000400 },
+ { 0x00000041, 0x2b480c21, 0x00000b48, 0x0000000a },
+ { 0x00000001, 0x2b540231, 0x00000014, 0x00000000 },
+ { 0x00000001, 0x2b680021, 0x00000b48, 0x00000000 },
+ { 0x00000040, 0x2b480c21, 0x00000b48, 0x00000008 },
+ { 0x0a800031, 0x2b801ca1, 0x00000b40, 0x02180003 },
+ { 0x01000005, 0x20000c20, 0x00000b80, 0x00002000 },
+ { 0x00310001, 0x2b3401ed, 0x00000000, 0xffffffff },
+ { 0x00110020, 0x34001c00, 0x00001400, 0x00000032 },
+ { 0x00000001, 0x2b480021, 0x00000b68, 0x00000000 },
+ { 0x0a800031, 0x2ba01ca1, 0x00000b40, 0x02180203 },
+ { 0x00200001, 0x2b240021, 0x00450ba0, 0x00000000 },
+ { 0x00000001, 0x2b3001ed, 0x00000000, 0x00010001 },
+ { 0x00000020, 0x34001c00, 0x00001400, 0x00000028 },
+ { 0x00600001, 0x2b400061, 0x00000000, 0x00000000 },
+ { 0x01000005, 0x20002e28, 0x000000a5, 0x00040004 },
+ { 0x00010020, 0x34001c00, 0x00001400, 0x00000022 },
+ { 0x00000001, 0x2b2000e5, 0x00000000, 0x00000001 },
+ { 0x00200001, 0x24000229, 0x004500a0, 0x00000000 },
+ { 0x00200040, 0x24003dad, 0x00450400, 0xffffffff },
+ { 0x00000041, 0x2b482521, 0x000000a2, 0x00000402 },
+ { 0x00000040, 0x2b482421, 0x00000b48, 0x00000400 },
+ { 0x00000041, 0x2b480c21, 0x00000b48, 0x0000000a },
+ { 0x00000001, 0x2b540231, 0x00000014, 0x00000000 },
+ { 0x00000001, 0x2b680021, 0x00000b48, 0x00000000 },
+ { 0x00000040, 0x2b480c21, 0x00000b48, 0x00000008 },
+ { 0x0a800031, 0x2b801ca1, 0x00000b40, 0x02180003 },
+ { 0x01000005, 0x20000c20, 0x00000b80, 0x00002000 },
+ { 0x00310001, 0x2b3401ed, 0x00000000, 0xffffffff },
+ { 0x00110020, 0x34001c00, 0x00001400, 0x00000008 },
+ { 0x00000001, 0x2b480021, 0x00000b68, 0x00000000 },
+ { 0x0a800031, 0x2ba01cb1, 0x00000b40, 0x02180203 },
+ { 0x00200001, 0x2b240021, 0x00450ba0, 0x00000000 },
+ { 0x00000001, 0x2b3001ed, 0x00000000, 0x00010001 },
+ { 0x00000040, 0x240014a5, 0x00000b00, 0x00000b20 },
+ { 0x01000010, 0x20001ca4, 0x00000400, 0x00000000 },
+ { 0x00110020, 0x34001c00, 0x00001400, 0x00000010 },
+ { 0x02000010, 0x20001ca4, 0x00000ae0, 0x00000000 },
+ { 0x00010001, 0x2b040021, 0x00000ae4, 0x00000000 },
+ { 0x00010001, 0x2b240021, 0x00000ae4, 0x00000000 },
+ { 0x00010001, 0x2b140129, 0x00000af4, 0x00000000 },
+ { 0x00010001, 0x2b340129, 0x00000af4, 0x00000000 },
+ { 0x00010001, 0x2ac00021, 0x00000ae4, 0x00000000 },
+ { 0x00110001, 0x2ac00061, 0x00000000, 0x00000000 },
+ { 0x00000020, 0x34001c00, 0x00001400, 0x00000032 },
+ { 0x00600001, 0x24000061, 0x00000000, 0x00000000 },
+ { 0x01000010, 0x20003da4, 0x00200af4, 0x00000000 },
+ { 0x00010040, 0x24003dad, 0x00200400, 0x00010001 },
+ { 0x00010001, 0x24040021, 0x00000ae4, 0x00000000 },
+ { 0x01000010, 0x20003da4, 0x00200b14, 0x00000000 },
+ { 0x00010040, 0x24003dad, 0x00200400, 0x00010001 },
+ { 0x00010001, 0x24040021, 0x00000b04, 0x00000000 },
+ { 0x01000010, 0x20003da4, 0x00200b34, 0x00000000 },
+ { 0x00010040, 0x24003dad, 0x00200400, 0x00010001 },
+ { 0x00010001, 0x24040021, 0x00000b24, 0x00000000 },
+ { 0x01000010, 0x20003da4, 0x00200400, 0x00010001 },
+ { 0x00010001, 0x2ac00021, 0x00000404, 0x00000000 },
+ { 0x00010020, 0x34001c00, 0x00001400, 0x00000018 },
+ { 0x00000001, 0x2fa001ad, 0x00000ae4, 0x00000000 },
+ { 0x00000001, 0x2fa401ad, 0x00000b04, 0x00000000 },
+ { 0x00000001, 0x2fa801ad, 0x00000b24, 0x00000000 },
+ { 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 },
+ { 0x00000020, 0x34001c00, 0x00001400, 0x0000008e },
+ { 0x00000001, 0x2ac001ad, 0x00000fe4, 0x00000000 },
+ { 0x00000001, 0x2fa001ad, 0x00000ae6, 0x00000000 },
+ { 0x00000001, 0x2fa401ad, 0x00000b06, 0x00000000 },
+ { 0x00000001, 0x2fa801ad, 0x00000b26, 0x00000000 },
+ { 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 },
+ { 0x00000020, 0x34001c00, 0x00001400, 0x00000082 },
+ { 0x00000001, 0x2ac201ad, 0x00000fe4, 0x00000000 },
+ { 0x00200001, 0x2a2001ad, 0x00450ae4, 0x00000000 },
+ { 0x00000001, 0x244001ed, 0x00000000, 0xfff0fff0 },
+ { 0x00000001, 0x244201ed, 0x00000000, 0xfff4fff4 },
+ { 0x00000001, 0x2fa00021, 0x00000440, 0x00000000 },
+ { 0x00000001, 0x2fa80021, 0x00000448, 0x00000000 },
+ { 0x00600001, 0x2fc00021, 0x008d0a40, 0x00000000 },
+ { 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 },
+ { 0x00000020, 0x34001c00, 0x00001400, 0x00000094 },
+ { 0x00200001, 0x244001ad, 0x00450fe4, 0x00000000 },
+ { 0x00600001, 0x28400061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x28600061, 0x00000000, 0x00000000 },
+ { 0x00000001, 0x24000169, 0x00000000, 0x00bb00bb },
+ { 0x00000001, 0x28640231, 0x00000400, 0x00000000 },
+ { 0x00600001, 0x28800061, 0x00000000, 0x00000000 },
+ { 0x00000001, 0x28900061, 0x00000000, 0x11111111 },
+ { 0x00600001, 0x24600061, 0x00000000, 0x00000000 },
+ { 0x00000001, 0x247c0169, 0x00000000, 0x00000000 },
+ { 0x00000001, 0x24000169, 0x00000000, 0x00020002 },
+ { 0x00000040, 0x24002d29, 0x00000400, 0x00040004 },
+ { 0x00000001, 0x247c0231, 0x00000400, 0x00000000 },
+ { 0x00000001, 0x247d0231, 0x000000a5, 0x00000000 },
+ { 0x00000001, 0x244c0061, 0x00000000, 0x7e201000 },
+ { 0x00000001, 0x24560169, 0x00000000, 0x28302830 },
+ { 0x00000001, 0x24440021, 0x00000440, 0x00000000 },
+ { 0x00200040, 0x244035ad, 0x00450440, 0x00450a90 },
+ { 0x00200040, 0x244435ad, 0x00450444, 0x00450a90 },
+ { 0x00600001, 0x28000021, 0x008d0440, 0x00000000 },
+ { 0x00000001, 0x24600061, 0x00000000, 0x00000002 },
+ { 0x00000001, 0x24640231, 0x0000009c, 0x00000000 },
+ { 0x00000040, 0x24640c21, 0x00000464, 0x00000000 },
+ { 0x00000001, 0x24680061, 0x00000000, 0x30003030 },
+ { 0x00000001, 0x24000169, 0x00000000, 0x00040004 },
+ { 0x00000040, 0x247e4631, 0x0000047e, 0x00000400 },
+ { 0x00000001, 0x24700021, 0x00000a20, 0x00000000 },
+ { 0x00000001, 0x24740021, 0x00000a20, 0x00000000 },
+ { 0x00600001, 0x28200021, 0x008d0460, 0x00000000 },
+ { 0x08600031, 0x21801cbd, 0x00000800, 0x0a686000 },
+ { 0x01000005, 0x20002d28, 0x000000a6, 0x00040004 },
+ { 0x00110020, 0x34001c00, 0x00001400, 0x00000096 },
+ { 0x01000005, 0x20000c20, 0x00000180, 0x00002000 },
+ { 0x00110020, 0x34001c00, 0x00001400, 0x00000012 },
+ { 0x00600001, 0x28000021, 0x008d0480, 0x00000000 },
+ { 0x00600001, 0x28200021, 0x008d01a0, 0x00000000 },
+ { 0x0a800031, 0x20001cac, 0x00000800, 0x040a0203 },
+ { 0x00000001, 0x28200021, 0x00000180, 0x00000000 },
+ { 0x00000001, 0x28240021, 0x0000019c, 0x00000000 },
+ { 0x00000001, 0x28280021, 0x00000544, 0x00000000 },
+ { 0x00000001, 0x282c0021, 0x00000180, 0x00000000 },
+ { 0x00000001, 0x28300061, 0x00000000, 0x00000025 },
+ { 0x00000020, 0x34001c00, 0x00001400, 0x0000000a },
+ { 0x00000001, 0x28200021, 0x00000180, 0x00000000 },
+ { 0x00000001, 0x28240021, 0x00000190, 0x00000000 },
+ { 0x00000001, 0x28280021, 0x00000194, 0x00000000 },
+ { 0x00000001, 0x282c0021, 0x00000198, 0x00000000 },
+ { 0x00000001, 0x28300061, 0x00000000, 0x00000035 },
+ { 0x00000001, 0x28340021, 0x00000488, 0x00000000 },
+ { 0x00000040, 0x24880c21, 0x00000488, 0x00000008 },
+ { 0x00600001, 0x28000021, 0x008d0480, 0x00000000 },
+ { 0x0a800031, 0x20001cac, 0x00000800, 0x040a0203 },
+ { 0x0a800031, 0x2b801ca1, 0x00000b40, 0x0219e003 },
+ { 0x00600001, 0x2e000021, 0x008d0000, 0x00000000 },
+ { 0x07800031, 0x24001ca8, 0x00000e00, 0x82000010 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x04000010, 0x200035ac, 0x00000fa0, 0x00000fa4 },
+ { 0x00010020, 0x34001c00, 0x00001400, 0x0000000e },
+ { 0x04000010, 0x200035ac, 0x00000fa0, 0x00000fa8 },
+ { 0x00010001, 0x2fe401ad, 0x00000fa0, 0x00000000 },
+ { 0x00010020, 0x34001c00, 0x00001400, 0x00000014 },
+ { 0x04000010, 0x200035ac, 0x00000fa4, 0x00000fa8 },
+ { 0x00010001, 0x2fe401ad, 0x00000fa8, 0x00000000 },
+ { 0x00110001, 0x2fe401ad, 0x00000fa4, 0x00000000 },
+ { 0x00000020, 0x34001c00, 0x00001400, 0x0000000c },
+ { 0x04000010, 0x200035ac, 0x00000fa4, 0x00000fa8 },
+ { 0x00010001, 0x2fe401ad, 0x00000fa4, 0x00000000 },
+ { 0x00010020, 0x34001c00, 0x00001400, 0x00000006 },
+ { 0x04000010, 0x200035ac, 0x00000fa0, 0x00000fa8 },
+ { 0x00010001, 0x2fe401ad, 0x00000fa8, 0x00000000 },
+ { 0x00110001, 0x2fe401ad, 0x00000fa0, 0x00000000 },
+ { 0x00000001, 0x34000020, 0x00000fe0, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00200040, 0x2f6035ad, 0x00450fa8, 0x00450fa0 },
+ { 0x00000040, 0x2f703dad, 0x00000f60, 0x00300030 },
+ { 0x00000040, 0x2f723dad, 0x00000f62, 0x00280028 },
+ { 0x00200041, 0x2f903dad, 0x00450fd0, 0xffffffff },
+ { 0x00200040, 0x2f8035ad, 0x00450fa8, 0x00450f90 },
+ { 0x00200040, 0x2f8435ad, 0x00450fa8, 0x00450fd0 },
+ { 0x05000010, 0x200035ac, 0x00000f60, 0x00000f80 },
+ { 0x00010001, 0x2f6001ad, 0x00000f80, 0x00000000 },
+ { 0x03000010, 0x200035ac, 0x00000f70, 0x00000f84 },
+ { 0x00010040, 0x2f603dad, 0x00000f84, 0xffd0ffd0 },
+ { 0x05000010, 0x200035ac, 0x00000f62, 0x00000f82 },
+ { 0x00010001, 0x2f6201ad, 0x00000f82, 0x00000000 },
+ { 0x03000010, 0x200035ac, 0x00000f72, 0x00000f86 },
+ { 0x00010040, 0x2f623dad, 0x00000f86, 0xffd8ffd8 },
+ { 0x05000010, 0x20003dac, 0x00000f60, 0x00000000 },
+ { 0x00110020, 0x34001c00, 0x00001400, 0x00000004 },
+ { 0x00010001, 0x2f6001ed, 0x00000000, 0x00000000 },
+ { 0x00000020, 0x34001c00, 0x00001400, 0x00000006 },
+ { 0x03000010, 0x200035ac, 0x00000f70, 0x00000fc0 },
+ { 0x00110020, 0x34001c00, 0x00001400, 0x00000002 },
+ { 0x00010040, 0x2f603dad, 0x00000fc0, 0xffd0ffd0 },
+ { 0x05000010, 0x20003dac, 0x00000f62, 0x00000000 },
+ { 0x00110020, 0x34001c00, 0x00001400, 0x00000004 },
+ { 0x00010001, 0x2f6201ed, 0x00000000, 0x00000000 },
+ { 0x00000020, 0x34001c00, 0x00001400, 0x00000004 },
+ { 0x03000010, 0x200035ac, 0x00000f72, 0x00000fc2 },
+ { 0x00010040, 0x2f623dad, 0x00000fc2, 0xffd8ffd8 },
+ { 0x00200041, 0x2f803dad, 0x00450fa8, 0xffffffff },
+ { 0x00200040, 0x2fe435ad, 0x00450f60, 0x00450f80 },
+ { 0x00000001, 0x34000020, 0x00000fe0, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0020000c, 0x2a803dad, 0x00450ac0, 0x00020002 },
+ { 0x00200001, 0x240001ad, 0x00450a80, 0x00000000 },
+ { 0x00200040, 0x2a883dad, 0x00450a80, 0x00030003 },
+ { 0x00200005, 0x2a902d29, 0x00450a88, 0xfffcfffc },
+ { 0x05000010, 0x20003dac, 0x00000400, 0x00000000 },
+ { 0x00010041, 0x24003dad, 0x00000400, 0xffffffff },
+ { 0x05000010, 0x20003dac, 0x00000402, 0x00000000 },
+ { 0x00010041, 0x24023dad, 0x00000402, 0xffffffff },
+ { 0x04000010, 0x20003dac, 0x00000400, 0x00040004 },
+ { 0x00010020, 0x34001c00, 0x00001400, 0x00000006 },
+ { 0x04000010, 0x20003dac, 0x00000402, 0x00040004 },
+ { 0x00010020, 0x34001c00, 0x00001400, 0x00000002 },
+ { 0x00000020, 0x34001c00, 0x00001400, 0x0000004a },
+ { 0x00600001, 0x2c800021, 0x008d0180, 0x00000000 },
+ { 0x00600001, 0x2ca00021, 0x008d01a0, 0x00000000 },
+ { 0x00000001, 0x244001ed, 0x00000000, 0xfff0fff0 },
+ { 0x00000001, 0x244201ed, 0x00000000, 0xfff4fff4 },
+ { 0x00000001, 0x2fa80021, 0x00000448, 0x00000000 },
+ { 0x00200040, 0x2fa035ad, 0x00450440, 0x00450a90 },
+ { 0x00600001, 0x2fc00021, 0x008d0a40, 0x00000000 },
+ { 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 },
+ { 0x00000020, 0x34001c00, 0x00001400, 0xffffff94 },
+ { 0x00200001, 0x244001ad, 0x00450fe4, 0x00000000 },
+ { 0x00600001, 0x28400061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x28600061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x28800061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x24600061, 0x00000000, 0x00000000 },
+ { 0x00000001, 0x247c0169, 0x00000000, 0x00000000 },
+ { 0x00000001, 0x24000169, 0x00000000, 0x00020002 },
+ { 0x00000040, 0x24002d29, 0x00000400, 0x00040004 },
+ { 0x00000001, 0x247c0231, 0x00000400, 0x00000000 },
+ { 0x00000001, 0x247d0231, 0x000000a5, 0x00000000 },
+ { 0x00000001, 0x244c0061, 0x00000000, 0x7e201000 },
+ { 0x00000001, 0x24560169, 0x00000000, 0x28302830 },
+ { 0x00000001, 0x24440021, 0x00000440, 0x00000000 },
+ { 0x00600001, 0x28000021, 0x008d0440, 0x00000000 },
+ { 0x00000001, 0x24600061, 0x00000000, 0x00000002 },
+ { 0x00000001, 0x24640231, 0x0000009c, 0x00000000 },
+ { 0x00000040, 0x24640c21, 0x00000464, 0x00000000 },
+ { 0x00000001, 0x24680061, 0x00000000, 0x30003030 },
+ { 0x00000001, 0x24000169, 0x00000000, 0x00040004 },
+ { 0x00000040, 0x247e4631, 0x0000047e, 0x00000400 },
+ { 0x00000001, 0x24700021, 0x00000a20, 0x00000000 },
+ { 0x00000001, 0x24740021, 0x00000a20, 0x00000000 },
+ { 0x00600001, 0x28200021, 0x008d0460, 0x00000000 },
+ { 0x08600031, 0x21801cbd, 0x00000800, 0x0a682000 },
+ { 0x05000010, 0x20002528, 0x00000186, 0x00000c86 },
+ { 0x00010020, 0x34001c00, 0x00001400, 0x00000004 },
+ { 0x00600001, 0x21800021, 0x008d0c80, 0x00000000 },
+ { 0x00600001, 0x21a00021, 0x008d0ca0, 0x00000000 },
+ { 0x00000020, 0x34001c00, 0x00001400, 0xffffff04 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
diff --git a/src/shaders/vme/vme.inc b/src/shaders/vme/vme.inc
index dd7e1bb..992c6b9 100644
--- a/src/shaders/vme/vme.inc
+++ b/src/shaders/vme/vme.inc
@@ -54,6 +54,7 @@ define(`SEARCH_CTRL_DUAL_RECORD', `0x00000300')
define(`SEARCH_CTRL_DUAL_REFERENCE', `0x00000700')
define(`REF_REGION_SIZE', `0x2830:UW')
+define(`MIN_REF_REGION_SIZE', `0x2020:UW')
define(`BI_SUB_MB_PART_MASK', `0x0c000000')
define(`MAX_NUM_MV', `0x00000020')
@@ -140,6 +141,7 @@ define(`orig_y_ub', `inline_reg0.1')
define(`transform_8x8_ub', `inline_reg0.4')
define(`slice_edge_ub', `inline_reg0.4')
define(`num_macroblocks', `inline_reg0.6')
+define(`quality_level_ub', `inline_reg0.8')
/*
* GRF 6~11 -- reserved
@@ -277,3 +279,6 @@ define(`vme_msg_4', `msg_reg4')
#endif
+define(`DEFAULT_QUALITY_LEVEL', `0x01')
+define(`HIGH_QUALITY_LEVEL', `DEFAULT_QUALITY_LEVEL')
+define(`LOW_QUALITY_LEVEL', `0x02')
diff --git a/src/shaders/vme/vme7.inc b/src/shaders/vme/vme7.inc
index 3fa99b7..e9d5864 100644
--- a/src/shaders/vme/vme7.inc
+++ b/src/shaders/vme/vme7.inc
@@ -54,6 +54,8 @@ define(`SEARCH_CTRL_DUAL_RECORD', `0x00000300')
define(`SEARCH_CTRL_DUAL_REFERENCE', `0x00000700')
define(`REF_REGION_SIZE', `0x2830:UW')
+define(`MIN_REF_REGION_SIZE', `0x2020:UW')
+define(`DREF_REGION_SIZE', `0x2020:UW')
define(`BI_SUB_MB_PART_MASK', `0x0c000000')
define(`MAX_NUM_MV', `0x00000020')
@@ -132,6 +134,7 @@ define(`orig_y_ub', `inline_reg0.1')
define(`transform_8x8_ub', `inline_reg0.4')
define(`input_mb_intra_ub', `inline_reg0.5')
define(`num_macroblocks', `inline_reg0.6')
+define(`quality_level_ub', `inline_reg0.7')
/*
* GRF 6~11 -- reserved
@@ -291,7 +294,6 @@ define(`mb_mv3', `r96')
define(`mb_ref', `r97')
define(`mb_ref_win', `r84')
-define(`DREF_REGION_SIZE', `0x2020:UW')
define(`PRED_L0', `0x0':uw)
define(`PRED_L1', `0x1':uw)
define(`PRED_BI', `0x2':uw)
@@ -317,3 +319,7 @@ define(`INTER_8X16MODE', `0x02')
define(`OBR_MESSAGE_FENCE', `7')
define(`OBR_MF_NOCOMMIT', `0')
define(`OBR_MF_COMMIT', `0x20')
+
+define(`DEFAULT_QUALITY_LEVEL', `0x01')
+define(`HIGH_QUALITY_LEVEL', `DEFAULT_QUALITY_LEVEL')
+define(`LOW_QUALITY_LEVEL', `0x02')
diff --git a/src/shaders/vme/vme75.inc b/src/shaders/vme/vme75.inc
index be49056..97e814f 100644
--- a/src/shaders/vme/vme75.inc
+++ b/src/shaders/vme/vme75.inc
@@ -59,6 +59,8 @@ define(`SEARCH_CTRL_DUAL_RECORD', `0x00000300')
define(`SEARCH_CTRL_DUAL_REFERENCE', `0x00000700')
define(`REF_REGION_SIZE', `0x2830:UW')
+define(`MIN_REF_REGION_SIZE', `0x2020:UW')
+define(`DREF_REGION_SIZE', `0x2020:UW')
define(`BI_SUB_MB_PART_MASK', `0x0c000000')
define(`MAX_NUM_MV', `0x00000020')
@@ -133,6 +135,7 @@ define(`orig_y_ub', `inline_reg0.1')
define(`transform_8x8_ub', `inline_reg0.4')
define(`input_mb_intra_ub', `inline_reg0.5')
define(`num_macroblocks', `inline_reg0.6')
+define(`quality_level_ub', `inline_reg0.7')
/*
* GRF 6~11 -- reserved
@@ -311,7 +314,6 @@ define(`mb_mv3', `r96')
define(`mb_ref', `r97')
define(`mb_ref_win', `r84')
-define(`DREF_REGION_SIZE', `0x2020:UW')
define(`PRED_L0', `0x0':uw)
define(`PRED_L1', `0x1':uw)
define(`PRED_BI', `0x2':uw)
@@ -337,3 +339,7 @@ define(`INTER_8X16MODE', `0x02')
define(`OBR_MESSAGE_FENCE', `7')
define(`OBR_MF_NOCOMMIT', `0')
define(`OBR_MF_COMMIT', `0x20')
+
+define(`DEFAULT_QUALITY_LEVEL', `0x01')
+define(`HIGH_QUALITY_LEVEL', `DEFAULT_QUALITY_LEVEL')
+define(`LOW_QUALITY_LEVEL', `0x02')
diff --git a/src/shaders/vme/vme75_mpeg2.inc b/src/shaders/vme/vme75_mpeg2.inc
index 9b877ac..b638056 100644
--- a/src/shaders/vme/vme75_mpeg2.inc
+++ b/src/shaders/vme/vme75_mpeg2.inc
@@ -16,3 +16,18 @@
*/
define(`INTER_PART_MASK', `0x7e000000')
+define(`mpeg2_ref', `r83')
+define(`pic_ref', `r82')
+define(`INTRA16_DC_PRED', `0xBB')
+/* Cost center ref */
+define(`mv_cc_ref', `r81')
+define(`tmp_vme_wb0', `r100')
+define(`tmp_vme_wb1', `r101')
+define(`tmp_vme_wb2', `r102')
+define(`tmp_vme_wb3', `r103')
+define(`tmp_vme_wb4', `r104')
+define(`tmp_vme_wb5', `r105')
+define(`tmp_vme_wb6', `r106')
+define(`tmp_vme_wb7', `r107')
+define(`tmp_vme_wb8', `r108')
+define(`tmp_vme_wb9', `r109')
diff --git a/src/shaders/vme/vme7_mpeg2.inc b/src/shaders/vme/vme7_mpeg2.inc
index 9b877ac..2d7852a 100644
--- a/src/shaders/vme/vme7_mpeg2.inc
+++ b/src/shaders/vme/vme7_mpeg2.inc
@@ -16,3 +16,20 @@
*/
define(`INTER_PART_MASK', `0x7e000000')
+define(`mpeg2_ref', `r83')
+define(`pic_ref', `r82')
+define(`INTRA16_DC_PRED', `0xBB')
+
+/* Cost center ref */
+define(`mv_cc_ref', `r81')
+
+define(`tmp_vme_wb0', `r100')
+define(`tmp_vme_wb1', `r101')
+define(`tmp_vme_wb2', `r102')
+define(`tmp_vme_wb3', `r103')
+define(`tmp_vme_wb4', `r104')
+define(`tmp_vme_wb5', `r105')
+define(`tmp_vme_wb6', `r106')
+define(`tmp_vme_wb7', `r107')
+define(`tmp_vme_wb8', `r108')
+define(`tmp_vme_wb9', `r109')
diff --git a/src/shaders/vme/vme8.inc b/src/shaders/vme/vme8.inc
new file mode 100644
index 0000000..5b6f469
--- /dev/null
+++ b/src/shaders/vme/vme8.inc
@@ -0,0 +1,347 @@
+/*
+ * Copyright © <2010>, Intel Corporation.
+ *
+ * This program is licensed under the terms and conditions of the
+ * Eclipse Public License (EPL), version 1.0. The full text of the EPL is at
+ * http://www.opensource.org/licenses/eclipse-1.0.php.
+ *
+ */
+// Module name: ME_header.inc for Gen8
+//
+// Global symbols define
+//
+
+/*
+ * Constant
+ */
+define(`VME_MESSAGE_TYPE_INTER', `1')
+define(`VME_MESSAGE_TYPE_INTRA', `2')
+define(`VME_MESSAGE_TYPE_MIXED', `3')
+
+define(`VME_SIC_MESSAGE_TYPE', `1')
+define(`VME_IME_MESSAGE_TYPE', `2')
+define(`VME_FBR_MESSAGE_TYPE', `3')
+
+define(`BLOCK_32X1', `0x0000001F')
+define(`BLOCK_4X16', `0x000F0003')
+define(`BLOCK_8X4', `0x00070003')
+
+define(`LUMA_INTRA_16x16_DISABLE', `0x1')
+define(`LUMA_INTRA_8x8_DISABLE', `0x2')
+define(`LUMA_INTRA_4x4_DISABLE', `0x4')
+
+define(`INTRA_PRED_AVAIL_FLAG_AE', `0x60')
+define(`INTRA_PRED_AVAIL_FLAG_B', `0x10')
+define(`INTRA_PRED_AVAIL_FLAG_C', `0x8')
+define(`INTRA_PRED_AVAIL_FLAG_D', `0x4')
+
+define(`BIND_IDX_VME', `0')
+define(`BIND_IDX_VME_REF0', `1')
+define(`BIND_IDX_VME_REF1', `2')
+define(`BIND_IDX_OUTPUT', `3')
+define(`BIND_IDX_INEP', `4')
+
+define(`SUB_PEL_MODE_INTEGER', `0x00000000')
+define(`SUB_PEL_MODE_HALF', `0x00001000')
+define(`SUB_PEL_MODE_QUARTER', `0x00003000')
+
+define(`INTER_SAD_NONE', `0x00000000')
+define(`INTER_SAD_HAAR', `0x00200000')
+
+define(`INTRA_SAD_NONE', `0x00000000')
+define(`INTRA_SAD_HAAR', `0x00800000')
+
+define(`INTER_PART_MASK', `0x00000000')
+
+define(`SEARCH_CTRL_SINGLE', `0x00000000')
+define(`SEARCH_CTRL_DUAL_START', `0x00000100')
+define(`SEARCH_CTRL_DUAL_RECORD', `0x00000300')
+define(`SEARCH_CTRL_DUAL_REFERENCE', `0x00000700')
+
+define(`REF_REGION_SIZE', `0x2830:UW')
+define(`MIN_REF_REGION_SIZE', `0x2020:UW')
+define(`DREF_REGION_SIZE', `0x2020:UW')
+
+define(`BI_SUB_MB_PART_MASK', `0x0c000000')
+define(`MAX_NUM_MV', `0x00000020')
+define(`FB_PRUNING_ENABLE', `0x40000000')
+
+define(`SEARCH_PATH_LEN', `0x00003030')
+define(`START_CENTER', `0x30000000')
+
+define(`ADAPTIVE_SEARCH_ENABLE', `0x00000002')
+define(`INTRA_PREDICTORE_MODE', `0x11111111:UD')
+
+define(`INTER_VME_OUTPUT_IN_OWS', `10')
+define(`INTER_VME_OUTPUT_MV_IN_OWS', `8')
+
+define(`INTRAMBFLAG_MASK', `0x00002000')
+define(`MVSIZE_UW_BASE', `0x0040')
+define(`MFC_MV32_BIT_SHIFT', `5')
+define(`CBP_DC_YUV_UW', `0x000E')
+
+define(`DC_HARR_ENABLE', `0x0000')
+define(`DC_HARR_DISABLE', `0x0020')
+
+define(`MV32_BIT_MASK', `0x0020')
+define(`MV32_BIT_SHIFT', `5')
+
+define(`OBW_CACHE_TYPE', `10')
+
+
+define(`OBW_MESSAGE_TYPE', `8')
+
+define(`OBW_BIND_IDX', `BIND_IDX_OUTPUT')
+
+define(`OBW_CONTROL_0', `0') /* 1 OWord, low 128 bits */
+define(`OBW_CONTROL_1', `1') /* 1 OWord, high 128 bits */
+define(`OBW_CONTROL_2', `2') /* 2 OWords */
+define(`OBW_CONTROL_3', `3') /* 4 OWords */
+define(`OBW_CONTROL_8', `4') /* 8 OWords */
+
+define(`FBR_BME_ENABLE', `0x00000000')
+define(`FBR_BME_DISABLE', `0x00040000')
+
+define(`OBW_WRITE_COMMIT_CATEGORY', `0') /* category on Ivybridge */
+
+
+define(`OBW_HEADER_PRESENT', `1')
+
+/* GRF registers
+ * r0 header
+ * r1~r4 constant buffer (reserved)
+ * r5 inline data
+ * r6~r11 reserved
+ * r12~r22 write back of VME message
+ * (Oword Block Write has no write back register: obw_wb is null)
+ */
+/*
+ * GRF 0 -- header
+ */
+define(`thread_id_ub', `r0.20<0,1,0>:UB') /* thread id in payload */
+
+/*
+ * GRF 1~4 -- Constant Buffer (reserved)
+ */
+
+/*
+ * GRF 5 -- inline data
+ */
+define(`inline_reg0', `r5')
+define(`w_in_mb_uw', `inline_reg0.2')
+define(`orig_xy_ub', `inline_reg0.0')
+define(`orig_x_ub', `inline_reg0.0') /* in macroblock */
+define(`orig_y_ub', `inline_reg0.1')
+define(`transform_8x8_ub', `inline_reg0.4')
+define(`input_mb_intra_ub', `inline_reg0.5')
+define(`num_macroblocks', `inline_reg0.6')
+define(`quality_level_ub', `inline_reg0.7')
+
+/*
+ * GRF 6~11 -- reserved
+ */
+
+/*
+ * GRF 12~22 -- write back for VME message (vme_wb0~6, vme_ime_wb7~10)
+ */
+define(`vme_wb', `r12')
+define(`vme_wb0', `r12')
+define(`vme_wb1', `r13')
+define(`vme_wb2', `r14')
+define(`vme_wb3', `r15')
+define(`vme_wb4', `r16')
+define(`vme_wb5', `r17')
+define(`vme_wb6', `r18')
+define(`vme_ime_wb7', `r19')
+define(`vme_ime_wb8', `r20')
+define(`vme_ime_wb9', `r21')
+define(`vme_ime_wb10', `r22')
+
+
+/*
+ * Write back for VME output message -- none (null register, length 0)
+ */
+define(`obw_wb', `null<1>:W')
+define(`obw_wb_length', `0')
+
+
+/*
+ * GRF 28~30 -- Intra Neighbor Edge Pixels
+ */
+define(`INEP_ROW', `r28')
+define(`INEP_COL0', `r29')
+define(`INEP_COL1', `r30')
+
+/*
+ * GRF 48~49 -- Chroma Neighbor Edge Pixels
+ */
+define(`CHROMA_ROW', `r48')
+define(`CHROMA_COL', `r49')
+
+/*
+ * temporary registers
+ */
+define(`tmp_reg0', `r32')
+define(`read0_header', `tmp_reg0')
+define(`tmp_reg1', `r33')
+define(`read1_header', `tmp_reg1')
+define(`tmp_reg2', `r34')
+define(`vme_m0', `tmp_reg2')
+define(`tmp_reg3', `r35')
+define(`vme_m1', `tmp_reg3')
+define(`intra_flag', `vme_m1.28')
+define(`intra_part_mask_ub', `vme_m1.28')
+define(`mb_intra_struct_ub', `vme_m1.29')
+define(`tmp_reg4', `r36')
+define(`obw_m0', `tmp_reg4')
+define(`tmp_reg5', `r37')
+define(`obw_m1', `tmp_reg5')
+define(`tmp_reg6', `r38')
+define(`obw_m2', `tmp_reg6')
+define(`tmp_reg7', `r39')
+define(`obw_m3', `tmp_reg7')
+define(`tmp_reg8', `r40')
+define(`obw_m4', `tmp_reg8')
+define(`tmp_reg9', `r41')
+define(`tmp_x_w', `tmp_reg9.0')
+define(`tmp_rega', `r42')
+define(`tmp_ud0', `tmp_rega.0')
+define(`tmp_ud1', `tmp_rega.4')
+define(`tmp_ud2', `tmp_rega.8')
+define(`tmp_ud3', `tmp_rega.12')
+define(`tmp_uw0', `tmp_rega.0')
+define(`tmp_uw1', `tmp_rega.2')
+define(`tmp_uw2', `tmp_rega.4')
+define(`tmp_uw3', `tmp_rega.6')
+define(`tmp_uw4', `tmp_rega.8')
+define(`tmp_uw5', `tmp_rega.10')
+define(`tmp_uw6', `tmp_rega.12')
+define(`tmp_uw7', `tmp_rega.14')
+
+define(`vme_m2', `r43')
+define(`vme_m3', `r44')
+/*
+ * MRF registers
+ */
+
+define(`msg_ind', `64')
+define(`msg_reg0', `r64')
+define(`msg_reg1', `r65')
+define(`msg_reg2', `r66')
+define(`msg_reg3', `r67')
+define(`msg_reg4', `r68')
+define(`msg_reg5', `r69')
+define(`msg_reg6', `r70')
+define(`msg_reg7', `r71')
+define(`msg_reg8', `r72')
+define(`msg_reg9', `r73')
+
+define(`ts_msg_ind', `112')
+define(`ts_msg_reg0', `r112')
+/*
+ * VME message payload
+ */
+
+define(`vme_intra_wb_length', `1')
+define(`vme_wb_length', `7')
+define(`sic_vme_msg_length', `8')
+define(`fbr_vme_msg_length', `8')
+define(`ime_vme_msg_length', `6')
+
+define(`vme_msg_ind', `msg_ind')
+define(`vme_msg_0', `msg_reg0')
+define(`vme_msg_1', `msg_reg1')
+define(`vme_msg_2', `msg_reg2')
+
+define(`vme_msg_3', `msg_reg3')
+define(`vme_msg_4', `msg_reg4')
+
+
+define(`vme_msg_5', `msg_reg5')
+define(`vme_msg_6', `msg_reg6')
+define(`vme_msg_7', `msg_reg7')
+define(`vme_msg_8', `msg_reg8')
+define(`vme_msg_9', `msg_reg9')
+
+define(`BIND_IDX_CBCR', `6')
+
+
+define(`LUMA_CHROMA_MODE', `0x0')
+define(`LUMA_INTRA_MODE', `0x1')
+define(`LUMA_INTRA_DISABLE', `0x2')
+
+define(`RETURN_REG', `r127.0')
+define(`RET_ARG', `r127.4')
+
+/* Now at most two registers are used for input parameters */
+define(`INPUT_ARG0', `r125')
+define(`INPUT_ARG1', `r126')
+
+/* Two temporary registers are used in the function */
+define(`TEMP_VAR0', `r123')
+define(`TEMP_VAR1', `r124')
+
+
+define(`OBR_MESSAGE_TYPE', `0')
+define(`OBR_CACHE_TYPE', `10')
+define(`OBR_BIND_IDX', `BIND_IDX_OUTPUT')
+
+define(`OBR_CONTROL_0', `0') /* 1 OWord, low 128 bits */
+define(`OBR_CONTROL_1', `1') /* 1 OWord, high 128 bits */
+define(`OBR_CONTROL_2', `2') /* 2 OWords */
+define(`OBR_CONTROL_4', `3') /* 4 OWords */
+define(`OBR_CONTROL_8', `4') /* 8 OWords */
+define(`OBR_WRITE_COMMIT_CATEGORY', `0') /* category on SNB+ for Data port */
+define(`OBR_HEADER_PRESENT', `1')
+
+define(`mb_hwdep', `r5.6')
+define(`MB_AVAIL', `1:d')
+define(`MB_PRED_FLAG', `1:w')
+
+define(`mb_pred_mode', `r85')
+define(`mb_mvp_ref', `r86')
+define(`mba_result', `r87')
+define(`mbb_result', `r88')
+define(`mbc_result', `r89')
+define(`mb_ind', `90')
+define(`mb_msg0', `r90')
+define(`mb_wb', `r91')
+define(`mb_intra_wb', `r91')
+define(`mb_inter_wb', `r92')
+define(`mb_mv0', `r93')
+define(`mb_mv1', `r94')
+define(`mb_mv2', `r95')
+define(`mb_mv3', `r96')
+define(`mb_ref', `r97')
+define(`mb_ref_win', `r84')
+
+define(`PRED_L0', `0x0':uw)
+define(`PRED_L1', `0x1':uw)
+define(`PRED_BI', `0x2':uw)
+define(`PRED_DIRECT', `0x3':uw)
+define(`PRED_MASK', `0x3':uw)
+
+/* The MAX search len per reference is 16 */
+define(`DSEARCH_PATH_LEN', `0x00001212')
+define(`BI_WEIGHT', `0x20':uw)
+define(`DSTART_CENTER', `0x00000000')
+define(`INTER_MASK', `0x03')
+define(`INTER_16X16MODE', `0x0')
+define(`INTER_16X8MODE', `0x01')
+define(`INTER_8X16MODE', `0x02')
+define(`INTER_8X8MODE', `0x03')
+define(`INTER_BLOCK0', `0x0')
+define(`INTER_BLOCK1', `0x1')
+define(`INTER_BLOCK2', `0x2')
+define(`INTER_BLOCK3', `0x3')
+define(`INTER_16X8MODE', `0x01')
+define(`INTER_8X16MODE', `0x02')
+
+
+define(`OBR_MESSAGE_FENCE', `7')
+define(`OBR_MF_NOCOMMIT', `0')
+define(`OBR_MF_COMMIT', `0x20')
+
+define(`DEFAULT_QUALITY_LEVEL', `0x01')
+define(`HIGH_QUALITY_LEVEL', `DEFAULT_QUALITY_LEVEL')
+define(`LOW_QUALITY_LEVEL', `0x02')
diff --git a/src/va_backend_compat.h b/src/va_backend_compat.h
index f5c9f75..8767153 100644
--- a/src/va_backend_compat.h
+++ b/src/va_backend_compat.h
@@ -45,4 +45,9 @@
# define VA_DRM_AUTH_CUSTOM VA_DUMMY
#endif
+#if !VA_CHECK_VERSION(0,35,2)
+# define VAProfileH264MultiviewHigh 15
+# define VAProfileH264StereoHigh 16
+#endif
+
#endif /* VA_BACKEND_COMPAT_H */