diff options
author | Zhao Yakui <yakui.zhao@intel.com> | 2012-12-28 16:30:57 +0800 |
---|---|---|
committer | Xiang, Haihao <haihao.xiang@intel.com> | 2013-01-17 13:08:40 +0800 |
commit | df0914b9779110b014895d706dbf68e028392b63 (patch) | |
tree | 7ca4295e6f3b19e27985c61bf4a4552cbef87032 /src/shaders | |
parent | f3d28947b7e4ba91fa7c273433ec54de56a9e83e (diff) |
Adjust the reference window based on MVP prediction to optimize VME param on Haswell
Signed-off-by: Zhao Yakui <yakui.zhao@intel.com>
Diffstat (limited to 'src/shaders')
-rw-r--r-- | src/shaders/vme/inter_bframe_haswell.asm | 13 | ||||
-rw-r--r-- | src/shaders/vme/inter_bframe_haswell.g75b | 28 | ||||
-rw-r--r-- | src/shaders/vme/inter_frame_haswell.asm | 11 | ||||
-rw-r--r-- | src/shaders/vme/inter_frame_haswell.g75b | 14 | ||||
-rw-r--r-- | src/shaders/vme/vme75.inc | 1 |
5 files changed, 57 insertions, 10 deletions
diff --git a/src/shaders/vme/inter_bframe_haswell.asm b/src/shaders/vme/inter_bframe_haswell.asm index 9ab35d3..e02748e 100644 --- a/src/shaders/vme/inter_bframe_haswell.asm +++ b/src/shaders/vme/inter_bframe_haswell.asm @@ -78,6 +78,7 @@ send (8) msg_ind CHROMA_COL<1>:UB null read(BIND_IDX_CBCR, 0, 0, 4) mlen 1 rlen mov (8) vme_m1.0<1>:ud 0:ud {align1}; mov (8) mb_mvp_ref.0<1>:ud 0:ud {align1}; +mov (8) mb_ref_win.0<1>:ud 0:ud {align1}; and.z.f0.0 (1) null:uw mb_hwdep<0,1,0>:uw 0x04:uw {align1}; (f0.0) jmpi (1) __mb_hwdep_end; @@ -462,6 +463,9 @@ jmpi (1) word_imedian; mov (1) mb_mvp_ref.6<1>:w RET_ARG<0,1,0>:w {align1}; __mb_hwdep_end: +asr (4) mb_ref_win.0<1>:w mb_mvp_ref.0<4,4,1>:w 2:w {align1}; +add (4) mb_ref_win.8<1>:w mb_ref_win.0<4,4,1>:w 3:w {align1}; +and (4) mb_ref_win.16<1>:uw mb_ref_win.8<4,4,1>:uw 0xFFFC:uw {align1}; /* m2, get the MV/Mb cost passed from constant buffer when spawning thread by MEDIA_OBJECT */ mov (8) vme_m2<1>:UD r1.0<8,8,1>:UD {align1}; @@ -580,7 +584,16 @@ mov (1) vme_m0.0<1>:W -8:W {align1}; mov (1) vme_m0.2<1>:W -8:W {align1}; mov (1) vme_m0.4<1>:UD vme_m0.0<0,1,0>:UD {align1}; +and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_AE:uw {align1}; +(f0.0) add (1) vme_m0.0<1>:w vme_m0.0<0,1,0>:w 4:w {align1}; +(f0.0) add (1) vme_m0.4<1>:w vme_m0.4<0,1,0>:w 4:w {align1}; +and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_B:uw {align1}; +(f0.0) add (1) vme_m0.2<1>:w vme_m0.2<0,1,0>:w 4:w {align1}; +(f0.0) add (1) vme_m0.6<1>:w vme_m0.6<0,1,0>:w 4:w {align1}; +add (2) vme_m0.0<1>:w vme_m0.0<2,2,1>:w mb_ref_win.16<2,2,1>:w {align1}; +add (2) vme_m0.4<1>:w vme_m0.4<2,2,1>:w mb_ref_win.20<2,2,1>:w {align1}; + mov (8) vme_msg_0.0<1>:UD vme_m0.0<8,8,1>:UD {align1}; mov (1) vme_m1.0<1>:UD ADAPTIVE_SEARCH_ENABLE:ud {align1} ; diff --git a/src/shaders/vme/inter_bframe_haswell.g75b b/src/shaders/vme/inter_bframe_haswell.g75b index 6327a32..e0ef98e 100644 --- a/src/shaders/vme/inter_bframe_haswell.g75b +++ b/src/shaders/vme/inter_bframe_haswell.g75b @@ -35,6 +35,7 @@ { 0x04600031, 0x26201cb1, 0x00000800, 0x02190006 }, { 0x00600001, 0x24600061, 0x00000000, 0x00000000 }, { 0x00600001, 0x2ac00061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x2a800061, 0x00000000, 0x00000000 }, { 0x01000005, 0x20002d28, 0x000000a6, 0x00040004 }, { 0x00010020, 0x34001c00, 0x00001400, 0x00000cb0 }, { 0x00600001, 0x2ae00061, 0x00000000, 0x00000000 }, @@ -63,7 +64,7 @@ { 0x00000001, 0x2fa40021, 0x00000b80, 0x00000000 }, { 0x00000001, 0x2fa80061, 0x00000000, 0x00000001 }, { 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 }, - { 0x00000020, 0x34001c00, 0x00001400, 0x000012f0 }, + { 0x00000020, 0x34001c00, 0x00001400, 0x000013a0 }, { 0x00000001, 0x2aa00129, 0x00000fe4, 0x00000000 }, { 0x01000010, 0x20002d28, 0x00000aa0, 0x00000000 }, { 0x00010001, 0x2af001e9, 0x00000000, 0x00010001 }, @@ -99,7 +100,7 @@ { 0x00000001, 0x2fa40021, 0x00000b80, 0x00000000 }, { 0x00000001, 0x2fa80061, 0x00000000, 0x00000002 }, { 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 }, - { 0x00000020, 0x34001c00, 0x00001400, 0x000010b0 }, + { 0x00000020, 0x34001c00, 0x00001400, 0x00001160 }, { 0x00000001, 0x2aa00129, 0x00000fe4, 0x00000000 }, { 0x01000010, 0x20002d28, 0x00000aa0, 0x00000000 }, { 0x00010001, 0x2b1001e9, 0x00000000, 0x00010001 }, @@ -134,7 +135,7 @@ { 0x00000001, 0x2fa40021, 0x00000b80, 0x00000000 }, { 0x00000001, 0x2fa80061, 0x00000000, 0x00000002 }, { 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 }, - { 0x00000020, 0x34001c00, 0x00001400, 0x00000e80 }, + { 0x00000020, 0x34001c00, 0x00001400, 0x00000f30 }, { 0x00000001, 0x2aa00129, 0x00000fe4, 0x00000000 }, { 0x01000010, 0x20002d28, 0x00000aa0, 0x00000000 }, { 0x00010001, 0x2b3001e9, 0x00000000, 0x00010001 }, @@ -166,7 +167,7 @@ { 0x00000001, 0x2fa40021, 0x00000b80, 0x00000000 }, { 0x00000001, 0x2fa80061, 0x00000000, 0x00000003 }, { 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 }, - { 0x00000020, 0x34001c00, 0x00001400, 0x00000c80 }, + { 0x00000020, 0x34001c00, 0x00001400, 0x00000d30 }, { 0x00000001, 0x2aa00129, 0x00000fe4, 0x00000000 }, { 0x00000001, 0x2b3201ed, 0x00000000, 0x00010001 }, { 0x01000010, 0x20002d28, 0x00000aa0, 0x00000000 }, @@ -207,13 +208,13 @@ { 0x00000001, 0x2fa401ad, 0x00000b04, 0x00000000 }, { 0x00000001, 0x2fa801ad, 0x00000b24, 0x00000000 }, { 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 }, - { 0x00000020, 0x34001c00, 0x00001400, 0x000008f0 }, + { 0x00000020, 0x34001c00, 0x00001400, 0x000009a0 }, { 0x00000001, 0x2ac001ad, 0x00000fe4, 0x00000000 }, { 0x00000001, 0x2fa001ad, 0x00000ae6, 0x00000000 }, { 0x00000001, 0x2fa401ad, 0x00000b06, 0x00000000 }, { 0x00000001, 0x2fa801ad, 0x00000b26, 0x00000000 }, { 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 }, - { 0x00000020, 0x34001c00, 0x00001400, 0x00000890 }, + { 0x00000020, 0x34001c00, 0x00001400, 0x00000940 }, { 0x00000001, 0x2ac201ad, 0x00000fe4, 0x00000000 }, { 0x00600001, 0x24000061, 0x00000000, 0x00000000 }, { 0x01000010, 0x20003da4, 0x00200af6, 0x00000000 }, @@ -232,14 +233,17 @@ { 0x00000001, 0x2fa401ad, 0x00000b08, 0x00000000 }, { 0x00000001, 0x2fa801ad, 0x00000b28, 0x00000000 }, { 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 }, - { 0x00000020, 0x34001c00, 0x00001400, 0x00000760 }, + { 0x00000020, 0x34001c00, 0x00001400, 0x00000810 }, { 0x00000001, 0x2ac401ad, 0x00000fe4, 0x00000000 }, { 0x00000001, 0x2fa001ad, 0x00000aea, 0x00000000 }, { 0x00000001, 0x2fa401ad, 0x00000b0a, 0x00000000 }, { 0x00000001, 0x2fa801ad, 0x00000b2a, 0x00000000 }, { 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 }, - { 0x00000020, 0x34001c00, 0x00001400, 0x00000700 }, + { 0x00000020, 0x34001c00, 0x00001400, 0x000007b0 }, { 0x00000001, 0x2ac601ad, 0x00000fe4, 0x00000000 }, + { 0x0040000c, 0x2a803dad, 0x00690ac0, 0x00020002 }, + { 0x00400040, 0x2a883dad, 0x00690a80, 0x00030003 }, + { 0x00400005, 0x2a902d29, 0x00690a88, 0xfffcfffc }, { 0x00600001, 0x25600021, 0x008d0020, 0x00000000 }, { 0x00600001, 0x28400021, 0x008d0560, 0x00000000 }, { 0x00600001, 0x28600061, 0x00000000, 0x00000000 }, @@ -284,6 +288,14 @@ { 0x00000001, 0x244001ed, 0x00000000, 0xfff8fff8 }, { 0x00000001, 0x244201ed, 0x00000000, 0xfff8fff8 }, { 0x00000001, 0x24440021, 0x00000440, 0x00000000 }, + { 0x01000005, 0x20002e28, 0x000000a5, 0x00600060 }, + { 0x00010040, 0x24403dad, 0x00000440, 0x00040004 }, + { 0x00010040, 0x24443dad, 0x00000444, 0x00040004 }, + { 0x01000005, 0x20002e28, 0x000000a5, 0x00100010 }, + { 0x00010040, 0x24423dad, 0x00000442, 0x00040004 }, + { 0x00010040, 0x24463dad, 0x00000446, 0x00040004 }, + { 0x00200040, 0x244035ad, 0x00450440, 0x00450a90 }, + { 0x00200040, 0x244435ad, 0x00450444, 0x00450a94 }, { 0x00600001, 0x28000021, 0x008d0440, 0x00000000 }, { 0x00000001, 0x24600061, 0x00000000, 0x00000002 }, { 0x00000001, 0x24640231, 0x0000009c, 0x00000000 }, diff --git a/src/shaders/vme/inter_frame_haswell.asm b/src/shaders/vme/inter_frame_haswell.asm index 36b394a..fa9a0a0 100644 --- a/src/shaders/vme/inter_frame_haswell.asm +++ b/src/shaders/vme/inter_frame_haswell.asm @@ -77,6 +77,7 @@ mov (8) msg_reg0.0<1>:UD read1_header.0<8,8,1>:UD {align1}; send (8) msg_ind CHROMA_COL<1>:UB null read(BIND_IDX_CBCR, 0, 0, 4) mlen 1 rlen 1 {align1}; mov (8) mb_mvp_ref.0<1>:ud 0:ud {align1}; +mov (8) mb_ref_win.0<1>:ud 0:ud {align1}; and.z.f0.0 (1) null:uw mb_hwdep<0,1,0>:uw 0x04:uw {align1}; (f0.0) jmpi (1) __mb_hwdep_end; /* read back the data for MB A */ @@ -364,6 +365,9 @@ jmpi (1) word_imedian; mov (1) mb_mvp_ref.2<1>:w RET_ARG<0,1,0>:w {align1}; __mb_hwdep_end: +asr (2) mb_ref_win.0<1>:w mb_mvp_ref.0<2,2,1>:w 2:w {align1}; +add (2) mb_ref_win.8<1>:w mb_ref_win.0<2,2,1>:w 3:w {align1}; +and (2) mb_ref_win.16<1>:uw mb_ref_win.8<2,2,1>:uw 0xFFFC:uw {align1}; /* m2, get the MV/Mb cost passed from constant buffer when spawning thread by MEDIA_OBJECT */ mov (8) vme_m2<1>:UD r1.0<8,8,1>:UD {align1}; @@ -481,6 +485,13 @@ mov (1) vme_m0.2<1>:W -12:W {align1}; mov (1) vme_m0.4<1>:UD vme_m0.0<0,1,0>:UD {align1}; +and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_AE:uw {align1}; +(f0.0) add (1) vme_m0.0<1>:w vme_m0.0<0,1,0>:w 12:w {align1}; +and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_B:uw {align1}; +(f0.0) add (1) vme_m0.2<1>:w vme_m0.2<0,1,0>:w 8:w {align1}; + +add (2) vme_m0.0<1>:w vme_m0.0<2,2,1>:w mb_ref_win.16<2,2,1>:w {align1}; +add (2) vme_m0.4<1>:w vme_m0.4<2,2,1>:w mb_ref_win.16<2,2,1>:w {align1}; mov (8) vme_msg_0.0<1>:UD vme_m0.0<8,8,1>:UD {align1}; mov (1) vme_m1.0<1>:UD ADAPTIVE_SEARCH_ENABLE:ud {align1} ; diff --git a/src/shaders/vme/inter_frame_haswell.g75b b/src/shaders/vme/inter_frame_haswell.g75b index 1ef526c..2ef1826 100644 --- a/src/shaders/vme/inter_frame_haswell.g75b +++ b/src/shaders/vme/inter_frame_haswell.g75b @@ -34,6 +34,7 @@ { 0x00600001, 0x28000021, 0x008d0420, 0x00000000 }, { 0x04600031, 0x26201cb1, 0x00000800, 0x02190006 }, { 0x00600001, 0x2ac00061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x2a800061, 0x00000000, 0x00000000 }, { 0x01000005, 0x20002d28, 0x000000a6, 0x00040004 }, { 0x00010020, 0x34001c00, 0x00001400, 0x00000710 }, { 0x00600001, 0x2ae00061, 0x00000000, 0x00000000 }, @@ -141,14 +142,17 @@ { 0x00000001, 0x2fa401ad, 0x00000b04, 0x00000000 }, { 0x00000001, 0x2fa801ad, 0x00000b24, 0x00000000 }, { 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 }, - { 0x00000020, 0x34001c00, 0x00001400, 0x000007b0 }, + { 0x00000020, 0x34001c00, 0x00001400, 0x00000840 }, { 0x00000001, 0x2ac001ad, 0x00000fe4, 0x00000000 }, { 0x00000001, 0x2fa001ad, 0x00000ae6, 0x00000000 }, { 0x00000001, 0x2fa401ad, 0x00000b06, 0x00000000 }, { 0x00000001, 0x2fa801ad, 0x00000b26, 0x00000000 }, { 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 }, - { 0x00000020, 0x34001c00, 0x00001400, 0x00000750 }, + { 0x00000020, 0x34001c00, 0x00001400, 0x000007e0 }, { 0x00000001, 0x2ac201ad, 0x00000fe4, 0x00000000 }, + { 0x0020000c, 0x2a803dad, 0x00450ac0, 0x00020002 }, + { 0x00200040, 0x2a883dad, 0x00450a80, 0x00030003 }, + { 0x00200005, 0x2a902d29, 0x00450a88, 0xfffcfffc }, { 0x00600001, 0x25600021, 0x008d0020, 0x00000000 }, { 0x00600001, 0x28400021, 0x008d0560, 0x00000000 }, { 0x00600001, 0x28600061, 0x00000000, 0x00000000 }, @@ -193,6 +197,12 @@ { 0x00000001, 0x244001ed, 0x00000000, 0xfff0fff0 }, { 0x00000001, 0x244201ed, 0x00000000, 0xfff4fff4 }, { 0x00000001, 0x24440021, 0x00000440, 0x00000000 }, + { 0x01000005, 0x20002e28, 0x000000a5, 0x00600060 }, + { 0x00010040, 0x24403dad, 0x00000440, 0x000c000c }, + { 0x01000005, 0x20002e28, 0x000000a5, 0x00100010 }, + { 0x00010040, 0x24423dad, 0x00000442, 0x00080008 }, + { 0x00200040, 0x244035ad, 0x00450440, 0x00450a90 }, + { 0x00200040, 0x244435ad, 0x00450444, 0x00450a90 }, { 0x00600001, 0x28000021, 0x008d0440, 0x00000000 }, { 0x00000001, 0x24600061, 0x00000000, 0x00000002 }, { 0x00000001, 0x24640231, 0x0000009c, 0x00000000 }, diff --git a/src/shaders/vme/vme75.inc b/src/shaders/vme/vme75.inc index 5a121b9..1c286c0 100644 --- a/src/shaders/vme/vme75.inc +++ b/src/shaders/vme/vme75.inc @@ -309,6 +309,7 @@ define(`mb_mv1', `r94') define(`mb_mv2', `r95') define(`mb_mv3', `r96') define(`mb_ref', `r97') +define(`mb_ref_win', `r84') define(`DREF_REGION_SIZE', `0x2020:UW') define(`PRED_L0', `0x0':uw) |