summaryrefslogtreecommitdiff
path: root/src/shaders/vme/inter_frame_haswell.asm
diff options
context:
space:
mode:
Diffstat (limited to 'src/shaders/vme/inter_frame_haswell.asm')
-rw-r--r--src/shaders/vme/inter_frame_haswell.asm338
1 files changed, 338 insertions, 0 deletions
diff --git a/src/shaders/vme/inter_frame_haswell.asm b/src/shaders/vme/inter_frame_haswell.asm
index 5bb8ba7..36b394a 100644
--- a/src/shaders/vme/inter_frame_haswell.asm
+++ b/src/shaders/vme/inter_frame_haswell.asm
@@ -15,6 +15,9 @@
// Now, begin source code....
//
+#define SAVE_RET add (1) RETURN_REG<1>:ud ip:ud 32:ud
+#define RETURN mov (1) ip:ud RETURN_REG<0,1,0>:ud
+// SAVE_RET stores ip + 32 in RETURN_REG and is placed directly before the jmpi that enters a subroutine; RETURN writes RETURN_REG back to ip to resume the caller. NOTE(review): 32 presumably covers the SAVE_RET add plus that jmpi, so nothing may be inserted between them -- confirm.
/*
* __START
*/
@@ -73,6 +76,294 @@ mov (1) read1_header.8<1>:UD BLOCK_8X4 {align1};
mov (8) msg_reg0.0<1>:UD read1_header.0<8,8,1>:UD {align1};
send (8) msg_ind CHROMA_COL<1>:UB null read(BIND_IDX_CBCR, 0, 0, 4) mlen 1 rlen 1 {align1};
+mov (8) mb_mvp_ref.0<1>:ud 0:ud {align1}; /* default MV predictor: all zero */
+and.z.f0.0 (1) null:uw mb_hwdep<0,1,0>:uw 0x04:uw {align1}; /* f0.0 is set when bit 0x04 of mb_hwdep is clear */
+(f0.0) jmpi (1) __mb_hwdep_end; /* bit clear: keep the zero predictor and skip the neighbour lookup */
+/* Gather the data of the neighbour MBs (A, B, then C or D) into mba/mbb/mbc_result */
+/* the layout of MB result is: rx.0(Available). rx.4(MVa), rX.8(MVb), rX.16(Pred_L0 flag),
+* rX.18 (Pred_L1 flag), rX.20(Forward reference ID), rX.22(Backward reference ID)
+*/
+mov (8) mba_result.0<1>:ud 0x0:ud {align1}; /* start from "not available, zero MV" for A */
+mov (8) mbb_result.0<1>:ud 0x0:ud {align1}; /* same for B */
+mov (8) mbc_result.0<1>:ud 0x0:ud {align1}; /* same for C (also used for D) */
+mba_start: /* neighbour A: the MB at (x - 1, y) */
+mov (8) mb_msg0.0<1>:ud 0:ud {align1}; /* clear the block-read message header */
+and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_AE:uw {align1}; /* f0.0 is set when the A availability bit is clear */
+/* MB A doesn't exist. Zero MV. mba_flag is zero and ref ID = -1 */
+(f0.0) mov (2) mba_result.20<1>:w -1:w {align1}; /* two words: forward (.20) and backward (.22) ref ID */
+(f0.0) jmpi (1) mbb_start;
+mov (1) mba_result.0<1>:d MB_AVAIL {align1}; /* mark A as available */
+mov (2) tmp_reg0.0<1>:UW orig_xy_ub<2,2,1>:UB {align1}; /* widen two bytes to words; used below as x (.0) and y (.2) */
+add (1) tmp_reg0.0<1>:w tmp_reg0.0<0,1,0>:w -1:w {align1}; /* x - 1 */
+mul (1) mb_msg0.8<1>:UD w_in_mb_uw<0,1,0>:UW tmp_reg0.2<0,1,0>:UW {align1}; /* w_in_mb * y */
+add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD tmp_reg0.0<0,1,0>:uw {align1}; /* + (x - 1): linear index of MB A */
+mul (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD 24:UD {align1}; /* scale to a read offset; presumably 24 owords per MB record -- confirm */
+mov (1) mb_msg0.20<1>:UB thread_id_ub {align1}; /* dispatch id */
+
+/* bind index 3, read 4 oword (64bytes), msg type: 0(OWord Block Read) */
+send (16)
+ mb_ind
+ mb_wb.0<1>:ud
+ NULL
+ data_port(
+ OBR_CACHE_TYPE,
+ OBR_MESSAGE_TYPE,
+ OBR_CONTROL_4,
+ OBR_BIND_IDX,
+ OBR_WRITE_COMMIT_CATEGORY,
+ OBR_HEADER_PRESENT
+ )
+ mlen 1
+ rlen 2
+ {align1};
+
+/* TODO: RefID is required after multi-references are added */
+cmp.l.f0.0 (1) null:w mb_intra_wb.16<0,1,0>:uw mb_inter_wb.8<0,1,0>:uw {align1}; /* f0.0 is set when mb_intra_wb.16 < mb_inter_wb.8; presumably "A was coded intra" -- confirm */
+(f0.0) mov (2) mba_result.20<1>:w -1:w {align1}; /* in that case A contributes no MV: ref IDs = -1 */
+(f0.0) jmpi (1) mbb_start;
+
+add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:ud 3:ud {align1}; /* advance the read offset by 3 for the MV read below */
+/* Read MV for MB A */
+/* bind index 3, read 8 oword (128bytes), msg type: 0(OWord Block Read) */
+send (16)
+ mb_ind
+ mb_mv0.0<1>:ud
+ NULL
+ data_port(
+ OBR_CACHE_TYPE,
+ OBR_MESSAGE_TYPE,
+ OBR_CONTROL_8,
+ OBR_BIND_IDX,
+ OBR_WRITE_COMMIT_CATEGORY,
+ OBR_HEADER_PRESENT
+ )
+ mlen 1
+ rlen 4
+ {align1};
+/* TODO: RefID is required after multi-references are added */
+/* MV */
+mov (2) mba_result.4<1>:ud mb_mv1.8<2,2,1>:ud {align1}; /* copy two dwords starting at mb_mv1.8 into result .4 and .8 */
+mov (1) mba_result.16<1>:w MB_PRED_FLAG {align1}; /* set the Pred_L0 flag; ref IDs stay 0 from the initial clear */
+
+mbb_start: /* neighbour B: the MB at (x, y - 1) */
+mov (8) mb_msg0.0<1>:ud 0:ud {align1}; /* clear the block-read message header */
+and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_B:uw {align1}; /* f0.0 is set when the B availability bit is clear */
+/* MB B doesn't exist: its MV stays zero and its ref IDs become -1 */
+/* If MB B doesn't exist, neither MB C nor MB D exists */
+(f0.0) mov (2) mbb_result.20<1>:w -1:w {align1}; /* B: forward (.20) and backward (.22) ref ID = -1 */
+(f0.0) mov (2) mbc_result.20<1>:w -1:w {align1}; /* C/D: same */
+(f0.0) jmpi (1) mb_mvp_start; /* nothing above the current MB: go straight to the prediction */
+mov (1) mbb_result.0<1>:d MB_AVAIL {align1}; /* mark B as available */
+mov (2) tmp_reg0.0<1>:UW orig_xy_ub<2,2,1>:UB {align1}; /* widen two bytes to words; used below as x (.0) and y (.2) */
+add (1) tmp_reg0.2<1>:w tmp_reg0.2<0,1,0>:w -1:w {align1}; /* y - 1 */
+mul (1) mb_msg0.8<1>:UD w_in_mb_uw<0,1,0>:UW tmp_reg0.2<0,1,0>:UW {align1}; /* w_in_mb * (y - 1) */
+add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD tmp_reg0.0<0,1,0>:uw {align1}; /* + x: linear index of MB B */
+mul (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD 24:UD {align1}; /* scale to a read offset; presumably 24 owords per MB record -- confirm */
+mov (1) mb_msg0.20<1>:UB thread_id_ub {align1}; /* dispatch id */
+
+/* bind index 3, read 4 oword (64bytes), msg type: 0(OWord Block Read) */
+send (16)
+ mb_ind
+ mb_wb.0<1>:ud
+ NULL
+ data_port(
+ OBR_CACHE_TYPE,
+ OBR_MESSAGE_TYPE,
+ OBR_CONTROL_4,
+ OBR_BIND_IDX,
+ OBR_WRITE_COMMIT_CATEGORY,
+ OBR_HEADER_PRESENT
+ )
+ mlen 1
+ rlen 2
+ {align1};
+
+/* TODO: RefID is required after multi-references are added */
+cmp.l.f0.0 (1) null:w mb_intra_wb.16<0,1,0>:uw mb_inter_wb.8<0,1,0>:uw {align1}; /* f0.0 is set when mb_intra_wb.16 < mb_inter_wb.8; presumably "B was coded intra" -- confirm */
+(f0.0) mov (2) mbb_result.20<1>:w -1:w {align1}; /* in that case B contributes no MV: ref IDs = -1 */
+(f0.0) jmpi (1) mbc_start;
+add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:ud 3:ud {align1}; /* advance the read offset by 3 for the MV read below */
+/* Read MV for MB B */
+/* bind index 3, read 8 oword (128bytes), msg type: 0(OWord Block Read) */
+send (16)
+ mb_ind
+ mb_mv0.0<1>:ud
+ NULL
+ data_port(
+ OBR_CACHE_TYPE,
+ OBR_MESSAGE_TYPE,
+ OBR_CONTROL_8,
+ OBR_BIND_IDX,
+ OBR_WRITE_COMMIT_CATEGORY,
+ OBR_HEADER_PRESENT
+ )
+ mlen 1
+ rlen 4
+ {align1};
+/* TODO: RefID is required after multi-references are added */
+mov (2) mbb_result.4<1>:ud mb_mv2.16<2,2,1>:ud {align1}; /* copy two dwords starting at mb_mv2.16 into result .4 and .8 */
+mov (1) mbb_result.16<1>:w MB_PRED_FLAG {align1}; /* set the Pred_L0 flag; ref IDs stay 0 from the initial clear */
+
+mbc_start: /* neighbour C: the MB at (x + 1, y - 1) */
+mov (8) mb_msg0.0<1>:ud 0:ud {align1}; /* clear the block-read message header */
+and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_C:uw {align1}; /* f0.0 is set when the C availability bit is clear */
+/* MB C doesn't exist: fall back to MB D, whose data is stored in mbc_result */
+/* Based on the H.264 spec, MB D replaces MB C when MB C doesn't exist */
+(f0.0) jmpi (1) mbd_start;
+mov (1) mbc_result.0<1>:d MB_AVAIL {align1}; /* mark C as available */
+mov (2) tmp_reg0.0<1>:UW orig_xy_ub<2,2,1>:UB {align1}; /* widen two bytes to words; used below as x (.0) and y (.2) */
+add (1) tmp_reg0.2<1>:w tmp_reg0.2<0,1,0>:w -1:w {align1}; /* y - 1 */
+add (1) tmp_reg0.0<1>:w tmp_reg0.0<0,1,0>:w 1:w {align1}; /* x + 1 */
+mul (1) mb_msg0.8<1>:UD w_in_mb_uw<0,1,0>:UW tmp_reg0.2<0,1,0>:UW {align1}; /* w_in_mb * (y - 1) */
+add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD tmp_reg0.0<0,1,0>:uw {align1}; /* + (x + 1): linear index of MB C */
+mul (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD 24:UD {align1}; /* scale to a read offset; presumably 24 owords per MB record -- confirm */
+mov (1) mb_msg0.20<1>:UB thread_id_ub {align1}; /* dispatch id */
+
+/* bind index 3, read 4 oword (64bytes), msg type: 0(OWord Block Read) */
+send (16)
+ mb_ind
+ mb_wb.0<1>:ud
+ NULL
+ data_port(
+ OBR_CACHE_TYPE,
+ OBR_MESSAGE_TYPE,
+ OBR_CONTROL_4,
+ OBR_BIND_IDX,
+ OBR_WRITE_COMMIT_CATEGORY,
+ OBR_HEADER_PRESENT
+ )
+ mlen 1
+ rlen 2
+ {align1};
+
+/* TODO: RefID is required after multi-references are added */
+cmp.l.f0.0 (1) null:w mb_intra_wb.16<0,1,0>:uw mb_inter_wb.8<0,1,0>:uw {align1}; /* f0.0 is set when mb_intra_wb.16 < mb_inter_wb.8; presumably "C was coded intra" -- confirm */
+(f0.0) mov (2) mbc_result.20<1>:w -1:w {align1}; /* in that case C contributes no MV: ref IDs = -1 */
+(f0.0) jmpi (1) mb_mvp_start;
+add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:ud 3:ud {align1}; /* advance the read offset by 3 for the MV read below */
+/* Read MV for MB C */
+/* bind index 3, read 8 oword (128bytes), msg type: 0(OWord Block Read) */
+send (16)
+ mb_ind
+ mb_mv0.0<1>:ud
+ NULL
+ data_port(
+ OBR_CACHE_TYPE,
+ OBR_MESSAGE_TYPE,
+ OBR_CONTROL_8,
+ OBR_BIND_IDX,
+ OBR_WRITE_COMMIT_CATEGORY,
+ OBR_HEADER_PRESENT
+ )
+ mlen 1
+ rlen 4
+ {align1};
+/* TODO: RefID is required after multi-references are added */
+/* Forward MV */
+mov (2) mbc_result.4<1>:ud mb_mv2.16<2,2,1>:ud {align1}; /* copy two dwords starting at mb_mv2.16 into result .4 and .8 */
+mov (1) mbc_result.16<1>:w MB_PRED_FLAG {align1}; /* set the Pred_L0 flag; ref IDs stay 0 from the initial clear */
+
+jmpi (1) mb_mvp_start; /* C was used: skip the D fallback */
+mbd_start: /* fallback for a missing C: the MB at (x - 1, y - 1), stored in mbc_result */
+mov (1) mbc_result.0<1>:d MB_AVAIL {align1}; /* mark the C slot as available. NOTE(review): D's own availability is not tested here -- confirm this is safe in the first column */
+mov (2) tmp_reg0.0<1>:UW orig_xy_ub<2,2,1>:UB {align1}; /* widen two bytes to words; used below as x (.0) and y (.2) */
+add (2) tmp_reg0.0<1>:w tmp_reg0.0<2,2,1>:w -1:w {align1}; /* x - 1 and y - 1 in one instruction */
+mul (1) mb_msg0.8<1>:UD w_in_mb_uw<0,1,0>:UW tmp_reg0.2<0,1,0>:UW {align1}; /* w_in_mb * (y - 1) */
+add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD tmp_reg0.0<0,1,0>:uw {align1}; /* + (x - 1): linear index of MB D */
+mul (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD 24:UD {align1}; /* scale to a read offset; presumably 24 owords per MB record -- confirm */
+mov (1) mb_msg0.20<1>:UB thread_id_ub {align1}; /* dispatch id */
+
+/* bind index 3, read 4 oword (64bytes), msg type: 0(OWord Block Read) */
+send (16)
+ mb_ind
+ mb_wb.0<1>:ud
+ NULL
+ data_port(
+ OBR_CACHE_TYPE,
+ OBR_MESSAGE_TYPE,
+ OBR_CONTROL_4,
+ OBR_BIND_IDX,
+ OBR_WRITE_COMMIT_CATEGORY,
+ OBR_HEADER_PRESENT
+ )
+ mlen 1
+ rlen 2
+ {align1};
+
+cmp.l.f0.0 (1) null:w mb_intra_wb.16<0,1,0>:uw mb_inter_wb.8<0,1,0>:uw {align1}; /* f0.0 is set when mb_intra_wb.16 < mb_inter_wb.8; presumably "D was coded intra" -- confirm */
+(f0.0) mov (2) mbc_result.20<1>:w -1:w {align1}; /* in that case D contributes no MV: ref IDs = -1 */
+(f0.0) jmpi (1) mb_mvp_start;
+
+add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:ud 3:ud {align1}; /* advance the read offset by 3 for the MV read below */
+/* Read MV for MB D */
+/* bind index 3, read 8 oword (128bytes), msg type: 0(OWord Block Read) */
+send (16)
+ mb_ind
+ mb_mv0.0<1>:ud
+ NULL
+ data_port(
+ OBR_CACHE_TYPE,
+ OBR_MESSAGE_TYPE,
+ OBR_CONTROL_8,
+ OBR_BIND_IDX,
+ OBR_WRITE_COMMIT_CATEGORY,
+ OBR_HEADER_PRESENT
+ )
+ mlen 1
+ rlen 4
+ {align1};
+
+/* TODO: RefID is required after multi-references are added */
+
+/* Forward MV */
+mov (2) mbc_result.4<1>:ud mb_mv3.24<2,2,1>:ud {align1}; /* copy two dwords starting at mb_mv3.24 into result .4 and .8 */
+mov (1) mbc_result.16<1>:w MB_PRED_FLAG {align1}; /* Pred_L0 flag (.16), the same slot the A, B and C paths set for a forward MV */
+
+mb_mvp_start: /* derive mb_mvp_ref from the gathered A, B and C results */
+/*TODO: Add the skip prediction */
+/* Check whether both MB B and MB C are unavailable */
+add (1) tmp_reg0.0<1>:d mbb_result.0<0,1,0>:d mbc_result.0<0,1,0>:d {align1}; /* sum of the B and C "Available" fields */
+cmp.z.f0.0 (1) null:d tmp_reg0.0<0,1,0>:d 0:d {align1}; /* f0.0 is set when that sum is zero */
+(-f0.0) jmpi (1) mb_median_start; /* sum non-zero: B or C is available, use the median path */
+cmp.nz.f0.0 (1) null:d mba_result.0<0,1,0>:d 1:d {align1}; /* f0.0 is set when mba_result.0 != 1. NOTE(review): if MB_AVAIL is 1 this selects "A unavailable", so the A copy below would only ever copy a zero MV -- confirm the intended polarity */
+(f0.0) mov (1) mbb_result.4<1>:ud mba_result.4<0,1,0>:ud {align1}; /* B takes the MV of A */
+(f0.0) mov (1) mbc_result.4<1>:ud mba_result.4<0,1,0>:ud {align1}; /* C takes the MV of A */
+(f0.0) mov (1) mbb_result.20<1>:uw mba_result.20<0,1,0>:uw {align1}; /* B takes the forward ref ID of A */
+(f0.0) mov (1) mbc_result.20<1>:uw mba_result.20<0,1,0>:uw {align1}; /* C takes the forward ref ID of A */
+(f0.0) mov (1) mb_mvp_ref.0<1>:ud mba_result.4<0,1,0>:ud {align1}; /* predictor = MV stored for A */
+(-f0.0) mov (1) mb_mvp_ref.0<1>:ud 0:ud {align1}; /* otherwise the predictor is zero */
+jmpi (1) __mb_hwdep_end;
+
+mb_median_start: /* reached when B or C is available */
+/* Check whether exactly one neighbour MB has forward ref ID 0 (presumably the ref ID of the current MB while only one reference is used -- confirm) */
+mov (8) tmp_reg0.0<1>:ud 0:ud {align1}; /* tmp_reg0.0 = match counter, tmp_reg0.4 = MV of the last match */
+cmp.z.f0.0 (1) null:d mba_result.20<1>:w 0:w {align1}; /* f0.0 is set when the forward ref ID of A is 0 */
+(f0.0) add (1) tmp_reg0.0<1>:w tmp_reg0.0<1>:w 1:w {align1}; /* count A */
+(f0.0) mov (1) tmp_reg0.4<1>:ud mba_result.4<0,1,0>:ud {align1}; /* remember the MV of A */
+cmp.z.f0.0 (1) null:d mbb_result.20<1>:w 0:w {align1}; /* same test for B */
+(f0.0) add (1) tmp_reg0.0<1>:w tmp_reg0.0<1>:w 1:w {align1}; /* count B */
+(f0.0) mov (1) tmp_reg0.4<1>:ud mbb_result.4<0,1,0>:ud {align1}; /* remember the MV of B */
+cmp.z.f0.0 (1) null:d mbc_result.20<1>:w 0:w {align1}; /* same test for C */
+(f0.0) add (1) tmp_reg0.0<1>:w tmp_reg0.0<1>:w 1:w {align1}; /* count C */
+(f0.0) mov (1) tmp_reg0.4<1>:ud mbc_result.4<0,1,0>:ud {align1}; /* remember the MV of C */
+cmp.e.f0.0 (1) null:d tmp_reg0.0<1>:w 1:w {align1}; /* f0.0 is set when exactly one neighbour matched */
+(f0.0) mov (1) mb_mvp_ref.0<1>:ud tmp_reg0.4<0,1,0>:ud {align1}; /* then its MV is the predictor */
+(f0.0) jmpi (1) __mb_hwdep_end;
+
+mov (1) INPUT_ARG0.0<1>:w mba_result.4<0,1,0>:w {align1}; /* first MV word (offset .4) of A, B and C as the three median inputs */
+mov (1) INPUT_ARG0.4<1>:w mbb_result.4<0,1,0>:w {align1};
+mov (1) INPUT_ARG0.8<1>:w mbc_result.4<0,1,0>:w {align1};
+SAVE_RET {align1}; /* must stay directly before the jmpi: the return address is ip + 32 */
+ jmpi (1) word_imedian;
+mov (1) mb_mvp_ref.0<1>:w RET_ARG<0,1,0>:w {align1}; /* median becomes the first word of the predictor */
+mov (1) INPUT_ARG0.0<1>:w mba_result.6<0,1,0>:w {align1}; /* second MV word (offset .6) of A, B and C */
+mov (1) INPUT_ARG0.4<1>:w mbb_result.6<0,1,0>:w {align1};
+mov (1) INPUT_ARG0.8<1>:w mbc_result.6<0,1,0>:w {align1};
+SAVE_RET {align1}; /* must stay directly before the jmpi: the return address is ip + 32 */
+jmpi (1) word_imedian;
+mov (1) mb_mvp_ref.2<1>:w RET_ARG<0,1,0>:w {align1}; /* median becomes the second word of the predictor */
+
+__mb_hwdep_end:
/* m2, get the MV/Mb cost passed from constant buffer when
spawning thread by MEDIA_OBJECT */
mov (8) vme_m2<1>:UD r1.0<8,8,1>:UD {align1};
@@ -196,6 +487,9 @@ mov (1) vme_m1.0<1>:UD ADAPTIVE_SEARCH_ENABLE:ud {align1} ;
/* the Max MV number is passed by constant buffer */
mov (1) vme_m1.4<1>:UB r4.28<0,1,0>:UB {align1};
mov (1) vme_m1.8<1>:UD START_CENTER + SEARCH_PATH_LEN:UD {align1};
+/* Set the MV cost center: the predicted MV in mb_mvp_ref.0 is written to two dwords of vme_m1 */
+mov (1) vme_m1.16<1>:ud mb_mvp_ref.0<0,1,0>:ud {align1}; /* dword at offset 16 */
+mov (1) vme_m1.20<1>:ud mb_mvp_ref.0<0,1,0>:ud {align1}; /* dword at offset 20 gets the same value */
mov (8) vme_msg_1.0<1>:UD vme_m1.0<8,8,1>:UD {align1};
mov (8) vme_msg_2<1>:UD vme_m2.0<8,8,1>:UD {align1};
@@ -348,3 +642,47 @@ __EXIT:
*/
mov (8) ts_msg_reg0<1>:UD r0<8,8,1>:UD {align1};
send (16) ts_msg_ind acc0<1>UW null thread_spawner(0, 0, 1) mlen 1 rlen 0 {align1 EOT};
+
+
+ nop ;
+ nop ;
+/* word_imin: RET_ARG = smallest of the three signed words INPUT_ARG0.0, INPUT_ARG0.4 and INPUT_ARG0.8. Uses TEMP_VAR0.0 and f0.0, returns through RETURN. */
+word_imin:
+ cmp.le.f0.0 (1) null:w INPUT_ARG0.0<0,1,0>:w INPUT_ARG0.4<0,1,0>:w {align1}; /* first <= second ? */
+ (-f0.0) mov (1) TEMP_VAR0.0<1>:w INPUT_ARG0.4<0,1,0>:w {align1}; /* no: second is the smaller one */
+ (f0.0) mov (1) TEMP_VAR0.0<1>:w INPUT_ARG0.0<0,1,0>:w {align1}; /* yes: first is the smaller one */
+ cmp.le.f0.0 (1) null:w TEMP_VAR0.0<0,1,0>:w INPUT_ARG0.8<0,1,0>:w {align1}; /* smaller of the pair <= third ? */
+ (-f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.8<0,1,0>:w {align1}; /* no: third is the minimum */
+ (f0.0) mov (1) RET_ARG<1>:w TEMP_VAR0.0<0,1,0>:w {align1}; /* yes: keep the pair's minimum */
+ RETURN {align1};
+
+/* word_imax: RET_ARG = largest of the three signed words INPUT_ARG0.0, INPUT_ARG0.4 and INPUT_ARG0.8. Uses TEMP_VAR0.0 and f0.0, returns through RETURN. */
+word_imax:
+ cmp.ge.f0.0 (1) null:w INPUT_ARG0.0<0,1,0>:w INPUT_ARG0.4<0,1,0>:w {align1}; /* first >= second ? */
+ (-f0.0) mov (1) TEMP_VAR0.0<1>:w INPUT_ARG0.4<0,1,0>:w {align1}; /* no: second is the larger one */
+ (f0.0) mov (1) TEMP_VAR0.0<1>:w INPUT_ARG0.0<0,1,0>:w {align1}; /* yes: first is the larger one */
+ cmp.ge.f0.0 (1) null:w TEMP_VAR0.0<0,1,0>:w INPUT_ARG0.8<0,1,0>:w {align1}; /* larger of the pair >= third ? */
+ (-f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.8<0,1,0>:w {align1}; /* no: third is the maximum */
+ (f0.0) mov (1) RET_ARG<1>:w TEMP_VAR0.0<0,1,0>:w {align1}; /* yes: keep the pair's maximum */
+ RETURN {align1};
+
+word_imedian: /* RET_ARG = median of the signed words a = INPUT_ARG0.0, b = INPUT_ARG0.4, c = INPUT_ARG0.8; uses f0.0 */
+ cmp.ge.f0.0 (1) null:w INPUT_ARG0.0<0,1,0>:w INPUT_ARG0.4<0,1,0>:w {align1}; /* a >= b ? */
+ (f0.0) jmpi (1) cmp_a_ge_b;
+ cmp.ge.f0.0 (1) null:w INPUT_ARG0.0<0,1,0>:w INPUT_ARG0.8<0,1,0>:w {align1}; /* here a < b: a >= c ? */
+ (f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.0<0,1,0>:w {align1}; /* c <= a < b: median is a */
+ (f0.0) jmpi (1) cmp_end;
+ cmp.ge.f0.0 (1) null:w INPUT_ARG0.4<0,1,0>:w INPUT_ARG0.8<0,1,0>:w {align1}; /* here a < b and a < c: b >= c ? */
+ (-f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.4<0,1,0>:w {align1}; /* a < b < c: median is b */
+ (f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.8<0,1,0>:w {align1}; /* a < c <= b: median is c */
+ jmpi (1) cmp_end;
+cmp_a_ge_b:
+ cmp.ge.f0.0 (1) null:w INPUT_ARG0.4<0,1,0>:w INPUT_ARG0.8<0,1,0>:w {align1}; /* here a >= b: b >= c ? */
+ (f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.4<0,1,0>:w {align1}; /* a >= b >= c: median is b */
+ (f0.0) jmpi (1) cmp_end;
+ cmp.ge.f0.0 (1) null:w INPUT_ARG0.0<0,1,0>:w INPUT_ARG0.8<0,1,0>:w {align1}; /* here b < a and b < c: a >= c ? */
+ (-f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.0<0,1,0>:w {align1}; /* b <= a < c: median is a */
+ (f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.8<0,1,0>:w {align1}; /* b < c <= a: median is c */
+cmp_end:
+ RETURN {align1};
+