summaryrefslogtreecommitdiff
path: root/src/shaders/h264/mc/Intra_4x4.asm
blob: 11699833837ee993b5a067e4fc833eaf978356db (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
/*
 * Decode Intra_4x4 macroblock
 * Copyright © <2010>, Intel Corporation.
 *
 * This program is licensed under the terms and conditions of the
 * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at
 * http://www.opensource.org/licenses/eclipse-1.0.php.
 *
 */
// Kernel name: Intra_4x4.asm
//
// Decoding of Intra_4x4 macroblock
//
//  $Revision: 12 $
//  $Date: 10/18/06 4:10p $
//

// ----------------------------------------------------
//  Main: Intra_4x4
// ----------------------------------------------------

#define	INTRA_4X4

.kernel Intra_4x4
INTRA_4x4:

#ifdef _DEBUG
// WA for FULSIM so we'll know which kernel is being debugged
mov (1) acc0:ud 0x02aa55a5:ud
#endif

#include "SetupForHWMC.asm"

#undef		PPREDBUF_Y
#define	    PPREDBUF_Y		a0.3	// Pointer to predicted Y picture

#define		REG_INTRA_PRED_AVAIL	REG_INTRA_TEMP_4
#define		REG_INTRA_4X4_PRED		REG_INTRA_TEMP_7		// Store predicted Intra_4x4 data

// Offset where 4x4 predicted data blocks are stored
#define	PREDSUBBLK0		0*GRFWIB
#define	PREDSUBBLK1		1*GRFWIB
#define	PREDSUBBLK2		2*GRFWIB
#define	PREDSUBBLK3		3*GRFWIB
#define	PREDSUBBLK4		4*GRFWIB
#define	PREDSUBBLK5		5*GRFWIB
#define	PREDSUBBLK6		6*GRFWIB
#define	PREDSUBBLK7		7*GRFWIB
#define	PREDSUBBLK8		8*GRFWIB
#define	PREDSUBBLK9		9*GRFWIB
#define	PREDSUBBLK10	10*GRFWIB
#define	PREDSUBBLK11	11*GRFWIB
#define	PREDSUBBLK12	12*GRFWIB
#define	PREDSUBBLK13	13*GRFWIB
#define	PREDSUBBLK14	14*GRFWIB
#define	PREDSUBBLK15	15*GRFWIB

// 4x4 error block byte offset within the 8x8 error block
#define ERRBLK0		0
#define ERRBLK1		8
#define ERRBLK2		64
#define ERRBLK3		72

#ifdef SW_SCOREBOARD    
    CALL(scoreboard_start_intra,1)
#endif

#ifdef SW_SCOREBOARD    
	wait	n0:ud		//	Now wait for scoreboard to response
#endif

//
//  Decode Y blocks
//
//	Load reference data from neighboring macroblocks
    CALL(load_Intra_Ref_Y,1)

	mov	(1)	PERROR<1>:w	ERRBUF*GRFWIB:w			// Pointer to macroblock error data
	mov	(1)	PPREDBUF_Y<1>:w	PREDBUF*GRFWIB:w	// Pointer to predicted data
	shr (2)	REG_INTRA_PRED_AVAIL<1>:w	REG_INTRA_PRED_AVAIL_FLAG_BYTE<0;1,0>:ub	0x40:v
    and.nz.f0.0 (8)	NULLREG	REG_INTRA_PRED_AVAIL_FLAG_BYTE<0;1,0>:ub	4:w	// Top-right macroblock available for intra prediction?
	(-f0.0.any8h) mov (8)	INTRA_REF_TOP(0,16)<1>	INTRA_REF_TOP(0,15)REGION(1,0)	// Extend right boundary of MB B to C

//	Intra predict Intra_4x4 luma blocks
//
//	Sub-macroblock 0 *****************
	mov	(16)	REF_TOP0(0)<1>	INTRA_REF_TOP0(0)	// Top reference data
	mov	(8)		REF_LEFT(0)<1>	INTRA_REF_LEFT(0)REGION(8,4)	// Left reference data
	shr	(4)		PRED_MODE<1>:w	INTRA_PRED_MODE(0)<1;2,0>	0x4040:v	// Expand IntraPredMode to 1 byte/block
	CALL(intra_Pred_4x4_Y_4,1)
    add (1)		PERROR<1>:w	PERROR<0;1,0>:w	0x0080:w	// Pointers to next error block

	or  (1)	REG_INTRA_PRED_AVAIL<1>:w	REG_INTRA_PRED_AVAIL<0;1,0>:w	0x1:w	// Left neighbor is available now

//	Sub-macroblock 1 *****************

	mov	(16)	REF_TOP0(0)<1>	INTRA_REF_TOP0(0,8)	// Top reference data
	mov	(4)		REF_LEFT(0)<1>	r[PPREDBUF_Y,PREDSUBBLK1+6]<8;1,0>:ub	// Left reference data (top half)
	mov	(4)		REF_LEFT(0,4)<1>	r[PPREDBUF_Y,PREDSUBBLK3+6]<8;1,0>:ub	// Left reference data (bottom half)
	shr	(4)		PRED_MODE<1>:w	INTRA_PRED_MODE(0,2)<1;2,0>	0x4040:v	// Expand IntraPredMode to 1 byte/block
	add	(1)		PPREDBUF_Y<1>:w	PPREDBUF_Y<0;1,0>:w	4*GRFWIB:w	// Pointer to predicted sub-macroblock 1
	CALL(intra_Pred_4x4_Y_4,1)
    add (1)		PERROR<1>:w	PERROR<0;1,0>:w	0x0080:w	// Pointers to next error block

	or  (1)	REG_INTRA_PRED_AVAIL<1>:w	REG_INTRA_PRED_AVAIL.1<0;1,0>:w	0x2:w	// Top neighbor is available now

//	Pack constructed data from word-aligned to byte-aligned format
//	to speed up save_4x4_Y module later
//	PPREDBUF_Y now points to sub-block #4
	mov (16)	r[PPREDBUF_Y,-PREDSUBBLK4]<1>:ub	r[PPREDBUF_Y,-PREDSUBBLK4]<32;16,2>:ub		// Sub-block 0
	mov (16)	r[PPREDBUF_Y,0-PREDSUBBLK4+16]<1>:ub	r[PPREDBUF_Y,-PREDSUBBLK3]<32;16,2>:ub	// Sub-block 1
	mov (16)	r[PPREDBUF_Y,-PREDSUBBLK2]<1>:ub	r[PPREDBUF_Y,-PREDSUBBLK2]<32;16,2>:ub		// Sub-block 2
	mov (16)	r[PPREDBUF_Y,0-PREDSUBBLK2+16]<1>:ub	r[PPREDBUF_Y,-PREDSUBBLK1]<32;16,2>:ub	// Sub-block 3

	mov (16)	r[PPREDBUF_Y,-PREDSUBBLK3]<1>:ub	r[PPREDBUF_Y]<32;16,2>:ub				// Sub-block 4
	mov (16)	r[PPREDBUF_Y,0-PREDSUBBLK3+16]<1>:ub	r[PPREDBUF_Y,PREDSUBBLK1]<32;16,2>:ub	// Sub-block 5
	mov (16)	r[PPREDBUF_Y,-PREDSUBBLK1]<1>:ub	r[PPREDBUF_Y,PREDSUBBLK2]<32;16,2>:ub		// Sub-block 6
	mov (16)	r[PPREDBUF_Y,0-PREDSUBBLK1+16]<1>:ub	r[PPREDBUF_Y,PREDSUBBLK3]<32;16,2>:ub	// Sub-block 7

//	Sub-macroblock 2 *****************

	mov	(4)		REF_TOP0(0)<1>		INTRA_REF_LEFT0(0,28)REGION(4,1)		// Top-left reference data
	mov	(8)		REF_TOP0(0,4)<1>	r[PPREDBUF_Y,0-2*GRFWIB+12]<16;4,1>:ub	// Top reference data from SB 2,3
	mov	(8)		REF_TOP0(0,12)<1>	r[PPREDBUF_Y,0-GRFWIB+12]<16;4,1>:ub	// Top reference data from SB 6,7
	mov	(8)		REF_TOP0(0,20)<1>	r[PPREDBUF_Y,0-GRFWIB+31]<0;1,0>:ub		// Top-right reference data
	mov	(16)	REG_INTRA_REF_TOP<1>:w	REF_TOP_W(0)		// Store top reference data for SubMB #2 and #3.
	mov	(8)		REF_LEFT(0)<1>		INTRA_REF_LEFT(1)REGION(8,4)	// Left reference data
	shr	(4)		PRED_MODE<1>:w		INTRA_PRED_MODE(0,4)<1;2,0>	0x4040:v	// Expand IntraPredMode to 1 byte/block
	CALL(intra_Pred_4x4_Y_4,1)
    add (1)		PERROR<1>:w	PERROR<0;1,0>:w	0x0080:w	// Pointers to next error block

	or  (1)	REG_INTRA_PRED_AVAIL<1>:w	REG_INTRA_PRED_AVAIL<0;1,0>:w	0x1:w	// Left neighbor is available now

//	Sub-macroblock 3 *****************

	mov	(16)	REF_TOP0(0)<1>	INTRA_REF_TOP0(0,8)		// Top reference data
	mov	(8)		REF_TOP0(0,16)<1>	INTRA_REF_TOP0(0,24)REGION(8,1)	// Top reference data
	mov	(4)		REF_LEFT(0)<1>	r[PPREDBUF_Y,PREDSUBBLK1+6]<8;1,0>:ub	// Left reference data (top half)
	mov	(4)		REF_LEFT(0,4)<1>	r[PPREDBUF_Y,PREDSUBBLK3+6]<8;1,0>:ub	// Left reference data (bottom half)
	shr	(4)		PRED_MODE<1>:w	INTRA_PRED_MODE(0,6)<1;2,0>	0x4040:v	// Expand IntraPredMode to 1 byte/block
	add	(1)		PPREDBUF_Y<1>:w	PPREDBUF_Y<0;1,0>:w	4*GRFWIB:w	// Pointer to predicted sub-macroblock 3
	CALL(intra_Pred_4x4_Y_4,1)

//	Pack constructed data from word-aligned to byte-aligned format
//	to speed up save_4x4_Y module later
//	PPREDBUF_Y now points to sub-block #12
	mov (16)	r[PPREDBUF_Y,-PREDSUBBLK4]<1>:ub	r[PPREDBUF_Y,-PREDSUBBLK4]<32;16,2>:ub		// Sub-block 8
	mov (16)	r[PPREDBUF_Y,0-PREDSUBBLK4+16]<1>:ub	r[PPREDBUF_Y,-PREDSUBBLK3]<32;16,2>:ub	// Sub-block 9
	mov (16)	r[PPREDBUF_Y,-PREDSUBBLK2]<1>:ub	r[PPREDBUF_Y,-PREDSUBBLK2]<32;16,2>:ub		// Sub-block 10
	mov (16)	r[PPREDBUF_Y,0-PREDSUBBLK2+16]<1>:ub	r[PPREDBUF_Y,-PREDSUBBLK1]<32;16,2>:ub	// Sub-block 11

	mov (16)	r[PPREDBUF_Y,-PREDSUBBLK3]<1>:ub	r[PPREDBUF_Y]<32;16,2>:ub				// Sub-block 12
	mov (16)	r[PPREDBUF_Y,0-PREDSUBBLK3+16]<1>:ub	r[PPREDBUF_Y,PREDSUBBLK1]<32;16,2>:ub	// Sub-block 13
	mov (16)	r[PPREDBUF_Y,-PREDSUBBLK1]<1>:ub	r[PPREDBUF_Y,PREDSUBBLK2]<32;16,2>:ub		// Sub-block 14
	mov (16)	r[PPREDBUF_Y,0-PREDSUBBLK1+16]<1>:ub	r[PPREDBUF_Y,PREDSUBBLK3]<32;16,2>:ub	// Sub-block 15

//	All 4 sub-macroblock (containing 4 intra_4x4 blocks) have be constructed
//	Save constructed Y picture
	CALL(save_4x4_Y,1)		// Save Intra_4x4 predicted luma data.
//
//  Decode U/V blocks
//
//	Note: The decoding for chroma blocks will be the same for all intra prediction mode
//
	CALL(decode_Chroma_Intra,1)

#ifdef SW_SCOREBOARD
    #include "scoreboard_update.asm"
#endif

// Terminate the thread
//
    #include "EndIntraThread.asm"

// End of Intra_4x4