From 3a9d5871e6b0e4f00c47037b376611a15dc38705 Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Fri, 21 Jan 2011 09:35:46 +0800 Subject: i965_drv_video: build shaders for decoding & post proscessing Signed-off-by: Xiang, Haihao Singed-off-by: Chen, Yangyang Singed-off-by: Han, Haofu --- shaders/gpp.py | 200 ++++++++++++++++++++++++++++++++++++ shaders/h264/mc/Makefile.am | 158 +++++++++++++++++++++++++++- shaders/h264/mc/list | 21 ++++ shaders/post_processing/Makefile.am | 93 ++++++++++++++++- 4 files changed, 470 insertions(+), 2 deletions(-) create mode 100755 shaders/gpp.py create mode 100644 shaders/h264/mc/list diff --git a/shaders/gpp.py b/shaders/gpp.py new file mode 100755 index 0000000..7e43f13 --- /dev/null +++ b/shaders/gpp.py @@ -0,0 +1,200 @@ +#!/usr/bin/env python +#coding=UTF-8 + +# Copyright © 2011 Intel Corporation +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +# +# Authors: +# Chen, Yangyang +# Han, Haofu +# + +import sys + +class Block: + def __init__(self, ln=0, s=None): + assert type(ln) == int + assert type(s) == str or s == None + self.lineno = ln + self.text = s + self.subblocks = [] + + def append(self, block): + self.subblocks.append(block) + + def checkfor(self, line): + import re + p = r'\$\s*for\s*' + if re.match(p, line) == None: + raise Exception(self.__errmsg('syntax error')) + tail = line.split('(', 1)[1].rsplit(')', 1) + conds = tail[0].split(';') + lb = tail[1] + if lb.strip() != '{': + raise Exception(self.__errmsg('missing "{"')) + if len(conds) != 3: + raise Exception(self.__errmsg('syntax error(miss ";"?)')) + init = conds[0] + cond = conds[1] + step = conds[2] + self.__parse_init(init) + self.__parse_cond(cond) + self.__parse_step(step) + + def __parse_init(self, init): + inits = init.split(',') + self.param_init = [] + for ini in inits: + try: + val = eval(ini) + self.param_init.append(val) + except: + raise Exception(self.__errmsg('non an exp: %s'%ini)) + self.param_num = len(inits) + + def __parse_cond(self, cond): + cond = cond.strip() + if cond[0] in ['<', '>']: + if cond[1] == '=': + self.param_op = cond[:2] + limit = cond[2:] + else: + self.param_op = cond[0] + limit = cond[1:] + try: + self.param_limit = eval(limit) + except: + raise Exception(self.__errmsg('non an exp: %s'%limit)) + else: + raise Exception(self.__errmsg('syntax error')) + + def __parse_step(self, step): + steps = step.split(',') + if len(steps) != self.param_num: + raise Exception(self.__errmsg('params number no match')) + self.param_step = [] + for st in steps: + try: + val = eval(st) + self.param_step.append(val) + except: + raise Exception(self.__errmsg('non an exp: %s'%st)) + + def __errmsg(self, msg=''): + return '%d: %s' % (self.lineno, msg) + +def readlines(f): + lines = f.readlines() + buf = [] + for line in lines: + if '\\n' in line: + tmp = line.split('\\n') + buf.extend(tmp) + else: + buf.append(line) + return buf + +def parselines(lines): + root = Block(0) + stack = [root] + lineno = 0 + for line in lines: + lineno += 1 + line = line.strip() + if line.startswith('$'): + block = Block(lineno) + block.checkfor(line) + stack[-1].append(block) + stack.append(block) + elif line.startswith('}'): + stack.pop() + elif line and not line.startswith('#'): + stack[-1].append(Block(lineno, line)) + return root + +def writeblocks(outfile, blocks): + buf = [] + + def check_cond(op, cur, lim): + assert op in ['<', '>', '<=', '>='] + assert type(cur) == int + assert type(lim) == int + return eval('%d %s %d' % (cur, op, lim)) + + def do_writeblock(block, curs): + if block.text != None: + import re + p = r'\%(\d+)' + newline = block.text + params = set(re.findall(p, block.text)) + for param in params: + index = int(param) - 1 + if index >= len(curs): + raise Exception('%d: too many param(%%%d)'%(block.lineno, index+1)) + newline = newline.replace('%%%d'%(index+1), str(curs[index])) + if newline and \ + not newline.startswith('.') and \ + not newline.endswith(':') and \ + not newline.endswith(';'): + newline += ';' + buf.append(newline) + else: + for_curs = block.param_init + while check_cond(block.param_op, for_curs[0], block.param_limit): + for sblock in block.subblocks: + do_writeblock(sblock, for_curs) + for i in range(0, block.param_num): + for_curs[i] += block.param_step[i] + + for block in blocks.subblocks: + do_writeblock(block, []) + outfile.write('\n'.join(buf)) + outfile.write('\n') + +if __name__ == '__main__': + argc = len(sys.argv) + if argc == 1: + print >>sys.stderr, 'no input file' + sys.exit(0) + + try: + infile = open(sys.argv[1], 'r') + except IOError: + print >>sys.stderr, 'can not open %s' % sys.argv[1] + sys.exit(1) + + if argc == 2: + outfile = sys.stdout + else: + try: + outfile = open(sys.argv[2], 'w') + except IOError: + print >>sys.stderr, 'can not write to %s' % sys.argv[2] + sys.exit(1) + + lines = readlines(infile) + try: + infile.close() + except IOError: + pass + + blocks = parselines(lines) + writeblocks(outfile, blocks) diff --git a/shaders/h264/mc/Makefile.am b/shaders/h264/mc/Makefile.am index 9f97eb0..a4ab3c0 100644 --- a/shaders/h264/mc/Makefile.am +++ b/shaders/h264/mc/Makefile.am @@ -7,6 +7,151 @@ INTEL_G4B = null.g4b INTEL_G4B_GEN5 = null.g4b.gen5 +INTEL_MC_G4B_GEN5 = avc_mc.g4b.gen5 +INTEL_MC_EXPORT_GEN5 = export.inc.gen5 + +INTEL_MC_ASM = \ + add_Error_16x16_Y.asm \ + add_Error_UV.asm \ + AllAVC.asm \ + AllAVCField.asm \ + AllAVCFrame.asm \ + AllAVCMBAFF.asm \ + AllIntra.asm \ + AVCMCInter.asm \ + BSDReset.asm \ + chromaMVAdjust.asm \ + DCResetDummy.asm \ + Decode_Chroma_Intra.asm \ + EndIntraThread.asm \ + initialize_MBPara.asm \ + interpolate_C_2x2.asm \ + interpolate_C_4x4.asm \ + interpolate_Y_4x4.asm \ + interpolate_Y_8x8.asm \ + Intra_16x16.asm \ + Intra_4x4.asm \ + Intra_8x8.asm \ + Intra_funcLib.asm \ + Intra_PCM.asm \ + intra_pred_16x16_Y.asm \ + intra_Pred_4x4_Y_4.asm \ + intra_Pred_8x8_Y.asm \ + intra_Pred_Chroma.asm \ + load_Intra_Ref_UV.asm \ + load_Intra_Ref_Y.asm \ + loadRef_C_10x5.asm \ + loadRef_C_6x3.asm \ + loadRef_Y_16x13.asm \ + loadRef_Y_16x9.asm \ + recon_C_4x4.asm \ + recon_Y_8x8.asm \ + roundShift_C_4x4.asm \ + save_16x16_Y.asm \ + save_4x4_Y.asm \ + save_8x8_UV.asm \ + save_8x8_Y.asm \ + save_I_PCM.asm \ + scoreboard.asm \ + scoreboard_MBAFF.asm \ + scoreboard_restore_AS.asm \ + scoreboard_save_AS.asm \ + scoreboard_sip.asm \ + scoreboard_start_inter.asm \ + scoreboard_start_intra.asm \ + scoreboard_update.asm \ + SetHWScoreboard.asm \ + SetHWScoreboard_MBAFF.asm \ + set_SB_offset.asm \ + SetupForHWMC.asm \ + weightedPred.asm \ + writeRecon_C_8x4.asm \ + writeRecon_Y_16x8.asm \ + writeRecon_YC.asm + +INTEL_ILDB_ASM = \ + ../ildb/AVC_ILDB_Child_Field_UV.asm \ + ../ildb/AVC_ILDB_Child_Field_Y.asm \ + ../ildb/AVC_ILDB_Child_Mbaff_UV.asm \ + ../ildb/AVC_ILDB_Child_Mbaff_Y.asm \ + ../ildb/AVC_ILDB_Child_UV.asm \ + ../ildb/AVC_ILDB_Child_Y.asm \ + ../ildb/AVC_ILDB_Chroma_Core.asm \ + ../ildb/AVC_ILDB_Chroma_Core_Mbaff.asm \ + ../ildb/AVC_ILDB_CloseGateway.asm \ + ../ildb/AVC_ILDB_Dep_Check.asm \ + ../ildb/AVC_ILDB_Filter_Mbaff_UV_h.asm \ + ../ildb/AVC_ILDB_Filter_Mbaff_UV_v.asm \ + ../ildb/AVC_ILDB_Filter_Mbaff_Y_h.asm \ + ../ildb/AVC_ILDB_Filter_Mbaff_Y_v.asm \ + ../ildb/AVC_ILDB_Filter_UV_h.asm \ + ../ildb/AVC_ILDB_Filter_UV_v.asm \ + ../ildb/AVC_ILDB_Filter_Y_h.asm \ + ../ildb/AVC_ILDB_Filter_Y_v.asm \ + ../ildb/AVC_ILDB_ForwardMsg.asm \ + ../ildb/AVC_ILDB_Luma_Core.asm \ + ../ildb/AVC_ILDB_Luma_Core_Mbaff.asm \ + ../ildb/AVC_ILDB_LumaThrdLimit.asm \ + ../ildb/AVC_ILDB_OpenGateway.asm \ + ../ildb/AVC_ILDB_Root_Field_UV.asm \ + ../ildb/AVC_ILDB_Root_Field_Y.asm \ + ../ildb/AVC_ILDB_Root_Mbaff_UV.asm \ + ../ildb/AVC_ILDB_Root_Mbaff_Y.asm \ + ../ildb/AVC_ILDB_Root_UV.asm \ + ../ildb/AVC_ILDB_Root_Y.asm \ + ../ildb/AVC_ILDB_Spawn.asm \ + ../ildb/AVC_ILDB_SpawnChild.asm \ + ../ildb/AVC_ILDB_SpawnChromaRoot.asm \ + ../ildb/load_Cur_UV_8x8T.asm \ + ../ildb/load_Cur_UV_8x8T_Mbaff.asm \ + ../ildb/load_Cur_UV_Right_Most_2x8.asm \ + ../ildb/load_Cur_Y_16x16T.asm \ + ../ildb/load_Cur_Y_16x16T_Mbaff.asm \ + ../ildb/load_Cur_Y_Right_Most_4x16.asm \ + ../ildb/Load_ILDB_Cntrl_Data_16DW.asm \ + ../ildb/Load_ILDB_Cntrl_Data_22DW.asm \ + ../ildb/Load_ILDB_Cntrl_Data_64DW.asm \ + ../ildb/Load_ILDB_Cntrl_Data.asm \ + ../ildb/load_Left_UV_2x8T.asm \ + ../ildb/load_Left_UV_2x8T_Mbaff.asm \ + ../ildb/load_Left_Y_4x16T.asm \ + ../ildb/load_Left_Y_4x16T_Mbaff.asm \ + ../ildb/loadNV12_16x16T.asm \ + ../ildb/loadNV12_16x4.asm \ + ../ildb/load_Top_UV_8x2.asm \ + ../ildb/load_Top_UV_8x2_Mbaff.asm \ + ../ildb/load_Top_Y_16x4.asm \ + ../ildb/load_Top_Y_16x4_Mbaff.asm \ + ../ildb/save_Cur_UV_8x8.asm \ + ../ildb/save_Cur_UV_8x8_Mbaff.asm \ + ../ildb/save_Cur_Y_16x16.asm \ + ../ildb/save_Cur_Y_16x16_Mbaff.asm \ + ../ildb/save_Left_UV_8x2T.asm \ + ../ildb/save_Left_UV_8x2T_Mbaff.asm \ + ../ildb/save_Left_Y_16x4T.asm \ + ../ildb/save_Left_Y_16x4T_Mbaff.asm \ + ../ildb/saveNV12_16x16.asm \ + ../ildb/saveNV12_16x4.asm \ + ../ildb/saveNV12_16x4T.asm \ + ../ildb/save_Top_UV_8x2.asm \ + ../ildb/save_Top_UV_8x2_Mbaff.asm \ + ../ildb/save_Top_Y_16x4.asm \ + ../ildb/save_Top_Y_16x4_Mbaff.asm \ + ../ildb/SetupVPKernel.asm \ + ../ildb/Transpose_Cur_UV_2x8.asm \ + ../ildb/Transpose_Cur_UV_8x8.asm \ + ../ildb/Transpose_Cur_UV_Right_Most_2x8.asm \ + ../ildb/Transpose_Cur_Y_16x16.asm \ + ../ildb/Transpose_Cur_Y_4x16.asm \ + ../ildb/Transpose_Cur_Y_Right_Most_4x16.asm \ + ../ildb/Transpose_Left_UV_2x8.asm \ + ../ildb/Transpose_Left_Y_4x16.asm \ + ../ildb/TransposeNV12_16x16.asm \ + ../ildb/TransposeNV12_4x16.asm \ + ../ildb/writeURB.asm \ + ../ildb/writeURB_UV_Child.asm \ + ../ildb/writeURB_Y_Child.asm + EXTRA_DIST = $(INTEL_G4I) \ $(INTEL_G4A) \ $(INTEL_G4B) \ @@ -18,11 +163,22 @@ SUFFIXES = .g4a .g4b .g4a.g4b: m4 $*.g4a > $*.g4m && intel-gen4asm -o $@ $*.g4m && intel-gen4asm -g 5 -o $@.gen5 $*.g4m && rm $*.g4m +$(INTEL_MC_G4B_GEN5): $(INTEL_MC_ASM) $(INTEL_ILDB_ASM) + @cpp -D DEV_ILK -I ../ildb/ AllAVC.asm > a.asm; \ + ../../gpp.py a.asm avc_mc.asm; \ + intel-gen4asm -l list -a -e _export.inc -o $@ -g 5 avc_mc.asm; \ + cpp -D DEV_ILK -I ../ildb/ AllAVC.asm > a.asm; \ + ../../gpp.py a.asm avc_mc.asm; \ + intel-gen4asm -l list -a -e _export.inc -o $@ -g 5 avc_mc.asm; \ + cat _export.inc | sed "s/_IP/_IP_GEN5/g" > $(INTEL_MC_EXPORT_GEN5); \ + rm a.asm avc_mc.asm _export.inc + $(INTEL_G4B): $(INTEL_G4I) -BUILT_SOURCES= $(INTEL_G4B) +BUILT_SOURCES= $(INTEL_G4B) $(INTEL_MC_G4B) $(INTEL_MC_G4B_GEN5) clean-local: -rm -f $(INTEL_G4B) -rm -f $(INTEL_G4B_GEN5) + -rm -f $(INTEL_MC_G4B_GEN5) $(INTEL_MC_EXPORT_GEN5) endif diff --git a/shaders/h264/mc/list b/shaders/h264/mc/list new file mode 100644 index 0000000..000e0fb --- /dev/null +++ b/shaders/h264/mc/list @@ -0,0 +1,21 @@ +INTRA_16x16 +INTRA_8x8 +INTRA_4x4 +INTRA_PCM +FRAME_MB +FIELD_MB +MBAFF_MB +SETHWSCOREBOARD +SETHWSCOREBOARD_MBAFF +AVC_ILDB_ROOT_Y_ILDB_FRAME +AVC_ILDB_CHILD_Y_ILDB_FRAME +AVC_ILDB_ROOT_UV_ILDB_FRAME +AVC_ILDB_CHILD_UV_ILDB_FRAME +AVC_ILDB_ROOT_Y_ILDB_FIELD +AVC_ILDB_CHILD_Y_ILDB_FIELD +AVC_ILDB_ROOT_UV_ILDB_FIELD +AVC_ILDB_CHILD_UV_ILDB_FIELD +AVC_ILDB_ROOT_Y_ILDB_MBAFF +AVC_ILDB_CHILD_Y_ILDB_MBAFF +AVC_ILDB_ROOT_UV_ILDB_MBAFF +AVC_ILDB_CHILD_UV_ILDB_MBAFF diff --git a/shaders/post_processing/Makefile.am b/shaders/post_processing/Makefile.am index 9f97eb0..85c3081 100644 --- a/shaders/post_processing/Makefile.am +++ b/shaders/post_processing/Makefile.am @@ -7,6 +7,89 @@ INTEL_G4B = null.g4b INTEL_G4B_GEN5 = null.g4b.gen5 +INTEL_PP_G4B_GEN5 = \ + nv12_avs_nv12.g4b.gen5 \ + nv12_dndi_nv12.g4b.gen5 \ + nv12_load_save_nv12.g4b.gen5 \ + nv12_scaling_nv12.g4b.gen5 + +INTEL_PP_ASM = \ + Common/AYUV_Load_16x8.asm \ + Common/IMC3_Load_8x4.asm \ + Common/IMC3_Load_8x5.asm \ + Common/IMC3_Load_9x5.asm \ + Common/Init_All_Regs.asm \ + Common/Multiple_Loop.asm \ + Common/Multiple_Loop_Head.asm \ + Common/NV11_Load_4x8.asm \ + Common/NV11_Load_5x8.asm \ + Common/NV12_Load_8x4.asm \ + Common/NV12_Load_8x5.asm \ + Common/NV12_Load_9x5.asm \ + Common/P208_Load_8x8.asm \ + Common/P208_Load_9x8.asm \ + Common/PA_Load_8x8.asm \ + Common/PA_Load_9x8.asm \ + Common/PL16x8_PL8x4.asm \ + Common/PL16x8_PL8x8.asm \ + Common/PL4x8_Save_NV11.asm \ + Common/PL5x8_PL16x8.asm \ + Common/PL5x8_PL8x8.asm \ + Common/PL8x4_Save_IMC3.asm \ + Common/PL8x4_Save_NV12.asm \ + Common/PL8x5_PL8x8.asm \ + Common/PL8x8_PL8x4.asm \ + Common/PL8x8_Save_P208.asm \ + Common/PL8x8_Save_PA.asm \ + Common/PL9x5_PL16x8.asm \ + Common/PL9x8_PL16x8.asm \ + Common/readSampler16x1.asm \ + Common/RGB16x8_Save_RGB16.asm \ + Common/RGB16x8_Save_RGB.asm \ + Common/RGB16x8_Save_Y416.asm \ + Common/RGB_Pack.asm \ + Common/SetupVPKernel.asm \ + Core_Kernels/AVS_SetupFirstBlock.asm \ + Core_Kernels/AVS_SetupSecondBlock.asm \ + Core_Kernels/DI_Hist_Save.asm \ + Core_Kernels/DI_SAVE_PA.asm \ + Core_Kernels/DNDI_COMMAND.asm \ + Core_Kernels/DNDI_Hist_Save.asm \ + Core_Kernels/PA_AVS_IEF_16x8.asm \ + Core_Kernels/PA_AVS_IEF_8x4.asm \ + Core_Kernels/PA_AVS_IEF_8x8.asm \ + Core_Kernels/PA_AVS_IEF_Sample.asm \ + Core_Kernels/PA_AVS_IEF_Unpack_16x8.asm \ + Core_Kernels/PA_AVS_IEF_Unpack_8x4.asm \ + Core_Kernels/PA_AVS_IEF_Unpack_8x8.asm \ + Core_Kernels/PA_DN_ALG.asm \ + Core_Kernels/PA_DNDI_ALG.asm \ + Core_Kernels/PA_Scaling.asm \ + Core_Kernels/PL2_AVS_IEF_16x8.asm \ + Core_Kernels/PL2_AVS_IEF_8x4.asm \ + Core_Kernels/PL2_AVS_IEF_8x8.asm \ + Core_Kernels/PL2_AVS_IEF_Unpack_16x8.asm \ + Core_Kernels/PL2_AVS_IEF_Unpack_8x4.asm \ + Core_Kernels/PL2_AVS_IEF_Unpack_8x8.asm \ + Core_Kernels/PL2_Scaling.asm \ + Core_Kernels/PL3_AVS_IEF_16x8.asm \ + Core_Kernels/PL3_AVS_IEF_8x4.asm \ + Core_Kernels/PL3_AVS_IEF_8x8.asm \ + Core_Kernels/PL3_AVS_IEF_Unpack_16x8.asm \ + Core_Kernels/PL3_AVS_IEF_Unpack_8x4.asm \ + Core_Kernels/PL3_AVS_IEF_Unpack_8x8.asm \ + Core_Kernels/PL3_Scaling.asm \ + Core_Kernels/PL_DN_ALG.asm \ + Core_Kernels/PL_DNDI_ALG.asm \ + Core_Kernels/PL_DNDI_ALG_UVCopy_NV11.asm \ + Core_Kernels/PL_DNDI_ALG_UVCopy_NV12.asm \ + Core_Kernels/PL_DNDI_ALG_UVCopy_P208.asm \ + Core_Kernels/PL_DNDI_ALG_UVCopy_PL3.asm \ + Core_Kernels/RGB_AVS_IEF_16x8.asm \ + Core_Kernels/RGB_AVS_IEF_Unpack_16x8.asm \ + Core_Kernels/RGB_AVS_IEF_Unscramble_16x8.asm \ + Core_Kernels/RGB_Scaling.asm + EXTRA_DIST = $(INTEL_G4I) \ $(INTEL_G4A) \ $(INTEL_G4B) \ @@ -20,9 +103,17 @@ SUFFIXES = .g4a .g4b $(INTEL_G4B): $(INTEL_G4I) -BUILT_SOURCES= $(INTEL_G4B) +$(INTEL_PP_G4B_GEN5): $(INTEL_PP_ASM) + @_PP_TARGET=$@; \ + cpp -D DEV_ILK -I Common/ -I Core_Kernels $${_PP_TARGET/.g4b.gen5/.asm} > _pp0.asm; \ + ../gpp.py _pp0.asm _pp1.asm; \ + intel-gen4asm -a -o $@ -g 5 _pp1.asm; \ + rm _pp0.asm _pp1.asm + +BUILT_SOURCES= $(INTEL_G4B) $(INTEL_PP_G4B_GEN5) clean-local: -rm -f $(INTEL_G4B) -rm -f $(INTEL_G4B_GEN5) + -rm -f $(INTEL_PP_G4B_GEN5) endif -- cgit v1.2.3