summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorXiang, Haihao <haihao.xiang@intel.com>2011-04-26 16:10:22 +0800
committerXiang, Haihao <haihao.xiang@intel.com>2011-04-26 16:10:22 +0800
commit89b22723b957613466a681046f32f802381ec3c1 (patch)
tree252015dae3f7976b4fe6e1f8c1acb7ddf92e2290
parentae8c5e4aa75195c7c36c5fa4584069e520006b0f (diff)
parent8ea71178ade8ea2076ace283bc2d957a45eda6f6 (diff)
Merge branch 'snb-encoder'
-rw-r--r--Makefile.am10
-rw-r--r--gen6_mfc.c724
-rw-r--r--gen6_mfc.h101
-rw-r--r--gen6_vme.c740
-rw-r--r--gen6_vme.h87
-rw-r--r--i965_defines.h9
-rw-r--r--i965_drv_video.c143
-rw-r--r--i965_drv_video.h22
-rw-r--r--i965_encoder.c385
-rw-r--r--i965_encoder.h75
-rw-r--r--intel_batchbuffer.c16
-rw-r--r--intel_batchbuffer.h3
-rw-r--r--shaders/Makefile.am2
-rw-r--r--shaders/vme/Makefile.am20
-rw-r--r--shaders/vme/inter_frame.asm111
-rw-r--r--shaders/vme/inter_frame.g6b27
-rw-r--r--shaders/vme/intra_frame.asm102
-rw-r--r--shaders/vme/intra_frame.g6b46
-rw-r--r--shaders/vme/vme_header.inc129
19 files changed, 2718 insertions, 34 deletions
diff --git a/Makefile.am b/Makefile.am
index 8dd13bd..3fdc835 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -44,7 +44,10 @@ i965_drv_video_la_SOURCES = \
i965_avc_hw_scoreboard.c\
i965_avc_ildb.c \
i965_post_processing.c \
- gen6_mfd.c
+ gen6_mfd.c \
+ i965_encoder.c \
+ gen6_vme.c \
+ gen6_mfc.c
noinst_HEADERS = \
object_heap.h \
@@ -63,4 +66,7 @@ noinst_HEADERS = \
i965_avc_hw_scoreboard.h\
i965_avc_ildb.h \
i965_post_processing.h \
- gen6_mfd.h
+ gen6_mfd.h \
+ i965_encoder.h \
+ gen6_vme.h \
+ gen6_mfc.h
diff --git a/gen6_mfc.c b/gen6_mfc.c
new file mode 100644
index 0000000..c4c9f48
--- /dev/null
+++ b/gen6_mfc.c
@@ -0,0 +1,724 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Zhou Chang <chang.zhou@intel.com>
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+
+
+#include "intel_batchbuffer.h"
+#include "i965_defines.h"
+#include "i965_structs.h"
+#include "i965_drv_video.h"
+
+static void
+gen6_mfc_pipe_mode_select(VADriverContextP ctx)
+{
+ BEGIN_BCS_BATCH(ctx,4);
+
+ OUT_BCS_BATCH(ctx, MFX_PIPE_MODE_SELECT | (4 - 2));
+ OUT_BCS_BATCH(ctx,
+ (0 << 10) | /* disable Stream-Out */
+ (1 << 9) | /* Post Deblocking Output */
+ (0 << 8) | /* Pre Deblocking Output */
+ (0 << 7) | /* disable TLB prefectch */
+ (0 << 5) | /* not in stitch mode */
+ (1 << 4) | /* encoding mode */
+ (2 << 0)); /* Standard Select: AVC */
+ OUT_BCS_BATCH(ctx,
+ (0 << 20) | /* round flag in PB slice */
+ (0 << 19) | /* round flag in Intra8x8 */
+ (0 << 7) | /* expand NOA bus flag */
+ (1 << 6) | /* must be 1 */
+ (0 << 5) | /* disable clock gating for NOA */
+ (0 << 4) | /* terminate if AVC motion and POC table error occurs */
+ (0 << 3) | /* terminate if AVC mbdata error occurs */
+ (0 << 2) | /* terminate if AVC CABAC/CAVLC decode error occurs */
+ (0 << 1) | /* AVC long field motion vector */
+ (0 << 0)); /* always calculate AVC ILDB boundary strength */
+ OUT_BCS_BATCH(ctx, 0);
+
+ ADVANCE_BCS_BATCH(ctx);
+}
+
+static void
+gen6_mfc_surface_state(VADriverContextP ctx)
+{
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
+ struct gen6_mfc_bcs_state *bcs_state = &i965->gen6_mfc_bcs_state;
+
+ BEGIN_BCS_BATCH(ctx, 6);
+
+ OUT_BCS_BATCH(ctx, MFX_SURFACE_STATE | (6 - 2));
+ OUT_BCS_BATCH(ctx, 0);
+ OUT_BCS_BATCH(ctx,
+ ((bcs_state->surface_state.height - 1) << 19) |
+ ((bcs_state->surface_state.width - 1) << 6));
+ OUT_BCS_BATCH(ctx,
+ (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
+ (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */
+ (0 << 22) | /* surface object control state, FIXME??? */
+ ((bcs_state->surface_state.w_pitch - 1) << 3) | /* pitch */
+ (0 << 2) | /* must be 0 for interleave U/V */
+ (1 << 1) | /* must be y-tiled */
+ (I965_TILEWALK_YMAJOR << 0)); /* tile walk, TILEWALK_YMAJOR */
+ OUT_BCS_BATCH(ctx,
+ (0 << 16) | /* must be 0 for interleave U/V */
+ (bcs_state->surface_state.h_pitch)); /* y offset for U(cb) */
+ OUT_BCS_BATCH(ctx, 0);
+ ADVANCE_BCS_BATCH(ctx);
+}
+
+static void
+gen6_mfc_pipe_buf_addr_state(VADriverContextP ctx)
+{
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
+ struct gen6_mfc_bcs_state *bcs_state = &i965->gen6_mfc_bcs_state;
+ int i;
+
+ BEGIN_BCS_BATCH(ctx, 24);
+
+ OUT_BCS_BATCH(ctx, MFX_PIPE_BUF_ADDR_STATE | (24 - 2));
+
+ OUT_BCS_BATCH(ctx, 0); /* pre output addr */
+
+ OUT_BCS_RELOC(ctx, bcs_state->post_deblocking_output.bo,
+ I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+ 0); /* post output addr */
+
+ OUT_BCS_RELOC(ctx, bcs_state->uncompressed_picture_source.bo,
+ I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+ 0); /* uncompressed data */
+
+ OUT_BCS_BATCH(ctx, 0); /* StreamOut data*/
+ OUT_BCS_RELOC(ctx, bcs_state->intra_row_store_scratch_buffer.bo,
+ I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+ 0);
+ OUT_BCS_RELOC(ctx, bcs_state->deblocking_filter_row_store_scratch_buffer.bo,
+ I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+ 0);
+ /* 7..22 Reference pictures*/
+ for (i = 0; i < ARRAY_ELEMS(bcs_state->reference_surfaces); i++) {
+ if ( bcs_state->reference_surfaces[i].bo != NULL) {
+ OUT_BCS_RELOC(ctx, bcs_state->reference_surfaces[i].bo,
+ I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+ 0);
+ } else {
+ OUT_BCS_BATCH(ctx, 0);
+ }
+ }
+ OUT_BCS_BATCH(ctx, 0); /* no block status */
+
+ ADVANCE_BCS_BATCH(ctx);
+}
+
+static void
+gen6_mfc_ind_obj_base_addr_state(VADriverContextP ctx)
+{
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
+ struct gen6_media_state *media_state = &i965->gen6_media_state;
+
+ BEGIN_BCS_BATCH(ctx, 11);
+
+ OUT_BCS_BATCH(ctx, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
+ OUT_BCS_BATCH(ctx, 0);
+ OUT_BCS_BATCH(ctx, 0);
+ /* MFX Indirect MV Object Base Address */
+ OUT_BCS_RELOC(ctx, media_state->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+ OUT_BCS_BATCH(ctx, 0);
+ OUT_BCS_BATCH(ctx, 0);
+ OUT_BCS_BATCH(ctx, 0);
+ OUT_BCS_BATCH(ctx, 0);
+ OUT_BCS_BATCH(ctx, 0);
+ /*MFC Indirect PAK-BSE Object Base Address for Encoder*/
+ OUT_BCS_BATCH(ctx, 0);
+ OUT_BCS_BATCH(ctx, 0);
+
+ ADVANCE_BCS_BATCH(ctx);
+}
+
+static void
+gen6_mfc_bsp_buf_base_addr_state(VADriverContextP ctx)
+{
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
+ struct gen6_mfc_bcs_state *bcs_state = &i965->gen6_mfc_bcs_state;
+
+ BEGIN_BCS_BATCH(ctx, 4);
+
+ OUT_BCS_BATCH(ctx, MFX_BSP_BUF_BASE_ADDR_STATE | (4 - 2));
+ OUT_BCS_RELOC(ctx, bcs_state->bsd_mpc_row_store_scratch_buffer.bo,
+ I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+ 0);
+ OUT_BCS_BATCH(ctx, 0);
+ OUT_BCS_BATCH(ctx, 0);
+
+ ADVANCE_BCS_BATCH(ctx);
+}
+
+static void
+gen6_mfc_avc_img_state(VADriverContextP ctx)
+{
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
+ struct gen6_mfc_bcs_state *bcs_state = &i965->gen6_mfc_bcs_state;
+
+ int width_in_mbs = (bcs_state->surface_state.width + 15) / 16;
+ int height_in_mbs = (bcs_state->surface_state.height + 15) / 16;
+
+ BEGIN_BCS_BATCH(ctx, 13);
+ OUT_BCS_BATCH(ctx, MFX_AVC_IMG_STATE | (13 - 2));
+ OUT_BCS_BATCH(ctx,
+ ((width_in_mbs * height_in_mbs) & 0xFFFF));
+ OUT_BCS_BATCH(ctx,
+ (height_in_mbs << 16) |
+ (width_in_mbs << 0));
+ OUT_BCS_BATCH(ctx,
+ (0 << 24) | /*Second Chroma QP Offset*/
+ (0 << 16) | /*Chroma QP Offset*/
+ (0 << 14) | /*Max-bit conformance Intra flag*/
+ (0 << 13) | /*Max Macroblock size conformance Inter flag*/
+ (1 << 12) | /*Should always be written as "1" */
+ (0 << 10) | /*QM Preset FLag */
+ (0 << 8) | /*Image Structure*/
+ (0 << 0) ); /*Current Decoded Image Frame Store ID, reserved in Encode mode*/
+ OUT_BCS_BATCH(ctx,
+ (0 << 16) | /*Minimum Frame size*/
+ (0 << 15) | /*Disable reading of Macroblock Status Buffer*/
+ (0 << 14) | /*Load BitStream Pointer only once, 1 slic 1 frame*/
+ (0 << 13) | /*CABAC 0 word insertion test enable*/
+ (1 << 12) | /*MVUnpackedEnable,compliant to DXVA*/
+ (1 << 10) | /*Chroma Format IDC, 4:2:0*/
+ (1 << 7) | /*0:CAVLC encoding mode,1:CABAC*/
+ (0 << 6) | /*Only valid for VLD decoding mode*/
+ (0 << 5) | /*Constrained Intra Predition Flag, from PPS*/
+ (0 << 4) | /*Direct 8x8 inference flag*/
+ (0 << 3) | /*Only 8x8 IDCT Transform Mode Flag*/
+ (1 << 2) | /*Frame MB only flag*/
+ (0 << 1) | /*MBAFF mode is in active*/
+ (0 << 0) ); /*Field picture flag*/
+ OUT_BCS_BATCH(ctx, 0); /*Mainly about MB rate control and debug, just ignoring*/
+ OUT_BCS_BATCH(ctx, /*Inter and Intra Conformance Max size limit*/
+ (0xBB8 << 16) | /*InterMbMaxSz*/
+ (0xEE8) ); /*IntraMbMaxSz*/
+ OUT_BCS_BATCH(ctx, 0); /*Reserved*/
+ OUT_BCS_BATCH(ctx, 0); /*Slice QP Delta for bitrate control*/
+ OUT_BCS_BATCH(ctx, 0); /*Slice QP Delta for bitrate control*/
+ OUT_BCS_BATCH(ctx, 0x8C000000);
+ OUT_BCS_BATCH(ctx, 0x00010000);
+ OUT_BCS_BATCH(ctx, 0);
+
+ ADVANCE_BCS_BATCH(ctx);
+}
+
+
+static void gen6_mfc_avc_directmode_state(VADriverContextP ctx)
+{
+ int i;
+
+ BEGIN_BCS_BATCH(ctx, 69);
+
+ OUT_BCS_BATCH(ctx, MFX_AVC_DIRECTMODE_STATE | (69 - 2));
+ //TODO: reference DMV
+ for(i = 0; i < 16; i++){
+ OUT_BCS_BATCH(ctx, 0);
+ OUT_BCS_BATCH(ctx, 0);
+ }
+
+ //TODO: current DMV just for test
+#if 0
+ OUT_BCS_RELOC(ctx, bcs_state->direct_mv_buffers[0].bo,
+ I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+ 0);
+#else
+ //drm_intel_bo_pin(bcs_state->direct_mv_buffers[0].bo, 0x1000);
+ //OUT_BCS_BATCH(ctx, bcs_state->direct_mv_buffers[0].bo->offset);
+ OUT_BCS_BATCH(ctx, 0);
+#endif
+
+
+ OUT_BCS_BATCH(ctx, 0);
+
+ //TODO: POL list
+ for(i = 0; i < 34; i++) {
+ OUT_BCS_BATCH(ctx, 0);
+ }
+
+ ADVANCE_BCS_BATCH(ctx);
+}
+
+static void gen6_mfc_avc_slice_state(VADriverContextP ctx, int intra_slice)
+{
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
+ struct gen6_mfc_bcs_state *bcs_state = &i965->gen6_mfc_bcs_state;
+
+ BEGIN_BCS_BATCH(ctx, 11);
+
+ OUT_BCS_BATCH(ctx, MFX_AVC_SLICE_STATE | (11 - 2) );
+
+ if ( intra_slice )
+ OUT_BCS_BATCH(ctx, 2); /*Slice Type: I Slice*/
+ else
+ OUT_BCS_BATCH(ctx, 0); /*Slice Type: P Slice*/
+
+ if ( intra_slice )
+ OUT_BCS_BATCH(ctx, 0); /*no reference frames and pred_weight_table*/
+ else
+ OUT_BCS_BATCH(ctx, 0x00010000); /*1 reference frame*/
+
+ OUT_BCS_BATCH(ctx, (0<<24) | /*Enable deblocking operation*/
+ (26<<16) | /*Slice Quantization Parameter*/
+ 0x0202 );
+ OUT_BCS_BATCH(ctx, 0); /*First MB X&Y , the postion of current slice*/
+ OUT_BCS_BATCH(ctx, ( ((bcs_state->surface_state.height+15)/16) << 16) );
+
+ OUT_BCS_BATCH(ctx,
+ (0<<31) | /*RateControlCounterEnable = disable*/
+ (1<<30) | /*ResetRateControlCounter*/
+ (2<<28) | /*RC Trigger Mode = Loose Rate Control*/
+ (1<<19) | /*IsLastSlice*/
+ (0<<18) | /*BitstreamOutputFlag Compressed BitStream Output Disable Flag 0:enable 1:disable*/
+ (0<<17) | /*HeaderPresentFlag*/
+ (1<<16) | /*SliceData PresentFlag*/
+ (0<<15) | /*TailPresentFlag*/
+ (1<<13) | /*RBSP NAL TYPE*/
+ (0<<12) ); /*CabacZeroWordInsertionEnable*/
+
+ OUT_BCS_RELOC(ctx, bcs_state->mfc_indirect_pak_bse_object.bo,
+ I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+ bcs_state->mfc_indirect_pak_bse_object.offset);
+
+ OUT_BCS_BATCH(ctx, 0);
+ OUT_BCS_BATCH(ctx, 0);
+ OUT_BCS_BATCH(ctx, 0);
+
+ ADVANCE_BCS_BATCH(ctx);
+}
+static void gen6_mfc_avc_qm_state(VADriverContextP ctx)
+{
+ int i;
+
+ BEGIN_BCS_BATCH(ctx, 58);
+
+ OUT_BCS_BATCH(ctx, MFX_AVC_QM_STATE | 56);
+ OUT_BCS_BATCH(ctx, 0xFF ) ;
+ for( i = 0; i < 56; i++) {
+ OUT_BCS_BATCH(ctx, 0x10101010);
+ }
+
+ ADVANCE_BCS_BATCH(ctx);
+}
+
+static void gen6_mfc_avc_fqm_state(VADriverContextP ctx)
+{
+ int i;
+
+ BEGIN_BCS_BATCH(ctx, 113);
+ OUT_BCS_BATCH(ctx, MFC_AVC_FQM_STATE | (113 - 2));
+
+ for(i = 0; i < 112;i++) {
+ OUT_BCS_BATCH(ctx, 0x10001000);
+ }
+
+ ADVANCE_BCS_BATCH(ctx);
+}
+
+static void gen6_mfc_avc_ref_idx_state(VADriverContextP ctx)
+{
+ int i;
+
+ BEGIN_BCS_BATCH(ctx, 10);
+
+ OUT_BCS_BATCH(ctx, MFX_AVC_REF_IDX_STATE | 8);
+ OUT_BCS_BATCH(ctx, 0); //Select L0
+
+ OUT_BCS_BATCH(ctx, 0x80808000); //Only 1 reference
+ for(i = 0; i < 7; i++) {
+ OUT_BCS_BATCH(ctx, 0x80808080);
+ }
+
+ ADVANCE_BCS_BATCH(ctx);
+}
+
+
+static void
+gen6_mfc_avc_insert_object(VADriverContextP ctx, int flush_data)
+{
+ BEGIN_BCS_BATCH(ctx, 4);
+
+ OUT_BCS_BATCH(ctx, MFC_AVC_INSERT_OBJECT | (4 -2 ) );
+ OUT_BCS_BATCH(ctx, (32<<8) |
+ (1 << 3) |
+ (1 << 2) |
+ (flush_data << 1) |
+ (1<<0) );
+ OUT_BCS_BATCH(ctx, 0x00000003);
+ OUT_BCS_BATCH(ctx, 0xABCD1234);
+
+ ADVANCE_BCS_BATCH(ctx);
+}
+
+static int
+gen6_mfc_avc_pak_object_intra(VADriverContextP ctx, int x, int y, int end_mb, int qp,unsigned int *msg)
+{
+ int len_in_dwords = 11;
+
+ BEGIN_BCS_BATCH(ctx, len_in_dwords);
+
+ OUT_BCS_BATCH(ctx, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
+ OUT_BCS_BATCH(ctx, 0);
+ OUT_BCS_BATCH(ctx, 0);
+ OUT_BCS_BATCH(ctx,
+ (0 << 24) | /* PackedMvNum, Debug*/
+ (0 << 20) | /* No motion vector */
+ (1 << 19) | /* CbpDcY */
+ (1 << 18) | /* CbpDcU */
+ (1 << 17) | /* CbpDcV */
+ (msg[0] & 0xFFFF) );
+
+ OUT_BCS_BATCH(ctx, (0xFFFF<<16) | (y << 8) | x); /* Code Block Pattern for Y*/
+ OUT_BCS_BATCH(ctx, 0x000F000F); /* Code Block Pattern */
+ OUT_BCS_BATCH(ctx, (0 << 27) | (end_mb << 26) | qp); /* Last MB */
+
+ /*Stuff for Intra MB*/
+ OUT_BCS_BATCH(ctx, msg[1]); /* We using Intra16x16 no 4x4 predmode*/
+ OUT_BCS_BATCH(ctx, msg[2]);
+ OUT_BCS_BATCH(ctx, msg[3]&0xFC);
+
+ OUT_BCS_BATCH(ctx, 0x8040000); /*MaxSizeInWord and TargetSizeInWord*/
+
+ ADVANCE_BCS_BATCH(ctx);
+
+ return len_in_dwords;
+}
+
+static int gen6_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb, int qp, unsigned int offset)
+{
+ int len_in_dwords = 11;
+
+ BEGIN_BCS_BATCH(ctx, len_in_dwords);
+
+ OUT_BCS_BATCH(ctx, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
+
+ OUT_BCS_BATCH(ctx, 32); /* 32 MV*/
+ OUT_BCS_BATCH(ctx, offset);
+
+ OUT_BCS_BATCH(ctx,
+ (1 << 24) | /* PackedMvNum, Debug*/
+ (4 << 20) | /* 8 MV, SNB don't use it*/
+ (1 << 19) | /* CbpDcY */
+ (1 << 18) | /* CbpDcU */
+ (1 << 17) | /* CbpDcV */
+ (0 << 15) | /* Transform8x8Flag = 0*/
+ (0 << 14) | /* Frame based*/
+ (0 << 13) | /* Inter MB */
+ (1 << 8) | /* MbType = P_L0_16x16 */
+ (0 << 7) | /* MBZ for frame */
+ (0 << 6) | /* MBZ */
+ (2 << 4) | /* MBZ for inter*/
+ (0 << 3) | /* MBZ */
+ (0 << 2) | /* SkipMbFlag */
+ (0 << 0)); /* InterMbMode */
+
+ OUT_BCS_BATCH(ctx, (0xFFFF<<16) | (y << 8) | x); /* Code Block Pattern for Y*/
+ OUT_BCS_BATCH(ctx, 0x000F000F); /* Code Block Pattern */
+ OUT_BCS_BATCH(ctx, (0 << 27) | (end_mb << 26) | qp); /* Last MB */
+
+ /*Stuff for Inter MB*/
+ OUT_BCS_BATCH(ctx, 0x0);
+ OUT_BCS_BATCH(ctx, 0x0);
+ OUT_BCS_BATCH(ctx, 0x0);
+
+ OUT_BCS_BATCH(ctx, 0xF0020000); /*MaxSizeInWord and TargetSizeInWord*/
+
+ ADVANCE_BCS_BATCH(ctx);
+
+ return len_in_dwords;
+}
+
+static void gen6_mfc_init(VADriverContextP ctx)
+{
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
+ struct gen6_mfc_bcs_state *bcs_state = &i965->gen6_mfc_bcs_state;
+ dri_bo *bo;
+ int i;
+
+ /*Encode common setup for MFC*/
+ dri_bo_unreference(bcs_state->post_deblocking_output.bo);
+ bcs_state->post_deblocking_output.bo = NULL;
+
+ dri_bo_unreference(bcs_state->pre_deblocking_output.bo);
+ bcs_state->pre_deblocking_output.bo = NULL;
+
+ dri_bo_unreference(bcs_state->uncompressed_picture_source.bo);
+ bcs_state->uncompressed_picture_source.bo = NULL;
+
+ dri_bo_unreference(bcs_state->mfc_indirect_pak_bse_object.bo);
+ bcs_state->mfc_indirect_pak_bse_object.bo = NULL;
+
+ for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
+ dri_bo_unreference(bcs_state->direct_mv_buffers[i].bo);
+ bcs_state->direct_mv_buffers[i].bo = NULL;
+ }
+
+ for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
+ if ( bcs_state->reference_surfaces[i].bo != NULL)
+ dri_bo_unreference( bcs_state->reference_surfaces[i].bo );
+ bcs_state->reference_surfaces[i].bo = NULL;
+ }
+
+ dri_bo_unreference(bcs_state->intra_row_store_scratch_buffer.bo);
+ bo = dri_bo_alloc(i965->intel.bufmgr,
+ "Buffer",
+ 128 * 64,
+ 64);
+ assert(bo);
+ bcs_state->intra_row_store_scratch_buffer.bo = bo;
+
+ dri_bo_unreference(bcs_state->deblocking_filter_row_store_scratch_buffer.bo);
+ bo = dri_bo_alloc(i965->intel.bufmgr,
+ "Buffer",
+ 49152, /* 6 * 128 * 64 */
+ 64);
+ assert(bo);
+ bcs_state->deblocking_filter_row_store_scratch_buffer.bo = bo;
+
+ dri_bo_unreference(bcs_state->bsd_mpc_row_store_scratch_buffer.bo);
+ bo = dri_bo_alloc(i965->intel.bufmgr,
+ "Buffer",
+ 12288, /* 1.5 * 128 * 64 */
+ 0x1000);
+ assert(bo);
+ bcs_state->bsd_mpc_row_store_scratch_buffer.bo = bo;
+}
+
+void gen6_mfc_avc_pipeline_programing(VADriverContextP ctx, void *obj)
+{
+ struct mfc_encode_state *encode_state = obj;
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
+ struct gen6_mfc_bcs_state *bcs_state = &i965->gen6_mfc_bcs_state;
+ int width_in_mbs = (bcs_state->surface_state.width + 15) / 16;
+ int height_in_mbs = (bcs_state->surface_state.height + 15) / 16;
+ int x,y;
+ struct gen6_media_state *media_state = &i965->gen6_media_state;
+ VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param->buffer;
+ VAEncSliceParameterBuffer *pSliceParameter = (VAEncSliceParameterBuffer *)encode_state->slice_params[0]->buffer;
+ unsigned int *msg = NULL, offset = 0;
+ int emit_new_state = 1, object_len_in_bytes;
+ int is_intra = pSliceParameter->slice_flags.bits.is_intra;
+
+ intel_batchbuffer_start_atomic_bcs(ctx, 0x1000);
+
+ if (is_intra) {
+ dri_bo_map(media_state->vme_output.bo , 1);
+ msg = (unsigned int *)media_state->vme_output.bo->virtual;
+ }
+
+ for (y = 0; y < height_in_mbs; y++) {
+ for (x = 0; x < width_in_mbs; x++) {
+ int last_mb = (y == (height_in_mbs-1)) && ( x == (width_in_mbs-1) );
+ int qp = pSequenceParameter->initial_qp;
+
+ if (emit_new_state) {
+ intel_batchbuffer_emit_mi_flush_bcs(ctx);
+ gen6_mfc_pipe_mode_select(ctx);
+ gen6_mfc_surface_state(ctx);
+ gen6_mfc_pipe_buf_addr_state(ctx);
+ gen6_mfc_ind_obj_base_addr_state(ctx);
+ gen6_mfc_bsp_buf_base_addr_state(ctx);
+ gen6_mfc_avc_img_state(ctx);
+ gen6_mfc_avc_qm_state(ctx);
+ gen6_mfc_avc_fqm_state(ctx);
+ gen6_mfc_avc_ref_idx_state(ctx);
+ /*gen6_mfc_avc_directmode_state(ctx);*/
+ gen6_mfc_avc_slice_state(ctx, is_intra);
+ /*gen6_mfc_avc_insert_object(ctx, 0);*/
+ emit_new_state = 0;
+ }
+
+ if (is_intra) {
+ assert(msg);
+ object_len_in_bytes = gen6_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg);
+ msg += 4;
+ } else {
+ object_len_in_bytes = gen6_mfc_avc_pak_object_inter(ctx, x, y, last_mb, qp, offset);
+ offset += 64;
+ }
+
+ if (intel_batchbuffer_check_free_space_bcs(ctx, object_len_in_bytes) == 0) {
+ intel_batchbuffer_end_atomic_bcs(ctx);
+ intel_batchbuffer_flush_bcs(ctx);
+ emit_new_state = 1;
+ intel_batchbuffer_start_atomic_bcs(ctx, 0x1000);
+ }
+ }
+ }
+
+ if (is_intra)
+ dri_bo_unmap(media_state->vme_output.bo);
+
+ intel_batchbuffer_end_atomic_bcs(ctx);
+}
+
+static VAStatus gen6_mfc_avc_prepare(VADriverContextP ctx,
+ VAContextID context,
+ struct mfc_encode_state *encode_state)
+{
+ VAStatus vaStatus = VA_STATUS_SUCCESS;
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
+ struct gen6_mfc_bcs_state *bcs_state = &i965->gen6_mfc_bcs_state;
+ struct object_surface *obj_surface;
+ struct object_buffer *obj_buffer;
+ dri_bo *bo;
+ VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param->buffer;
+
+ /*Setup all the input&output object*/
+ obj_surface = SURFACE(pPicParameter->reconstructed_picture);
+ assert(obj_surface);
+
+ if (!obj_surface->bo) {
+ uint32_t tiling_mode = I915_TILING_Y;
+ unsigned long pitch;
+
+ obj_surface->bo = drm_intel_bo_alloc_tiled(i965->intel.bufmgr,
+ "vaapi surface",
+ obj_surface->width,
+ obj_surface->height + obj_surface->height / 2,
+ 1,
+ &tiling_mode,
+ &pitch,
+ 0);
+ assert(obj_surface->bo);
+ assert(tiling_mode == I915_TILING_Y);
+ assert(pitch == obj_surface->width);
+ }
+
+ bcs_state->post_deblocking_output.bo = obj_surface->bo;
+ dri_bo_reference(bcs_state->post_deblocking_output.bo);
+
+ bcs_state->surface_state.width = obj_surface->orig_width;
+ bcs_state->surface_state.height = obj_surface->orig_height;
+ bcs_state->surface_state.w_pitch = obj_surface->width;
+ bcs_state->surface_state.h_pitch = obj_surface->height;
+
+ obj_surface = SURFACE(pPicParameter->reference_picture);
+ assert(obj_surface);
+ if ( obj_surface->bo != NULL) {
+ bcs_state->reference_surfaces[0].bo = obj_surface->bo;
+ dri_bo_reference(obj_surface->bo);
+ }
+
+ obj_surface = SURFACE(encode_state->current_render_target);
+ assert(obj_surface && obj_surface->bo);
+ bcs_state->uncompressed_picture_source.bo = obj_surface->bo;
+ dri_bo_reference(bcs_state->uncompressed_picture_source.bo);
+
+ obj_buffer = BUFFER (pPicParameter->coded_buf); /* FIXME: fix this later */
+ bo = obj_buffer->buffer_store->bo;
+ assert(bo);
+ bcs_state->mfc_indirect_pak_bse_object.bo = bo;
+ bcs_state->mfc_indirect_pak_bse_object.offset = ALIGN(sizeof(VACodedBufferSegment), 64);
+ dri_bo_reference(bcs_state->mfc_indirect_pak_bse_object.bo);
+
+ /*Programing bcs pipeline*/
+ gen6_mfc_avc_pipeline_programing(ctx, encode_state); //filling the pipeline
+
+ return vaStatus;
+}
+
+static VAStatus gen6_mfc_run(VADriverContextP ctx,
+ VAContextID context,
+ struct mfc_encode_state *encode_state)
+{
+ intel_batchbuffer_flush_bcs(ctx); //run the pipeline
+
+ return VA_STATUS_SUCCESS;
+}
+
+static VAStatus gen6_mfc_stop(VADriverContextP ctx,
+ VAContextID context,
+ struct mfc_encode_state *encode_state)
+{
+#if 0
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
+ struct gen6_media_state *media_state = &i965->gen6_media_state;
+
+ VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param->buffer;
+
+ struct object_surface *obj_surface = SURFACE(pPicParameter->reconstructed_picture);
+ //struct object_surface *obj_surface = SURFACE(pPicParameter->reference_picture[0]);
+ //struct object_surface *obj_surface = SURFACE(encode_state->current_render_target);
+ my_debug(obj_surface);
+
+#endif
+
+ return VA_STATUS_SUCCESS;
+}
+
+static VAStatus
+gen6_mfc_avc_encode_picture(VADriverContextP ctx,
+ VAContextID context,
+ struct mfc_encode_state *encode_state)
+{
+ gen6_mfc_init(ctx);
+ gen6_mfc_avc_prepare(ctx, context, encode_state);
+ gen6_mfc_run(ctx, context, encode_state);
+ gen6_mfc_stop(ctx, context, encode_state);
+
+ return VA_STATUS_SUCCESS;
+}
+
+VAStatus
+gen6_mfc_pipeline(VADriverContextP ctx,
+ VAContextID context,
+ struct mfc_encode_state *encode_state)
+{
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
+ struct object_context *obj_context = CONTEXT(context);
+ struct object_config *obj_config;
+ VAContextID config;
+ VAStatus vaStatus;
+
+ assert(obj_context);
+ config = obj_context->config_id;
+ obj_config = CONFIG(config);
+ assert(obj_config);
+
+ switch( obj_config->profile) {
+ case VAProfileH264Baseline:
+ vaStatus = gen6_mfc_avc_encode_picture(ctx, context, &(obj_context->encode_state));
+ break;
+
+ /* FIXME: add for other profile */
+ default:
+ vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
+ break;
+ }
+
+ return vaStatus;
+}
+
diff --git a/gen6_mfc.h b/gen6_mfc.h
new file mode 100644
index 0000000..d41b383
--- /dev/null
+++ b/gen6_mfc.h
@@ -0,0 +1,101 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Zhou Chang <chang.zhou@intel.com>
+ *
+ */
+
+#ifndef _GEN6_MFC_BCS_H_
+#define _GEN6_MFC_BCS_H_
+
+#include <xf86drm.h>
+#include <drm.h>
+#include <i915_drm.h>
+#include <intel_bufmgr.h>
+
+struct mfc_encode_state;
+
+#define MAX_MFC_REFERENCE_SURFACES 16
+#define NUM_MFC_DMV_BUFFERS 34
+struct gen6_mfc_bcs_state
+{
+ struct {
+ unsigned int width;
+ unsigned int height;
+ unsigned int w_pitch;
+ unsigned int h_pitch;
+ } surface_state;
+
+
+ //MFX_PIPE_BUF_ADDR_STATE
+ struct {
+ dri_bo *bo;
+ } post_deblocking_output; //OUTPUT: reconstructed picture
+
+ struct {
+ dri_bo *bo;
+ } pre_deblocking_output; //OUTPUT: reconstructed picture with deblocked
+
+ struct {
+ dri_bo *bo;
+ } uncompressed_picture_source; //INPUT: original compressed image
+
+ struct {
+ dri_bo *bo;
+ } intra_row_store_scratch_buffer; //INTERNAL:
+
+ struct {
+ dri_bo *bo;
+ } deblocking_filter_row_store_scratch_buffer; //INTERNAL:
+
+ struct {
+ dri_bo *bo;
+ } reference_surfaces[MAX_MFC_REFERENCE_SURFACES]; //INTERNAL: refrence surfaces
+
+ //MFX_IND_OBJ_BASE_ADDR_STATE
+ struct{
+ dri_bo *bo;
+ } mfc_indirect_mv_object; //INPUT: the blocks' mv info
+
+ struct {
+ dri_bo *bo;
+ int offset;
+ } mfc_indirect_pak_bse_object; //OUTPUT: the compressed bitstream
+
+ //MFX_BSP_BUF_BASE_ADDR_STATE
+ struct {
+ dri_bo *bo;
+ }bsd_mpc_row_store_scratch_buffer; //INTERNAL:
+
+ //MFX_AVC_DIRECTMODE_STATE
+ struct {
+ dri_bo *bo;
+ }direct_mv_buffers[NUM_MFC_DMV_BUFFERS]; //INTERNAL: 0-31 as input,32 and 33 as output
+};
+
+VAStatus gen6_mfc_pipeline(VADriverContextP ctx,
+ VAContextID context,
+ struct mfc_encode_state *encode_state);
+
+#endif /* _GEN6_MFC_BCS_H_ */
diff --git a/gen6_vme.c b/gen6_vme.c
new file mode 100644
index 0000000..ccf26bc
--- /dev/null
+++ b/gen6_vme.c
@@ -0,0 +1,740 @@
+/*
+ * Copyright © 2009 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Zhou Chang <chang.zhou@intel.com>
+ *
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+
+#include <va/va_backend.h>
+
+#include "intel_batchbuffer.h"
+#include "intel_driver.h"
+
+#include "i965_defines.h"
+#include "i965_drv_video.h"
+#include "gen6_vme.h"
+
+#define VME_INTRA_SHADER 0
+#define VME_INTER_SHADER 1
+
+#define CURBE_ALLOCATION_SIZE 37 /* in 256-bit */
+#define CURBE_TOTAL_DATA_LENGTH (4 * 32) /* in byte, it should be less than or equal to CURBE_ALLOCATION_SIZE * 32 */
+#define CURBE_URB_ENTRY_LENGTH 4 /* in 256-bit, it should be less than or equal to CURBE_TOTAL_DATA_LENGTH / 32 */
+
+static uint32_t gen6_vme_intra_frame[][4] = {
+#include "shaders/vme/intra_frame.g6b"
+ {0,0,0,0}
+};
+
+static uint32_t gen6_vme_inter_frame[][4] = {
+#include "shaders/vme/inter_frame.g6b"
+ {0,0,0,0}
+};
+
+static struct media_kernel gen6_vme_kernels[] = {
+ {
+ "VME Intra Frame",
+ VME_INTRA_SHADER, /*index*/
+ gen6_vme_intra_frame,
+ sizeof(gen6_vme_intra_frame),
+ NULL
+ },
+ {
+ "VME inter Frame",
+ VME_INTER_SHADER,
+ gen6_vme_inter_frame,
+ sizeof(gen6_vme_inter_frame),
+ NULL
+ }
+};
+
+#define GEN6_VME_KERNEL_NUMBER ARRAY_ELEMS(gen6_vme_kernels)
+
+static void
+gen6_vme_set_common_surface_tiling(struct i965_surface_state *ss, unsigned int tiling)
+{
+ switch (tiling) {
+ case I915_TILING_NONE:
+ ss->ss3.tiled_surface = 0;
+ ss->ss3.tile_walk = 0;
+ break;
+ case I915_TILING_X:
+ ss->ss3.tiled_surface = 1;
+ ss->ss3.tile_walk = I965_TILEWALK_XMAJOR;
+ break;
+ case I915_TILING_Y:
+ ss->ss3.tiled_surface = 1;
+ ss->ss3.tile_walk = I965_TILEWALK_YMAJOR;
+ break;
+ }
+}
+
+static void
+gen6_vme_set_source_surface_tiling(struct i965_surface_state2 *ss, unsigned int tiling)
+{
+ switch (tiling) {
+ case I915_TILING_NONE:
+ ss->ss2.tiled_surface = 0;
+ ss->ss2.tile_walk = 0;
+ break;
+ case I915_TILING_X:
+ ss->ss2.tiled_surface = 1;
+ ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
+ break;
+ case I915_TILING_Y:
+ ss->ss2.tiled_surface = 1;
+ ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
+ break;
+ }
+}
+
+/* only used for VME source surface state */
+static void gen6_vme_source_surface_state(VADriverContextP ctx,
+ int index,
+ struct object_surface *obj_surface)
+{
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
+ struct gen6_media_state *media_state = &i965->gen6_media_state;
+ struct i965_surface_state2 *ss;
+ dri_bo *bo;
+ int w, h, w_pitch, h_pitch;
+ unsigned int tiling, swizzle;
+
+ assert(obj_surface->bo);
+ dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
+
+ w = obj_surface->orig_width;
+ h = obj_surface->orig_height;
+ w_pitch = obj_surface->width;
+ h_pitch = obj_surface->height;
+
+ bo = dri_bo_alloc(i965->intel.bufmgr,
+ "VME surface state",
+ sizeof(struct i965_surface_state2),
+ 0x1000);
+ assert(bo);
+ dri_bo_map(bo, 1);
+ assert(bo->virtual);
+ ss = bo->virtual;
+ memset(ss, 0, sizeof(*ss));
+
+ ss->ss0.surface_base_address = obj_surface->bo->offset;
+
+ ss->ss1.cbcr_pixel_offset_v_direction = 2;
+ ss->ss1.width = w - 1;
+ ss->ss1.height = h - 1;
+
+ ss->ss2.surface_format = MFX_SURFACE_PLANAR_420_8;
+ ss->ss2.interleave_chroma = 1;
+ ss->ss2.pitch = w_pitch - 1;
+ ss->ss2.half_pitch_for_chroma = 0;
+
+ gen6_vme_set_source_surface_tiling(ss, tiling);
+
+ /* UV offset for interleave mode */
+ ss->ss3.x_offset_for_cb = 0;
+ ss->ss3.y_offset_for_cb = h_pitch;
+
+ dri_bo_unmap(bo);
+
+ dri_bo_emit_reloc(bo,
+ I915_GEM_DOMAIN_RENDER, 0,
+ 0,
+ offsetof(struct i965_surface_state2, ss0),
+ obj_surface->bo);
+
+ assert(index < MAX_MEDIA_SURFACES_GEN6);
+ media_state->surface_state[index].bo = bo;
+}
+
+static void
+gen6_vme_media_source_surface_state(VADriverContextP ctx,
+ int index,
+ struct object_surface *obj_surface)
+{
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
+ struct gen6_media_state *media_state = &i965->gen6_media_state;
+ struct i965_surface_state *ss;
+ dri_bo *bo;
+ int w, h, w_pitch;
+ unsigned int tiling, swizzle;
+
+ w = obj_surface->orig_width;
+ h = obj_surface->orig_height;
+ w_pitch = obj_surface->width;
+
+ /* Y plane */
+ dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
+ bo = dri_bo_alloc(i965->intel.bufmgr,
+ "surface state",
+ sizeof(struct i965_surface_state),
+ 0x1000);
+ assert(bo);
+
+ dri_bo_map(bo, True);
+ assert(bo->virtual);
+ ss = bo->virtual;
+ memset(ss, 0, sizeof(*ss));
+ ss->ss0.surface_type = I965_SURFACE_2D;
+ ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
+ ss->ss1.base_addr = obj_surface->bo->offset;
+ ss->ss2.width = w / 4 - 1;
+ ss->ss2.height = h - 1;
+ ss->ss3.pitch = w_pitch - 1;
+ gen6_vme_set_common_surface_tiling(ss, tiling);
+ dri_bo_emit_reloc(bo,
+ I915_GEM_DOMAIN_RENDER,
+ 0,
+ 0,
+ offsetof(struct i965_surface_state, ss1),
+ obj_surface->bo);
+ dri_bo_unmap(bo);
+
+ assert(index < MAX_MEDIA_SURFACES_GEN6);
+ media_state->surface_state[index].bo = bo;
+}
+
+static VAStatus
+gen6_vme_output_buffer_setup(VADriverContextP ctx,
+ VAContextID context,
+ struct mfc_encode_state *encode_state,
+ int index)
+
+{
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
+ struct gen6_media_state *media_state = &i965->gen6_media_state;
+ struct object_context *obj_context = CONTEXT(context);
+ struct i965_surface_state *ss;
+ dri_bo *bo;
+ int width_in_mbs = ALIGN(obj_context->picture_width, 16) / 16;
+ int height_in_mbs = ALIGN(obj_context->picture_height, 16) / 16;
+ int num_entries;
+ VAEncSliceParameterBuffer *pSliceParameter = (VAEncSliceParameterBuffer *)encode_state->slice_params[0]->buffer;
+ int is_intra = pSliceParameter->slice_flags.bits.is_intra;
+
+ if ( is_intra ) {
+ media_state->vme_output.num_blocks = width_in_mbs * height_in_mbs;
+ } else {
+ media_state->vme_output.num_blocks = width_in_mbs * height_in_mbs * 4;
+ }
+ media_state->vme_output.size_block = 16; /* an OWORD */
+ media_state->vme_output.pitch = ALIGN(media_state->vme_output.size_block, 16);
+ bo = dri_bo_alloc(i965->intel.bufmgr,
+ "VME output buffer",
+ media_state->vme_output.num_blocks * media_state->vme_output.pitch,
+ 0x1000);
+ assert(bo);
+ media_state->vme_output.bo = bo;
+
+ bo = dri_bo_alloc(i965->intel.bufmgr,
+ "VME output buffer state",
+ sizeof(struct i965_surface_state),
+ 0x1000);
+ assert(bo);
+ dri_bo_map(bo, 1);
+ assert(bo->virtual);
+ ss = bo->virtual;
+ memset(ss, 0, sizeof(*ss));
+
+ /* always use 16 bytes as pitch on Sandy Bridge */
+ num_entries = media_state->vme_output.num_blocks * media_state->vme_output.pitch / 16;
+ ss->ss0.render_cache_read_mode = 1;
+ ss->ss0.surface_type = I965_SURFACE_BUFFER;
+ ss->ss1.base_addr = media_state->vme_output.bo->offset;
+ ss->ss2.width = ((num_entries - 1) & 0x7f);
+ ss->ss2.height = (((num_entries - 1) >> 7) & 0x1fff);
+ ss->ss3.depth = (((num_entries - 1) >> 20) & 0x7f);
+ ss->ss3.pitch = media_state->vme_output.pitch - 1;
+ dri_bo_emit_reloc(bo,
+ I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+ 0,
+ offsetof(struct i965_surface_state, ss1),
+ media_state->vme_output.bo);
+
+ dri_bo_unmap(bo);
+
+ assert(index < MAX_MEDIA_SURFACES_GEN6);
+ media_state->surface_state[index].bo = bo;
+ return VA_STATUS_SUCCESS;
+}
+
+static VAStatus gen6_vme_surface_setup(VADriverContextP ctx,
+ VAContextID context,
+ struct mfc_encode_state *encode_state,
+ int is_intra)
+{
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
+ struct gen6_media_state *media_state = &i965->gen6_media_state;
+ struct object_surface *obj_surface;
+ unsigned int *binding_table;
+ dri_bo *bo = media_state->binding_table.bo;
+ int i;
+ VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param->buffer;
+
+ /*Setup surfaces state*/
+ /* current picture for encoding */
+ obj_surface = SURFACE(encode_state->current_render_target);
+ assert(obj_surface);
+ gen6_vme_source_surface_state(ctx, 0, obj_surface);
+ gen6_vme_media_source_surface_state(ctx, 4, obj_surface);
+
+ if ( ! is_intra ) {
+ /* reference 0 */
+ obj_surface = SURFACE(pPicParameter->reference_picture);
+ assert(obj_surface);
+ gen6_vme_source_surface_state(ctx, 1, obj_surface);
+ /* reference 1, FIXME: */
+ // obj_surface = SURFACE(pPicParameter->reference_picture);
+ // assert(obj_surface);
+ //gen6_vme_source_surface_state(ctx, 2, obj_surface);
+ }
+
+ /* VME output */
+ gen6_vme_output_buffer_setup(ctx, context, encode_state, 3);
+
+ /*Building binding table*/
+ dri_bo_map(bo, 1);
+ assert(bo->virtual);
+ binding_table = bo->virtual;
+ memset(binding_table, 0, bo->size);
+
+ for (i = 0; i < MAX_MEDIA_SURFACES_GEN6; i++) {
+ if (media_state->surface_state[i].bo) {
+ binding_table[i] = media_state->surface_state[i].bo->offset;
+ dri_bo_emit_reloc(bo,
+ I915_GEM_DOMAIN_INSTRUCTION, 0,
+ 0,
+ i * sizeof(*binding_table),
+ media_state->surface_state[i].bo);
+ }
+ }
+
+ dri_bo_unmap(media_state->binding_table.bo);
+
+ return VA_STATUS_SUCCESS;
+}
+
+static VAStatus gen6_vme_interface_setup(VADriverContextP ctx,
+ VAContextID context,
+ struct mfc_encode_state *encode_state)
+{
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
+ struct gen6_media_state *media_state = &i965->gen6_media_state;
+ struct gen6_interface_descriptor_data *desc;
+ int i;
+ dri_bo *bo;
+
+ bo = media_state->idrt.bo;
+ dri_bo_map(bo, 1);
+ assert(bo->virtual);
+ desc = bo->virtual;
+
+ for (i = 0; i < GEN6_VME_KERNEL_NUMBER; i++) {
+ struct media_kernel *kernel;
+ kernel = &gen6_vme_kernels[i];
+ assert(sizeof(*desc) == 32);
+ /*Setup the descriptor table*/
+ memset(desc, 0, sizeof(*desc));
+ desc->desc0.kernel_start_pointer = (kernel->bo->offset >> 6);
+ desc->desc2.sampler_count = 1; /* FIXME: */
+ desc->desc2.sampler_state_pointer = (media_state->vme_state.bo->offset >> 5);
+ desc->desc3.binding_table_entry_count = 1; /* FIXME: */
+ desc->desc3.binding_table_pointer = (media_state->binding_table.bo->offset >> 5);
+ desc->desc4.constant_urb_entry_read_offset = 0;
+ desc->desc4.constant_urb_entry_read_length = CURBE_URB_ENTRY_LENGTH;
+
+ /*kernel start*/
+ dri_bo_emit_reloc(bo,
+ I915_GEM_DOMAIN_INSTRUCTION, 0,
+ 0,
+ i * sizeof(*desc) + offsetof(struct gen6_interface_descriptor_data, desc0),
+ kernel->bo);
+ /*Sampler State(VME state pointer)*/
+ dri_bo_emit_reloc(bo,
+ I915_GEM_DOMAIN_INSTRUCTION, 0,
+ (1 << 2), //
+ i * sizeof(*desc) + offsetof(struct gen6_interface_descriptor_data, desc2),
+ media_state->vme_state.bo);
+ /*binding table*/
+ dri_bo_emit_reloc(bo,
+ I915_GEM_DOMAIN_INSTRUCTION, 0,
+ 4, //One Entry
+ i * sizeof(*desc) + offsetof(struct gen6_interface_descriptor_data, desc3),
+ media_state->binding_table.bo);
+ desc++;
+ }
+ dri_bo_unmap(bo);
+
+ return VA_STATUS_SUCCESS;
+}
+
+static VAStatus gen6_vme_constant_setup(VADriverContextP ctx,
+ VAContextID context,
+ struct mfc_encode_state *encode_state)
+{
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
+ struct gen6_media_state *media_state = &i965->gen6_media_state;
+ unsigned char *constant_buffer;
+
+ dri_bo_map(media_state->curbe.bo, 1);
+ assert(media_state->curbe.bo->virtual);
+ constant_buffer = media_state->curbe.bo->virtual;
+
+ /*TODO copy buffer into CURB*/
+
+ dri_bo_unmap( media_state->curbe.bo);
+
+ return VA_STATUS_SUCCESS;
+}
+
+static VAStatus gen6_vme_vme_state_setup(VADriverContextP ctx, VAContextID context, struct mfc_encode_state *encode_state, int is_intra)
+{
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
+ struct gen6_media_state *media_state = &i965->gen6_media_state;
+ unsigned int *vme_state_message;
+ int i;
+
+ //building VME state message
+ dri_bo_map(media_state->vme_state.bo, 1);
+ assert(media_state->vme_state.bo->virtual);
+ vme_state_message = (unsigned int *)media_state->vme_state.bo->virtual;
+
+ for(i = 0;i < 32; i++) {
+ vme_state_message[i] = 0x11;
+ }
+ vme_state_message[16] = 0x42424242; //cost function LUT set 0 for Intra
+
+ dri_bo_unmap( media_state->vme_state.bo);
+ return VA_STATUS_SUCCESS;
+}
+
+static void gen6_vme_pipeline_select(VADriverContextP ctx)
+{
+ BEGIN_BATCH(ctx, 1);
+ OUT_BATCH(ctx, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
+ ADVANCE_BATCH(ctx);
+}
+
+static void gen6_vme_state_base_address(VADriverContextP ctx)
+{
+ BEGIN_BATCH(ctx, 10);
+
+ OUT_BATCH(ctx, CMD_STATE_BASE_ADDRESS | 8);
+
+ OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY); //General State Base Address
+ OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY); //Surface State Base Address
+ OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY); //Dynamic State Base Address
+ OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY); //Indirect Object Base Address
+ OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY); //Instruction Base Address
+
+ OUT_BATCH(ctx, 0xFFFFF000 | BASE_ADDRESS_MODIFY); //General State Access Upper Bound
+ OUT_BATCH(ctx, 0xFFFFF000 | BASE_ADDRESS_MODIFY); //Dynamic State Access Upper Bound
+ OUT_BATCH(ctx, 0xFFFFF000 | BASE_ADDRESS_MODIFY); //Indirect Object Access Upper Bound
+ OUT_BATCH(ctx, 0xFFFFF000 | BASE_ADDRESS_MODIFY); //Instruction Access Upper Bound
+
+ /*
+ OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY); //LLC Coherent Base Address
+ OUT_BATCH(ctx, 0xFFFFF000 | BASE_ADDRESS_MODIFY ); //LLC Coherent Upper Bound
+ */
+
+ ADVANCE_BATCH(ctx);
+}
+
+static void gen6_vme_vfe_state(VADriverContextP ctx)
+{
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
+ struct gen6_media_state *media_state = &i965->gen6_media_state;
+
+ BEGIN_BATCH(ctx, 8);
+
+ OUT_BATCH(ctx, CMD_MEDIA_VFE_STATE | 6); /*Gen6 CMD_MEDIA_STATE_POINTERS = CMD_MEDIA_STATE */
+ OUT_BATCH(ctx, 0); /*Scratch Space Base Pointer and Space*/
+ OUT_BATCH(ctx, (media_state->vfe_state.max_num_threads << 16)
+ | (media_state->vfe_state.num_urb_entries << 8)
+ | (media_state->vfe_state.gpgpu_mode << 2) ); /*Maximum Number of Threads , Number of URB Entries, MEDIA Mode*/
+ OUT_BATCH(ctx, 0); /*Debug: Object ID*/
+ OUT_BATCH(ctx, (media_state->vfe_state.urb_entry_size << 16)
+ | media_state->vfe_state.curbe_allocation_size); /*URB Entry Allocation Size , CURBE Allocation Size*/
+ OUT_BATCH(ctx, 0); /*Disable Scoreboard*/
+ OUT_BATCH(ctx, 0); /*Disable Scoreboard*/
+ OUT_BATCH(ctx, 0); /*Disable Scoreboard*/
+
+ ADVANCE_BATCH(ctx);
+
+}
+
+static void gen6_vme_curbe_load(VADriverContextP ctx)
+{
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
+ struct gen6_media_state *media_state = &i965->gen6_media_state;
+
+ BEGIN_BATCH(ctx, 4);
+
+ OUT_BATCH(ctx, CMD_MEDIA_CURBE_LOAD | 2);
+ OUT_BATCH(ctx, 0);
+
+ OUT_BATCH(ctx, CURBE_TOTAL_DATA_LENGTH);
+ OUT_RELOC(ctx, media_state->curbe.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+
+ ADVANCE_BATCH(ctx);
+}
+
+static void gen6_vme_idrt(VADriverContextP ctx)
+{
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
+ struct gen6_media_state *media_state = &i965->gen6_media_state;
+
+ BEGIN_BATCH(ctx, 4);
+
+ OUT_BATCH(ctx, CMD_MEDIA_INTERFACE_LOAD | 2);
+ OUT_BATCH(ctx, 0);
+ OUT_BATCH(ctx, GEN6_VME_KERNEL_NUMBER * sizeof(struct gen6_interface_descriptor_data));
+ OUT_RELOC(ctx, media_state->idrt.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+
+ ADVANCE_BATCH(ctx);
+}
+
+static int gen6_vme_media_object(VADriverContextP ctx,
+ VAContextID context,
+ struct mfc_encode_state *encode_state,
+ int mb_x, int mb_y,
+ int kernel)
+{
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
+ struct object_surface *obj_surface = SURFACE( encode_state->current_render_target);
+ int mb_width = ALIGN(obj_surface->orig_width, 16) / 16;
+ int len_in_dowrds = 6 + 1;
+
+ BEGIN_BATCH(ctx, len_in_dowrds);
+
+ OUT_BATCH(ctx, CMD_MEDIA_OBJECT | (len_in_dowrds - 2));
+ OUT_BATCH(ctx, kernel); /*Interface Descriptor Offset*/
+ OUT_BATCH(ctx, 0);
+ OUT_BATCH(ctx, 0);
+ OUT_BATCH(ctx, 0);
+ OUT_BATCH(ctx, 0);
+
+ /*inline data */
+ OUT_BATCH(ctx, mb_width << 16 | mb_y << 8 | mb_x); /*M0.0 Reference0 X,Y, not used in Intra*/
+ ADVANCE_BATCH(ctx);
+
+ return len_in_dowrds * 4;
+}
+
+static void gen6_vme_media_init(VADriverContextP ctx)
+{
+ int i;
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
+ struct gen6_media_state *media_state = &i965->gen6_media_state;
+ dri_bo *bo;
+
+ /* constant buffer */
+ dri_bo_unreference(media_state->curbe.bo);
+ bo = dri_bo_alloc(i965->intel.bufmgr,
+ "Buffer",
+ CURBE_TOTAL_DATA_LENGTH, 64);
+ assert(bo);
+ media_state->curbe.bo = bo;
+
+ /* surface state */
+ for (i = 0; i < MAX_MEDIA_SURFACES_GEN6; i++) {
+ dri_bo_unreference(media_state->surface_state[i].bo);
+ media_state->surface_state[i].bo = NULL;
+ }
+
+ /* binding table */
+ dri_bo_unreference(media_state->binding_table.bo);
+ bo = dri_bo_alloc(i965->intel.bufmgr,
+ "Buffer",
+ MAX_MEDIA_SURFACES_GEN6 * sizeof(unsigned int), 32);
+ assert(bo);
+ media_state->binding_table.bo = bo;
+
+ /* interface descriptor remapping table */
+ dri_bo_unreference(media_state->idrt.bo);
+ bo = dri_bo_alloc(i965->intel.bufmgr,
+ "Buffer",
+ MAX_INTERFACE_DESC_GEN6 * sizeof(struct gen6_interface_descriptor_data), 16);
+ assert(bo);
+ media_state->idrt.bo = bo;
+
+ /* VME output buffer */
+ dri_bo_unreference(media_state->vme_output.bo);
+ media_state->vme_output.bo = NULL;
+
+ /* VME state */
+ dri_bo_unreference(media_state->vme_state.bo);
+ bo = dri_bo_alloc(i965->intel.bufmgr,
+ "Buffer",
+ 1024*16, 64);
+ assert(bo);
+ media_state->vme_state.bo = bo;
+
+ media_state->vfe_state.max_num_threads = 60 - 1;
+ media_state->vfe_state.num_urb_entries = 16;
+ media_state->vfe_state.gpgpu_mode = 0;
+ media_state->vfe_state.urb_entry_size = 59 - 1;
+ media_state->vfe_state.curbe_allocation_size = CURBE_ALLOCATION_SIZE - 1;
+}
+
+static void gen6_vme_pipeline_programing(VADriverContextP ctx,
+ VAContextID context,
+ struct mfc_encode_state *encode_state)
+{
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
+ struct object_context *obj_context = CONTEXT(context);
+ int width_in_mbs = (obj_context->picture_width + 15) / 16;
+ int height_in_mbs = (obj_context->picture_height + 15) / 16;
+ int x, y;
+ int emit_new_state = 1, object_len_in_bytes;
+ VAEncSliceParameterBuffer *pSliceParameter = (VAEncSliceParameterBuffer *)encode_state->slice_params[0]->buffer;
+ int is_intra = pSliceParameter->slice_flags.bits.is_intra;
+
+ intel_batchbuffer_start_atomic(ctx, 0x1000);
+
+ for(y = 0; y < height_in_mbs; y++){
+ for(x = 0; x < width_in_mbs; x++){
+
+ if (emit_new_state) {
+ /*Step1: MI_FLUSH/PIPE_CONTROL*/
+ BEGIN_BATCH(ctx, 4);
+ OUT_BATCH(ctx, CMD_PIPE_CONTROL | 0x02);
+ OUT_BATCH(ctx, 0);
+ OUT_BATCH(ctx, 0);
+ OUT_BATCH(ctx, 0);
+ ADVANCE_BATCH(ctx);
+
+ /*Step2: State command PIPELINE_SELECT*/
+ gen6_vme_pipeline_select(ctx);
+
+ /*Step3: State commands configuring pipeline states*/
+ gen6_vme_state_base_address(ctx);
+ gen6_vme_vfe_state(ctx);
+ gen6_vme_curbe_load(ctx);
+ gen6_vme_idrt(ctx);
+
+ emit_new_state = 0;
+ }
+
+ /*Step4: Primitive commands*/
+ object_len_in_bytes = gen6_vme_media_object(ctx, context, encode_state, x, y, is_intra ? VME_INTRA_SHADER : VME_INTER_SHADER);
+
+ if (intel_batchbuffer_check_free_space(ctx, object_len_in_bytes) == 0) {
+ intel_batchbuffer_end_atomic(ctx);
+ intel_batchbuffer_flush(ctx);
+ emit_new_state = 1;
+ intel_batchbuffer_start_atomic(ctx, 0x1000);
+ }
+ }
+ }
+
+ intel_batchbuffer_end_atomic(ctx);
+}
+
+static VAStatus gen6_vme_prepare(VADriverContextP ctx,
+ VAContextID context,
+ struct mfc_encode_state *encode_state)
+{
+ VAStatus vaStatus = VA_STATUS_SUCCESS;
+ VAEncSliceParameterBuffer *pSliceParameter = (VAEncSliceParameterBuffer *)encode_state->slice_params[0]->buffer;
+ int is_intra = pSliceParameter->slice_flags.bits.is_intra;
+
+ /*Setup all the memory object*/
+ gen6_vme_surface_setup(ctx, context, encode_state, is_intra);
+ gen6_vme_interface_setup(ctx, context, encode_state);
+ gen6_vme_constant_setup(ctx, context, encode_state);
+ gen6_vme_vme_state_setup(ctx, context, encode_state, is_intra);
+
+ /*Programing media pipeline*/
+ gen6_vme_pipeline_programing(ctx, context, encode_state);
+
+ return vaStatus;
+}
+
+static VAStatus gen6_vme_run(VADriverContextP ctx,
+ VAContextID context,
+ struct mfc_encode_state *encode_state)
+{
+ intel_batchbuffer_flush(ctx);
+
+ return VA_STATUS_SUCCESS;
+}
+
+static VAStatus gen6_vme_stop(VADriverContextP ctx,
+ VAContextID context,
+ struct mfc_encode_state *encode_state)
+{
+ return VA_STATUS_SUCCESS;
+}
+
+VAStatus gen6_vme_media_pipeline(VADriverContextP ctx,
+ VAContextID context,
+ struct mfc_encode_state *encode_state)
+{
+ gen6_vme_media_init(ctx);
+ gen6_vme_prepare(ctx, context, encode_state);
+ gen6_vme_run(ctx, context, encode_state);
+ gen6_vme_stop(ctx, context, encode_state);
+
+ return VA_STATUS_SUCCESS;
+}
+
+Bool gen6_vme_init(VADriverContextP ctx)
+{
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
+ int i;
+
+ for (i = 0; i < GEN6_VME_KERNEL_NUMBER; i++) {
+ /*Load kernel into GPU memory*/
+ struct media_kernel *kernel = &gen6_vme_kernels[i];
+
+ kernel->bo = dri_bo_alloc(i965->intel.bufmgr,
+ kernel->name,
+ kernel->size,
+ 0x1000);
+ assert(kernel->bo);
+ dri_bo_subdata(kernel->bo, 0, kernel->size, kernel->bin);
+ }
+
+ return True;
+}
+
+Bool gen6_vme_terminate(VADriverContextP ctx)
+{
+ int i;
+
+ for (i = 0; i < GEN6_VME_KERNEL_NUMBER; i++) {
+ /*Release the GPU memory holding the kernel*/
+ struct media_kernel *kernel = &gen6_vme_kernels[i];
+
+ dri_bo_unreference(kernel->bo);
+ kernel->bo = NULL;
+ }
+
+ return True;
+}
diff --git a/gen6_vme.h b/gen6_vme.h
new file mode 100644
index 0000000..57b23f1
--- /dev/null
+++ b/gen6_vme.h
@@ -0,0 +1,87 @@
+/*
+ * Copyright © 2009 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Zhou Chang <chang.zhou@intel.com>
+ *
+ */
+
+#ifndef _GEN6_MEDIA_H_
+#define _GEN6_MEDIA_H_
+
+#include <xf86drm.h>
+#include <drm.h>
+#include <i915_drm.h>
+#include <intel_bufmgr.h>
+
+
+#define MAX_INTERFACE_DESC_GEN6 32
+#define MAX_MEDIA_SURFACES_GEN6 34
+
+struct mfc_encode_state;
+
+struct gen6_media_state
+{
+ struct {
+ dri_bo *bo;
+ } surface_state[MAX_MEDIA_SURFACES_GEN6];
+
+ struct {
+ dri_bo *bo;
+ } binding_table;
+
+ struct {
+ dri_bo *bo;
+ } idrt; /* interface descriptor remap table */
+
+ struct {
+ dri_bo *bo;
+ } curbe;
+
+ struct {
+ unsigned int gpgpu_mode:1;
+ unsigned int max_num_threads:16;
+ unsigned int num_urb_entries:8;
+ unsigned int urb_entry_size:16;
+ unsigned int curbe_allocation_size:16;
+ } vfe_state;
+
+ struct {
+ dri_bo *bo;
+ } vme_state;
+
+ struct {
+ dri_bo *bo;
+ unsigned int num_blocks;
+ unsigned int size_block; /* in bytes */
+ unsigned int pitch;
+ } vme_output;
+};
+
+VAStatus gen6_vme_media_pipeline(VADriverContextP ctx,
+ VAContextID context,
+ struct mfc_encode_state *encode_state);
+Bool gen6_vme_init(VADriverContextP ctx);
+Bool gen6_vme_terminate(VADriverContextP ctx);
+
+#endif /* _GEN6_MEDIA_H_ */
diff --git a/i965_defines.h b/i965_defines.h
index 678d348..dfbae3f 100644
--- a/i965_defines.h
+++ b/i965_defines.h
@@ -17,6 +17,9 @@
#define CMD_SAMPLER_PALETTE_LOAD CMD(3, 1, 2)
#define CMD_MEDIA_STATE_POINTERS CMD(2, 0, 0)
+#define CMD_MEDIA_VFE_STATE CMD(2, 0, 0)
+#define CMD_MEDIA_CURBE_LOAD CMD(2, 0, 1)
+#define CMD_MEDIA_INTERFACE_LOAD CMD(2, 0, 2)
#define CMD_MEDIA_OBJECT CMD(2, 1, 0)
#define CMD_MEDIA_OBJECT_EX CMD(2, 1, 1)
@@ -54,6 +57,8 @@
/* DW1 */
# define CMD_CLEAR_PARAMS_DEPTH_CLEAR_VALID (1 << 15)
+#define CMD_PIPE_CONTROL CMD(3, 2, 0)
+
/* for GEN6+ */
#define GEN6_3DSTATE_SAMPLER_STATE_POINTERS CMD(3, 0, 0x02)
# define GEN6_3DSTATE_SAMPLER_STATE_MODIFY_PS (1 << 12)
@@ -166,6 +171,10 @@
#define MFD_AVC_BSD_OBJECT MFX(2, 1, 1, 8)
+#define MFC_AVC_FQM_STATE MFX(2, 1, 2, 2)
+#define MFC_AVC_INSERT_OBJECT MFX(2, 1, 2, 8)
+#define MFC_AVC_PAK_OBJECT MFX(2, 1, 2, 9)
+
#define MFX_MPEG2_PIC_STATE MFX(2, 3, 0, 0)
#define MFX_MPEG2_QM_STATE MFX(2, 3, 0, 1)
diff --git a/i965_drv_video.c b/i965_drv_video.c
index e1bceba..aa746f2 100644
--- a/i965_drv_video.c
+++ b/i965_drv_video.c
@@ -41,6 +41,7 @@
#include "i965_media.h"
#include "i965_drv_video.h"
#include "i965_defines.h"
+#include "i965_encoder.h"
#define CONFIG_ID_OFFSET 0x01000000
#define CONTEXT_ID_OFFSET 0x02000000
@@ -49,19 +50,22 @@
#define IMAGE_ID_OFFSET 0x0a000000
#define SUBPIC_ID_OFFSET 0x10000000
-#define HAS_MPEG2(ctx) (IS_G4X((ctx)->intel.device_id) || \
+#define HAS_MPEG2(ctx) (IS_G4X((ctx)->intel.device_id) || \
IS_IRONLAKE((ctx)->intel.device_id) || \
(IS_GEN6((ctx)->intel.device_id) && (ctx)->intel.has_bsd))
#define HAS_H264(ctx) ((IS_GEN6((ctx)->intel.device_id) || \
IS_IRONLAKE((ctx)->intel.device_id)) && \
- (ctx)->intel.has_bsd)
+ (ctx)->intel.has_bsd)
#define HAS_VC1(ctx) (IS_GEN6((ctx)->intel.device_id) && (ctx)->intel.has_bsd)
-#define HAS_TILED_SURFACE(ctx) (IS_GEN6((ctx)->intel.device_id) && \
+#define HAS_TILED_SURFACE(ctx) (IS_GEN6((ctx)->intel.device_id) && \
(ctx)->render_state.interleaved_uv)
+#define HAS_ENCODER(ctx) (IS_GEN6((ctx)->intel.device_id) && \
+ (ctx)->intel.has_bsd)
+
enum {
I965_SURFACETYPE_RGBA = 1,
I965_SURFACETYPE_YUV,
@@ -181,6 +185,10 @@ i965_QueryConfigEntrypoints(VADriverContextP ctx,
case VAProfileH264High:
if (HAS_H264(i965))
entrypoint_list[n++] = VAEntrypointVLD;
+
+ if (HAS_ENCODER(i965))
+ entrypoint_list[n++] = VAEntrypointEncSlice;
+
break;
case VAProfileVC1Simple:
@@ -217,6 +225,10 @@ i965_GetConfigAttributes(VADriverContextP ctx,
attrib_list[i].value = VA_RT_FORMAT_YUV420;
break;
+ case VAConfigAttribRateControl:
+ attrib_list[i].value = VA_RC_VBR;
+ break;
+
default:
/* Do nothing */
attrib_list[i].value = VA_ATTRIB_NOT_SUPPORTED;
@@ -286,7 +298,8 @@ i965_CreateConfig(VADriverContextP ctx,
case VAProfileH264Baseline:
case VAProfileH264Main:
case VAProfileH264High:
- if (HAS_H264(i965) && VAEntrypointVLD == entrypoint) {
+ if ((HAS_H264(i965) && VAEntrypointVLD == entrypoint) ||
+ (HAS_ENCODER(i965) && VAEntrypointEncSlice == entrypoint)) {
vaStatus = VA_STATUS_SUCCESS;
} else {
vaStatus = VA_STATUS_ERROR_UNSUPPORTED_ENTRYPOINT;
@@ -546,7 +559,7 @@ i965_QuerySubpictureFormats(VADriverContextP ctx,
static void
i965_destroy_subpic(struct object_heap *heap, struct object_base *obj)
{
-// struct object_subpic *obj_subpic = (struct object_subpic *)obj;
+ // struct object_subpic *obj_subpic = (struct object_subpic *)obj;
object_heap_free(heap, obj);
}
@@ -682,7 +695,7 @@ i965_DeassociateSubpicture(VADriverContextP ctx,
return VA_STATUS_SUCCESS;
}
-static void
+void
i965_reference_buffer_store(struct buffer_store **ptr,
struct buffer_store *buffer_store)
{
@@ -694,7 +707,7 @@ i965_reference_buffer_store(struct buffer_store **ptr,
}
}
-static void
+void
i965_release_buffer_store(struct buffer_store **ptr)
{
struct buffer_store *buffer_store = *ptr;
@@ -790,16 +803,9 @@ i965_CreateContext(VADriverContextP ctx,
break;
}
- obj_context->context_id = contextID;
*context = contextID;
- memset(&obj_context->decode_state, 0, sizeof(obj_context->decode_state));
- obj_context->decode_state.current_render_target = -1;
- obj_context->decode_state.max_slice_params = NUM_SLICES;
- obj_context->decode_state.max_slice_datas = NUM_SLICES;
- obj_context->decode_state.slice_params = calloc(obj_context->decode_state.max_slice_params,
- sizeof(*obj_context->decode_state.slice_params));
- obj_context->decode_state.slice_datas = calloc(obj_context->decode_state.max_slice_datas,
- sizeof(*obj_context->decode_state.slice_datas));
+ obj_context->flags = flag;
+ obj_context->context_id = contextID;
obj_context->config_id = config_id;
obj_context->picture_width = picture_width;
obj_context->picture_height = picture_height;
@@ -816,7 +822,22 @@ i965_CreateContext(VADriverContextP ctx,
obj_context->render_targets[i] = render_targets[i];
}
- obj_context->flags = flag;
+ if (VA_STATUS_SUCCESS == vaStatus) {
+ if (VAEntrypointEncSlice == obj_config->entrypoint ) { /*encode routine only*/
+ memset(&obj_context->encode_state, 0, sizeof(obj_context->encode_state));
+ vaStatus = i965_encoder_create_context(ctx, config_id, picture_width, picture_height,
+ flag, render_targets, num_render_targets, obj_context);
+ } else {
+ memset(&obj_context->decode_state, 0, sizeof(obj_context->decode_state));
+ obj_context->decode_state.current_render_target = -1;
+ obj_context->decode_state.max_slice_params = NUM_SLICES;
+ obj_context->decode_state.max_slice_datas = NUM_SLICES;
+ obj_context->decode_state.slice_params = calloc(obj_context->decode_state.max_slice_params,
+ sizeof(*obj_context->decode_state.slice_params));
+ obj_context->decode_state.slice_datas = calloc(obj_context->decode_state.max_slice_datas,
+ sizeof(*obj_context->decode_state.slice_datas));
+ }
+ }
/* Error recovery */
if (VA_STATUS_SUCCESS != vaStatus) {
@@ -831,9 +852,19 @@ i965_DestroyContext(VADriverContextP ctx, VAContextID context)
{
struct i965_driver_data *i965 = i965_driver_data(ctx);
struct object_context *obj_context = CONTEXT(context);
+ struct object_config *obj_config;
+ VAContextID config;
assert(obj_context);
- i965_destroy_context(&i965->context_heap, (struct object_base *)obj_context);
+ config = obj_context->config_id;
+ obj_config = CONFIG(config);
+ assert(obj_config);
+
+ if (VAEntrypointEncSlice == obj_config->entrypoint ){
+ i965_encoder_destroy_context(&i965->context_heap, (struct object_base *)obj_context);
+ } else {
+ i965_destroy_context(&i965->context_heap, (struct object_base *)obj_context);
+ }
return VA_STATUS_SUCCESS;
}
@@ -875,6 +906,10 @@ i965_create_buffer_internal(VADriverContextP ctx,
case VAResidualDataBufferType:
case VADeblockingParameterBufferType:
case VAImageBufferType:
+ case VAEncCodedBufferType:
+ case VAEncSequenceParameterBufferType:
+ case VAEncPictureParameterBufferType:
+ case VAEncSliceParameterBufferType:
/* Ok */
break;
@@ -889,6 +924,10 @@ i965_create_buffer_internal(VADriverContextP ctx,
return VA_STATUS_ERROR_ALLOCATION_FAILED;
}
+ if (type == VAEncCodedBufferType) {
+ size += ALIGN(sizeof(VACodedBufferSegment), 64);
+ }
+
obj_buffer->max_num_elements = num_elements;
obj_buffer->num_elements = num_elements;
obj_buffer->size_element = size;
@@ -904,14 +943,26 @@ i965_create_buffer_internal(VADriverContextP ctx,
if (data)
dri_bo_subdata(buffer_store->bo, 0, size * num_elements, data);
- } else if (type == VASliceDataBufferType || type == VAImageBufferType) {
+ } else if (type == VASliceDataBufferType || type == VAImageBufferType || type == VAEncCodedBufferType) {
buffer_store->bo = dri_bo_alloc(i965->intel.bufmgr,
"Buffer",
size * num_elements, 64);
assert(buffer_store->bo);
- if (data)
+ if (type == VAEncCodedBufferType) {
+ VACodedBufferSegment *coded_buffer_segment;
+ dri_bo_map(buffer_store->bo, 1);
+ coded_buffer_segment = (VACodedBufferSegment *)buffer_store->bo->virtual;
+ coded_buffer_segment->size = size - ALIGN(sizeof(VACodedBufferSegment), 64);
+ coded_buffer_segment->bit_offset = 0;
+ coded_buffer_segment->status = 0;
+ coded_buffer_segment->buf = NULL;
+ coded_buffer_segment->next = NULL;
+ dri_bo_unmap(buffer_store->bo);
+ } else if (data) {
dri_bo_subdata(buffer_store->bo, 0, size * num_elements, data);
+ }
+
} else {
buffer_store->buffer = malloc(size * num_elements);
assert(buffer_store->buffer);
@@ -990,6 +1041,12 @@ i965_MapBuffer(VADriverContextP ctx,
assert(obj_buffer->buffer_store->bo->virtual);
*pbuf = obj_buffer->buffer_store->bo->virtual;
+
+ if (obj_buffer->type == VAEncCodedBufferType) {
+ VACodedBufferSegment *coded_buffer_segment = (VACodedBufferSegment *)(obj_buffer->buffer_store->bo->virtual);
+ coded_buffer_segment->buf = (unsigned char *)(obj_buffer->buffer_store->bo->virtual) + ALIGN(sizeof(VACodedBufferSegment), 64);
+ }
+
vaStatus = VA_STATUS_SUCCESS;
} else if (NULL != obj_buffer->buffer_store->buffer) {
*pbuf = obj_buffer->buffer_store->buffer;
@@ -1061,6 +1118,11 @@ i965_BeginPicture(VADriverContextP ctx,
obj_config = CONFIG(config);
assert(obj_config);
+ if (VAEntrypointEncSlice == obj_config->entrypoint ){
+ vaStatus = i965_encoder_begin_picture(ctx, context, render_target);
+ return vaStatus;
+ }
+
switch (obj_config->profile) {
case VAProfileMPEG2Simple:
case VAProfileMPEG2Main:
@@ -1186,12 +1248,23 @@ i965_RenderPicture(VADriverContextP ctx,
{
struct i965_driver_data *i965 = i965_driver_data(ctx);
struct object_context *obj_context;
+ struct object_config *obj_config;
+ VAContextID config;
int i;
VAStatus vaStatus = VA_STATUS_ERROR_UNKNOWN;
obj_context = CONTEXT(context);
assert(obj_context);
+ config = obj_context->config_id;
+ obj_config = CONFIG(config);
+ assert(obj_config);
+
+ if (VAEntrypointEncSlice == obj_config->entrypoint ){
+ vaStatus = i965_encoder_render_picture(ctx, context, buffers, num_buffers);
+ return vaStatus;
+ }
+
for (i = 0; i < num_buffers; i++) {
struct object_buffer *obj_buffer = BUFFER(buffers[i]);
assert(obj_buffer);
@@ -1235,15 +1308,19 @@ i965_EndPicture(VADriverContextP ctx, VAContextID context)
int i;
assert(obj_context);
+ config = obj_context->config_id;
+ obj_config = CONFIG(config);
+ assert(obj_config);
+
+ if (VAEntrypointEncSlice == obj_config->entrypoint ){
+ return i965_encoder_end_picture(ctx, context);
+ }
+
assert(obj_context->decode_state.pic_param);
assert(obj_context->decode_state.num_slice_params >= 1);
assert(obj_context->decode_state.num_slice_datas >= 1);
assert(obj_context->decode_state.num_slice_params == obj_context->decode_state.num_slice_datas);
- config = obj_context->config_id;
- obj_config = CONFIG(config);
- assert(obj_config);
-
i965_media_decode_picture(ctx, obj_config->profile, &obj_context->decode_state);
obj_context->decode_state.current_render_target = -1;
obj_context->decode_state.num_slice_params = 0;
@@ -1378,6 +1455,9 @@ i965_Init(VADriverContextP ctx)
if (i965_render_init(ctx) == False)
return VA_STATUS_ERROR_UNKNOWN;
+ if (HAS_ENCODER(i965) && (i965_encoder_init(ctx) == False))
+ return VA_STATUS_ERROR_UNKNOWN;
+
return VA_STATUS_SUCCESS;
}
@@ -1520,7 +1600,7 @@ i965_CreateImage(VADriverContextP ctx,
*out_image = *image;
return VA_STATUS_SUCCESS;
-error:
+ error:
i965_DestroyImage(ctx, image_id);
return va_status;
}
@@ -1657,7 +1737,7 @@ VAStatus i965_DeriveImage(VADriverContextP ctx,
return VA_STATUS_SUCCESS;
-error:
+ error:
i965_DestroyImage(ctx, image_id);
return va_status;
}
@@ -2003,9 +2083,9 @@ i965_PutSurface(VADriverContextP ctx,
pp_flag |= I965_PP_FLAG_DEINTERLACING;
intel_render_put_surface(ctx, surface,
- srcx, srcy, srcw, srch,
- destx, desty, destw, desth,
- pp_flag);
+ srcx, srcy, srcw, srch,
+ destx, desty, destw, desth,
+ pp_flag);
if(obj_surface->subpic != VA_INVALID_ID) {
intel_render_put_subpicture(ctx, surface,
@@ -2033,6 +2113,9 @@ i965_Terminate(VADriverContextP ctx)
{
struct i965_driver_data *i965 = i965_driver_data(ctx);
+ if (HAS_ENCODER(i965) && (i965_encoder_terminate(ctx) == False))
+ return VA_STATUS_ERROR_UNKNOWN;
+
if (i965_render_terminate(ctx) == False)
return VA_STATUS_ERROR_UNKNOWN;
@@ -2116,7 +2199,7 @@ VA_DRIVER_INIT_FUNC( VADriverContextP ctx )
vtable->vaQueryDisplayAttributes = i965_QueryDisplayAttributes;
vtable->vaGetDisplayAttributes = i965_GetDisplayAttributes;
vtable->vaSetDisplayAttributes = i965_SetDisplayAttributes;
-// vtable->vaDbgCopySurfaceToBuffer = i965_DbgCopySurfaceToBuffer;
+ // vtable->vaDbgCopySurfaceToBuffer = i965_DbgCopySurfaceToBuffer;
i965 = (struct i965_driver_data *)calloc(1, sizeof(*i965));
assert(i965);
diff --git a/i965_drv_video.h b/i965_drv_video.h
index 16694d0..3314285 100644
--- a/i965_drv_video.h
+++ b/i965_drv_video.h
@@ -40,6 +40,9 @@
#include "i965_media.h"
#include "i965_render.h"
+#include "gen6_vme.h"
+#include "gen6_mfc.h"
+
#define I965_MAX_PROFILES 11
#define I965_MAX_ENTRYPOINTS 5
#define I965_MAX_CONFIG_ATTRIBUTES 10
@@ -81,17 +84,32 @@ struct decode_state
int num_slice_datas;
};
+//keeping mfc encoder's stuff here
+struct mfc_encode_state
+{
+ struct buffer_store *seq_param;
+ struct buffer_store *pic_param;
+ struct buffer_store *pic_control;
+ struct buffer_store *iq_matrix;
+ struct buffer_store *q_matrix;
+ struct buffer_store **slice_params;
+ VASurfaceID current_render_target;
+ int max_slice_params;
+ int num_slice_params;
+};
+
struct object_context
{
struct object_base base;
VAContextID context_id;
VAConfigID config_id;
- VASurfaceID *render_targets;
+ VASurfaceID *render_targets; //input->encode, output->decode
int num_render_targets;
int picture_width;
int picture_height;
int flags;
struct decode_state decode_state;
+ struct mfc_encode_state encode_state;
};
#define SURFACE_REFERENCED (1 << 0)
@@ -168,6 +186,8 @@ struct i965_driver_data
struct i965_media_state media_state;
struct i965_render_state render_state;
void *pp_context;
+ struct gen6_media_state gen6_media_state;
+ struct gen6_mfc_bcs_state gen6_mfc_bcs_state;
};
#define NEW_CONFIG_ID() object_heap_allocate(&i965->config_heap);
diff --git a/i965_encoder.c b/i965_encoder.c
new file mode 100644
index 0000000..cb21344
--- /dev/null
+++ b/i965_encoder.c
@@ -0,0 +1,385 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Zhou Chang <chang.zhou@intel.com>
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+
+#include <va/va_backend.h>
+
+#include "intel_batchbuffer.h"
+#include "intel_driver.h"
+
+#include "i965_defines.h"
+#include "i965_drv_video.h"
+#include "i965_encoder.h"
+
+extern void i965_reference_buffer_store(struct buffer_store **ptr,
+ struct buffer_store *buffer_store);
+extern void i965_release_buffer_store(struct buffer_store **ptr);
+
+static VAStatus i965_encoder_render_squence_parameter_buffer(VADriverContextP ctx,
+ struct object_context *obj_context,
+ struct object_buffer *obj_buffer)
+{
+ assert(obj_buffer->buffer_store->bo == NULL);
+ assert(obj_buffer->buffer_store->buffer);
+ i965_release_buffer_store(&obj_context->encode_state.seq_param);
+ i965_reference_buffer_store(&obj_context->encode_state.seq_param,
+ obj_buffer->buffer_store);
+
+ return VA_STATUS_SUCCESS;
+}
+
+
+static VAStatus i965_encoder_render_picture_parameter_buffer(VADriverContextP ctx,
+ struct object_context *obj_context,
+ struct object_buffer *obj_buffer)
+{
+ assert(obj_buffer->buffer_store->bo == NULL);
+ assert(obj_buffer->buffer_store->buffer);
+ i965_release_buffer_store(&obj_context->encode_state.pic_param);
+ i965_reference_buffer_store(&obj_context->encode_state.pic_param,
+ obj_buffer->buffer_store);
+
+ return VA_STATUS_SUCCESS;
+}
+
+static VAStatus i965_encoder_render_slice_parameter_buffer(VADriverContextP ctx,
+ struct object_context *obj_context,
+ struct object_buffer *obj_buffer)
+{
+ if (obj_context->encode_state.num_slice_params == obj_context->encode_state.max_slice_params) {
+ obj_context->encode_state.slice_params = realloc(obj_context->encode_state.slice_params,
+ (obj_context->encode_state.max_slice_params + NUM_SLICES) * sizeof(*obj_context->encode_state.slice_params));
+ memset(obj_context->encode_state.slice_params + obj_context->encode_state.max_slice_params, 0, NUM_SLICES * sizeof(*obj_context->encode_state.slice_params));
+ obj_context->encode_state.max_slice_params += NUM_SLICES;
+ }
+
+ i965_release_buffer_store(&obj_context->encode_state.slice_params[obj_context->encode_state.num_slice_params]);
+ i965_reference_buffer_store(&obj_context->encode_state.slice_params[obj_context->encode_state.num_slice_params],
+ obj_buffer->buffer_store);
+ obj_context->encode_state.num_slice_params++;
+
+ return VA_STATUS_SUCCESS;
+}
+
+static void i965_encoder_render_picture_control_buffer(VADriverContextP ctx,
+ struct object_context *obj_context,
+ struct object_buffer *obj_buffer)
+{
+ assert(obj_buffer->buffer_store->bo == NULL);
+ assert(obj_buffer->buffer_store->buffer);
+ i965_release_buffer_store(&obj_context->encode_state.pic_control);
+ i965_reference_buffer_store(&obj_context->encode_state.pic_control,
+ obj_buffer->buffer_store);
+}
+
+static void i965_encoder_render_qmatrix_buffer(VADriverContextP ctx,
+ struct object_context *obj_context,
+ struct object_buffer *obj_buffer)
+{
+ assert(obj_buffer->buffer_store->bo == NULL);
+ assert(obj_buffer->buffer_store->buffer);
+ i965_release_buffer_store(&obj_context->encode_state.q_matrix);
+ i965_reference_buffer_store(&obj_context->encode_state.q_matrix,
+ obj_buffer->buffer_store);
+}
+
+static void i965_encoder_render_iqmatrix_buffer(VADriverContextP ctx,
+ struct object_context *obj_context,
+ struct object_buffer *obj_buffer)
+{
+ assert(obj_buffer->buffer_store->bo == NULL);
+ assert(obj_buffer->buffer_store->buffer);
+ i965_release_buffer_store(&obj_context->encode_state.iq_matrix);
+ i965_reference_buffer_store(&obj_context->encode_state.iq_matrix,
+ obj_buffer->buffer_store);
+}
+
+VAStatus i965_encoder_create_context(VADriverContextP ctx,
+ VAConfigID config_id,
+ int picture_width,
+ int picture_height,
+ int flag,
+ VASurfaceID *render_targets,
+ int num_render_targets,
+ struct object_context *obj_context)
+{
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
+ struct object_config *obj_config = CONFIG(config_id);
+ VAStatus vaStatus = VA_STATUS_SUCCESS;
+
+
+ if (NULL == obj_config) {
+ vaStatus = VA_STATUS_ERROR_INVALID_CONFIG;
+ return vaStatus;
+ }
+
+ if (NULL == obj_context) {
+ vaStatus = VA_STATUS_ERROR_ALLOCATION_FAILED;
+ return vaStatus;
+ }
+
+ if( VAProfileH264Baseline != obj_config->profile ||
+ VAEntrypointEncSlice != obj_config->entrypoint) {
+ vaStatus = VA_STATUS_ERROR_UNSUPPORTED_ENTRYPOINT;
+ return vaStatus;
+ }
+
+ /* encode_state init */
+ obj_context->encode_state.current_render_target = VA_INVALID_ID;
+ obj_context->encode_state.max_slice_params = NUM_SLICES;
+ obj_context->encode_state.slice_params = calloc(obj_context->encode_state.max_slice_params,
+ sizeof(*obj_context->encode_state.slice_params));
+
+ return vaStatus;
+}
+
+
+VAStatus i965_encoder_begin_picture(VADriverContextP ctx,
+ VAContextID context,
+ VASurfaceID render_target)
+{
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
+ struct object_context *obj_context = CONTEXT(context);
+ struct object_surface *obj_surface = SURFACE(render_target);
+ struct object_config *obj_config;
+ VAContextID config;
+ VAStatus vaStatus;
+
+ assert(obj_context);
+ assert(obj_surface);
+
+ config = obj_context->config_id;
+ obj_config = CONFIG(config);
+ assert(obj_config);
+
+ if( VAProfileH264Baseline != obj_config->profile ||
+ VAEntrypointEncSlice != obj_config->entrypoint){
+ vaStatus = VA_STATUS_ERROR_UNSUPPORTED_ENTRYPOINT;
+ }else{
+ vaStatus = VA_STATUS_SUCCESS;
+ }
+
+ obj_context->encode_state.current_render_target = render_target; /*This is input new frame*/
+
+ return vaStatus;
+}
+
+VAStatus i965_encoder_render_picture(VADriverContextP ctx,
+ VAContextID context,
+ VABufferID *buffers,
+ int num_buffers)
+{
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
+ struct object_context *obj_context = CONTEXT(context);
+ struct object_config *obj_config;
+ VAContextID config;
+ VAStatus vaStatus;
+ int i;
+
+ assert(obj_context);
+ config = obj_context->config_id;
+ obj_config = CONFIG(config);
+ assert(obj_config);
+
+
+ for (i = 0; i < num_buffers; i++) {
+ struct object_buffer *obj_buffer = BUFFER(buffers[i]);
+ assert(obj_buffer);
+
+ switch (obj_buffer->type) {
+ case VAEncSequenceParameterBufferType:
+ i965_encoder_render_squence_parameter_buffer(ctx, obj_context, obj_buffer);
+ break;
+
+ case VAEncPictureParameterBufferType:
+ i965_encoder_render_picture_parameter_buffer(ctx, obj_context, obj_buffer);
+ break;
+
+ case VAEncSliceParameterBufferType:
+ i965_encoder_render_slice_parameter_buffer(ctx, obj_context, obj_buffer);
+ break;
+
+ case VAPictureParameterBufferType:
+ i965_encoder_render_picture_control_buffer(ctx, obj_context, obj_buffer);
+ break;
+
+ case VAQMatrixBufferType:
+ i965_encoder_render_qmatrix_buffer(ctx, obj_context, obj_buffer);
+ break;
+
+ case VAIQMatrixBufferType:
+ i965_encoder_render_iqmatrix_buffer(ctx, obj_context, obj_buffer);
+ break;
+
+ default:
+ break;
+ }
+ }
+
+ vaStatus = VA_STATUS_SUCCESS;
+ return vaStatus;
+}
+
+static VAStatus
+gen6_encoder_end_picture(VADriverContextP ctx,
+ VAContextID context,
+ struct mfc_encode_state *encode_state)
+{
+ VAStatus vaStatus;
+
+ vaStatus = gen6_vme_media_pipeline(ctx, context, encode_state);
+
+ if (vaStatus == VA_STATUS_SUCCESS)
+ vaStatus = gen6_mfc_pipeline(ctx, context, encode_state);
+
+ return vaStatus;
+}
+
+VAStatus i965_encoder_end_picture(VADriverContextP ctx, VAContextID context)
+{
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
+ struct object_context *obj_context = CONTEXT(context);
+ struct object_config *obj_config;
+ VAContextID config;
+ VAStatus vaStatus;
+ int i;
+
+ assert(obj_context);
+ config = obj_context->config_id;
+ obj_config = CONFIG(config);
+ assert(obj_config);
+
+ assert(obj_context->encode_state.pic_param);
+ assert(obj_context->encode_state.num_slice_params >= 1);
+
+ if (IS_GEN6(i965->intel.device_id)) {
+ vaStatus = gen6_encoder_end_picture(ctx, context, &(obj_context->encode_state));
+ } else {
+ /* add for other chipset */
+ assert(0);
+ }
+
+ for (i = 0; i < obj_context->encode_state.num_slice_params; i++) {
+ i965_release_buffer_store(&obj_context->encode_state.slice_params[i]);
+ }
+
+ obj_context->encode_state.current_render_target = VA_INVALID_SURFACE;
+ obj_context->encode_state.num_slice_params = 0;
+ i965_release_buffer_store(&obj_context->encode_state.pic_param);
+
+ return VA_STATUS_SUCCESS;
+}
+
+
+void i965_encoder_destroy_context(struct object_heap *heap, struct object_base *obj)
+{
+ struct object_context *obj_context = (struct object_context *)obj;
+
+ assert(obj_context->encode_state.num_slice_params <= obj_context->encode_state.max_slice_params);
+
+ i965_release_buffer_store(&obj_context->encode_state.pic_param);
+ i965_release_buffer_store(&obj_context->encode_state.seq_param);
+
+ free(obj_context->render_targets);
+ object_heap_free(heap, obj);
+}
+
+Bool i965_encoder_init(VADriverContextP ctx)
+{
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
+
+ if (IS_GEN6(i965->intel.device_id)) {
+ gen6_vme_init(ctx);
+ }
+
+ return True;
+}
+
+Bool i965_encoder_terminate(VADriverContextP ctx)
+{
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
+ struct gen6_media_state *media_state = &i965->gen6_media_state;
+ struct gen6_mfc_bcs_state *bcs_state = &i965->gen6_mfc_bcs_state;
+ int i;
+
+ if (IS_GEN6(i965->intel.device_id)) {
+ gen6_vme_terminate(ctx);
+ }
+
+ for (i = 0; i < MAX_MEDIA_SURFACES_GEN6; i++) {
+ dri_bo_unreference(media_state->surface_state[i].bo);
+ media_state->surface_state[i].bo = NULL;
+ }
+
+ dri_bo_unreference(media_state->idrt.bo);
+ media_state->idrt.bo = NULL;
+
+ dri_bo_unreference(media_state->binding_table.bo);
+ media_state->binding_table.bo = NULL;
+
+ dri_bo_unreference(media_state->curbe.bo);
+ media_state->curbe.bo = NULL;
+
+ dri_bo_unreference(media_state->vme_output.bo);
+ media_state->vme_output.bo = NULL;
+
+ dri_bo_unreference(media_state->vme_state.bo);
+ media_state->vme_state.bo = NULL;
+
+ dri_bo_unreference(bcs_state->post_deblocking_output.bo);
+ bcs_state->post_deblocking_output.bo = NULL;
+
+ dri_bo_unreference(bcs_state->pre_deblocking_output.bo);
+ bcs_state->pre_deblocking_output.bo = NULL;
+
+ dri_bo_unreference(bcs_state->uncompressed_picture_source.bo);
+ bcs_state->uncompressed_picture_source.bo = NULL;
+
+ dri_bo_unreference(bcs_state->mfc_indirect_pak_bse_object.bo);
+ bcs_state->mfc_indirect_pak_bse_object.bo = NULL;
+
+ for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
+ dri_bo_unreference(bcs_state->direct_mv_buffers[i].bo);
+ bcs_state->direct_mv_buffers[i].bo = NULL;
+ }
+
+ dri_bo_unreference(bcs_state->intra_row_store_scratch_buffer.bo);
+ bcs_state->intra_row_store_scratch_buffer.bo = NULL;
+
+ dri_bo_unreference(bcs_state->deblocking_filter_row_store_scratch_buffer.bo);
+ bcs_state->deblocking_filter_row_store_scratch_buffer.bo = NULL;
+
+ dri_bo_unreference(bcs_state->bsd_mpc_row_store_scratch_buffer.bo);
+ bcs_state->bsd_mpc_row_store_scratch_buffer.bo = NULL;
+
+ return True;
+}
diff --git a/i965_encoder.h b/i965_encoder.h
new file mode 100644
index 0000000..e1e0b16
--- /dev/null
+++ b/i965_encoder.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Zhou chang <chang.zhou@intel.com>
+ *
+ */
+
+#ifndef _I965_ENCODER_H_
+#define _I965_ENCODER_H_
+
+#include <xf86drm.h>
+#include <drm.h>
+#include <i915_drm.h>
+#include <intel_bufmgr.h>
+#include "i965_structs.h"
+#include "i965_drv_video.h"
+
+
+VAStatus i965_encoder_create_context(
+ VADriverContextP ctx,
+ VAConfigID config_id,
+ int picture_width,
+ int picture_height,
+ int flag,
+ VASurfaceID *render_targets,
+ int num_render_targets,
+ struct object_context *obj_context
+ );
+
+VAStatus i965_encoder_begin_picture(
+ VADriverContextP ctx,
+ VAContextID context,
+ VASurfaceID render_target
+ );
+
+VAStatus i965_encoder_render_picture(VADriverContextP ctx,
+ VAContextID context,
+ VABufferID *buffers,
+ int num_buffers
+ );
+
+VAStatus i965_encoder_end_picture(VADriverContextP ctx,
+ VAContextID context
+ );
+
+
+void i965_encoder_destroy_context(struct object_heap *heap, struct object_base *obj);
+
+Bool i965_encoder_init(VADriverContextP ctx);
+Bool i965_encoder_terminate(VADriverContextP ctx);
+
+#endif /* _I965_ENCODER_H_ */
+
+
diff --git a/intel_batchbuffer.c b/intel_batchbuffer.c
index 9d623f1..f4e629a 100644
--- a/intel_batchbuffer.c
+++ b/intel_batchbuffer.c
@@ -431,3 +431,19 @@ intel_batchbuffer_check_batchbuffer_flag(VADriverContextP ctx, int flag)
intel_batchbuffer_flush_helper(ctx, intel->batch);
intel->batch->flag = flag;
}
+
+int
+intel_batchbuffer_check_free_space(VADriverContextP ctx, int size)
+{
+ struct intel_driver_data *intel = intel_driver_data(ctx);
+
+ return intel_batchbuffer_space_helper(intel->batch) >= size;
+}
+
+int
+intel_batchbuffer_check_free_space_bcs(VADriverContextP ctx, int size)
+{
+ struct intel_driver_data *intel = intel_driver_data(ctx);
+
+ return intel_batchbuffer_space_helper(intel->batch_bcs) >= size;
+}
diff --git a/intel_batchbuffer.h b/intel_batchbuffer.h
index 25652e1..77174fe 100644
--- a/intel_batchbuffer.h
+++ b/intel_batchbuffer.h
@@ -59,6 +59,9 @@ void intel_batchbuffer_advance_batch_bcs(VADriverContextP ctx);
void intel_batchbuffer_check_batchbuffer_flag(VADriverContextP ctx, int flag);
+int intel_batchbuffer_check_free_space(VADriverContextP ctx, int size);
+int intel_batchbuffer_check_free_space_bcs(VADriverContextP ctx, int size);
+
#define __BEGIN_BATCH(ctx, n, flag) do { \
intel_batchbuffer_check_batchbuffer_flag(ctx, flag); \
intel_batchbuffer_require_space(ctx, (n) * 4); \
diff --git a/shaders/Makefile.am b/shaders/Makefile.am
index e2b6223..17770a9 100644
--- a/shaders/Makefile.am
+++ b/shaders/Makefile.am
@@ -1 +1 @@
-SUBDIRS = h264 mpeg2 render post_processing
+SUBDIRS = h264 mpeg2 render post_processing vme
diff --git a/shaders/vme/Makefile.am b/shaders/vme/Makefile.am
new file mode 100644
index 0000000..2a8175f
--- /dev/null
+++ b/shaders/vme/Makefile.am
@@ -0,0 +1,20 @@
+INTEL_G6B = intra_frame.g6b inter_frame.g6b
+INTEL_INC = vme_header.inc
+
+EXTRA_DIST = $(INTEL_G6B) \
+ $(INTEL_INC)
+
+if HAVE_GEN4ASM
+
+SUFFIXES = .asm .g6b
+.asm.g6b:
+ m4 $*.asm > $*.g6m && intel-gen4asm -g 6 -o $@ $*.g6m && rm $*.g6m
+
+$(INTEL_G6B): $(INTEL_INC)
+
+BUILT_SOURCES= $(INTEL_G6B)
+
+clean-local:
+ -rm -f $(INTEL_G6B)
+
+endif
diff --git a/shaders/vme/inter_frame.asm b/shaders/vme/inter_frame.asm
new file mode 100644
index 0000000..d589344
--- /dev/null
+++ b/shaders/vme/inter_frame.asm
@@ -0,0 +1,111 @@
+/*
+ * Copyright © <2010>, Intel Corporation.
+ *
+ * This program is licensed under the terms and conditions of the
+ * Eclipse Public License (EPL), version 1.0. The full text of the EPL is at
+ * http://www.opensource.org/licenses/eclipse-1.0.php.
+ *
+ */
+// Module name: InterFrame.asm
+//
+// Make inter prediction estimation for Inter frame
+//
+
+//
+// Now, begin source code....
+//
+
+include(`vme_header.inc')
+
+/*
+ * __START
+ */
+__INTER_START:
+mov (16) tmp_reg0.0<1>:UD 0x0:UD {align1};
+mov (16) tmp_reg2.0<1>:UD 0x0:UD {align1};
+
+/*
+ * Media Read Message -- fetch neighbor edge pixels
+ */
+/* ROW */
+// mul (2) tmp_reg0.0<1>:D orig_xy_ub<2,2,1>:UB 16:UW {align1}; /* (x, y) * 16 */
+// add (1) tmp_reg0.0<1>:D tmp_reg0.0<0,1,0>:D -8:W {align1}; /* X offset */
+// add (1) tmp_reg0.4<1>:D tmp_reg0.4<0,1,0>:D -1:W {align1}; /* Y offset */
+// mov (1) tmp_reg0.8<1>:UD BLOCK_32X1 {align1};
+// mov (1) tmp_reg0.20<1>:UB thread_id_ub {align1}; /* dispatch id */
+// mov (8) msg_reg0.0<1>:UD tmp_reg0.0<8,8,1>:UD {align1};
+// send (16) 0 INEP_ROW null read(BIND_IDX_INEP, 0, 0, 4) mlen 1 rlen 1 {align1};
+
+/* COL */
+// mul (2) tmp_reg0.0<1>:D orig_xy_ub<2,2,1>:UB 16:UW {align1}; /* (x, y) * 16 */
+// add (1) tmp_reg0.0<1>:D tmp_reg0.0<0,1,0>:D -4:W {align1}; /* X offset */
+// mov (1) tmp_reg0.8<1>:UD BLOCK_4X16 {align1};
+// mov (1) tmp_reg0.20<1>:UB thread_id_ub {align1}; /* dispatch id */
+// mov (8) msg_reg0.0<1>:UD tmp_reg0.0<8,8,1>:UD {align1};
+// send (16) 0 INEP_COL0 null read(BIND_IDX_INEP, 0, 0, 4) mlen 1 rlen 2 {align1};
+
+/*
+ * VME message
+ */
+/* m0 */
+mul (2) tmp_reg0.0<1>:UW orig_xy_ub<2,2,1>:UB 16:UW {align1}; /* (x, y) * 16 */
+mov (1) tmp_reg0.8<1>:UD tmp_reg0.0<0,1,0>:UD {align1};
+mov (1) tmp_reg0.12<1>:UD INTER_SAD_HAAR + INTRA_SAD_HAAR + SUB_PEL_MODE_QUARTER:UD {align1}; /* 16x16 Source, 1/4 pixel, harr */
+mov (1) tmp_reg0.20<1>:UB thread_id_ub {align1}; /* dispatch id */
+mov (1) tmp_reg0.22<1>:UW REF_REGION_SIZE {align1}; /* Reference Width&Height, 32x32 */
+mov (8) msg_reg0.0<1>:UD tmp_reg0.0<8,8,1>:UD {align1};
+
+/* m1 */
+mov (1) tmp_reg1.4<1>:UD BI_SUB_MB_PART_MASK + MAX_NUM_MV:UD {align1}; /* Default value MAX 32 MVs */
+
+mov (1) intra_part_mask_ub<1>:UB LUMA_INTRA_8x8_DISABLE + LUMA_INTRA_4x4_DISABLE {align1};
+
+// cmp.nz.f0.0 (1) null<1>:UW orig_x_ub<0,1,0>:UB 0:UW {align1}; /* X != 0 */
+// (f0.0) add (1) mb_intra_struct_ub<1>:UB mb_intra_struct_ub<0,1,0>:UB INTRA_PRED_AVAIL_FLAG_AE {align1}; /* A */
+
+// cmp.nz.f0.0 (1) null<1>:UW orig_y_ub<0,1,0>:UB 0:UW {align1}; /* Y != 0 */
+// (f0.0) add (1) mb_intra_struct_ub<1>:UB mb_intra_struct_ub<0,1,0>:UB INTRA_PRED_AVAIL_FLAG_B {align1}; /* B */
+
+// mul.nz.f0.0 (1) null<1>:UW orig_x_ub<0,1,0>:UB orig_y_ub<0,1,0>:UB {align1}; /* X * Y != 0 */
+// (f0.0) add (1) mb_intra_struct_ub<1>:UB mb_intra_struct_ub<0,1,0>:UB INTRA_PRED_AVAIL_FLAG_D {align1}; /* D */
+
+// add (1) tmp_x_w<1>:W orig_x_ub<0,1,0>:UB 1:UW {align1}; /* X + 1 */
+// add (1) tmp_x_w<1>:W w_in_mb_uw<0,1,0>:UW -tmp_x_w<0,1,0>:W {align1}; /* width - (X + 1) */
+// mul.nz.f0.0 (1) null<1>:UD tmp_x_w<0,1,0>:W orig_y_ub<0,1,0>:UB {align1}; /* (width - (X + 1)) * Y != 0 */
+// (f0.0) add (1) mb_intra_struct_ub<1>:UB mb_intra_struct_ub<0,1,0>:UB INTRA_PRED_AVAIL_FLAG_C {align1}; /* C */
+
+mov (8) msg_reg1<1>:UD tmp_reg1.0<8,8,1>:UD {align1};
+
+/* m2 */
+mov (8) msg_reg2<1>:UD INEP_ROW.0<8,8,1>:UD {align1};
+
+/* m3 */
+mov (8) msg_reg3<1>:UD 0x0 {align1};
+mov (16) msg_reg3.0<1>:UB INEP_COL0.3<32,8,4>:UB {align1};
+mov (1) msg_reg3.16<1>:UD INTRA_PREDICTORE_MODE {align1};
+
+send (8) 0 vme_wb null vme(BIND_IDX_VME,0,0,VME_MESSAGE_TYPE_INTER) mlen 4 rlen 4 {align1};
+
+/*
+ * Oword Block Write message
+ */
+mul (1) tmp_reg3.8<1>:UD w_in_mb_uw<0,1,0>:UW orig_y_ub<0,1,0>:UB {align1};
+add (1) tmp_reg3.8<1>:UD tmp_reg3.8<0,1,0>:UD orig_x_ub<0,1,0>:UB {align1};
+mul (1) tmp_reg3.8<1>:UD tmp_reg3.8<0,1,0>:UD 0x4:UD {align1};
+mov (1) tmp_reg3.20<1>:UB thread_id_ub {align1}; /* dispatch id */
+mov (8) msg_reg0.0<1>:UD tmp_reg3.0<8,8,1>:UD {align1};
+
+mov (2) tmp_reg3.0<1>:UW vme_wb1.0<2,2,1>:UB {align1};
+
+mov (8) msg_reg1.0<1>:UD tmp_reg3.0<0,1,0>:UD {align1};
+
+mov (8) msg_reg2.0<1>:UD tmp_reg3.0<0,1,0>:UD {align1};
+
+/* bind index 3, write 4 oword, msg type: 8(OWord Block Write) */
+send (16) 0 obw_wb null write(BIND_IDX_OUTPUT, 3, 8, 1) mlen 3 rlen 1 {align1};
+
+/*
+ * kill thread
+ */
+mov (8) msg_reg0<1>:UD r0<8,8,1>:UD {align1};
+send (16) 0 acc0<1>UW null thread_spawner(0, 0, 1) mlen 1 rlen 0 {align1 EOT};
diff --git a/shaders/vme/inter_frame.g6b b/shaders/vme/inter_frame.g6b
new file mode 100644
index 0000000..cc67117
--- /dev/null
+++ b/shaders/vme/inter_frame.g6b
@@ -0,0 +1,27 @@
+ { 0x00800001, 0x24000061, 0x00000000, 0x00000000 },
+ { 0x00800001, 0x24400061, 0x00000000, 0x00000000 },
+ { 0x00200041, 0x24002e29, 0x004500a0, 0x00100010 },
+ { 0x00000001, 0x24080021, 0x00000400, 0x00000000 },
+ { 0x00000001, 0x240c0061, 0x00000000, 0x00a03000 },
+ { 0x00000001, 0x24140231, 0x00000014, 0x00000000 },
+ { 0x00000001, 0x24160169, 0x00000000, 0x20202020 },
+ { 0x00600001, 0x20000022, 0x008d0400, 0x00000000 },
+ { 0x00000001, 0x24240061, 0x00000000, 0x0c000020 },
+ { 0x00000001, 0x243c00f1, 0x00000000, 0x00000006 },
+ { 0x00600001, 0x20200022, 0x008d0420, 0x00000000 },
+ { 0x00600001, 0x20400022, 0x008d0240, 0x00000000 },
+ { 0x00600001, 0x206000e2, 0x00000000, 0x00000000 },
+ { 0x00800001, 0x20600232, 0x00cf0283, 0x00000000 },
+ { 0x00000001, 0x20700062, 0x00000000, 0x11111111 },
+ { 0x08600031, 0x21801cdd, 0x00000000, 0x08482000 },
+ { 0x00000041, 0x24684521, 0x000000a2, 0x000000a1 },
+ { 0x00000040, 0x24684421, 0x00000468, 0x000000a0 },
+ { 0x00000041, 0x24680c21, 0x00000468, 0x00000004 },
+ { 0x00000001, 0x24740231, 0x00000014, 0x00000000 },
+ { 0x00600001, 0x20000022, 0x008d0460, 0x00000000 },
+ { 0x00200001, 0x24600229, 0x004501a0, 0x00000000 },
+ { 0x00600001, 0x20200022, 0x00000460, 0x00000000 },
+ { 0x00600001, 0x20400022, 0x00000460, 0x00000000 },
+ { 0x05800031, 0x22001cdd, 0x00000000, 0x061b0303 },
+ { 0x00600001, 0x20000022, 0x008d0000, 0x00000000 },
+ { 0x07800031, 0x24001cc8, 0x00000000, 0x82000010 },
diff --git a/shaders/vme/intra_frame.asm b/shaders/vme/intra_frame.asm
new file mode 100644
index 0000000..65cd58b
--- /dev/null
+++ b/shaders/vme/intra_frame.asm
@@ -0,0 +1,102 @@
+/*
+ * Copyright © <2010>, Intel Corporation.
+ *
+ * This program is licensed under the terms and conditions of the
+ * Eclipse Public License (EPL), version 1.0. The full text of the EPL is at
+ * http://www.opensource.org/licenses/eclipse-1.0.php.
+ *
+ */
+// Module name: IntraFrame.asm
+//
+// Make intra prediction estimation for Intra frame
+//
+
+//
+// Now, begin source code....
+//
+
+include(`vme_header.inc')
+
+/*
+ * __START
+ */
+__INTRA_START:
+mov (16) tmp_reg0.0<1>:UD 0x0:UD {align1};
+mov (16) tmp_reg2.0<1>:UD 0x0:UD {align1};
+
+/*
+ * Media Read Message -- fetch neighbor edge pixels
+ */
+/* ROW */
+mul (2) tmp_reg0.0<1>:D orig_xy_ub<2,2,1>:UB 16:UW {align1}; /* (x, y) * 16 */
+add (1) tmp_reg0.0<1>:D tmp_reg0.0<0,1,0>:D -8:W {align1}; /* X offset */
+add (1) tmp_reg0.4<1>:D tmp_reg0.4<0,1,0>:D -1:W {align1}; /* Y offset */
+mov (1) tmp_reg0.8<1>:UD BLOCK_32X1 {align1};
+mov (1) tmp_reg0.20<1>:UB thread_id_ub {align1}; /* dispatch id */
+mov (8) msg_reg0.0<1>:UD tmp_reg0.0<8,8,1>:UD {align1};
+send (16) 0 INEP_ROW null read(BIND_IDX_INEP, 0, 0, 4) mlen 1 rlen 1 {align1};
+
+/* COL */
+mul (2) tmp_reg0.0<1>:D orig_xy_ub<2,2,1>:UB 16:UW {align1}; /* (x, y) * 16 */
+add (1) tmp_reg0.0<1>:D tmp_reg0.0<0,1,0>:D -4:W {align1}; /* X offset */
+mov (1) tmp_reg0.8<1>:UD BLOCK_4X16 {align1};
+mov (1) tmp_reg0.20<1>:UB thread_id_ub {align1}; /* dispatch id */
+mov (8) msg_reg0.0<1>:UD tmp_reg0.0<8,8,1>:UD {align1};
+send (16) 0 INEP_COL0 null read(BIND_IDX_INEP, 0, 0, 4) mlen 1 rlen 2 {align1};
+
+/*
+ * VME message
+ */
+/* m0 */
+mul (2) tmp_reg0.8<1>:UW orig_xy_ub<2,2,1>:UB 16:UW {align1}; /* (x, y) * 16 */
+mov (1) tmp_reg0.20<1>:UB thread_id_ub {align1}; /* dispatch id */
+mov (8) msg_reg0.0<1>:UD tmp_reg0.0<8,8,1>:UD {align1};
+
+/* m1 */
+mov (1) intra_part_mask_ub<1>:UB LUMA_INTRA_8x8_DISABLE + LUMA_INTRA_4x4_DISABLE {align1};
+
+cmp.nz.f0.0 (1) null<1>:UW orig_x_ub<0,1,0>:UB 0:UW {align1}; /* X != 0 */
+(f0.0) add (1) mb_intra_struct_ub<1>:UB mb_intra_struct_ub<0,1,0>:UB INTRA_PRED_AVAIL_FLAG_AE {align1}; /* A */
+
+cmp.nz.f0.0 (1) null<1>:UW orig_y_ub<0,1,0>:UB 0:UW {align1}; /* Y != 0 */
+(f0.0) add (1) mb_intra_struct_ub<1>:UB mb_intra_struct_ub<0,1,0>:UB INTRA_PRED_AVAIL_FLAG_B {align1}; /* B */
+
+mul.nz.f0.0 (1) null<1>:UW orig_x_ub<0,1,0>:UB orig_y_ub<0,1,0>:UB {align1}; /* X * Y != 0 */
+(f0.0) add (1) mb_intra_struct_ub<1>:UB mb_intra_struct_ub<0,1,0>:UB INTRA_PRED_AVAIL_FLAG_D {align1}; /* D */
+
+add (1) tmp_x_w<1>:W orig_x_ub<0,1,0>:UB 1:UW {align1}; /* X + 1 */
+add (1) tmp_x_w<1>:W w_in_mb_uw<0,1,0>:UW -tmp_x_w<0,1,0>:W {align1}; /* width - (X + 1) */
+mul.nz.f0.0 (1) null<1>:UD tmp_x_w<0,1,0>:W orig_y_ub<0,1,0>:UB {align1}; /* (width - (X + 1)) * Y != 0 */
+(f0.0) add (1) mb_intra_struct_ub<1>:UB mb_intra_struct_ub<0,1,0>:UB INTRA_PRED_AVAIL_FLAG_C {align1}; /* C */
+
+mov (8) msg_reg1<1>:UD tmp_reg1.0<8,8,1>:UD {align1};
+
+/* m2 */
+mov (8) msg_reg2<1>:UD INEP_ROW.0<8,8,1>:UD {align1};
+
+/* m3 */
+mov (8) msg_reg3<1>:UD 0x0 {align1};
+mov (16) msg_reg3.0<1>:UB INEP_COL0.3<32,8,4>:UB {align1};
+mov (1) msg_reg3.16<1>:UD INTRA_PREDICTORE_MODE {align1};
+send (8) 0 vme_wb null vme(BIND_IDX_VME,0,0,VME_MESSAGE_TYPE_INTRA) mlen 4 rlen 1 {align1};
+
+/*
+ * Oword Block Write message
+ */
+mul (1) tmp_reg3.8<1>:UD w_in_mb_uw<0,1,0>:UW orig_y_ub<0,1,0>:UB {align1};
+add (1) tmp_reg3.8<1>:UD tmp_reg3.8<0,1,0>:UD orig_x_ub<0,1,0>:UB {align1};
+mov (1) tmp_reg3.20<1>:UB thread_id_ub {align1}; /* dispatch id */
+mov (8) msg_reg0.0<1>:UD tmp_reg3<8,8,1>:UD {align1};
+
+mov (1) msg_reg1.0<1>:UD vme_wb.0<0,1,0>:UD {align1};
+mov (1) msg_reg1.4<1>:UD vme_wb.16<0,1,0>:UD {align1};
+mov (1) msg_reg1.8<1>:UD vme_wb.20<0,1,0>:UD {align1};
+mov (1) msg_reg1.12<1>:UD vme_wb.24<0,1,0>:UD {align1};
+/* bind index 3, write 1 oword, msg type: 8(OWord Block Write) */
+send (16) 0 obw_wb null write(BIND_IDX_OUTPUT, 0, 8, 1) mlen 2 rlen 1 {align1};
+
+/*
+ * kill thread
+ */
+mov (8) msg_reg0<1>:UD r0<8,8,1>:UD {align1};
+send (16) 0 acc0<1>UW null thread_spawner(0, 0, 1) mlen 1 rlen 0 {align1 EOT};
diff --git a/shaders/vme/intra_frame.g6b b/shaders/vme/intra_frame.g6b
new file mode 100644
index 0000000..c5fc865
--- /dev/null
+++ b/shaders/vme/intra_frame.g6b
@@ -0,0 +1,46 @@
+ { 0x00800001, 0x24000061, 0x00000000, 0x00000000 },
+ { 0x00800001, 0x24400061, 0x00000000, 0x00000000 },
+ { 0x00200041, 0x24002e25, 0x004500a0, 0x00100010 },
+ { 0x00000040, 0x24003ca5, 0x00000400, 0xfff8fff8 },
+ { 0x00000040, 0x24043ca5, 0x00000404, 0xffffffff },
+ { 0x00000001, 0x240800e1, 0x00000000, 0x0000001f },
+ { 0x00000001, 0x24140231, 0x00000014, 0x00000000 },
+ { 0x00600001, 0x20000022, 0x008d0400, 0x00000000 },
+ { 0x04800031, 0x22401cdd, 0x00000000, 0x02188004 },
+ { 0x00200041, 0x24002e25, 0x004500a0, 0x00100010 },
+ { 0x00000040, 0x24003ca5, 0x00000400, 0xfffcfffc },
+ { 0x00000001, 0x240800e1, 0x00000000, 0x000f0003 },
+ { 0x00000001, 0x24140231, 0x00000014, 0x00000000 },
+ { 0x00600001, 0x20000022, 0x008d0400, 0x00000000 },
+ { 0x04800031, 0x22801cdd, 0x00000000, 0x02288004 },
+ { 0x00200041, 0x24082e29, 0x004500a0, 0x00100010 },
+ { 0x00000001, 0x24140231, 0x00000014, 0x00000000 },
+ { 0x00600001, 0x20000022, 0x008d0400, 0x00000000 },
+ { 0x00000001, 0x243c00f1, 0x00000000, 0x00000006 },
+ { 0x02000010, 0x20002e28, 0x000000a0, 0x00000000 },
+ { 0x00010040, 0x243d1e31, 0x0000043d, 0x00000060 },
+ { 0x02000010, 0x20002e28, 0x000000a1, 0x00000000 },
+ { 0x00010040, 0x243d1e31, 0x0000043d, 0x00000010 },
+ { 0x02000041, 0x20004628, 0x000000a0, 0x000000a1 },
+ { 0x00010040, 0x243d1e31, 0x0000043d, 0x00000004 },
+ { 0x00000040, 0x24402e2d, 0x000000a0, 0x00010001 },
+ { 0x00000040, 0x2440352d, 0x000000a2, 0x00004440 },
+ { 0x02000041, 0x200045a0, 0x00000440, 0x000000a1 },
+ { 0x00010040, 0x243d1e31, 0x0000043d, 0x00000008 },
+ { 0x00600001, 0x20200022, 0x008d0420, 0x00000000 },
+ { 0x00600001, 0x20400022, 0x008d0240, 0x00000000 },
+ { 0x00600001, 0x206000e2, 0x00000000, 0x00000000 },
+ { 0x00800001, 0x20600232, 0x00cf0283, 0x00000000 },
+ { 0x00000001, 0x20700062, 0x00000000, 0x11111111 },
+ { 0x08600031, 0x21801cdd, 0x00000000, 0x08184000 },
+ { 0x00000041, 0x24684521, 0x000000a2, 0x000000a1 },
+ { 0x00000040, 0x24684421, 0x00000468, 0x000000a0 },
+ { 0x00000001, 0x24740231, 0x00000014, 0x00000000 },
+ { 0x00600001, 0x20000022, 0x008d0460, 0x00000000 },
+ { 0x00000001, 0x20200022, 0x00000180, 0x00000000 },
+ { 0x00000001, 0x20240022, 0x00000190, 0x00000000 },
+ { 0x00000001, 0x20280022, 0x00000194, 0x00000000 },
+ { 0x00000001, 0x202c0022, 0x00000198, 0x00000000 },
+ { 0x05800031, 0x22001cdd, 0x00000000, 0x041b0003 },
+ { 0x00600001, 0x20000022, 0x008d0000, 0x00000000 },
+ { 0x07800031, 0x24001cc8, 0x00000000, 0x82000010 },
diff --git a/shaders/vme/vme_header.inc b/shaders/vme/vme_header.inc
new file mode 100644
index 0000000..60a61e5
--- /dev/null
+++ b/shaders/vme/vme_header.inc
@@ -0,0 +1,129 @@
+/*
+ * Copyright © <2010>, Intel Corporation.
+ *
+ * This program is licensed under the terms and conditions of the
+ * Eclipse Public License (EPL), version 1.0. The full text of the EPL is at
+ * http://www.opensource.org/licenses/eclipse-1.0.php.
+ *
+ */
+// Module name: vme_header.inc
+//
+// Global symbol definitions shared by the VME shaders
+//
+
+/*
+ * Constants
+ */
+define(`VME_MESSAGE_TYPE_INTER', `1')
+define(`VME_MESSAGE_TYPE_INTRA', `2')
+define(`VME_MESSAGE_TYPE_MIXED', `3')
+
+define(`BLOCK_32X1', `0x0000001F')
+define(`BLOCK_4X16', `0x000F0003')
+
+define(`LUMA_INTRA_16x16_DISABLE', `0x1')
+define(`LUMA_INTRA_8x8_DISABLE', `0x2')
+define(`LUMA_INTRA_4x4_DISABLE', `0x4')
+
+define(`INTRA_PRED_AVAIL_FLAG_AE', `0x60')
+define(`INTRA_PRED_AVAIL_FLAG_B', `0x10')
+define(`INTRA_PRED_AVAIL_FLAG_C', `0x8')
+define(`INTRA_PRED_AVAIL_FLAG_D', `0x4')
+
+define(`BIND_IDX_VME', `0')
+define(`BIND_IDX_VME_REF0', `1')
+define(`BIND_IDX_VME_REF1', `2')
+define(`BIND_IDX_OUTPUT', `3') /* destination surface of the OWord Block Write */
+define(`BIND_IDX_INEP', `4')
+
+define(`SUB_PEL_MODE_INTEGER', `0x00000000')
+define(`SUB_PEL_MODE_HALF', `0x00001000')
+define(`SUB_PEL_MODE_QUARTER', `0x00003000')
+
+define(`INTER_SAD_NONE', `0x00000000')
+define(`INTER_SAD_HAAR', `0x00200000')
+
+define(`INTRA_SAD_NONE', `0x00000000')
+define(`INTRA_SAD_HAAR', `0x00800000')
+
+define(`REF_REGION_SIZE', `0x2020:UW')
+
+define(`BI_SUB_MB_PART_MASK', `0x0c000000')
+define(`MAX_NUM_MV', `0x00000020')
+
+define(`INTRA_PREDICTORE_MODE', `0x11111111:UD')
+
+/* GRF registers
+ * r0 header
+ * r1~r4 constant buffer (reserved)
+ * r5 inline data
+ * r6~r11 reserved
+ * r12~r15 write back of VME message
+ * r16 write back of Oword Block Write
+ */
+/*
+ * GRF 0 -- header
+ */
+define(`thread_id_ub', `r0.20<0,1,0>:UB') /* thread id in payload */
+
+/*
+ * GRF 1~4 -- Constant Buffer (reserved)
+ */
+
+/*
+ * GRF 5 -- inline data
+ */
+define(`inline_reg0', `r5')
+define(`w_in_mb_uw', `inline_reg0.2') /* frame width in macroblocks */
+define(`orig_xy_ub', `inline_reg0.0')
+define(`orig_x_ub', `inline_reg0.0') /* in macroblock */
+define(`orig_y_ub', `inline_reg0.1') /* in macroblock */
+
+/*
+ * GRF 6~11 -- reserved
+ */
+
+/*
+ * GRF 12~15 -- write back for VME message
+ */
+define(`vme_wb', `r12')
+define(`vme_wb0', `r12')
+define(`vme_wb1', `r13')
+define(`vme_wb2', `r14')
+define(`vme_wb3', `r15')
+
+/*
+ * GRF 16 -- write back for Oword Block Write message with write commit bit
+ */
+define(`obw_wb', `r16')
+
+/*
+ * GRF 18~21 -- Intra Neighbor Edge Pixels
+ */
+define(`INEP_ROW', `r18')
+define(`INEP_COL0', `r20')
+define(`INEP_COL1', `r21')
+
+/*
+ * temporary registers
+ */
+define(`tmp_reg0', `r32')
+define(`tmp_reg1', `r33')
+define(`intra_part_mask_ub', `tmp_reg1.28')
+define(`mb_intra_struct_ub', `tmp_reg1.29')
+define(`tmp_reg2', `r34')
+define(`tmp_x_w', `tmp_reg2.0')
+define(`tmp_reg3', `r35')
+
+/*
+ * MRF registers
+ */
+define(`msg_reg0', `m0') /* m0 */
+define(`msg_reg1', `m1') /* m1 */
+define(`msg_reg2', `m2') /* m2 */
+define(`msg_reg3', `m3') /* m3 */
+
+
+
+
+