summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorZhou Chang <chang.zhou@intel.com>2011-04-14 13:37:45 +0800
committerXiang, Haihao <haihao.xiang@intel.com>2011-04-14 13:43:44 +0800
commit1c01ad19a64682ef100ca0b8695475772c86a1a5 (patch)
treedc627c9a8e4bcb8914949b91f2839c49758f3ea0
parent4adeed485f454349c1dc0410da77e001cf5e3996 (diff)
add inter frame support in vme.
-rw-r--r--gen6_mfc.c21
-rw-r--r--gen6_vme.c218
-rw-r--r--shaders/vme/Makefile.am2
-rw-r--r--shaders/vme/inter_frame.asm47
-rw-r--r--shaders/vme/inter_frame.g6b14
5 files changed, 268 insertions, 34 deletions
diff --git a/gen6_mfc.c b/gen6_mfc.c
index fd9f93c..597d642 100644
--- a/gen6_mfc.c
+++ b/gen6_mfc.c
@@ -220,11 +220,11 @@ gen6_mfc_avc_img_state(VADriverContextP ctx)
OUT_BCS_BATCH(ctx, 0); /*Mainly about MB rate control and debug, just ignoring*/
OUT_BCS_BATCH(ctx, /*Inter and Intra Conformance Max size limit*/
(0xBB8 << 16) | /*InterMbMaxSz*/
- (0xBB8) ); /*IntraMbMaxSz*/
+ (0xEE8) ); /*IntraMbMaxSz*/
OUT_BCS_BATCH(ctx, 0); /*Reserved*/
OUT_BCS_BATCH(ctx, 0); /*Slice QP Delta for bitrate control*/
OUT_BCS_BATCH(ctx, 0); /*Slice QP Delta for bitrate control*/
- OUT_BCS_BATCH(ctx, 0x80200000);
+ OUT_BCS_BATCH(ctx, 0x8C000000);
OUT_BCS_BATCH(ctx, 0x00010000);
OUT_BCS_BATCH(ctx, 0);
@@ -412,7 +412,7 @@ gen6_mfc_avc_pak_object_intra(VADriverContextP ctx, int x, int y, int end_mb, in
return len_in_dwords;
}
-static int gen6_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb, int qp, dri_bo *bo)
+static int gen6_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb, int qp, dri_bo *bo, unsigned int offset)
{
int len_in_dwords = 11;
@@ -420,14 +420,14 @@ static int gen6_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int
OUT_BCS_BATCH(ctx, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
- OUT_BCS_BATCH(ctx, 32); /* 1 MV : SKIP*/
+ OUT_BCS_BATCH(ctx, 32); /* 32 MV*/
OUT_BCS_RELOC(ctx, bo,
I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
- 0);
+ offset);
OUT_BCS_BATCH(ctx,
(1 << 24) | /* PackedMvNum, Debug*/
- (4 << 20) | /* 8 MV*/
+ (4 << 20) | /* 8 MV, SNB don't use it*/
(1 << 19) | /* CbpDcY */
(1 << 18) | /* CbpDcU */
(1 << 17) | /* CbpDcV */
@@ -525,7 +525,7 @@ void gen6_mfc_avc_pipeline_programing(VADriverContextP ctx, void *obj)
struct gen6_media_state *media_state = &i965->gen6_media_state;
VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param->buffer;
VAEncSliceParameterBuffer *pSliceParameter = (VAEncSliceParameterBuffer *)encode_state->slice_params[0]->buffer;
- unsigned int *msg;
+ unsigned int *msg, offset;
int emit_new_state = 1, object_len_in_bytes;
int is_intra = pSliceParameter->slice_flags.bits.is_intra;
@@ -535,8 +535,10 @@ void gen6_mfc_avc_pipeline_programing(VADriverContextP ctx, void *obj)
msg = (unsigned int *)media_state->vme_output.bo->virtual;
if ( is_intra == 0) { /*TODO: simulate VME result, [0,0] MVs*/
- memset(media_state->vme_output.bo->virtual, 0, 128);
+ //memset(media_state->vme_output.bo->virtual, 0, 128);
+ //printf("msgs = %08x, %08x, %08x, %08x \n", msg[0], msg[1], msg[2], msg[3]);
dri_bo_unmap(media_state->vme_output.bo);
+ offset = 0;
}
for (y = 0; y < height_in_mbs; y++) {
@@ -564,7 +566,8 @@ void gen6_mfc_avc_pipeline_programing(VADriverContextP ctx, void *obj)
if ( is_intra ) {
object_len_in_bytes = gen6_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg);
} else {
- object_len_in_bytes = gen6_mfc_avc_pak_object_inter(ctx, x, y, last_mb, qp, media_state->vme_output.bo);
+ object_len_in_bytes = gen6_mfc_avc_pak_object_inter(ctx, x, y, last_mb, qp, media_state->vme_output.bo,offset);
+ offset += 64;
}
msg += 4;
diff --git a/gen6_vme.c b/gen6_vme.c
index 8a6f041..cf60e5c 100644
--- a/gen6_vme.c
+++ b/gen6_vme.c
@@ -51,6 +51,11 @@ static uint32_t gen6_vme_intra_frame[][4] = {
{0,0,0,0}
};
+static uint32_t gen6_vme_inter_frame[][4] = {
+#include "shaders/vme/inter_frame.g6b"
+ {0,0,0,0}
+};
+
static struct media_kernel gen6_vme_kernels[] = {
{
"VME Intra Frame",
@@ -58,10 +63,17 @@ static struct media_kernel gen6_vme_kernels[] = {
gen6_vme_intra_frame,
sizeof(gen6_vme_intra_frame),
NULL
+ },
+ {
+ "VME inter Frame",
+ VME_INTER_SHADER,
+ gen6_vme_inter_frame,
+ sizeof(gen6_vme_inter_frame),
+ NULL
}
};
-#define GEN6_VME_KERNEL_NUMBER ARRAY_ELEMS(gen6_vme_kernels)
+#define GEN6_VME_KERNEL_NUMBER 2
static void
gen6_vme_set_source_surface_tiling(struct i965_surface_state2 *ss, unsigned int tiling)
@@ -114,6 +126,7 @@ static void gen6_vme_source_surface_state(VADriverContextP ctx,
ss->ss0.surface_base_address = obj_surface->bo->offset;
+ ss->ss1.cbcr_pixel_offset_v_direction = 2;
ss->ss1.width = w - 1;
ss->ss1.height = h - 1;
@@ -121,6 +134,7 @@ static void gen6_vme_source_surface_state(VADriverContextP ctx,
ss->ss2.interleave_chroma = 1;
ss->ss2.pitch = w_pitch - 1;
ss->ss2.half_pitch_for_chroma = 0;
+
gen6_vme_set_source_surface_tiling(ss, tiling);
/* UV offset for interleave mode */
@@ -154,8 +168,14 @@ gen6_vme_output_buffer_setup(VADriverContextP ctx,
int width_in_mbs = ALIGN(obj_context->picture_width, 16) / 16;
int height_in_mbs = ALIGN(obj_context->picture_height, 16) / 16;
int num_entries;
+ VAEncSliceParameterBuffer *pSliceParameter = (VAEncSliceParameterBuffer *)encode_state->slice_params[0]->buffer;
+ int is_intra = pSliceParameter->slice_flags.bits.is_intra;
- media_state->vme_output.num_blocks = width_in_mbs * height_in_mbs;
+ if ( is_intra ) {
+ media_state->vme_output.num_blocks = width_in_mbs * height_in_mbs;
+ } else {
+ media_state->vme_output.num_blocks = width_in_mbs * height_in_mbs * 4;
+ }
media_state->vme_output.size_block = 16; /* an OWORD */
media_state->vme_output.pitch = ALIGN(media_state->vme_output.size_block, 16);
bo = dri_bo_alloc(i965->intel.bufmgr,
@@ -199,7 +219,8 @@ gen6_vme_output_buffer_setup(VADriverContextP ctx,
static VAStatus gen6_vme_surface_setup(VADriverContextP ctx,
VAContextID context,
- struct mfc_encode_state *encode_state)
+ struct mfc_encode_state *encode_state,
+ int is_intra)
{
struct i965_driver_data *i965 = i965_driver_data(ctx);
struct gen6_media_state *media_state = &i965->gen6_media_state;
@@ -214,14 +235,16 @@ static VAStatus gen6_vme_surface_setup(VADriverContextP ctx,
obj_surface = SURFACE(encode_state->current_render_target);
assert(obj_surface);
gen6_vme_source_surface_state(ctx, 0, obj_surface);
- /* reference 0, FIXME: must check it is valid or not */
- obj_surface = SURFACE(pPicParameter->reference_picture);
- assert(obj_surface);
- // gen6_vme_source_surface_state(ctx, 1, obj_surface)
- /* reference 1, FIXME: */
- // obj_surface = SURFACE(pPicParameter->reference_picture);
- // assert(obj_surface);
- // gen6_vme_source_surface_state(ctx, 2, obj_surface);
+ if ( ! is_intra ) {
+ /* reference 0 */
+ obj_surface = SURFACE(pPicParameter->reference_picture);
+ assert(obj_surface);
+ gen6_vme_source_surface_state(ctx, 1, obj_surface);
+ /* reference 1, FIXME: */
+ // obj_surface = SURFACE(pPicParameter->reference_picture);
+ // assert(obj_surface);
+ //gen6_vme_source_surface_state(ctx, 2, obj_surface);
+ }
/* VME output */
gen6_vme_output_buffer_setup(ctx, context, encode_state, 3);
@@ -250,7 +273,8 @@ static VAStatus gen6_vme_surface_setup(VADriverContextP ctx,
static VAStatus gen6_vme_interface_setup(VADriverContextP ctx,
VAContextID context,
- struct mfc_encode_state *encode_state)
+ struct mfc_encode_state *encode_state,
+ int is_intra)
{
struct i965_driver_data *i965 = i965_driver_data(ctx);
struct gen6_media_state *media_state = &i965->gen6_media_state;
@@ -263,9 +287,14 @@ static VAStatus gen6_vme_interface_setup(VADriverContextP ctx,
assert(bo->virtual);
desc = bo->virtual;
- for (i = 0; i < GEN6_VME_KERNEL_NUMBER; i++) {
- /*Load kernel into GPU memory*/
- struct media_kernel *kernel = &gen6_vme_kernels[i];
+ for (i = 0; i < 1; i++) {
+ /*Load kernel into GPU memory*/
+ struct media_kernel *kernel;
+ if ( is_intra) {
+ kernel = &gen6_vme_kernels[0];
+ } else {
+ kernel = &gen6_vme_kernels[1];
+ }
/*Setup the descritor table*/
memset(desc, 0, sizeof(*desc));
@@ -292,7 +321,7 @@ static VAStatus gen6_vme_interface_setup(VADriverContextP ctx,
/*binding table*/
dri_bo_emit_reloc(bo,
I915_GEM_DOMAIN_INSTRUCTION, 0,
- 1, //One Entry
+ 4, //One Entry
offsetof(struct gen6_interface_descriptor_data, desc3),
media_state->binding_table.bo);
desc++;
@@ -321,7 +350,7 @@ static VAStatus gen6_vme_constant_setup(VADriverContextP ctx,
return VA_STATUS_SUCCESS;
}
-static VAStatus gen6_vme_vme_state_setup(VADriverContextP ctx, VAContextID context, struct mfc_encode_state *encode_state)
+static VAStatus gen6_vme_vme_state_setup(VADriverContextP ctx, VAContextID context, struct mfc_encode_state *encode_state, int is_intra)
{
struct i965_driver_data *i965 = i965_driver_data(ctx);
struct gen6_media_state *media_state = &i965->gen6_media_state;
@@ -334,7 +363,7 @@ static VAStatus gen6_vme_vme_state_setup(VADriverContextP ctx, VAContextID conte
vme_state_message = (unsigned int *)media_state->vme_state.bo->virtual;
for(i = 0;i < 32; i++) {
- vme_state_message[i] = 0x00;
+ vme_state_message[i] = 0x11;
}
vme_state_message[16] = 0x42424242; //cost function LUT set 0 for Intra
@@ -428,7 +457,7 @@ static void gen6_vme_idrt(VADriverContextP ctx)
ADVANCE_BATCH(ctx);
}
-static int gen6_vme_media_object(VADriverContextP ctx,
+static int gen6_vme_media_object_intra(VADriverContextP ctx,
VAContextID context,
struct mfc_encode_state *encode_state,
int mb_x, int mb_y)
@@ -561,6 +590,139 @@ static int gen6_vme_media_object(VADriverContextP ctx,
return len_in_dowrds * 4;
}
+static int gen6_vme_media_object_inter(VADriverContextP ctx,
+ VAContextID context,
+ struct mfc_encode_state *encode_state,
+ int mb_x, int mb_y)
+{
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
+ struct object_surface *obj_surface = SURFACE( encode_state->current_render_target);
+ int i;
+ unsigned char *pPixel[17];
+ int pitch = obj_surface->width;
+ int mb_width = ALIGN(obj_surface->orig_width, 16) / 16;
+ int len_in_dowrds = 6 + 32 + 8;
+
+ BEGIN_BATCH(ctx, len_in_dowrds);
+
+ OUT_BATCH(ctx, CMD_MEDIA_OBJECT | (len_in_dowrds - 2));
+ OUT_BATCH(ctx, VME_INTRA_SHADER); /*Interface Descriptor Offset*/
+ OUT_BATCH(ctx, 0);
+ OUT_BATCH(ctx, 0);
+ OUT_BATCH(ctx, 0);
+ OUT_BATCH(ctx, 0);
+
+ /*inline data */
+ OUT_BATCH(ctx, ((mb_y << 20)) | ((mb_x<<4)) ); /*M0.0 Refrence0 X,Y*/
+ OUT_BATCH(ctx, 0x00000000); /*M0.1 Refrence1 X,Y, not used in P frame*/
+ OUT_BATCH(ctx, (mb_y<<20) |
+ (mb_x<<4)); /*M0.2 Source X,Y*/
+ OUT_BATCH(ctx, 0x00A03000); /*M0.3 16x16 Source, 1/4 pixel, harr*/
+ OUT_BATCH(ctx, 0x00000000); /*M0.4 Ignored*/
+ OUT_BATCH(ctx, 0x20200000); /*M0.5 Reference Width&Height, 32x32*/
+ OUT_BATCH(ctx, 0x00000000); /*M0.6 Debug*/
+ OUT_BATCH(ctx, 0x00000000); /*M0.7 Debug*/
+
+ OUT_BATCH(ctx, 0x00000000); /*M1.0 Default value*/
+ OUT_BATCH(ctx, 0x0C000020); /*M1.1 Default value MAX 32 MVs*/
+ OUT_BATCH(ctx, 0x00000000); /*M1.2 Default value*/
+ OUT_BATCH(ctx, 0x00000000); /*M1.3 Default value*/
+ OUT_BATCH(ctx, 0x00000000); /*M1.4 Default value*/
+ OUT_BATCH(ctx, 0x00000000); /*M1.5 Default value*/
+ OUT_BATCH(ctx, 0x00000000); /*M1.6 Default value*/
+
+ i = 0;
+ if ( mb_x > 0)
+ i |= 0x60;
+ if ( mb_y > 0)
+ i |= 0x10;
+ if ( mb_x > 0 && mb_y > 0)
+ i |= 0x08;
+ if ( mb_x > 0 && mb_y > 0 && mb_x < (mb_width - 1) )
+ i |= 0x04;
+ OUT_BATCH(ctx, (i << 8) | 6 ); /*M1.7 Neighbor MBS and Intra mode masks*/
+
+ drm_intel_gem_bo_map_gtt( obj_surface->bo );
+ for(i = 0; i < 17; i++){
+ pPixel[i] = (unsigned char *) ( obj_surface->bo->virtual + mb_x * 16 - 1 + ( mb_y * 16 - 1 + i) * pitch);
+ }
+
+ OUT_BATCH(ctx, 0); /*M2.0 MBZ*/
+ OUT_BATCH(ctx, pPixel[0][0] << 24); /*M2.1 Corner Neighbor*/
+
+ OUT_BATCH(ctx, ( (pPixel[0][4] << 24)
+ | (pPixel[0][3] << 16)
+ | (pPixel[0][2] << 8)
+ | (pPixel[0][1] ) )); /*M2.2 */
+
+ OUT_BATCH(ctx, ( (pPixel[0][8] << 24)
+ | (pPixel[0][7] << 16)
+ | (pPixel[0][6] << 8)
+ | (pPixel[0][5] ) )); /*M2.3 */
+
+ OUT_BATCH(ctx, ( (pPixel[0][12] << 24)
+ | (pPixel[0][11] << 16)
+ | (pPixel[0][10] << 8)
+ | (pPixel[0][9] ) )); /*M2.4 */
+
+ OUT_BATCH(ctx, ( (pPixel[0][16] << 24)
+ | (pPixel[0][15] << 16)
+ | (pPixel[0][14] << 8)
+ | (pPixel[0][13] ) )); /*M2.5 */
+
+ OUT_BATCH(ctx, ( (pPixel[0][20] << 24)
+ | (pPixel[0][19] << 16)
+ | (pPixel[0][18] << 8)
+ | (pPixel[0][17] ) )); /*M2.6 */
+
+ OUT_BATCH(ctx, ( (pPixel[0][24] << 24)
+ | (pPixel[0][23] << 16)
+ | (pPixel[0][22] << 8)
+ | (pPixel[0][21] ) )); /*M2.7 */
+
+ OUT_BATCH(ctx, ( (pPixel[4][0] << 24)
+ | (pPixel[3][0] << 16)
+ | (pPixel[2][0] << 8)
+ | (pPixel[1][0] ) )); /*M3.0 */
+
+ OUT_BATCH(ctx, ( (pPixel[8][0] << 24)
+ | (pPixel[7][0] << 16)
+ | (pPixel[6][0] << 8)
+ | (pPixel[5][0] ) )); /*M3.1 */
+
+ OUT_BATCH(ctx, ( (pPixel[12][0] << 24)
+ | (pPixel[11][0] << 16)
+ | (pPixel[10][0] << 8)
+ | (pPixel[9][0] ) )); /*M3.2 */
+
+ OUT_BATCH(ctx, ( (pPixel[0][0] << 24)
+ | (pPixel[15][0] << 16)
+ | (pPixel[14][0] << 8)
+ | (pPixel[13][0] ) )); /*M3.3 */
+
+ OUT_BATCH(ctx, 0x11111111); /*M3.4*/
+ OUT_BATCH(ctx, 0x00000000); /*M3.5*/
+ OUT_BATCH(ctx, 0x00000000); /*M3.6*/
+ OUT_BATCH(ctx, 0x00000000); /*M3.7*/
+
+ OUT_BATCH(ctx, 0); /*Write Message Header M0.0*/
+ OUT_BATCH(ctx, 0); /*Write Message Header M0.1*/
+
+ OUT_BATCH(ctx, (mb_y * mb_width + mb_x) * 4); /*Write Message Header M0.2*/
+ OUT_BATCH(ctx, 0x00000000); /*Write Message Header M0.3*/
+
+ OUT_BATCH(ctx, 0x00000000);
+ OUT_BATCH(ctx, 0x00000000);
+ OUT_BATCH(ctx, 0x00000000);
+ OUT_BATCH(ctx, 0x00000000);
+
+ drm_intel_gem_bo_unmap_gtt( obj_surface->bo );
+
+ ADVANCE_BATCH(ctx);
+
+ return len_in_dowrds * 4;
+}
+
static void gen6_vme_media_init(VADriverContextP ctx)
{
int i;
@@ -627,6 +789,8 @@ static void gen6_vme_pipeline_programing(VADriverContextP ctx,
int height_in_mbs = (obj_context->picture_height + 15) / 16;
int x, y;
int emit_new_state = 1, object_len_in_bytes;
+ VAEncSliceParameterBuffer *pSliceParameter = (VAEncSliceParameterBuffer *)encode_state->slice_params[0]->buffer;
+ int is_intra = pSliceParameter->slice_flags.bits.is_intra;
intel_batchbuffer_start_atomic(ctx, 0x1000);
@@ -643,7 +807,7 @@ static void gen6_vme_pipeline_programing(VADriverContextP ctx,
ADVANCE_BATCH(ctx);
/*Step2: State command PIPELINE_SELECT*/
- gen6_vme_pipeline_select(ctx);
+ gen6_vme_pipeline_select(ctx);
/*Step3: State commands configuring pipeline states*/
gen6_vme_state_base_address(ctx);
@@ -655,7 +819,11 @@ static void gen6_vme_pipeline_programing(VADriverContextP ctx,
}
/*Step4: Primitive commands*/
- object_len_in_bytes = gen6_vme_media_object(ctx, context, encode_state, x, y);
+ if ( is_intra ) {
+ object_len_in_bytes = gen6_vme_media_object_intra(ctx, context, encode_state, x, y);
+ } else {
+ object_len_in_bytes = gen6_vme_media_object_inter(ctx, context, encode_state, x, y);
+ }
if (intel_batchbuffer_check_free_space(ctx, object_len_in_bytes) == 0) {
intel_batchbuffer_end_atomic(ctx);
@@ -674,12 +842,14 @@ static VAStatus gen6_vme_prepare(VADriverContextP ctx,
struct mfc_encode_state *encode_state)
{
VAStatus vaStatus = VA_STATUS_SUCCESS;
+ VAEncSliceParameterBuffer *pSliceParameter = (VAEncSliceParameterBuffer *)encode_state->slice_params[0]->buffer;
+ int is_intra = pSliceParameter->slice_flags.bits.is_intra;
/*Setup all the memory object*/
- gen6_vme_surface_setup(ctx, context, encode_state);
- gen6_vme_interface_setup(ctx, context, encode_state);
+ gen6_vme_surface_setup(ctx, context, encode_state, is_intra);
+ gen6_vme_interface_setup(ctx, context, encode_state, is_intra);
gen6_vme_constant_setup(ctx, context, encode_state);
- gen6_vme_vme_state_setup(ctx, context, encode_state);
+ gen6_vme_vme_state_setup(ctx, context, encode_state, is_intra);
/*Programing media pipeline*/
gen6_vme_pipeline_programing(ctx, context, encode_state);
diff --git a/shaders/vme/Makefile.am b/shaders/vme/Makefile.am
index c3d029d..7945d66 100644
--- a/shaders/vme/Makefile.am
+++ b/shaders/vme/Makefile.am
@@ -1,4 +1,4 @@
-INTEL_G6B = intra_frame.g6b
+INTEL_G6B = intra_frame.g6b inter_frame.g6b
EXTRA_DIST = $(INTEL_G6B)
diff --git a/shaders/vme/inter_frame.asm b/shaders/vme/inter_frame.asm
new file mode 100644
index 0000000..33d3d1b
--- /dev/null
+++ b/shaders/vme/inter_frame.asm
@@ -0,0 +1,47 @@
+/*
+ * Copyright © <2010>, Intel Corporation.
+ *
+ * This program is licensed under the terms and conditions of the
+ * Eclipse Public License (EPL), version 1.0. The full text of the EPL is at
+ * http://www.opensource.org/licenses/eclipse-1.0.php.
+ *
+ */
+// Modual name: IntraFrame.asm
+//
+// Make intra predition estimation for Intra frame
+//
+
+//
+// Now, begin source code....
+//
+
+include(`vme_header.inc')
+
+/*inline input data: r5~r11*/
+mov(1) r5.20<1>:UB r0.20<1,1,0>:UB {align1} ;
+mov(8) m0.0<1>:UD r5.0<8,8,1>:UD {align1};
+mov(8) m1.0<1>:UD r6.0<8,8,1>:UD {align1};
+mov(8) m2.0<1>:UD r7.0<8,8,1>:UD {align1};
+mov(8) m3.0<1>:UD r8.0<8,8,1>:UD {align1};
+send(8) 0 r12 null vme(0,0,0,1) mlen 4 rlen 4 {align1};
+
+mov(1) r9.20<1>:UB r0.20<1,1,0>:UB {align1} ;
+mov(8) m0.0<1>:UD r9.0<8,8,1>:UD {align1 mask_disable};
+/*
+mov(8) m1.0<1>:UD r13.0<8,8,1>:UD {align1 mask_disable};
+mov(8) m2.0<1>:UD r14.0<8,8,1>:UD {align1 mask_disable};
+*/
+mov(2) r9.0<1>:UW r13.0<2,2,1>:UB {align1 mask_disable};
+mov(8) m1.0<1>:UD r9.0<8,8,0>:UD {align1 mask_disable};
+mov(8) m2.0<1>:UD r9.0<8,8,0>:UD {align1 mask_disable};
+
+/*
+mov(16) m1.2<8:8:2>:UW r13.1<0,0,0>:UB {align1};
+*/
+
+/* bind index 3, write 2 oword, msg type: 8(OWord Block Write) */
+send (16) 0 r13 null write(3, 3, 8, 1) mlen 3 rlen 1 {align1};
+
+mov (8) m0.0<1>:UD r0<8,8,1>:UD {align1};
+send (16) 0 acc0<1>UW null thread_spawner(0, 0, 1) mlen 1 rlen 0 {align1 EOT};
+
diff --git a/shaders/vme/inter_frame.g6b b/shaders/vme/inter_frame.g6b
new file mode 100644
index 0000000..fa3567a
--- /dev/null
+++ b/shaders/vme/inter_frame.g6b
@@ -0,0 +1,14 @@
+ { 0x00000001, 0x20b40231, 0x00200014, 0x00000000 },
+ { 0x00600001, 0x20000022, 0x008d00a0, 0x00000000 },
+ { 0x00600001, 0x20200022, 0x008d00c0, 0x00000000 },
+ { 0x00600001, 0x20400022, 0x008d00e0, 0x00000000 },
+ { 0x00600001, 0x20600022, 0x008d0100, 0x00000000 },
+ { 0x08600031, 0x21801cdd, 0x00000000, 0x08482000 },
+ { 0x00000001, 0x21340231, 0x00200014, 0x00000000 },
+ { 0x00600201, 0x20000022, 0x008d0120, 0x00000000 },
+ { 0x00200201, 0x21200229, 0x004501a0, 0x00000000 },
+ { 0x00600201, 0x20200022, 0x008c0120, 0x00000000 },
+ { 0x00600201, 0x20400022, 0x008c0120, 0x00000000 },
+ { 0x05800031, 0x21a01cdd, 0x00000000, 0x061b0303 },
+ { 0x00600001, 0x20000022, 0x008d0000, 0x00000000 },
+ { 0x07800031, 0x24001cc8, 0x00000000, 0x82000010 },