author    Pengfei Qu <Pengfei.Qu@intel.com>    2016-12-28 13:45:06 +0800
committer Sean V Kelley <seanvk@posteo.de>     2017-01-10 15:22:22 -0800
commit    f704bff5dd4ea3dc6c899c93506ee52b4d0ef558 (patch)
tree      77e77d774a011612f0646d2b2526d035fda80913
parent    ee4adf3a2ada440bbf4da4650df47f61416b7741 (diff)
ENC: add MFX command for AVC encoder
Signed-off-by: Pengfei Qu <Pengfei.Qu@intel.com>
Reviewed-by: Sean V Kelley <seanvk@posteo.de>
-rwxr-xr-x  src/gen9_avc_encoder.c  399
1 file changed, 399 insertions, 0 deletions
diff --git a/src/gen9_avc_encoder.c b/src/gen9_avc_encoder.c
index 629a0da..8b11c5a 100755
--- a/src/gen9_avc_encoder.c
+++ b/src/gen9_avc_encoder.c
@@ -5625,3 +5625,402 @@ gen9_avc_kernel_init(VADriverContextP ctx,
generic_ctx->pfn_send_sfd_surface = gen9_avc_send_surface_sfd;
generic_ctx->pfn_send_wp_surface = gen9_avc_send_surface_wp;
}
+
+/*
+PAK pipeline related functions
+*/
+extern int
+intel_avc_enc_slice_type_fixup(int slice_type);
+
+static void
+gen9_mfc_avc_pipe_mode_select(VADriverContextP ctx,
+ struct encode_state *encode_state,
+ struct intel_encoder_context *encoder_context)
+{
+ struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
+ struct gen9_avc_encoder_context * avc_ctx = (struct gen9_avc_encoder_context * )pak_context->private_enc_ctx;
+ struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )pak_context->generic_enc_state;
+ struct intel_batchbuffer *batch = encoder_context->base.batch;
+
+ BEGIN_BCS_BATCH(batch, 5);
+
+ OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
+ OUT_BCS_BATCH(batch,
+ (0 << 29) |
+ (MFX_LONG_MODE << 17) | /* Must be long format for encoder */
+ (MFD_MODE_VLD << 15) |
+ (0 << 13) | /* VDEnc mode: 1 enables VDEnc, not used here */
+ ((generic_state->curr_pak_pass != (generic_state->num_pak_passes - 1)) << 10) | /* Stream-Out Enable */
+ ((!!avc_ctx->res_post_deblocking_output.bo) << 9) | /* Post Deblocking Output */
+ ((!!avc_ctx->res_pre_deblocking_output.bo) << 8) | /* Pre Deblocking Output */
+ (0 << 7) | /* Scaled surface enable */
+ (0 << 6) | /* Frame statistics stream out enable, always '1' in VDEnc mode */
+ (0 << 5) | /* not in stitch mode */
+ (1 << 4) | /* encoding mode */
+ (MFX_FORMAT_AVC << 0));
+ OUT_BCS_BATCH(batch,
+ (0 << 7) | /* expand NOA bus flag */
+ (0 << 6) | /* disable slice-level clock gating */
+ (0 << 5) | /* disable clock gating for NOA */
+ (0 << 4) | /* terminate if AVC motion and POC table error occurs */
+ (0 << 3) | /* terminate if AVC mbdata error occurs */
+ (0 << 2) | /* terminate if AVC CABAC/CAVLC decode error occurs */
+ (0 << 1) |
+ (0 << 0));
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+
+ ADVANCE_BCS_BATCH(batch);
+}
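+
+/* A short sketch of the multi-pass rule the Stream-Out Enable bit encodes
+ * above (illustrative, assuming num_pak_passes counts the total PAK passes
+ * for the frame):
+ *
+ *     int stream_out = (curr_pak_pass != num_pak_passes - 1);
+ *
+ * i.e. MB-level statistics are streamed out on every pass except the last,
+ * so later rate-control passes can consume them.
+ */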
+
+static void
+gen9_mfc_avc_surface_state(VADriverContextP ctx,
+ struct intel_encoder_context *encoder_context,
+ struct i965_gpe_resource *gpe_resource,
+ int id)
+{
+ struct intel_batchbuffer *batch = encoder_context->base.batch;
+
+ BEGIN_BCS_BATCH(batch, 6);
+
+ OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
+ OUT_BCS_BATCH(batch, id);
+ OUT_BCS_BATCH(batch,
+ ((gpe_resource->height - 1) << 18) |
+ ((gpe_resource->width - 1) << 4));
+ OUT_BCS_BATCH(batch,
+ (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
+ (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */
+ ((gpe_resource->pitch - 1) << 3) | /* pitch */
+ (0 << 2) | /* must be 0 for interleave U/V */
+ (1 << 1) | /* must be tiled */
+ (I965_TILEWALK_YMAJOR << 0)); /* tile walk, TILEWALK_YMAJOR */
+ OUT_BCS_BATCH(batch,
+ (0 << 16) | /* must be 0 for interleave U/V */
+ (gpe_resource->y_cb_offset)); /* y offset for U(cb) */
+ OUT_BCS_BATCH(batch,
+ (0 << 16) | /* must be 0 for interleave U/V */
+ (gpe_resource->y_cb_offset)); /* y offset for V(cr), same as Cb for interleaved U/V */
+
+ ADVANCE_BCS_BATCH(batch);
+}
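+
+/* Layout note (illustrative, assuming the NV12 layout this driver uses for
+ * planar 420 surfaces): y_cb_offset is the row where the interleaved U/V
+ * plane starts, so the chroma base resolves to
+ *
+ *     cb_cr_base = surface_base + y_cb_offset * pitch;
+ *
+ * DW4 and DW5 carry the same offset because Cb and Cr share one interleaved
+ * plane.
+ */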
+
+static void
+gen9_mfc_avc_pipe_buf_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
+{
+ struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
+ struct gen9_avc_encoder_context * avc_ctx = (struct gen9_avc_encoder_context * )pak_context->private_enc_ctx;
+ struct intel_batchbuffer *batch = encoder_context->base.batch;
+ int i;
+
+ BEGIN_BCS_BATCH(batch, 65);
+
+ OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (65 - 2));
+
+ /* the DW1-3 is for pre_deblocking */
+ OUT_BUFFER_3DW(batch, avc_ctx->res_pre_deblocking_output.bo, 1, 0, 0);
+
+ /* the DW4-6 is for the post_deblocking */
+ OUT_BUFFER_3DW(batch, avc_ctx->res_post_deblocking_output.bo, 1, 0, 0);
+
+ /* the DW7-9 is for the uncompressed_picture */
+ OUT_BUFFER_3DW(batch, avc_ctx->res_uncompressed_input_surface.bo, 1, 0, 0);
+
+ /* the DW10-12 is for PAK information (write) */
+ OUT_BUFFER_3DW(batch, avc_ctx->res_pak_mb_status_buffer.bo, 1, 0, 0);
+
+ /* the DW13-15 is for the intra_row_store_scratch */
+ OUT_BUFFER_3DW(batch, avc_ctx->res_intra_row_store_scratch_buffer.bo, 1, 0, 0);
+
+ /* the DW16-18 is for the deblocking filter */
+ OUT_BUFFER_3DW(batch, avc_ctx->res_deblocking_filter_row_store_scratch_buffer.bo, 1, 0, 0);
+
+ /* the DW 19-50 is for Reference pictures*/
+ for (i = 0; i < ARRAY_ELEMS(avc_ctx->list_reference_res); i++) {
+ OUT_BUFFER_2DW(batch, avc_ctx->list_reference_res[i].bo, 1, 0);
+ }
+
+ /* DW 51, reference picture attributes */
+ OUT_BCS_BATCH(batch, 0);
+
+ /* The DW 52-54 is for PAK information (read) */
+ OUT_BUFFER_3DW(batch, avc_ctx->res_pak_mb_status_buffer.bo, 1, 0, 0);
+
+ /* the DW 55-57 is the ILDB buffer */
+ OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
+
+ /* the DW 58-60 is the second ILDB buffer */
+ OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
+
+ /* DW 61, memory compress enable & mode */
+ OUT_BCS_BATCH(batch, 0);
+
+ /* the DW 62-64 is the buffer */
+ OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
+
+ ADVANCE_BCS_BATCH(batch);
+}
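+
+/* Sketch of what each OUT_BUFFER_3DW/OUT_BUFFER_2DW entry expands to
+ * (illustrative; the real macros are defined elsewhere in the driver):
+ * a 48-bit graphics address emitted as two dwords, plus, for the 3-DW
+ * variant, one dword of memory attributes (MOCS):
+ *
+ *     if (bo)
+ *         OUT_BCS_RELOC64(batch, bo, I915_GEM_DOMAIN_INSTRUCTION,
+ *                         is_target ? I915_GEM_DOMAIN_INSTRUCTION : 0,
+ *                         delta);              // address low + high
+ *     else {
+ *         OUT_BCS_BATCH(batch, 0);
+ *         OUT_BCS_BATCH(batch, 0);
+ *     }
+ *     OUT_BCS_BATCH(batch, attr);              // 3-DW form only
+ */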
+
+static void
+gen9_mfc_avc_ind_obj_base_addr_state(VADriverContextP ctx,
+ struct encode_state *encode_state,
+ struct intel_encoder_context *encoder_context)
+{
+ struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
+ struct gen9_avc_encoder_context * avc_ctx = (struct gen9_avc_encoder_context * )pak_context->private_enc_ctx;
+ struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )pak_context->generic_enc_state;
+ struct intel_batchbuffer *batch = encoder_context->base.batch;
+ struct object_surface *obj_surface;
+ struct gen9_surface_avc *avc_priv_surface;
+ unsigned int size = 0;
+ unsigned int w_mb = generic_state->frame_width_in_mbs;
+ unsigned int h_mb = generic_state->frame_height_in_mbs;
+
+ obj_surface = encode_state->reconstructed_object;
+
+ if (!obj_surface || !obj_surface->private_data)
+ return;
+ avc_priv_surface = obj_surface->private_data;
+
+ BEGIN_BCS_BATCH(batch, 26);
+
+ OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
+ /* The DW1-5 is for the MFX indirect bitstream offset, ignored in VDEnc mode */
+ OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
+ OUT_BUFFER_2DW(batch, NULL, 0, 0);
+
+ /* the DW6-10 is for MFX Indirect MV Object Base Address, ignored in VDEnc mode */
+ size = w_mb * h_mb * 32 * 4;
+ OUT_BUFFER_3DW(batch,
+ avc_priv_surface->res_mv_data_surface.bo,
+ 1,
+ 0,
+ 0);
+ OUT_BUFFER_2DW(batch,
+ avc_priv_surface->res_mv_data_surface.bo,
+ 1,
+ ALIGN(size, 0x1000));
+
+ /* The DW11-15 is for MFX IT-COFF. Not used on encoder */
+ OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
+ OUT_BUFFER_2DW(batch, NULL, 0, 0);
+
+ /* The DW16-20 is for MFX indirect DBLK. Not used on encoder */
+ OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
+ OUT_BUFFER_2DW(batch, NULL, 0, 0);
+
+ /* The DW21-25 is for MFC Indirect PAK-BSE Object Base Address for Encoder
+ * Note: an offset is specified in MFX_AVC_SLICE_STATE
+ */
+ OUT_BUFFER_3DW(batch,
+ avc_ctx->compressed_bitstream.res.bo,
+ 1,
+ 0,
+ 0);
+ OUT_BUFFER_2DW(batch,
+ avc_ctx->compressed_bitstream.res.bo,
+ 1,
+ avc_ctx->compressed_bitstream.end_offset);
+
+ ADVANCE_BCS_BATCH(batch);
+}
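+
+/* Sizing note (a direct reading of the expressions above): the MV data
+ * buffer holds 32 four-byte entries per macroblock, and the 2-DW entry's
+ * upper bound is that size rounded up to a 4KB page:
+ *
+ *     size        = w_mb * h_mb * 32 * 4;    // bytes of MV data
+ *     upper_bound = ALIGN(size, 0x1000);     // page-aligned end address
+ */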
+
+static void
+gen9_mfc_avc_bsp_buf_base_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
+{
+ struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
+ struct gen9_avc_encoder_context * avc_ctx = (struct gen9_avc_encoder_context * )pak_context->private_enc_ctx;
+ struct intel_batchbuffer *batch = encoder_context->base.batch;
+
+ BEGIN_BCS_BATCH(batch, 10);
+
+ OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
+
+ /* The DW1-3 is for bsd/mpc row store scratch buffer */
+ OUT_BUFFER_3DW(batch, avc_ctx->res_bsd_mpc_row_store_scratch_buffer.bo, 1, 0, 0);
+
+ /* The DW4-6 is for MPR Row Store Scratch Buffer Base Address, ignore for encoder */
+ OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
+
+ /* The DW7-9 is for Bitplane Read Buffer Base Address, ignore for encoder */
+ OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
+
+ ADVANCE_BCS_BATCH(batch);
+}
+
+static void
+gen9_mfc_avc_directmode_state(VADriverContextP ctx,
+ struct intel_encoder_context *encoder_context)
+{
+ struct intel_batchbuffer *batch = encoder_context->base.batch;
+ struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
+ struct gen9_avc_encoder_context * avc_ctx = (struct gen9_avc_encoder_context * )pak_context->private_enc_ctx;
+ struct avc_enc_state * avc_state = (struct avc_enc_state * )pak_context->private_enc_state;
+
+ int i;
+
+ BEGIN_BCS_BATCH(batch, 71);
+
+ OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
+
+ /* Reference frames and Current frames */
+ /* the DW1-32 is for the direct MV for reference */
+ for (i = 0; i < NUM_MFC_AVC_DMV_BUFFERS - 2; i += 2) {
+ if (avc_ctx->res_direct_mv_buffersr[i].bo != NULL) {
+ OUT_BCS_RELOC(batch, avc_ctx->res_direct_mv_buffersr[i].bo,
+ I915_GEM_DOMAIN_INSTRUCTION, 0,
+ 0);
+ OUT_BCS_BATCH(batch, 0);
+ } else {
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+ }
+ }
+
+ OUT_BCS_BATCH(batch, 0);
+
+ /* the DW34-36 is the direct MV for the current frame */
+ OUT_BCS_RELOC(batch, avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 2].bo,
+ I915_GEM_DOMAIN_INSTRUCTION, 0,
+ 0);
+
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+
+ /* POC list */
+ for (i = 0; i < 32; i++) {
+ OUT_BCS_BATCH(batch, avc_state->top_field_poc[i]);
+ }
+ OUT_BCS_BATCH(batch, avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 2]);
+ OUT_BCS_BATCH(batch, avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 1]);
+
+ ADVANCE_BCS_BATCH(batch);
+}
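+
+/* Sketch of where the POC values above would come from (illustrative; the
+ * avc_state bookkeeping is filled in elsewhere): entry 2*i / 2*i+1 would
+ * hold the top/bottom POC of reference i, with the last two slots holding
+ * the current picture, e.g.
+ *
+ *     top_field_poc[2 * i]     = ref[i].TopFieldOrderCnt;
+ *     top_field_poc[2 * i + 1] = ref[i].BottomFieldOrderCnt;
+ */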
+
+static void
+gen9_mfc_qm_state(VADriverContextP ctx,
+ int qm_type,
+ const unsigned int *qm,
+ int qm_length,
+ struct intel_encoder_context *encoder_context)
+{
+ struct intel_batchbuffer *batch = encoder_context->base.batch;
+ unsigned int qm_buffer[16];
+
+ assert(qm_length <= 16);
+ assert(sizeof(*qm) == 4);
+ memset(qm_buffer, 0, 16 * 4);
+ memcpy(qm_buffer, qm, qm_length * 4);
+
+ BEGIN_BCS_BATCH(batch, 18);
+ OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
+ OUT_BCS_BATCH(batch, qm_type << 0);
+ intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
+ ADVANCE_BCS_BATCH(batch);
+}
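+
+/* Packing note (illustrative): MFX_QM_STATE takes 8-bit scaling-list
+ * entries packed four per dword, so the 16 payload dwords cover at most
+ * 64 entries. A flat list is all 16s, i.e. dwords of 0x10101010.
+ */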
+
+static void
+gen9_mfc_avc_qm_state(VADriverContextP ctx,
+ struct encode_state *encode_state,
+ struct intel_encoder_context *encoder_context)
+{
+ struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
+ struct avc_enc_state * avc_state = (struct avc_enc_state * )pak_context->private_enc_state;
+ VAEncSequenceParameterBufferH264 *seq_param = avc_state->seq_param;
+ VAEncPictureParameterBufferH264 *pic_param = avc_state->pic_param;
+
+ /* TODO: add support for non flat matrix */
+ const unsigned int *qm_4x4_intra;
+ const unsigned int *qm_4x4_inter;
+ const unsigned int *qm_8x8_intra;
+ const unsigned int *qm_8x8_inter;
+
+ if (!seq_param->seq_fields.bits.seq_scaling_matrix_present_flag
+ && !pic_param->pic_fields.bits.pic_scaling_matrix_present_flag) {
+ qm_4x4_intra = qm_4x4_inter = qm_8x8_intra = qm_8x8_inter = qm_flat;
+ } else {
+ VAIQMatrixBufferH264 *qm;
+ assert(encode_state->q_matrix && encode_state->q_matrix->buffer);
+ qm = (VAIQMatrixBufferH264 *)encode_state->q_matrix->buffer;
+ qm_4x4_intra = (unsigned int *)qm->ScalingList4x4[0];
+ qm_4x4_inter = (unsigned int *)qm->ScalingList4x4[3];
+ qm_8x8_intra = (unsigned int *)qm->ScalingList8x8[0];
+ qm_8x8_inter = (unsigned int *)qm->ScalingList8x8[1];
+ }
+
+ gen9_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm_4x4_intra, 12, encoder_context);
+ gen9_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm_4x4_inter, 12, encoder_context);
+ gen9_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm_8x8_intra, 16, encoder_context);
+ gen9_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm_8x8_inter, 16, encoder_context);
+}
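+
+/* Length note (a reading of the calls above): the 4x4 QM types carry three
+ * 16-entry lists (intra or inter Y/Cb/Cr), 48 bytes = 12 dwords, while each
+ * 8x8 type carries a single 64-entry list, 64 bytes = 16 dwords.
+ */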
+
+static void
+gen9_mfc_fqm_state(VADriverContextP ctx,
+ int fqm_type,
+ const unsigned int *fqm,
+ int fqm_length,
+ struct intel_encoder_context *encoder_context)
+{
+ struct intel_batchbuffer *batch = encoder_context->base.batch;
+ unsigned int fqm_buffer[32];
+
+ assert(fqm_length <= 32);
+ assert(sizeof(*fqm) == 4);
+ memset(fqm_buffer, 0, 32 * 4);
+ memcpy(fqm_buffer, fqm, fqm_length * 4);
+
+ BEGIN_BCS_BATCH(batch, 34);
+ OUT_BCS_BATCH(batch, MFX_FQM_STATE | (34 - 2));
+ OUT_BCS_BATCH(batch, fqm_type << 0);
+ intel_batchbuffer_data(batch, fqm_buffer, 32 * 4);
+ ADVANCE_BCS_BATCH(batch);
+}
+
+static void
+gen9_mfc_fill_fqm(uint8_t *qm, uint16_t *fqm, int len)
+{
+ int i, j;
+ for (i = 0; i < len; i++)
+ for (j = 0; j < len; j++)
+ fqm[i * len + j] = (1 << 16) / qm[j * len + i];
+}
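+
+/* Worked example: gen9_mfc_fill_fqm() stores, transposed, the 16-bit
+ * fixed-point reciprocal of each scaling-list entry. For the flat value 16:
+ *
+ *     (1 << 16) / 16 = 4096 = 0x1000
+ *
+ * so a flat FQM table is 0x1000 repeated, two 16-bit entries per dword.
+ */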
+
+static void
+gen9_mfc_avc_fqm_state(VADriverContextP ctx,
+ struct encode_state *encode_state,
+ struct intel_encoder_context *encoder_context)
+{
+ /* TODO: add support for non flat matrix */
+ struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
+ struct avc_enc_state * avc_state = (struct avc_enc_state * )pak_context->private_enc_state;
+ VAEncSequenceParameterBufferH264 *seq_param = avc_state->seq_param;
+ VAEncPictureParameterBufferH264 *pic_param = avc_state->pic_param;
+
+ if (!seq_param->seq_fields.bits.seq_scaling_matrix_present_flag
+ && !pic_param->pic_fields.bits.pic_scaling_matrix_present_flag) {
+ gen9_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, fqm_flat, 24, encoder_context);
+ gen9_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, fqm_flat, 24, encoder_context);
+ gen9_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, fqm_flat, 32, encoder_context);
+ gen9_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, fqm_flat, 32, encoder_context);
+ } else {
+ int i;
+ uint32_t fqm[32];
+ VAIQMatrixBufferH264 *qm;
+ assert(encode_state->q_matrix && encode_state->q_matrix->buffer);
+ qm = (VAIQMatrixBufferH264 *)encode_state->q_matrix->buffer;
+
+ for (i = 0; i < 3; i++)
+ gen9_mfc_fill_fqm(qm->ScalingList4x4[i], (uint16_t *)fqm + 16 * i, 4);
+ gen9_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, fqm, 24, encoder_context);
+
+ for (i = 3; i < 6; i++)
+ gen9_mfc_fill_fqm(qm->ScalingList4x4[i], (uint16_t *)fqm + 16 * (i - 3), 4);
+ gen9_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, fqm, 24, encoder_context);
+
+ gen9_mfc_fill_fqm(qm->ScalingList8x8[0], (uint16_t *)fqm, 8);
+ gen9_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, fqm, 32, encoder_context);
+
+ gen9_mfc_fill_fqm(qm->ScalingList8x8[1], (uint16_t *)fqm, 8);
+ gen9_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, fqm, 32, encoder_context);
+ }
+}
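+
+/* Sketch of the picture-level call order these helpers are written for
+ * (illustrative; the PAK dispatch that invokes them is not part of this
+ * patch, and the surface-state arguments shown are placeholders):
+ *
+ *     gen9_mfc_avc_pipe_mode_select(ctx, encode_state, encoder_context);
+ *     gen9_mfc_avc_surface_state(ctx, encoder_context, recon_res, 0);
+ *     gen9_mfc_avc_pipe_buf_addr_state(ctx, encoder_context);
+ *     gen9_mfc_avc_ind_obj_base_addr_state(ctx, encode_state, encoder_context);
+ *     gen9_mfc_avc_bsp_buf_base_addr_state(ctx, encoder_context);
+ *     gen9_mfc_avc_qm_state(ctx, encode_state, encoder_context);
+ *     gen9_mfc_avc_fqm_state(ctx, encode_state, encoder_context);
+ *     gen9_mfc_avc_directmode_state(ctx, encoder_context);   // B slices
+ */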