diff options
author | Pengfei Qu <Pengfei.Qu@intel.com> | 2016-12-28 13:45:06 +0800 |
---|---|---|
committer | Sean V Kelley <seanvk@posteo.de> | 2017-01-10 15:22:22 -0800 |
commit | f704bff5dd4ea3dc6c899c93506ee52b4d0ef558 (patch) | |
tree | 77e77d774a011612f0646d2b2526d035fda80913 | |
parent | ee4adf3a2ada440bbf4da4650df47f61416b7741 (diff) |
ENC: add MFX command for AVC encoder
Signed-off-by: Pengfei Qu <Pengfei.Qu@intel.com>
Reviewed-by: Sean V Kelley <seanvk@posteo.de>
-rwxr-xr-x | src/gen9_avc_encoder.c | 399 |
1 files changed, 399 insertions, 0 deletions
diff --git a/src/gen9_avc_encoder.c b/src/gen9_avc_encoder.c index 629a0da..8b11c5a 100755 --- a/src/gen9_avc_encoder.c +++ b/src/gen9_avc_encoder.c @@ -5625,3 +5625,402 @@ gen9_avc_kernel_init(VADriverContextP ctx, generic_ctx->pfn_send_sfd_surface = gen9_avc_send_surface_sfd; generic_ctx->pfn_send_wp_surface = gen9_avc_send_surface_wp; } + +/* +PAK pipeline related function +*/ +extern int +intel_avc_enc_slice_type_fixup(int slice_type); + +static void +gen9_mfc_avc_pipe_mode_select(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) +{ + struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context; + struct gen9_avc_encoder_context * avc_ctx = (struct gen9_avc_encoder_context * )pak_context->private_enc_ctx; + struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )pak_context->generic_enc_state; + struct intel_batchbuffer *batch = encoder_context->base.batch; + + BEGIN_BCS_BATCH(batch, 5); + + OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2)); + OUT_BCS_BATCH(batch, + (0 << 29) | + (MFX_LONG_MODE << 17) | /* Must be long format for encoder */ + (MFD_MODE_VLD << 15) | + (0 << 13) | /* VDEnc mode is 1*/ + ((generic_state->curr_pak_pass != (generic_state->num_pak_passes -1)) << 10) | /* Stream-Out Enable */ + ((!!avc_ctx->res_post_deblocking_output.bo) << 9) | /* Post Deblocking Output */ + ((!!avc_ctx->res_pre_deblocking_output.bo) << 8) | /* Pre Deblocking Output */ + (0 << 7) | /* Scaled surface enable */ + (0 << 6) | /* Frame statistics stream out enable, always '1' in VDEnc mode */ + (0 << 5) | /* not in stitch mode */ + (1 << 4) | /* encoding mode */ + (MFX_FORMAT_AVC << 0)); + OUT_BCS_BATCH(batch, + (0 << 7) | /* expand NOA bus flag */ + (0 << 6) | /* disable slice-level clock gating */ + (0 << 5) | /* disable clock gating for NOA */ + (0 << 4) | /* terminate if AVC motion and POC table error occurs */ + (0 << 3) | /* terminate if AVC mbdata error occurs */ + (0 << 2) | /* terminate if AVC CABAC/CAVLC decode error occurs */ + (0 << 1) | + (0 << 0)); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + + ADVANCE_BCS_BATCH(batch); +} + +static void +gen9_mfc_avc_surface_state(VADriverContextP ctx, + struct intel_encoder_context *encoder_context, + struct i965_gpe_resource *gpe_resource, + int id) +{ + struct intel_batchbuffer *batch = encoder_context->base.batch; + + BEGIN_BCS_BATCH(batch, 6); + + OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2)); + OUT_BCS_BATCH(batch, id); + OUT_BCS_BATCH(batch, + ((gpe_resource->height - 1) << 18) | + ((gpe_resource->width - 1) << 4)); + OUT_BCS_BATCH(batch, + (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */ + (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */ + ((gpe_resource->pitch - 1) << 3) | /* pitch */ + (0 << 2) | /* must be 0 for interleave U/V */ + (1 << 1) | /* must be tiled */ + (I965_TILEWALK_YMAJOR << 0)); /* tile walk, TILEWALK_YMAJOR */ + OUT_BCS_BATCH(batch, + (0 << 16) | /* must be 0 for interleave U/V */ + (gpe_resource->y_cb_offset)); /* y offset for U(cb) */ + OUT_BCS_BATCH(batch, + (0 << 16) | /* must be 0 for interleave U/V */ + (gpe_resource->y_cb_offset)); /* y offset for U(cb) */ + + ADVANCE_BCS_BATCH(batch); +} + +static void +gen9_mfc_avc_pipe_buf_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context) +{ + struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context; + struct gen9_avc_encoder_context * avc_ctx = (struct gen9_avc_encoder_context * )pak_context->private_enc_ctx; + struct intel_batchbuffer *batch = encoder_context->base.batch; + int i; + + BEGIN_BCS_BATCH(batch, 65); + + OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (65 - 2)); + + /* the DW1-3 is for pre_deblocking */ + OUT_BUFFER_3DW(batch, avc_ctx->res_pre_deblocking_output.bo, 1, 0, 0); + + /* the DW4-6 is for the post_deblocking */ + OUT_BUFFER_3DW(batch, avc_ctx->res_post_deblocking_output.bo, 1, 0, 0); + + /* the DW7-9 is for the uncompressed_picture */ + OUT_BUFFER_3DW(batch, avc_ctx->res_uncompressed_input_surface.bo, 1, 0, 0); + + /* the DW10-12 is for PAK information (write) */ + OUT_BUFFER_3DW(batch, avc_ctx->res_pak_mb_status_buffer.bo, 1, 0, 0);//? + + /* the DW13-15 is for the intra_row_store_scratch */ + OUT_BUFFER_3DW(batch, avc_ctx->res_intra_row_store_scratch_buffer.bo, 1, 0, 0); + + /* the DW16-18 is for the deblocking filter */ + OUT_BUFFER_3DW(batch, avc_ctx->res_deblocking_filter_row_store_scratch_buffer.bo, 1, 0, 0); + + /* the DW 19-50 is for Reference pictures*/ + for (i = 0; i < ARRAY_ELEMS(avc_ctx->list_reference_res); i++) { + OUT_BUFFER_2DW(batch, avc_ctx->list_reference_res[i].bo, 1, 0); + } + + /* DW 51, reference picture attributes */ + OUT_BCS_BATCH(batch, 0); + + /* The DW 52-54 is for PAK information (read) */ + OUT_BUFFER_3DW(batch, avc_ctx->res_pak_mb_status_buffer.bo, 1, 0, 0); + + /* the DW 55-57 is the ILDB buffer */ + OUT_BUFFER_3DW(batch, NULL, 0, 0, 0); + + /* the DW 58-60 is the second ILDB buffer */ + OUT_BUFFER_3DW(batch, NULL, 0, 0, 0); + + /* DW 61, memory compress enable & mode */ + OUT_BCS_BATCH(batch, 0); + + /* the DW 62-64 is the buffer */ + OUT_BUFFER_3DW(batch, NULL, 0, 0, 0); + + ADVANCE_BCS_BATCH(batch); +} + +static void +gen9_mfc_avc_ind_obj_base_addr_state(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) +{ + struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context; + struct gen9_avc_encoder_context * avc_ctx = (struct gen9_avc_encoder_context * )pak_context->private_enc_ctx; + struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )pak_context->generic_enc_state; + struct intel_batchbuffer *batch = encoder_context->base.batch; + struct object_surface *obj_surface; + struct gen9_surface_avc *avc_priv_surface; + unsigned int size = 0; + unsigned int w_mb = generic_state->frame_width_in_mbs; + unsigned int h_mb = generic_state->frame_height_in_mbs; + + obj_surface = encode_state->reconstructed_object; + + if (!obj_surface || !obj_surface->private_data) + return; + avc_priv_surface = obj_surface->private_data; + + BEGIN_BCS_BATCH(batch, 26); + + OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2)); + /* The DW1-5 is for the MFX indirect bistream offset, ignore for VDEnc mode */ + OUT_BUFFER_3DW(batch, NULL, 0, 0, 0); + OUT_BUFFER_2DW(batch, NULL, 0, 0); + + /* the DW6-10 is for MFX Indirect MV Object Base Address, ignore for VDEnc mode */ + size = w_mb * h_mb * 32 * 4; + OUT_BUFFER_3DW(batch, + avc_priv_surface->res_mv_data_surface.bo, + 1, + 0, + 0); + OUT_BUFFER_2DW(batch, + avc_priv_surface->res_mv_data_surface.bo, + 1, + ALIGN(size,0x1000)); + + /* The DW11-15 is for MFX IT-COFF. Not used on encoder */ + OUT_BUFFER_3DW(batch, NULL, 0, 0, 0); + OUT_BUFFER_2DW(batch, NULL, 0, 0); + + /* The DW16-20 is for MFX indirect DBLK. Not used on encoder */ + OUT_BUFFER_3DW(batch, NULL, 0, 0, 0); + OUT_BUFFER_2DW(batch, NULL, 0, 0); + + /* The DW21-25 is for MFC Indirect PAK-BSE Object Base Address for Encoder + * Note: an offset is specified in MFX_AVC_SLICE_STATE + */ + OUT_BUFFER_3DW(batch, + avc_ctx->compressed_bitstream.res.bo, + 1, + 0, + 0); + OUT_BUFFER_2DW(batch, + avc_ctx->compressed_bitstream.res.bo, + 1, + avc_ctx->compressed_bitstream.end_offset); + + ADVANCE_BCS_BATCH(batch); +} + +static void +gen9_mfc_avc_bsp_buf_base_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context) +{ + struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context; + struct gen9_avc_encoder_context * avc_ctx = (struct gen9_avc_encoder_context * )pak_context->private_enc_ctx; + struct intel_batchbuffer *batch = encoder_context->base.batch; + + BEGIN_BCS_BATCH(batch, 10); + + OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2)); + + /* The DW1-3 is for bsd/mpc row store scratch buffer */ + OUT_BUFFER_3DW(batch, avc_ctx->res_bsd_mpc_row_store_scratch_buffer.bo, 1, 0, 0); + + /* The DW4-6 is for MPR Row Store Scratch Buffer Base Address, ignore for encoder */ + OUT_BUFFER_3DW(batch, NULL, 0, 0, 0); + + /* The DW7-9 is for Bitplane Read Buffer Base Address, ignore for encoder */ + OUT_BUFFER_3DW(batch, NULL, 0, 0, 0); + + ADVANCE_BCS_BATCH(batch); +} + +static void +gen9_mfc_avc_directmode_state(VADriverContextP ctx, + struct intel_encoder_context *encoder_context) +{ + struct intel_batchbuffer *batch = encoder_context->base.batch; + struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context; + struct gen9_avc_encoder_context * avc_ctx = (struct gen9_avc_encoder_context * )pak_context->private_enc_ctx; + struct avc_enc_state * avc_state = (struct avc_enc_state * )pak_context->private_enc_state; + + int i; + + BEGIN_BCS_BATCH(batch, 71); + + OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2)); + + /* Reference frames and Current frames */ + /* the DW1-32 is for the direct MV for reference */ + for(i = 0; i < NUM_MFC_AVC_DMV_BUFFERS - 2; i += 2) { + if ( avc_ctx->res_direct_mv_buffersr[i].bo != NULL) { + OUT_BCS_RELOC(batch, avc_ctx->res_direct_mv_buffersr[i].bo, + I915_GEM_DOMAIN_INSTRUCTION, 0, + 0); + OUT_BCS_BATCH(batch, 0); + } else { + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + } + } + + OUT_BCS_BATCH(batch, 0); + + /* the DW34-36 is the MV for the current reference */ + OUT_BCS_RELOC(batch, avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 2].bo, + I915_GEM_DOMAIN_INSTRUCTION, 0, + 0); + + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + + /* POL list */ + for(i = 0; i < 32; i++) { + OUT_BCS_BATCH(batch, avc_state->top_field_poc[i]); + } + OUT_BCS_BATCH(batch, avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 2]); + OUT_BCS_BATCH(batch, avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 1]); + + ADVANCE_BCS_BATCH(batch); +} + +static void +gen9_mfc_qm_state(VADriverContextP ctx, + int qm_type, + const unsigned int *qm, + int qm_length, + struct intel_encoder_context *encoder_context) +{ + struct intel_batchbuffer *batch = encoder_context->base.batch; + unsigned int qm_buffer[16]; + + assert(qm_length <= 16); + assert(sizeof(*qm) == 4); + memset(qm_buffer,0,16*4); + memcpy(qm_buffer, qm, qm_length * 4); + + BEGIN_BCS_BATCH(batch, 18); + OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2)); + OUT_BCS_BATCH(batch, qm_type << 0); + intel_batchbuffer_data(batch, qm_buffer, 16 * 4); + ADVANCE_BCS_BATCH(batch); +} + +static void +gen9_mfc_avc_qm_state(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) +{ + struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context; + struct avc_enc_state * avc_state = (struct avc_enc_state * )pak_context->private_enc_state; + VAEncSequenceParameterBufferH264 *seq_param = avc_state->seq_param; + VAEncPictureParameterBufferH264 *pic_param = avc_state->pic_param; + + /* TODO: add support for non flat matrix */ + const unsigned int *qm_4x4_intra; + const unsigned int *qm_4x4_inter; + const unsigned int *qm_8x8_intra; + const unsigned int *qm_8x8_inter; + + if (!seq_param->seq_fields.bits.seq_scaling_matrix_present_flag + && !pic_param->pic_fields.bits.pic_scaling_matrix_present_flag) { + qm_4x4_intra = qm_4x4_inter = qm_8x8_intra = qm_8x8_inter = qm_flat; + } else { + VAIQMatrixBufferH264 *qm; + assert(encode_state->q_matrix && encode_state->q_matrix->buffer); + qm = (VAIQMatrixBufferH264 *)encode_state->q_matrix->buffer; + qm_4x4_intra = (unsigned int *)qm->ScalingList4x4[0]; + qm_4x4_inter = (unsigned int *)qm->ScalingList4x4[3]; + qm_8x8_intra = (unsigned int *)qm->ScalingList8x8[0]; + qm_8x8_inter = (unsigned int *)qm->ScalingList8x8[1]; + } + + gen9_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm_4x4_intra, 12, encoder_context); + gen9_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm_4x4_inter, 12, encoder_context); + gen9_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm_8x8_intra, 16, encoder_context); + gen9_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm_8x8_inter, 16, encoder_context); +} + +static void +gen9_mfc_fqm_state(VADriverContextP ctx, + int fqm_type, + const unsigned int *fqm, + int fqm_length, + struct intel_encoder_context *encoder_context) +{ + struct intel_batchbuffer *batch = encoder_context->base.batch; + unsigned int fqm_buffer[32]; + + assert(fqm_length <= 32); + assert(sizeof(*fqm) == 4); + memset(fqm_buffer,0,32*4); + memcpy(fqm_buffer, fqm, fqm_length * 4); + + BEGIN_BCS_BATCH(batch, 34); + OUT_BCS_BATCH(batch, MFX_FQM_STATE | (34 - 2)); + OUT_BCS_BATCH(batch, fqm_type << 0); + intel_batchbuffer_data(batch, fqm_buffer, 32 * 4); + ADVANCE_BCS_BATCH(batch); +} + +static void +gen9_mfc_fill_fqm(uint8_t *qm, uint16_t *fqm, int len) +{ + int i, j; + for (i = 0; i < len; i++) + for (j = 0; j < len; j++) + fqm[i * len + j] = (1 << 16) / qm[j * len + i]; +} + +static void +gen9_mfc_avc_fqm_state(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) +{ + /* TODO: add support for non flat matrix */ + struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context; + struct avc_enc_state * avc_state = (struct avc_enc_state * )pak_context->private_enc_state; + VAEncSequenceParameterBufferH264 *seq_param = avc_state->seq_param; + VAEncPictureParameterBufferH264 *pic_param = avc_state->pic_param; + + if (!seq_param->seq_fields.bits.seq_scaling_matrix_present_flag + && !pic_param->pic_fields.bits.pic_scaling_matrix_present_flag) { + gen9_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, fqm_flat, 24, encoder_context); + gen9_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, fqm_flat, 24, encoder_context); + gen9_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, fqm_flat, 32, encoder_context); + gen9_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, fqm_flat, 32, encoder_context); + } else { + int i; + uint32_t fqm[32]; + VAIQMatrixBufferH264 *qm; + assert(encode_state->q_matrix && encode_state->q_matrix->buffer); + qm = (VAIQMatrixBufferH264 *)encode_state->q_matrix->buffer; + + for (i = 0; i < 3; i++) + gen9_mfc_fill_fqm(qm->ScalingList4x4[i], (uint16_t *)fqm + 16 * i, 4); + gen9_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, fqm, 24, encoder_context); + + for (i = 3; i < 6; i++) + gen9_mfc_fill_fqm(qm->ScalingList4x4[i], (uint16_t *)fqm + 16 * (i - 3), 4); + gen9_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, fqm, 24, encoder_context); + + gen9_mfc_fill_fqm(qm->ScalingList8x8[0], (uint16_t *)fqm, 8); + gen9_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, fqm, 32, encoder_context); + + gen9_mfc_fill_fqm(qm->ScalingList8x8[1], (uint16_t *)fqm, 8); + gen9_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, fqm, 32, encoder_context); + } +} |