From d63fee8e20a8e09b9acea90366c4431d6d35f8b9 Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Sat, 24 Dec 2016 00:58:04 +0800 Subject: Support AVC VDEnc on KBL I verified AVC VDEnc on KBL with the HuC loading patch from https://patchwork.freedesktop.org/api/1.0/series/16584/revisions/1/mbox/ Signed-off-by: Xiang, Haihao Reviewed-by: Kelley, Sean V Tested-by: Tang, FuweiX --- src/gen9_vdenc.c | 231 +++++++++++++++++++++++++++++++++++++++++++++---- src/gen9_vdenc.h | 7 +- src/i965_defines.h | 1 + src/i965_device_info.c | 3 + 4 files changed, 224 insertions(+), 18 deletions(-) diff --git a/src/gen9_vdenc.c b/src/gen9_vdenc.c index 6402d41..1913a67 100644 --- a/src/gen9_vdenc.c +++ b/src/gen9_vdenc.c @@ -41,6 +41,9 @@ #include "intel_media.h" #include "gen9_vdenc.h" +extern int +intel_avc_enc_slice_type_fixup(int slice_type); + static const uint8_t buf_rate_adj_tab_i_lowdelay[72] = { 0, 0, -8, -12, -16, -20, -28, -36, 0, 0, -4, -8, -12, -16, -24, -32, @@ -2050,6 +2053,7 @@ gen9_vdenc_init_vdenc_img_state(VADriverContextP ctx, } pstate->dw1.transform_8x8_flag = vdenc_context->transform_8x8_mode_enable; + pstate->dw1.extended_pak_obj_cmd_enable = !!vdenc_context->use_extended_pak_obj_cmd; pstate->dw3.picture_width = vdenc_context->frame_width_in_mbs; @@ -2751,6 +2755,86 @@ gen9_vdenc_vdenc_walker_state(VADriverContextP ctx, ADVANCE_BCS_BATCH(batch); } +static void +gen95_vdenc_vdecn_weihgtsoffsets_state(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context, + VAEncSliceParameterBufferH264 *slice_param) +{ + struct intel_batchbuffer *batch = encoder_context->base.batch; + VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer; + + BEGIN_BCS_BATCH(batch, 3); + + OUT_BCS_BATCH(batch, VDENC_WEIGHTSOFFSETS_STATE | (3 - 2)); + + if (pic_param->pic_fields.bits.weighted_pred_flag == 1) { + OUT_BCS_BATCH(batch, (slice_param->luma_offset_l0[1] << 24 | + slice_param->luma_weight_l0[1] << 16 | + slice_param->luma_offset_l0[0] << 8 | + slice_param->luma_weight_l0[0] << 0)); + OUT_BCS_BATCH(batch, (slice_param->luma_offset_l0[2] << 8 | + slice_param->luma_weight_l0[2] << 0)); + } else { + OUT_BCS_BATCH(batch, (0 << 24 | + 1 << 16 | + 0 << 8 | + 1 << 0)); + OUT_BCS_BATCH(batch, (0 << 8 | + 1 << 0)); + } + + + ADVANCE_BCS_BATCH(batch); +} + +static void +gen95_vdenc_vdenc_walker_state(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context, + VAEncSliceParameterBufferH264 *slice_param, + VAEncSliceParameterBufferH264 *next_slice_param) +{ + struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context; + struct intel_batchbuffer *batch = encoder_context->base.batch; + VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer; + int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type); + int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos; + int luma_log2_weight_denom, weighted_pred_idc; + + slice_hor_pos = slice_param->macroblock_address % vdenc_context->frame_width_in_mbs; + slice_ver_pos = slice_param->macroblock_address / vdenc_context->frame_height_in_mbs; + + if (next_slice_param) { + next_slice_hor_pos = next_slice_param->macroblock_address % vdenc_context->frame_width_in_mbs; + next_slice_ver_pos = next_slice_param->macroblock_address / vdenc_context->frame_height_in_mbs; + } else { + next_slice_hor_pos = 0; + next_slice_ver_pos = vdenc_context->frame_height_in_mbs; + } + + if (slice_type == SLICE_TYPE_P) + weighted_pred_idc = pic_param->pic_fields.bits.weighted_pred_flag; + else + weighted_pred_idc = 0; + + if (weighted_pred_idc == 1) + luma_log2_weight_denom = slice_param->luma_log2_weight_denom; + else + luma_log2_weight_denom = 0; + + BEGIN_BCS_BATCH(batch, 4); + + OUT_BCS_BATCH(batch, VDENC_WALKER_STATE | (4 - 2)); + OUT_BCS_BATCH(batch, (slice_hor_pos << 16 | + slice_ver_pos)); + OUT_BCS_BATCH(batch, (next_slice_hor_pos << 16 | + next_slice_ver_pos)); + OUT_BCS_BATCH(batch, luma_log2_weight_denom); + + ADVANCE_BCS_BATCH(batch); +} + static void gen9_vdenc_vdenc_img_state(VADriverContextP ctx, struct encode_state *encode_state, @@ -2766,9 +2850,6 @@ gen9_vdenc_vdenc_img_state(VADriverContextP ctx, ADVANCE_BCS_BATCH(batch); } -extern int -intel_avc_enc_slice_type_fixup(int slice_type); - static void gen9_vdenc_mfx_avc_insert_object(VADriverContextP ctx, struct intel_encoder_context *encoder_context, @@ -2802,7 +2883,8 @@ static void gen9_vdenc_mfx_avc_insert_slice_packed_data(VADriverContextP ctx, struct encode_state *encode_state, struct intel_encoder_context *encoder_context, - int slice_index) + int slice_index, + unsigned int insert_one_zero_byte) { VAEncPackedHeaderParameterBuffer *param = NULL; unsigned int length_in_bits; @@ -2846,13 +2928,28 @@ gen9_vdenc_mfx_avc_insert_slice_packed_data(VADriverContextP ctx, 0, !param->has_emulation_bytes, 0); + + insert_one_zero_byte = 0; + } + + /* Insert one zero byte before the slice header if no any other NAL unit is inserted, required on KBL */ + if (insert_one_zero_byte) { + unsigned int insert_data[] = { 0, }; + + gen9_vdenc_mfx_avc_insert_object(ctx, + encoder_context, + insert_data, + 1, + 8, + 1, + 0, 0, 0, 0); } if (slice_header_index == -1) { VAEncSequenceParameterBufferH264 *seq_param = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer; VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer; VAEncSliceParameterBufferH264 *slice_params = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer; - unsigned char *slice_header = NULL; + unsigned char *slice_header = NULL, *slice_header1 = NULL; int slice_header_length_in_bits = 0; /* No slice header data is passed. And the driver needs to generate it */ @@ -2861,9 +2958,17 @@ gen9_vdenc_mfx_avc_insert_slice_packed_data(VADriverContextP ctx, pic_param, slice_params, &slice_header); + + slice_header1 = slice_header; + + if (insert_one_zero_byte) { + slice_header1 += 1; + slice_header_length_in_bits -= 8; + } + gen9_vdenc_mfx_avc_insert_object(ctx, encoder_context, - (unsigned int *)slice_header, + (unsigned int *)slice_header1, ALIGN(slice_header_length_in_bits, 32) >> 5, slice_header_length_in_bits & 0x1f, 5, /* first 5 bytes are start code + nal unit type */ @@ -2873,20 +2978,31 @@ gen9_vdenc_mfx_avc_insert_slice_packed_data(VADriverContextP ctx, free(slice_header); } else { unsigned int skip_emul_byte_cnt; + unsigned char *slice_header1 = NULL; header_data = (unsigned int *)encode_state->packed_header_data_ext[slice_header_index]->buffer; param = (VAEncPackedHeaderParameterBuffer *)(encode_state->packed_header_params_ext[slice_header_index]->buffer); length_in_bits = param->bit_length; + slice_header1 = (unsigned char *)header_data; + + if (insert_one_zero_byte) { + slice_header1 += 1; + length_in_bits -= 8; + } + /* as the slice header is the last header data for one slice, * the last header flag is set to one. */ skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits); + if (insert_one_zero_byte) + skip_emul_byte_cnt -= 1; + gen9_vdenc_mfx_avc_insert_object(ctx, encoder_context, - header_data, + (unsigned int *)slice_header1, ALIGN(length_in_bits, 32) >> 5, length_in_bits & 0x1f, skip_emul_byte_cnt, @@ -2910,8 +3026,11 @@ gen9_vdenc_mfx_avc_inset_headers(VADriverContextP ctx, int idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SPS); unsigned int internal_rate_mode = vdenc_context->internal_rate_mode; unsigned int skip_emul_byte_cnt; + unsigned int insert_one_zero_byte = 0; if (slice_index == 0) { + insert_one_zero_byte = 1; + if (encode_state->packed_header_data[idx]) { VAEncPackedHeaderParameterBuffer *param = NULL; unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer; @@ -2932,6 +3051,8 @@ gen9_vdenc_mfx_avc_inset_headers(VADriverContextP ctx, 0, !param->has_emulation_bytes, 0); + + insert_one_zero_byte = 0; } idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_PPS); @@ -2957,6 +3078,8 @@ gen9_vdenc_mfx_avc_inset_headers(VADriverContextP ctx, 0, !param->has_emulation_bytes, 0); + + insert_one_zero_byte = 0; } idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SEI); @@ -2981,15 +3104,21 @@ gen9_vdenc_mfx_avc_inset_headers(VADriverContextP ctx, 0, !param->has_emulation_bytes, 0); + + insert_one_zero_byte = 0; } else if (internal_rate_mode == I965_BRC_CBR) { /* TODO: insert others */ } } + if (vdenc_context->is_frame_level_vdenc) + insert_one_zero_byte = 0; + gen9_vdenc_mfx_avc_insert_slice_packed_data(ctx, encode_state, encoder_context, - slice_index); + slice_index, + insert_one_zero_byte); } static void @@ -3233,6 +3362,7 @@ gen9_vdenc_mfx_avc_single_slice(VADriverContextP ctx, VAEncSliceParameterBufferH264 *next_slice_param, int slice_index) { + struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context; VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer; gen9_vdenc_mfx_avc_ref_idx_state(ctx, encode_state, encoder_context, slice_param); @@ -3252,6 +3382,18 @@ gen9_vdenc_mfx_avc_single_slice(VADriverContextP ctx, encoder_context, slice_param, slice_index); + + if (!vdenc_context->is_frame_level_vdenc) { + gen95_vdenc_vdecn_weihgtsoffsets_state(ctx, + encode_state, + encoder_context, + slice_param); + gen95_vdenc_vdenc_walker_state(ctx, + encode_state, + encoder_context, + slice_param, + next_slice_param); + } } static void @@ -3259,12 +3401,12 @@ gen9_vdenc_mfx_vdenc_avc_slices(VADriverContextP ctx, struct encode_state *encode_state, struct intel_encoder_context *encoder_context) { + struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context; struct intel_batchbuffer *batch = encoder_context->base.batch; struct gpe_mi_flush_dw_parameter mi_flush_dw_params; VAEncSliceParameterBufferH264 *slice_param, *next_slice_param, *next_slice_group_param; int i, j; int slice_index = 0; - int is_frame_level_vdenc = 1; /* TODO: check it for SKL */ int has_tail = 0; /* TODO: check it later */ for (j = 0; j < encode_state->num_slice_params_ext; j++) { @@ -3287,22 +3429,47 @@ gen9_vdenc_mfx_vdenc_avc_slices(VADriverContextP ctx, slice_param, next_slice_param, slice_index); - slice_param++; - slice_index++; - if (is_frame_level_vdenc) + if (vdenc_context->is_frame_level_vdenc) break; else { - /* TODO: remove assert(0) and add other commands here */ - assert(0); + struct vd_pipeline_flush_parameter pipeline_flush_params; + int insert_mi_flush; + + memset(&pipeline_flush_params, 0, sizeof(pipeline_flush_params)); + + if (next_slice_group_param) { + pipeline_flush_params.mfx_pipeline_done = 0; + insert_mi_flush = 1; + } else if (i < encode_state->slice_params_ext[j]->num_elements - 1) { + pipeline_flush_params.mfx_pipeline_done = 0; + insert_mi_flush = 1; + } else { + pipeline_flush_params.mfx_pipeline_done = !has_tail; + insert_mi_flush = 0; + } + + pipeline_flush_params.vdenc_pipeline_done = 1; + pipeline_flush_params.vdenc_pipeline_command_flush = 1; + pipeline_flush_params.vd_command_message_parser_done = 1; + gen9_vdenc_vd_pipeline_flush(ctx, encoder_context, &pipeline_flush_params); + + if (insert_mi_flush) { + memset(&mi_flush_dw_params, 0, sizeof(mi_flush_dw_params)); + mi_flush_dw_params.video_pipeline_cache_invalidate = 1; + gen8_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_params); + } } + + slice_param++; + slice_index++; } - if (is_frame_level_vdenc) + if (vdenc_context->is_frame_level_vdenc) break; } - if (is_frame_level_vdenc) { + if (vdenc_context->is_frame_level_vdenc) { struct vd_pipeline_flush_parameter pipeline_flush_params; gen9_vdenc_vdenc_walker_state(ctx, encode_state, encoder_context); @@ -3653,6 +3820,36 @@ gen9_vdenc_allocate_resources(VADriverContextP ctx, "HuC Status buffer"); } +static void +gen9_vdenc_hw_interfaces_init(VADriverContextP ctx, + struct intel_encoder_context *encoder_context, + struct gen9_vdenc_context *vdenc_context) +{ + vdenc_context->is_frame_level_vdenc = 1; +} + +static void +gen95_vdenc_hw_interfaces_init(VADriverContextP ctx, + struct intel_encoder_context *encoder_context, + struct gen9_vdenc_context *vdenc_context) +{ + vdenc_context->use_extended_pak_obj_cmd = 1; +} + +static void +vdenc_hw_interfaces_init(VADriverContextP ctx, + struct intel_encoder_context *encoder_context, + struct gen9_vdenc_context *vdenc_context) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + + if (IS_KBL(i965->intel.device_info)) { + gen95_vdenc_hw_interfaces_init(ctx, encoder_context, vdenc_context); + } else { + gen9_vdenc_hw_interfaces_init(ctx, encoder_context, vdenc_context); + } +} + static VAStatus gen9_vdenc_context_get_status(VADriverContextP ctx, struct intel_encoder_context *encoder_context, @@ -3680,7 +3877,9 @@ gen9_vdenc_context_init(VADriverContextP ctx, struct intel_encoder_context *enco vdenc_context->num_passes = 1; vdenc_context->vdenc_streamin_enable = 0; vdenc_context->vdenc_pak_threshold_check_enable = 0; + vdenc_context->is_frame_level_vdenc = 0; + vdenc_hw_interfaces_init(ctx, encoder_context, vdenc_context); gen9_vdenc_allocate_resources(ctx, encoder_context, vdenc_context); encoder_context->mfc_context = vdenc_context; diff --git a/src/gen9_vdenc.h b/src/gen9_vdenc.h index 41e4362..ad0f2ae 100644 --- a/src/gen9_vdenc.h +++ b/src/gen9_vdenc.h @@ -304,7 +304,8 @@ struct gen9_vdenc_img_state uint32_t bidirectional_mix_disable:1; uint32_t pad1:1; uint32_t time_budget_overflow_check:1; - uint32_t pad2:2; + uint32_t pad2:1; + uint32_t extended_pak_obj_cmd_enable:1; uint32_t transform_8x8_flag:1; uint32_t vdenc_l1_cache_priority:2; uint32_t pad3:22; @@ -776,7 +777,9 @@ struct gen9_vdenc_context uint32_t frame_type:2; uint32_t mb_brc_enabled:1; - uint32_t pad0:31; + uint32_t is_frame_level_vdenc:1; + uint32_t use_extended_pak_obj_cmd:1; + uint32_t pad0:29; struct i965_gpe_resource brc_init_reset_dmem_res; struct i965_gpe_resource brc_history_buffer_res; diff --git a/src/i965_defines.h b/src/i965_defines.h index f86ac8e..941ad4e 100755 --- a/src/i965_defines.h +++ b/src/i965_defines.h @@ -959,6 +959,7 @@ #define VDENC_IMG_STATE VDENC(1, 0, 5) #define VDENC_CONST_QPT_STATE VDENC(1, 0, 6) #define VDENC_WALKER_STATE VDENC(1, 0, 7) +#define VDENC_WEIGHTSOFFSETS_STATE VDENC(1, 0, 8) #define VDENC_CODEC_AVC 2 diff --git a/src/i965_device_info.c b/src/i965_device_info.c index 73602ae..0fc8930 100644 --- a/src/i965_device_info.c +++ b/src/i965_device_info.c @@ -481,6 +481,9 @@ static struct hw_codec_info kbl_hw_codec_info = { .has_vp9_decoding = 1, .has_vpp_p010 = 1, .has_vp9_encoding = 1, + .has_lp_h264_encoding = 1, + + .lp_h264_brc_mode = VA_RC_CQP, .num_filters = 5, .filters = { -- cgit v1.2.3