diff options
-rw-r--r-- | drivers/media/platform/verisilicon/Kconfig | 8 | ||||
-rw-r--r-- | drivers/media/platform/verisilicon/hantro_g2.c | 29 | ||||
-rw-r--r-- | drivers/media/platform/verisilicon/hantro_g2_hevc_dec.c | 20 | ||||
-rw-r--r-- | drivers/media/platform/verisilicon/hantro_g2_regs.h | 4 | ||||
-rw-r--r-- | drivers/media/platform/verisilicon/hantro_hevc.c | 8 | ||||
-rw-r--r-- | drivers/media/platform/verisilicon/hantro_hw.h | 38 | ||||
-rw-r--r-- | drivers/media/platform/verisilicon/hantro_postproc.c | 6 |
7 files changed, 109 insertions, 4 deletions
diff --git a/drivers/media/platform/verisilicon/Kconfig b/drivers/media/platform/verisilicon/Kconfig index 149d0b32c324..3272a24db71d 100644 --- a/drivers/media/platform/verisilicon/Kconfig +++ b/drivers/media/platform/verisilicon/Kconfig @@ -21,6 +21,14 @@ config VIDEO_HANTRO To compile this driver as a module, choose M here: the module will be called hantro-vpu. +config VIDEO_HANTRO_HEVC_RFC + bool "Use reference frame compression for HEVC" + depends on VIDEO_HANTRO + default n + help + Enable the reference frame compression feature for the HEVC codec. + It will use more memory but save bandwidth on memory bus. + config VIDEO_HANTRO_IMX8M bool "Hantro VPU i.MX8M support" depends on VIDEO_HANTRO diff --git a/drivers/media/platform/verisilicon/hantro_g2.c b/drivers/media/platform/verisilicon/hantro_g2.c index b880a6849d58..5c1d799d8618 100644 --- a/drivers/media/platform/verisilicon/hantro_g2.c +++ b/drivers/media/platform/verisilicon/hantro_g2.c @@ -56,3 +56,32 @@ size_t hantro_g2_motion_vectors_offset(struct hantro_ctx *ctx) return ALIGN((cr_offset * 3) / 2, G2_ALIGN); } + +static size_t hantro_g2_mv_size(struct hantro_ctx *ctx) +{ + const struct hantro_hevc_dec_ctrls *ctrls = &ctx->hevc_dec.ctrls; + const struct v4l2_ctrl_hevc_sps *sps = ctrls->sps; + unsigned int pic_width_in_ctbs, pic_height_in_ctbs; + unsigned int max_log2_ctb_size; + + max_log2_ctb_size = sps->log2_min_luma_coding_block_size_minus3 + 3 + + sps->log2_diff_max_min_luma_coding_block_size; + pic_width_in_ctbs = (sps->pic_width_in_luma_samples + + (1 << max_log2_ctb_size) - 1) >> max_log2_ctb_size; + pic_height_in_ctbs = (sps->pic_height_in_luma_samples + (1 << max_log2_ctb_size) - 1) + >> max_log2_ctb_size; + + return pic_width_in_ctbs * pic_height_in_ctbs * (1 << (2 * (max_log2_ctb_size - 4))) * 16; +} + +size_t hantro_g2_luma_compress_offset(struct hantro_ctx *ctx) +{ + return hantro_g2_motion_vectors_offset(ctx) + + hantro_g2_mv_size(ctx); +} + +size_t hantro_g2_chroma_compress_offset(struct hantro_ctx *ctx) +{ + return hantro_g2_luma_compress_offset(ctx) + + hantro_hevc_luma_compressed_size(ctx->dst_fmt.width, ctx->dst_fmt.height); +} diff --git a/drivers/media/platform/verisilicon/hantro_g2_hevc_dec.c b/drivers/media/platform/verisilicon/hantro_g2_hevc_dec.c index d3f8c33eb16c..85a44143b378 100644 --- a/drivers/media/platform/verisilicon/hantro_g2_hevc_dec.c +++ b/drivers/media/platform/verisilicon/hantro_g2_hevc_dec.c @@ -367,11 +367,14 @@ static int set_ref(struct hantro_ctx *ctx) const struct v4l2_ctrl_hevc_decode_params *decode_params = ctrls->decode_params; const struct v4l2_hevc_dpb_entry *dpb = decode_params->dpb; dma_addr_t luma_addr, chroma_addr, mv_addr = 0; + dma_addr_t compress_luma_addr, compress_chroma_addr = 0; struct hantro_dev *vpu = ctx->dev; struct vb2_v4l2_buffer *vb2_dst; struct hantro_decoded_buffer *dst; size_t cr_offset = hantro_g2_chroma_offset(ctx); size_t mv_offset = hantro_g2_motion_vectors_offset(ctx); + size_t compress_luma_offset = hantro_g2_luma_compress_offset(ctx); + size_t compress_chroma_offset = hantro_g2_chroma_compress_offset(ctx); u32 max_ref_frames; u16 dpb_longterm_e; static const struct hantro_reg cur_poc[] = { @@ -445,6 +448,8 @@ static int set_ref(struct hantro_ctx *ctx) chroma_addr = luma_addr + cr_offset; mv_addr = luma_addr + mv_offset; + compress_luma_addr = luma_addr + compress_luma_offset; + compress_chroma_addr = luma_addr + compress_chroma_offset; if (dpb[i].flags & V4L2_HEVC_DPB_ENTRY_LONG_TERM_REFERENCE) dpb_longterm_e |= BIT(V4L2_HEVC_DPB_ENTRIES_NUM_MAX - 1 - i); @@ -452,6 +457,8 @@ static int set_ref(struct hantro_ctx *ctx) hantro_write_addr(vpu, G2_REF_LUMA_ADDR(i), luma_addr); hantro_write_addr(vpu, G2_REF_CHROMA_ADDR(i), chroma_addr); hantro_write_addr(vpu, G2_REF_MV_ADDR(i), mv_addr); + hantro_write_addr(vpu, G2_REF_COMP_LUMA_ADDR(i), compress_luma_addr); + hantro_write_addr(vpu, G2_REF_COMP_CHROMA_ADDR(i), compress_chroma_addr); } vb2_dst = hantro_get_dst_buf(ctx); @@ -465,19 +472,27 @@ static int set_ref(struct hantro_ctx *ctx) chroma_addr = luma_addr + cr_offset; mv_addr = luma_addr + mv_offset; + compress_luma_addr = luma_addr + compress_luma_offset; + compress_chroma_addr = luma_addr + compress_chroma_offset; hantro_write_addr(vpu, G2_REF_LUMA_ADDR(i), luma_addr); hantro_write_addr(vpu, G2_REF_CHROMA_ADDR(i), chroma_addr); - hantro_write_addr(vpu, G2_REF_MV_ADDR(i++), mv_addr); + hantro_write_addr(vpu, G2_REF_MV_ADDR(i), mv_addr); + hantro_write_addr(vpu, G2_REF_COMP_LUMA_ADDR(i), compress_luma_addr); + hantro_write_addr(vpu, G2_REF_COMP_CHROMA_ADDR(i++), compress_chroma_addr); hantro_write_addr(vpu, G2_OUT_LUMA_ADDR, luma_addr); hantro_write_addr(vpu, G2_OUT_CHROMA_ADDR, chroma_addr); hantro_write_addr(vpu, G2_OUT_MV_ADDR, mv_addr); + hantro_write_addr(vpu, G2_OUT_COMP_LUMA_ADDR, compress_luma_addr); + hantro_write_addr(vpu, G2_OUT_COMP_CHROMA_ADDR, compress_chroma_addr); for (; i < V4L2_HEVC_DPB_ENTRIES_NUM_MAX; i++) { hantro_write_addr(vpu, G2_REF_LUMA_ADDR(i), 0); hantro_write_addr(vpu, G2_REF_CHROMA_ADDR(i), 0); hantro_write_addr(vpu, G2_REF_MV_ADDR(i), 0); + hantro_write_addr(vpu, G2_REF_COMP_LUMA_ADDR(i), 0); + hantro_write_addr(vpu, G2_REF_COMP_CHROMA_ADDR(i), 0); } hantro_reg_write(vpu, &g2_refer_lterm_e, dpb_longterm_e); @@ -594,8 +609,7 @@ int hantro_g2_hevc_dec_run(struct hantro_ctx *ctx) /* Don't disable output */ hantro_reg_write(vpu, &g2_out_dis, 0); - /* Don't compress buffers */ - hantro_reg_write(vpu, &g2_ref_compress_bypass, 1); + hantro_reg_write(vpu, &g2_ref_compress_bypass, !ctx->hevc_dec.use_compression); /* Bus width and max burst */ hantro_reg_write(vpu, &g2_buswidth, BUS_WIDTH_128); diff --git a/drivers/media/platform/verisilicon/hantro_g2_regs.h b/drivers/media/platform/verisilicon/hantro_g2_regs.h index 82606783591a..b943b1816db7 100644 --- a/drivers/media/platform/verisilicon/hantro_g2_regs.h +++ b/drivers/media/platform/verisilicon/hantro_g2_regs.h @@ -318,6 +318,10 @@ #define G2_TILE_BSD_ADDR (G2_SWREG(183)) #define G2_DS_DST (G2_SWREG(186)) #define G2_DS_DST_CHR (G2_SWREG(188)) +#define G2_OUT_COMP_LUMA_ADDR (G2_SWREG(190)) +#define G2_REF_COMP_LUMA_ADDR(i) (G2_SWREG(192) + ((i) * 0x8)) +#define G2_OUT_COMP_CHROMA_ADDR (G2_SWREG(224)) +#define G2_REF_COMP_CHROMA_ADDR(i) (G2_SWREG(226) + ((i) * 0x8)) #define g2_strm_buffer_len G2_DEC_REG(258, 0, 0xffffffff) #define g2_strm_start_offset G2_DEC_REG(259, 0, 0xffffffff) diff --git a/drivers/media/platform/verisilicon/hantro_hevc.c b/drivers/media/platform/verisilicon/hantro_hevc.c index 2c14330bc562..83cd12b0ddd6 100644 --- a/drivers/media/platform/verisilicon/hantro_hevc.c +++ b/drivers/media/platform/verisilicon/hantro_hevc.c @@ -25,6 +25,11 @@ #define MAX_TILE_COLS 20 #define MAX_TILE_ROWS 22 +static bool hevc_use_compression = IS_ENABLED(CONFIG_VIDEO_HANTRO_HEVC_RFC); +module_param_named(hevc_use_compression, hevc_use_compression, bool, 0644); +MODULE_PARM_DESC(hevc_use_compression, + "Use reference frame compression for HEVC"); + void hantro_hevc_ref_init(struct hantro_ctx *ctx) { struct hantro_hevc_dec_hw_ctx *hevc_dec = &ctx->hevc_dec; @@ -275,5 +280,8 @@ int hantro_hevc_dec_init(struct hantro_ctx *ctx) hantro_hevc_ref_init(ctx); + hevc_dec->use_compression = + hevc_use_compression & hantro_needs_postproc(ctx, ctx->vpu_dst_fmt); + return 0; } diff --git a/drivers/media/platform/verisilicon/hantro_hw.h b/drivers/media/platform/verisilicon/hantro_hw.h index 7737320cc8cc..c9b6556f8b2b 100644 --- a/drivers/media/platform/verisilicon/hantro_hw.h +++ b/drivers/media/platform/verisilicon/hantro_hw.h @@ -42,6 +42,13 @@ #define MAX_POSTPROC_BUFFERS 64 +#define CBS_SIZE 16 /* compression table size in bytes */ +#define CBS_LUMA 8 /* luminance CBS is composed of 1 8x8 coded block */ +#define CBS_CHROMA_W (8 * 2) /* chrominance CBS is composed of two 8x4 coded + * blocks, with Cb CB first then Cr CB following + */ +#define CBS_CHROMA_H 4 + struct hantro_dev; struct hantro_ctx; struct hantro_buf; @@ -144,6 +151,7 @@ struct hantro_hevc_dec_ctrls { * @ref_bufs_used: Bitfield of used reference buffers * @ctrls: V4L2 controls attached to a run * @num_tile_cols_allocated: number of allocated tiles + * @use_compression: use reference buffer compression */ struct hantro_hevc_dec_hw_ctx { struct hantro_aux_buf tile_sizes; @@ -156,6 +164,7 @@ struct hantro_hevc_dec_hw_ctx { u32 ref_bufs_used; struct hantro_hevc_dec_ctrls ctrls; unsigned int num_tile_cols_allocated; + bool use_compression; }; /** @@ -510,6 +519,33 @@ hantro_hevc_mv_size(unsigned int width, unsigned int height) return width * height / 16; } +static inline size_t +hantro_hevc_luma_compressed_size(unsigned int width, unsigned int height) +{ + u32 pic_width_in_cbsy = + round_up((width + CBS_LUMA - 1) / CBS_LUMA, CBS_SIZE); + u32 pic_height_in_cbsy = (height + CBS_LUMA - 1) / CBS_LUMA; + + return round_up(pic_width_in_cbsy * pic_height_in_cbsy, CBS_SIZE); +} + +static inline size_t +hantro_hevc_chroma_compressed_size(unsigned int width, unsigned int height) +{ + u32 pic_width_in_cbsc = + round_up((width + CBS_CHROMA_W - 1) / CBS_CHROMA_W, CBS_SIZE); + u32 pic_height_in_cbsc = (height / 2 + CBS_CHROMA_H - 1) / CBS_CHROMA_H; + + return round_up(pic_width_in_cbsc * pic_height_in_cbsc, CBS_SIZE); +} + +static inline size_t +hantro_hevc_compressed_size(unsigned int width, unsigned int height) +{ + return hantro_hevc_luma_compressed_size(width, height) + + hantro_hevc_chroma_compressed_size(width, height); +} + static inline unsigned short hantro_av1_num_sbs(unsigned short dimension) { return DIV_ROUND_UP(dimension, 64); @@ -525,6 +561,8 @@ hantro_av1_mv_size(unsigned int width, unsigned int height) size_t hantro_g2_chroma_offset(struct hantro_ctx *ctx); size_t hantro_g2_motion_vectors_offset(struct hantro_ctx *ctx); +size_t hantro_g2_luma_compress_offset(struct hantro_ctx *ctx); +size_t hantro_g2_chroma_compress_offset(struct hantro_ctx *ctx); int hantro_g1_mpeg2_dec_run(struct hantro_ctx *ctx); int rockchip_vpu2_mpeg2_dec_run(struct hantro_ctx *ctx); diff --git a/drivers/media/platform/verisilicon/hantro_postproc.c b/drivers/media/platform/verisilicon/hantro_postproc.c index 41e93176300b..232c93eea7ee 100644 --- a/drivers/media/platform/verisilicon/hantro_postproc.c +++ b/drivers/media/platform/verisilicon/hantro_postproc.c @@ -213,9 +213,13 @@ static unsigned int hantro_postproc_buffer_size(struct hantro_ctx *ctx) else if (ctx->vpu_src_fmt->fourcc == V4L2_PIX_FMT_VP9_FRAME) buf_size += hantro_vp9_mv_size(pix_mp.width, pix_mp.height); - else if (ctx->vpu_src_fmt->fourcc == V4L2_PIX_FMT_HEVC_SLICE) + else if (ctx->vpu_src_fmt->fourcc == V4L2_PIX_FMT_HEVC_SLICE) { buf_size += hantro_hevc_mv_size(pix_mp.width, pix_mp.height); + if (ctx->hevc_dec.use_compression) + buf_size += hantro_hevc_compressed_size(pix_mp.width, + pix_mp.height); + } else if (ctx->vpu_src_fmt->fourcc == V4L2_PIX_FMT_AV1_FRAME) buf_size += hantro_av1_mv_size(pix_mp.width, pix_mp.height); |