diff options
author | Dmitry Osipenko <digetx@gmail.com> | 2017-12-27 19:03:12 +0300 |
---|---|---|
committer | Dmitry Osipenko <digetx@gmail.com> | 2017-12-28 16:38:27 +0300 |
commit | b7e346b4465ce70bfd8b53b11d1c2fd8e62614a8 (patch) | |
tree | e8f75578c2291cb329b4bc869c0007d68a7a5c18 | |
parent | 6ceb024bbef46bc4c9a39df14dfadfd23753b3bb (diff) |
Precalculate CSC params
-rw-r--r-- | src/host1x-gr2d.c | 50 | ||||
-rw-r--r-- | src/host1x.h | 22 | ||||
-rw-r--r-- | src/surface_mixer.c | 70 | ||||
-rw-r--r-- | src/surface_output.c | 2 | ||||
-rw-r--r-- | src/surface_shared.c | 33 | ||||
-rw-r--r-- | src/vdpau_tegra.h | 10 |
6 files changed, 98 insertions, 89 deletions
diff --git a/src/host1x-gr2d.c b/src/host1x-gr2d.c index aea1b31..a459bac 100644 --- a/src/host1x-gr2d.c +++ b/src/host1x-gr2d.c @@ -23,24 +23,9 @@ #include "vdpau_tegra.h" -#define FLOAT_TO_FIXED_6_12(fp) \ - (((int32_t) (fp * 4096.0f + 0.5f)) & ((1 << 18) - 1)) - -#define FLOAT_TO_FIXED_s2_7(fp) \ - (((fp < 0.0f) << 9) | (((int32_t) (fabs(fp) * 128.0f)) & ((1 << 9) - 1))) - -#define FLOAT_TO_FIXED_s1_7(fp) \ - (((fp < 0.0f) << 8) | (((int32_t) (fabs(fp) * 128.0f)) & ((1 << 8) - 1))) - -#define FLOAT_TO_FIXED_0_8(fp) \ - (((int32_t) (fp * 256.0f + 0.5f)) & ((1 << 8) - 1)) - -static float fclamp(float f, float min, float max) -{ - if (f < min) return min; - if (f > max) return max; - return f; -} +struct host1x_csc_params csc_rgb_default = { + .cvr = 0x80, .cub = 0x80, .cyx = 0x80, +}; int host1x_gr2d_clear(struct tegra_stream *stream, struct host1x_pixelbuffer *pixbuf, @@ -339,7 +324,7 @@ static uint32_t sb_offset(struct host1x_pixelbuffer *pixbuf, int host1x_gr2d_surface_blit(struct tegra_stream *stream, struct host1x_pixelbuffer *src, struct host1x_pixelbuffer *dst, - VdpCSCMatrix *cscmat, + struct host1x_csc_params *csc, unsigned int sx, unsigned int sy, unsigned int src_width, unsigned int src_height, unsigned int dx, unsigned int dy, @@ -487,30 +472,15 @@ coords_check: if (src->format != PIX_BUF_FMT_YV12) { tegra_stream_push(stream, HOST1X_OPCODE_MASK(0x15, 0x787)); - tegra_stream_push(stream, - /* cvr */ FLOAT_TO_FIXED_s2_7(1.0f) << 12 | - /* cub */ FLOAT_TO_FIXED_s2_7(1.0f)); /* cscfirst */ - tegra_stream_push(stream, - /* cyx */ FLOAT_TO_FIXED_s1_7(1.0f) << 24 | - /* cur */ FLOAT_TO_FIXED_s2_7(0.0f) << 12 | - /* cug */ FLOAT_TO_FIXED_s1_7(0.0f)); /* cscsecond */ - tegra_stream_push(stream, - /* cvb */ FLOAT_TO_FIXED_s2_7(0.0f) << 16 | - /* cvg */ FLOAT_TO_FIXED_s1_7(0.0f)); /* cscthird */ + tegra_stream_push(stream, csc->yos << 24 | csc->cvr << 12 | csc->cub); /* cscfirst */ + tegra_stream_push(stream, csc->cyx << 24 | csc->cur << 12 | csc->cug); /* cscsecond */ + tegra_stream_push(stream, csc->cvb << 16 | csc->cvg); /* cscthird */ } else { tegra_stream_push(stream, HOST1X_OPCODE_MASK(0x15, 0x7E7)); - tegra_stream_push(stream, - /* yos */ (-16) << 24 | - /* cvr */ FLOAT_TO_FIXED_s2_7( fclamp((*cscmat)[0][2], -3.98f, 3.98f) ) << 12 | - /* cub */ FLOAT_TO_FIXED_s2_7( fclamp((*cscmat)[2][1], -3.98f, 3.98f) )); /* cscfirst */ - tegra_stream_push(stream, - /* cyx */ FLOAT_TO_FIXED_s1_7( fclamp((*cscmat)[0][0], -1.98f, 1.98f) ) << 24 | - /* cur */ FLOAT_TO_FIXED_s2_7( fclamp((*cscmat)[0][1], -3.98f, 3.98f) ) << 12 | - /* cug */ FLOAT_TO_FIXED_s1_7( fclamp((*cscmat)[1][1], -1.98f, 1.98f) )); /* cscsecond */ - tegra_stream_push(stream, - /* cvb */ FLOAT_TO_FIXED_s2_7( fclamp((*cscmat)[2][2], -3.98f, 3.98f) ) << 16 | - /* cvg */ FLOAT_TO_FIXED_s1_7( fclamp((*cscmat)[1][2], -1.98f, 1.98f) )); /* cscthird */ + tegra_stream_push(stream, csc->yos << 24 | csc->cvr << 12 | csc->cub); /* cscfirst */ + tegra_stream_push(stream, csc->cyx << 24 | csc->cur << 12 | csc->cug); /* cscsecond */ + tegra_stream_push(stream, csc->cvb << 16 | csc->cvg); /* cscthird */ tegra_stream_push_reloc(stream, src->bos[1], src->bo_offset[1]); /* uba */ tegra_stream_push_reloc(stream, src->bos[2], src->bo_offset[2]); /* vba */ diff --git a/src/host1x.h b/src/host1x.h index 06a20ca..15125f0 100644 --- a/src/host1x.h +++ b/src/host1x.h @@ -25,6 +25,18 @@ #ifndef HOST1X_H #define HOST1X_H +#define FLOAT_TO_FIXED_6_12(fp) \ + (((int32_t) (fp * 4096.0f + 0.5f)) & ((1 << 18) - 1)) + +#define FLOAT_TO_FIXED_s2_7(fp) \ + (((fp < 0.0f) << 9) | (((int32_t) (fabs(fp) * 128.0f)) & ((1 << 9) - 1))) + +#define FLOAT_TO_FIXED_s1_7(fp) \ + (((fp < 0.0f) << 8) | (((int32_t) (fabs(fp) * 128.0f)) & ((1 << 8) - 1))) + +#define FLOAT_TO_FIXED_0_8(fp) \ + (((int32_t) (fp * 256.0f + 0.5f)) & ((1 << 8) - 1)) + #define HOST1X_OPCODE_SETCL(offset, classid, mask) \ ((0x0 << 28) | (((offset) & 0xfff) << 16) | (((classid) & 0x3ff) << 6) | ((mask) & 0x3f)) #define HOST1X_OPCODE_INCR(offset, count) \ @@ -78,6 +90,14 @@ enum layout_format { PIX_BUF_LAYOUT_TILED_16x16, }; +struct host1x_csc_params { + uint32_t yos, cvr, cub; + uint32_t cyx, cur, cug; + uint32_t cvb, cvg; +}; + +extern struct host1x_csc_params csc_rgb_default; + struct host1x_pixelbuffer { union { struct drm_tegra_bo *bo; @@ -149,7 +169,7 @@ int host1x_gr2d_blit(struct tegra_stream *stream, int host1x_gr2d_surface_blit(struct tegra_stream *stream, struct host1x_pixelbuffer *src, struct host1x_pixelbuffer *dst, - VdpCSCMatrix *cscmat, + struct host1x_csc_params *csc, unsigned int sx, unsigned int sy, unsigned int src_width, unsigned int src_height, unsigned int dx, unsigned int dy, diff --git a/src/surface_mixer.c b/src/surface_mixer.c index eba10b9..c6b68e7 100644 --- a/src/surface_mixer.c +++ b/src/surface_mixer.c @@ -19,6 +19,44 @@ #include "vdpau_tegra.h" +static bool custom_csc(VdpCSCMatrix const csc_matrix) +{ + int i, k; + + if (memcmp(csc_matrix, CSC_BT_601, sizeof(VdpCSCMatrix)) == 0 || + memcmp(csc_matrix, CSC_BT_709, sizeof(VdpCSCMatrix)) == 0) + return false; + + for (i = 0; i < 3; i++) + for (k = 0; k < 3; k++) + if (fabs(csc_matrix[i][k] - CSC_BT_601[i][k]) > 0.01f) + goto check_709; + + return false; + + /* XXX: Tegra's CSC is hardcoded to BT601 in the kernel driver */ +check_709: + for (i = 0; i < 3; i++) + for (k = 0; k < 3; k++) + if (fabs(csc_matrix[i][k] - CSC_BT_709[i][k]) > 0.01f) + return true; + + return false; +} + +static void mixer_apply_vdp_csc(tegra_mixer *mix, VdpCSCMatrix const cscmat) +{ + mix->csc.yos = -16; + mix->csc.cvr = FLOAT_TO_FIXED_s2_7( CLAMP(cscmat[0][2], -3.98f, 3.98f) ); + mix->csc.cub = FLOAT_TO_FIXED_s2_7( CLAMP(cscmat[2][1], -3.98f, 3.98f) ); + mix->csc.cyx = FLOAT_TO_FIXED_s1_7( CLAMP(cscmat[0][0], -1.98f, 1.98f) ); + mix->csc.cur = FLOAT_TO_FIXED_s2_7( CLAMP(cscmat[0][1], -3.98f, 3.98f) ); + mix->csc.cug = FLOAT_TO_FIXED_s1_7( CLAMP(cscmat[1][1], -1.98f, 1.98f) ); + mix->csc.cvb = FLOAT_TO_FIXED_s2_7( CLAMP(cscmat[2][2], -3.98f, 3.98f) ); + mix->csc.cvg = FLOAT_TO_FIXED_s1_7( CLAMP(cscmat[1][2], -1.98f, 1.98f) ); + mix->custom_csc = custom_csc(cscmat); +} + VdpStatus vdp_video_mixer_query_feature_support(VdpDevice device, VdpVideoMixerFeature feature, VdpBool *is_supported) @@ -207,6 +245,8 @@ VdpStatus vdp_video_mixer_create(VdpDevice device, ref_device(dev); mix->dev = dev; + mixer_apply_vdp_csc(mix, CSC_BT_709); + *mixer = i; return VDP_STATUS_OK; @@ -243,8 +283,7 @@ VdpStatus vdp_video_mixer_set_attribute_values( while (count--) { switch (attributes[count]) { case VDP_VIDEO_MIXER_ATTRIBUTE_CSC_MATRIX: - memcpy(&mix->csc_matrix, attribute_values[count], - sizeof(VdpCSCMatrix)); + mixer_apply_vdp_csc(mix, attribute_values[count]); break; case VDP_VIDEO_MIXER_ATTRIBUTE_BACKGROUND_COLOR: @@ -465,7 +504,7 @@ VdpStatus vdp_video_mixer_render( host1x_gr2d_surface_blit(mix->dev->stream, bg_surf->pixbuf, dest_surf->pixbuf, - &mix->csc_matrix, + &csc_rgb_default, bg_x0, bg_y0, bg_width, bg_height, 0, @@ -475,17 +514,18 @@ VdpStatus vdp_video_mixer_render( } if (!draw_background) { - shared = create_shared_surface(dest_surf, - video_surf, - &mix->csc_matrix, - src_vid_x0, - src_vid_y0, - src_vid_width, - src_vid_height, - dst_vid_x0, - dst_vid_y0, - dst_vid_width, - dst_vid_height); + if (!mix->custom_csc) + shared = create_shared_surface(dest_surf, + video_surf, + &mix->csc, + src_vid_x0, + src_vid_y0, + src_vid_width, + src_vid_height, + dst_vid_x0, + dst_vid_y0, + dst_vid_width, + dst_vid_height); if (!shared) { ret = dynamic_alloc_surface_data(dest_surf); if (ret) { @@ -509,7 +549,7 @@ VdpStatus vdp_video_mixer_render( host1x_gr2d_surface_blit(mix->dev->stream, video_surf->pixbuf, dest_surf->pixbuf, - &mix->csc_matrix, + &mix->csc, src_vid_x0, src_vid_y0, src_vid_width, diff --git a/src/surface_output.c b/src/surface_output.c index f5ebdd9..adb55c9 100644 --- a/src/surface_output.c +++ b/src/surface_output.c @@ -315,7 +315,7 @@ VdpStatus vdp_output_surface_render_bitmap_surface( host1x_gr2d_surface_blit(dst_surf->dev->stream, src_surf->pixbuf, dst_surf->pixbuf, - NULL, + &csc_rgb_default, src_x0, src_y0, src_width, src_height, dst_x0, dst_y0, diff --git a/src/surface_shared.c b/src/surface_shared.c index 0958933..8a0d6ae 100644 --- a/src/surface_shared.c +++ b/src/surface_shared.c @@ -61,34 +61,9 @@ static XvImage * create_video_xv(tegra_surface *video) return xv_img; } -static bool custom_csc(VdpCSCMatrix const *csc_matrix) -{ - int i, k; - - if (memcmp(*csc_matrix, CSC_BT_601, sizeof(VdpCSCMatrix)) == 0 || - memcmp(*csc_matrix, CSC_BT_709, sizeof(VdpCSCMatrix)) == 0) - return false; - - for (i = 0; i < 3; i++) - for (k = 0; k < 3; k++) - if (fabs((*csc_matrix)[i][k] - CSC_BT_601[i][k]) > 0.01f) - goto check_709; - - return false; - - /* XXX: Tegra's CSC is hardcoded to BT601 in the kernel driver */ -check_709: - for (i = 0; i < 3; i++) - for (k = 0; k < 3; k++) - if (fabs((*csc_matrix)[i][k] - CSC_BT_709[i][k]) > 0.01f) - return true; - - return false; -} - tegra_shared_surface *create_shared_surface(tegra_surface *disp, tegra_surface *video, - VdpCSCMatrix const *csc_matrix, + struct host1x_csc_params *csc, uint32_t src_x0, uint32_t src_y0, uint32_t src_width, @@ -101,7 +76,7 @@ tegra_shared_surface *create_shared_surface(tegra_surface *disp, tegra_shared_surface *shared; int ret; - if (disp->data_dirty || custom_csc(csc_matrix)) { + if (disp->data_dirty) { return NULL; } @@ -113,7 +88,7 @@ tegra_shared_surface *create_shared_surface(tegra_surface *disp, assert(disp->shared == NULL); atomic_set(&shared->refcnt, 1); - memcpy(&shared->csc_matrix, csc_matrix, sizeof(VdpCSCMatrix)); + memcpy(&shared->csc, csc, sizeof(*csc)); shared->xv_img = create_video_xv(video); shared->video = video; @@ -238,7 +213,7 @@ int shared_surface_transfer_video(tegra_surface *disp) host1x_gr2d_surface_blit(video->dev->stream, video->pixbuf, disp->pixbuf, - &shared->csc_matrix, + &shared->csc, shared->src_x0, shared->src_y0, shared->src_width, diff --git a/src/vdpau_tegra.h b/src/vdpau_tegra.h index 994e7c8..00bd1ea 100644 --- a/src/vdpau_tegra.h +++ b/src/vdpau_tegra.h @@ -102,6 +102,9 @@ fprintf(stderr, "%s:%d/%s(): " fmt, \ __FILE__, __LINE__, __func__, ##args) +#define CLAMP(_v, _vmin, _vmax) \ + (((_v) < (_vmin) ? (_vmin) : (((_v) > (_vmax)) ? (_vmax) : (_v)))) + #define UNIFIED_BUFFER 0 extern VdpCSCMatrix CSC_BT_601; @@ -131,7 +134,7 @@ typedef struct tegra_shared_surface { atomic_t refcnt; struct tegra_surface *video; struct tegra_surface *disp; - VdpCSCMatrix csc_matrix; + struct host1x_csc_params csc; uint32_t src_x0, src_y0, src_width, src_height; uint32_t dst_x0, dst_y0, dst_width, dst_height; XvImage *xv_img; @@ -194,9 +197,10 @@ typedef struct tegra_decoder { } tegra_decoder; typedef struct tegra_mixer { - VdpCSCMatrix csc_matrix; + struct host1x_csc_params csc; VdpColor bg_color; tegra_device *dev; + bool custom_csc; } tegra_mixer; typedef struct tegra_pqt { @@ -294,7 +298,7 @@ int sync_video_frame_dmabufs(tegra_surface *surf, enum frame_sync type); tegra_shared_surface *create_shared_surface(tegra_surface *disp, tegra_surface *video, - VdpCSCMatrix const *csc_matrix, + struct host1x_csc_params *csc, uint32_t src_x0, uint32_t src_y0, uint32_t src_width, |