author     Xiang, Haihao <haihao.xiang@intel.com>    2010-09-02 15:22:39 +0800
committer  Xiang, Haihao <haihao.xiang@intel.com>    2010-09-15 08:38:10 +0800
commit     62641bd7215a8cb1637158767434e896aba78dba (patch)
tree       7253ab65bf906b83f7195d7dffc033d999882378
parent     20975a94de533ddcf17e3ec4b09bf3bd2ce71376 (diff)
i965_drv_video: deinterlacing & scaling
Signed-off-by: Xiang, Haihao <haihao.xiang@intel.com>
-rw-r--r--  i965_drv_video/Makefile.am                 6
-rw-r--r--  i965_drv_video/i965_defines.h             23
-rw-r--r--  i965_drv_video/i965_drv_video.c           17
-rw-r--r--  i965_drv_video/i965_drv_video.h            5
-rw-r--r--  i965_drv_video/i965_post_processing.c   2029
-rw-r--r--  i965_drv_video/i965_post_processing.h    150
-rw-r--r--  i965_drv_video/i965_render.c              32
-rw-r--r--  i965_drv_video/i965_render.h               8
-rw-r--r--  i965_drv_video/i965_structs.h            325
-rw-r--r--  i965_drv_video/intel_batchbuffer.c         2
-rw-r--r--  i965_drv_video/intel_driver.h              2
11 files changed, 2584 insertions, 15 deletions
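
For context: the new post-processing path is selected entirely through the flags argument of vaPutSurface(). As the i965_PutSurface hunk below shows, VA_FILTER_SCALING_NL_ANAMORPHIC enables the AVS (scaling) kernel and VA_TOP_FIELD/VA_BOTTOM_FIELD enable the DNDI (deinterlacing) kernel. The caller-side sketch below is illustrative only and is not part of the patch; the function name and the va_dpy/surface/win variables are hypothetical and assumed to come from the application's usual VA-API/X11 setup.

    /* Illustrative sketch: present a decoded surface using the driver's new
     * deinterlacing + non-linear anamorphic scaling path. */
    #include <va/va.h>
    #include <va/va_x11.h>

    static VAStatus
    present_frame(VADisplay va_dpy, VASurfaceID surface, Drawable win,
                  unsigned short src_w, unsigned short src_h,
                  unsigned short dst_w, unsigned short dst_h,
                  int bottom_field_first)
    {
        /* A top/bottom field flag requests the NV12 DNDI kernel;
         * NL_ANAMORPHIC scaling requests the NV12 AVS kernel. */
        unsigned int flags = VA_FILTER_SCALING_NL_ANAMORPHIC |
            (bottom_field_first ? VA_BOTTOM_FIELD : VA_TOP_FIELD);

        return vaPutSurface(va_dpy, surface, win,
                            0, 0, src_w, src_h,   /* source rectangle */
                            0, 0, dst_w, dst_h,   /* destination rectangle */
                            NULL, 0,              /* no clip rectangles */
                            flags);
    }
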
diff --git a/i965_drv_video/Makefile.am b/i965_drv_video/Makefile.am index 058b525..f32d579 100644 --- a/i965_drv_video/Makefile.am +++ b/i965_drv_video/Makefile.am @@ -42,7 +42,8 @@ i965_drv_video_la_SOURCES = \ i965_drv_video.c \ i965_avc_bsd.c \ i965_avc_hw_scoreboard.c\ - i965_avc_ildb.c + i965_avc_ildb.c \ + i965_post_processing.c noinst_HEADERS = \ object_heap.h \ @@ -59,4 +60,5 @@ noinst_HEADERS = \ i965_structs.h \ i965_avc_bsd.h \ i965_avc_hw_scoreboard.h\ - i965_avc_ildb.h + i965_avc_ildb.h \ + i965_post_processing.h diff --git a/i965_drv_video/i965_defines.h b/i965_drv_video/i965_defines.h index aa2baa3..839712e 100644 --- a/i965_drv_video/i965_defines.h +++ b/i965_drv_video/i965_defines.h @@ -357,6 +357,29 @@ #define SCOREBOARD_STALLING 0 #define SCOREBOARD_NON_STALLING 1 +#define SURFACE_FORMAT_YCRCB_NORMAL 0 +#define SURFACE_FORMAT_YCRCB_SWAPUVY 1 +#define SURFACE_FORMAT_YCRCB_SWAPUV 2 +#define SURFACE_FORMAT_YCRCB_SWAPY 3 +#define SURFACE_FORMAT_PLANAR_420_8 4 +#define SURFACE_FORMAT_PLANAR_411_8 5 +#define SURFACE_FORMAT_PLANAR_422_8 6 +#define SURFACE_FORMAT_STMM_DN_STATISTICS 7 +#define SURFACE_FORMAT_R10G10B10A2_UNORM 8 +#define SURFACE_FORMAT_R8G8B8A8_UNORM 9 +#define SURFACE_FORMAT_R8B8_UNORM 10 +#define SURFACE_FORMAT_R8_UNORM 11 +#define SURFACE_FORMAT_Y8_UNORM 12 + +#define AVS_FILTER_ADAPTIVE_8_TAP 0 +#define AVS_FILTER_NEAREST 1 + +#define IEF_FILTER_COMBO 0 +#define IEF_FILTER_DETAIL 1 + +#define IEF_FILTER_SIZE_3X3 0 +#define IEF_FILTER_SIZE_5X5 1 + #define URB_SIZE(intel) (IS_IRONLAKE(intel->device_id) ? 1024 : \ IS_G4X(intel->device_id) ? 384 : 256) #endif /* _I965_DEFINES_H_ */ diff --git a/i965_drv_video/i965_drv_video.c b/i965_drv_video/i965_drv_video.c index 104c105..ec5412d 100644 --- a/i965_drv_video/i965_drv_video.c +++ b/i965_drv_video/i965_drv_video.c @@ -350,6 +350,8 @@ i965_destroy_surface(struct object_heap *heap, struct object_base *obj) dri_bo_unreference(obj_surface->bo); obj_surface->bo = NULL; + dri_bo_unreference(obj_surface->pp_out_bo); + obj_surface->pp_out_bo = NULL; if (obj_surface->free_private_data != NULL) { obj_surface->free_private_data(&obj_surface->private_data); @@ -395,6 +397,7 @@ i965_CreateSurfaces(VADriverContextP ctx, obj_surface->size = SIZE_YUV420(obj_surface->width, obj_surface->height); obj_surface->flags = SURFACE_REFERENCED; obj_surface->bo = NULL; + obj_surface->pp_out_bo = NULL; obj_surface->private_data = NULL; obj_surface->free_private_data = NULL; } @@ -1644,7 +1647,7 @@ i965_GetImage(VADriverContextP ctx, VAStatus i965_PutSurface(VADriverContextP ctx, VASurfaceID surface, - Drawable draw, /* X Drawable */ + void *draw, /* X Drawable */ short srcx, short srcy, unsigned short srcw, @@ -1667,6 +1670,7 @@ i965_PutSurface(VADriverContextP ctx, int ret; uint32_t name; Bool new_region = False; + int pp_flag = 0; /* Currently don't support DRI1 */ if (dri_state->driConnectedFlag != VA_DRI2) return VA_STATUS_ERROR_UNKNOWN; @@ -1678,7 +1682,7 @@ i965_PutSurface(VADriverContextP ctx, if (obj_surface->bo == NULL) return VA_STATUS_SUCCESS; - dri_drawable = dri_get_drawable(ctx, draw); + dri_drawable = dri_get_drawable(ctx, (Drawable)draw); assert(dri_drawable); buffer = dri_get_rendering_buffer(ctx, dri_drawable); @@ -1716,9 +1720,16 @@ i965_PutSurface(VADriverContextP ctx, assert(ret == 0); } + if ((flags & VA_FILTER_SCALING_MASK) == VA_FILTER_SCALING_NL_ANAMORPHIC) + pp_flag |= I965_PP_FLAG_AVS; + + if (flags & (VA_BOTTOM_FIELD | VA_TOP_FIELD)) + pp_flag |= I965_PP_FLAG_DEINTERLACING; + i965_render_put_surface(ctx, surface, 
srcx, srcy, srcw, srch, - destx, desty, destw, desth); + destx, desty, destw, desth, + pp_flag); if(obj_surface->subpic != VA_INVALID_ID) { i965_render_put_subpic(ctx, surface, diff --git a/i965_drv_video/i965_drv_video.h b/i965_drv_video/i965_drv_video.h index 8643bd6..7fc9cdb 100644 --- a/i965_drv_video/i965_drv_video.h +++ b/i965_drv_video/i965_drv_video.h @@ -109,6 +109,11 @@ struct object_surface int orig_height; int flags; dri_bo *bo; + int pp_out_width; + int pp_out_height; + int orig_pp_out_width; + int orig_pp_out_height; + dri_bo *pp_out_bo; void (*free_private_data)(void **data); void *private_data; }; diff --git a/i965_drv_video/i965_post_processing.c b/i965_drv_video/i965_post_processing.c new file mode 100644 index 0000000..633100c --- /dev/null +++ b/i965_drv_video/i965_post_processing.c @@ -0,0 +1,2029 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: + * Xiang Haihao <haihao.xiang@intel.com> + * + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <assert.h> + +#include <va/va_backend.h> + +#include "intel_batchbuffer.h" +#include "intel_driver.h" + +#include "i965_defines.h" +#include "i965_post_processing.h" +#include "i965_render.h" +#include "i965_drv_video.h" + +struct pp_module +{ + /* kernel */ + char *name; + int interface; + unsigned int (*bin)[4]; + int size; + dri_bo *bo; + + /* others */ + void (*initialize)(VADriverContextP ctx, VASurfaceID surface, int input, + unsigned short srcw, unsigned short srch, + unsigned short destw, unsigned short desth); +}; + +static uint32_t pp_null_gen5[][4] = { +#include "shaders/post_processing/null.g4b.gen5" +}; + +static uint32_t pp_nv12_load_save_gen5[][4] = { +#include "shaders/post_processing/nv12_load_save_nv12.g4b.gen5" +}; + +static uint32_t pp_nv12_scaling_gen5[][4] = { +#include "shaders/post_processing/nv12_scaling_nv12.g4b.gen5" +}; + +static uint32_t pp_nv12_avs_gen5[][4] = { +#include "shaders/post_processing/nv12_avs_nv12.g4b.gen5" +}; + +static uint32_t pp_nv12_dndi_gen5[][4] = { +#include "shaders/post_processing/nv12_dndi_nv12.g4b.gen5" +}; + +static void ironlake_pp_null_initialize(VADriverContextP ctx, VASurfaceID surface, int input, + unsigned short srcw, unsigned short srch, + unsigned short destw, unsigned short desth); +static void ironlake_pp_nv12_avs_initialize(VADriverContextP ctx, VASurfaceID surface, int input, + unsigned short srcw, unsigned short srch, + unsigned short destw, unsigned short desth); +static void ironlake_pp_nv12_scaling_initialize(VADriverContextP ctx, VASurfaceID surface, int input, + unsigned short srcw, unsigned short srch, + unsigned short destw, unsigned short desth); +static void ironlake_pp_nv12_load_save_initialize(VADriverContextP ctx, VASurfaceID surface, int input, + unsigned short srcw, unsigned short srch, + unsigned short destw, unsigned short desth); +static void ironlake_pp_nv12_dndi_initialize(VADriverContextP ctx, VASurfaceID surface, int input, + unsigned short srcw, unsigned short srch, + unsigned short destw, unsigned short desth); + +static struct pp_module pp_modules_gen5[] = { + { + "NULL module (for testing)", + PP_NULL, + pp_null_gen5, + sizeof(pp_null_gen5), + NULL, + ironlake_pp_null_initialize, + }, + + { + "NV12 Load & Save module", + PP_NV12_LOAD_SAVE, + pp_nv12_load_save_gen5, + sizeof(pp_nv12_load_save_gen5), + NULL, + ironlake_pp_nv12_load_save_initialize, + }, + + { + "NV12 Scaling module", + PP_NV12_SCALING, + pp_nv12_scaling_gen5, + sizeof(pp_nv12_scaling_gen5), + NULL, + ironlake_pp_nv12_scaling_initialize, + }, + + { + "NV12 AVS module", + PP_NV12_AVS, + pp_nv12_avs_gen5, + sizeof(pp_nv12_avs_gen5), + NULL, + ironlake_pp_nv12_avs_initialize, + }, + + { + "NV12 DNDI module", + PP_NV12_DNDI, + pp_nv12_dndi_gen5, + sizeof(pp_nv12_dndi_gen5), + NULL, + ironlake_pp_nv12_dndi_initialize, + }, +}; + +#define NUM_PP_MODULES ARRAY_ELEMS(pp_modules_gen5) + +static struct pp_module *pp_modules = NULL; + +struct ironlake_pp_static_parameter +{ + struct { + /* Procamp r1.0 */ + float procamp_constant_c0; + + /* Load and Same r1.1 */ + unsigned int source_packed_y_offset:8; + unsigned int source_packed_u_offset:8; + unsigned int source_packed_v_offset:8; + unsigned int pad0:8; + + union { + /* Load and Save r1.2 */ + struct { + unsigned int destination_packed_y_offset:8; + unsigned int destination_packed_u_offset:8; + unsigned int destination_packed_v_offset:8; + unsigned int 
pad0:8; + } load_and_save; + + /* CSC r1.2 */ + struct { + unsigned int destination_rgb_format:8; + unsigned int pad0:24; + } csc; + } r1_2; + + /* Procamp r1.3 */ + float procamp_constant_c1; + + /* Procamp r1.4 */ + float procamp_constant_c2; + + /* DI r1.5 */ + unsigned int statistics_surface_picth:16; /* Devided by 2 */ + unsigned int pad1:16; + + union { + /* DI r1.6 */ + struct { + unsigned int pad0:24; + unsigned int top_field_first:8; + } di; + + /* AVS/Scaling r1.6 */ + float normalized_video_y_scaling_step; + } r1_6; + + /* Procamp r1.7 */ + float procamp_constant_c5; + } grf1; + + struct { + /* Procamp r2.0 */ + float procamp_constant_c3; + + /* MBZ r2.1*/ + unsigned int pad0; + + /* WG+CSC r2.2 */ + float wg_csc_constant_c4; + + /* WG+CSC r2.3 */ + float wg_csc_constant_c8; + + /* Procamp r2.4 */ + float procamp_constant_c4; + + /* MBZ r2.5 */ + unsigned int pad1; + + /* MBZ r2.6 */ + unsigned int pad2; + + /* WG+CSC r2.7 */ + float wg_csc_constant_c9; + } grf2; + + struct { + /* WG+CSC r3.0 */ + float wg_csc_constant_c0; + + /* Blending r3.1 */ + float scaling_step_ratio; + + /* Blending r3.2 */ + float normalized_alpha_y_scaling; + + /* WG+CSC r3.3 */ + float wg_csc_constant_c4; + + /* WG+CSC r3.4 */ + float wg_csc_constant_c1; + + /* ALL r3.5 */ + int horizontal_origin_offset:16; + int vertical_origin_offset:16; + + /* Shared r3.6*/ + union { + /* Color filll */ + unsigned int color_pixel; + + /* WG+CSC */ + float wg_csc_constant_c2; + } r3_6; + + /* WG+CSC r3.7 */ + float wg_csc_constant_c3; + } grf3; + + struct { + /* WG+CSC r4.0 */ + float wg_csc_constant_c6; + + /* ALL r4.1 MBZ ???*/ + unsigned int pad0; + + /* Shared r4.2 */ + union { + /* AVS */ + struct { + unsigned int pad1:15; + unsigned int nlas:1; + unsigned int pad2:16; + } avs; + + /* DI */ + struct { + unsigned int motion_history_coefficient_m2:8; + unsigned int motion_history_coefficient_m1:8; + unsigned int pad0:16; + } di; + } r4_2; + + /* WG+CSC r4.3 */ + float wg_csc_constant_c7; + + /* WG+CSC r4.4 */ + float wg_csc_constant_c10; + + /* AVS r4.5 */ + float source_video_frame_normalized_horizontal_origin; + + /* MBZ r4.6 */ + unsigned int pad1; + + /* WG+CSC r4.7 */ + float wg_csc_constant_c11; + } grf4; +}; + +struct ironlake_pp_inline_parameter +{ + struct { + /* ALL r5.0 */ + int destination_block_horizontal_origin:16; + int destination_block_vertical_origin:16; + + /* Shared r5.1 */ + union { + /* AVS/Scaling */ + float source_surface_block_normalized_horizontal_origin; + + /* FMD */ + struct { + unsigned int variance_surface_vertical_origin:16; + unsigned int pad0:16; + } fmd; + } r5_1; + + /* AVS/Scaling r5.2 */ + float source_surface_block_normalized_vertical_origin; + + /* Alpha r5.3 */ + float alpha_surface_block_normalized_horizontal_origin; + + /* Alpha r5.4 */ + float alpha_surface_block_normalized_vertical_origin; + + /* Alpha r5.5 */ + unsigned int alpha_mask_x:16; + unsigned int alpha_mask_y:8; + unsigned int block_count_x:8; + + /* r5.6 */ + unsigned int block_horizontal_mask:16; + unsigned int block_vertical_mask:8; + unsigned int number_blocks:8; + + /* AVS/Scaling r5.7 */ + float normalized_video_x_scaling_step; + } grf5; + + struct { + /* AVS r6.0 */ + float video_step_delta; + + /* r6.1-r6.7 */ + unsigned int padx[7]; + } grf6; +}; + +static struct ironlake_pp_static_parameter ironlake_pp_static_parameter; +static struct ironlake_pp_inline_parameter ironlake_pp_inline_parameter; + +static void +ironlake_pp_surface_state(struct i965_post_processing_context *pp_context) +{ + +} + +static 
void +ironlake_pp_interface_descriptor_table(struct i965_post_processing_context *pp_context) +{ + struct i965_interface_descriptor *desc; + dri_bo *bo; + int pp_index = pp_context->current_pp; + + bo = pp_context->idrt.bo; + dri_bo_map(bo, 1); + assert(bo->virtual); + desc = bo->virtual; + memset(desc, 0, sizeof(*desc)); + desc->desc0.grf_reg_blocks = 10; + desc->desc0.kernel_start_pointer = pp_modules[pp_index].bo->offset >> 6; /* reloc */ + desc->desc1.const_urb_entry_read_offset = 0; + desc->desc1.const_urb_entry_read_len = 4; /* grf 1-4 */ + desc->desc2.sampler_state_pointer = pp_context->sampler_state_table.bo->offset >> 5; + desc->desc2.sampler_count = 0; + desc->desc3.binding_table_entry_count = 0; + desc->desc3.binding_table_pointer = + pp_context->binding_table.bo->offset >> 5; /*reloc */ + + dri_bo_emit_reloc(bo, + I915_GEM_DOMAIN_INSTRUCTION, 0, + desc->desc0.grf_reg_blocks, + offsetof(struct i965_interface_descriptor, desc0), + pp_modules[pp_index].bo); + + dri_bo_emit_reloc(bo, + I915_GEM_DOMAIN_INSTRUCTION, 0, + desc->desc2.sampler_count << 2, + offsetof(struct i965_interface_descriptor, desc2), + pp_context->sampler_state_table.bo); + + dri_bo_emit_reloc(bo, + I915_GEM_DOMAIN_INSTRUCTION, 0, + desc->desc3.binding_table_entry_count, + offsetof(struct i965_interface_descriptor, desc3), + pp_context->binding_table.bo); + + dri_bo_unmap(bo); +} + +static void +ironlake_pp_binding_table(struct i965_post_processing_context *pp_context) +{ + unsigned int *binding_table; + dri_bo *bo = pp_context->binding_table.bo; + int i; + + dri_bo_map(bo, 1); + assert(bo->virtual); + binding_table = bo->virtual; + memset(binding_table, 0, bo->size); + + for (i = 0; i < MAX_PP_SURFACES; i++) { + if (pp_context->surfaces[i].ss_bo) { + assert(pp_context->surfaces[i].s_bo); + + binding_table[i] = pp_context->surfaces[i].ss_bo->offset; + dri_bo_emit_reloc(bo, + I915_GEM_DOMAIN_INSTRUCTION, 0, + 0, + i * sizeof(*binding_table), + pp_context->surfaces[i].ss_bo); + } + + } + + dri_bo_unmap(bo); +} + +static void +ironlake_pp_vfe_state(struct i965_post_processing_context *pp_context) +{ + struct i965_vfe_state *vfe_state; + dri_bo *bo; + + bo = pp_context->vfe_state.bo; + dri_bo_map(bo, 1); + assert(bo->virtual); + vfe_state = bo->virtual; + memset(vfe_state, 0, sizeof(*vfe_state)); + vfe_state->vfe1.max_threads = pp_context->urb.num_vfe_entries - 1; + vfe_state->vfe1.urb_entry_alloc_size = pp_context->urb.size_vfe_entry - 1; + vfe_state->vfe1.num_urb_entries = pp_context->urb.num_vfe_entries; + vfe_state->vfe1.vfe_mode = VFE_GENERIC_MODE; + vfe_state->vfe1.children_present = 0; + vfe_state->vfe2.interface_descriptor_base = + pp_context->idrt.bo->offset >> 4; /* reloc */ + dri_bo_emit_reloc(bo, + I915_GEM_DOMAIN_INSTRUCTION, 0, + 0, + offsetof(struct i965_vfe_state, vfe2), + pp_context->idrt.bo); + dri_bo_unmap(bo); +} + +static void +ironlake_pp_upload_constants(struct i965_post_processing_context *pp_context) +{ + unsigned char *constant_buffer; + + assert(sizeof(ironlake_pp_static_parameter) == 128); + dri_bo_map(pp_context->curbe.bo, 1); + assert(pp_context->curbe.bo->virtual); + constant_buffer = pp_context->curbe.bo->virtual; + memcpy(constant_buffer, &ironlake_pp_static_parameter, sizeof(ironlake_pp_static_parameter)); + dri_bo_unmap(pp_context->curbe.bo); +} + +static void +ironlake_pp_states_setup(VADriverContextP ctx) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct i965_post_processing_context *pp_context = &i965->render_state.pp_context; + + 
ironlake_pp_surface_state(pp_context); + ironlake_pp_binding_table(pp_context); + ironlake_pp_interface_descriptor_table(pp_context); + ironlake_pp_vfe_state(pp_context); + ironlake_pp_upload_constants(pp_context); +} + +static void +ironlake_pp_pipeline_select(VADriverContextP ctx) +{ + BEGIN_BATCH(ctx, 1); + OUT_BATCH(ctx, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA); + ADVANCE_BATCH(ctx); +} + +static void +ironlake_pp_urb_layout(VADriverContextP ctx, struct i965_post_processing_context *pp_context) +{ + unsigned int vfe_fence, cs_fence; + + vfe_fence = pp_context->urb.cs_start; + cs_fence = pp_context->urb.size; + + BEGIN_BATCH(ctx, 3); + OUT_BATCH(ctx, CMD_URB_FENCE | UF0_VFE_REALLOC | UF0_CS_REALLOC | 1); + OUT_BATCH(ctx, 0); + OUT_BATCH(ctx, + (vfe_fence << UF2_VFE_FENCE_SHIFT) | /* VFE_SIZE */ + (cs_fence << UF2_CS_FENCE_SHIFT)); /* CS_SIZE */ + ADVANCE_BATCH(ctx); +} + +static void +ironlake_pp_state_base_address(VADriverContextP ctx) +{ + BEGIN_BATCH(ctx, 8); + OUT_BATCH(ctx, CMD_STATE_BASE_ADDRESS | 6); + OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY); + OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY); + OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY); + OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY); + OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY); + OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY); + OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY); + ADVANCE_BATCH(ctx); +} + +static void +ironlake_pp_state_pointers(VADriverContextP ctx, struct i965_post_processing_context *pp_context) +{ + BEGIN_BATCH(ctx, 3); + OUT_BATCH(ctx, CMD_MEDIA_STATE_POINTERS | 1); + OUT_BATCH(ctx, 0); + OUT_RELOC(ctx, pp_context->vfe_state.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); + ADVANCE_BATCH(ctx); +} + +static void +ironlake_pp_cs_urb_layout(VADriverContextP ctx, struct i965_post_processing_context *pp_context) +{ + BEGIN_BATCH(ctx, 2); + OUT_BATCH(ctx, CMD_CS_URB_STATE | 0); + OUT_BATCH(ctx, + ((pp_context->urb.size_cs_entry - 1) << 4) | /* URB Entry Allocation Size */ + (pp_context->urb.num_cs_entries << 0)); /* Number of URB Entries */ + ADVANCE_BATCH(ctx); +} + +static void +ironlake_pp_constant_buffer(VADriverContextP ctx, struct i965_post_processing_context *pp_context) +{ + BEGIN_BATCH(ctx, 2); + OUT_BATCH(ctx, CMD_CONSTANT_BUFFER | (1 << 8) | (2 - 2)); + OUT_RELOC(ctx, pp_context->curbe.bo, + I915_GEM_DOMAIN_INSTRUCTION, 0, + pp_context->urb.size_cs_entry - 1); + ADVANCE_BATCH(ctx); +} + +static void +ironlake_pp_object_walker(VADriverContextP ctx, struct i965_post_processing_context *pp_context) +{ + int x, x_steps, y, y_steps; + + x_steps = pp_context->pp_x_steps(&pp_context->private_context); + y_steps = pp_context->pp_y_steps(&pp_context->private_context); + + for (y = 0; y < y_steps; y++) { + for (x = 0; x < x_steps; x++) { + if (!pp_context->pp_set_block_parameter(&pp_context->private_context, x, y)) { + BEGIN_BATCH(ctx, 20); + OUT_BATCH(ctx, CMD_MEDIA_OBJECT | 18); + OUT_BATCH(ctx, 0); + OUT_BATCH(ctx, 0); /* no indirect data */ + OUT_BATCH(ctx, 0); + + /* inline data grf 5-6 */ + assert(sizeof(ironlake_pp_inline_parameter) == 64); + intel_batchbuffer_data(ctx, &ironlake_pp_inline_parameter, sizeof(ironlake_pp_inline_parameter)); + + ADVANCE_BATCH(ctx); + } + } + } +} + +static void +ironlake_pp_pipeline_setup(VADriverContextP ctx) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct i965_post_processing_context *pp_context = &i965->render_state.pp_context; + + intel_batchbuffer_start_atomic(ctx, 0x1000); + intel_batchbuffer_emit_mi_flush(ctx); + ironlake_pp_pipeline_select(ctx); + ironlake_pp_state_base_address(ctx); 
+ ironlake_pp_state_pointers(ctx, pp_context); + ironlake_pp_urb_layout(ctx, pp_context); + ironlake_pp_cs_urb_layout(ctx, pp_context); + ironlake_pp_constant_buffer(ctx, pp_context); + ironlake_pp_object_walker(ctx, pp_context); + intel_batchbuffer_end_atomic(ctx); +} + +static int +pp_null_x_steps(void *private_context) +{ + return 1; +} + +static int +pp_null_y_steps(void *private_context) +{ + return 1; +} + +static int +pp_null_set_block_parameter(void *private_context, int x, int y) +{ + return 0; +} + +static void +ironlake_pp_null_initialize(VADriverContextP ctx, VASurfaceID surface, int input, + unsigned short srcw, unsigned short srch, + unsigned short destw, unsigned short desth) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct i965_post_processing_context *pp_context = &i965->render_state.pp_context; + struct object_surface *obj_surface; + + /* surface */ + obj_surface = SURFACE(surface); + dri_bo_unreference(obj_surface->pp_out_bo); + obj_surface->pp_out_bo = obj_surface->bo; + dri_bo_reference(obj_surface->pp_out_bo); + assert(obj_surface->pp_out_bo); + obj_surface->pp_out_width = obj_surface->width; + obj_surface->pp_out_height = obj_surface->height; + obj_surface->orig_pp_out_width = obj_surface->orig_width; + obj_surface->orig_pp_out_height = obj_surface->orig_height; + + /* private function & data */ + pp_context->pp_x_steps = pp_null_x_steps; + pp_context->pp_y_steps = pp_null_y_steps; + pp_context->pp_set_block_parameter = pp_null_set_block_parameter; +} + +static int +pp_load_save_x_steps(void *private_context) +{ + return 1; +} + +static int +pp_load_save_y_steps(void *private_context) +{ + struct pp_load_save_context *pp_load_save_context = private_context; + + return pp_load_save_context->dest_h / 8; +} + +static int +pp_load_save_set_block_parameter(void *private_context, int x, int y) +{ + ironlake_pp_inline_parameter.grf5.block_vertical_mask = 0xff; + ironlake_pp_inline_parameter.grf5.block_horizontal_mask = 0xffff; + ironlake_pp_inline_parameter.grf5.destination_block_horizontal_origin = x * 16; + ironlake_pp_inline_parameter.grf5.destination_block_vertical_origin = y * 8; + + return 0; +} + +static void +ironlake_pp_nv12_load_save_initialize(VADriverContextP ctx, VASurfaceID surface, int input, + unsigned short srcw, unsigned short srch, + unsigned short destw, unsigned short desth) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct i965_post_processing_context *pp_context = &i965->render_state.pp_context; + struct pp_load_save_context *pp_load_save_context = (struct pp_load_save_context *)&pp_context->private_context; + struct object_surface *obj_surface; + struct i965_surface_state *ss; + dri_bo *bo; + int index, w, h; + int orig_w, orig_h; + + /* surface */ + obj_surface = SURFACE(surface); + orig_w = obj_surface->orig_width; + orig_h = obj_surface->orig_height; + w = obj_surface->width; + h = obj_surface->height; + + dri_bo_unreference(obj_surface->pp_out_bo); + obj_surface->pp_out_bo = dri_bo_alloc(i965->intel.bufmgr, + "intermediate surface", + SIZE_YUV420(w, h), + 4096); + assert(obj_surface->pp_out_bo); + obj_surface->pp_out_width = obj_surface->width; + obj_surface->pp_out_height = obj_surface->height; + obj_surface->orig_pp_out_width = obj_surface->orig_width; + obj_surface->orig_pp_out_height = obj_surface->orig_height; + + /* source Y surface index 1 */ + index = 1; + pp_context->surfaces[index].s_bo = obj_surface->bo; + dri_bo_reference(pp_context->surfaces[index].s_bo); + bo = dri_bo_alloc(i965->intel.bufmgr, 
+ "surface state", + sizeof(struct i965_surface_state), + 4096); + assert(bo); + pp_context->surfaces[index].ss_bo = bo; + dri_bo_map(bo, True); + assert(bo->virtual); + ss = bo->virtual; + memset(ss, 0, sizeof(*ss)); + ss->ss0.surface_type = I965_SURFACE_2D; + ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM; + ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset; + ss->ss2.width = orig_w / 4 - 1; + ss->ss2.height = orig_h - 1; + ss->ss3.pitch = w - 1; + dri_bo_emit_reloc(bo, + I915_GEM_DOMAIN_RENDER, + 0, + 0, + offsetof(struct i965_surface_state, ss1), + pp_context->surfaces[index].s_bo); + dri_bo_unmap(bo); + + /* source UV surface index 2 */ + index = 2; + pp_context->surfaces[index].s_bo = obj_surface->bo; + dri_bo_reference(pp_context->surfaces[index].s_bo); + bo = dri_bo_alloc(i965->intel.bufmgr, + "surface state", + sizeof(struct i965_surface_state), + 4096); + assert(bo); + pp_context->surfaces[index].ss_bo = bo; + dri_bo_map(bo, True); + assert(bo->virtual); + ss = bo->virtual; + memset(ss, 0, sizeof(*ss)); + ss->ss0.surface_type = I965_SURFACE_2D; + ss->ss0.surface_format = I965_SURFACEFORMAT_R8G8_UNORM; + ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset + w * h; + ss->ss2.width = orig_w / 4 - 1; + ss->ss2.height = orig_h / 2 - 1; + ss->ss3.pitch = w - 1; + dri_bo_emit_reloc(bo, + I915_GEM_DOMAIN_RENDER, + 0, + w * h, + offsetof(struct i965_surface_state, ss1), + pp_context->surfaces[index].s_bo); + dri_bo_unmap(bo); + + /* destination Y surface index 7 */ + index = 7; + pp_context->surfaces[index].s_bo = obj_surface->pp_out_bo; + dri_bo_reference(pp_context->surfaces[index].s_bo); + bo = dri_bo_alloc(i965->intel.bufmgr, + "surface state", + sizeof(struct i965_surface_state), + 4096); + assert(bo); + pp_context->surfaces[index].ss_bo = bo; + dri_bo_map(bo, True); + assert(bo->virtual); + ss = bo->virtual; + memset(ss, 0, sizeof(*ss)); + ss->ss0.surface_type = I965_SURFACE_2D; + ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM; + ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset; + ss->ss2.width = orig_w / 4 - 1; + ss->ss2.height = orig_h - 1; + ss->ss3.pitch = w - 1; + dri_bo_emit_reloc(bo, + I915_GEM_DOMAIN_RENDER, + I915_GEM_DOMAIN_RENDER, + 0, + offsetof(struct i965_surface_state, ss1), + pp_context->surfaces[index].s_bo); + dri_bo_unmap(bo); + + /* destination UV surface index 8 */ + index = 8; + pp_context->surfaces[index].s_bo = obj_surface->pp_out_bo; + dri_bo_reference(pp_context->surfaces[index].s_bo); + bo = dri_bo_alloc(i965->intel.bufmgr, + "surface state", + sizeof(struct i965_surface_state), + 4096); + assert(bo); + pp_context->surfaces[index].ss_bo = bo; + dri_bo_map(bo, True); + assert(bo->virtual); + ss = bo->virtual; + memset(ss, 0, sizeof(*ss)); + ss->ss0.surface_type = I965_SURFACE_2D; + ss->ss0.surface_format = I965_SURFACEFORMAT_R8G8_UNORM; + ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset + w * h; + ss->ss2.width = orig_w / 4 - 1; + ss->ss2.height = orig_h / 2 - 1; + ss->ss3.pitch = w - 1; + dri_bo_emit_reloc(bo, + I915_GEM_DOMAIN_RENDER, + I915_GEM_DOMAIN_RENDER, + w * h, + offsetof(struct i965_surface_state, ss1), + pp_context->surfaces[index].s_bo); + dri_bo_unmap(bo); + + /* private function & data */ + pp_context->pp_x_steps = pp_load_save_x_steps; + pp_context->pp_y_steps = pp_load_save_y_steps; + pp_context->pp_set_block_parameter = pp_load_save_set_block_parameter; + pp_load_save_context->dest_h = h; + pp_load_save_context->dest_w = w; + + ironlake_pp_inline_parameter.grf5.block_count_x = w / 16; 
/* 1 x N */ + ironlake_pp_inline_parameter.grf5.number_blocks = w / 16; +} + +static int +pp_scaling_x_steps(void *private_context) +{ + return 1; +} + +static int +pp_scaling_y_steps(void *private_context) +{ + struct pp_scaling_context *pp_scaling_context = private_context; + + return pp_scaling_context->dest_h / 8; +} + +static int +pp_scaling_set_block_parameter(void *private_context, int x, int y) +{ + float src_x_steping = ironlake_pp_inline_parameter.grf5.normalized_video_x_scaling_step; + float src_y_steping = ironlake_pp_static_parameter.grf1.r1_6.normalized_video_y_scaling_step; + + ironlake_pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin = src_x_steping * x * 16; + ironlake_pp_inline_parameter.grf5.source_surface_block_normalized_vertical_origin = src_y_steping * y * 8; + ironlake_pp_inline_parameter.grf5.destination_block_horizontal_origin = x * 16; + ironlake_pp_inline_parameter.grf5.destination_block_vertical_origin = y * 8; + + return 0; +} + +static void +ironlake_pp_nv12_scaling_initialize(VADriverContextP ctx, VASurfaceID surface, int input, + unsigned short srcw, unsigned short srch, + unsigned short destw, unsigned short desth) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct i965_post_processing_context *pp_context = &i965->render_state.pp_context; + struct pp_scaling_context *pp_scaling_context = (struct pp_scaling_context *)&pp_context->private_context; + struct object_surface *obj_surface; + struct i965_sampler_state *sampler_state; + struct i965_surface_state *ss; + dri_bo *bo; + int index; + int w, h; + int orig_w, orig_h; + int pp_out_w, pp_out_h; + int orig_pp_out_w, orig_pp_out_h; + + /* surface */ + obj_surface = SURFACE(surface); + orig_w = obj_surface->orig_width; + orig_h = obj_surface->orig_height; + w = obj_surface->width; + h = obj_surface->height; + + orig_pp_out_w = destw; + orig_pp_out_h = desth; + pp_out_w = ALIGN(orig_pp_out_w, 16); + pp_out_h = ALIGN(orig_pp_out_h, 16); + dri_bo_unreference(obj_surface->pp_out_bo); + obj_surface->pp_out_bo = dri_bo_alloc(i965->intel.bufmgr, + "intermediate surface", + SIZE_YUV420(pp_out_w, pp_out_h), + 4096); + assert(obj_surface->pp_out_bo); + obj_surface->orig_pp_out_width = orig_pp_out_w; + obj_surface->orig_pp_out_height = orig_pp_out_h; + obj_surface->pp_out_width = pp_out_w; + obj_surface->pp_out_height = pp_out_h; + + /* source Y surface index 1 */ + index = 1; + pp_context->surfaces[index].s_bo = obj_surface->bo; + dri_bo_reference(pp_context->surfaces[index].s_bo); + bo = dri_bo_alloc(i965->intel.bufmgr, + "surface state", + sizeof(struct i965_surface_state), + 4096); + assert(bo); + pp_context->surfaces[index].ss_bo = bo; + dri_bo_map(bo, True); + assert(bo->virtual); + ss = bo->virtual; + memset(ss, 0, sizeof(*ss)); + ss->ss0.surface_type = I965_SURFACE_2D; + ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM; + ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset; + ss->ss2.width = orig_w - 1; + ss->ss2.height = orig_h - 1; + ss->ss3.pitch = w - 1; + dri_bo_emit_reloc(bo, + I915_GEM_DOMAIN_RENDER, + 0, + 0, + offsetof(struct i965_surface_state, ss1), + pp_context->surfaces[index].s_bo); + dri_bo_unmap(bo); + + /* source UV surface index 2 */ + index = 2; + pp_context->surfaces[index].s_bo = obj_surface->bo; + dri_bo_reference(pp_context->surfaces[index].s_bo); + bo = dri_bo_alloc(i965->intel.bufmgr, + "surface state", + sizeof(struct i965_surface_state), + 4096); + assert(bo); + pp_context->surfaces[index].ss_bo = bo; + dri_bo_map(bo, True); 
+ assert(bo->virtual); + ss = bo->virtual; + memset(ss, 0, sizeof(*ss)); + ss->ss0.surface_type = I965_SURFACE_2D; + ss->ss0.surface_format = I965_SURFACEFORMAT_R8G8_UNORM; + ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset + w * h; + ss->ss2.width = orig_w / 2 - 1; + ss->ss2.height = orig_h / 2 - 1; + ss->ss3.pitch = w - 1; + dri_bo_emit_reloc(bo, + I915_GEM_DOMAIN_RENDER, + 0, + w * h, + offsetof(struct i965_surface_state, ss1), + pp_context->surfaces[index].s_bo); + dri_bo_unmap(bo); + + /* destination Y surface index 7 */ + index = 7; + pp_context->surfaces[index].s_bo = obj_surface->pp_out_bo; + dri_bo_reference(pp_context->surfaces[index].s_bo); + bo = dri_bo_alloc(i965->intel.bufmgr, + "surface state", + sizeof(struct i965_surface_state), + 4096); + assert(bo); + pp_context->surfaces[index].ss_bo = bo; + dri_bo_map(bo, True); + assert(bo->virtual); + ss = bo->virtual; + memset(ss, 0, sizeof(*ss)); + ss->ss0.surface_type = I965_SURFACE_2D; + ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM; + ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset; + ss->ss2.width = pp_out_w / 4 - 1; + ss->ss2.height = pp_out_h - 1; + ss->ss3.pitch = pp_out_w - 1; + dri_bo_emit_reloc(bo, + I915_GEM_DOMAIN_RENDER, + I915_GEM_DOMAIN_RENDER, + 0, + offsetof(struct i965_surface_state, ss1), + pp_context->surfaces[index].s_bo); + dri_bo_unmap(bo); + + /* destination UV surface index 8 */ + index = 8; + pp_context->surfaces[index].s_bo = obj_surface->pp_out_bo; + dri_bo_reference(pp_context->surfaces[index].s_bo); + bo = dri_bo_alloc(i965->intel.bufmgr, + "surface state", + sizeof(struct i965_surface_state), + 4096); + assert(bo); + pp_context->surfaces[index].ss_bo = bo; + dri_bo_map(bo, True); + assert(bo->virtual); + ss = bo->virtual; + memset(ss, 0, sizeof(*ss)); + ss->ss0.surface_type = I965_SURFACE_2D; + ss->ss0.surface_format = I965_SURFACEFORMAT_R8G8_UNORM; + ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset + pp_out_w * pp_out_h; + ss->ss2.width = pp_out_w / 4 - 1; + ss->ss2.height = pp_out_h / 2 - 1; + ss->ss3.pitch = pp_out_w - 1; + dri_bo_emit_reloc(bo, + I915_GEM_DOMAIN_RENDER, + I915_GEM_DOMAIN_RENDER, + pp_out_w * pp_out_h, + offsetof(struct i965_surface_state, ss1), + pp_context->surfaces[index].s_bo); + dri_bo_unmap(bo); + + /* sampler state */ + dri_bo_map(pp_context->sampler_state_table.bo, True); + assert(pp_context->sampler_state_table.bo->virtual); + sampler_state = pp_context->sampler_state_table.bo->virtual; + + /* SIMD16 Y index 1 */ + sampler_state[1].ss0.min_filter = I965_MAPFILTER_LINEAR; + sampler_state[1].ss0.mag_filter = I965_MAPFILTER_LINEAR; + sampler_state[1].ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP; + sampler_state[1].ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP; + sampler_state[1].ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP; + + /* SIMD16 UV index 2 */ + sampler_state[2].ss0.min_filter = I965_MAPFILTER_LINEAR; + sampler_state[2].ss0.mag_filter = I965_MAPFILTER_LINEAR; + sampler_state[2].ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP; + sampler_state[2].ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP; + sampler_state[2].ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP; + + dri_bo_unmap(pp_context->sampler_state_table.bo); + + /* private function & data */ + pp_context->pp_x_steps = pp_scaling_x_steps; + pp_context->pp_y_steps = pp_scaling_y_steps; + pp_context->pp_set_block_parameter = pp_scaling_set_block_parameter; + + pp_scaling_context->dest_w = pp_out_w; + pp_scaling_context->dest_h = pp_out_h; + + 
ironlake_pp_static_parameter.grf1.r1_6.normalized_video_y_scaling_step = (float) 1.0 / pp_out_h; + ironlake_pp_inline_parameter.grf5.normalized_video_x_scaling_step = (float) 1.0 / pp_out_w; + ironlake_pp_inline_parameter.grf5.block_count_x = pp_out_w / 16; /* 1 x N */ + ironlake_pp_inline_parameter.grf5.number_blocks = pp_out_w / 16; + ironlake_pp_inline_parameter.grf5.block_vertical_mask = 0xff; + ironlake_pp_inline_parameter.grf5.block_horizontal_mask = 0xffff; +} + +static int +pp_avs_x_steps(void *private_context) +{ + struct pp_avs_context *pp_avs_context = private_context; + + return pp_avs_context->dest_w / 16; +} + +static int +pp_avs_y_steps(void *private_context) +{ + return 1; +} + +static int +pp_avs_set_block_parameter(void *private_context, int x, int y) +{ + struct pp_avs_context *pp_avs_context = private_context; + float src_x_steping, src_y_steping, video_step_delta; + int tmp_w = ALIGN(pp_avs_context->dest_h * pp_avs_context->src_w / pp_avs_context->src_h, 16); + + if (tmp_w >= pp_avs_context->dest_w) { + ironlake_pp_inline_parameter.grf5.normalized_video_x_scaling_step = 1.0 / tmp_w; + ironlake_pp_inline_parameter.grf6.video_step_delta = 0; + + if (x == 0) { + ironlake_pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin = (float)(tmp_w - pp_avs_context->dest_w) / tmp_w / 2; + } else { + src_x_steping = ironlake_pp_inline_parameter.grf5.normalized_video_x_scaling_step; + video_step_delta = ironlake_pp_inline_parameter.grf6.video_step_delta; + ironlake_pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 + + 16 * 15 * video_step_delta / 2; + } + } else { + int n0, n1, n2, nls_left, nls_right; + int factor_a = 5, factor_b = 4; + float f; + + n0 = (pp_avs_context->dest_w - tmp_w) / (16 * 2); + n1 = (pp_avs_context->dest_w - tmp_w) / 16 - n0; + n2 = tmp_w / (16 * factor_a); + nls_left = n0 + n2; + nls_right = n1 + n2; + f = (float) n2 * 16 / tmp_w; + + if (n0 < 5) { + ironlake_pp_inline_parameter.grf6.video_step_delta = 0.0; + + if (x == 0) { + ironlake_pp_inline_parameter.grf5.normalized_video_x_scaling_step = 1.0 / pp_avs_context->dest_w; + ironlake_pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin = 0.0; + } else { + src_x_steping = ironlake_pp_inline_parameter.grf5.normalized_video_x_scaling_step; + video_step_delta = ironlake_pp_inline_parameter.grf6.video_step_delta; + ironlake_pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 + + 16 * 15 * video_step_delta / 2; + } + } else { + if (x < nls_left) { + /* f = a * nls_left * 16 + b * nls_left * 16 * (nls_left * 16 - 1) / 2 */ + float a = f / (nls_left * 16 * factor_b); + float b = (f - nls_left * 16 * a) * 2 / (nls_left * 16 * (nls_left * 16 - 1)); + + ironlake_pp_inline_parameter.grf6.video_step_delta = b; + + if (x == 0) { + ironlake_pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin = 0.0; + ironlake_pp_inline_parameter.grf5.normalized_video_x_scaling_step = a; + } else { + src_x_steping = ironlake_pp_inline_parameter.grf5.normalized_video_x_scaling_step; + video_step_delta = ironlake_pp_inline_parameter.grf6.video_step_delta; + ironlake_pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 + + 16 * 15 * video_step_delta / 2; + ironlake_pp_inline_parameter.grf5.normalized_video_x_scaling_step += 16 * b; + } + } else if (x < (pp_avs_context->dest_w / 16 - nls_right)) { + /* scale the 
center linearly */ + src_x_steping = ironlake_pp_inline_parameter.grf5.normalized_video_x_scaling_step; + video_step_delta = ironlake_pp_inline_parameter.grf6.video_step_delta; + ironlake_pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 + + 16 * 15 * video_step_delta / 2; + ironlake_pp_inline_parameter.grf6.video_step_delta = 0.0; + ironlake_pp_inline_parameter.grf5.normalized_video_x_scaling_step = 1.0 / tmp_w; + } else { + float a = f / (nls_right * 16 * factor_b); + float b = (f - nls_right * 16 * a) * 2 / (nls_right * 16 * (nls_right * 16 - 1)); + + src_x_steping = ironlake_pp_inline_parameter.grf5.normalized_video_x_scaling_step; + video_step_delta = ironlake_pp_inline_parameter.grf6.video_step_delta; + ironlake_pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 + + 16 * 15 * video_step_delta / 2; + ironlake_pp_inline_parameter.grf6.video_step_delta = -b; + + if (x == (pp_avs_context->dest_w / 16 - nls_right)) + ironlake_pp_inline_parameter.grf5.normalized_video_x_scaling_step = a + (nls_right * 16 - 1) * b; + else + ironlake_pp_inline_parameter.grf5.normalized_video_x_scaling_step -= b * 16; + } + } + } + + src_y_steping = ironlake_pp_static_parameter.grf1.r1_6.normalized_video_y_scaling_step; + ironlake_pp_inline_parameter.grf5.source_surface_block_normalized_vertical_origin = src_y_steping * y * 8; + ironlake_pp_inline_parameter.grf5.destination_block_horizontal_origin = x * 16; + ironlake_pp_inline_parameter.grf5.destination_block_vertical_origin = y * 8; + + return 0; +} + +static void +ironlake_pp_nv12_avs_initialize(VADriverContextP ctx, VASurfaceID surface, int input, + unsigned short srcw, unsigned short srch, + unsigned short destw, unsigned short desth) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct i965_post_processing_context *pp_context = &i965->render_state.pp_context; + struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->private_context; + struct object_surface *obj_surface; + struct i965_surface_state *ss; + struct i965_sampler_8x8 *sampler_8x8; + struct i965_sampler_8x8_state *sampler_8x8_state; + struct i965_surface_state2 *ss_8x8; + dri_bo *bo; + int index; + int w, h; + int orig_w, orig_h; + int pp_out_w, pp_out_h; + int orig_pp_out_w, orig_pp_out_h; + + /* surface */ + obj_surface = SURFACE(surface); + + if (input == 1) { + assert(obj_surface->pp_out_bo); + orig_w = obj_surface->orig_pp_out_width; + orig_h = obj_surface->orig_pp_out_height; + w = obj_surface->pp_out_width; + h = obj_surface->pp_out_height; + } else { + orig_w = obj_surface->orig_width; + orig_h = obj_surface->orig_height; + w = obj_surface->width; + h = obj_surface->height; + } + /* source Y surface index 1 */ + index = 1; + pp_context->surfaces[index].s_bo = (input == 1 ? 
obj_surface->pp_out_bo : obj_surface->bo); + dri_bo_reference(pp_context->surfaces[index].s_bo); + bo = dri_bo_alloc(i965->intel.bufmgr, + "Y surface state for sample_8x8", + sizeof(struct i965_surface_state2), + 4096); + assert(bo); + pp_context->surfaces[index].ss_bo = bo; + dri_bo_map(bo, True); + assert(bo->virtual); + ss_8x8 = bo->virtual; + memset(ss_8x8, 0, sizeof(*ss_8x8)); + ss_8x8->ss0.surface_base_address = pp_context->surfaces[index].s_bo->offset; + ss_8x8->ss1.cbcr_pixel_offset_v_direction = 0; + ss_8x8->ss1.width = orig_w - 1; + ss_8x8->ss1.height = orig_h - 1; + ss_8x8->ss2.half_pitch_for_chroma = 0; + ss_8x8->ss2.pitch = w - 1; + ss_8x8->ss2.interleave_chroma = 0; + ss_8x8->ss2.surface_format = SURFACE_FORMAT_Y8_UNORM; + ss_8x8->ss3.x_offset_for_cb = 0; + ss_8x8->ss3.y_offset_for_cb = 0; + dri_bo_emit_reloc(bo, + I915_GEM_DOMAIN_RENDER, + 0, + 0, + offsetof(struct i965_surface_state2, ss0), + pp_context->surfaces[index].s_bo); + dri_bo_unmap(bo); + + /* source UV surface index 2 */ + index = 2; + pp_context->surfaces[index].s_bo = (input == 1 ? obj_surface->pp_out_bo : obj_surface->bo); + dri_bo_reference(pp_context->surfaces[index].s_bo); + bo = dri_bo_alloc(i965->intel.bufmgr, + "UV surface state for sample_8x8", + sizeof(struct i965_surface_state2), + 4096); + assert(bo); + pp_context->surfaces[index].ss_bo = bo; + dri_bo_map(bo, True); + assert(bo->virtual); + ss_8x8 = bo->virtual; + memset(ss_8x8, 0, sizeof(*ss_8x8)); + ss_8x8->ss0.surface_base_address = pp_context->surfaces[index].s_bo->offset + w * h; + ss_8x8->ss1.cbcr_pixel_offset_v_direction = 0; + ss_8x8->ss1.width = orig_w / 2 - 1; + ss_8x8->ss1.height = orig_h / 2 - 1; + ss_8x8->ss2.half_pitch_for_chroma = 0; + ss_8x8->ss2.pitch = w - 1; + ss_8x8->ss2.interleave_chroma = 0; + ss_8x8->ss2.surface_format = SURFACE_FORMAT_R8B8_UNORM; + ss_8x8->ss3.x_offset_for_cb = 0; + ss_8x8->ss3.y_offset_for_cb = 0; + dri_bo_emit_reloc(bo, + I915_GEM_DOMAIN_RENDER, + 0, + w * h, + offsetof(struct i965_surface_state2, ss0), + pp_context->surfaces[index].s_bo); + dri_bo_unmap(bo); + + orig_pp_out_w = destw; + orig_pp_out_h = desth; + pp_out_w = ALIGN(orig_pp_out_w, 16); + pp_out_h = ALIGN(orig_pp_out_h, 16); + dri_bo_unreference(obj_surface->pp_out_bo); + obj_surface->pp_out_bo = dri_bo_alloc(i965->intel.bufmgr, + "intermediate surface", + SIZE_YUV420(pp_out_w, pp_out_h), + 4096); + assert(obj_surface->pp_out_bo); + obj_surface->orig_pp_out_width = orig_pp_out_w; + obj_surface->orig_pp_out_height = orig_pp_out_h; + obj_surface->pp_out_width = pp_out_w; + obj_surface->pp_out_height = pp_out_h; + + /* destination Y surface index 7 */ + index = 7; + pp_context->surfaces[index].s_bo = obj_surface->pp_out_bo; + dri_bo_reference(pp_context->surfaces[index].s_bo); + bo = dri_bo_alloc(i965->intel.bufmgr, + "surface state", + sizeof(struct i965_surface_state), + 4096); + assert(bo); + pp_context->surfaces[index].ss_bo = bo; + dri_bo_map(bo, True); + assert(bo->virtual); + ss = bo->virtual; + memset(ss, 0, sizeof(*ss)); + ss->ss0.surface_type = I965_SURFACE_2D; + ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM; + ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset; + ss->ss2.width = pp_out_w / 4 - 1; + ss->ss2.height = pp_out_h - 1; + ss->ss3.pitch = pp_out_w - 1; + dri_bo_emit_reloc(bo, + I915_GEM_DOMAIN_RENDER, + I915_GEM_DOMAIN_RENDER, + 0, + offsetof(struct i965_surface_state, ss1), + pp_context->surfaces[index].s_bo); + dri_bo_unmap(bo); + + /* destination UV surface index 8 */ + index = 8; + 
pp_context->surfaces[index].s_bo = obj_surface->pp_out_bo; + dri_bo_reference(pp_context->surfaces[index].s_bo); + bo = dri_bo_alloc(i965->intel.bufmgr, + "surface state", + sizeof(struct i965_surface_state), + 4096); + assert(bo); + pp_context->surfaces[index].ss_bo = bo; + dri_bo_map(bo, True); + assert(bo->virtual); + ss = bo->virtual; + memset(ss, 0, sizeof(*ss)); + ss->ss0.surface_type = I965_SURFACE_2D; + ss->ss0.surface_format = I965_SURFACEFORMAT_R8G8_UNORM; + ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset + pp_out_w * pp_out_h; + ss->ss2.width = pp_out_w / 4 - 1; + ss->ss2.height = pp_out_h / 2 - 1; + ss->ss3.pitch = pp_out_w - 1; + dri_bo_emit_reloc(bo, + I915_GEM_DOMAIN_RENDER, + I915_GEM_DOMAIN_RENDER, + pp_out_w * pp_out_h, + offsetof(struct i965_surface_state, ss1), + pp_context->surfaces[index].s_bo); + dri_bo_unmap(bo); + + /* sampler 8x8 state */ + dri_bo_map(pp_context->sampler_state_table.bo_8x8, True); + assert(pp_context->sampler_state_table.bo_8x8->virtual); + assert(sizeof(*sampler_8x8_state) == sizeof(int) * 138); + sampler_8x8_state = pp_context->sampler_state_table.bo_8x8->virtual; + memset(sampler_8x8_state, 0, sizeof(*sampler_8x8_state)); + sampler_8x8_state->dw136.default_sharpness_level = 0; + sampler_8x8_state->dw137.adaptive_filter_for_all_channel = 1; + sampler_8x8_state->dw137.bypass_y_adaptive_filtering = 1; + sampler_8x8_state->dw137.bypass_x_adaptive_filtering = 1; + dri_bo_unmap(pp_context->sampler_state_table.bo_8x8); + + /* sampler 8x8 */ + dri_bo_map(pp_context->sampler_state_table.bo, True); + assert(pp_context->sampler_state_table.bo->virtual); + assert(sizeof(*sampler_8x8) == sizeof(int) * 16); + sampler_8x8 = pp_context->sampler_state_table.bo->virtual; + + /* sample_8x8 Y index 1 */ + index = 1; + memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8)); + sampler_8x8[index].dw0.avs_filter_type = AVS_FILTER_ADAPTIVE_8_TAP; + sampler_8x8[index].dw0.ief_bypass = 0; + sampler_8x8[index].dw0.ief_filter_type = IEF_FILTER_DETAIL; + sampler_8x8[index].dw0.ief_filter_size = IEF_FILTER_SIZE_5X5; + sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5; + sampler_8x8[index].dw2.global_noise_estimation = 22; + sampler_8x8[index].dw2.strong_edge_threshold = 8; + sampler_8x8[index].dw2.weak_edge_threshold = 1; + sampler_8x8[index].dw3.strong_edge_weight = 7; + sampler_8x8[index].dw3.regular_weight = 2; + sampler_8x8[index].dw3.non_edge_weight = 0; + sampler_8x8[index].dw3.gain_factor = 40; + sampler_8x8[index].dw4.steepness_boost = 0; + sampler_8x8[index].dw4.steepness_threshold = 0; + sampler_8x8[index].dw4.mr_boost = 0; + sampler_8x8[index].dw4.mr_threshold = 5; + sampler_8x8[index].dw5.pwl1_point_1 = 4; + sampler_8x8[index].dw5.pwl1_point_2 = 12; + sampler_8x8[index].dw5.pwl1_point_3 = 16; + sampler_8x8[index].dw5.pwl1_point_4 = 26; + sampler_8x8[index].dw6.pwl1_point_5 = 40; + sampler_8x8[index].dw6.pwl1_point_6 = 160; + sampler_8x8[index].dw6.pwl1_r3_bias_0 = 127; + sampler_8x8[index].dw6.pwl1_r3_bias_1 = 98; + sampler_8x8[index].dw7.pwl1_r3_bias_2 = 88; + sampler_8x8[index].dw7.pwl1_r3_bias_3 = 64; + sampler_8x8[index].dw7.pwl1_r3_bias_4 = 44; + sampler_8x8[index].dw7.pwl1_r3_bias_5 = 0; + sampler_8x8[index].dw8.pwl1_r3_bias_6 = 0; + sampler_8x8[index].dw8.pwl1_r5_bias_0 = 3; + sampler_8x8[index].dw8.pwl1_r5_bias_1 = 32; + sampler_8x8[index].dw8.pwl1_r5_bias_2 = 32; + sampler_8x8[index].dw9.pwl1_r5_bias_3 = 58; + sampler_8x8[index].dw9.pwl1_r5_bias_4 = 100; + 
sampler_8x8[index].dw9.pwl1_r5_bias_5 = 108; + sampler_8x8[index].dw9.pwl1_r5_bias_6 = 88; + sampler_8x8[index].dw10.pwl1_r3_slope_0 = -116; + sampler_8x8[index].dw10.pwl1_r3_slope_1 = -20; + sampler_8x8[index].dw10.pwl1_r3_slope_2 = -96; + sampler_8x8[index].dw10.pwl1_r3_slope_3 = -32; + sampler_8x8[index].dw11.pwl1_r3_slope_4 = -50; + sampler_8x8[index].dw11.pwl1_r3_slope_5 = 0; + sampler_8x8[index].dw11.pwl1_r3_slope_6 = 0; + sampler_8x8[index].dw11.pwl1_r5_slope_0 = 116; + sampler_8x8[index].dw12.pwl1_r5_slope_1 = 0; + sampler_8x8[index].dw12.pwl1_r5_slope_2 = 114; + sampler_8x8[index].dw12.pwl1_r5_slope_3 = 67; + sampler_8x8[index].dw12.pwl1_r5_slope_4 = 9; + sampler_8x8[index].dw13.pwl1_r5_slope_5 = -3; + sampler_8x8[index].dw13.pwl1_r5_slope_6 = -15; + sampler_8x8[index].dw13.limiter_boost = 0; + sampler_8x8[index].dw13.minimum_limiter = 10; + sampler_8x8[index].dw13.maximum_limiter = 11; + sampler_8x8[index].dw14.clip_limiter = 130; + dri_bo_emit_reloc(pp_context->sampler_state_table.bo, + I915_GEM_DOMAIN_RENDER, + 0, + 0, + sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1), + pp_context->sampler_state_table.bo_8x8); + + dri_bo_map(pp_context->sampler_state_table.bo_8x8_uv, True); + assert(pp_context->sampler_state_table.bo_8x8_uv->virtual); + assert(sizeof(*sampler_8x8_state) == sizeof(int) * 138); + sampler_8x8_state = pp_context->sampler_state_table.bo_8x8_uv->virtual; + memset(sampler_8x8_state, 0, sizeof(*sampler_8x8_state)); + sampler_8x8_state->dw136.default_sharpness_level = 0; + sampler_8x8_state->dw137.adaptive_filter_for_all_channel = 0; + sampler_8x8_state->dw137.bypass_y_adaptive_filtering = 1; + sampler_8x8_state->dw137.bypass_x_adaptive_filtering = 1; + dri_bo_unmap(pp_context->sampler_state_table.bo_8x8_uv); + + /* sample_8x8 UV index 2 */ + index = 2; + memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8)); + sampler_8x8[index].dw0.avs_filter_type = AVS_FILTER_NEAREST; + sampler_8x8[index].dw0.ief_bypass = 0; + sampler_8x8[index].dw0.ief_filter_type = IEF_FILTER_DETAIL; + sampler_8x8[index].dw0.ief_filter_size = IEF_FILTER_SIZE_5X5; + sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8_uv->offset >> 5; + sampler_8x8[index].dw2.global_noise_estimation = 22; + sampler_8x8[index].dw2.strong_edge_threshold = 8; + sampler_8x8[index].dw2.weak_edge_threshold = 1; + sampler_8x8[index].dw3.strong_edge_weight = 7; + sampler_8x8[index].dw3.regular_weight = 2; + sampler_8x8[index].dw3.non_edge_weight = 0; + sampler_8x8[index].dw3.gain_factor = 40; + sampler_8x8[index].dw4.steepness_boost = 0; + sampler_8x8[index].dw4.steepness_threshold = 0; + sampler_8x8[index].dw4.mr_boost = 0; + sampler_8x8[index].dw4.mr_threshold = 5; + sampler_8x8[index].dw5.pwl1_point_1 = 4; + sampler_8x8[index].dw5.pwl1_point_2 = 12; + sampler_8x8[index].dw5.pwl1_point_3 = 16; + sampler_8x8[index].dw5.pwl1_point_4 = 26; + sampler_8x8[index].dw6.pwl1_point_5 = 40; + sampler_8x8[index].dw6.pwl1_point_6 = 160; + sampler_8x8[index].dw6.pwl1_r3_bias_0 = 127; + sampler_8x8[index].dw6.pwl1_r3_bias_1 = 98; + sampler_8x8[index].dw7.pwl1_r3_bias_2 = 88; + sampler_8x8[index].dw7.pwl1_r3_bias_3 = 64; + sampler_8x8[index].dw7.pwl1_r3_bias_4 = 44; + sampler_8x8[index].dw7.pwl1_r3_bias_5 = 0; + sampler_8x8[index].dw8.pwl1_r3_bias_6 = 0; + sampler_8x8[index].dw8.pwl1_r5_bias_0 = 3; + sampler_8x8[index].dw8.pwl1_r5_bias_1 = 32; + sampler_8x8[index].dw8.pwl1_r5_bias_2 = 32; + sampler_8x8[index].dw9.pwl1_r5_bias_3 = 58; + sampler_8x8[index].dw9.pwl1_r5_bias_4 = 100; 
+ sampler_8x8[index].dw9.pwl1_r5_bias_5 = 108; + sampler_8x8[index].dw9.pwl1_r5_bias_6 = 88; + sampler_8x8[index].dw10.pwl1_r3_slope_0 = -116; + sampler_8x8[index].dw10.pwl1_r3_slope_1 = -20; + sampler_8x8[index].dw10.pwl1_r3_slope_2 = -96; + sampler_8x8[index].dw10.pwl1_r3_slope_3 = -32; + sampler_8x8[index].dw11.pwl1_r3_slope_4 = -50; + sampler_8x8[index].dw11.pwl1_r3_slope_5 = 0; + sampler_8x8[index].dw11.pwl1_r3_slope_6 = 0; + sampler_8x8[index].dw11.pwl1_r5_slope_0 = 116; + sampler_8x8[index].dw12.pwl1_r5_slope_1 = 0; + sampler_8x8[index].dw12.pwl1_r5_slope_2 = 114; + sampler_8x8[index].dw12.pwl1_r5_slope_3 = 67; + sampler_8x8[index].dw12.pwl1_r5_slope_4 = 9; + sampler_8x8[index].dw13.pwl1_r5_slope_5 = -3; + sampler_8x8[index].dw13.pwl1_r5_slope_6 = -15; + sampler_8x8[index].dw13.limiter_boost = 0; + sampler_8x8[index].dw13.minimum_limiter = 10; + sampler_8x8[index].dw13.maximum_limiter = 11; + sampler_8x8[index].dw14.clip_limiter = 130; + dri_bo_emit_reloc(pp_context->sampler_state_table.bo, + I915_GEM_DOMAIN_RENDER, + 0, + 0, + sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1), + pp_context->sampler_state_table.bo_8x8_uv); + + dri_bo_unmap(pp_context->sampler_state_table.bo); + + /* private function & data */ + pp_context->pp_x_steps = pp_avs_x_steps; + pp_context->pp_y_steps = pp_avs_y_steps; + pp_context->pp_set_block_parameter = pp_avs_set_block_parameter; + + pp_avs_context->dest_w = pp_out_w; + pp_avs_context->dest_h = pp_out_h; + pp_avs_context->src_w = w; + pp_avs_context->src_h = h; + + ironlake_pp_static_parameter.grf4.r4_2.avs.nlas = 1; + ironlake_pp_static_parameter.grf1.r1_6.normalized_video_y_scaling_step = (float) 1.0 / pp_out_h; + ironlake_pp_inline_parameter.grf5.normalized_video_x_scaling_step = (float) 1.0 / pp_out_w; + ironlake_pp_inline_parameter.grf5.block_count_x = 1; /* M x 1 */ + ironlake_pp_inline_parameter.grf5.number_blocks = pp_out_h / 8; + ironlake_pp_inline_parameter.grf5.block_vertical_mask = 0xff; + ironlake_pp_inline_parameter.grf5.block_horizontal_mask = 0xffff; + ironlake_pp_inline_parameter.grf6.video_step_delta = 0.0; +} + +static int +pp_dndi_x_steps(void *private_context) +{ + return 1; +} + +static int +pp_dndi_y_steps(void *private_context) +{ + struct pp_dndi_context *pp_dndi_context = private_context; + + return pp_dndi_context->dest_h / 4; +} + +static int +pp_dndi_set_block_parameter(void *private_context, int x, int y) +{ + ironlake_pp_inline_parameter.grf5.destination_block_horizontal_origin = x * 16; + ironlake_pp_inline_parameter.grf5.destination_block_vertical_origin = y * 4; + + return 0; +} + +static +void ironlake_pp_nv12_dndi_initialize(VADriverContextP ctx, VASurfaceID surface, int input, + unsigned short srcw, unsigned short srch, + unsigned short destw, unsigned short desth) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct i965_post_processing_context *pp_context = &i965->render_state.pp_context; + struct pp_dndi_context *pp_dndi_context = (struct pp_dndi_context *)&pp_context->private_context; + struct object_surface *obj_surface; + struct i965_surface_state *ss; + struct i965_surface_state2 *ss_dndi; + struct i965_sampler_dndi *sampler_dndi; + dri_bo *bo; + int index; + int w, h; + int orig_w, orig_h; + + /* surface */ + obj_surface = SURFACE(surface); + orig_w = obj_surface->orig_width; + orig_h = obj_surface->orig_height; + w = obj_surface->width; + h = obj_surface->height; + + if (pp_context->stmm.bo == NULL) { + pp_context->stmm.bo = dri_bo_alloc(i965->intel.bufmgr, + "STMM 
surface", + w * h, + 4096); + assert(pp_context->stmm.bo); + } + + dri_bo_unreference(obj_surface->pp_out_bo); + obj_surface->pp_out_bo = dri_bo_alloc(i965->intel.bufmgr, + "intermediate surface", + SIZE_YUV420(w, h), + 4096); + assert(obj_surface->pp_out_bo); + obj_surface->orig_pp_out_width = orig_w; + obj_surface->orig_pp_out_height = orig_h; + obj_surface->pp_out_width = w; + obj_surface->pp_out_height = h; + + /* source UV surface index 2 */ + index = 2; + pp_context->surfaces[index].s_bo = obj_surface->bo; + dri_bo_reference(pp_context->surfaces[index].s_bo); + bo = dri_bo_alloc(i965->intel.bufmgr, + "surface state", + sizeof(struct i965_surface_state), + 4096); + assert(bo); + pp_context->surfaces[index].ss_bo = bo; + dri_bo_map(bo, True); + assert(bo->virtual); + ss = bo->virtual; + memset(ss, 0, sizeof(*ss)); + ss->ss0.surface_type = I965_SURFACE_2D; + ss->ss0.surface_format = I965_SURFACEFORMAT_R8G8_UNORM; + ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset + w * h; + ss->ss2.width = orig_w / 4 - 1; + ss->ss2.height = orig_h / 2 - 1; + ss->ss3.pitch = w - 1; + dri_bo_emit_reloc(bo, + I915_GEM_DOMAIN_RENDER, + 0, + w * h, + offsetof(struct i965_surface_state, ss1), + pp_context->surfaces[index].s_bo); + dri_bo_unmap(bo); + + /* source YUV surface index 4 */ + index = 4; + pp_context->surfaces[index].s_bo = obj_surface->bo; + dri_bo_reference(pp_context->surfaces[index].s_bo); + bo = dri_bo_alloc(i965->intel.bufmgr, + "YUV surface state for deinterlace ", + sizeof(struct i965_surface_state2), + 4096); + assert(bo); + pp_context->surfaces[index].ss_bo = bo; + dri_bo_map(bo, True); + assert(bo->virtual); + ss_dndi = bo->virtual; + memset(ss_dndi, 0, sizeof(*ss_dndi)); + ss_dndi->ss0.surface_base_address = pp_context->surfaces[index].s_bo->offset; + ss_dndi->ss1.cbcr_pixel_offset_v_direction = 0; + ss_dndi->ss1.width = w - 1; + ss_dndi->ss1.height = h - 1; + ss_dndi->ss1.cbcr_pixel_offset_v_direction = 1; + ss_dndi->ss2.half_pitch_for_chroma = 0; + ss_dndi->ss2.pitch = w - 1; + ss_dndi->ss2.interleave_chroma = 1; + ss_dndi->ss2.surface_format = SURFACE_FORMAT_PLANAR_420_8; + ss_dndi->ss2.half_pitch_for_chroma = 0; + ss_dndi->ss2.tiled_surface = 0; + ss_dndi->ss3.x_offset_for_cb = 0; + ss_dndi->ss3.y_offset_for_cb = h; + dri_bo_emit_reloc(bo, + I915_GEM_DOMAIN_RENDER, + 0, + 0, + offsetof(struct i965_surface_state2, ss0), + pp_context->surfaces[index].s_bo); + dri_bo_unmap(bo); + + /* source STMM surface index 20 */ + index = 20; + pp_context->surfaces[index].s_bo = pp_context->stmm.bo; + dri_bo_reference(pp_context->surfaces[index].s_bo); + bo = dri_bo_alloc(i965->intel.bufmgr, + "STMM surface state for deinterlace ", + sizeof(struct i965_surface_state2), + 4096); + assert(bo); + pp_context->surfaces[index].ss_bo = bo; + dri_bo_map(bo, True); + assert(bo->virtual); + ss = bo->virtual; + memset(ss, 0, sizeof(*ss)); + ss->ss0.surface_type = I965_SURFACE_2D; + ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM; + ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset; + ss->ss2.width = w - 1; + ss->ss2.height = h - 1; + ss->ss3.pitch = w - 1; + dri_bo_emit_reloc(bo, + I915_GEM_DOMAIN_RENDER, + I915_GEM_DOMAIN_RENDER, + 0, + offsetof(struct i965_surface_state, ss1), + pp_context->surfaces[index].s_bo); + dri_bo_unmap(bo); + + /* destination Y surface index 7 */ + index = 7; + pp_context->surfaces[index].s_bo = obj_surface->pp_out_bo; + dri_bo_reference(pp_context->surfaces[index].s_bo); + bo = dri_bo_alloc(i965->intel.bufmgr, + "surface state", + sizeof(struct 
i965_surface_state), + 4096); + assert(bo); + pp_context->surfaces[index].ss_bo = bo; + dri_bo_map(bo, True); + assert(bo->virtual); + ss = bo->virtual; + memset(ss, 0, sizeof(*ss)); + ss->ss0.surface_type = I965_SURFACE_2D; + ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM; + ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset; + ss->ss2.width = w / 4 - 1; + ss->ss2.height = h - 1; + ss->ss3.pitch = w - 1; + dri_bo_emit_reloc(bo, + I915_GEM_DOMAIN_RENDER, + I915_GEM_DOMAIN_RENDER, + 0, + offsetof(struct i965_surface_state, ss1), + pp_context->surfaces[index].s_bo); + dri_bo_unmap(bo); + + /* destination UV surface index 8 */ + index = 8; + pp_context->surfaces[index].s_bo = obj_surface->pp_out_bo; + dri_bo_reference(pp_context->surfaces[index].s_bo); + bo = dri_bo_alloc(i965->intel.bufmgr, + "surface state", + sizeof(struct i965_surface_state), + 4096); + assert(bo); + pp_context->surfaces[index].ss_bo = bo; + dri_bo_map(bo, True); + assert(bo->virtual); + ss = bo->virtual; + memset(ss, 0, sizeof(*ss)); + ss->ss0.surface_type = I965_SURFACE_2D; + ss->ss0.surface_format = I965_SURFACEFORMAT_R8G8_UNORM; + ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset + w * h; + ss->ss2.width = w / 4 - 1; + ss->ss2.height = h / 2 - 1; + ss->ss3.pitch = w - 1; + dri_bo_emit_reloc(bo, + I915_GEM_DOMAIN_RENDER, + I915_GEM_DOMAIN_RENDER, + w * h, + offsetof(struct i965_surface_state, ss1), + pp_context->surfaces[index].s_bo); + dri_bo_unmap(bo); + + /* sampler dndi */ + dri_bo_map(pp_context->sampler_state_table.bo, True); + assert(pp_context->sampler_state_table.bo->virtual); + assert(sizeof(*sampler_dndi) == sizeof(int) * 8); + sampler_dndi = pp_context->sampler_state_table.bo->virtual; + + /* sample dndi index 1 */ + index = 0; + sampler_dndi[index].dw0.denoise_asd_threshold = 0; + sampler_dndi[index].dw0.denoise_history_delta = 8; // 0-15, default is 8 + sampler_dndi[index].dw0.denoise_maximum_history = 128; // 128-240 + sampler_dndi[index].dw0.denoise_stad_threshold = 0; + + sampler_dndi[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 64; + sampler_dndi[index].dw1.denoise_moving_pixel_threshold = 0; + sampler_dndi[index].dw1.stmm_c2 = 0; + sampler_dndi[index].dw1.low_temporal_difference_threshold = 8; + sampler_dndi[index].dw1.temporal_difference_threshold = 16; + + sampler_dndi[index].dw2.block_noise_estimate_noise_threshold = 15; // 0-31 + sampler_dndi[index].dw2.block_noise_estimate_edge_threshold = 7; // 0-15 + sampler_dndi[index].dw2.denoise_edge_threshold = 7; // 0-15 + sampler_dndi[index].dw2.good_neighbor_threshold = 7; // 0-63 + + sampler_dndi[index].dw3.maximum_stmm = 128; + sampler_dndi[index].dw3.multipler_for_vecm = 2; + sampler_dndi[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 0; + sampler_dndi[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64; + sampler_dndi[index].dw3.stmm_blending_constant_select = 0; + + sampler_dndi[index].dw4.sdi_delta = 8; + sampler_dndi[index].dw4.sdi_threshold = 128; + sampler_dndi[index].dw4.stmm_output_shift = 7; // stmm_max - stmm_min = 2 ^ stmm_output_shift + sampler_dndi[index].dw4.stmm_shift_up = 0; + sampler_dndi[index].dw4.stmm_shift_down = 0; + sampler_dndi[index].dw4.minimum_stmm = 0; + + sampler_dndi[index].dw5.fmd_temporal_difference_threshold = 0; + sampler_dndi[index].dw5.sdi_fallback_mode_2_constant = 0; + sampler_dndi[index].dw5.sdi_fallback_mode_1_t2_constant = 0; + sampler_dndi[index].dw5.sdi_fallback_mode_1_t1_constant = 0; + + sampler_dndi[index].dw6.dn_enable = 
1; + sampler_dndi[index].dw6.di_enable = 1; + sampler_dndi[index].dw6.di_partial = 0; + sampler_dndi[index].dw6.dndi_top_first = 1; + sampler_dndi[index].dw6.dndi_stream_id = 1; + sampler_dndi[index].dw6.dndi_first_frame = 1; + sampler_dndi[index].dw6.progressive_dn = 0; + sampler_dndi[index].dw6.fmd_tear_threshold = 32; + sampler_dndi[index].dw6.fmd2_vertical_difference_threshold = 32; + sampler_dndi[index].dw6.fmd1_vertical_difference_threshold = 32; +
+ sampler_dndi[index].dw7.fmd_for_1st_field_of_current_frame = 2; + sampler_dndi[index].dw7.fmd_for_2nd_field_of_previous_frame = 1; + sampler_dndi[index].dw7.vdi_walker_enable = 0; + sampler_dndi[index].dw7.column_width_minus1 = w / 16; +
+ dri_bo_unmap(pp_context->sampler_state_table.bo); +
+ /* private function & data */ + pp_context->pp_x_steps = pp_dndi_x_steps; + pp_context->pp_y_steps = pp_dndi_y_steps; + pp_context->pp_set_block_parameter = pp_dndi_set_block_parameter; +
+ ironlake_pp_static_parameter.grf1.statistics_surface_picth = w / 2; + ironlake_pp_static_parameter.grf1.r1_6.di.top_field_first = 0; + ironlake_pp_static_parameter.grf4.r4_2.di.motion_history_coefficient_m2 = 64; + ironlake_pp_static_parameter.grf4.r4_2.di.motion_history_coefficient_m1 = 192; +
+ ironlake_pp_inline_parameter.grf5.block_count_x = w / 16; /* 1 x N */ + ironlake_pp_inline_parameter.grf5.number_blocks = w / 16; + ironlake_pp_inline_parameter.grf5.block_vertical_mask = 0xff; + ironlake_pp_inline_parameter.grf5.block_horizontal_mask = 0xffff; +
+ pp_dndi_context->dest_w = w; + pp_dndi_context->dest_h = h; +} +
+static void +ironlake_pp_initialize(VADriverContextP ctx, + VASurfaceID surface, + int input, + short srcx, + short srcy, + unsigned short srcw, + unsigned short srch, + short destx, + short desty, + unsigned short destw, + unsigned short desth, + int pp_index) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct i965_post_processing_context *pp_context = &i965->render_state.pp_context; + struct pp_module *pp_module; + dri_bo *bo; + int i; +
+ dri_bo_unreference(pp_context->curbe.bo); + bo = dri_bo_alloc(i965->intel.bufmgr, + "constant buffer", + 4096, + 4096); + assert(bo); + pp_context->curbe.bo = bo; +
+ dri_bo_unreference(pp_context->binding_table.bo); + bo = dri_bo_alloc(i965->intel.bufmgr, + "binding table", + sizeof(unsigned int), + 4096); + assert(bo); + pp_context->binding_table.bo = bo; +
+ dri_bo_unreference(pp_context->idrt.bo); + bo = dri_bo_alloc(i965->intel.bufmgr, + "interface descriptor", + sizeof(struct i965_interface_descriptor), + 4096); + assert(bo); + pp_context->idrt.bo = bo; +
+ dri_bo_unreference(pp_context->sampler_state_table.bo); + bo = dri_bo_alloc(i965->intel.bufmgr, + "sampler state table", + 4096, + 4096); + assert(bo); + dri_bo_map(bo, True); + memset(bo->virtual, 0, bo->size); + dri_bo_unmap(bo); + pp_context->sampler_state_table.bo = bo; +
+ dri_bo_unreference(pp_context->sampler_state_table.bo_8x8); + bo = dri_bo_alloc(i965->intel.bufmgr, + "sampler 8x8 state ", + 4096, + 4096); + assert(bo); + pp_context->sampler_state_table.bo_8x8 = bo; +
+ dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv); + bo = dri_bo_alloc(i965->intel.bufmgr, + "sampler 8x8 state ", + 4096, + 4096); + assert(bo); + pp_context->sampler_state_table.bo_8x8_uv = bo; +
+ dri_bo_unreference(pp_context->vfe_state.bo); + bo = dri_bo_alloc(i965->intel.bufmgr, + "vfe state", + sizeof(struct i965_vfe_state), + 4096); + assert(bo); + pp_context->vfe_state.bo = bo; +
+ for (i = 0; i < MAX_PP_SURFACES; i++) { + 
dri_bo_unreference(pp_context->surfaces[i].ss_bo); + pp_context->surfaces[i].ss_bo = NULL; + + dri_bo_unreference(pp_context->surfaces[i].s_bo); + pp_context->surfaces[i].s_bo = NULL; + } + + memset(&ironlake_pp_static_parameter, 0, sizeof(ironlake_pp_static_parameter)); + memset(&ironlake_pp_inline_parameter, 0, sizeof(ironlake_pp_inline_parameter)); + assert(pp_index >= PP_NULL && pp_index < NUM_PP_MODULES); + assert(pp_modules); + pp_context->current_pp = pp_index; + pp_module = &pp_modules[pp_index]; + + if (pp_module->initialize) + pp_module->initialize(ctx, surface, input, srcw, srch, destw, desth); +} + +static void +i965_post_processing_internal(VADriverContextP ctx, + VASurfaceID surface, + int input, + short srcx, + short srcy, + unsigned short srcw, + unsigned short srch, + short destx, + short desty, + unsigned short destw, + unsigned short desth, + int pp_index) +{ + ironlake_pp_initialize(ctx, surface, input, + srcx, srcy, srcw, srch, + destx, desty, destw, desth, + pp_index); + ironlake_pp_states_setup(ctx); + ironlake_pp_pipeline_setup(ctx); +} + +void +i965_post_processing(VADriverContextP ctx, + VASurfaceID surface, + short srcx, + short srcy, + unsigned short srcw, + unsigned short srch, + short destx, + short desty, + unsigned short destw, + unsigned short desth, + unsigned int flag) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + + if (IS_IRONLAKE(i965->intel.device_id)) { + /* Currently only support post processing for NV12 surface */ + if (i965->render_state.interleaved_uv) { + int input = 0; + + if (flag & I965_PP_FLAG_DEINTERLACING) { + i965_post_processing_internal(ctx, surface, input, + srcx, srcy, srcw, srch, + destx, desty, destw, desth, + PP_NV12_DNDI); + input = 1; + } + + if (flag & I965_PP_FLAG_AVS) { + i965_post_processing_internal(ctx, surface, input, + srcx, srcy, srcw, srch, + destx, desty, destw, desth, + PP_NV12_AVS); + } + } + } +} + +void +i965_post_processing_once_init(VADriverContextP ctx) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct i965_post_processing_context *pp_context = &i965->render_state.pp_context; + int i; + + pp_context->urb.size = URB_SIZE((&i965->intel)); + pp_context->urb.num_vfe_entries = 32; + pp_context->urb.size_vfe_entry = 1; + pp_context->urb.num_cs_entries = 1; + pp_context->urb.size_cs_entry = 2; + pp_context->urb.vfe_start = 0; + pp_context->urb.cs_start = pp_context->urb.vfe_start + + pp_context->urb.num_vfe_entries * pp_context->urb.size_vfe_entry; + assert(pp_context->urb.cs_start + + pp_context->urb.num_cs_entries * pp_context->urb.size_cs_entry <= URB_SIZE((&i965->intel))); + + if (IS_IRONLAKE(i965->intel.device_id)) { + pp_modules = pp_modules_gen5; + } + + for (i = 0; i < NUM_PP_MODULES && pp_modules; i++) { + struct pp_module *pp_module = &pp_modules[i]; + pp_module->bo = dri_bo_alloc(i965->intel.bufmgr, + pp_module->name, + pp_module->size, + 4096); + assert(pp_module->bo); + dri_bo_subdata(pp_module->bo, 0, pp_module->size, pp_module->bin); + } +} + +Bool +i965_post_processing_terminate(VADriverContextP ctx) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct i965_post_processing_context *pp_context = &i965->render_state.pp_context; + int i; + + dri_bo_unreference(pp_context->curbe.bo); + pp_context->curbe.bo = NULL; + + for (i = 0; i < MAX_PP_SURFACES; i++) { + dri_bo_unreference(pp_context->surfaces[i].ss_bo); + pp_context->surfaces[i].ss_bo = NULL; + + dri_bo_unreference(pp_context->surfaces[i].s_bo); + pp_context->surfaces[i].s_bo = NULL; + } + + 
dri_bo_unreference(pp_context->sampler_state_table.bo); + pp_context->sampler_state_table.bo = NULL; + + dri_bo_unreference(pp_context->sampler_state_table.bo_8x8); + pp_context->sampler_state_table.bo_8x8 = NULL; + + dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv); + pp_context->sampler_state_table.bo_8x8_uv = NULL; + + dri_bo_unreference(pp_context->binding_table.bo); + pp_context->binding_table.bo = NULL; + + dri_bo_unreference(pp_context->idrt.bo); + pp_context->idrt.bo = NULL; + + dri_bo_unreference(pp_context->vfe_state.bo); + pp_context->vfe_state.bo = NULL; + + dri_bo_unreference(pp_context->stmm.bo); + pp_context->stmm.bo = NULL; + + for (i = 0; i < NUM_PP_MODULES && pp_modules; i++) { + struct pp_module *pp_module = &pp_modules[i]; + + dri_bo_unreference(pp_module->bo); + pp_module->bo = NULL; + } + + return True; +} diff --git a/i965_drv_video/i965_post_processing.h b/i965_drv_video/i965_post_processing.h new file mode 100644 index 0000000..360ded4 --- /dev/null +++ b/i965_drv_video/i965_post_processing.h @@ -0,0 +1,150 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: + * Xiang Haihao <haihao.xiang@intel.com> + * + */ +
+#ifndef __I965_POST_PROCESSING_H__ +#define __I965_POST_PROCESSING_H__ +
+#define MAX_PP_SURFACES 32 +
+#define I965_PP_FLAG_DEINTERLACING 1 +#define I965_PP_FLAG_AVS 2 +
+enum +{ + PP_NULL = 0, + PP_NV12_LOAD_SAVE, + PP_NV12_SCALING, + PP_NV12_AVS, + PP_NV12_DNDI, +}; +
+struct pp_load_save_context +{ + int dest_w; + int dest_h; +}; +
+struct pp_scaling_context +{ + int dest_w; + int dest_h; +}; +
+struct pp_avs_context +{ + int dest_w; + int dest_h; + int src_w; + int src_h; +}; +
+struct pp_dndi_context +{ + int dest_w; + int dest_h; +}; +
+struct i965_post_processing_context +{ + int current_pp; +
+ struct { + dri_bo *bo; + } curbe; +
+ struct { + dri_bo *ss_bo; + dri_bo *s_bo; + } surfaces[MAX_PP_SURFACES]; +
+ struct { + dri_bo *bo; + } binding_table; +
+ struct { + dri_bo *bo; + } idrt; +
+ struct { + dri_bo *bo; + } vfe_state; +
+ struct { + dri_bo *bo; + dri_bo *bo_8x8; + dri_bo *bo_8x8_uv; + } sampler_state_table; +
+ struct { + unsigned int size; +
+ unsigned int vfe_start; + unsigned int cs_start; +
+ unsigned int num_vfe_entries; + unsigned int num_cs_entries; +
+ unsigned int size_vfe_entry; + unsigned int size_cs_entry; + } urb; +
+ struct { + dri_bo *bo; + } stmm; +
+ union { + struct pp_load_save_context pp_load_save_context; + struct pp_scaling_context pp_scaling_context; + struct pp_avs_context pp_avs_context; + struct pp_dndi_context pp_dndi_context; + } private_context; +
+ int (*pp_x_steps)(void *private_context); + int (*pp_y_steps)(void *private_context); + int (*pp_set_block_parameter)(void *private_context, int x, int y); +}; +
+void +i965_post_processing(VADriverContextP ctx, + VASurfaceID surface, + short srcx, + short srcy, + unsigned short srcw, + unsigned short srch, + short destx, + short desty, + unsigned short destw, + unsigned short desth, + unsigned int flag); +void +i965_post_processing_once_init(VADriverContextP ctx); +Bool +i965_post_processing_terminate(VADriverContextP ctx); +
+#endif /* __I965_POST_PROCESSING_H__ */ diff --git a/i965_drv_video/i965_render.c b/i965_drv_video/i965_render.c index c4e8ed8..ceef319 100644 --- a/i965_drv_video/i965_render.c +++ b/i965_drv_video/i965_render.c @@ -655,12 +655,20 @@ i965_render_src_surfaces_state(VADriverContextP ctx, obj_surface = SURFACE(surface); assert(obj_surface); - assert(obj_surface->bo); - w = obj_surface->width; - h = obj_surface->height; - rw = obj_surface->orig_width; - rh = obj_surface->orig_height; - region = obj_surface->bo; +
+ if (obj_surface->pp_out_bo) { + w = obj_surface->pp_out_width; + h = obj_surface->pp_out_height; + rw = obj_surface->orig_pp_out_width; + rh = obj_surface->orig_pp_out_height; + region = obj_surface->pp_out_bo; + } else { + w = obj_surface->width; + h = obj_surface->height; + rw = obj_surface->orig_width; + rh = obj_surface->orig_height; + region = obj_surface->bo; + } i965_render_src_surface_state(ctx, 1, region, 0, rw, rh, w, I965_SURFACEFORMAT_R8_UNORM); /* Y */ i965_render_src_surface_state(ctx, 2, region, 0, rw, rh, w, I965_SURFACEFORMAT_R8_UNORM); @@ -1454,8 +1462,14 @@ i965_render_put_surface(VADriverContextP ctx, short destx, short desty, unsigned short destw, - unsigned short desth) + unsigned short desth, + unsigned int flag) { + i965_post_processing(ctx, surface, + srcx, srcy, srcw, srch, + destx, desty, destw, desth, + flag); + i965_render_initialize(ctx); i965_surface_render_state_setup(ctx, surface, srcx, srcy, srcw, srch, @@ -1523,6 +1537,8 @@ i965_render_init(VADriverContextP 
ctx) assert(render_state->curbe.bo); render_state->curbe.upload = 0; + i965_post_processing_once_init(ctx); + return True; } @@ -1533,6 +1549,8 @@ i965_render_terminate(VADriverContextP ctx) struct i965_driver_data *i965 = i965_driver_data(ctx); struct i965_render_state *render_state = &i965->render_state; + i965_post_processing_terminate(ctx); + dri_bo_unreference(render_state->curbe.bo); render_state->curbe.bo = NULL; diff --git a/i965_drv_video/i965_render.h b/i965_drv_video/i965_render.h index 9abb81f..84b50f2 100644 --- a/i965_drv_video/i965_render.h +++ b/i965_drv_video/i965_render.h @@ -31,6 +31,8 @@ #define MAX_RENDER_SURFACES 16 #define MAX_SAMPLERS 16 +#include "i965_post_processing.h" + struct i965_render_state { struct { @@ -65,6 +67,9 @@ struct i965_render_state int interleaved_uv; struct intel_region *draw_region; + + int pp_flag; /* 0: disable, 1: enable */ + struct i965_post_processing_context pp_context; }; Bool i965_render_init(VADriverContextP ctx); @@ -78,7 +83,8 @@ void i965_render_put_surface(VADriverContextP ctx, short destx, short desty, unsigned short destw, - unsigned short desth); + unsigned short desth, + unsigned int flag); void diff --git a/i965_drv_video/i965_structs.h b/i965_drv_video/i965_structs.h index d133446..f8be616 100644 --- a/i965_drv_video/i965_structs.h +++ b/i965_drv_video/i965_structs.h @@ -639,4 +639,329 @@ struct i965_cc_unit_state } cc7; }; +struct i965_sampler_8x8 +{ + struct { + unsigned int pad0:16; + unsigned int chroma_key_index:2; + unsigned int chroma_key_enable:1; + unsigned int pad1:8; + unsigned int ief_filter_size:1; + unsigned int ief_filter_type:1; + unsigned int ief_bypass:1; + unsigned int pad2:1; + unsigned int avs_filter_type:1; + } dw0; + + struct { + unsigned int pad0:5; + unsigned int sampler_8x8_state_pointer:27; + } dw1; + + struct { + unsigned int weak_edge_threshold:4; + unsigned int strong_edge_threshold:4; + unsigned int global_noise_estimation:8; + unsigned int pad0:16; + } dw2; + + struct { + unsigned int r3x_coefficient:5; + unsigned int pad0:1; + unsigned int r3c_coefficient:5; + unsigned int pad1:3; + unsigned int gain_factor:6; + unsigned int non_edge_weight:3; + unsigned int pad2:1; + unsigned int regular_weight:3; + unsigned int pad3:1; + unsigned int strong_edge_weight:3; + unsigned int pad4:1; + } dw3; + + struct { + unsigned int pad0:2; + unsigned int mr_boost:1; + unsigned int mr_threshold:4; + unsigned int steepness_boost:1; + unsigned int steepness_threshold:4; + unsigned int pad1:2; + unsigned int r5x_coefficient:5; + unsigned int pad2:1; + unsigned int r5cx_coefficient:5; + unsigned int pad3:1; + unsigned int r5c_coefficient:5; + unsigned int pad4:1; + } dw4; + + struct { + unsigned int pwl1_point_1:8; + unsigned int pwl1_point_2:8; + unsigned int pwl1_point_3:8; + unsigned int pwl1_point_4:8; + } dw5; + + struct { + unsigned int pwl1_point_5:8; + unsigned int pwl1_point_6:8; + unsigned int pwl1_r3_bias_0:8; + unsigned int pwl1_r3_bias_1:8; + } dw6; + + struct { + unsigned int pwl1_r3_bias_2:8; + unsigned int pwl1_r3_bias_3:8; + unsigned int pwl1_r3_bias_4:8; + unsigned int pwl1_r3_bias_5:8; + } dw7; + + struct { + unsigned int pwl1_r3_bias_6:8; + unsigned int pwl1_r5_bias_0:8; + unsigned int pwl1_r5_bias_1:8; + unsigned int pwl1_r5_bias_2:8; + } dw8; + + struct { + unsigned int pwl1_r5_bias_3:8; + unsigned int pwl1_r5_bias_4:8; + unsigned int pwl1_r5_bias_5:8; + unsigned int pwl1_r5_bias_6:8; + } dw9; + + struct { + int pwl1_r3_slope_0:8; + int pwl1_r3_slope_1:8; + int pwl1_r3_slope_2:8; + int 
pwl1_r3_slope_3:8; + } dw10; + + struct { + int pwl1_r3_slope_4:8; + int pwl1_r3_slope_5:8; + int pwl1_r3_slope_6:8; + int pwl1_r5_slope_0:8; + } dw11; + + struct { + int pwl1_r5_slope_1:8; + int pwl1_r5_slope_2:8; + int pwl1_r5_slope_3:8; + int pwl1_r5_slope_4:8; + } dw12; + + struct { + int pwl1_r5_slope_5:8; + int pwl1_r5_slope_6:8; + unsigned int limiter_boost:4; + unsigned int pad0:4; + unsigned int minimum_limiter:4; + unsigned int maximum_limiter:4; + } dw13; + + struct { + unsigned int pad0:8; + unsigned int clip_limiter:10; + unsigned int pad1:14; + } dw14; + + unsigned int dw15; /* Just a pad */ +}; + +struct i965_sampler_8x8_coefficient +{ + struct { + int table_0x_filter_c0:8; + int table_0x_filter_c1:8; + int table_0x_filter_c2:8; + int table_0x_filter_c3:8; + } dw0; + + struct { + int table_0x_filter_c4:8; + int table_0x_filter_c5:8; + int table_0x_filter_c6:8; + int table_0x_filter_c7:8; + } dw1; + + struct { + int table_0y_filter_c0:8; + int table_0y_filter_c1:8; + int table_0y_filter_c2:8; + int table_0y_filter_c3:8; + } dw2; + + struct { + int table_0y_filter_c4:8; + int table_0y_filter_c5:8; + int table_0y_filter_c6:8; + int table_0y_filter_c7:8; + } dw3; + + struct { + int pad0:16; + int table_1x_filter_c2:8; + int table_1x_filter_c3:8; + } dw4; + + struct { + int table_1x_filter_c4:8; + int table_1x_filter_c5:8; + int pad0:16; + } dw5; + + struct { + int pad0:16; + int table_1y_filter_c2:8; + int table_1y_filter_c3:8; + } dw6; + + struct { + int table_1y_filter_c4:8; + int table_1y_filter_c5:8; + int pad0:16; + } dw7; +}; + +struct i965_sampler_8x8_state +{ + struct i965_sampler_8x8_coefficient coefficients[17]; + + struct { + unsigned int transition_area_with_8_pixels:3; + unsigned int pad0:1; + unsigned int transition_area_with_4_pixels:3; + unsigned int pad1:1; + unsigned int max_derivative_8_pixels:8; + unsigned int max_derivative_4_pixels:8; + unsigned int default_sharpness_level:8; + } dw136; + + struct { + unsigned int bit_field_name:1; + unsigned int adaptive_filter_for_all_channel:1; + unsigned int pad0:19; + unsigned int bypass_y_adaptive_filtering:1; + unsigned int bypass_x_adaptive_filtering:1; + unsigned int pad1:9; + } dw137; +}; + +struct i965_surface_state2 +{ + struct { + unsigned int surface_base_address; + } ss0; + + struct { + unsigned int cbcr_pixel_offset_v_direction:2; + unsigned int pad0:4; + unsigned int width:13; + unsigned int height:13; + } ss1; + + struct { + unsigned int tile_walk:1; + unsigned int tiled_surface:1; + unsigned int half_pitch_for_chroma:1; + unsigned int pitch:17; + unsigned int pad0:2; + unsigned int surface_object_control_data:4; + unsigned int pad1:1; + unsigned int interleave_chroma:1; + unsigned int surface_format:4; + } ss2; + + struct { + unsigned int y_offset_for_cb:13; + unsigned int pad0:3; + unsigned int x_offset_for_cb:13; + unsigned int pad1:3; + } ss3; + + struct { + unsigned int y_offset_for_cr:13; + unsigned int pad0:3; + unsigned int x_offset_for_cr:13; + unsigned int pad1:3; + } ss4; +}; + +struct i965_sampler_dndi +{ + struct { + unsigned int denoise_asd_threshold:8; + unsigned int denoise_history_delta:8; + unsigned int denoise_maximum_history:8; + unsigned int denoise_stad_threshold:8; + } dw0; + + struct { + unsigned int denoise_threshold_for_sum_of_complexity_measure:8; + unsigned int denoise_moving_pixel_threshold:5; + unsigned int stmm_c2:3; + unsigned int low_temporal_difference_threshold:6; + unsigned int pad0:2; + unsigned int temporal_difference_threshold:6; + unsigned int pad1:2; + } dw1; + + 
struct { + unsigned int block_noise_estimate_noise_threshold:8; + unsigned int block_noise_estimate_edge_threshold:8; + unsigned int denoise_edge_threshold:8; + unsigned int good_neighbor_threshold:8; + } dw2; + + struct { + unsigned int maximum_stmm:8; + unsigned int multipler_for_vecm:6; + unsigned int pad0:2; + unsigned int blending_constant_across_time_for_small_values_of_stmm:8; + unsigned int blending_constant_across_time_for_large_values_of_stmm:7; + unsigned int stmm_blending_constant_select:1; + } dw3; + + struct { + unsigned int sdi_delta:8; + unsigned int sdi_threshold:8; + unsigned int stmm_output_shift:4; + unsigned int stmm_shift_up:2; + unsigned int stmm_shift_down:2; + unsigned int minimum_stmm:8; + } dw4; + + struct { + unsigned int fmd_temporal_difference_threshold:8; + unsigned int sdi_fallback_mode_2_constant:8; + unsigned int sdi_fallback_mode_1_t2_constant:8; + unsigned int sdi_fallback_mode_1_t1_constant:8; + } dw5; + + struct { + unsigned int dn_enable:1; + unsigned int di_enable:1; + unsigned int di_partial:1; + unsigned int dndi_top_first:1; + unsigned int dndi_stream_id:1; + unsigned int dndi_first_frame:1; + unsigned int progressive_dn:1; + unsigned int pad0:1; + unsigned int fmd_tear_threshold:6; + unsigned int pad1:2; + unsigned int fmd2_vertical_difference_threshold:8; + unsigned int fmd1_vertical_difference_threshold:8; + } dw6; + + struct { + unsigned int pad0:8; + unsigned int fmd_for_1st_field_of_current_frame:2; + unsigned int pad1:6; + unsigned int fmd_for_2nd_field_of_previous_frame:2; + unsigned int vdi_walker_enable:1; + unsigned int pad2:4; + unsigned int column_width_minus1:9; + } dw7; +}; + #endif /* _I965_STRUCTS_H_ */ diff --git a/i965_drv_video/intel_batchbuffer.c b/i965_drv_video/intel_batchbuffer.c index abe548e..15c3201 100644 --- a/i965_drv_video/intel_batchbuffer.c +++ b/i965_drv_video/intel_batchbuffer.c @@ -37,7 +37,7 @@ static void intel_batchbuffer_reset(struct intel_batchbuffer *batch) { struct intel_driver_data *intel = batch->intel; - int batch_size = batch->flag == I915_EXEC_RENDER ? BATCH_SIZE : (BATCH_SIZE * 8); + int batch_size = BATCH_SIZE; assert(batch->flag == I915_EXEC_RENDER || batch->flag == I915_EXEC_BSD); diff --git a/i965_drv_video/intel_driver.h b/i965_drv_video/intel_driver.h index ffa8cad..1e2adfa 100644 --- a/i965_drv_video/intel_driver.h +++ b/i965_drv_video/intel_driver.h @@ -17,7 +17,7 @@ #define INLINE #endif -#define BATCH_SIZE 0x10000 +#define BATCH_SIZE 0x80000 #define BATCH_RESERVED 0x10 #define CMD_MI (0x0 << 29) |
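
The flag argument threaded through the render path is a bitmask, not a pp module index: i965_post_processing() runs a PP_NV12_DNDI pass when I965_PP_FLAG_DEINTERLACING is set, then feeds that intermediate surface into a PP_NV12_AVS scaling pass when I965_PP_FLAG_AVS is also set, after which rendering reads from obj_surface->pp_out_bo instead of the decoded buffer. A minimal sketch of a combined deinterlace-then-scale request, assuming ctx and surface already exist; the rectangle sizes are invented for illustration:

    /* Sketch only: 720x480 interlaced NV12 in, 1280x720 out.  The geometry
     * here is made up; only Ironlake with an NV12 (interleaved UV) surface
     * actually takes this path. */
    unsigned int flag = I965_PP_FLAG_DEINTERLACING | I965_PP_FLAG_AVS;

    i965_post_processing(ctx, surface,
                         0, 0, 720, 480,     /* src:  x, y, width, height  */
                         0, 0, 1280, 720,    /* dest: x, y, width, height  */
                         flag);

On other devices, or for non-NV12 surfaces, the call leaves pp_out_bo unset and the render stage falls back to sampling the original surface bo.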