From 05076f28ae37a90aeb3f72145659437500092d2e Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Wed, 25 May 2011 12:55:46 +0800 Subject: i965_drv_video: rendering for Ivybridge Signed-off-by: Xiang, Haihao --- i965_defines.h | 137 ++++++- i965_render.c | 1158 +++++++++++++++++++++++++++++++++++++++++++++++++------- i965_structs.h | 124 ++++++ 3 files changed, 1286 insertions(+), 133 deletions(-) diff --git a/i965_defines.h b/i965_defines.h index c689011..66b0b5d 100644 --- a/i965_defines.h +++ b/i965_defines.h @@ -87,10 +87,14 @@ #define GEN6_3DSTATE_CLIP CMD(3, 0, 0x12) #define GEN6_3DSTATE_SF CMD(3, 0, 0x13) -/* DW1 */ -# define GEN6_3DSTATE_SF_NUM_OUTPUTS_SHIFT 22 -# define GEN6_3DSTATE_SF_URB_ENTRY_READ_LENGTH_SHIFT 11 -# define GEN6_3DSTATE_SF_URB_ENTRY_READ_OFFSET_SHIFT 4 +/* DW1 on GEN6 */ +# define GEN6_3DSTATE_SF_NUM_OUTPUTS_SHIFT 22 +# define GEN6_3DSTATE_SF_URB_ENTRY_READ_LENGTH_SHIFT 11 +# define GEN6_3DSTATE_SF_URB_ENTRY_READ_OFFSET_SHIFT 4 +/* DW1 on GEN7 */ +# define GEN7_SF_DEPTH_BUFFER_SURFACE_FORMAT_SHIFT 12 + + /* DW2 */ /* DW3 */ # define GEN6_3DSTATE_SF_CULL_BOTH (0 << 29) @@ -123,6 +127,47 @@ # define GEN6_3DSTATE_WM_PERSPECTIVE_CENTROID_BARYCENTRIC (1 << 11) # define GEN6_3DSTATE_WM_PERSPECTIVE_PIXEL_BARYCENTRIC (1 << 10) +/* 3DSTATE_WM on GEN7 */ +/* DW1 */ +# define GEN7_WM_STATISTICS_ENABLE (1 << 31) +# define GEN7_WM_DEPTH_CLEAR (1 << 30) +# define GEN7_WM_DISPATCH_ENABLE (1 << 29) +# define GEN6_WM_DEPTH_RESOLVE (1 << 28) +# define GEN7_WM_HIERARCHICAL_DEPTH_RESOLVE (1 << 27) +# define GEN7_WM_KILL_ENABLE (1 << 25) +# define GEN7_WM_PSCDEPTH_OFF (0 << 23) +# define GEN7_WM_PSCDEPTH_ON (1 << 23) +# define GEN7_WM_PSCDEPTH_ON_GE (2 << 23) +# define GEN7_WM_PSCDEPTH_ON_LE (3 << 23) +# define GEN7_WM_USES_SOURCE_DEPTH (1 << 20) +# define GEN7_WM_USES_SOURCE_W (1 << 19) +# define GEN7_WM_POSITION_ZW_PIXEL (0 << 17) +# define GEN7_WM_POSITION_ZW_CENTROID (2 << 17) +# define GEN7_WM_POSITION_ZW_SAMPLE (3 << 17) +# define GEN7_WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC (1 << 16) +# define GEN7_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC (1 << 15) +# define GEN7_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC (1 << 14) +# define GEN7_WM_PERSPECTIVE_SAMPLE_BARYCENTRIC (1 << 13) +# define GEN7_WM_PERSPECTIVE_CENTROID_BARYCENTRIC (1 << 12) +# define GEN7_WM_PERSPECTIVE_PIXEL_BARYCENTRIC (1 << 11) +# define GEN7_WM_USES_INPUT_COVERAGE_MASK (1 << 10) +# define GEN7_WM_LINE_END_CAP_AA_WIDTH_0_5 (0 << 8) +# define GEN7_WM_LINE_END_CAP_AA_WIDTH_1_0 (1 << 8) +# define GEN7_WM_LINE_END_CAP_AA_WIDTH_2_0 (2 << 8) +# define GEN7_WM_LINE_END_CAP_AA_WIDTH_4_0 (3 << 8) +# define GEN7_WM_LINE_AA_WIDTH_0_5 (0 << 6) +# define GEN7_WM_LINE_AA_WIDTH_1_0 (1 << 6) +# define GEN7_WM_LINE_AA_WIDTH_2_0 (2 << 6) +# define GEN7_WM_LINE_AA_WIDTH_4_0 (3 << 6) +# define GEN7_WM_POLYGON_STIPPLE_ENABLE (1 << 4) +# define GEN7_WM_LINE_STIPPLE_ENABLE (1 << 3) +# define GEN7_WM_POINT_RASTRULE_UPPER_RIGHT (1 << 2) +# define GEN7_WM_MSRAST_OFF_PIXEL (0 << 0) +# define GEN7_WM_MSRAST_OFF_PATTERN (1 << 0) +# define GEN7_WM_MSRAST_ON_PIXEL (2 << 0) +# define GEN7_WM_MSRAST_ON_PATTERN (3 << 0) +/* DW2 */ +# define GEN7_WM_MSDISPMODE_PERPIXEL (1 << 31) #define GEN6_3DSTATE_CONSTANT_VS CMD(3, 0, 0x15) #define GEN6_3DSTATE_CONSTANT_GS CMD(3, 0, 0x16) @@ -143,6 +188,86 @@ # define GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_4 (2 << 1) # define GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_8 (3 << 1) +/* GEN7 */ +#define GEN7_3DSTATE_CLEAR_PARAMS CMD(3, 0, 0x04) +#define GEN7_3DSTATE_DEPTH_BUFFER CMD(3, 0, 0x05) + +#define GEN7_3DSTATE_URB_VS CMD(3, 0, 0x30) +#define GEN7_3DSTATE_URB_HS CMD(3, 0, 0x31) +#define GEN7_3DSTATE_URB_DS CMD(3, 0, 0x32) +#define GEN7_3DSTATE_URB_GS CMD(3, 0, 0x33) +/* DW1 */ +# define GEN7_URB_ENTRY_NUMBER_SHIFT 0 +# define GEN7_URB_ENTRY_SIZE_SHIFT 16 +# define GEN7_URB_STARTING_ADDRESS_SHIFT 25 + +#define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_VS CMD(3, 1, 0x12) +#define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS CMD(3, 1, 0x16) +/* DW1 */ +# define GEN7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT 16 + +#define GEN7_3DSTATE_CONSTANT_HS CMD(3, 0, 0x19) +#define GEN7_3DSTATE_CONSTANT_DS CMD(3, 0, 0x1a) + +#define GEN7_3DSTATE_HS CMD(3, 0, 0x1b) +#define GEN7_3DSTATE_TE CMD(3, 0, 0x1c) +#define GEN7_3DSTATE_DS CMD(3, 0, 0x1d) +#define GEN7_3DSTATE_STREAMOUT CMD(3, 0, 0x1e) +#define GEN7_3DSTATE_SBE CMD(3, 0, 0x1f) + +/* DW1 */ +# define GEN7_SBE_SWIZZLE_CONTROL_MODE (1 << 28) +# define GEN7_SBE_NUM_OUTPUTS_SHIFT 22 +# define GEN7_SBE_SWIZZLE_ENABLE (1 << 21) +# define GEN7_SBE_POINT_SPRITE_LOWERLEFT (1 << 20) +# define GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT 11 +# define GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT 4 + +#define GEN7_3DSTATE_PS CMD(3, 0, 0x20) +/* DW1: kernel pointer */ +/* DW2 */ +# define GEN7_PS_SPF_MODE (1 << 31) +# define GEN7_PS_VECTOR_MASK_ENABLE (1 << 30) +# define GEN7_PS_SAMPLER_COUNT_SHIFT 27 +# define GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT 18 +# define GEN7_PS_FLOATING_POINT_MODE_IEEE_754 (0 << 16) +# define GEN7_PS_FLOATING_POINT_MODE_ALT (1 << 16) +/* DW3: scratch space */ +/* DW4 */ +# define GEN7_PS_MAX_THREADS_SHIFT 23 +# define GEN7_PS_PUSH_CONSTANT_ENABLE (1 << 11) +# define GEN7_PS_ATTRIBUTE_ENABLE (1 << 10) +# define GEN7_PS_OMASK_TO_RENDER_TARGET (1 << 9) +# define GEN7_PS_DUAL_SOURCE_BLEND_ENABLE (1 << 7) +# define GEN7_PS_POSOFFSET_NONE (0 << 3) +# define GEN7_PS_POSOFFSET_CENTROID (2 << 3) +# define GEN7_PS_POSOFFSET_SAMPLE (3 << 3) +# define GEN7_PS_32_DISPATCH_ENABLE (1 << 2) +# define GEN7_PS_16_DISPATCH_ENABLE (1 << 1) +# define GEN7_PS_8_DISPATCH_ENABLE (1 << 0) +/* DW5 */ +# define GEN7_PS_DISPATCH_START_GRF_SHIFT_0 16 +# define GEN7_PS_DISPATCH_START_GRF_SHIFT_1 8 +# define GEN7_PS_DISPATCH_START_GRF_SHIFT_2 0 +/* DW6: kernel 1 pointer */ +/* DW7: kernel 2 pointer */ + +#define GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CL CMD(3, 0, 0x21) +#define GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC CMD(3, 0, 0x23) + +#define GEN7_3DSTATE_BLEND_STATE_POINTERS CMD(3, 0, 0x24) +#define GEN7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS CMD(3, 0, 0x25) + +#define GEN7_3DSTATE_BINDING_TABLE_POINTERS_VS CMD(3, 0, 0x26) +#define GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS CMD(3, 0, 0x27) +#define GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS CMD(3, 0, 0x28) +#define GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS CMD(3, 0, 0x29) +#define GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS CMD(3, 0, 0x2a) + +#define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_VS CMD(3, 0, 0x2b) +#define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_GS CMD(3, 0, 0x2e) +#define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS CMD(3, 0, 0x2f) + #define MFX(pipeline, op, sub_opa, sub_opb) \ (3 << 29 | \ (pipeline) << 27 | \ @@ -457,11 +582,15 @@ #define VB0_INSTANCEDATA (1 << 26) #define GEN6_VB0_VERTEXDATA (0 << 20) #define GEN6_VB0_INSTANCEDATA (1 << 20) +#define GEN7_VB0_ADDRESS_MODIFYENABLE (1 << 14) #define VB0_BUFFER_PITCH_SHIFT 0 #define _3DPRIMITIVE_VERTEX_SEQUENTIAL (0 << 15) #define _3DPRIMITIVE_VERTEX_RANDOM (1 << 15) #define _3DPRIMITIVE_TOPOLOGY_SHIFT 10 +/* DW1 on GEN7*/ +# define GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL (0 << 8) +# define GEN7_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM (1 << 8) #define _3DPRIM_POINTLIST 0x01 #define _3DPRIM_LINELIST 0x02 diff --git a/i965_render.c b/i965_render.c index 5d2c1bf..d6ae7f3 100644 --- a/i965_render.c +++ b/i965_render.c @@ -116,7 +116,27 @@ static const uint32_t ps_subpic_kernel_static_gen6[][4] = { #include "shaders/render/exa_wm_write.g6b" }; -#define SURFACE_STATE_PADDED_SIZE ALIGN(sizeof(struct i965_surface_state), 32) +/* programs for Ivybridge */ +static const uint32_t sf_kernel_static_gen7[][4] = +{ +}; + +static const uint32_t ps_kernel_static_gen7[][4] = { +#include "shaders/render/exa_wm_src_affine.g7b" +#include "shaders/render/exa_wm_src_sample_planar.g7b" +#include "shaders/render/exa_wm_yuv_rgb.g7b" +#include "shaders/render/exa_wm_write.g7b" +}; + +static const uint32_t ps_subpic_kernel_static_gen7[][4] = { +#include "shaders/render/exa_wm_src_affine.g7b" +#include "shaders/render/exa_wm_src_sample_argb.g7b" +#include "shaders/render/exa_wm_write.g7b" +}; + +#define SURFACE_STATE_PADDED_SIZE_I965 ALIGN(sizeof(struct i965_surface_state), 32) +#define SURFACE_STATE_PADDED_SIZE_GEN7 ALIGN(sizeof(struct gen7_surface_state), 32) +#define SURFACE_STATE_PADDED_SIZE MAX(SURFACE_STATE_PADDED_SIZE_I965, SURFACE_STATE_PADDED_SIZE_GEN7) #define SURFACE_STATE_OFFSET(index) (SURFACE_STATE_PADDED_SIZE * index) #define BINDING_TABLE_OFFSET SURFACE_STATE_OFFSET(MAX_RENDER_SURFACES) @@ -213,6 +233,31 @@ static struct i965_kernel render_kernels_gen6[] = { } }; +static struct i965_kernel render_kernels_gen7[] = { + { + "SF", + SF_KERNEL, + sf_kernel_static_gen7, + sizeof(sf_kernel_static_gen7), + NULL + }, + { + "PS", + PS_KERNEL, + ps_kernel_static_gen7, + sizeof(ps_kernel_static_gen7), + NULL + }, + + { + "PS_SUBPIC", + PS_SUBPIC_KERNEL, + ps_subpic_kernel_static_gen7, + sizeof(ps_subpic_kernel_static_gen7), + NULL + } +}; + #define URB_VS_ENTRIES 8 #define URB_VS_ENTRY_SIZE 1 @@ -593,55 +638,114 @@ i965_render_set_surface_tiling(struct i965_surface_state *ss, unsigned int tilin } static void -i965_render_src_surface_state(VADriverContextP ctx, - int index, - dri_bo *region, - unsigned long offset, - int w, int h, +i965_render_set_surface_state(struct i965_surface_state *ss, + dri_bo *bo, unsigned long offset, + int width, int height, int pitch, int format) { - struct i965_driver_data *i965 = i965_driver_data(ctx); - struct i965_render_state *render_state = &i965->render_state; - struct i965_surface_state *ss; - dri_bo *ss_bo = render_state->wm.surface_state_binding_table_bo; unsigned int tiling; unsigned int swizzle; - assert(index < MAX_RENDER_SURFACES); - - dri_bo_map(ss_bo, 1); - assert(ss_bo->virtual); - ss = (struct i965_surface_state *)((char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index)); memset(ss, 0, sizeof(*ss)); ss->ss0.surface_type = I965_SURFACE_2D; ss->ss0.surface_format = format; - ss->ss0.writedisable_alpha = 0; - ss->ss0.writedisable_red = 0; - ss->ss0.writedisable_green = 0; - ss->ss0.writedisable_blue = 0; ss->ss0.color_blend = 1; - ss->ss0.vert_line_stride = 0; - ss->ss0.vert_line_stride_ofs = 0; - ss->ss0.mipmap_layout_mode = 0; - ss->ss0.render_cache_read_mode = 0; - ss->ss1.base_addr = region->offset + offset; + ss->ss1.base_addr = bo->offset + offset; - ss->ss2.width = w - 1; - ss->ss2.height = h - 1; - ss->ss2.mip_count = 0; - ss->ss2.render_target_rotation = 0; + ss->ss2.width = width - 1; + ss->ss2.height = height - 1; ss->ss3.pitch = pitch - 1; - dri_bo_get_tiling(region, &tiling, &swizzle); + dri_bo_get_tiling(bo, &tiling, &swizzle); i965_render_set_surface_tiling(ss, tiling); +} + +static void +gen7_render_set_surface_tiling(struct gen7_surface_state *ss, uint32_t tiling) +{ + switch (tiling) { + case I915_TILING_NONE: + ss->ss0.tiled_surface = 0; + ss->ss0.tile_walk = 0; + break; + case I915_TILING_X: + ss->ss0.tiled_surface = 1; + ss->ss0.tile_walk = I965_TILEWALK_XMAJOR; + break; + case I915_TILING_Y: + ss->ss0.tiled_surface = 1; + ss->ss0.tile_walk = I965_TILEWALK_YMAJOR; + break; + } +} + +static void +gen7_render_set_surface_state(struct gen7_surface_state *ss, + dri_bo *bo, unsigned long offset, + int width, int height, + int pitch, int format) +{ + unsigned int tiling; + unsigned int swizzle; + + memset(ss, 0, sizeof(*ss)); + + ss->ss0.surface_type = I965_SURFACE_2D; + ss->ss0.surface_format = format; + + ss->ss1.base_addr = bo->offset + offset; + + ss->ss2.width = width - 1; + ss->ss2.height = height - 1; + + ss->ss3.pitch = pitch - 1; + + dri_bo_get_tiling(bo, &tiling, &swizzle); + gen7_render_set_surface_tiling(ss, tiling); +} + +static void +i965_render_src_surface_state(VADriverContextP ctx, + int index, + dri_bo *region, + unsigned long offset, + int w, int h, + int pitch, int format) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct i965_render_state *render_state = &i965->render_state; + void *ss; + dri_bo *ss_bo = render_state->wm.surface_state_binding_table_bo; + + assert(index < MAX_RENDER_SURFACES); - dri_bo_emit_reloc(ss_bo, - I915_GEM_DOMAIN_SAMPLER, 0, - offset, - SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state, ss1), - region); + dri_bo_map(ss_bo, 1); + assert(ss_bo->virtual); + ss = (char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index); + + if (IS_GEN7(i965->intel.device_id)) { + gen7_render_set_surface_state(ss, + region, offset, + w, h, + pitch, format); + dri_bo_emit_reloc(ss_bo, + I915_GEM_DOMAIN_SAMPLER, 0, + offset, + SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state, ss1), + region); + } else { + i965_render_set_surface_state(ss, + region, offset, + w, h, + pitch, format); + dri_bo_emit_reloc(ss_bo, + I915_GEM_DOMAIN_SAMPLER, 0, + offset, + SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state, ss1), + region); + } ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index); dri_bo_unmap(ss_bo); @@ -729,49 +833,42 @@ i965_render_dest_surface_state(VADriverContextP ctx, int index) struct i965_driver_data *i965 = i965_driver_data(ctx); struct i965_render_state *render_state = &i965->render_state; struct intel_region *dest_region = render_state->draw_region; - struct i965_surface_state *ss; + void *ss; dri_bo *ss_bo = render_state->wm.surface_state_binding_table_bo; - + int format; assert(index < MAX_RENDER_SURFACES); - dri_bo_map(ss_bo, 1); - assert(ss_bo->virtual); - ss = (struct i965_surface_state *)((char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index)); - memset(ss, 0, sizeof(*ss)); - - ss->ss0.surface_type = I965_SURFACE_2D; - ss->ss0.data_return_format = I965_SURFACERETURNFORMAT_FLOAT32; - if (dest_region->cpp == 2) { - ss->ss0.surface_format = I965_SURFACEFORMAT_B5G6R5_UNORM; - } else { - ss->ss0.surface_format = I965_SURFACEFORMAT_B8G8R8A8_UNORM; + format = I965_SURFACEFORMAT_B5G6R5_UNORM; + } else { + format = I965_SURFACEFORMAT_B8G8R8A8_UNORM; } - ss->ss0.writedisable_alpha = 0; - ss->ss0.writedisable_red = 0; - ss->ss0.writedisable_green = 0; - ss->ss0.writedisable_blue = 0; - ss->ss0.color_blend = 1; - ss->ss0.vert_line_stride = 0; - ss->ss0.vert_line_stride_ofs = 0; - ss->ss0.mipmap_layout_mode = 0; - ss->ss0.render_cache_read_mode = 0; - - ss->ss1.base_addr = dest_region->bo->offset; - - ss->ss2.width = dest_region->width - 1; - ss->ss2.height = dest_region->height - 1; - ss->ss2.mip_count = 0; - ss->ss2.render_target_rotation = 0; - ss->ss3.pitch = dest_region->pitch - 1; - i965_render_set_surface_tiling(ss, dest_region->tiling); - - dri_bo_emit_reloc(ss_bo, - I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, - 0, - SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state, ss1), - dest_region->bo); + dri_bo_map(ss_bo, 1); + assert(ss_bo->virtual); + ss = (char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index); + + if (IS_GEN7(i965->intel.device_id)) { + gen7_render_set_surface_state(ss, + dest_region->bo, 0, + dest_region->width, dest_region->height, + dest_region->pitch, format); + dri_bo_emit_reloc(ss_bo, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, + 0, + SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state, ss1), + dest_region->bo); + } else { + i965_render_set_surface_state(ss, + dest_region->bo, 0, + dest_region->width, dest_region->height, + dest_region->pitch, format); + dri_bo_emit_reloc(ss_bo, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, + 0, + SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state, ss1), + dest_region->bo); + } ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index); dri_bo_unmap(ss_bo); @@ -2094,81 +2191,884 @@ gen6_render_put_subpicture(VADriverContextP ctx, } /* - * global functions + * for GEN7 */ -void -intel_render_put_surface(VADriverContextP ctx, - VASurfaceID surface, - short srcx, - short srcy, - unsigned short srcw, - unsigned short srch, - short destx, - short desty, - unsigned short destw, - unsigned short desth, - unsigned int flag) +static void +gen7_render_initialize(VADriverContextP ctx) { struct i965_driver_data *i965 = i965_driver_data(ctx); + struct i965_render_state *render_state = &i965->render_state; + dri_bo *bo; - i965_post_processing(ctx, surface, - srcx, srcy, srcw, srch, - destx, desty, destw, desth, - flag); + /* VERTEX BUFFER */ + dri_bo_unreference(render_state->vb.vertex_buffer); + bo = dri_bo_alloc(i965->intel.bufmgr, + "vertex buffer", + 4096, + 4096); + assert(bo); + render_state->vb.vertex_buffer = bo; - if (IS_GEN6(i965->intel.device_id) || - IS_GEN7(i965->intel.device_id)) - gen6_render_put_surface(ctx, surface, - srcx, srcy, srcw, srch, - destx, desty, destw, desth, - flag); - else - i965_render_put_surface(ctx, surface, - srcx, srcy, srcw, srch, - destx, desty, destw, desth, - flag); + /* WM */ + dri_bo_unreference(render_state->wm.surface_state_binding_table_bo); + bo = dri_bo_alloc(i965->intel.bufmgr, + "surface state & binding table", + (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES, + 4096); + assert(bo); + render_state->wm.surface_state_binding_table_bo = bo; + + dri_bo_unreference(render_state->wm.sampler); + bo = dri_bo_alloc(i965->intel.bufmgr, + "sampler state", + MAX_SAMPLERS * sizeof(struct gen7_sampler_state), + 4096); + assert(bo); + render_state->wm.sampler = bo; + render_state->wm.sampler_count = 0; + + /* COLOR CALCULATOR */ + dri_bo_unreference(render_state->cc.state); + bo = dri_bo_alloc(i965->intel.bufmgr, + "color calc state", + sizeof(struct gen6_color_calc_state), + 4096); + assert(bo); + render_state->cc.state = bo; + + /* CC VIEWPORT */ + dri_bo_unreference(render_state->cc.viewport); + bo = dri_bo_alloc(i965->intel.bufmgr, + "cc viewport", + sizeof(struct i965_cc_viewport), + 4096); + assert(bo); + render_state->cc.viewport = bo; + + /* BLEND STATE */ + dri_bo_unreference(render_state->cc.blend); + bo = dri_bo_alloc(i965->intel.bufmgr, + "blend state", + sizeof(struct gen6_blend_state), + 4096); + assert(bo); + render_state->cc.blend = bo; + + /* DEPTH & STENCIL STATE */ + dri_bo_unreference(render_state->cc.depth_stencil); + bo = dri_bo_alloc(i965->intel.bufmgr, + "depth & stencil state", + sizeof(struct gen6_depth_stencil_state), + 4096); + assert(bo); + render_state->cc.depth_stencil = bo; } -void -intel_render_put_subpicture(VADriverContextP ctx, - VASurfaceID surface, - short srcx, - short srcy, - unsigned short srcw, - unsigned short srch, - short destx, - short desty, - unsigned short destw, - unsigned short desth) +static void +gen7_render_color_calc_state(VADriverContextP ctx) { struct i965_driver_data *i965 = i965_driver_data(ctx); + struct i965_render_state *render_state = &i965->render_state; + struct gen6_color_calc_state *color_calc_state; + + dri_bo_map(render_state->cc.state, 1); + assert(render_state->cc.state->virtual); + color_calc_state = render_state->cc.state->virtual; + memset(color_calc_state, 0, sizeof(*color_calc_state)); + color_calc_state->constant_r = 1.0; + color_calc_state->constant_g = 0.0; + color_calc_state->constant_b = 1.0; + color_calc_state->constant_a = 1.0; + dri_bo_unmap(render_state->cc.state); +} - if (IS_GEN6(i965->intel.device_id) || - IS_GEN7(i965->intel.device_id)) - gen6_render_put_subpicture(ctx, surface, - srcx, srcy, srcw, srch, - destx, desty, destw, desth); - else - i965_render_put_subpicture(ctx, surface, - srcx, srcy, srcw, srch, - destx, desty, destw, desth); +static void +gen7_render_blend_state(VADriverContextP ctx) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct i965_render_state *render_state = &i965->render_state; + struct gen6_blend_state *blend_state; + + dri_bo_map(render_state->cc.blend, 1); + assert(render_state->cc.blend->virtual); + blend_state = render_state->cc.blend->virtual; + memset(blend_state, 0, sizeof(*blend_state)); + blend_state->blend1.logic_op_enable = 1; + blend_state->blend1.logic_op_func = 0xc; + blend_state->blend1.pre_blend_clamp_enable = 1; + dri_bo_unmap(render_state->cc.blend); } -Bool -i965_render_init(VADriverContextP ctx) +static void +gen7_render_depth_stencil_state(VADriverContextP ctx) { struct i965_driver_data *i965 = i965_driver_data(ctx); struct i965_render_state *render_state = &i965->render_state; - int i; + struct gen6_depth_stencil_state *depth_stencil_state; + + dri_bo_map(render_state->cc.depth_stencil, 1); + assert(render_state->cc.depth_stencil->virtual); + depth_stencil_state = render_state->cc.depth_stencil->virtual; + memset(depth_stencil_state, 0, sizeof(*depth_stencil_state)); + dri_bo_unmap(render_state->cc.depth_stencil); +} - /* kernel */ - assert(NUM_RENDER_KERNEL == (sizeof(render_kernels_gen5) / - sizeof(render_kernels_gen5[0]))); - assert(NUM_RENDER_KERNEL == (sizeof(render_kernels_gen6) / - sizeof(render_kernels_gen6[0]))); +static void +gen7_render_sampler(VADriverContextP ctx) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct i965_render_state *render_state = &i965->render_state; + struct gen7_sampler_state *sampler_state; + int i; + + assert(render_state->wm.sampler_count > 0); + assert(render_state->wm.sampler_count <= MAX_SAMPLERS); - if (IS_GEN6(i965->intel.device_id) || - IS_GEN7(i965->intel.device_id)) + dri_bo_map(render_state->wm.sampler, 1); + assert(render_state->wm.sampler->virtual); + sampler_state = render_state->wm.sampler->virtual; + for (i = 0; i < render_state->wm.sampler_count; i++) { + memset(sampler_state, 0, sizeof(*sampler_state)); + sampler_state->ss0.min_filter = I965_MAPFILTER_LINEAR; + sampler_state->ss0.mag_filter = I965_MAPFILTER_LINEAR; + sampler_state->ss3.r_wrap_mode = I965_TEXCOORDMODE_CLAMP; + sampler_state->ss3.s_wrap_mode = I965_TEXCOORDMODE_CLAMP; + sampler_state->ss3.t_wrap_mode = I965_TEXCOORDMODE_CLAMP; + sampler_state++; + } + + dri_bo_unmap(render_state->wm.sampler); +} + +static void +gen7_render_setup_states(VADriverContextP ctx, + VASurfaceID surface, + short srcx, + short srcy, + unsigned short srcw, + unsigned short srch, + short destx, + short desty, + unsigned short destw, + unsigned short desth) +{ + i965_render_dest_surface_state(ctx, 0); + i965_render_src_surfaces_state(ctx, surface); + gen7_render_sampler(ctx); + i965_render_cc_viewport(ctx); + gen7_render_color_calc_state(ctx); + gen7_render_blend_state(ctx); + gen7_render_depth_stencil_state(ctx); + i965_render_upload_constants(ctx); + i965_render_upload_vertex(ctx, surface, + srcx, srcy, srcw, srch, + destx, desty, destw, desth); +} + +static void +gen7_emit_invarient_states(VADriverContextP ctx) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct intel_batchbuffer *batch = i965->batch; + + BEGIN_BATCH(batch, 1); + OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D); + ADVANCE_BATCH(batch); + + BEGIN_BATCH(batch, 4); + OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE | (4 - 2)); + OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER | + GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */ + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); + + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, GEN6_3DSTATE_SAMPLE_MASK | (2 - 2)); + OUT_BATCH(batch, 1); + ADVANCE_BATCH(batch); + + /* Set system instruction pointer */ + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, CMD_STATE_SIP | 0); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); +} + +static void +gen7_emit_state_base_address(VADriverContextP ctx) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct intel_batchbuffer *batch = i965->batch; + struct i965_render_state *render_state = &i965->render_state; + + OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (10 - 2)); + OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state base address */ + OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */ + OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state base address */ + OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object base address */ + OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction base address */ + OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state upper bound */ + OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state upper bound */ + OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object upper bound */ + OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction access upper bound */ +} + +static void +gen7_emit_viewport_state_pointers(VADriverContextP ctx) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct intel_batchbuffer *batch = i965->batch; + struct i965_render_state *render_state = &i965->render_state; + + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC | (2 - 2)); + OUT_RELOC(batch, + render_state->cc.viewport, + I915_GEM_DOMAIN_INSTRUCTION, 0, + 0); + ADVANCE_BATCH(batch); + + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CL | (2 - 2)); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); +} + +/* + * URB layout on GEN7 + * ---------------------------------------- + * | PS Push Constants (8KB) | VS entries | + * ---------------------------------------- + */ +static void +gen7_emit_urb(VADriverContextP ctx) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct intel_batchbuffer *batch = i965->batch; + + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS | (2 - 2)); + OUT_BATCH(batch, 8); /* in 1KBs */ + ADVANCE_BATCH(batch); + + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, GEN7_3DSTATE_URB_VS | (2 - 2)); + OUT_BATCH(batch, + (32 << GEN7_URB_ENTRY_NUMBER_SHIFT) | /* at least 32 */ + (2 - 1) << GEN7_URB_ENTRY_SIZE_SHIFT | + (1 << GEN7_URB_STARTING_ADDRESS_SHIFT)); + ADVANCE_BATCH(batch); + + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, GEN7_3DSTATE_URB_GS | (2 - 2)); + OUT_BATCH(batch, + (0 << GEN7_URB_ENTRY_SIZE_SHIFT) | + (1 << GEN7_URB_STARTING_ADDRESS_SHIFT)); + ADVANCE_BATCH(batch); + + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, GEN7_3DSTATE_URB_HS | (2 - 2)); + OUT_BATCH(batch, + (0 << GEN7_URB_ENTRY_SIZE_SHIFT) | + (2 << GEN7_URB_STARTING_ADDRESS_SHIFT)); + ADVANCE_BATCH(batch); + + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, GEN7_3DSTATE_URB_DS | (2 - 2)); + OUT_BATCH(batch, + (0 << GEN7_URB_ENTRY_SIZE_SHIFT) | + (2 << GEN7_URB_STARTING_ADDRESS_SHIFT)); + ADVANCE_BATCH(batch); +} + +static void +gen7_emit_cc_state_pointers(VADriverContextP ctx) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct intel_batchbuffer *batch = i965->batch; + struct i965_render_state *render_state = &i965->render_state; + + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, GEN6_3DSTATE_CC_STATE_POINTERS | (2 - 2)); + OUT_RELOC(batch, + render_state->cc.state, + I915_GEM_DOMAIN_INSTRUCTION, 0, + 1); + ADVANCE_BATCH(batch); + + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, GEN7_3DSTATE_BLEND_STATE_POINTERS | (2 - 2)); + OUT_RELOC(batch, + render_state->cc.blend, + I915_GEM_DOMAIN_INSTRUCTION, 0, + 1); + ADVANCE_BATCH(batch); + + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, GEN7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS | (2 - 2)); + OUT_RELOC(batch, + render_state->cc.depth_stencil, + I915_GEM_DOMAIN_INSTRUCTION, 0, + 1); + ADVANCE_BATCH(batch); +} + +static void +gen7_emit_sampler_state_pointers(VADriverContextP ctx) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct intel_batchbuffer *batch = i965->batch; + struct i965_render_state *render_state = &i965->render_state; + + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS | (2 - 2)); + OUT_RELOC(batch, + render_state->wm.sampler, + I915_GEM_DOMAIN_INSTRUCTION, 0, + 0); + ADVANCE_BATCH(batch); +} + +static void +gen7_emit_binding_table(VADriverContextP ctx) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct intel_batchbuffer *batch = i965->batch; + + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS | (2 - 2)); + OUT_BATCH(batch, BINDING_TABLE_OFFSET); + ADVANCE_BATCH(batch); +} + +static void +gen7_emit_depth_buffer_state(VADriverContextP ctx) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct intel_batchbuffer *batch = i965->batch; + + BEGIN_BATCH(batch, 7); + OUT_BATCH(batch, GEN7_3DSTATE_DEPTH_BUFFER | (7 - 2)); + OUT_BATCH(batch, + (I965_DEPTHFORMAT_D32_FLOAT << 18) | + (I965_SURFACE_NULL << 29)); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); + + BEGIN_BATCH(batch, 3); + OUT_BATCH(batch, GEN7_3DSTATE_CLEAR_PARAMS | (3 - 2)); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); +} + +static void +gen7_emit_drawing_rectangle(VADriverContextP ctx) +{ + i965_render_drawing_rectangle(ctx); +} + +static void +gen7_emit_vs_state(VADriverContextP ctx) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct intel_batchbuffer *batch = i965->batch; + + /* disable VS constant buffer */ + OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_VS | (7 - 2)); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + + OUT_BATCH(batch, GEN6_3DSTATE_VS | (6 - 2)); + OUT_BATCH(batch, 0); /* without VS kernel */ + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); /* pass-through */ +} + +static void +gen7_emit_bypass_state(VADriverContextP ctx) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct intel_batchbuffer *batch = i965->batch; + + /* bypass GS */ + BEGIN_BATCH(batch, 7); + OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_GS | (7 - 2)); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); + + BEGIN_BATCH(batch, 7); + OUT_BATCH(batch, GEN6_3DSTATE_GS | (7 - 2)); + OUT_BATCH(batch, 0); /* without GS kernel */ + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); /* pass-through */ + ADVANCE_BATCH(batch); + + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS | (2 - 2)); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); + + /* disable HS */ + BEGIN_BATCH(batch, 7); + OUT_BATCH(batch, GEN7_3DSTATE_CONSTANT_HS | (7 - 2)); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); + + BEGIN_BATCH(batch, 7); + OUT_BATCH(batch, GEN7_3DSTATE_HS | (7 - 2)); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); + + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS | (2 - 2)); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); + + /* Disable TE */ + BEGIN_BATCH(batch, 4); + OUT_BATCH(batch, GEN7_3DSTATE_TE | (4 - 2)); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); + + /* Disable DS */ + BEGIN_BATCH(batch, 7); + OUT_BATCH(batch, GEN7_3DSTATE_CONSTANT_DS | (7 - 2)); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); + + BEGIN_BATCH(batch, 6); + OUT_BATCH(batch, GEN7_3DSTATE_DS | (6 - 2)); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); + + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS | (2 - 2)); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); + + /* Disable STREAMOUT */ + BEGIN_BATCH(batch, 3); + OUT_BATCH(batch, GEN7_3DSTATE_STREAMOUT | (3 - 2)); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); +} + +static void +gen7_emit_clip_state(VADriverContextP ctx) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct intel_batchbuffer *batch = i965->batch; + + OUT_BATCH(batch, GEN6_3DSTATE_CLIP | (4 - 2)); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); /* pass-through */ + OUT_BATCH(batch, 0); +} + +static void +gen7_emit_sf_state(VADriverContextP ctx) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct intel_batchbuffer *batch = i965->batch; + + BEGIN_BATCH(batch, 14); + OUT_BATCH(batch, GEN7_3DSTATE_SBE | (14 - 2)); + OUT_BATCH(batch, + (1 << GEN7_SBE_NUM_OUTPUTS_SHIFT) | + (1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT) | + (0 << GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT)); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); /* DW4 */ + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); /* DW9 */ + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); + + BEGIN_BATCH(batch, 7); + OUT_BATCH(batch, GEN6_3DSTATE_SF | (7 - 2)); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, GEN6_3DSTATE_SF_CULL_NONE); + OUT_BATCH(batch, 2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); +} + +static void +gen7_emit_wm_state(VADriverContextP ctx, int kernel) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct intel_batchbuffer *batch = i965->batch; + struct i965_render_state *render_state = &i965->render_state; + + BEGIN_BATCH(batch, 3); + OUT_BATCH(batch, GEN6_3DSTATE_WM | (3 - 2)); + OUT_BATCH(batch, + GEN7_WM_DISPATCH_ENABLE | + GEN7_WM_PERSPECTIVE_PIXEL_BARYCENTRIC); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); + + BEGIN_BATCH(batch, 7); + OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_PS | (7 - 2)); + OUT_BATCH(batch, 1); + OUT_BATCH(batch, 0); + OUT_RELOC(batch, + render_state->curbe.bo, + I915_GEM_DOMAIN_INSTRUCTION, 0, + 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); + + BEGIN_BATCH(batch, 8); + OUT_BATCH(batch, GEN7_3DSTATE_PS | (8 - 2)); + OUT_RELOC(batch, + render_state->render_kernels[kernel].bo, + I915_GEM_DOMAIN_INSTRUCTION, 0, + 0); + OUT_BATCH(batch, + (1 << GEN7_PS_SAMPLER_COUNT_SHIFT) | + (5 << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT)); + OUT_BATCH(batch, 0); /* scratch space base offset */ + OUT_BATCH(batch, + ((86 - 1) << GEN7_PS_MAX_THREADS_SHIFT) | + GEN7_PS_PUSH_CONSTANT_ENABLE | + GEN7_PS_ATTRIBUTE_ENABLE | + GEN7_PS_16_DISPATCH_ENABLE); + OUT_BATCH(batch, + (6 << GEN7_PS_DISPATCH_START_GRF_SHIFT_0)); + OUT_BATCH(batch, 0); /* kernel 1 pointer */ + OUT_BATCH(batch, 0); /* kernel 2 pointer */ + ADVANCE_BATCH(batch); +} + +static void +gen7_emit_vertex_element_state(VADriverContextP ctx) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct intel_batchbuffer *batch = i965->batch; + + /* Set up our vertex elements, sourced from the single vertex buffer. */ + OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | (5 - 2)); + /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */ + OUT_BATCH(batch, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) | + GEN6_VE0_VALID | + (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) | + (0 << VE0_OFFSET_SHIFT)); + OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | + (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) | + (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) | + (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT)); + /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */ + OUT_BATCH(batch, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) | + GEN6_VE0_VALID | + (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) | + (8 << VE0_OFFSET_SHIFT)); + OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | + (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) | + (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) | + (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT)); +} + +static void +gen7_emit_vertices(VADriverContextP ctx) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct intel_batchbuffer *batch = i965->batch; + struct i965_render_state *render_state = &i965->render_state; + + BEGIN_BATCH(batch, 5); + OUT_BATCH(batch, CMD_VERTEX_BUFFERS | (5 - 2)); + OUT_BATCH(batch, + (0 << GEN6_VB0_BUFFER_INDEX_SHIFT) | + GEN6_VB0_VERTEXDATA | + GEN7_VB0_ADDRESS_MODIFYENABLE | + ((4 * 4) << VB0_BUFFER_PITCH_SHIFT)); + OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0); + OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 12 * 4); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); + + BEGIN_BATCH(batch, 7); + OUT_BATCH(batch, CMD_3DPRIMITIVE | (7 - 2)); + OUT_BATCH(batch, + _3DPRIM_RECTLIST | + GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL); + OUT_BATCH(batch, 3); /* vertex count per instance */ + OUT_BATCH(batch, 0); /* start vertex offset */ + OUT_BATCH(batch, 1); /* single instance */ + OUT_BATCH(batch, 0); /* start instance location */ + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); +} + +static void +gen7_render_emit_states(VADriverContextP ctx, int kernel) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct intel_batchbuffer *batch = i965->batch; + + intel_batchbuffer_start_atomic(batch, 0x1000); + intel_batchbuffer_emit_mi_flush(batch); + gen7_emit_invarient_states(ctx); + gen7_emit_state_base_address(ctx); + gen7_emit_viewport_state_pointers(ctx); + gen7_emit_urb(ctx); + gen7_emit_cc_state_pointers(ctx); + gen7_emit_sampler_state_pointers(ctx); + gen7_emit_bypass_state(ctx); + gen7_emit_vs_state(ctx); + gen7_emit_clip_state(ctx); + gen7_emit_sf_state(ctx); + gen7_emit_wm_state(ctx, kernel); + gen7_emit_binding_table(ctx); + gen7_emit_depth_buffer_state(ctx); + gen7_emit_drawing_rectangle(ctx); + gen7_emit_vertex_element_state(ctx); + gen7_emit_vertices(ctx); + intel_batchbuffer_end_atomic(batch); +} + +static void +gen7_render_put_surface(VADriverContextP ctx, + VASurfaceID surface, + short srcx, + short srcy, + unsigned short srcw, + unsigned short srch, + short destx, + short desty, + unsigned short destw, + unsigned short desth, + unsigned int flag) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct intel_batchbuffer *batch = i965->batch; + + gen7_render_initialize(ctx); + gen7_render_setup_states(ctx, surface, + srcx, srcy, srcw, srch, + destx, desty, destw, desth); + i965_clear_dest_region(ctx); + gen7_render_emit_states(ctx, PS_KERNEL); + intel_batchbuffer_flush(batch); +} + +static void +gen7_subpicture_render_blend_state(VADriverContextP ctx) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct i965_render_state *render_state = &i965->render_state; + struct gen6_blend_state *blend_state; + + dri_bo_unmap(render_state->cc.state); + dri_bo_map(render_state->cc.blend, 1); + assert(render_state->cc.blend->virtual); + blend_state = render_state->cc.blend->virtual; + memset(blend_state, 0, sizeof(*blend_state)); + blend_state->blend0.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA; + blend_state->blend0.source_blend_factor = I965_BLENDFACTOR_SRC_ALPHA; + blend_state->blend0.blend_func = I965_BLENDFUNCTION_ADD; + blend_state->blend0.blend_enable = 1; + blend_state->blend1.post_blend_clamp_enable = 1; + blend_state->blend1.pre_blend_clamp_enable = 1; + blend_state->blend1.clamp_range = 0; /* clamp range [0, 1] */ + dri_bo_unmap(render_state->cc.blend); +} + +static void +gen7_subpicture_render_setup_states(VADriverContextP ctx, + VASurfaceID surface, + short srcx, + short srcy, + unsigned short srcw, + unsigned short srch, + short destx, + short desty, + unsigned short destw, + unsigned short desth) +{ + VARectangle output_rect; + + output_rect.x = destx; + output_rect.y = desty; + output_rect.width = destw; + output_rect.height = desth; + + i965_render_dest_surface_state(ctx, 0); + i965_subpic_render_src_surfaces_state(ctx, surface); + i965_render_sampler(ctx); + i965_render_cc_viewport(ctx); + gen7_render_color_calc_state(ctx); + gen7_subpicture_render_blend_state(ctx); + gen7_render_depth_stencil_state(ctx); + i965_subpic_render_upload_vertex(ctx, surface, &output_rect); +} + +static void +gen7_render_put_subpicture(VADriverContextP ctx, + VASurfaceID surface, + short srcx, + short srcy, + unsigned short srcw, + unsigned short srch, + short destx, + short desty, + unsigned short destw, + unsigned short desth) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct intel_batchbuffer *batch = i965->batch; + struct object_surface *obj_surface = SURFACE(surface); + struct object_subpic *obj_subpic = SUBPIC(obj_surface->subpic); + + assert(obj_subpic); + gen7_render_initialize(ctx); + gen7_subpicture_render_setup_states(ctx, surface, + srcx, srcy, srcw, srch, + destx, desty, destw, desth); + gen7_render_emit_states(ctx, PS_SUBPIC_KERNEL); + i965_render_upload_image_palette(ctx, obj_subpic->image, 0xff); + intel_batchbuffer_flush(batch); +} + + +/* + * global functions + */ +void +intel_render_put_surface(VADriverContextP ctx, + VASurfaceID surface, + short srcx, + short srcy, + unsigned short srcw, + unsigned short srch, + short destx, + short desty, + unsigned short destw, + unsigned short desth, + unsigned int flag) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + + i965_post_processing(ctx, surface, + srcx, srcy, srcw, srch, + destx, desty, destw, desth, + flag); + + if (IS_GEN7(i965->intel.device_id)) + gen7_render_put_surface(ctx, surface, + srcx, srcy, srcw, srch, + destx, desty, destw, desth, + flag); + else if (IS_GEN6(i965->intel.device_id)) + gen6_render_put_surface(ctx, surface, + srcx, srcy, srcw, srch, + destx, desty, destw, desth, + flag); + else + i965_render_put_surface(ctx, surface, + srcx, srcy, srcw, srch, + destx, desty, destw, desth, + flag); +} + +void +intel_render_put_subpicture(VADriverContextP ctx, + VASurfaceID surface, + short srcx, + short srcy, + unsigned short srcw, + unsigned short srch, + short destx, + short desty, + unsigned short destw, + unsigned short desth) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + + if (IS_GEN7(i965->intel.device_id)) + gen7_render_put_subpicture(ctx, surface, + srcx, srcy, srcw, srch, + destx, desty, destw, desth); + else if (IS_GEN6(i965->intel.device_id)) + gen6_render_put_subpicture(ctx, surface, + srcx, srcy, srcw, srch, + destx, desty, destw, desth); + else + i965_render_put_subpicture(ctx, surface, + srcx, srcy, srcw, srch, + destx, desty, destw, desth); +} + +Bool +i965_render_init(VADriverContextP ctx) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct i965_render_state *render_state = &i965->render_state; + int i; + + /* kernel */ + assert(NUM_RENDER_KERNEL == (sizeof(render_kernels_gen5) / + sizeof(render_kernels_gen5[0]))); + assert(NUM_RENDER_KERNEL == (sizeof(render_kernels_gen6) / + sizeof(render_kernels_gen6[0]))); + + if (IS_GEN7(i965->intel.device_id)) + memcpy(render_state->render_kernels, render_kernels_gen7, sizeof(render_state->render_kernels)); + else if (IS_GEN6(i965->intel.device_id)) memcpy(render_state->render_kernels, render_kernels_gen6, sizeof(render_state->render_kernels)); else if (IS_IRONLAKE(i965->intel.device_id)) memcpy(render_state->render_kernels, render_kernels_gen5, sizeof(render_state->render_kernels)); diff --git a/i965_structs.h b/i965_structs.h index 8f81b75..df59e45 100644 --- a/i965_structs.h +++ b/i965_structs.h @@ -1128,4 +1128,128 @@ struct gen6_interface_descriptor_data } desc7; }; +struct gen7_surface_state +{ + struct { + unsigned int cube_pos_z:1; + unsigned int cube_neg_z:1; + unsigned int cube_pos_y:1; + unsigned int cube_neg_y:1; + unsigned int cube_pos_x:1; + unsigned int cube_neg_x:1; + unsigned int pad2:2; + unsigned int render_cache_read_write:1; + unsigned int pad1:1; + unsigned int surface_array_spacing:1; + unsigned int vert_line_stride_ofs:1; + unsigned int vert_line_stride:1; + unsigned int tile_walk:1; + unsigned int tiled_surface:1; + unsigned int horizontal_alignment:1; + unsigned int vertical_alignment:2; + unsigned int surface_format:9; /**< BRW_SURFACEFORMAT_x */ + unsigned int pad0:1; + unsigned int is_array:1; + unsigned int surface_type:3; /**< BRW_SURFACE_1D/2D/3D/CUBE */ + } ss0; + + struct { + unsigned int base_addr; + } ss1; + + struct { + unsigned int width:14; + unsigned int pad1:2; + unsigned int height:14; + unsigned int pad0:2; + } ss2; + + struct { + unsigned int pitch:18; + unsigned int pad:3; + unsigned int depth:11; + } ss3; + + struct { + unsigned int multisample_position_palette_index:3; + unsigned int num_multisamples:3; + unsigned int multisampled_surface_storage_format:1; + unsigned int render_target_view_extent:11; + unsigned int min_array_elt:11; + unsigned int rotation:2; + unsigned int pad0:1; + } ss4; + + struct { + unsigned int mip_count:4; + unsigned int min_lod:4; + unsigned int pad1:12; + unsigned int y_offset:4; + unsigned int pad0:1; + unsigned int x_offset:7; + } ss5; + + struct { + unsigned int pad; /* Multisample Control Surface stuff */ + } ss6; + + struct { + unsigned int resource_min_lod:12; + unsigned int pad0:16; + unsigned int alpha_clear_color:1; + unsigned int blue_clear_color:1; + unsigned int green_clear_color:1; + unsigned int red_clear_color:1; + } ss7; +}; + +struct gen7_sampler_state +{ + struct + { + unsigned int aniso_algorithm:1; + unsigned int lod_bias:13; + unsigned int min_filter:3; + unsigned int mag_filter:3; + unsigned int mip_filter:2; + unsigned int base_level:5; + unsigned int pad1:1; + unsigned int lod_preclamp:1; + unsigned int default_color_mode:1; + unsigned int pad0:1; + unsigned int disable:1; + } ss0; + + struct + { + unsigned int cube_control_mode:1; + unsigned int shadow_function:3; + unsigned int pad:4; + unsigned int max_lod:12; + unsigned int min_lod:12; + } ss1; + + struct + { + unsigned int pad:5; + unsigned int default_color_pointer:27; + } ss2; + + struct + { + unsigned int r_wrap_mode:3; + unsigned int t_wrap_mode:3; + unsigned int s_wrap_mode:3; + unsigned int pad:1; + unsigned int non_normalized_coord:1; + unsigned int trilinear_quality:2; + unsigned int address_round:6; + unsigned int max_aniso:3; + unsigned int chroma_key_mode:1; + unsigned int chroma_key_index:2; + unsigned int chroma_key_enable:1; + unsigned int pad0:6; + } ss3; +}; + #endif /* _I965_STRUCTS_H_ */ -- cgit v1.2.3