diff options
author | Chuanbo Weng <chuanbo.weng@intel.com> | 2017-06-14 00:54:13 +0800 |
---|---|---|
committer | Yang Rong <rong.r.yang@intel.com> | 2017-07-12 18:29:19 +0800 |
commit | 9cb7ff4c285d892616595e5a43793f4d1408eca4 (patch) | |
tree | 335679b4a0e2fb166ae5bb0517a871cde6071529 /src/intel | |
parent | 4933bf9212c9721ca2b0e615097ed2b53fec51c3 (diff) |
Implement extension cl_intel_device_side_avc_motion_estimation.
This patch mainly contains:
1. built-in function __gen_ocl_ime implementation.
2. Lots of built-in functions of cl_intel_device_side_avc_motion_estimation
are implemented.
3. This extension is required to run in simd16 mode.
v2: move the utests to separate patches one by one;
as all the utests have an extension function check, there is no need to put them
in a standalone utest;
uncomment the self test;
fix extension check logic issue, should be && instead of ||.
Signed-off-by: Chuanbo Weng <chuanbo.weng@intel.com>
Signed-off-by: Xionghu Luo <xionghu.luo@intel.com>
Reviewed-by: Yang Rong <rong.r.yang@intel.com>
Diffstat (limited to 'src/intel')
-rw-r--r-- | src/intel/intel_gpgpu.c | 70 | ||||
-rw-r--r-- | src/intel/intel_structs.h | 63 |
2 files changed, 133 insertions, 0 deletions
diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c index 2b778e5a..b0d6bd94 100644 --- a/src/intel/intel_gpgpu.c +++ b/src/intel/intel_gpgpu.c @@ -1337,6 +1337,75 @@ intel_gpgpu_bind_image_for_vme_gen7(intel_gpgpu_t *gpgpu, assert(index < GEN_MAX_SURFACES); } +static void +intel_gpgpu_bind_image_for_vme_gen9(intel_gpgpu_t *gpgpu, + uint32_t index, + dri_bo* obj_bo, + uint32_t obj_bo_offset, + uint32_t format, + cl_mem_object_type type, + uint32_t bpp, + int32_t w, + int32_t h, + int32_t depth, + int32_t pitch, + int32_t slice_pitch, + int32_t tiling) +{ + surface_heap_t *heap = gpgpu->aux_buf.bo->virtual + gpgpu->aux_offset.surface_heap_offset; + gen9_media_surface_state_t *ss = (gen9_media_surface_state_t *) &heap->surface[index * sizeof(gen8_surface_state_t)]; + + memset(ss, 0, sizeof(gen8_surface_state_t)); + ss->ss0.rotation = 0; //++ + ss->ss1.uv_offset_v_direction = 0; + ss->ss1.pic_struct = 0; + ss->ss1.width = w - 1; + ss->ss1.height = h - 1; + if (tiling == GPGPU_NO_TILE) { + ss->ss2.tile_mode = 0; + } + else if (tiling == GPGPU_TILE_X){ + ss->ss2.tile_mode = 2; + } + else if (tiling == GPGPU_TILE_Y){ + ss->ss2.tile_mode = 3; + } + ss->ss2.half_pitch_for_chroma = 0; + ss->ss2.surface_pitch = pitch - 1; + ss->ss2.address_control = 1; //++ CLAMP: 0; MIRROR:1; + ss->ss2.mem_compress_enable = 0; //++ + ss->ss2.mem_compress_mode = 0; //++ + ss->ss2.uv_offset_v_direction_msb = 0; //++ + ss->ss2.uv_offset_u_direction = 0; //++ + ss->ss2.interleave_chroma = 0; + ss->ss2.surface_format = 12; //Y8_UNORM + //ss->ss2.surface_format = 4; //PLANAR_420_8 + ss->ss3.y_offset_for_u = 0; + ss->ss3.x_offset_for_u = 0; + ss->ss4.y_offset_for_v = 0; + ss->ss4.x_offset_for_v = 0; + ss->ss5.surface_object_control_state = cl_gpgpu_get_cache_ctrl(); + ss->ss5.tiled_res_mode = 0; //++ TRMODE_NONE: 0; TRMODE_TILEYF: 1; TRMODE_TILEYS:2 + ss->ss5.vert_line_stride_offset = 0; //++ + ss->ss5.vert_line_stride = 0; //++ + ss->ss6.base_addr = (obj_bo->offset64 + obj_bo_offset) & 
0xffffffff; // + ss->ss7.base_addr_high = ((obj_bo->offset64 + obj_bo_offset) >> 32) & 0xffffffff; // + + + heap->binding_table[index] = offsetof(surface_heap_t, surface) + + index * surface_state_sz; + dri_bo_emit_reloc(gpgpu->aux_buf.bo, + I915_GEM_DOMAIN_RENDER, + I915_GEM_DOMAIN_RENDER, + obj_bo_offset, + gpgpu->aux_offset.surface_heap_offset + + heap->binding_table[index] + + offsetof(gen9_media_surface_state_t, ss6), + obj_bo); + + assert(index < GEN_MAX_SURFACES); +} + static void intel_gpgpu_bind_image_gen75(intel_gpgpu_t *gpgpu, @@ -2562,6 +2631,7 @@ intel_set_gpgpu_callbacks(int device_id) } if (IS_GEN9(device_id)) { cl_gpgpu_bind_image = (cl_gpgpu_bind_image_cb *) intel_gpgpu_bind_image_gen9; + cl_gpgpu_bind_image_for_vme = (cl_gpgpu_bind_image_cb *) intel_gpgpu_bind_image_for_vme_gen9; intel_gpgpu_set_L3 = intel_gpgpu_set_L3_gen8; cl_gpgpu_get_cache_ctrl = (cl_gpgpu_get_cache_ctrl_cb *)intel_gpgpu_get_cache_ctrl_gen9; intel_gpgpu_get_scratch_index = intel_gpgpu_get_scratch_index_gen8; diff --git a/src/intel/intel_structs.h b/src/intel/intel_structs.h index b38cc423..282929d7 100644 --- a/src/intel/intel_structs.h +++ b/src/intel/intel_structs.h @@ -425,6 +425,69 @@ typedef struct gen7_media_surface_state } ss7; } gen7_media_surface_state_t; +typedef struct gen9_media_surface_state +{ + struct { + uint32_t pad3:12; + uint32_t pad2:4; + uint32_t pad1:11; //ExistsIf [Surface Format] is not one of Planar Formats + uint32_t rotation:2; + } ss0; + + struct { + uint32_t uv_offset_v_direction:2; + uint32_t pic_struct:2; + uint32_t width:14; + uint32_t height:14; + } ss1; + + struct { + uint32_t tile_mode:2; + uint32_t half_pitch_for_chroma:1; + uint32_t surface_pitch:18; + uint32_t address_control:1; + uint32_t mem_compress_enable:1; + uint32_t mem_compress_mode:1; + uint32_t uv_offset_v_direction_msb:1; + uint32_t uv_offset_u_direction:1; + uint32_t interleave_chroma:1; + uint32_t surface_format:5; + } ss2; + + struct { + uint32_t y_offset_for_u:14; + uint32_t 
pad1:2; + uint32_t x_offset_for_u:14; + uint32_t pad0:2; + } ss3; + + struct { + uint32_t y_offset_for_v:15; + uint32_t pad1:1; + uint32_t x_offset_for_v:14; + uint32_t pad0:2; + } ss4; + + struct { + uint32_t surface_object_control_state:7; + uint32_t pad2:11; + uint32_t tiled_res_mode:2; + uint32_t pad1:4; + uint32_t pad0:6; + uint32_t vert_line_stride_offset:1; + uint32_t vert_line_stride:1; + } ss5; + + struct { + uint32_t base_addr; + } ss6; + + struct { + uint32_t base_addr_high:16; + uint32_t pad0:16; + } ss7; +} gen9_media_surface_state_t; + typedef union gen_surface_state { gen7_surface_state_t gen7_surface_state; |