summaryrefslogtreecommitdiff
path: root/src/intel
diff options
context:
space:
mode:
authorChuanbo Weng <chuanbo.weng@intel.com>2017-06-14 00:54:13 +0800
committerYang Rong <rong.r.yang@intel.com>2017-07-12 18:29:19 +0800
commit9cb7ff4c285d892616595e5a43793f4d1408eca4 (patch)
tree335679b4a0e2fb166ae5bb0517a871cde6071529 /src/intel
parent4933bf9212c9721ca2b0e615097ed2b53fec51c3 (diff)
Implement extension cl_intel_device_side_avc_motion_estimation.
This patch mainly contains: 1. The built-in function __gen_ocl_ime implementation. 2. Many of the built-in functions of cl_intel_device_side_avc_motion_estimation are implemented. 3. This extension is required to run in simd16 mode. v2: move the utests to separate patches one by one; as all the utests have an extension function check, there is no need to put them in a standalone utest; uncomment the self test; fix an extension check logic issue, which should be && instead of ||. Signed-off-by: Chuanbo Weng <chuanbo.weng@intel.com> Signed-off-by: Xionghu Luo <xionghu.luo@intel.com> Reviewed-by: Yang Rong <rong.r.yang@intel.com>
Diffstat (limited to 'src/intel')
-rw-r--r--src/intel/intel_gpgpu.c70
-rw-r--r--src/intel/intel_structs.h63
2 files changed, 133 insertions, 0 deletions
diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c
index 2b778e5a..b0d6bd94 100644
--- a/src/intel/intel_gpgpu.c
+++ b/src/intel/intel_gpgpu.c
@@ -1337,6 +1337,75 @@ intel_gpgpu_bind_image_for_vme_gen7(intel_gpgpu_t *gpgpu,
assert(index < GEN_MAX_SURFACES);
}
+/*
+ * Bind an image as a Gen9 media surface for VME.
+ *
+ * Fills surface-state slot `index` in the surface heap that lives inside
+ * gpgpu->aux_buf, points heap->binding_table[index] at that slot, and emits
+ * a relocation so the base-address word (ss6) tracks obj_bo.
+ *
+ * gpgpu          - GPGPU state owning the aux buffer / surface heap.
+ * index          - binding table slot; must be < GEN_MAX_SURFACES.
+ * obj_bo         - buffer object backing the image.
+ * obj_bo_offset  - byte offset of the image inside obj_bo.
+ * w, h           - image width / height; stored as (value - 1).
+ * pitch          - row pitch; stored as (pitch - 1).
+ * tiling         - GPGPU_NO_TILE, GPGPU_TILE_X or GPGPU_TILE_Y.
+ *
+ * format, type, bpp, depth and slice_pitch are not read by this function;
+ * the surface format is always programmed as 12 (Y8_UNORM).
+ * NOTE(review): they are presumably kept so the signature matches
+ * cl_gpgpu_bind_image_cb -- confirm against the callback typedef.
+ */
+static void
+intel_gpgpu_bind_image_for_vme_gen9(intel_gpgpu_t *gpgpu,
+                                    uint32_t index,
+                                    dri_bo* obj_bo,
+                                    uint32_t obj_bo_offset,
+                                    uint32_t format,
+                                    cl_mem_object_type type,
+                                    uint32_t bpp,
+                                    int32_t w,
+                                    int32_t h,
+                                    int32_t depth,
+                                    int32_t pitch,
+                                    int32_t slice_pitch,
+                                    int32_t tiling)
+{
+  surface_heap_t *heap = gpgpu->aux_buf.bo->virtual + gpgpu->aux_offset.surface_heap_offset;
+  gen9_media_surface_state_t *ss;
+  uint64_t gpu_addr;
+
+  /* Check the slot before it is used: everything below writes through it. */
+  assert(index < GEN_MAX_SURFACES);
+
+  /* Slots are spaced by the gen8 surface-state size, and the whole slot is
+   * cleared, not just the part covered by the media layout. */
+  ss = (gen9_media_surface_state_t *) &heap->surface[index * sizeof(gen8_surface_state_t)];
+  memset(ss, 0, sizeof(gen8_surface_state_t));
+
+  ss->ss0.rotation = 0;
+
+  ss->ss1.uv_offset_v_direction = 0;
+  ss->ss1.pic_struct = 0;
+  ss->ss1.width = w - 1;
+  ss->ss1.height = h - 1;
+
+  /* Any other tiling value leaves tile_mode at 0 from the memset above. */
+  if (tiling == GPGPU_NO_TILE) {
+    ss->ss2.tile_mode = 0;
+  }
+  else if (tiling == GPGPU_TILE_X) {
+    ss->ss2.tile_mode = 2;
+  }
+  else if (tiling == GPGPU_TILE_Y) {
+    ss->ss2.tile_mode = 3;
+  }
+  ss->ss2.half_pitch_for_chroma = 0;
+  ss->ss2.surface_pitch = pitch - 1;
+  ss->ss2.address_control = 1; /* CLAMP: 0; MIRROR: 1 */
+  ss->ss2.mem_compress_enable = 0;
+  ss->ss2.mem_compress_mode = 0;
+  ss->ss2.uv_offset_v_direction_msb = 0;
+  ss->ss2.uv_offset_u_direction = 0;
+  ss->ss2.interleave_chroma = 0;
+  ss->ss2.surface_format = 12; /* Y8_UNORM (PLANAR_420_8 would be 4) */
+
+  ss->ss3.y_offset_for_u = 0;
+  ss->ss3.x_offset_for_u = 0;
+  ss->ss4.y_offset_for_v = 0;
+  ss->ss4.x_offset_for_v = 0;
+
+  ss->ss5.surface_object_control_state = cl_gpgpu_get_cache_ctrl();
+  ss->ss5.tiled_res_mode = 0; /* TRMODE_NONE: 0; TRMODE_TILEYF: 1; TRMODE_TILEYS: 2 */
+  ss->ss5.vert_line_stride_offset = 0;
+  ss->ss5.vert_line_stride = 0;
+
+  /* Split the 64-bit address across ss6 (low 32 bits) and ss7 (high bits;
+   * the base_addr_high field keeps only its low 16 bits). */
+  gpu_addr = obj_bo->offset64 + obj_bo_offset;
+  ss->ss6.base_addr = gpu_addr & 0xffffffff;
+  ss->ss7.base_addr_high = (gpu_addr >> 32) & 0xffffffff;
+
+  /* NOTE(review): the slot address above uses sizeof(gen8_surface_state_t)
+   * while the binding table uses surface_state_sz; these are presumably
+   * equal on Gen9 -- confirm where surface_state_sz is set. */
+  heap->binding_table[index] = offsetof(surface_heap_t, surface) +
+                               index * surface_state_sz;
+  dri_bo_emit_reloc(gpgpu->aux_buf.bo,
+                    I915_GEM_DOMAIN_RENDER,
+                    I915_GEM_DOMAIN_RENDER,
+                    obj_bo_offset,
+                    gpgpu->aux_offset.surface_heap_offset +
+                    heap->binding_table[index] +
+                    offsetof(gen9_media_surface_state_t, ss6),
+                    obj_bo);
+}
+
static void
intel_gpgpu_bind_image_gen75(intel_gpgpu_t *gpgpu,
@@ -2562,6 +2631,7 @@ intel_set_gpgpu_callbacks(int device_id)
}
if (IS_GEN9(device_id)) {
cl_gpgpu_bind_image = (cl_gpgpu_bind_image_cb *) intel_gpgpu_bind_image_gen9;
+ cl_gpgpu_bind_image_for_vme = (cl_gpgpu_bind_image_cb *) intel_gpgpu_bind_image_for_vme_gen9;
intel_gpgpu_set_L3 = intel_gpgpu_set_L3_gen8;
cl_gpgpu_get_cache_ctrl = (cl_gpgpu_get_cache_ctrl_cb *)intel_gpgpu_get_cache_ctrl_gen9;
intel_gpgpu_get_scratch_index = intel_gpgpu_get_scratch_index_gen8;
diff --git a/src/intel/intel_structs.h b/src/intel/intel_structs.h
index b38cc423..282929d7 100644
--- a/src/intel/intel_structs.h
+++ b/src/intel/intel_structs.h
@@ -425,6 +425,69 @@ typedef struct gen7_media_surface_state
} ss7;
} gen7_media_surface_state_t;
+/*
+ * Gen9 media surface state: eight 32-bit words, ss0..ss7.
+ *
+ * Each of ss0..ss5 and ss7 is a set of bit-fields that together fill one
+ * uint32_t; ss6 is a plain 32-bit word. Bit positions mentioned below
+ * assume bit-fields are allocated starting from the least significant bit
+ * (as GCC does on x86).
+ */
+typedef struct gen9_media_surface_state
+{
+  struct {
+    uint32_t pad3:12;
+    uint32_t pad2:4;
+    uint32_t pad1:11; //ExistsIf [Surface Format] is not one of Planar Formats
+    uint32_t pad0:3;  /* completes the word so rotation sits in the top two bits */
+    uint32_t rotation:2;
+  } ss0;
+
+  struct {
+    uint32_t uv_offset_v_direction:2;
+    uint32_t pic_struct:2;
+    uint32_t width:14;   /* programmed as width - 1 by the VME bind helper */
+    uint32_t height:14;  /* programmed as height - 1 by the VME bind helper */
+  } ss1;
+
+  struct {
+    uint32_t tile_mode:2;        /* the VME bind helper writes 0 (linear), 2 (X) or 3 (Y) */
+    uint32_t half_pitch_for_chroma:1;
+    uint32_t surface_pitch:18;   /* programmed as pitch - 1 by the VME bind helper */
+    uint32_t address_control:1;  /* CLAMP: 0; MIRROR: 1 */
+    uint32_t mem_compress_enable:1;
+    uint32_t mem_compress_mode:1;
+    uint32_t uv_offset_v_direction_msb:1;
+    uint32_t uv_offset_u_direction:1;
+    uint32_t interleave_chroma:1;
+    uint32_t surface_format:5;   /* e.g. 12 = Y8_UNORM, 4 = PLANAR_420_8 */
+  } ss2;
+
+  struct {
+    uint32_t y_offset_for_u:14;
+    uint32_t pad1:2;
+    uint32_t x_offset_for_u:14;
+    uint32_t pad0:2;
+  } ss3;
+
+  struct {
+    uint32_t y_offset_for_v:15;
+    uint32_t pad1:1;
+    uint32_t x_offset_for_v:14;
+    uint32_t pad0:2;
+  } ss4;
+
+  struct {
+    uint32_t surface_object_control_state:7;
+    uint32_t pad2:11;
+    uint32_t tiled_res_mode:2;   /* TRMODE_NONE: 0; TRMODE_TILEYF: 1; TRMODE_TILEYS: 2 */
+    uint32_t pad1:4;
+    uint32_t pad0:6;
+    uint32_t vert_line_stride_offset:1;
+    uint32_t vert_line_stride:1;
+  } ss5;
+
+  struct {
+    uint32_t base_addr;          /* low 32 bits of the surface address */
+  } ss6;
+
+  struct {
+    uint32_t base_addr_high:16;  /* address bits above bit 31 */
+    uint32_t pad0:16;
+  } ss7;
+} gen9_media_surface_state_t;
+
typedef union gen_surface_state
{
gen7_surface_state_t gen7_surface_state;