summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorYang Rong <rong.r.yang@intel.com>2015-01-29 14:16:12 +0800
committerYang Rong <rong.r.yang@intel.com>2015-01-29 16:14:07 +0800
commit738b5e64c99d0071e2333838c4f0530ba1f950cc (patch)
treead330343e2613856ba8b3d9aedcba3cd0bd0ebc3
parente9078de44d3cb0a00a8b9b3aaf057f3397b0f7c1 (diff)
SKL: Add function intel_gpgpu_bind_image_gen9.
SKL's qpitch differs from BDW's, and for SURFTYPE_1D the qpitch is the distance in pixels between array slices. So add two parameters, slice_pitch and bpp, to calculate it.
-rw-r--r--src/cl_command_queue.c8
-rw-r--r--src/cl_driver.h2
-rw-r--r--src/intel/intel_driver.c2
-rw-r--r--src/intel/intel_gpgpu.c84
4 files changed, 90 insertions, 6 deletions
diff --git a/src/cl_command_queue.c b/src/cl_command_queue.c
index 3c04d6d4..be6def15 100644
--- a/src/cl_command_queue.c
+++ b/src/cl_command_queue.c
@@ -140,16 +140,16 @@ cl_command_queue_bind_image(cl_command_queue queue, cl_kernel k)
image = cl_mem_image(k->args[id].mem);
set_image_info(k->curbe, &k->images[i], image);
cl_gpgpu_bind_image(gpgpu, k->images[i].idx, image->base.bo, image->offset,
- image->intel_fmt, image->image_type,
+ image->intel_fmt, image->image_type, image->bpp,
image->w, image->h, image->depth,
- image->row_pitch, (cl_gpgpu_tiling)image->tiling);
+ image->row_pitch, image->slice_pitch, (cl_gpgpu_tiling)image->tiling);
// TODO, this workaround is for GEN7/GEN75 only, we may need to do it in the driver layer
// on demand.
if (image->image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY)
cl_gpgpu_bind_image(gpgpu, k->images[i].idx + BTI_WORKAROUND_IMAGE_OFFSET, image->base.bo, image->offset,
- image->intel_fmt, image->image_type,
+ image->intel_fmt, image->image_type, image->bpp,
image->w, image->h, image->depth,
- image->row_pitch, (cl_gpgpu_tiling)image->tiling);
+ image->row_pitch, image->slice_pitch, (cl_gpgpu_tiling)image->tiling);
}
return CL_SUCCESS;
}
diff --git a/src/cl_driver.h b/src/cl_driver.h
index c88b9be3..16f8bbab 100644
--- a/src/cl_driver.h
+++ b/src/cl_driver.h
@@ -145,11 +145,13 @@ typedef void (cl_gpgpu_bind_image_cb)(cl_gpgpu state,
cl_buffer obj_bo,
uint32_t obj_bo_offset,
uint32_t format,
+ uint32_t bpp,
uint32_t type,
int32_t w,
int32_t h,
int32_t depth,
int pitch,
+ int32_t slice_pitch,
cl_gpgpu_tiling tiling);
extern cl_gpgpu_bind_image_cb *cl_gpgpu_bind_image;
diff --git a/src/intel/intel_driver.c b/src/intel/intel_driver.c
index 9e989b6f..afa44868 100644
--- a/src/intel/intel_driver.c
+++ b/src/intel/intel_driver.c
@@ -479,7 +479,7 @@ static uint32_t intel_buffer_get_tiling_align(cl_context ctx, uint32_t tiling_mo
case CL_NO_TILE:
if (dim == 1) { //vertical alignment
- if (gen_ver == 8)
+ if (gen_ver == 8 || gen_ver == 9) //SKL 1D array need 4 alignment qpitch
ret = 4;
else
ret = 2;
diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c
index a4e2b7ac..36f6eefa 100644
--- a/src/intel/intel_gpgpu.c
+++ b/src/intel/intel_gpgpu.c
@@ -1158,10 +1158,12 @@ intel_gpgpu_bind_image_gen7(intel_gpgpu_t *gpgpu,
uint32_t obj_bo_offset,
uint32_t format,
cl_mem_object_type type,
+ uint32_t bpp,
int32_t w,
int32_t h,
int32_t depth,
int32_t pitch,
+ int32_t slice_pitch,
int32_t tiling)
{
surface_heap_t *heap = gpgpu->aux_buf.bo->virtual + gpgpu->aux_offset.surface_heap_offset;
@@ -1204,10 +1206,12 @@ intel_gpgpu_bind_image_gen75(intel_gpgpu_t *gpgpu,
uint32_t obj_bo_offset,
uint32_t format,
cl_mem_object_type type,
+ uint32_t bpp,
int32_t w,
int32_t h,
int32_t depth,
int32_t pitch,
+ int32_t slice_pitch,
int32_t tiling)
{
surface_heap_t *heap = gpgpu->aux_buf.bo->virtual + gpgpu->aux_offset.surface_heap_offset;
@@ -1252,10 +1256,12 @@ intel_gpgpu_bind_image_gen8(intel_gpgpu_t *gpgpu,
uint32_t obj_bo_offset,
uint32_t format,
cl_mem_object_type type,
+ uint32_t bpp,
int32_t w,
int32_t h,
int32_t depth,
int32_t pitch,
+ int32_t slice_pitch,
int32_t tiling)
{
surface_heap_t *heap = gpgpu->aux_buf.bo->virtual + gpgpu->aux_offset.surface_heap_offset;
@@ -1311,6 +1317,82 @@ intel_gpgpu_bind_image_gen8(intel_gpgpu_t *gpgpu,
}
+/**
+ * Fill in the surface state for an image at binding-table slot `index`,
+ * using the gen8_surface_state_t layout with the SKL qpitch rules, point the
+ * binding-table entry at it and emit a relocation for the image BO address.
+ *
+ * @param gpgpu         GPGPU state owning the aux buffer / surface heap.
+ * @param index         Binding-table slot; must be < GEN_MAX_SURFACES.
+ * @param obj_bo        Buffer object backing the image.
+ * @param obj_bo_offset Offset of the image inside obj_bo.
+ * @param format        Value written verbatim to the surface_format field.
+ * @param type          CL image type; mapped through get_surface_type().
+ * @param bpp           Divisor turning slice_pitch into the 1D-array qpitch
+ *                      (presumably bytes per pixel -- confirm with callers).
+ *                      Must be non-zero for 1D array images.
+ * @param w, h, depth   Image extents; programmed as (value - 1).
+ * @param pitch         Row pitch; programmed as (pitch - 1).
+ * @param slice_pitch   Slice pitch; only used for 1D array surfaces.
+ * @param tiling        GPGPU_NO_TILE, GPGPU_TILE_X or GPGPU_TILE_Y.
+ */
static void
+intel_gpgpu_bind_image_gen9(intel_gpgpu_t *gpgpu,
+                            uint32_t index,
+                            dri_bo* obj_bo,
+                            uint32_t obj_bo_offset,
+                            uint32_t format,
+                            cl_mem_object_type type,
+                            uint32_t bpp,
+                            int32_t w,
+                            int32_t h,
+                            int32_t depth,
+                            int32_t pitch,
+                            int32_t slice_pitch,
+                            int32_t tiling)
+{
+  surface_heap_t *heap = gpgpu->aux_buf.bo->virtual + gpgpu->aux_offset.surface_heap_offset;
+  gen8_surface_state_t *ss;
+  uint64_t base_addr;
+
+  /* Check the slot before touching heap->surface[] or binding_table[]:
+   * checking after the writes cannot prevent an out-of-bounds store. */
+  assert(index < GEN_MAX_SURFACES);
+
+  ss = (gen8_surface_state_t *) &heap->surface[index * sizeof(gen8_surface_state_t)];
+  memset(ss, 0, sizeof(*ss));
+  ss->ss0.vertical_line_stride = 0;
+  ss->ss0.surface_type = get_surface_type(gpgpu, index, type);
+  ss->ss0.surface_format = format;
+
+  if (intel_is_surface_array(type)) {
+    if (ss->ss0.surface_type == I965_SURFACE_1D) {
+      /* 1D arrays: qpitch is derived from the slice distance in pixels
+       * (slice_pitch / bpp), rounded up to a multiple of 4 and stored / 4. */
+      assert(bpp != 0);
+      ss->ss0.surface_array = 1;
+      ss->ss1.surface_qpitch = (slice_pitch / bpp + 3) / 4;
+    } else if (ss->ss0.surface_type == I965_SURFACE_2D) {
+      /* 2D arrays: qpitch is derived from the height in rows instead. */
+      ss->ss0.surface_array = 1;
+      ss->ss1.surface_qpitch = (h + 3) / 4;
+    }
+  }
+
+  /* 3D surfaces use the row-based qpitch but are not flagged as arrays. */
+  if (ss->ss0.surface_type == I965_SURFACE_3D)
+    ss->ss1.surface_qpitch = (h + 3) / 4;
+
+  /* NOTE(review): encoding 1 presumably selects 4-pixel/4-row alignment on
+   * Gen8+, matching the "+3)/4" rounding above -- confirm against the PRM. */
+  ss->ss0.horizontal_alignment = 1;
+  ss->ss0.vertical_alignment = 1;
+
+  if (tiling == GPGPU_TILE_X) {
+    ss->ss0.tile_mode = GEN8_TILEMODE_XMAJOR;
+  } else if (tiling == GPGPU_TILE_Y) {
+    ss->ss0.tile_mode = GEN8_TILEMODE_YMAJOR;
+  } else {
+    /* tile_mode stays 0 from the memset; W mode is not supported now. */
+    assert(tiling == GPGPU_NO_TILE);
+  }
+
+  ss->ss2.width = w - 1;
+  ss->ss2.height = h - 1;
+  ss->ss3.depth = depth - 1;
+
+  /* Presumed GPU address of the image, split into the two 32-bit fields;
+   * the relocation emitted below covers this location. */
+  base_addr = obj_bo->offset64 + obj_bo_offset;
+  ss->ss8.surface_base_addr_lo = base_addr & 0xffffffff;
+  ss->ss9.surface_base_addr_hi = (base_addr >> 32) & 0xffffffff;
+
+  ss->ss4.render_target_view_ext = depth - 1;
+  ss->ss4.min_array_elt = 0;
+  ss->ss3.surface_pitch = pitch - 1;
+
+  ss->ss1.mem_obj_ctrl_state = cl_gpgpu_get_cache_ctrl();
+  ss->ss7.shader_channel_select_red = I965_SURCHAN_SELECT_RED;
+  ss->ss7.shader_channel_select_green = I965_SURCHAN_SELECT_GREEN;
+  ss->ss7.shader_channel_select_blue = I965_SURCHAN_SELECT_BLUE;
+  ss->ss7.shader_channel_select_alpha = I965_SURCHAN_SELECT_ALPHA;
+  ss->ss0.render_cache_rw_mode = 1; /* XXX do we need to set it? */
+
+  /* Binding-table entries hold the surface state's offset inside the heap. */
+  heap->binding_table[index] = offsetof(surface_heap_t, surface) +
+                               index * surface_state_sz;
+
+  /* Relocate the base-address field (ss8) of this surface state against
+   * obj_bo, with obj_bo_offset as the delta into the target BO. */
+  dri_bo_emit_reloc(gpgpu->aux_buf.bo,
+                    I915_GEM_DOMAIN_RENDER,
+                    I915_GEM_DOMAIN_RENDER,
+                    obj_bo_offset,
+                    gpgpu->aux_offset.surface_heap_offset +
+                    heap->binding_table[index] +
+                    offsetof(gen8_surface_state_t, ss8),
+                    obj_bo);
+}
+
+static void
intel_gpgpu_bind_buf(intel_gpgpu_t *gpgpu, drm_intel_bo *buf, uint32_t offset,
uint32_t internal_offset, uint32_t size, uint8_t bti)
{
@@ -2094,7 +2176,7 @@ intel_set_gpgpu_callbacks(int device_id)
return;
}
if (IS_SKYLAKE(device_id)) {
- cl_gpgpu_bind_image = (cl_gpgpu_bind_image_cb *) intel_gpgpu_bind_image_gen8;
+ cl_gpgpu_bind_image = (cl_gpgpu_bind_image_cb *) intel_gpgpu_bind_image_gen9;
intel_gpgpu_set_L3 = intel_gpgpu_set_L3_gen8;
cl_gpgpu_get_cache_ctrl = (cl_gpgpu_get_cache_ctrl_cb *)intel_gpgpu_get_cache_ctrl_gen9;
intel_gpgpu_get_scratch_index = intel_gpgpu_get_scratch_index_gen8;