diff options
author | Yang Rong <rong.r.yang@intel.com> | 2015-01-29 14:16:12 +0800 |
---|---|---|
committer | Yang Rong <rong.r.yang@intel.com> | 2015-01-29 16:14:07 +0800 |
commit | 738b5e64c99d0071e2333838c4f0530ba1f950cc (patch) | |
tree | ad330343e2613856ba8b3d9aedcba3cd0bd0ebc3 | |
parent | e9078de44d3cb0a00a8b9b3aaf057f3397b0f7c1 (diff) |
SKL: Add function intel_gpgpu_bind_image_gen9.
SKL's qpitch differs from BDW's: for SURFTYPE_1D, qpitch means the distance in pixels between array slices.
So add two parameters slice_pitch and bpp to calculate it.
-rw-r--r-- | src/cl_command_queue.c | 8 | ||||
-rw-r--r-- | src/cl_driver.h | 2 | ||||
-rw-r--r-- | src/intel/intel_driver.c | 2 | ||||
-rw-r--r-- | src/intel/intel_gpgpu.c | 84 |
4 files changed, 90 insertions, 6 deletions
diff --git a/src/cl_command_queue.c b/src/cl_command_queue.c index 3c04d6d4..be6def15 100644 --- a/src/cl_command_queue.c +++ b/src/cl_command_queue.c @@ -140,16 +140,16 @@ cl_command_queue_bind_image(cl_command_queue queue, cl_kernel k) image = cl_mem_image(k->args[id].mem); set_image_info(k->curbe, &k->images[i], image); cl_gpgpu_bind_image(gpgpu, k->images[i].idx, image->base.bo, image->offset, - image->intel_fmt, image->image_type, + image->intel_fmt, image->image_type, image->bpp, image->w, image->h, image->depth, - image->row_pitch, (cl_gpgpu_tiling)image->tiling); + image->row_pitch, image->slice_pitch, (cl_gpgpu_tiling)image->tiling); // TODO, this workaround is for GEN7/GEN75 only, we may need to do it in the driver layer // on demand. if (image->image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY) cl_gpgpu_bind_image(gpgpu, k->images[i].idx + BTI_WORKAROUND_IMAGE_OFFSET, image->base.bo, image->offset, - image->intel_fmt, image->image_type, + image->intel_fmt, image->image_type, image->bpp, image->w, image->h, image->depth, - image->row_pitch, (cl_gpgpu_tiling)image->tiling); + image->row_pitch, image->slice_pitch, (cl_gpgpu_tiling)image->tiling); } return CL_SUCCESS; } diff --git a/src/cl_driver.h b/src/cl_driver.h index c88b9be3..16f8bbab 100644 --- a/src/cl_driver.h +++ b/src/cl_driver.h @@ -145,11 +145,13 @@ typedef void (cl_gpgpu_bind_image_cb)(cl_gpgpu state, cl_buffer obj_bo, uint32_t obj_bo_offset, uint32_t format, + uint32_t bpp, uint32_t type, int32_t w, int32_t h, int32_t depth, int pitch, + int32_t slice_pitch, cl_gpgpu_tiling tiling); extern cl_gpgpu_bind_image_cb *cl_gpgpu_bind_image; diff --git a/src/intel/intel_driver.c b/src/intel/intel_driver.c index 9e989b6f..afa44868 100644 --- a/src/intel/intel_driver.c +++ b/src/intel/intel_driver.c @@ -479,7 +479,7 @@ static uint32_t intel_buffer_get_tiling_align(cl_context ctx, uint32_t tiling_mo case CL_NO_TILE: if (dim == 1) { //vertical alignment - if (gen_ver == 8) + if (gen_ver == 8 || gen_ver == 9) 
//SKL 1D array need 4 alignment qpitch ret = 4; else ret = 2; diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c index a4e2b7ac..36f6eefa 100644 --- a/src/intel/intel_gpgpu.c +++ b/src/intel/intel_gpgpu.c @@ -1158,10 +1158,12 @@ intel_gpgpu_bind_image_gen7(intel_gpgpu_t *gpgpu, uint32_t obj_bo_offset, uint32_t format, cl_mem_object_type type, + uint32_t bpp, int32_t w, int32_t h, int32_t depth, int32_t pitch, + int32_t slice_pitch, int32_t tiling) { surface_heap_t *heap = gpgpu->aux_buf.bo->virtual + gpgpu->aux_offset.surface_heap_offset; @@ -1204,10 +1206,12 @@ intel_gpgpu_bind_image_gen75(intel_gpgpu_t *gpgpu, uint32_t obj_bo_offset, uint32_t format, cl_mem_object_type type, + uint32_t bpp, int32_t w, int32_t h, int32_t depth, int32_t pitch, + int32_t slice_pitch, int32_t tiling) { surface_heap_t *heap = gpgpu->aux_buf.bo->virtual + gpgpu->aux_offset.surface_heap_offset; @@ -1252,10 +1256,12 @@ intel_gpgpu_bind_image_gen8(intel_gpgpu_t *gpgpu, uint32_t obj_bo_offset, uint32_t format, cl_mem_object_type type, + uint32_t bpp, int32_t w, int32_t h, int32_t depth, int32_t pitch, + int32_t slice_pitch, int32_t tiling) { surface_heap_t *heap = gpgpu->aux_buf.bo->virtual + gpgpu->aux_offset.surface_heap_offset; @@ -1311,6 +1317,82 @@ intel_gpgpu_bind_image_gen8(intel_gpgpu_t *gpgpu, } static void +intel_gpgpu_bind_image_gen9(intel_gpgpu_t *gpgpu, + uint32_t index, + dri_bo* obj_bo, + uint32_t obj_bo_offset, + uint32_t format, + cl_mem_object_type type, + uint32_t bpp, + int32_t w, + int32_t h, + int32_t depth, + int32_t pitch, + int32_t slice_pitch, + int32_t tiling) +{ + surface_heap_t *heap = gpgpu->aux_buf.bo->virtual + gpgpu->aux_offset.surface_heap_offset; + gen8_surface_state_t *ss = (gen8_surface_state_t *) &heap->surface[index * sizeof(gen8_surface_state_t)]; + memset(ss, 0, sizeof(*ss)); + ss->ss0.vertical_line_stride = 0; // always choose VALIGN_2 + ss->ss0.surface_type = get_surface_type(gpgpu, index, type); + ss->ss0.surface_format = format; + if 
(intel_is_surface_array(type) && ss->ss0.surface_type == I965_SURFACE_1D) { + ss->ss0.surface_array = 1; + ss->ss1.surface_qpitch = (slice_pitch/bpp + 3)/4; //align_h + } + + if (intel_is_surface_array(type) && ss->ss0.surface_type == I965_SURFACE_2D) { + ss->ss0.surface_array = 1; + ss->ss1.surface_qpitch = (h + 3)/4; + } + + if(ss->ss0.surface_type == I965_SURFACE_3D) + ss->ss1.surface_qpitch = (h + 3)/4; + + ss->ss0.horizontal_alignment = 1; + ss->ss0.vertical_alignment = 1; + + if (tiling == GPGPU_TILE_X) { + ss->ss0.tile_mode = GEN8_TILEMODE_XMAJOR; + } else if (tiling == GPGPU_TILE_Y) { + ss->ss0.tile_mode = GEN8_TILEMODE_YMAJOR; + } else + assert(tiling == GPGPU_NO_TILE);// W mode is not supported now. + + ss->ss2.width = w - 1; + ss->ss2.height = h - 1; + ss->ss3.depth = depth - 1; + + ss->ss8.surface_base_addr_lo = (obj_bo->offset64 + obj_bo_offset) & 0xffffffff; + ss->ss9.surface_base_addr_hi = ((obj_bo->offset64 + obj_bo_offset) >> 32) & 0xffffffff; + + ss->ss4.render_target_view_ext = depth - 1; + ss->ss4.min_array_elt = 0; + ss->ss3.surface_pitch = pitch - 1; + + ss->ss1.mem_obj_ctrl_state = cl_gpgpu_get_cache_ctrl(); + ss->ss7.shader_channel_select_red = I965_SURCHAN_SELECT_RED; + ss->ss7.shader_channel_select_green = I965_SURCHAN_SELECT_GREEN; + ss->ss7.shader_channel_select_blue = I965_SURCHAN_SELECT_BLUE; + ss->ss7.shader_channel_select_alpha = I965_SURCHAN_SELECT_ALPHA; + ss->ss0.render_cache_rw_mode = 1; /* XXX do we need to set it? 
*/ + + heap->binding_table[index] = offsetof(surface_heap_t, surface) + + index * surface_state_sz; + dri_bo_emit_reloc(gpgpu->aux_buf.bo, + I915_GEM_DOMAIN_RENDER, + I915_GEM_DOMAIN_RENDER, + obj_bo_offset, + gpgpu->aux_offset.surface_heap_offset + + heap->binding_table[index] + + offsetof(gen8_surface_state_t, ss8), + obj_bo); + + assert(index < GEN_MAX_SURFACES); +} + +static void intel_gpgpu_bind_buf(intel_gpgpu_t *gpgpu, drm_intel_bo *buf, uint32_t offset, uint32_t internal_offset, uint32_t size, uint8_t bti) { @@ -2094,7 +2176,7 @@ intel_set_gpgpu_callbacks(int device_id) return; } if (IS_SKYLAKE(device_id)) { - cl_gpgpu_bind_image = (cl_gpgpu_bind_image_cb *) intel_gpgpu_bind_image_gen8; + cl_gpgpu_bind_image = (cl_gpgpu_bind_image_cb *) intel_gpgpu_bind_image_gen9; intel_gpgpu_set_L3 = intel_gpgpu_set_L3_gen8; cl_gpgpu_get_cache_ctrl = (cl_gpgpu_get_cache_ctrl_cb *)intel_gpgpu_get_cache_ctrl_gen9; intel_gpgpu_get_scratch_index = intel_gpgpu_get_scratch_index_gen8; |