summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorYang Rong <rong.r.yang@intel.com>2015-01-29 13:46:23 +0800
committerYang Rong <rong.r.yang@intel.com>2015-01-29 16:14:07 +0800
commitca2849214de7bec872a15d2ce3bcc9877a328898 (patch)
tree3a6abd164bf22e9b102ab97149075cdf13445675
parente70bd079498255b6b14f3c3e12ac4628d1b24813 (diff)
SKL: Add the gen9 intel_build_idrt function.
Correct struct gen8_interface_descriptor. Add function intel_gpgpu_build_idrt_gen9 for the different SLM size setting. Disable SKL's global barrier for now.
-rw-r--r--src/intel/intel_gpgpu.c46
-rw-r--r--src/intel/intel_structs.h5
2 files changed, 48 insertions, 3 deletions
diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c
index cd45ff9f..c02a95c9 100644
--- a/src/intel/intel_gpgpu.c
+++ b/src/intel/intel_gpgpu.c
@@ -1433,6 +1433,50 @@ intel_gpgpu_build_idrt_gen8(intel_gpgpu_t *gpgpu, cl_gpgpu_kernel *kernel)
desc->desc6.slm_sz = slm_sz;
}
+static void
+intel_gpgpu_build_idrt_gen9(intel_gpgpu_t *gpgpu, cl_gpgpu_kernel *kernel)
+{ /* Gen9 variant: fills the interface descriptor stored at idrt_offset in gpgpu's aux buffer from the settings in kernel. Returns nothing. */
+ gen8_interface_descriptor_t *desc;
+ /* The descriptor is written in place through aux_buf.bo->virtual, at idrt_offset. */
+ desc = (gen8_interface_descriptor_t*) (gpgpu->aux_buf.bo->virtual + gpgpu->aux_offset.idrt_offset);
+ /* Zero the whole descriptor first: any field not assigned below (e.g. desc6.global_barrier_enable) stays 0. */
+ memset(desc, 0, sizeof(*desc));
+ desc->desc0.kernel_start_pointer = 0; /* reloc */
+ desc->desc2.single_program_flow = 0;
+ desc->desc2.floating_point_mode = 0; /* use IEEE-754 rule */
+ desc->desc6.rounding_mode = 0; /* round to nearest even */
+ /* The sampler state pointer is stored shifted right by 5, hence the 32-byte alignment check first. */
+ assert((gpgpu->aux_buf.bo->offset + gpgpu->aux_offset.sampler_state_offset) % 32 == 0);
+ desc->desc3.sampler_state_pointer = gpgpu->aux_offset.sampler_state_offset >> 5;
+ desc->desc4.binding_table_entry_count = 0; /* no prefetch */
+ desc->desc4.binding_table_pointer = 0;
+ desc->desc5.curbe_read_len = kernel->curbe_sz / 32; /* curbe size expressed in 32-byte units */
+ desc->desc5.curbe_read_offset = 0;
+ /* Thread-group, barrier and SLM settings (all in desc6) follow. */
+ /* Barriers / SLM are automatically handled on Gen7+ */
+ size_t slm_sz = kernel->slm_sz; /* requested SLM size; overwritten below with its encoded field value */
+ /* group_threads_num should not be set to 0 even if the barrier is disabled per bspec */
+ desc->desc6.group_threads_num = kernel->thread_n;
+ desc->desc6.barrier_enable = kernel->use_slm; /* the barrier bit follows the kernel's use_slm flag */
+ if (slm_sz == 0) /* no SLM requested: the encoding stays 0 (this assignment is a no-op) */
+ slm_sz = 0;
+ else if (slm_sz <= 1*KB) /* otherwise pick the smallest power-of-two bucket that fits: codes 1..6 for 1*KB..32*KB */
+ slm_sz = 1;
+ else if (slm_sz <= 2*KB)
+ slm_sz = 2;
+ else if (slm_sz <= 4*KB)
+ slm_sz = 3;
+ else if (slm_sz <= 8*KB)
+ slm_sz = 4;
+ else if (slm_sz <= 16*KB)
+ slm_sz = 5;
+ else if (slm_sz <= 32*KB)
+ slm_sz = 6;
+ else /* anything above 32*KB gets code 7, with no upper bound check; presumably 64K - TODO confirm against the Gen9 descriptor spec */
+ slm_sz = 7;
+ desc->desc6.slm_sz = slm_sz; /* store the encoded size, not the raw byte count */
+}
+
static int
intel_gpgpu_upload_curbes(intel_gpgpu_t *gpgpu, const void* data, uint32_t size)
{
@@ -2040,7 +2084,7 @@ intel_set_gpgpu_callbacks(int device_id)
intel_gpgpu_setup_bti = intel_gpgpu_setup_bti_gen8;
intel_gpgpu_load_vfe_state = intel_gpgpu_load_vfe_state_gen8;
cl_gpgpu_walker = (cl_gpgpu_walker_cb *)intel_gpgpu_walker_gen8;
- intel_gpgpu_build_idrt = intel_gpgpu_build_idrt_gen8;
+ intel_gpgpu_build_idrt = intel_gpgpu_build_idrt_gen9;
intel_gpgpu_load_curbe_buffer = intel_gpgpu_load_curbe_buffer_gen8;
intel_gpgpu_load_idrt = intel_gpgpu_load_idrt_gen8;
cl_gpgpu_bind_sampler = (cl_gpgpu_bind_sampler_cb *) intel_gpgpu_bind_sampler_gen8;
diff --git a/src/intel/intel_structs.h b/src/intel/intel_structs.h
index b4635f49..fd6a82b9 100644
--- a/src/intel/intel_structs.h
+++ b/src/intel/intel_structs.h
@@ -142,8 +142,9 @@ typedef struct gen8_interface_descriptor
} desc5;
struct {
- uint32_t group_threads_num:8; /* 0..64, 0 - no barrier use */
- uint32_t barrier_return_byte:8;
+ uint32_t group_threads_num:10; /* 0..64, 0 - no barrier use */
+ uint32_t pad:5;
+ uint32_t global_barrier_enable:1;
uint32_t slm_sz:5; /* 0..16 - 0K..64K */
uint32_t barrier_enable:1;
uint32_t rounding_mode:2;