diff options
author | Yang Rong <rong.r.yang@intel.com> | 2015-01-29 13:46:23 +0800 |
---|---|---|
committer | Yang Rong <rong.r.yang@intel.com> | 2015-01-29 16:14:07 +0800 |
commit | ca2849214de7bec872a15d2ce3bcc9877a328898 (patch) | |
tree | 3a6abd164bf22e9b102ab97149075cdf13445675 | |
parent | e70bd079498255b6b14f3c3e12ac4628d1b24813 (diff) |
SKL: Add the gen9 version of the intel_build_idrt function.
Correct struct gen8_interface_descriptor.
Add function intel_gpgpu_build_idrt_gen9 for the different SLM size setting.
Disable SKL's global barrier for now.
-rw-r--r-- | src/intel/intel_gpgpu.c | 46 | ||||
-rw-r--r-- | src/intel/intel_structs.h | 5 |
2 files changed, 48 insertions, 3 deletions
diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c index cd45ff9f..c02a95c9 100644 --- a/src/intel/intel_gpgpu.c +++ b/src/intel/intel_gpgpu.c @@ -1433,6 +1433,50 @@ intel_gpgpu_build_idrt_gen8(intel_gpgpu_t *gpgpu, cl_gpgpu_kernel *kernel) desc->desc6.slm_sz = slm_sz; } +static void +intel_gpgpu_build_idrt_gen9(intel_gpgpu_t *gpgpu, cl_gpgpu_kernel *kernel) +{ + gen8_interface_descriptor_t *desc; + + desc = (gen8_interface_descriptor_t*) (gpgpu->aux_buf.bo->virtual + gpgpu->aux_offset.idrt_offset); + + memset(desc, 0, sizeof(*desc)); + desc->desc0.kernel_start_pointer = 0; /* reloc */ + desc->desc2.single_program_flow = 0; + desc->desc2.floating_point_mode = 0; /* use IEEE-754 rule */ + desc->desc6.rounding_mode = 0; /* round to nearest even */ + + assert((gpgpu->aux_buf.bo->offset + gpgpu->aux_offset.sampler_state_offset) % 32 == 0); + desc->desc3.sampler_state_pointer = gpgpu->aux_offset.sampler_state_offset >> 5; + desc->desc4.binding_table_entry_count = 0; /* no prefetch */ + desc->desc4.binding_table_pointer = 0; + desc->desc5.curbe_read_len = kernel->curbe_sz / 32; + desc->desc5.curbe_read_offset = 0; + + /* Barriers / SLM are automatically handled on Gen7+ */ + size_t slm_sz = kernel->slm_sz; + /* group_threads_num should not be set to 0 even if the barrier is disabled per bspec */ + desc->desc6.group_threads_num = kernel->thread_n; + desc->desc6.barrier_enable = kernel->use_slm; + if (slm_sz == 0) + slm_sz = 0; + else if (slm_sz <= 1*KB) + slm_sz = 1; + else if (slm_sz <= 2*KB) + slm_sz = 2; + else if (slm_sz <= 4*KB) + slm_sz = 3; + else if (slm_sz <= 8*KB) + slm_sz = 4; + else if (slm_sz <= 16*KB) + slm_sz = 5; + else if (slm_sz <= 32*KB) + slm_sz = 6; + else + slm_sz = 7; + desc->desc6.slm_sz = slm_sz; +} + static int intel_gpgpu_upload_curbes(intel_gpgpu_t *gpgpu, const void* data, uint32_t size) { @@ -2040,7 +2084,7 @@ intel_set_gpgpu_callbacks(int device_id) intel_gpgpu_setup_bti = intel_gpgpu_setup_bti_gen8; intel_gpgpu_load_vfe_state = 
intel_gpgpu_load_vfe_state_gen8; cl_gpgpu_walker = (cl_gpgpu_walker_cb *)intel_gpgpu_walker_gen8; - intel_gpgpu_build_idrt = intel_gpgpu_build_idrt_gen8; + intel_gpgpu_build_idrt = intel_gpgpu_build_idrt_gen9; intel_gpgpu_load_curbe_buffer = intel_gpgpu_load_curbe_buffer_gen8; intel_gpgpu_load_idrt = intel_gpgpu_load_idrt_gen8; cl_gpgpu_bind_sampler = (cl_gpgpu_bind_sampler_cb *) intel_gpgpu_bind_sampler_gen8; diff --git a/src/intel/intel_structs.h b/src/intel/intel_structs.h index b4635f49..fd6a82b9 100644 --- a/src/intel/intel_structs.h +++ b/src/intel/intel_structs.h @@ -142,8 +142,9 @@ typedef struct gen8_interface_descriptor } desc5; struct { - uint32_t group_threads_num:8; /* 0..64, 0 - no barrier use */ - uint32_t barrier_return_byte:8; + uint32_t group_threads_num:10; /* 0..64, 0 - no barrier use */ + uint32_t pad:5; + uint32_t global_barrier_enable:1; uint32_t slm_sz:5; /* 0..16 - 0K..64K */ uint32_t barrier_enable:1; uint32_t rounding_mode:2; |