diff options
author | Yang, Rong <rong.r.yang@intel.com> | 2014-12-26 14:57:58 +0800 |
---|---|---|
committer | Zhigang Gong <zhigang.gong@intel.com> | 2014-12-29 09:50:42 +0800 |
commit | c8ce68191ad3489ecd9351fc6f84335610716c86 (patch) | |
tree | d52ecb8881470a7ce9779f6ee55730d2d94fba3a /src | |
parent | e2439a02ab8734cff7554521523ca93668754a57 (diff) |
Separate flush and invalidate in function intel_gpgpu_pipe_control.
HSW has a limitation when issuing a PIPECONTROL with RO Cache Invalidation:
Prior to programming a PIPECONTROL command with any of the RO cache invalidation bit set,
program a PIPECONTROL flush command with CS stall bit and HDC Flush bit set.
So two PIPECONTROL commands must be used to flush and invalidate the L3 cache on HSW.
This patch fixes some random failures in cases which have very heavy DC read/write on HSW.
Signed-off-by: Yang, Rong <rong.r.yang@intel.com>
Reviewed-by: Zhigang Gong <zhigang.gong@linux.intel.com>
Diffstat (limited to 'src')
-rw-r--r-- | src/cl_command_queue_gen7.c | 2 | ||||
-rw-r--r-- | src/intel/intel_gpgpu.c | 36 |
2 files changed, 36 insertions, 2 deletions
diff --git a/src/cl_command_queue_gen7.c b/src/cl_command_queue_gen7.c index ba015ca7..734267a2 100644 --- a/src/cl_command_queue_gen7.c +++ b/src/cl_command_queue_gen7.c @@ -31,7 +31,7 @@ #include <string.h> #define MAX_GROUP_SIZE_IN_HALFSLICE 512 -static INLINE size_t cl_kernel_compute_batch_sz(cl_kernel k) { return 256+128; } +static INLINE size_t cl_kernel_compute_batch_sz(cl_kernel k) { return 256+256; } /* "Varing" payload is the part of the curbe that changes accross threads in the * same work group. Right now, it consists in local IDs and block IPs diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c index 958d5aff..9e442c05 100644 --- a/src/intel/intel_gpgpu.c +++ b/src/intel/intel_gpgpu.c @@ -104,6 +104,9 @@ intel_gpgpu_load_curbe_buffer_t *intel_gpgpu_load_curbe_buffer = NULL; typedef void (intel_gpgpu_load_idrt_t)(intel_gpgpu_t *gpgpu); intel_gpgpu_load_idrt_t *intel_gpgpu_load_idrt = NULL; +typedef void (intel_gpgpu_pipe_control_t)(intel_gpgpu_t *gpgpu); +intel_gpgpu_pipe_control_t *intel_gpgpu_pipe_control = NULL; + static void intel_gpgpu_sync(void *buf) { @@ -527,7 +530,7 @@ intel_gpgpu_write_timestamp(intel_gpgpu_t *gpgpu, int idx) } static void -intel_gpgpu_pipe_control(intel_gpgpu_t *gpgpu) +intel_gpgpu_pipe_control_gen7(intel_gpgpu_t *gpgpu) { gen6_pipe_control_t* pc = (gen6_pipe_control_t*) intel_batchbuffer_alloc_space(gpgpu->batch, sizeof(gen6_pipe_control_t)); @@ -546,6 +549,34 @@ intel_gpgpu_pipe_control(intel_gpgpu_t *gpgpu) } static void +intel_gpgpu_pipe_control_gen75(intel_gpgpu_t *gpgpu) +{ + gen6_pipe_control_t* pc = (gen6_pipe_control_t*) + intel_batchbuffer_alloc_space(gpgpu->batch, sizeof(gen6_pipe_control_t)); + memset(pc, 0, sizeof(*pc)); + pc->dw0.length = SIZEOF32(gen6_pipe_control_t) - 2; + pc->dw0.instruction_subopcode = GEN7_PIPE_CONTROL_SUBOPCODE_3D_CONTROL; + pc->dw0.instruction_opcode = GEN7_PIPE_CONTROL_OPCODE_3D_CONTROL; + pc->dw0.instruction_pipeline = GEN7_PIPE_CONTROL_3D; + pc->dw0.instruction_type = 
GEN7_PIPE_CONTROL_INSTRUCTION_GFX; + pc->dw1.cs_stall = 1; + pc->dw1.dc_flush_enable = 1; + + pc = (gen6_pipe_control_t*) + intel_batchbuffer_alloc_space(gpgpu->batch, sizeof(gen6_pipe_control_t)); + memset(pc, 0, sizeof(*pc)); + pc->dw0.length = SIZEOF32(gen6_pipe_control_t) - 2; + pc->dw0.instruction_subopcode = GEN7_PIPE_CONTROL_SUBOPCODE_3D_CONTROL; + pc->dw0.instruction_opcode = GEN7_PIPE_CONTROL_OPCODE_3D_CONTROL; + pc->dw0.instruction_pipeline = GEN7_PIPE_CONTROL_3D; + pc->dw0.instruction_type = GEN7_PIPE_CONTROL_INSTRUCTION_GFX; + pc->dw1.render_target_cache_flush_enable = 1; + pc->dw1.texture_cache_invalidation_enable = 1; + pc->dw1.cs_stall = 1; + ADVANCE_BATCH(gpgpu->batch); +} + +static void intel_gpgpu_set_L3_gen7(intel_gpgpu_t *gpgpu, uint32_t use_slm) { BEGIN_BATCH(gpgpu->batch, 9); @@ -1910,6 +1941,7 @@ intel_set_gpgpu_callbacks(int device_id) intel_gpgpu_load_curbe_buffer = intel_gpgpu_load_curbe_buffer_gen8; intel_gpgpu_load_idrt = intel_gpgpu_load_idrt_gen8; cl_gpgpu_bind_sampler = (cl_gpgpu_bind_sampler_cb *) intel_gpgpu_bind_sampler_gen8; + intel_gpgpu_pipe_control = intel_gpgpu_pipe_control_gen7; return; } @@ -1928,6 +1960,7 @@ intel_set_gpgpu_callbacks(int device_id) intel_gpgpu_post_action = intel_gpgpu_post_action_gen75; intel_gpgpu_read_ts_reg = intel_gpgpu_read_ts_reg_gen7; //HSW same as ivb intel_gpgpu_setup_bti = intel_gpgpu_setup_bti_gen75; + intel_gpgpu_pipe_control = intel_gpgpu_pipe_control_gen75; } else if (IS_IVYBRIDGE(device_id)) { cl_gpgpu_bind_image = (cl_gpgpu_bind_image_cb *) intel_gpgpu_bind_image_gen7; @@ -1942,5 +1975,6 @@ intel_set_gpgpu_callbacks(int device_id) intel_gpgpu_get_scratch_index = intel_gpgpu_get_scratch_index_gen7; intel_gpgpu_post_action = intel_gpgpu_post_action_gen7; intel_gpgpu_setup_bti = intel_gpgpu_setup_bti_gen7; + intel_gpgpu_pipe_control = intel_gpgpu_pipe_control_gen7; } } |