diff options
-rw-r--r-- | src/cl_command_queue_gen7.c | 2 | ||||
-rw-r--r-- | src/intel/intel_gpgpu.c | 36 |
2 files changed, 36 insertions, 2 deletions
diff --git a/src/cl_command_queue_gen7.c b/src/cl_command_queue_gen7.c
index ba015ca7..734267a2 100644
--- a/src/cl_command_queue_gen7.c
+++ b/src/cl_command_queue_gen7.c
@@ -31,7 +31,7 @@
 #include <string.h>
 
 #define MAX_GROUP_SIZE_IN_HALFSLICE 512
-static INLINE size_t cl_kernel_compute_batch_sz(cl_kernel k) { return 256+128; }
+static INLINE size_t cl_kernel_compute_batch_sz(cl_kernel k) { return 256+256; }
 
 /* "Varing" payload is the part of the curbe that changes accross threads in the
  * same work group. Right now, it consists in local IDs and block IPs
diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c
index 958d5aff..9e442c05 100644
--- a/src/intel/intel_gpgpu.c
+++ b/src/intel/intel_gpgpu.c
@@ -104,6 +104,9 @@ intel_gpgpu_load_curbe_buffer_t *intel_gpgpu_load_curbe_buffer = NULL;
 typedef void (intel_gpgpu_load_idrt_t)(intel_gpgpu_t *gpgpu);
 intel_gpgpu_load_idrt_t *intel_gpgpu_load_idrt = NULL;
 
+typedef void (intel_gpgpu_pipe_control_t)(intel_gpgpu_t *gpgpu);
+intel_gpgpu_pipe_control_t *intel_gpgpu_pipe_control = NULL;
+
 static void
 intel_gpgpu_sync(void *buf)
 {
@@ -527,7 +530,7 @@ intel_gpgpu_write_timestamp(intel_gpgpu_t *gpgpu, int idx)
 }
 
 static void
-intel_gpgpu_pipe_control(intel_gpgpu_t *gpgpu)
+intel_gpgpu_pipe_control_gen7(intel_gpgpu_t *gpgpu)
 {
   gen6_pipe_control_t* pc = (gen6_pipe_control_t*)
     intel_batchbuffer_alloc_space(gpgpu->batch, sizeof(gen6_pipe_control_t));
@@ -546,6 +549,34 @@ intel_gpgpu_pipe_control(intel_gpgpu_t *gpgpu)
 }
 
 static void
+intel_gpgpu_pipe_control_gen75(intel_gpgpu_t *gpgpu)
+{
+  gen6_pipe_control_t* pc = (gen6_pipe_control_t*)
+    intel_batchbuffer_alloc_space(gpgpu->batch, sizeof(gen6_pipe_control_t));
+  memset(pc, 0, sizeof(*pc));
+  pc->dw0.length = SIZEOF32(gen6_pipe_control_t) - 2;
+  pc->dw0.instruction_subopcode = GEN7_PIPE_CONTROL_SUBOPCODE_3D_CONTROL;
+  pc->dw0.instruction_opcode = GEN7_PIPE_CONTROL_OPCODE_3D_CONTROL;
+  pc->dw0.instruction_pipeline = GEN7_PIPE_CONTROL_3D;
+  pc->dw0.instruction_type = GEN7_PIPE_CONTROL_INSTRUCTION_GFX;
+  pc->dw1.cs_stall = 1;
+  pc->dw1.dc_flush_enable = 1;
+
+  pc = (gen6_pipe_control_t*)
+    intel_batchbuffer_alloc_space(gpgpu->batch, sizeof(gen6_pipe_control_t));
+  memset(pc, 0, sizeof(*pc));
+  pc->dw0.length = SIZEOF32(gen6_pipe_control_t) - 2;
+  pc->dw0.instruction_subopcode = GEN7_PIPE_CONTROL_SUBOPCODE_3D_CONTROL;
+  pc->dw0.instruction_opcode = GEN7_PIPE_CONTROL_OPCODE_3D_CONTROL;
+  pc->dw0.instruction_pipeline = GEN7_PIPE_CONTROL_3D;
+  pc->dw0.instruction_type = GEN7_PIPE_CONTROL_INSTRUCTION_GFX;
+  pc->dw1.render_target_cache_flush_enable = 1;
+  pc->dw1.texture_cache_invalidation_enable = 1;
+  pc->dw1.cs_stall = 1;
+  ADVANCE_BATCH(gpgpu->batch);
+}
+
+static void
 intel_gpgpu_set_L3_gen7(intel_gpgpu_t *gpgpu, uint32_t use_slm)
 {
   BEGIN_BATCH(gpgpu->batch, 9);
@@ -1910,6 +1941,7 @@ intel_set_gpgpu_callbacks(int device_id)
     intel_gpgpu_load_curbe_buffer = intel_gpgpu_load_curbe_buffer_gen8;
     intel_gpgpu_load_idrt = intel_gpgpu_load_idrt_gen8;
     cl_gpgpu_bind_sampler = (cl_gpgpu_bind_sampler_cb *) intel_gpgpu_bind_sampler_gen8;
+    intel_gpgpu_pipe_control = intel_gpgpu_pipe_control_gen7;
     return;
   }
 
@@ -1928,6 +1960,7 @@ intel_set_gpgpu_callbacks(int device_id)
     intel_gpgpu_post_action = intel_gpgpu_post_action_gen75;
     intel_gpgpu_read_ts_reg = intel_gpgpu_read_ts_reg_gen7; //HSW same as ivb
     intel_gpgpu_setup_bti = intel_gpgpu_setup_bti_gen75;
+    intel_gpgpu_pipe_control = intel_gpgpu_pipe_control_gen75;
   }
   else if (IS_IVYBRIDGE(device_id)) {
     cl_gpgpu_bind_image = (cl_gpgpu_bind_image_cb *) intel_gpgpu_bind_image_gen7;
@@ -1942,5 +1975,6 @@ intel_set_gpgpu_callbacks(int device_id)
     intel_gpgpu_get_scratch_index = intel_gpgpu_get_scratch_index_gen7;
     intel_gpgpu_post_action = intel_gpgpu_post_action_gen7;
     intel_gpgpu_setup_bti = intel_gpgpu_setup_bti_gen7;
+    intel_gpgpu_pipe_control = intel_gpgpu_pipe_control_gen7;
   }
 }