2 files changed, 36 insertions, 2 deletions
diff --git a/src/cl_command_queue_gen7.c b/src/cl_command_queue_gen7.c
index ba015ca7..734267a2 100644
--- a/src/cl_command_queue_gen7.c
+++ b/src/cl_command_queue_gen7.c
@@ -31,7 +31,7 @@
 #include <string.h>
 
 #define MAX_GROUP_SIZE_IN_HALFSLICE   512
-static INLINE size_t cl_kernel_compute_batch_sz(cl_kernel k) { return 256+128; }
+static INLINE size_t cl_kernel_compute_batch_sz(cl_kernel k) { return 256+256; }
 
 /* "Varing" payload is the part of the curbe that changes accross threads in the
  *  same work group. Right now, it consists in local IDs and block IPs
diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c
index 958d5aff..9e442c05 100644
--- a/src/intel/intel_gpgpu.c
+++ b/src/intel/intel_gpgpu.c
@@ -104,6 +104,9 @@ intel_gpgpu_load_curbe_buffer_t *intel_gpgpu_load_curbe_buffer = NULL;
 typedef void (intel_gpgpu_load_idrt_t)(intel_gpgpu_t *gpgpu);
 intel_gpgpu_load_idrt_t *intel_gpgpu_load_idrt = NULL;
 
+typedef void (intel_gpgpu_pipe_control_t)(intel_gpgpu_t *gpgpu);
+intel_gpgpu_pipe_control_t *intel_gpgpu_pipe_control = NULL;
+
 static void
 intel_gpgpu_sync(void *buf)
 {
@@ -527,7 +530,7 @@ intel_gpgpu_write_timestamp(intel_gpgpu_t *gpgpu, int idx)
 }
 
 static void
-intel_gpgpu_pipe_control(intel_gpgpu_t *gpgpu)
+intel_gpgpu_pipe_control_gen7(intel_gpgpu_t *gpgpu)
 {
   gen6_pipe_control_t* pc = (gen6_pipe_control_t*)
     intel_batchbuffer_alloc_space(gpgpu->batch, sizeof(gen6_pipe_control_t));
@@ -546,6 +549,34 @@ intel_gpgpu_pipe_control(intel_gpgpu_t *gpgpu)
 }
 
 static void
+intel_gpgpu_pipe_control_gen75(intel_gpgpu_t *gpgpu)
+{
+  gen6_pipe_control_t* pc = (gen6_pipe_control_t*)
+    intel_batchbuffer_alloc_space(gpgpu->batch, sizeof(gen6_pipe_control_t));
+  memset(pc, 0, sizeof(*pc));
+  pc->dw0.length = SIZEOF32(gen6_pipe_control_t) - 2;
+  pc->dw0.instruction_subopcode = GEN7_PIPE_CONTROL_SUBOPCODE_3D_CONTROL;
+  pc->dw0.instruction_opcode = GEN7_PIPE_CONTROL_OPCODE_3D_CONTROL;
+  pc->dw0.instruction_pipeline = GEN7_PIPE_CONTROL_3D;
+  pc->dw0.instruction_type = GEN7_PIPE_CONTROL_INSTRUCTION_GFX;
+  pc->dw1.cs_stall = 1;
+  pc->dw1.dc_flush_enable = 1;
+
+  pc = (gen6_pipe_control_t*)
+    intel_batchbuffer_alloc_space(gpgpu->batch, sizeof(gen6_pipe_control_t));
+  memset(pc, 0, sizeof(*pc));
+  pc->dw0.length = SIZEOF32(gen6_pipe_control_t) - 2;
+  pc->dw0.instruction_subopcode = GEN7_PIPE_CONTROL_SUBOPCODE_3D_CONTROL;
+  pc->dw0.instruction_opcode = GEN7_PIPE_CONTROL_OPCODE_3D_CONTROL;
+  pc->dw0.instruction_pipeline = GEN7_PIPE_CONTROL_3D;
+  pc->dw0.instruction_type = GEN7_PIPE_CONTROL_INSTRUCTION_GFX;
+  pc->dw1.render_target_cache_flush_enable = 1;
+  pc->dw1.texture_cache_invalidation_enable = 1;
+  pc->dw1.cs_stall = 1;
+  ADVANCE_BATCH(gpgpu->batch);
+}
+
+static void
 intel_gpgpu_set_L3_gen7(intel_gpgpu_t *gpgpu, uint32_t use_slm)
 {
   BEGIN_BATCH(gpgpu->batch, 9);
@@ -1910,6 +1941,7 @@ intel_set_gpgpu_callbacks(int device_id)
     intel_gpgpu_load_curbe_buffer = intel_gpgpu_load_curbe_buffer_gen8;
     intel_gpgpu_load_idrt = intel_gpgpu_load_idrt_gen8;
     cl_gpgpu_bind_sampler = (cl_gpgpu_bind_sampler_cb *) intel_gpgpu_bind_sampler_gen8;
+    intel_gpgpu_pipe_control = intel_gpgpu_pipe_control_gen7;
     return;
   }
 
@@ -1928,6 +1960,7 @@ intel_set_gpgpu_callbacks(int device_id)
     intel_gpgpu_post_action = intel_gpgpu_post_action_gen75;
     intel_gpgpu_read_ts_reg = intel_gpgpu_read_ts_reg_gen7; //HSW same as ivb
     intel_gpgpu_setup_bti = intel_gpgpu_setup_bti_gen75;
+    intel_gpgpu_pipe_control = intel_gpgpu_pipe_control_gen75;
   }
   else if (IS_IVYBRIDGE(device_id)) {
     cl_gpgpu_bind_image = (cl_gpgpu_bind_image_cb *) intel_gpgpu_bind_image_gen7;
@@ -1942,5 +1975,6 @@ intel_set_gpgpu_callbacks(int device_id)
     intel_gpgpu_get_scratch_index = intel_gpgpu_get_scratch_index_gen7;
     intel_gpgpu_post_action = intel_gpgpu_post_action_gen7;
     intel_gpgpu_setup_bti = intel_gpgpu_setup_bti_gen7;
+    intel_gpgpu_pipe_control = intel_gpgpu_pipe_control_gen7;
   }
 }