summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Zhigang Gong <zhigang.gong@intel.com> 2014-12-30 18:15:00 +0800
committer: Zhigang Gong <zhigang.gong@intel.com> 2015-01-05 08:47:39 +0800
commit: 6d2f0b8ba87c841d17fb8e7db1a0521f0306fa5e (patch)
tree: 07237127a26fdf114fbe12e9ade5c8e507524dd1
parent: fe0cf82f3d12e5e4f493e19e23b28817fb9ecf9c (diff)
CL/Driver: enable atomics in L3 for HSW. (branch: hsw_darktable)
This can give more than a 10x speedup for some atomic stress workloads.

Signed-off-by: Zhigang Gong <zhigang.gong@intel.com>
-rw-r--r-- src/intel/intel_defines.h 4
-rw-r--r-- src/intel/intel_gpgpu.c 9
2 files changed, 12 insertions, 1 deletion
diff --git a/src/intel/intel_defines.h b/src/intel/intel_defines.h
index e983718e..a120f411 100644
--- a/src/intel/intel_defines.h
+++ b/src/intel/intel_defines.h
@@ -304,6 +304,10 @@
#define URB_SIZE(intel) (IS_IGDNG(intel->device_id) ? 1024 : \
IS_G4X(intel->device_id) ? 384 : 256)
+// HSW
+#define HSW_SCRATCH1_OFFSET (0xB038)
+#define HSW_ROW_CHICKEN3_HDC_OFFSET (0xE49C)
+
// L3 cache stuff
#define GEN7_L3_SQC_REG1_ADDRESS_OFFSET (0XB010)
#define GEN7_L3_CNTL_REG2_ADDRESS_OFFSET (0xB020)
diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c
index 8d7ef89e..6562ff8c 100644
--- a/src/intel/intel_gpgpu.c
+++ b/src/intel/intel_gpgpu.c
@@ -631,7 +631,14 @@ static void
intel_gpgpu_set_L3_gen75(intel_gpgpu_t *gpgpu, uint32_t use_slm)
{
/* still set L3 in batch buffer for fulsim. */
- BEGIN_BATCH(gpgpu->batch, 9);
+ BEGIN_BATCH(gpgpu->batch, 15);
+ OUT_BATCH(gpgpu->batch, CMD_LOAD_REGISTER_IMM | 1); /* length - 2 */
+ /* FIXME Kernel always disable the atomic in L3, let's enable it here.*/
+ OUT_BATCH(gpgpu->batch, HSW_SCRATCH1_OFFSET);
+ OUT_BATCH(gpgpu->batch, 0); /* enable atomic in L3 */
+ OUT_BATCH(gpgpu->batch, CMD_LOAD_REGISTER_IMM | 1); /* length - 2 */
+ OUT_BATCH(gpgpu->batch, HSW_ROW_CHICKEN3_HDC_OFFSET);
+ OUT_BATCH(gpgpu->batch, (1 << 6ul) << 16); /* enable atomic in L3 */
OUT_BATCH(gpgpu->batch, CMD_LOAD_REGISTER_IMM | 1); /* length - 2 */
OUT_BATCH(gpgpu->batch, GEN7_L3_SQC_REG1_ADDRESS_OFFSET);
OUT_BATCH(gpgpu->batch, 0x00800000);