diff options
author | Zhigang Gong <zhigang.gong@intel.com> | 2014-12-31 10:02:30 +0800 |
---|---|---|
committer | Zhigang Gong <zhigang.gong@intel.com> | 2015-01-07 16:06:03 +0800 |
commit | ef7127c03bd533277afc443b335c37a69927250a (patch) | |
tree | cf55ba1a532fba1af4bec6354ca1f775db6fc711 /src | |
parent | 688867be7e09b163e86966b1e6581bcfe22a0dfe (diff) |
CL/Driver: enable atomics in L3 for HSW.
This can yield a more than 10x speedup for some atomic stress workloads.
Signed-off-by: Zhigang Gong <zhigang.gong@intel.com>
Reviewed-by: "Yang, Rong R" <rong.r.yang@intel.com>
Diffstat (limited to 'src')
-rw-r--r-- | src/intel/intel_defines.h | 4 | ||||
-rw-r--r-- | src/intel/intel_gpgpu.c | 11 |
2 files changed, 14 insertions, 1 deletion
diff --git a/src/intel/intel_defines.h b/src/intel/intel_defines.h
index e983718e..044c0049 100644
--- a/src/intel/intel_defines.h
+++ b/src/intel/intel_defines.h
@@ -304,6 +304,10 @@
 #define URB_SIZE(intel) (IS_IGDNG(intel->device_id) ? 1024 : \
                          IS_G4X(intel->device_id) ? 384 : 256)
 
+// HSW
+#define HSW_SCRATCH1_OFFSET (0xB038)
+#define HSW_ROW_CHICKEN3_HDC_OFFSET (0xE49C)
+
 // L3 cache stuff
 #define GEN7_L3_SQC_REG1_ADDRESS_OFFSET (0XB010)
 #define GEN7_L3_CNTL_REG2_ADDRESS_OFFSET (0xB020)
diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c
index fb03bcc7..e6e37fb7 100644
--- a/src/intel/intel_gpgpu.c
+++ b/src/intel/intel_gpgpu.c
@@ -632,7 +632,16 @@ static void
 intel_gpgpu_set_L3_gen75(intel_gpgpu_t *gpgpu, uint32_t use_slm)
 {
   /* still set L3 in batch buffer for fulsim. */
-  BEGIN_BATCH(gpgpu->batch, 9);
+  BEGIN_BATCH(gpgpu->batch, 15);
+  OUT_BATCH(gpgpu->batch, CMD_LOAD_REGISTER_IMM | 1); /* length - 2 */
+  /* FIXME: KMD always disable the atomic in L3 for some reason.
+     I checked the spec, and don't think we need that workaround now.
+     Before I send a patch to kernel, let's just enable it here. */
+  OUT_BATCH(gpgpu->batch, HSW_SCRATCH1_OFFSET);
+  OUT_BATCH(gpgpu->batch, 0); /* enable atomic in L3 */
+  OUT_BATCH(gpgpu->batch, CMD_LOAD_REGISTER_IMM | 1); /* length - 2 */
+  OUT_BATCH(gpgpu->batch, HSW_ROW_CHICKEN3_HDC_OFFSET);
+  OUT_BATCH(gpgpu->batch, (1 << 6ul) << 16); /* enable atomic in L3 */
   OUT_BATCH(gpgpu->batch, CMD_LOAD_REGISTER_IMM | 1); /* length - 2 */
   OUT_BATCH(gpgpu->batch, GEN7_L3_SQC_REG1_ADDRESS_OFFSET);
   OUT_BATCH(gpgpu->batch, 0x00800000);