summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Ruiling Song <ruiling.song@intel.com> 2014-05-16 11:26:30 +0800
committer: Zhigang Gong <zhigang.gong@intel.com> 2014-05-16 20:01:54 +0800
commit: 3cdc1037d0a7c11b6d35f0b0474fa96656c4c98d (patch)
tree: eb2aa6a1be6cdb2b953d601bef6da17daf7dfec1
parent: 9d93bfe2a1e7684b526a2d4ba0fee95e2a6e0b30 (diff)
Runtime: Fix a bug in L3 configuration.
We forgot to set the L3SQCREG1 register. This patch sets it and also adds a more suitable L3 configuration, which improves the LuxMark score by more than 50%. Signed-off-by: Ruiling Song <ruiling.song@intel.com> Reviewed-by: Zhigang Gong <zhigang.gong@linux.intel.com>
-rw-r--r-- src/intel/intel_defines.h 1
-rw-r--r-- src/intel/intel_gpgpu.c 18
2 files changed, 14 insertions, 5 deletions
diff --git a/src/intel/intel_defines.h b/src/intel/intel_defines.h
index 5139e439..02ffde47 100644
--- a/src/intel/intel_defines.h
+++ b/src/intel/intel_defines.h
@@ -299,6 +299,7 @@
IS_G4X(intel->device_id) ? 384 : 256)
// L3 cache stuff
+#define GEN7_L3_SQC_REG1_ADDRESS_OFFSET (0XB010)
#define GEN7_L3_CNTL_REG2_ADDRESS_OFFSET (0xB020)
#define GEN7_L3_CNTL_REG3_ADDRESS_OFFSET (0xB024)
diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c
index 3d6fd301..da8fdc49 100644
--- a/src/intel/intel_gpgpu.c
+++ b/src/intel/intel_gpgpu.c
@@ -286,13 +286,15 @@ intel_gpgpu_load_idrt(intel_gpgpu_t *gpgpu)
static const uint32_t gpgpu_l3_config_reg1[] = {
0x00080040, 0x02040040, 0x00800040, 0x01000038,
0x02000030, 0x01000038, 0x00000038, 0x00000040,
- 0x0A140091, 0x09100091, 0x08900091, 0x08900091
+ 0x0A140091, 0x09100091, 0x08900091, 0x08900091,
+ 0x010000a1
};
static const uint32_t gpgpu_l3_config_reg2[] = {
0x00000000, 0x00000000, 0x00080410, 0x00080410,
0x00040410, 0x00040420, 0x00080420, 0x00080020,
- 0x00204080, 0x00244890, 0x00284490, 0x002444A0
+ 0x00204080, 0x00244890, 0x00284490, 0x002444A0,
+ 0x00040810
};
/* Emit PIPE_CONTROLs to write the current GPU timestamp into a buffer. */
@@ -332,18 +334,24 @@ intel_gpgpu_pipe_control(intel_gpgpu_t *gpgpu)
static void
intel_gpgpu_set_L3(intel_gpgpu_t *gpgpu, uint32_t use_slm)
{
- BEGIN_BATCH(gpgpu->batch, 6);
+ BEGIN_BATCH(gpgpu->batch, 9);
+ OUT_BATCH(gpgpu->batch, CMD_LOAD_REGISTER_IMM | 1); /* length - 2 */
+ OUT_BATCH(gpgpu->batch, GEN7_L3_SQC_REG1_ADDRESS_OFFSET);
+
+ OUT_BATCH(gpgpu->batch, 0x00730000);
+
OUT_BATCH(gpgpu->batch, CMD_LOAD_REGISTER_IMM | 1); /* length - 2 */
OUT_BATCH(gpgpu->batch, GEN7_L3_CNTL_REG2_ADDRESS_OFFSET);
+
if (use_slm)
- OUT_BATCH(gpgpu->batch, gpgpu_l3_config_reg1[8]);
+ OUT_BATCH(gpgpu->batch, gpgpu_l3_config_reg1[12]);
else
OUT_BATCH(gpgpu->batch, gpgpu_l3_config_reg1[4]);
OUT_BATCH(gpgpu->batch, CMD_LOAD_REGISTER_IMM | 1); /* length - 2 */
OUT_BATCH(gpgpu->batch, GEN7_L3_CNTL_REG3_ADDRESS_OFFSET);
if (use_slm)
- OUT_BATCH(gpgpu->batch, gpgpu_l3_config_reg2[8]);
+ OUT_BATCH(gpgpu->batch, gpgpu_l3_config_reg2[12]);
else
OUT_BATCH(gpgpu->batch, gpgpu_l3_config_reg2[4]);
ADVANCE_BATCH(gpgpu->batch);