summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorFelix Kuehling <Felix.Kuehling@amd.com>2016-07-15 18:37:05 -0400
committerFelix Kuehling <Felix.Kuehling@amd.com>2016-07-15 23:59:23 -0400
commitc8bd412e87e555f794458f07699a8dbf9c077991 (patch)
treea0b0abf3f9c3ea3c4518d7d3f93b006daca3b41b
parentd020404b487086ea3ea9bbacef694126a887f538 (diff)
drm/amdgpu: Make SDMA phase quantum configurableroc-1.2.0
Set a configurable SDMA phase quantum when enabling SDMA context switching. The default value significantly reduces SDMA latency in page table updates when user-mode SDMA queues have concurrent activity, compared to the initial HW setting. Change-Id: Id99c52e893d0358374ea9a3fbc2181f0c60b1b42 Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/cik_sdma.c32
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c32
4 files changed, 67 insertions, 2 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 53a2e612498d..09e4285de3eb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -90,6 +90,7 @@ extern int amdgpu_sched_hw_submission;
extern int amdgpu_powerplay;
extern unsigned amdgpu_pcie_gen_cap;
extern unsigned amdgpu_pcie_lane_cap;
+extern unsigned amdgpu_sdma_phase_quantum;
#define AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS 3000
#define AMDGPU_MAX_USEC_TIMEOUT 100000 /* 100 ms */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 48ce40823ca8..93e44ce78181 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -84,6 +84,7 @@ int amdgpu_sched_hw_submission = 2;
int amdgpu_powerplay = -1;
unsigned amdgpu_pcie_gen_cap = 0;
unsigned amdgpu_pcie_lane_cap = 0;
+unsigned amdgpu_sdma_phase_quantum = 32;
MODULE_PARM_DESC(vramlimit, "Restrict VRAM for testing, in megabytes");
module_param_named(vramlimit, amdgpu_vram_limit, int, 0600);
@@ -174,6 +175,9 @@ module_param_named(pcie_gen_cap, amdgpu_pcie_gen_cap, uint, 0444);
MODULE_PARM_DESC(pcie_lane_cap, "PCIE Lane Caps (0: autodetect (default))");
module_param_named(pcie_lane_cap, amdgpu_pcie_lane_cap, uint, 0444);
+MODULE_PARM_DESC(sdma_phase_quantum, "SDMA context switch phase quantum (x 1K GPU clock cycles, 0 = no change (default 32))");
+module_param_named(sdma_phase_quantum, amdgpu_sdma_phase_quantum, uint, 0444);
+
static const struct pci_device_id pciidlist[] = {
#ifdef CONFIG_DRM_AMDGPU_CIK
/* Kaveri */
diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
index 2b96fa7e8961..c1035454023d 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
@@ -351,14 +351,44 @@ static void cik_sdma_rlc_stop(struct amdgpu_device *adev)
*/
static void cik_ctx_switch_enable(struct amdgpu_device *adev, bool enable)
{
- u32 f32_cntl;
+ u32 f32_cntl, phase_quantum = 0;
int i;
+ if (amdgpu_sdma_phase_quantum) {
+ unsigned value = amdgpu_sdma_phase_quantum;
+ unsigned unit = 0;
+
+ while (value > (SDMA0_PHASE0_QUANTUM__VALUE_MASK >>
+ SDMA0_PHASE0_QUANTUM__VALUE__SHIFT)) {
+ value = (value + 1) >> 1;
+ unit++;
+ }
+ if (unit > (SDMA0_PHASE0_QUANTUM__UNIT_MASK >>
+ SDMA0_PHASE0_QUANTUM__UNIT__SHIFT)) {
+ value = (SDMA0_PHASE0_QUANTUM__VALUE_MASK >>
+ SDMA0_PHASE0_QUANTUM__VALUE__SHIFT);
+ unit = (SDMA0_PHASE0_QUANTUM__UNIT_MASK >>
+ SDMA0_PHASE0_QUANTUM__UNIT__SHIFT);
+ WARN_ONCE(1,
+ "clamping sdma_phase_quantum to %uK clock cycles\n",
+ value << unit);
+ }
+ phase_quantum =
+ value << SDMA0_PHASE0_QUANTUM__VALUE__SHIFT |
+ unit << SDMA0_PHASE0_QUANTUM__UNIT__SHIFT;
+ }
+
for (i = 0; i < adev->sdma.num_instances; i++) {
f32_cntl = RREG32(mmSDMA0_CNTL + sdma_offsets[i]);
if (enable) {
f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL,
AUTO_CTXSW_ENABLE, 1);
+ if (amdgpu_sdma_phase_quantum) {
+ WREG32(mmSDMA0_PHASE0_QUANTUM + sdma_offsets[i],
+ phase_quantum);
+ WREG32(mmSDMA0_PHASE1_QUANTUM + sdma_offsets[i],
+ phase_quantum);
+ }
} else {
f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL,
AUTO_CTXSW_ENABLE, 0);
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
index 303f87ff9b54..3b6568506e3a 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
@@ -576,9 +576,33 @@ static void sdma_v3_0_rlc_stop(struct amdgpu_device *adev)
*/
static void sdma_v3_0_ctx_switch_enable(struct amdgpu_device *adev, bool enable)
{
- u32 f32_cntl;
+ u32 f32_cntl, phase_quantum = 0;
int i;
+ if (amdgpu_sdma_phase_quantum) {
+ unsigned value = amdgpu_sdma_phase_quantum;
+ unsigned unit = 0;
+
+ while (value > (SDMA0_PHASE0_QUANTUM__VALUE_MASK >>
+ SDMA0_PHASE0_QUANTUM__VALUE__SHIFT)) {
+ value = (value + 1) >> 1;
+ unit++;
+ }
+ if (unit > (SDMA0_PHASE0_QUANTUM__UNIT_MASK >>
+ SDMA0_PHASE0_QUANTUM__UNIT__SHIFT)) {
+ value = (SDMA0_PHASE0_QUANTUM__VALUE_MASK >>
+ SDMA0_PHASE0_QUANTUM__VALUE__SHIFT);
+ unit = (SDMA0_PHASE0_QUANTUM__UNIT_MASK >>
+ SDMA0_PHASE0_QUANTUM__UNIT__SHIFT);
+ WARN_ONCE(1,
+ "clamping sdma_phase_quantum to %uK clock cycles\n",
+ value << unit);
+ }
+ phase_quantum =
+ value << SDMA0_PHASE0_QUANTUM__VALUE__SHIFT |
+ unit << SDMA0_PHASE0_QUANTUM__UNIT__SHIFT;
+ }
+
for (i = 0; i < adev->sdma.num_instances; i++) {
f32_cntl = RREG32(mmSDMA0_CNTL + sdma_offsets[i]);
if (enable) {
@@ -586,6 +610,12 @@ static void sdma_v3_0_ctx_switch_enable(struct amdgpu_device *adev, bool enable)
AUTO_CTXSW_ENABLE, 1);
f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL,
ATC_L1_ENABLE, 1);
+ if (amdgpu_sdma_phase_quantum) {
+ WREG32(mmSDMA0_PHASE0_QUANTUM + sdma_offsets[i],
+ phase_quantum);
+ WREG32(mmSDMA0_PHASE1_QUANTUM + sdma_offsets[i],
+ phase_quantum);
+ }
} else {
f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL,
AUTO_CTXSW_ENABLE, 0);