From b3c85a0fb2c79f2c945fa1305b39974d0acf3105 Mon Sep 17 00:00:00 2001 From: Christian König Date: Wed, 10 May 2017 20:06:58 +0200 Subject: drm/amdgpu: fix fundamental suspend/resume issue MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reinitializing the VM manager during suspend/resume is a very very bad idea since all the VMs are still active and kicking. This can lead to random VM faults after resume when new processes become the same client ID assigned. Signed-off-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 22 +++++++++++++++++++++- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 1 + drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c | 15 ++------------- drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c | 15 ++------------- drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 15 ++------------- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 16 ++-------------- 6 files changed, 30 insertions(+), 54 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 07ff3b1514f1..1bf36c3542c1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -672,6 +672,7 @@ void amdgpu_vm_reset_id(struct amdgpu_device *adev, unsigned vmhub, struct amdgpu_vm_id_manager *id_mgr = &adev->vm_manager.id_mgr[vmhub]; struct amdgpu_vm_id *id = &id_mgr->ids[vmid]; + atomic64_set(&id->owner, 0); id->gds_base = 0; id->gds_size = 0; id->gws_base = 0; @@ -680,6 +681,26 @@ void amdgpu_vm_reset_id(struct amdgpu_device *adev, unsigned vmhub, id->oa_size = 0; } +/** + * amdgpu_vm_reset_all_id - reset VMID to zero + * + * @adev: amdgpu device structure + * + * Reset VMID to force flush on next use + */ +void amdgpu_vm_reset_all_ids(struct amdgpu_device *adev) +{ + unsigned i, j; + + for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) { + struct amdgpu_vm_id_manager *id_mgr = + &adev->vm_manager.id_mgr[i]; + + for (j = 1; j < id_mgr->num_ids; ++j) + amdgpu_vm_reset_id(adev, i, j); + } +} + /** * amdgpu_vm_bo_find - find the bo_va for a specific vm & bo * @@ -2270,7 +2291,6 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev) for (i = 0; i < AMDGPU_MAX_RINGS; ++i) adev->vm_manager.seqno[i] = 0; - atomic_set(&adev->vm_manager.vm_pte_next_ring, 0); atomic64_set(&adev->vm_manager.client_counter, 0); spin_lock_init(&adev->vm_manager.prt_lock); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index d97e28b4bdc4..e1d951ece433 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -204,6 +204,7 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job); void amdgpu_vm_reset_id(struct amdgpu_device *adev, unsigned vmhub, unsigned vmid); +void amdgpu_vm_reset_all_ids(struct amdgpu_device *adev); int amdgpu_vm_update_directories(struct amdgpu_device *adev, struct amdgpu_vm *vm); int amdgpu_vm_clear_freed(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c index a572979f186c..d860939152df 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c @@ -950,10 +950,6 @@ static int gmc_v6_0_suspend(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if (adev->vm_manager.enabled) { - gmc_v6_0_vm_fini(adev); - adev->vm_manager.enabled = false; - } gmc_v6_0_hw_fini(adev); return 0; @@ -968,16 +964,9 @@ static int gmc_v6_0_resume(void *handle) if (r) return r; - if (!adev->vm_manager.enabled) { - r = gmc_v6_0_vm_init(adev); - if (r) { - dev_err(adev->dev, "vm manager initialization failed (%d).\n", r); - return r; - } - adev->vm_manager.enabled = true; - } + amdgpu_vm_reset_all_ids(adev); - return r; + return 0; } static bool gmc_v6_0_is_idle(void *handle) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index a9083a16a250..2750e5c23813 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c @@ -1117,10 +1117,6 @@ static int gmc_v7_0_suspend(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if (adev->vm_manager.enabled) { - gmc_v7_0_vm_fini(adev); - adev->vm_manager.enabled = false; - } gmc_v7_0_hw_fini(adev); return 0; @@ -1135,16 +1131,9 @@ static int gmc_v7_0_resume(void *handle) if (r) return r; - if (!adev->vm_manager.enabled) { - r = gmc_v7_0_vm_init(adev); - if (r) { - dev_err(adev->dev, "vm manager initialization failed (%d).\n", r); - return r; - } - adev->vm_manager.enabled = true; - } + amdgpu_vm_reset_all_ids(adev); - return r; + return 0; } static bool gmc_v7_0_is_idle(void *handle) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index 4ac99784160a..f56b4089ee9f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -1209,10 +1209,6 @@ static int gmc_v8_0_suspend(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if (adev->vm_manager.enabled) { - gmc_v8_0_vm_fini(adev); - adev->vm_manager.enabled = false; - } gmc_v8_0_hw_fini(adev); return 0; @@ -1227,16 +1223,9 @@ static int gmc_v8_0_resume(void *handle) if (r) return r; - if (!adev->vm_manager.enabled) { - r = gmc_v8_0_vm_init(adev); - if (r) { - dev_err(adev->dev, "vm manager initialization failed (%d).\n", r); - return r; - } - adev->vm_manager.enabled = true; - } + amdgpu_vm_reset_all_ids(adev); - return r; + return 0; } static bool gmc_v8_0_is_idle(void *handle) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index dc1e1c1d6b24..f936332a069d 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -791,10 +791,6 @@ static int gmc_v9_0_suspend(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if (adev->vm_manager.enabled) { - gmc_v9_0_vm_fini(adev); - adev->vm_manager.enabled = false; - } gmc_v9_0_hw_fini(adev); return 0; @@ -809,17 +805,9 @@ static int gmc_v9_0_resume(void *handle) if (r) return r; - if (!adev->vm_manager.enabled) { - r = gmc_v9_0_vm_init(adev); - if (r) { - dev_err(adev->dev, - "vm manager initialization failed (%d).\n", r); - return r; - } - adev->vm_manager.enabled = true; - } + amdgpu_vm_reset_all_ids(adev); - return r; + return 0; } static bool gmc_v9_0_is_idle(void *handle) -- cgit v1.2.3 From 0a646f331db0eb9efc8d3a95a44872036d441d58 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 11 May 2017 13:10:02 -0400 Subject: drm/amdgpu/ci: disable mclk switching for high refresh rates (v2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Even if the vblank period would allow it, it still seems to be problematic on some cards. v2: fix logic inversion (Nils) bug: https://bugs.freedesktop.org/show_bug.cgi?id=96868 Cc: stable@vger.kernel.org Acked-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/ci_dpm.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c index 6dc1410b380f..ec93714e4524 100644 --- a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c @@ -906,6 +906,12 @@ static bool ci_dpm_vblank_too_short(struct amdgpu_device *adev) u32 vblank_time = amdgpu_dpm_get_vblank_time(adev); u32 switch_limit = adev->mc.vram_type == AMDGPU_VRAM_TYPE_GDDR5 ? 450 : 300; + /* disable mclk switching if the refresh is >120Hz, even if the + * blanking period would allow it + */ + if (amdgpu_dpm_get_vrefresh(adev) > 120) + return true; + if (vblank_time < switch_limit) return true; else -- cgit v1.2.3 From 58d7e3e427db1bd68f33025519a9468140280a75 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 11 May 2017 13:14:14 -0400 Subject: drm/radeon/ci: disable mclk switching for high refresh rates (v2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Even if the vblank period would allow it, it still seems to be problematic on some cards. v2: fix logic inversion (Nils) bug: https://bugs.freedesktop.org/show_bug.cgi?id=96868 Cc: stable@vger.kernel.org Acked-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/ci_dpm.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/radeon/ci_dpm.c b/drivers/gpu/drm/radeon/ci_dpm.c index 7ba450832e6b..ea36dc4dd5d2 100644 --- a/drivers/gpu/drm/radeon/ci_dpm.c +++ b/drivers/gpu/drm/radeon/ci_dpm.c @@ -776,6 +776,12 @@ bool ci_dpm_vblank_too_short(struct radeon_device *rdev) u32 vblank_time = r600_dpm_get_vblank_time(rdev); u32 switch_limit = pi->mem_gddr5 ? 450 : 300; + /* disable mclk switching if the refresh is >120Hz, even if the + * blanking period would allow it + */ + if (r600_dpm_get_vrefresh(rdev) > 120) + return true; + if (vblank_time < switch_limit) return true; else -- cgit v1.2.3 From 09be4a5219610a6fae3215d4f51f948d6f5d2609 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 11 May 2017 13:46:12 -0400 Subject: drm/amd/powerplay/smu7: add vblank check for mclk switching (v2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Check to make sure the vblank period is long enough to support mclk switching. v2: drop needless initial assignment (Nils) bug: https://bugs.freedesktop.org/show_bug.cgi?id=96868 Cc: stable@vger.kernel.org Acked-by: Christian König Reviewed-by: Rex Zhu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c | 31 +++++++++++++++++++++--- 1 file changed, 27 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c index a74a3db3056c..1445c51b6d05 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c @@ -2655,6 +2655,28 @@ static int smu7_get_power_state_size(struct pp_hwmgr *hwmgr) return sizeof(struct smu7_power_state); } +static int smu7_vblank_too_short(struct pp_hwmgr *hwmgr, + uint32_t vblank_time_us) +{ + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + uint32_t switch_limit_us; + + switch (hwmgr->chip_id) { + case CHIP_POLARIS10: + case CHIP_POLARIS11: + case CHIP_POLARIS12: + switch_limit_us = data->is_memory_gddr5 ? 190 : 150; + break; + default: + switch_limit_us = data->is_memory_gddr5 ? 450 : 150; + break; + } + + if (vblank_time_us < switch_limit_us) + return true; + else + return false; +} static int smu7_apply_state_adjust_rules(struct pp_hwmgr *hwmgr, struct pp_power_state *request_ps, @@ -2669,6 +2691,7 @@ static int smu7_apply_state_adjust_rules(struct pp_hwmgr *hwmgr, bool disable_mclk_switching; bool disable_mclk_switching_for_frame_lock; struct cgs_display_info info = {0}; + struct cgs_mode_info mode_info = {0}; const struct phm_clock_and_voltage_limits *max_limits; uint32_t i; struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); @@ -2677,6 +2700,7 @@ static int smu7_apply_state_adjust_rules(struct pp_hwmgr *hwmgr, int32_t count; int32_t stable_pstate_sclk = 0, stable_pstate_mclk = 0; + info.mode_info = &mode_info; data->battery_state = (PP_StateUILabel_Battery == request_ps->classification.ui_label); @@ -2703,8 +2727,6 @@ static int smu7_apply_state_adjust_rules(struct pp_hwmgr *hwmgr, cgs_get_active_displays_info(hwmgr->device, &info); - /*TO DO result = PHM_CheckVBlankTime(hwmgr, &vblankTooShort);*/ - minimum_clocks.engineClock = hwmgr->display_config.min_core_set_clock; minimum_clocks.memoryClock = hwmgr->display_config.min_mem_set_clock; @@ -2769,8 +2791,9 @@ static int smu7_apply_state_adjust_rules(struct pp_hwmgr *hwmgr, PHM_PlatformCaps_DisableMclkSwitchingForFrameLock); - disable_mclk_switching = (1 < info.display_count) || - disable_mclk_switching_for_frame_lock; + disable_mclk_switching = ((1 < info.display_count) || + disable_mclk_switching_for_frame_lock || + smu7_vblank_too_short(hwmgr, mode_info.vblank_time_us)); sclk = smu7_ps->performance_levels[0].engine_clock; mclk = smu7_ps->performance_levels[0].memory_clock; -- cgit v1.2.3 From 2275a3a2fe9914ba6d76c8ea490da3c08342bd19 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 11 May 2017 13:57:41 -0400 Subject: drm/amd/powerplay/smu7: disable mclk switching for high refresh rates MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Even if the vblank period would allow it, it still seems to be problematic on some cards. bug: https://bugs.freedesktop.org/show_bug.cgi?id=96868 Cc: stable@vger.kernel.org Acked-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c index 1445c51b6d05..102eb6d029fa 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c @@ -2793,7 +2793,8 @@ static int smu7_apply_state_adjust_rules(struct pp_hwmgr *hwmgr, disable_mclk_switching = ((1 < info.display_count) || disable_mclk_switching_for_frame_lock || - smu7_vblank_too_short(hwmgr, mode_info.vblank_time_us)); + smu7_vblank_too_short(hwmgr, mode_info.vblank_time_us) || + (mode_info.refresh_rate > 120)); sclk = smu7_ps->performance_levels[0].engine_clock; mclk = smu7_ps->performance_levels[0].memory_clock; -- cgit v1.2.3 From 3d18e33735a02b1a90aecf14410bf3edbfd4d3dc Mon Sep 17 00:00:00 2001 From: Lyude Date: Thu, 11 May 2017 19:31:12 -0400 Subject: drm/radeon: Unbreak HPD handling for r600+ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We end up reading the interrupt register for HPD5, and then writing it to HPD6 which on systems without anything using HPD5 results in permanently disabling hotplug on one of the display outputs after the first time we acknowledge a hotplug interrupt from the GPU. This code is really bad. But for now, let's just fix this. I will hopefully have a large patch series to refactor all of this soon. Reviewed-by: Christian König Signed-off-by: Lyude Cc: stable@vger.kernel.org Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/cik.c | 4 ++-- drivers/gpu/drm/radeon/evergreen.c | 4 ++-- drivers/gpu/drm/radeon/r600.c | 2 +- drivers/gpu/drm/radeon/si.c | 4 ++-- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index ccebe0f8d2e1..008c145b7f29 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -7401,7 +7401,7 @@ static inline void cik_irq_ack(struct radeon_device *rdev) WREG32(DC_HPD5_INT_CONTROL, tmp); } if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) { - tmp = RREG32(DC_HPD5_INT_CONTROL); + tmp = RREG32(DC_HPD6_INT_CONTROL); tmp |= DC_HPDx_INT_ACK; WREG32(DC_HPD6_INT_CONTROL, tmp); } @@ -7431,7 +7431,7 @@ static inline void cik_irq_ack(struct radeon_device *rdev) WREG32(DC_HPD5_INT_CONTROL, tmp); } if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) { - tmp = RREG32(DC_HPD5_INT_CONTROL); + tmp = RREG32(DC_HPD6_INT_CONTROL); tmp |= DC_HPDx_RX_INT_ACK; WREG32(DC_HPD6_INT_CONTROL, tmp); } diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c index f130ec41ee4b..0bf103536404 100644 --- a/drivers/gpu/drm/radeon/evergreen.c +++ b/drivers/gpu/drm/radeon/evergreen.c @@ -4927,7 +4927,7 @@ static void evergreen_irq_ack(struct radeon_device *rdev) WREG32(DC_HPD5_INT_CONTROL, tmp); } if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) { - tmp = RREG32(DC_HPD5_INT_CONTROL); + tmp = RREG32(DC_HPD6_INT_CONTROL); tmp |= DC_HPDx_INT_ACK; WREG32(DC_HPD6_INT_CONTROL, tmp); } @@ -4958,7 +4958,7 @@ static void evergreen_irq_ack(struct radeon_device *rdev) WREG32(DC_HPD5_INT_CONTROL, tmp); } if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) { - tmp = RREG32(DC_HPD5_INT_CONTROL); + tmp = RREG32(DC_HPD6_INT_CONTROL); tmp |= DC_HPDx_RX_INT_ACK; WREG32(DC_HPD6_INT_CONTROL, tmp); } diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index 0a085176e79b..e06e2d8feab3 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -3988,7 +3988,7 @@ static void r600_irq_ack(struct radeon_device *rdev) WREG32(DC_HPD5_INT_CONTROL, tmp); } if (rdev->irq.stat_regs.r600.disp_int_cont2 & DC_HPD6_INTERRUPT) { - tmp = RREG32(DC_HPD5_INT_CONTROL); + tmp = RREG32(DC_HPD6_INT_CONTROL); tmp |= DC_HPDx_INT_ACK; WREG32(DC_HPD6_INT_CONTROL, tmp); } diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c index ceee87f029d9..76d1888528e6 100644 --- a/drivers/gpu/drm/radeon/si.c +++ b/drivers/gpu/drm/radeon/si.c @@ -6317,7 +6317,7 @@ static inline void si_irq_ack(struct radeon_device *rdev) WREG32(DC_HPD5_INT_CONTROL, tmp); } if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) { - tmp = RREG32(DC_HPD5_INT_CONTROL); + tmp = RREG32(DC_HPD6_INT_CONTROL); tmp |= DC_HPDx_INT_ACK; WREG32(DC_HPD6_INT_CONTROL, tmp); } @@ -6348,7 +6348,7 @@ static inline void si_irq_ack(struct radeon_device *rdev) WREG32(DC_HPD5_INT_CONTROL, tmp); } if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) { - tmp = RREG32(DC_HPD5_INT_CONTROL); + tmp = RREG32(DC_HPD6_INT_CONTROL); tmp |= DC_HPDx_RX_INT_ACK; WREG32(DC_HPD6_INT_CONTROL, tmp); } -- cgit v1.2.3 From 7c4378f4523d4af05b5941ea906e7032631eb753 Mon Sep 17 00:00:00 2001 From: Chunming Zhou Date: Thu, 11 May 2017 18:22:17 +0800 Subject: drm/amdgpu: fix NULL pointer panic of emit_gds_switch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ 338.384770] BUG: unable to handle kernel NULL pointer dereference at (null) [ 338.384817] IP: [< (null)>] (null) [ 338.385505] RIP: 0010:[<0000000000000000>] [< (null)>] (null) [ 338.385950] Call Trace: [ 338.385993] [] ? amdgpu_vm_flush+0x283/0x400 [amdgpu] [ 338.386025] [] ? printk+0x4d/0x4f [ 338.386074] [] amdgpu_ib_schedule+0x4a6/0x4d0 [amdgpu] [ 338.386140] [] amdgpu_job_run+0x64/0x180 [amdgpu] [ 338.386203] [] amd_sched_main+0x2e9/0x4a0 [amdgpu] [ 338.386232] [] ? prepare_to_wait_event+0x110/0x110 [ 338.386295] [] ? amd_sched_select_entity+0xe0/0xe0 [amdgpu] [ 338.386327] [] kthread+0xd3/0xf0 [ 338.386349] [] ? kthread_park+0x60/0x60 [ 338.386376] [] ret_from_fork+0x25/0x30 [ 338.386401] Code: Bad RIP value. [ 338.386420] RIP [< (null)>] (null) [ 338.386443] RSP [ 338.386458] CR2: 0000000000000000 [ 338.398508] ---[ end trace 4c66fcdc74b9a0a2 ]--- Signed-off-by: Chunming Zhou Reviewed-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 1bf36c3542c1..8ecf82c5fe74 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -634,7 +634,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job) mutex_unlock(&id_mgr->lock); } - if (gds_switch_needed) { + if (ring->funcs->emit_gds_switch && gds_switch_needed) { id->gds_base = job->gds_base; id->gds_size = job->gds_size; id->gws_base = job->gws_base; -- cgit v1.2.3 From 3083696a1ee68f4845f8e9a21b91e343ff25eff3 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 23 May 2017 13:13:45 +0800 Subject: drm/amd/powerplay: fix a signedness bugs Smatch complains about a signedness bug here: vega10_hwmgr.c:4202 vega10_force_clock_level() warn: always true condition '(i >= 0) => (0-u32max >= 0)' Fixes: 7b52db39a4c2 ("drm/amd/powerplay: fix bug sclk/mclk level can't be set on vega10.") Signed-off-by: Dan Carpenter Reviewed-by: Eric Huang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c index ad30f5d3a10d..2614af2f553f 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c @@ -4186,7 +4186,7 @@ static int vega10_force_clock_level(struct pp_hwmgr *hwmgr, enum pp_clock_type type, uint32_t mask) { struct vega10_hwmgr *data = (struct vega10_hwmgr *)(hwmgr->backend); - uint32_t i; + int i; if (hwmgr->dpm_level != AMD_DPM_FORCED_LEVEL_MANUAL) return -EINVAL; -- cgit v1.2.3 From b62ce397675502325d4282924bf70cfb6a005c3a Mon Sep 17 00:00:00 2001 From: Rex Zhu Date: Mon, 22 May 2017 13:11:41 +0800 Subject: drm/amdgpu: fix null point error when rmmod amdgpu. this bug happened when amdgpu load failed. [ 75.740951] BUG: unable to handle kernel paging request at 00000000000031c0 [ 75.748167] IP: [] amdgpu_fbdev_restore_mode+0x20/0x60 [amdgpu] [ 75.755774] PGD 0 [ 75.759185] Oops: 0000 [#1] SMP [ 75.762408] Modules linked in: amdgpu(OE-) ttm(OE) drm_kms_helper(OE) drm(OE) i2c_algo_bit(E) fb_sys_fops(E) syscopyarea(E) sysfillrect(E) sysimgblt(E) rpcsec_gss_krb5(E) nfsv4(E) nfs(E) fscache(E) eeepc_wmi(E) asus_wmi(E) sparse_keymap(E) intel_rapl(E) snd_hda_codec_hdmi(E) snd_hda_codec_realtek(E) snd_hda_codec_generic(E) snd_hda_intel(E) snd_hda_codec(E) snd_hda_core(E) x86_pkg_temp_thermal(E) intel_powerclamp(E) snd_hwdep(E) snd_pcm(E) snd_seq_midi(E) coretemp(E) kvm_intel(E) snd_seq_midi_event(E) snd_rawmidi(E) kvm(E) snd_seq(E) joydev(E) snd_seq_device(E) snd_timer(E) irqbypass(E) crct10dif_pclmul(E) crc32_pclmul(E) mei_me(E) ghash_clmulni_intel(E) snd(E) aesni_intel(E) mei(E) soundcore(E) aes_x86_64(E) shpchp(E) serio_raw(E) lrw(E) acpi_pad(E) gf128mul(E) glue_helper(E) ablk_helper(E) mac_hid(E) [ 75.835574] cryptd(E) parport_pc(E) ppdev(E) lp(E) nfsd(E) parport(E) auth_rpcgss(E) nfs_acl(E) lockd(E) grace(E) sunrpc(E) autofs4(E) hid_generic(E) usbhid(E) mxm_wmi(E) psmouse(E) e1000e(E) ptp(E) pps_core(E) ahci(E) libahci(E) wmi(E) video(E) i2c_hid(E) hid(E) [ 75.858489] CPU: 5 PID: 1603 Comm: rmmod Tainted: G OE 4.9.0-custom #2 [ 75.866183] Hardware name: System manufacturer System Product Name/Z170-A, BIOS 0901 08/31/2015 [ 75.875050] task: ffff88045d1bbb80 task.stack: ffffc90002de4000 [ 75.881094] RIP: 0010:[] [] amdgpu_fbdev_restore_mode+0x20/0x60 [amdgpu] [ 75.891238] RSP: 0018:ffffc90002de7d48 EFLAGS: 00010286 [ 75.896648] RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000001 [ 75.903933] RDX: 0000000000000000 RSI: ffff88045d1bbb80 RDI: 0000000000000286 [ 75.911183] RBP: ffffc90002de7d50 R08: 0000000000000502 R09: 0000000000000004 [ 75.918449] R10: 0000000000000000 R11: 0000000000000001 R12: ffff880464bf0000 [ 75.925675] R13: ffffffffa0853000 R14: 0000000000000000 R15: 0000564e44f88210 [ 75.932980] FS: 00007f13d5400700(0000) GS:ffff880476540000(0000) knlGS:0000000000000000 [ 75.941238] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 75.947088] CR2: 00000000000031c0 CR3: 000000045fd0b000 CR4: 00000000003406e0 [ 75.954332] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 75.961566] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 75.968834] Stack: [ 75.970881] ffff880464bf0000 ffffc90002de7d60 ffffffffa0636592 ffffc90002de7d80 [ 75.978454] ffffffffa059015f ffff880464bf0000 ffff880464bf0000 ffffc90002de7da8 [ 75.986076] ffffffffa0595216 ffff880464bf0000 ffff880460f4d000 ffffffffa0853000 [ 75.993692] Call Trace: [ 75.996177] [] amdgpu_driver_lastclose_kms+0x12/0x20 [amdgpu] [ 76.003700] [] drm_lastclose+0x2f/0xd0 [drm] [ 76.009777] [] drm_dev_unregister+0x16/0xd0 [drm] [ 76.016255] [] drm_put_dev+0x34/0x70 [drm] [ 76.022139] [] amdgpu_pci_remove+0x15/0x20 [amdgpu] [ 76.028800] [] pci_device_remove+0x39/0xc0 [ 76.034661] [] __device_release_driver+0x9a/0x140 [ 76.041121] [] driver_detach+0xb8/0xc0 [ 76.046575] [] bus_remove_driver+0x55/0xd0 [ 76.052401] [] driver_unregister+0x2c/0x50 [ 76.058244] [] pci_unregister_driver+0x29/0x90 [ 76.064466] [] drm_pci_exit+0x9e/0xb0 [drm] [ 76.070507] [] amdgpu_exit+0x1c/0x32 [amdgpu] [ 76.076609] [] SyS_delete_module+0x1a0/0x200 [ 76.082627] [] ? rcu_eqs_enter.isra.36+0x4a/0x50 [ 76.089001] [] do_syscall_64+0x6e/0x180 [ 76.094583] [] entry_SYSCALL64_slow_path+0x25/0x25 [ 76.101114] Code: 94 c0 c3 31 c0 5d c3 0f 1f 40 00 0f 1f 44 00 00 55 31 c0 48 89 e5 53 48 89 fb 48 c7 c7 1d 21 84 a0 e8 ab 77 b3 e0 e8 fc 8b d7 e0 <48> 8b bb c0 31 00 00 48 85 ff 74 09 e8 ff eb fc ff 85 c0 75 03 [ 76.121432] RIP [] amdgpu_fbdev_restore_mode+0x20/0x60 [amdgpu] Signed-off-by: Rex Zhu Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c index 236d9950221b..c0d8c6ff6380 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c @@ -425,10 +425,15 @@ bool amdgpu_fbdev_robj_is_fb(struct amdgpu_device *adev, struct amdgpu_bo *robj) void amdgpu_fbdev_restore_mode(struct amdgpu_device *adev) { - struct amdgpu_fbdev *afbdev = adev->mode_info.rfbdev; + struct amdgpu_fbdev *afbdev; struct drm_fb_helper *fb_helper; int ret; + if (!adev) + return; + + afbdev = adev->mode_info.rfbdev; + if (!afbdev) return; -- cgit v1.2.3