diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2024-11-29 13:06:06 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2024-11-29 13:06:06 -0800 |
commit | 2ba9f676d0a2e408aef14d679984c26373bf37b7 (patch) | |
tree | d84af8f6b6be035d3a013fbd2b6ec9b61c2e29bd /drivers/gpu/drm | |
parent | 517363b4949e4442dfe54b281ef5a8bbfafa3bbb (diff) | |
parent | 9794b89c50f7fc972c6b4ddc69693c9f9d1ae7d7 (diff) |
Merge tag 'drm-next-2024-11-29' of https://gitlab.freedesktop.org/drm/kernel
Pull drm fixes from Dave Airlie:
"Merge window fixes, mostly amdgpu and xe, with a few other minor ones,
all looks fairly normal,
i915:
- hdcp: Fix when the first read and write are retried
xe:
- Wake up waiters after wait condition set to true
- Mark the preempt fence workqueue as reclaim
- Update xe2 graphics name string
- Fix a couple of guc submit races
- Fix pat index usage in migrate
- Ensure non-cached migrate pagetable bo mappings
- Take a PM ref in the delayed snapshot capture worker
amdgpu:
- SMU 13.0.6 fixes
- XGMI fixes
- SMU 13.0.7 fixes
- Misc code cleanups
- Plane refcount fixes
- DCN 4.0.1 fixes
- DC power fixes
- DTO fixes
- NBIO 7.11 fixes
- SMU 14.0.x fixes
- Reset fixes
- Enable DC on LoongArch
- Sysfs hotplug warning fix
- Misc small fixes
- VCN 4.0.3 fix
- Slab usage fix
- Jpeg delayed work fix
amdkfd:
- wptr handling fixes
radeon:
- Use ttm_bo_move_null()
- Constify struct pci_device_id
- Fix spurious hotplug
- HPD fix
rockchip
- fix 32-bit build"
* tag 'drm-next-2024-11-29' of https://gitlab.freedesktop.org/drm/kernel: (48 commits)
drm/xe: Take PM ref in delayed snapshot capture worker
drm/xe/migrate: use XE_BO_FLAG_PAGETABLE
drm/xe/migrate: fix pat index usage
drm/xe/guc_submit: fix race around suspend_pending
drm/xe/guc_submit: fix race around pending_disable
drm/xe: Update xe2_graphics name string
drm/rockchip: avoid 64-bit division
Revert "drm/radeon: Delay Connector detecting when HPD singals is unstable"
drm/amdgpu/jpeg: cancel the jpeg worker
drm/amdgpu: fix usage slab after free
drm/amdgpu/vcn: reset fw_shared when VCPU buffers corrupted on vcn v4.0.3
drm/amdgpu: Fix sysfs warning when hotplugging
drm/amdgpu: Add sysfs interface for vcn reset mask
drm/amdgpu/gmc7: fix wait_for_idle callers
drm/amd/pm: Remove arcturus min power limit
drm/amd/pm: skip setting the power source on smu v14.0.2/3
drm/amd/pm: disable pcie speed switching on Intel platform for smu v14.0.2/3
drm/amdkfd: Use the correct wptr size
drm/xe: Mark preempt fence workqueue as reclaim
drm/xe/ufence: Wake up waiters after setting ufence->signalled
...
Diffstat (limited to 'drivers/gpu/drm')
75 files changed, 690 insertions, 193 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/aldebaran.c b/drivers/gpu/drm/amd/amdgpu/aldebaran.c index 3a588fecb0c5..f44de9d4b6a1 100644 --- a/drivers/gpu/drm/amd/amdgpu/aldebaran.c +++ b/drivers/gpu/drm/amd/amdgpu/aldebaran.c @@ -330,6 +330,8 @@ aldebaran_mode2_restore_hwcontext(struct amdgpu_reset_control *reset_ctl, } list_for_each_entry(tmp_adev, reset_device_list, reset_list) { + amdgpu_set_init_level(tmp_adev, + AMDGPU_INIT_LEVEL_RESET_RECOVERY); dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n"); r = aldebaran_mode2_restore_ip(tmp_adev); @@ -375,6 +377,8 @@ aldebaran_mode2_restore_hwcontext(struct amdgpu_reset_control *reset_ctl, tmp_adev); if (!r) { + amdgpu_set_init_level(tmp_adev, + AMDGPU_INIT_LEVEL_DEFAULT); amdgpu_irq_gpu_reset_resume_helper(tmp_adev); r = amdgpu_ib_ring_tests(tmp_adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index d8bc6da50016..4653a8d2823a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -839,6 +839,7 @@ struct amdgpu_mqd { enum amdgpu_init_lvl_id { AMDGPU_INIT_LEVEL_DEFAULT, AMDGPU_INIT_LEVEL_MINIMAL_XGMI, + AMDGPU_INIT_LEVEL_RESET_RECOVERY, }; struct amdgpu_init_level { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 0171d240fcb0..9095c05e0269 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -156,6 +156,11 @@ struct amdgpu_init_level amdgpu_init_default = { .hwini_ip_block_mask = AMDGPU_IP_BLK_MASK_ALL, }; +struct amdgpu_init_level amdgpu_init_recovery = { + .level = AMDGPU_INIT_LEVEL_RESET_RECOVERY, + .hwini_ip_block_mask = AMDGPU_IP_BLK_MASK_ALL, +}; + /* * Minimal blocks needed to be initialized before a XGMI hive can be reset. This * is used for cases like reset on initialization where the entire hive needs to @@ -182,6 +187,9 @@ void amdgpu_set_init_level(struct amdgpu_device *adev, case AMDGPU_INIT_LEVEL_MINIMAL_XGMI: adev->init_lvl = &amdgpu_init_minimal_xgmi; break; + case AMDGPU_INIT_LEVEL_RESET_RECOVERY: + adev->init_lvl = &amdgpu_init_recovery; + break; case AMDGPU_INIT_LEVEL_DEFAULT: fallthrough; default: @@ -3250,7 +3258,7 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev) return r; } - if (!amdgpu_in_reset(adev)) + if (!amdgpu_reset_in_recovery(adev)) amdgpu_ras_set_error_query_ready(adev, true); amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE); @@ -4669,8 +4677,8 @@ void amdgpu_device_fini_sw(struct amdgpu_device *adev) int idx; bool px; - amdgpu_fence_driver_sw_fini(adev); amdgpu_device_ip_fini(adev); + amdgpu_fence_driver_sw_fini(adev); amdgpu_ucode_release(&adev->firmware.gpu_info_fw); adev->accel_working = false; dma_fence_put(rcu_dereference_protected(adev->gang_submit, true)); @@ -5419,7 +5427,7 @@ int amdgpu_device_reinit_after_reset(struct amdgpu_reset_context *reset_context) struct list_head *device_list_handle; bool full_reset, vram_lost = false; struct amdgpu_device *tmp_adev; - int r; + int r, init_level; device_list_handle = reset_context->reset_device_list; @@ -5428,10 +5436,18 @@ int amdgpu_device_reinit_after_reset(struct amdgpu_reset_context *reset_context) full_reset = test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags); + /** + * If it's reset on init, it's default init level, otherwise keep level + * as recovery level. + */ + if (reset_context->method == AMD_RESET_METHOD_ON_INIT) + init_level = AMDGPU_INIT_LEVEL_DEFAULT; + else + init_level = AMDGPU_INIT_LEVEL_RESET_RECOVERY; + r = 0; list_for_each_entry(tmp_adev, device_list_handle, reset_list) { - /* After reset, it's default init level */ - amdgpu_set_init_level(tmp_adev, AMDGPU_INIT_LEVEL_DEFAULT); + amdgpu_set_init_level(tmp_adev, init_level); if (full_reset) { /* post card */ amdgpu_ras_set_fed(tmp_adev, false); @@ -5518,6 +5534,9 @@ int amdgpu_device_reinit_after_reset(struct amdgpu_reset_context *reset_context) out: if (!r) { + /* IP init is complete now, set level as default */ + amdgpu_set_init_level(tmp_adev, + AMDGPU_INIT_LEVEL_DEFAULT); amdgpu_irq_gpu_reset_resume_helper(tmp_adev); r = amdgpu_ib_ring_tests(tmp_adev); if (r) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index f57cc72c43cf..69a6b6dba0a5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -1778,9 +1778,11 @@ int amdgpu_gfx_sysfs_init(struct amdgpu_device *adev) void amdgpu_gfx_sysfs_fini(struct amdgpu_device *adev) { - amdgpu_gfx_sysfs_xcp_fini(adev); - amdgpu_gfx_sysfs_isolation_shader_fini(adev); - amdgpu_gfx_sysfs_reset_mask_fini(adev); + if (adev->dev->kobj.sd) { + amdgpu_gfx_sysfs_xcp_fini(adev); + amdgpu_gfx_sysfs_isolation_shader_fini(adev); + amdgpu_gfx_sysfs_reset_mask_fini(adev); + } } int amdgpu_gfx_cleaner_shader_sw_init(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c index 04eb51674596..b6d2eb049f54 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c @@ -447,6 +447,8 @@ int amdgpu_jpeg_sysfs_reset_mask_init(struct amdgpu_device *adev) void amdgpu_jpeg_sysfs_reset_mask_fini(struct amdgpu_device *adev) { - if (adev->jpeg.num_jpeg_inst) - device_remove_file(adev->dev, &dev_attr_jpeg_reset_mask); + if (adev->dev->kobj.sd) { + if (adev->jpeg.num_jpeg_inst) + device_remove_file(adev->dev, &dev_attr_jpeg_reset_mask); + } } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c index e8adfd0a570a..34b5e22b44e5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c @@ -137,7 +137,8 @@ void amdgpu_preempt_mgr_fini(struct amdgpu_device *adev) if (ret) return; - device_remove_file(adev->dev, &dev_attr_mem_info_preempt_used); + if (adev->dev->kobj.sd) + device_remove_file(adev->dev, &dev_attr_mem_info_preempt_used); ttm_resource_manager_cleanup(man); ttm_set_driver_manager(&adev->mman.bdev, AMDGPU_PL_PREEMPT, NULL); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 1bc95b0cdbb8..4c9fa24dd972 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -1298,7 +1298,7 @@ int amdgpu_ras_bind_aca(struct amdgpu_device *adev, enum amdgpu_ras_block blk, struct ras_manager *obj; /* in resume phase, no need to create aca fs node */ - if (adev->in_suspend || amdgpu_in_reset(adev)) + if (adev->in_suspend || amdgpu_reset_in_recovery(adev)) return 0; obj = get_ras_manager(adev, blk); @@ -3610,7 +3610,7 @@ static void amdgpu_ras_event_mgr_init(struct amdgpu_device *adev) ras->event_mgr = hive ? &hive->event_mgr : &ras->__event_mgr; /* init event manager with node 0 on xgmi system */ - if (!amdgpu_in_reset(adev)) { + if (!amdgpu_reset_in_recovery(adev)) { if (!hive || adev->gmc.xgmi.node_id == 0) ras_event_mgr_init(ras->event_mgr); } @@ -3825,7 +3825,7 @@ int amdgpu_ras_block_late_init(struct amdgpu_device *adev, r = amdgpu_ras_feature_enable_on_boot(adev, ras_block, 1); if (r) { - if (adev->in_suspend || amdgpu_in_reset(adev)) { + if (adev->in_suspend || amdgpu_reset_in_recovery(adev)) { /* in resume phase, if fail to enable ras, * clean up all ras fs nodes, and disable ras */ goto cleanup; @@ -3837,7 +3837,7 @@ int amdgpu_ras_block_late_init(struct amdgpu_device *adev, amdgpu_persistent_edc_harvesting(adev, ras_block); /* in resume phase, no need to create ras fs node */ - if (adev->in_suspend || amdgpu_in_reset(adev)) + if (adev->in_suspend || amdgpu_reset_in_recovery(adev)) return 0; ras_obj = container_of(ras_block, struct amdgpu_ras_block_object, ras_comm); @@ -3967,7 +3967,7 @@ int amdgpu_ras_late_init(struct amdgpu_device *adev) amdgpu_ras_event_mgr_init(adev); if (amdgpu_ras_aca_is_supported(adev)) { - if (amdgpu_in_reset(adev)) { + if (amdgpu_reset_in_recovery(adev)) { if (amdgpu_aca_is_enabled(adev)) r = amdgpu_aca_reset(adev); else diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c index 24dae7cdbe95..a0acb65f4b40 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c @@ -342,3 +342,8 @@ void amdgpu_reset_get_desc(struct amdgpu_reset_context *rst_ctxt, char *buf, strscpy(buf, "unknown", len); } } + +bool amdgpu_reset_in_recovery(struct amdgpu_device *adev) +{ + return (adev->init_lvl->level == AMDGPU_INIT_LEVEL_RESET_RECOVERY); +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h index f8628bc898df..4d9b9701139b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h @@ -158,4 +158,6 @@ extern struct amdgpu_reset_handler xgmi_reset_on_init_handler; int amdgpu_reset_do_xgmi_reset_on_init( struct amdgpu_reset_context *reset_context); +bool amdgpu_reset_in_recovery(struct amdgpu_device *adev); + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c index 8c89b69edc20..113f0d242618 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c @@ -451,6 +451,8 @@ void amdgpu_sdma_sysfs_reset_mask_fini(struct amdgpu_device *adev) if (!amdgpu_gpu_recovery) return; - if (adev->sdma.num_instances) - device_remove_file(adev->dev, &dev_attr_sdma_reset_mask); + if (adev->dev->kobj.sd) { + if (adev->sdma.num_instances) + device_remove_file(adev->dev, &dev_attr_sdma_reset_mask); + } } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index 74fdbf71d95b..599d3ca4e0ef 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -214,15 +214,15 @@ int amdgpu_vce_sw_fini(struct amdgpu_device *adev) drm_sched_entity_destroy(&adev->vce.entity); - amdgpu_bo_free_kernel(&adev->vce.vcpu_bo, &adev->vce.gpu_addr, - (void **)&adev->vce.cpu_addr); - for (i = 0; i < adev->vce.num_rings; i++) amdgpu_ring_fini(&adev->vce.ring[i]); amdgpu_ucode_release(&adev->vce.fw); mutex_destroy(&adev->vce.idle_mutex); + amdgpu_bo_free_kernel(&adev->vce.vcpu_bo, &adev->vce.gpu_addr, + (void **)&adev->vce.cpu_addr); + return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index aecb78e0519f..3e94c3ba1ba2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -1283,3 +1283,40 @@ int amdgpu_vcn_psp_update_sram(struct amdgpu_device *adev, int inst_idx, return psp_execute_ip_fw_load(&adev->psp, &ucode); } + +static ssize_t amdgpu_get_vcn_reset_mask(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = drm_to_adev(ddev); + + if (!adev) + return -ENODEV; + + return amdgpu_show_reset_mask(buf, adev->vcn.supported_reset); +} + +static DEVICE_ATTR(vcn_reset_mask, 0444, + amdgpu_get_vcn_reset_mask, NULL); + +int amdgpu_vcn_sysfs_reset_mask_init(struct amdgpu_device *adev) +{ + int r = 0; + + if (adev->vcn.num_vcn_inst) { + r = device_create_file(adev->dev, &dev_attr_vcn_reset_mask); + if (r) + return r; + } + + return r; +} + +void amdgpu_vcn_sysfs_reset_mask_fini(struct amdgpu_device *adev) +{ + if (adev->dev->kobj.sd) { + if (adev->vcn.num_vcn_inst) + device_remove_file(adev->dev, &dev_attr_vcn_reset_mask); + } +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index 765b809d48a2..1e32311c1dff 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -333,6 +333,8 @@ struct amdgpu_vcn { /* IP reg dump */ uint32_t *ip_dump; + + uint32_t supported_reset; }; struct amdgpu_fw_shared_rb_ptrs_struct { @@ -519,5 +521,7 @@ int amdgpu_vcn_ras_sw_init(struct amdgpu_device *adev); int amdgpu_vcn_psp_update_sram(struct amdgpu_device *adev, int inst_idx, enum AMDGPU_UCODE_ID ucode_id); int amdgpu_vcn_save_vcpu_bo(struct amdgpu_device *adev); +int amdgpu_vcn_sysfs_reset_mask_init(struct amdgpu_device *adev); +void amdgpu_vcn_sysfs_reset_mask_fini(struct amdgpu_device *adev); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c index 3e6f9dfb61bb..110b120d7375 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c @@ -904,8 +904,10 @@ int amdgpu_vpe_sysfs_reset_mask_init(struct amdgpu_device *adev) void amdgpu_vpe_sysfs_reset_mask_fini(struct amdgpu_device *adev) { - if (adev->vpe.num_instances) - device_remove_file(adev->dev, &dev_attr_vpe_reset_mask); + if (adev->dev->kobj.sd) { + if (adev->vpe.num_instances) + device_remove_file(adev->dev, &dev_attr_vpe_reset_mask); + } } static const struct amdgpu_ring_funcs vpe_ring_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index b47422b0b5b1..74b4349e345a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -40,6 +40,11 @@ #define smnPCS_GOPX1_PCS_ERROR_STATUS 0x12200210 #define smnPCS_GOPX1_PCS_ERROR_NONCORRECTABLE_MASK 0x12200218 +#define XGMI_STATE_DISABLE 0xD1 +#define XGMI_STATE_LS0 0x81 +#define XGMI_LINK_ACTIVE 1 +#define XGMI_LINK_INACTIVE 0 + static DEFINE_MUTEX(xgmi_mutex); #define AMDGPU_MAX_XGMI_DEVICE_PER_HIVE 4 @@ -289,6 +294,42 @@ static const struct amdgpu_pcs_ras_field xgmi3x16_pcs_ras_fields[] = { SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, RxCMDPktErr)}, }; +static u32 xgmi_v6_4_get_link_status(struct amdgpu_device *adev, int global_link_num) +{ + const u32 smnpcs_xgmi3x16_pcs_state_hist1 = 0x11a00070; + const int xgmi_inst = 2; + u32 link_inst; + u64 addr; + + link_inst = global_link_num % xgmi_inst; + + addr = (smnpcs_xgmi3x16_pcs_state_hist1 | (link_inst << 20)) + + adev->asic_funcs->encode_ext_smn_addressing(global_link_num / xgmi_inst); + + return RREG32_PCIE_EXT(addr); +} + +int amdgpu_get_xgmi_link_status(struct amdgpu_device *adev, int global_link_num) +{ + u32 xgmi_state_reg_val; + + switch (amdgpu_ip_version(adev, XGMI_HWIP, 0)) { + case IP_VERSION(6, 4, 0): + xgmi_state_reg_val = xgmi_v6_4_get_link_status(adev, global_link_num); + break; + default: + return -EOPNOTSUPP; + } + + if ((xgmi_state_reg_val & 0xFF) == XGMI_STATE_DISABLE) + return -ENOLINK; + + if ((xgmi_state_reg_val & 0xFF) == XGMI_STATE_LS0) + return XGMI_LINK_ACTIVE; + + return XGMI_LINK_INACTIVE; +} + /** * DOC: AMDGPU XGMI Support * diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h index 8cc7ab38db7c..d1282b4c6348 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h @@ -84,5 +84,7 @@ int amdgpu_xgmi_reset_on_init(struct amdgpu_device *adev); int amdgpu_xgmi_request_nps_change(struct amdgpu_device *adev, struct amdgpu_hive_info *hive, int req_nps_mode); +int amdgpu_get_xgmi_link_status(struct amdgpu_device *adev, + int global_link_num); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c index 483a441b46aa..621aeca53880 100644 --- a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c +++ b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c @@ -254,8 +254,8 @@ static void df_v3_6_sw_init(struct amdgpu_device *adev) static void df_v3_6_sw_fini(struct amdgpu_device *adev) { - - device_remove_file(adev->dev, &dev_attr_df_cntr_avail); + if (adev->dev->kobj.sd) + device_remove_file(adev->dev, &dev_attr_df_cntr_avail); } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index 07f45f1a503a..b6016f11956e 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c @@ -87,9 +87,14 @@ static void gmc_v7_0_init_golden_registers(struct amdgpu_device *adev) static void gmc_v7_0_mc_stop(struct amdgpu_device *adev) { + struct amdgpu_ip_block *ip_block; u32 blackout; - gmc_v7_0_wait_for_idle((void *)adev); + ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GMC); + if (!ip_block) + return; + + gmc_v7_0_wait_for_idle(ip_block); blackout = RREG32(mmMC_SHARED_BLACKOUT_CNTL); if (REG_GET_FIELD(blackout, MC_SHARED_BLACKOUT_CNTL, BLACKOUT_MODE) != 1) { @@ -251,9 +256,14 @@ static void gmc_v7_0_vram_gtt_location(struct amdgpu_device *adev, */ static void gmc_v7_0_mc_program(struct amdgpu_device *adev) { + struct amdgpu_ip_block *ip_block; u32 tmp; int i, j; + ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GMC); + if (!ip_block) + return; + /* Initialize HDP */ for (i = 0, j = 0; i < 32; i++, j += 0x6) { WREG32((0xb05 + j), 0x00000000); @@ -264,7 +274,7 @@ static void gmc_v7_0_mc_program(struct amdgpu_device *adev) } WREG32(mmHDP_REG_COHERENCY_FLUSH_CNTL, 0); - if (gmc_v7_0_wait_for_idle((void *)adev)) + if (gmc_v7_0_wait_for_idle(ip_block)) dev_warn(adev->dev, "Wait for MC idle timedout !\n"); if (adev->mode_info.num_crtc) { @@ -288,7 +298,7 @@ static void gmc_v7_0_mc_program(struct amdgpu_device *adev) WREG32(mmMC_VM_AGP_BASE, 0); WREG32(mmMC_VM_AGP_TOP, adev->gmc.agp_end >> 22); WREG32(mmMC_VM_AGP_BOT, adev->gmc.agp_start >> 22); - if (gmc_v7_0_wait_for_idle((void *)adev)) + if (gmc_v7_0_wait_for_idle(ip_block)) dev_warn(adev->dev, "Wait for MC idle timedout !\n"); WREG32(mmBIF_FB_EN, BIF_FB_EN__FB_READ_EN_MASK | BIF_FB_EN__FB_WRITE_EN_MASK); @@ -1183,7 +1193,7 @@ static int gmc_v7_0_soft_reset(struct amdgpu_ip_block *ip_block) if (srbm_soft_reset) { gmc_v7_0_mc_stop(adev); - if (gmc_v7_0_wait_for_idle((void *)adev)) + if (gmc_v7_0_wait_for_idle(ip_block)) dev_warn(adev->dev, "Wait for GMC idle timed out !\n"); tmp = RREG32(mmSRBM_SOFT_RESET); diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c index 03b8b7cd5229..7319299f25ae 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c @@ -604,7 +604,7 @@ static void jpeg_v1_0_set_irq_funcs(struct amdgpu_device *adev) static void jpeg_v1_0_ring_begin_use(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - bool set_clocks = !cancel_delayed_work_sync(&adev->vcn.idle_work); + bool set_clocks = !cancel_delayed_work_sync(&adev->jpeg.idle_work); int cnt = 0; mutex_lock(&adev->vcn.vcn1_jpeg1_workaround); diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c index d6823fb45d32..6e29b69894a5 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c @@ -150,7 +150,7 @@ static int jpeg_v2_0_hw_fini(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; - cancel_delayed_work_sync(&adev->vcn.idle_work); + cancel_delayed_work_sync(&adev->jpeg.idle_work); if (adev->jpeg.cur_state != AMD_PG_STATE_GATE && RREG32_SOC15(JPEG, 0, mmUVD_JRBC_STATUS)) diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c index 5063a38801d6..9ac421486f05 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c @@ -211,7 +211,7 @@ static int jpeg_v2_5_hw_fini(struct amdgpu_ip_block *ip_block) struct amdgpu_device *adev = ip_block->adev; int i; - cancel_delayed_work_sync(&adev->vcn.idle_work); + cancel_delayed_work_sync(&adev->jpeg.idle_work); for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { if (adev->jpeg.harvest_config & (1 << i)) diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c index 10adbb7cbf53..e0df6800502c 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c @@ -164,7 +164,7 @@ static int jpeg_v3_0_hw_fini(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; - cancel_delayed_work_sync(&adev->vcn.idle_work); + cancel_delayed_work_sync(&adev->jpeg.idle_work); if (adev->jpeg.cur_state != AMD_PG_STATE_GATE && RREG32_SOC15(JPEG, 0, mmUVD_JRBC_STATUS)) diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c index 193dfac5dc76..eca1963c33b6 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c @@ -202,7 +202,7 @@ static int jpeg_v4_0_hw_fini(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; - cancel_delayed_work_sync(&adev->vcn.idle_work); + cancel_delayed_work_sync(&adev->jpeg.idle_work); if (!amdgpu_sriov_vf(adev)) { if (adev->jpeg.cur_state != AMD_PG_STATE_GATE && RREG32_SOC15(JPEG, 0, regUVD_JRBC_STATUS)) diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c index b48e2412e6cc..1d9e3b101c3a 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c @@ -227,7 +227,7 @@ static int jpeg_v4_0_5_hw_fini(struct amdgpu_ip_block *ip_block) struct amdgpu_device *adev = ip_block->adev; int i; - cancel_delayed_work_sync(&adev->vcn.idle_work); + cancel_delayed_work_sync(&adev->jpeg.idle_work); for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { if (adev->jpeg.harvest_config & (1 << i)) diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c index 686f9605239d..58fb1e5fa89c 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c @@ -168,7 +168,7 @@ static int jpeg_v5_0_0_hw_fini(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; - cancel_delayed_work_sync(&adev->vcn.idle_work); + cancel_delayed_work_sync(&adev->jpeg.idle_work); if (adev->jpeg.cur_state != AMD_PG_STATE_GATE && RREG32_SOC15(JPEG, 0, regUVD_JRBC_STATUS)) diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c index 7a9adfda5814..814ab59fdd4a 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c @@ -275,6 +275,15 @@ static void nbio_v7_11_init_registers(struct amdgpu_device *adev) if (def != data) WREG32_SOC15(NBIO, 0, regBIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3, data); + switch (adev->ip_versions[NBIO_HWIP][0]) { + case IP_VERSION(7, 11, 0): + case IP_VERSION(7, 11, 1): + case IP_VERSION(7, 11, 2): + case IP_VERSION(7, 11, 3): + data = RREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF5_STRAP4) & ~BIT(23); + WREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF5_STRAP4, data); + break; + } } static void nbio_v7_11_update_medium_grain_clock_gating(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/sienna_cichlid.c b/drivers/gpu/drm/amd/amdgpu/sienna_cichlid.c index 9b01e074af47..2594467bdd87 100644 --- a/drivers/gpu/drm/amd/amdgpu/sienna_cichlid.c +++ b/drivers/gpu/drm/amd/amdgpu/sienna_cichlid.c @@ -220,6 +220,7 @@ sienna_cichlid_mode2_restore_hwcontext(struct amdgpu_reset_control *reset_ctl, int r; struct amdgpu_device *tmp_adev = (struct amdgpu_device *)reset_ctl->handle; + amdgpu_set_init_level(tmp_adev, AMDGPU_INIT_LEVEL_RESET_RECOVERY); dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n"); r = sienna_cichlid_mode2_restore_ip(tmp_adev); @@ -237,6 +238,7 @@ sienna_cichlid_mode2_restore_hwcontext(struct amdgpu_reset_control *reset_ctl, amdgpu_irq_gpu_reset_resume_helper(tmp_adev); + amdgpu_set_init_level(tmp_adev, AMDGPU_INIT_LEVEL_DEFAULT); r = amdgpu_ib_ring_tests(tmp_adev); if (r) { dev_err(tmp_adev->dev, diff --git a/drivers/gpu/drm/amd/amdgpu/smu_v13_0_10.c b/drivers/gpu/drm/amd/amdgpu/smu_v13_0_10.c index e70ebad3f9fa..70569ea906bc 100644 --- a/drivers/gpu/drm/amd/amdgpu/smu_v13_0_10.c +++ b/drivers/gpu/drm/amd/amdgpu/smu_v13_0_10.c @@ -221,6 +221,7 @@ smu_v13_0_10_mode2_restore_hwcontext(struct amdgpu_reset_control *reset_ctl, int r; struct amdgpu_device *tmp_adev = (struct amdgpu_device *)reset_ctl->handle; + amdgpu_set_init_level(tmp_adev, AMDGPU_INIT_LEVEL_RESET_RECOVERY); dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n"); r = smu_v13_0_10_mode2_restore_ip(tmp_adev); @@ -234,6 +235,7 @@ smu_v13_0_10_mode2_restore_hwcontext(struct amdgpu_reset_control *reset_ctl, amdgpu_irq_gpu_reset_resume_helper(tmp_adev); + amdgpu_set_init_level(tmp_adev, AMDGPU_INIT_LEVEL_DEFAULT); r = amdgpu_ib_ring_tests(tmp_adev); if (r) { dev_err(tmp_adev->dev, diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c index 5512259cac79..fcc8511e91ee 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c @@ -225,6 +225,10 @@ static int vcn_v4_0_sw_init(struct amdgpu_ip_block *ip_block) vcn_v4_0_fw_shared_init(adev, i); } + /* TODO: Add queue reset mask when FW fully supports it */ + adev->vcn.supported_reset = + amdgpu_get_soft_full_reset_mask(&adev->vcn.inst[0].ring_enc[0]); + if (amdgpu_sriov_vf(adev)) { r = amdgpu_virt_alloc_mm_table(adev); if (r) @@ -247,6 +251,10 @@ static int vcn_v4_0_sw_init(struct amdgpu_ip_block *ip_block) adev->vcn.ip_dump = ptr; } + r = amdgpu_vcn_sysfs_reset_mask_init(adev); + if (r) + return r; + return 0; } @@ -284,6 +292,7 @@ static int vcn_v4_0_sw_fini(struct amdgpu_ip_block *ip_block) if (r) return r; + amdgpu_vcn_sysfs_reset_mask_fini(adev); r = amdgpu_vcn_sw_fini(adev); kfree(adev->vcn.ip_dump); diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c index cf808a153fce..3f69b9b2bcd0 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c @@ -123,6 +123,20 @@ static int vcn_v4_0_3_early_init(struct amdgpu_ip_block *ip_block) return amdgpu_vcn_early_init(adev); } +static int vcn_v4_0_3_fw_shared_init(struct amdgpu_device *adev, int inst_idx) +{ + struct amdgpu_vcn4_fw_shared *fw_shared; + + fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr; + fw_shared->present_flag_0 = cpu_to_le32(AMDGPU_FW_SHARED_FLAG_0_UNIFIED_QUEUE); + fw_shared->sq.is_enabled = 1; + + if (amdgpu_vcnfw_log) + amdgpu_vcn_fwlog_init(&adev->vcn.inst[inst_idx]); + + return 0; +} + /** * vcn_v4_0_3_sw_init - sw init for VCN block * @@ -155,8 +169,6 @@ static int vcn_v4_0_3_sw_init(struct amdgpu_ip_block *ip_block) return r; for (i = 0; i < adev->vcn.num_vcn_inst; i++) { - volatile struct amdgpu_vcn4_fw_shared *fw_shared; - vcn_inst = GET_INST(VCN, i); ring = &adev->vcn.inst[i].ring_enc[0]; @@ -179,14 +191,13 @@ static int vcn_v4_0_3_sw_init(struct amdgpu_ip_block *ip_block) if (r) return r; - fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; - fw_shared->present_flag_0 = cpu_to_le32(AMDGPU_FW_SHARED_FLAG_0_UNIFIED_QUEUE); - fw_shared->sq.is_enabled = true; - - if (amdgpu_vcnfw_log) - amdgpu_vcn_fwlog_init(&adev->vcn.inst[i]); + vcn_v4_0_3_fw_shared_init(adev, i); } + /* TODO: Add queue reset mask when FW fully supports it */ + adev->vcn.supported_reset = + amdgpu_get_soft_full_reset_mask(&adev->vcn.inst[0].ring_enc[0]); + if (amdgpu_sriov_vf(adev)) { r = amdgpu_virt_alloc_mm_table(adev); if (r) @@ -213,6 +224,10 @@ static int vcn_v4_0_3_sw_init(struct amdgpu_ip_block *ip_block) adev->vcn.ip_dump = ptr; } + r = amdgpu_vcn_sysfs_reset_mask_init(adev); + if (r) + return r; + return 0; } @@ -246,6 +261,7 @@ static int vcn_v4_0_3_sw_fini(struct amdgpu_ip_block *ip_block) if (r) return r; + amdgpu_vcn_sysfs_reset_mask_fini(adev); r = amdgpu_vcn_sw_fini(adev); kfree(adev->vcn.ip_dump); @@ -280,6 +296,8 @@ static int vcn_v4_0_3_hw_init(struct amdgpu_ip_block *ip_block) } } else { for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + struct amdgpu_vcn4_fw_shared *fw_shared; + vcn_inst = GET_INST(VCN, i); ring = &adev->vcn.inst[i].ring_enc[0]; @@ -303,6 +321,11 @@ static int vcn_v4_0_3_hw_init(struct amdgpu_ip_block *ip_block) regVCN_RB1_DB_CTRL); } + /* Re-init fw_shared when RAS fatal error occurred */ + fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; + if (!fw_shared->sq.is_enabled) + vcn_v4_0_3_fw_shared_init(adev, i); + r = amdgpu_ring_test_helper(ring); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c index fe2cc1a80c13..bd3d2bbdc16b 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c @@ -170,6 +170,10 @@ static int vcn_v5_0_0_sw_init(struct amdgpu_ip_block *ip_block) amdgpu_vcn_fwlog_init(&adev->vcn.inst[i]); } + /* TODO: Add queue reset mask when FW fully supports it */ + adev->vcn.supported_reset = + amdgpu_get_soft_full_reset_mask(&adev->vcn.inst[0].ring_enc[0]); + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) adev->vcn.pause_dpg_mode = vcn_v5_0_0_pause_dpg_mode; @@ -181,6 +185,11 @@ static int vcn_v5_0_0_sw_init(struct amdgpu_ip_block *ip_block) } else { adev->vcn.ip_dump = ptr; } + + r = amdgpu_vcn_sysfs_reset_mask_init(adev); + if (r) + return r; + return 0; } @@ -215,6 +224,7 @@ static int vcn_v5_0_0_sw_fini(struct amdgpu_ip_block *ip_block) if (r) return r; + amdgpu_vcn_sysfs_reset_mask_fini(adev); r = amdgpu_vcn_sw_fini(adev); kfree(adev->vcn.ip_dump); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c index 4843dcb9a5f7..2b0a830f5b29 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c @@ -125,7 +125,7 @@ static bool kq_initialize(struct kernel_queue *kq, struct kfd_node *dev, memset(kq->pq_kernel_addr, 0, queue_size); memset(kq->rptr_kernel, 0, sizeof(*kq->rptr_kernel)); - memset(kq->wptr_kernel, 0, sizeof(*kq->wptr_kernel)); + memset(kq->wptr_kernel, 0, dev->kfd->device_info.doorbell_size); prop.queue_size = queue_size; prop.is_interop = false; @@ -306,12 +306,17 @@ int kq_submit_packet(struct kernel_queue *kq) if (amdgpu_amdkfd_is_fed(kq->dev->adev)) return -EIO; + /* Make sure ring buffer is updated before wptr updated */ + mb(); + if (kq->dev->kfd->device_info.doorbell_size == 8) { *kq->wptr64_kernel = kq->pending_wptr64; + mb(); /* Make sure wptr updated before ring doorbell */ write_kernel_doorbell64(kq->queue->properties.doorbell_ptr, kq->pending_wptr64); } else { *kq->wptr_kernel = kq->pending_wptr; + mb(); /* Make sure wptr updated before ring doorbell */ write_kernel_doorbell(kq->queue->properties.doorbell_ptr, kq->pending_wptr); } diff --git a/drivers/gpu/drm/amd/display/Kconfig b/drivers/gpu/drm/amd/display/Kconfig index df17e79c45c7..11e3f2f3b174 100644 --- a/drivers/gpu/drm/amd/display/Kconfig +++ b/drivers/gpu/drm/amd/display/Kconfig @@ -7,20 +7,21 @@ menu "Display Engine Configuration" config DRM_AMD_DC bool "AMD DC - Enable new display engine" default y - depends on BROKEN || !CC_IS_CLANG || ARM64 || RISCV || SPARC64 || X86_64 + depends on BROKEN || !CC_IS_CLANG || ARM64 || LOONGARCH || RISCV || SPARC64 || X86_64 select SND_HDA_COMPONENT if SND_HDA_CORE # !CC_IS_CLANG: https://github.com/ClangBuiltLinux/linux/issues/1752 - select DRM_AMD_DC_FP if ARCH_HAS_KERNEL_FPU_SUPPORT && !(CC_IS_CLANG && (ARM64 || RISCV)) + select DRM_AMD_DC_FP if ARCH_HAS_KERNEL_FPU_SUPPORT && !(CC_IS_CLANG && (ARM64 || LOONGARCH || RISCV)) help Choose this option if you want to use the new display engine support for AMDGPU. This adds required support for Vega and Raven ASICs. - calculate_bandwidth() is presently broken on all !(X86_64 || SPARC64 || ARM64) - architectures built with Clang (all released versions), whereby the stack - frame gets blown up to well over 5k. This would cause an immediate kernel - panic on most architectures. We'll revert this when the following bug report - has been resolved: https://github.com/llvm/llvm-project/issues/41896. + calculate_bandwidth() is presently broken on all !(X86_64 || SPARC64 || + ARM64 || LOONGARCH || RISCV) architectures built with Clang (all released + versions), whereby the stack frame gets blown up to well over 5k. This + would cause an immediate kernel panic on most architectures. We'll revert + this when the following bug report has been resolved: + https://github.com/llvm/llvm-project/issues/41896. config DRM_AMD_DC_FP def_bool n diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 7872c6cabb14..1dd26d5df6b9 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -3141,7 +3141,10 @@ static void restore_planes_and_stream_state( return; for (i = 0; i < status->plane_count; i++) { + /* refcount will always be valid, restore everything else */ + struct kref refcount = status->plane_states[i]->refcount; *status->plane_states[i] = scratch->plane_states[i]; + status->plane_states[i]->refcount = refcount; } *stream = scratch->stream_state; } @@ -6097,11 +6100,11 @@ struct dc_power_profile dc_get_power_profile_for_dc_state(const struct dc_state { struct dc_power_profile profile = { 0 }; - if (!context || !context->clk_mgr || !context->clk_mgr->ctx || !context->clk_mgr->ctx->dc) + profile.power_level = !context->bw_ctx.bw.dcn.clk.p_state_change_support; + if (!context->clk_mgr || !context->clk_mgr->ctx || !context->clk_mgr->ctx->dc) return profile; struct dc *dc = context->clk_mgr->ctx->dc; - if (dc->res_pool->funcs->get_power_profile) profile.power_level = dc->res_pool->funcs->get_power_profile(context); return profile; diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c index 0419ee7f22a5..252af83e34a5 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c @@ -898,6 +898,9 @@ void hwss_setup_dpp(union block_sequence_params *params) struct dpp *dpp = pipe_ctx->plane_res.dpp; struct dc_plane_state *plane_state = pipe_ctx->plane_state; + if (!plane_state) + return; + if (dpp && dpp->funcs->dpp_setup) { // program the input csc dpp->funcs->dpp_setup(dpp, diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index 33125b95c3a1..619fad17de55 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -1501,6 +1501,10 @@ bool resource_build_scaling_params(struct pipe_ctx *pipe_ctx) res = spl_calculate_scaler_params(spl_in, spl_out); // Convert respective out params from SPL to scaler data translate_SPL_out_params_to_pipe_ctx(pipe_ctx, spl_out); + + /* Ignore scaler failure if pipe context plane is phantom plane */ + if (!res && plane_state->is_phantom) + res = true; } else { #endif /* depends on h_active */ @@ -1571,6 +1575,10 @@ bool resource_build_scaling_params(struct pipe_ctx *pipe_ctx) &plane_state->scaling_quality); } + /* Ignore scaler failure if pipe context plane is phantom plane */ + if (!res && plane_state->is_phantom) + res = true; + if (res && (pipe_ctx->plane_res.scl_data.taps.v_taps != temp.v_taps || pipe_ctx->plane_res.scl_data.taps.h_taps != temp.h_taps || pipe_ctx->plane_res.scl_data.taps.v_taps_c != temp.v_taps_c || diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index e143fab00a86..104051935884 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -55,7 +55,7 @@ struct aux_payload; struct set_config_cmd_payload; struct dmub_notification; -#define DC_VER "3.2.309" +#define DC_VER "3.2.310" #define MAX_SURFACES 3 #define MAX_PLANES 6 diff --git a/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c b/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c index 838d72eaa87f..b363f5360818 100644 --- a/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c +++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c @@ -1392,10 +1392,10 @@ static void dccg35_set_dtbclk_dto( /* The recommended programming sequence to enable DTBCLK DTO to generate * valid pixel HPO DPSTREAM ENCODER, specifies that DTO source select should - * be set only after DTO is enabled + * be set only after DTO is enabled. + * PIPEx_DTO_SRC_SEL should not be programmed during DTBCLK update since OTG may still be on, and the + * programming is handled in program_pix_clk() regardless, so it can be removed from here. */ - REG_UPDATE(OTG_PIXEL_RATE_CNTL[params->otg_inst], - PIPE_DTO_SRC_SEL[params->otg_inst], 2); } else { switch (params->otg_inst) { case 0: @@ -1412,9 +1412,12 @@ static void dccg35_set_dtbclk_dto( break; } - REG_UPDATE_2(OTG_PIXEL_RATE_CNTL[params->otg_inst], - DTBCLK_DTO_ENABLE[params->otg_inst], 0, - PIPE_DTO_SRC_SEL[params->otg_inst], params->is_hdmi ? 0 : 1); + /** + * PIPEx_DTO_SRC_SEL should not be programmed during DTBCLK update since OTG may still be on, and the + * programming is handled in program_pix_clk() regardless, so it can be removed from here. + */ + REG_UPDATE(OTG_PIXEL_RATE_CNTL[params->otg_inst], + DTBCLK_DTO_ENABLE[params->otg_inst], 0); REG_WRITE(DTBCLK_DTO_MODULO[params->otg_inst], 0); REG_WRITE(DTBCLK_DTO_PHASE[params->otg_inst], 0); diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c index 92e43a1e4dd4..601320b1be81 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c @@ -11,6 +11,7 @@ #define DML2_MAX_FMT_420_BUFFER_WIDTH 4096 #define DML_MAX_NUM_OF_SLICES_PER_DSC 4 +#define ALLOW_SDPIF_RATE_LIMIT_PRE_CSTATE const char *dml2_core_internal_bw_type_str(enum dml2_core_internal_bw_type bw_type) { @@ -3886,6 +3887,10 @@ static void CalculateSwathAndDETConfiguration(struct dml2_core_internal_scratch #endif *p->hw_debug5 = false; +#ifdef ALLOW_SDPIF_RATE_LIMIT_PRE_CSTATE + if (p->NumberOfActiveSurfaces > 1) + *p->hw_debug5 = true; +#else for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { if (!(p->mrq_present) && (!(*p->UnboundedRequestEnabled)) && (TotalActiveDPP == 1) && p->display_cfg->plane_descriptors[k].surface.dcc.enable @@ -3901,6 +3906,7 @@ static void CalculateSwathAndDETConfiguration(struct dml2_core_internal_scratch dml2_printf("DML::%s: k=%u hw_debug5 = %u\n", __func__, k, *p->hw_debug5); #endif } +#endif } static enum dml2_odm_mode DecideODMMode(unsigned int HActive, diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_resource_mgmt.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_resource_mgmt.c index 6eccf0241d85..1ed21c1b86a5 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_resource_mgmt.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_resource_mgmt.c @@ -258,12 +258,25 @@ static unsigned int find_preferred_pipe_candidates(const struct dc_state *existi * However this condition comes with a caveat. We need to ignore pipes that will * require a change in OPP but still have the same stream id. For example during * an MPC to ODM transiton. + * + * Adding check to avoid pipe select on the head pipe by utilizing dc resource + * helper function resource_get_primary_dpp_pipe and comparing the pipe index. */ if (existing_state) { for (i = 0; i < pipe_count; i++) { if (existing_state->res_ctx.pipe_ctx[i].stream && existing_state->res_ctx.pipe_ctx[i].stream->stream_id == stream_id) { + struct pipe_ctx *head_pipe = + resource_is_pipe_type(&existing_state->res_ctx.pipe_ctx[i], DPP_PIPE) ? + resource_get_primary_dpp_pipe(&existing_state->res_ctx.pipe_ctx[i]) : + NULL; + + // we should always respect the head pipe from selection + if (head_pipe && head_pipe->pipe_idx == i) + continue; if (existing_state->res_ctx.pipe_ctx[i].plane_res.hubp && - existing_state->res_ctx.pipe_ctx[i].plane_res.hubp->opp_id != i) + existing_state->res_ctx.pipe_ctx[i].plane_res.hubp->opp_id != i && + (existing_state->res_ctx.pipe_ctx[i].prev_odm_pipe || + existing_state->res_ctx.pipe_ctx[i].next_odm_pipe)) continue; preferred_pipe_candidates[num_preferred_candidates++] = i; @@ -292,6 +305,14 @@ static unsigned int find_last_resort_pipe_candidates(const struct dc_state *exis */ if (existing_state) { for (i = 0; i < pipe_count; i++) { + struct pipe_ctx *head_pipe = + resource_is_pipe_type(&existing_state->res_ctx.pipe_ctx[i], DPP_PIPE) ? + resource_get_primary_dpp_pipe(&existing_state->res_ctx.pipe_ctx[i]) : + NULL; + + // we should always respect the head pipe from selection + if (head_pipe && head_pipe->pipe_idx == i) + continue; if ((existing_state->res_ctx.pipe_ctx[i].plane_res.hubp && existing_state->res_ctx.pipe_ctx[i].plane_res.hubp->opp_id != i) || existing_state->res_ctx.pipe_ctx[i].stream_res.tg) diff --git a/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c b/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c index ebd5df1a36e8..d9aaebfa3a0a 100644 --- a/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c +++ b/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c @@ -1093,14 +1093,11 @@ static bool setup_dsc_config( if (!is_dsc_possible) goto done; - // Final decission: can we do DSC or not? - if (is_dsc_possible) { - // Fill out the rest of DSC settings - dsc_cfg->block_pred_enable = dsc_common_caps.is_block_pred_supported; - dsc_cfg->linebuf_depth = dsc_common_caps.lb_bit_depth; - dsc_cfg->version_minor = (dsc_common_caps.dsc_version & 0xf0) >> 4; - dsc_cfg->is_dp = dsc_sink_caps->is_dp; - } + /* Fill out the rest of DSC settings */ + dsc_cfg->block_pred_enable = dsc_common_caps.is_block_pred_supported; + dsc_cfg->linebuf_depth = dsc_common_caps.lb_bit_depth; + dsc_cfg->version_minor = (dsc_common_caps.dsc_version & 0xf0) >> 4; + dsc_cfg->is_dp = dsc_sink_caps->is_dp; done: if (!is_dsc_possible) diff --git a/drivers/gpu/drm/amd/display/dc/hubbub/dcn10/dcn10_hubbub.h b/drivers/gpu/drm/amd/display/dc/hubbub/dcn10/dcn10_hubbub.h index 4bd1dda07719..9fbd45c7dfef 100644 --- a/drivers/gpu/drm/amd/display/dc/hubbub/dcn10/dcn10_hubbub.h +++ b/drivers/gpu/drm/amd/display/dc/hubbub/dcn10/dcn10_hubbub.h @@ -200,6 +200,7 @@ struct dcn_hubbub_registers { uint32_t DCHUBBUB_ARB_FRAC_URG_BW_MALL_B; uint32_t DCHUBBUB_TIMEOUT_DETECTION_CTRL1; uint32_t DCHUBBUB_TIMEOUT_DETECTION_CTRL2; + uint32_t DCHUBBUB_CTRL_STATUS; }; #define HUBBUB_REG_FIELD_LIST_DCN32(type) \ @@ -320,7 +321,12 @@ struct dcn_hubbub_registers { type DCHUBBUB_TIMEOUT_REQ_STALL_THRESHOLD;\ type DCHUBBUB_TIMEOUT_PSTATE_STALL_THRESHOLD;\ type DCHUBBUB_TIMEOUT_DETECTION_EN;\ - type DCHUBBUB_TIMEOUT_TIMER_RESET + type DCHUBBUB_TIMEOUT_TIMER_RESET;\ + type ROB_UNDERFLOW_STATUS;\ + type ROB_OVERFLOW_STATUS;\ + type ROB_OVERFLOW_CLEAR;\ + type DCHUBBUB_HW_DEBUG;\ + type CSTATE_SWATH_CHK_GOOD_MODE #define HUBBUB_STUTTER_REG_FIELD_LIST(type) \ type DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_A;\ diff --git a/drivers/gpu/drm/amd/display/dc/hubbub/dcn20/dcn20_hubbub.h b/drivers/gpu/drm/amd/display/dc/hubbub/dcn20/dcn20_hubbub.h index 036bb3e6c957..46d8f5c70750 100644 --- a/drivers/gpu/drm/amd/display/dc/hubbub/dcn20/dcn20_hubbub.h +++ b/drivers/gpu/drm/amd/display/dc/hubbub/dcn20/dcn20_hubbub.h @@ -96,6 +96,7 @@ struct dcn20_hubbub { unsigned int det1_size; unsigned int det2_size; unsigned int det3_size; + bool allow_sdpif_rate_limit_when_cstate_req; }; void hubbub2_construct(struct dcn20_hubbub *hubbub, diff --git a/drivers/gpu/drm/amd/display/dc/hubbub/dcn401/dcn401_hubbub.c b/drivers/gpu/drm/amd/display/dc/hubbub/dcn401/dcn401_hubbub.c index 5d658e9bef64..92fab471b183 100644 --- a/drivers/gpu/drm/amd/display/dc/hubbub/dcn401/dcn401_hubbub.c +++ b/drivers/gpu/drm/amd/display/dc/hubbub/dcn401/dcn401_hubbub.c @@ -1192,15 +1192,35 @@ static void dcn401_wait_for_det_update(struct hubbub *hubbub, int hubp_inst) } } -static void dcn401_program_timeout_thresholds(struct hubbub *hubbub, struct dml2_display_arb_regs *arb_regs) +static bool dcn401_program_arbiter(struct hubbub *hubbub, struct dml2_display_arb_regs *arb_regs, bool safe_to_lower) { struct dcn20_hubbub *hubbub2 = TO_DCN20_HUBBUB(hubbub); + bool wm_pending = false; + uint32_t temp; + /* request backpressure and outstanding return threshold (unused)*/ //REG_UPDATE(DCHUBBUB_TIMEOUT_DETECTION_CTRL1, DCHUBBUB_TIMEOUT_REQ_STALL_THRESHOLD, arb_regs->req_stall_threshold); /* P-State stall threshold */ REG_UPDATE(DCHUBBUB_TIMEOUT_DETECTION_CTRL2, DCHUBBUB_TIMEOUT_PSTATE_STALL_THRESHOLD, arb_regs->pstate_stall_threshold); + + if (safe_to_lower || arb_regs->allow_sdpif_rate_limit_when_cstate_req > hubbub2->allow_sdpif_rate_limit_when_cstate_req) { + hubbub2->allow_sdpif_rate_limit_when_cstate_req = arb_regs->allow_sdpif_rate_limit_when_cstate_req; + + /* only update the required bits */ + REG_GET(DCHUBBUB_CTRL_STATUS, DCHUBBUB_HW_DEBUG, &temp); + if (hubbub2->allow_sdpif_rate_limit_when_cstate_req) { + temp |= (1 << 5); + } else { + temp &= ~(1 << 5); + } + REG_UPDATE(DCHUBBUB_CTRL_STATUS, DCHUBBUB_HW_DEBUG, temp); + } else { + wm_pending = true; + } + + return wm_pending; } static const struct hubbub_funcs hubbub4_01_funcs = { @@ -1226,7 +1246,7 @@ static const struct hubbub_funcs hubbub4_01_funcs = { .program_det_segments = dcn401_program_det_segments, .program_compbuf_segments = dcn401_program_compbuf_segments, .wait_for_det_update = dcn401_wait_for_det_update, - .program_timeout_thresholds = dcn401_program_timeout_thresholds, + .program_arbiter = dcn401_program_arbiter, }; void hubbub401_construct(struct dcn20_hubbub *hubbub2, diff --git a/drivers/gpu/drm/amd/display/dc/hubbub/dcn401/dcn401_hubbub.h b/drivers/gpu/drm/amd/display/dc/hubbub/dcn401/dcn401_hubbub.h index 5f1960722ebd..b1d9ea9d1c3d 100644 --- a/drivers/gpu/drm/amd/display/dc/hubbub/dcn401/dcn401_hubbub.h +++ b/drivers/gpu/drm/amd/display/dc/hubbub/dcn401/dcn401_hubbub.h @@ -128,7 +128,12 @@ HUBBUB_SF(DCHUBBUB_TIMEOUT_DETECTION_CTRL1, DCHUBBUB_TIMEOUT_REQ_STALL_THRESHOLD, mask_sh),\ HUBBUB_SF(DCHUBBUB_TIMEOUT_DETECTION_CTRL2, DCHUBBUB_TIMEOUT_PSTATE_STALL_THRESHOLD, mask_sh),\ HUBBUB_SF(DCHUBBUB_TIMEOUT_DETECTION_CTRL2, DCHUBBUB_TIMEOUT_DETECTION_EN, mask_sh),\ - HUBBUB_SF(DCHUBBUB_TIMEOUT_DETECTION_CTRL2, DCHUBBUB_TIMEOUT_TIMER_RESET, mask_sh) + HUBBUB_SF(DCHUBBUB_TIMEOUT_DETECTION_CTRL2, DCHUBBUB_TIMEOUT_TIMER_RESET, mask_sh),\ + HUBBUB_SF(DCHUBBUB_CTRL_STATUS, ROB_UNDERFLOW_STATUS, mask_sh),\ + HUBBUB_SF(DCHUBBUB_CTRL_STATUS, ROB_OVERFLOW_STATUS, mask_sh),\ + HUBBUB_SF(DCHUBBUB_CTRL_STATUS, ROB_OVERFLOW_CLEAR, mask_sh),\ + HUBBUB_SF(DCHUBBUB_CTRL_STATUS, DCHUBBUB_HW_DEBUG, mask_sh),\ + HUBBUB_SF(DCHUBBUB_CTRL_STATUS, CSTATE_SWATH_CHK_GOOD_MODE, mask_sh) bool hubbub401_program_urgent_watermarks( struct hubbub *hubbub, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c index 05424a9af58b..b029ec1b26d3 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c @@ -1925,9 +1925,9 @@ static void dcn20_program_pipe( dc->res_pool->hubbub, pipe_ctx->plane_res.hubp->inst, pipe_ctx->hubp_regs.det_size); } - if (pipe_ctx->update_flags.raw || - (pipe_ctx->plane_state && pipe_ctx->plane_state->update_flags.raw) || - pipe_ctx->stream->update_flags.raw) + if (pipe_ctx->plane_state && (pipe_ctx->update_flags.raw || + pipe_ctx->plane_state->update_flags.raw || + pipe_ctx->stream->update_flags.raw)) dcn20_update_dchubp_dpp(dc, pipe_ctx, context); if (pipe_ctx->plane_state && (pipe_ctx->update_flags.bits.enable || diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c index e8cc1bfa73f3..5de11e2837c0 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c @@ -1488,6 +1488,10 @@ void dcn401_prepare_bandwidth(struct dc *dc, &context->bw_ctx.bw.dcn.watermarks, dc->res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000, false); + /* update timeout thresholds */ + if (hubbub->funcs->program_arbiter) { + dc->wm_optimized_required |= hubbub->funcs->program_arbiter(hubbub, &context->bw_ctx.bw.dcn.arb_regs, false); + } /* decrease compbuf size */ if (hubbub->funcs->program_compbuf_segments) { @@ -1529,6 +1533,10 @@ void dcn401_optimize_bandwidth( &context->bw_ctx.bw.dcn.watermarks, dc->res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000, true); + /* update timeout thresholds */ + if (hubbub->funcs->program_arbiter) { + hubbub->funcs->program_arbiter(hubbub, &context->bw_ctx.bw.dcn.arb_regs, true); + } if (dc->clk_mgr->dc_mode_softmax_enabled) if (dc->clk_mgr->clks.dramclk_khz > dc->clk_mgr->bw_params->dc_mode_softmax_memclk * 1000 && @@ -1554,11 +1562,6 @@ void dcn401_optimize_bandwidth( pipe_ctx->dlg_regs.min_dst_y_next_start); } } - - /* update timeout thresholds */ - if (hubbub->funcs->program_timeout_thresholds) { - hubbub->funcs->program_timeout_thresholds(hubbub, &context->bw_ctx.bw.dcn.arb_regs); - } } void dcn401_fams2_global_control_lock(struct dc *dc, diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h index 6c1d41c0f099..52b745667ef7 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h @@ -228,7 +228,7 @@ struct hubbub_funcs { void (*program_det_segments)(struct hubbub *hubbub, int hubp_inst, unsigned det_buffer_size_seg); void (*program_compbuf_segments)(struct hubbub *hubbub, unsigned compbuf_size_seg, bool safe_to_increase); void (*wait_for_det_update)(struct hubbub *hubbub, int hubp_inst); - void (*program_timeout_thresholds)(struct hubbub *hubbub, struct dml2_display_arb_regs *arb_regs); + bool (*program_arbiter)(struct hubbub *hubbub, struct dml2_display_arb_regs *arb_regs, bool safe_to_lower); }; struct hubbub { diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.h index 7c8d61db153d..19568c359669 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.h +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.h @@ -612,7 +612,8 @@ void dcn401_prepare_mcache_programming(struct dc *dc, struct dc_state *context); SR(DCHUBBUB_SDPIF_CFG1), \ SR(DCHUBBUB_MEM_PWR_MODE_CTRL), \ SR(DCHUBBUB_TIMEOUT_DETECTION_CTRL1), \ - SR(DCHUBBUB_TIMEOUT_DETECTION_CTRL2) + SR(DCHUBBUB_TIMEOUT_DETECTION_CTRL2), \ + SR(DCHUBBUB_CTRL_STATUS) /* DCCG */ diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl.c b/drivers/gpu/drm/amd/display/dc/spl/dc_spl.c index 614276200aa0..73a65913cb12 100644 --- a/drivers/gpu/drm/amd/display/dc/spl/dc_spl.c +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl.c @@ -99,7 +99,7 @@ static struct spl_rect calculate_plane_rec_in_timing_active( * * recout_x = 128 + round(plane_x * 2304 / 1920) * recout_w = 128 + round((plane_x + plane_w) * 2304 / 1920) - recout_x - * recout_y = 0 + round(plane_y * 1440 / 1280) + * recout_y = 0 + round(plane_y * 1440 / 1200) * recout_h = 0 + round((plane_y + plane_h) * 1440 / 1200) - recout_y * * NOTE: fixed point division is not error free. To reduce errors @@ -739,14 +739,13 @@ static enum scl_mode spl_get_dscl_mode(const struct spl_in *spl_in, return SCL_MODE_SCALING_444_RGB_ENABLE; } - /* Bypass YUV if at 1:1 with no ISHARP or if doing 2:1 YUV - * downscale without EASF + /* + * Bypass YUV if Y is 1:1 with no ISHARP + * Do not bypass UV at 1:1 for cositing to be applied */ - if ((!enable_isharp) && (!enable_easf)) { + if (!enable_isharp) { if (data->ratios.horz.value == one && data->ratios.vert.value == one) return SCL_MODE_SCALING_420_LUMA_BYPASS; - if (data->ratios.horz_c.value == one && data->ratios.vert_c.value == one) - return SCL_MODE_SCALING_420_CHROMA_BYPASS; } return SCL_MODE_SCALING_420_YCBCR_ENABLE; @@ -933,6 +932,7 @@ static bool spl_get_optimal_number_of_taps( int min_taps_y, min_taps_c; enum lb_memory_config lb_config; bool skip_easf = false; + bool is_ycbcr = spl_dscl_is_video_format(spl_in->basic_in.format); if (spl_scratch->scl_data.viewport.width > spl_scratch->scl_data.h_active && max_downscale_src_width != 0 && @@ -1074,10 +1074,9 @@ static bool spl_get_optimal_number_of_taps( /* Sharpener requires scaler to be enabled, including for 1:1 * Check if ISHARP can be enabled - * If ISHARP is not enabled, for 1:1, set taps to 1 and disable - * EASF - * For case of 2:1 YUV where chroma is 1:1, set taps to 1 if - * EASF is not enabled + * If ISHARP is not enabled, set taps to 1 if ratio is 1:1 + * except for chroma taps. Keep previous taps so it can + * handle cositing */ *enable_isharp = spl_get_isharp_en(spl_in, spl_scratch); @@ -1087,20 +1086,28 @@ static bool spl_get_optimal_number_of_taps( spl_scratch->scl_data.taps.h_taps = 1; spl_scratch->scl_data.taps.v_taps = 1; - if (IDENTITY_RATIO(spl_scratch->scl_data.ratios.horz_c)) + if (IDENTITY_RATIO(spl_scratch->scl_data.ratios.horz_c) && !is_ycbcr) spl_scratch->scl_data.taps.h_taps_c = 1; - if (IDENTITY_RATIO(spl_scratch->scl_data.ratios.vert_c)) + if (IDENTITY_RATIO(spl_scratch->scl_data.ratios.vert_c) && !is_ycbcr) spl_scratch->scl_data.taps.v_taps_c = 1; *enable_easf_v = false; *enable_easf_h = false; } else { if ((!*enable_easf_h) && + (IDENTITY_RATIO(spl_scratch->scl_data.ratios.horz))) + spl_scratch->scl_data.taps.h_taps = 1; + + if ((!*enable_easf_v) && + (IDENTITY_RATIO(spl_scratch->scl_data.ratios.vert))) + spl_scratch->scl_data.taps.v_taps = 1; + + if ((!*enable_easf_h) && !is_ycbcr && (IDENTITY_RATIO(spl_scratch->scl_data.ratios.horz_c))) spl_scratch->scl_data.taps.h_taps_c = 1; - if ((!*enable_easf_v) && + if ((!*enable_easf_v) && !is_ycbcr && (IDENTITY_RATIO(spl_scratch->scl_data.ratios.vert_c))) spl_scratch->scl_data.taps.v_taps_c = 1; } @@ -1111,8 +1118,7 @@ static bool spl_get_optimal_number_of_taps( static void spl_set_black_color_data(enum spl_pixel_format format, struct scl_black_color *scl_black_color) { - bool ycbcr = format >= SPL_PIXEL_FORMAT_VIDEO_BEGIN - && format <= SPL_PIXEL_FORMAT_VIDEO_END; + bool ycbcr = spl_dscl_is_video_format(format); if (ycbcr) { scl_black_color->offset_rgb_y = BLACK_OFFSET_RGB_Y; scl_black_color->offset_rgb_cbcr = BLACK_OFFSET_CBCR; @@ -1746,6 +1752,32 @@ static void spl_set_isharp_data(struct dscl_prog_data *dscl_prog_data, spl_set_blur_scale_data(dscl_prog_data, data); } +/* Calculate recout, scaling ratio, and viewport, then get optimal number of taps */ +static bool spl_calculate_number_of_taps(struct spl_in *spl_in, struct spl_scratch *spl_scratch, struct spl_out *spl_out, + bool *enable_easf_v, bool *enable_easf_h, bool *enable_isharp) +{ + bool res = false; + + memset(spl_scratch, 0, sizeof(struct spl_scratch)); + spl_scratch->scl_data.h_active = spl_in->h_active; + spl_scratch->scl_data.v_active = spl_in->v_active; + + // All SPL calls + /* recout calculation */ + /* depends on h_active */ + spl_calculate_recout(spl_in, spl_scratch, spl_out); + /* depends on pixel format */ + spl_calculate_scaling_ratios(spl_in, spl_scratch, spl_out); + /* depends on scaling ratios and recout, does not calculate offset yet */ + spl_calculate_viewport_size(spl_in, spl_scratch); + + res = spl_get_optimal_number_of_taps( + spl_in->basic_out.max_downscale_src_width, spl_in, + spl_scratch, &spl_in->scaling_quality, enable_easf_v, + enable_easf_h, enable_isharp); + return res; +} + /* Calculate scaler parameters */ bool spl_calculate_scaler_params(struct spl_in *spl_in, struct spl_out *spl_out) { @@ -1760,23 +1792,9 @@ bool spl_calculate_scaler_params(struct spl_in *spl_in, struct spl_out *spl_out) bool enable_isharp = false; const struct spl_scaler_data *data = &spl_scratch.scl_data; - memset(&spl_scratch, 0, sizeof(struct spl_scratch)); - spl_scratch.scl_data.h_active = spl_in->h_active; - spl_scratch.scl_data.v_active = spl_in->v_active; + res = spl_calculate_number_of_taps(spl_in, &spl_scratch, spl_out, + &enable_easf_v, &enable_easf_h, &enable_isharp); - // All SPL calls - /* recout calculation */ - /* depends on h_active */ - spl_calculate_recout(spl_in, &spl_scratch, spl_out); - /* depends on pixel format */ - spl_calculate_scaling_ratios(spl_in, &spl_scratch, spl_out); - /* depends on scaling ratios and recout, does not calculate offset yet */ - spl_calculate_viewport_size(spl_in, &spl_scratch); - - res = spl_get_optimal_number_of_taps( - spl_in->basic_out.max_downscale_src_width, spl_in, - &spl_scratch, &spl_in->scaling_quality, &enable_easf_v, - &enable_easf_h, &enable_isharp); /* * Depends on recout, scaling ratios, h_active and taps * May need to re-check lb size after this in some obscure scenario @@ -1824,3 +1842,20 @@ bool spl_calculate_scaler_params(struct spl_in *spl_in, struct spl_out *spl_out) return res; } + +/* External interface to get number of taps only */ +bool spl_get_number_of_taps(struct spl_in *spl_in, struct spl_out *spl_out) +{ + bool res = false; + bool enable_easf_v = false; + bool enable_easf_h = false; + bool enable_isharp = false; + struct spl_scratch spl_scratch; + struct dscl_prog_data *dscl_prog_data = spl_out->dscl_prog_data; + const struct spl_scaler_data *data = &spl_scratch.scl_data; + + res = spl_calculate_number_of_taps(spl_in, &spl_scratch, spl_out, + &enable_easf_v, &enable_easf_h, &enable_isharp); + spl_set_taps_data(dscl_prog_data, data); + return res; +} diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl.h b/drivers/gpu/drm/amd/display/dc/spl/dc_spl.h index 205e59a2a8ee..02a2d6725ed5 100644 --- a/drivers/gpu/drm/amd/display/dc/spl/dc_spl.h +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl.h @@ -13,4 +13,6 @@ bool spl_calculate_scaler_params(struct spl_in *spl_in, struct spl_out *spl_out); +bool spl_get_number_of_taps(struct spl_in *spl_in, struct spl_out *spl_out); + #endif /* __DC_SPL_H__ */ diff --git a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_11_0_offset.h b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_11_0_offset.h index 5ebe4cb40f9d..c38a01742d6f 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_11_0_offset.h +++ b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_11_0_offset.h @@ -7571,6 +7571,8 @@ // base address: 0x10100000 #define regRCC_STRAP0_RCC_DEV0_EPF0_STRAP0 0xd000 #define regRCC_STRAP0_RCC_DEV0_EPF0_STRAP0_BASE_IDX 5 +#define regRCC_DEV0_EPF5_STRAP4 0xd284 +#define regRCC_DEV0_EPF5_STRAP4_BASE_IDX 5 // addressBlock: nbio_nbif0_bif_rst_bif_rst_regblk diff --git a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_11_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_11_0_sh_mask.h index eb8c556d9c93..3b96f1e5a180 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_11_0_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_11_0_sh_mask.h @@ -50665,6 +50665,19 @@ #define RCC_STRAP0_RCC_DEV0_EPF0_STRAP0__STRAP_D1_SUPPORT_DEV0_F0_MASK 0x40000000L #define RCC_STRAP0_RCC_DEV0_EPF0_STRAP0__STRAP_D2_SUPPORT_DEV0_F0_MASK 0x80000000L +//RCC_DEV0_EPF5_STRAP4 +#define RCC_DEV0_EPF5_STRAP4__STRAP_ATOMIC_64BIT_EN_DEV0_F5__SHIFT 0x14 +#define RCC_DEV0_EPF5_STRAP4__STRAP_ATOMIC_EN_DEV0_F5__SHIFT 0x15 +#define RCC_DEV0_EPF5_STRAP4__STRAP_FLR_EN_DEV0_F5__SHIFT 0x16 +#define RCC_DEV0_EPF5_STRAP4__STRAP_PME_SUPPORT_DEV0_F5__SHIFT 0x17 +#define RCC_DEV0_EPF5_STRAP4__STRAP_INTERRUPT_PIN_DEV0_F5__SHIFT 0x1c +#define RCC_DEV0_EPF5_STRAP4__STRAP_AUXPWR_SUPPORT_DEV0_F5__SHIFT 0x1f +#define RCC_DEV0_EPF5_STRAP4__STRAP_ATOMIC_64BIT_EN_DEV0_F5_MASK 0x00100000L +#define RCC_DEV0_EPF5_STRAP4__STRAP_ATOMIC_EN_DEV0_F5_MASK 0x00200000L +#define RCC_DEV0_EPF5_STRAP4__STRAP_FLR_EN_DEV0_F5_MASK 0x00400000L +#define RCC_DEV0_EPF5_STRAP4__STRAP_PME_SUPPORT_DEV0_F5_MASK 0x0F800000L +#define RCC_DEV0_EPF5_STRAP4__STRAP_INTERRUPT_PIN_DEV0_F5_MASK 0x70000000L +#define RCC_DEV0_EPF5_STRAP4__STRAP_AUXPWR_SUPPORT_DEV0_F5_MASK 0x80000000L // addressBlock: nbio_nbif0_bif_rst_bif_rst_regblk //HARD_RST_CTRL diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h index bb27c0d2a9ae..67a5de573943 100644 --- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h @@ -357,11 +357,22 @@ struct dpm_clocks; struct amdgpu_xcp_metrics { /* Utilization Instantaneous (%) */ - u32 gfx_busy_inst[MAX_XCC]; - u16 jpeg_busy[NUM_JPEG_ENG]; - u16 vcn_busy[NUM_VCN]; + uint32_t gfx_busy_inst[MAX_XCC]; + uint16_t jpeg_busy[NUM_JPEG_ENG]; + uint16_t vcn_busy[NUM_VCN]; /* Utilization Accumulated (%) */ - u64 gfx_busy_acc[MAX_XCC]; + uint64_t gfx_busy_acc[MAX_XCC]; +}; + +struct amdgpu_xcp_metrics_v1_1 { + /* Utilization Instantaneous (%) */ + uint32_t gfx_busy_inst[MAX_XCC]; + uint16_t jpeg_busy[NUM_JPEG_ENG]; + uint16_t vcn_busy[NUM_VCN]; + /* Utilization Accumulated (%) */ + uint64_t gfx_busy_acc[MAX_XCC]; + /* Total App Clock Counter Accumulated */ + uint64_t gfx_below_host_limit_acc[MAX_XCC]; }; struct amd_pm_funcs { @@ -977,6 +988,105 @@ struct gpu_metrics_v1_6 { uint32_t pcie_lc_perf_other_end_recovery; }; +struct gpu_metrics_v1_7 { + struct metrics_table_header common_header; + + /* Temperature (Celsius) */ + uint16_t temperature_hotspot; + uint16_t temperature_mem; + uint16_t temperature_vrsoc; + + /* Power (Watts) */ + uint16_t curr_socket_power; + + /* Utilization (%) */ + uint16_t average_gfx_activity; + uint16_t average_umc_activity; // memory controller + + /* VRAM max bandwidthi (in GB/sec) at max memory clock */ + uint64_t mem_max_bandwidth; + + /* Energy (15.259uJ (2^-16) units) */ + uint64_t energy_accumulator; + + /* Driver attached timestamp (in ns) */ + uint64_t system_clock_counter; + + /* Accumulation cycle counter */ + uint32_t accumulation_counter; + + /* Accumulated throttler residencies */ + uint32_t prochot_residency_acc; + uint32_t ppt_residency_acc; + uint32_t socket_thm_residency_acc; + uint32_t vr_thm_residency_acc; + uint32_t hbm_thm_residency_acc; + + /* Clock Lock Status. Each bit corresponds to clock instance */ + uint32_t gfxclk_lock_status; + + /* Link width (number of lanes) and speed (in 0.1 GT/s) */ + uint16_t pcie_link_width; + uint16_t pcie_link_speed; + + /* XGMI bus width and bitrate (in Gbps) */ + uint16_t xgmi_link_width; + uint16_t xgmi_link_speed; + + /* Utilization Accumulated (%) */ + uint32_t gfx_activity_acc; + uint32_t mem_activity_acc; + + /*PCIE accumulated bandwidth (GB/sec) */ + uint64_t pcie_bandwidth_acc; + + /*PCIE instantaneous bandwidth (GB/sec) */ + uint64_t pcie_bandwidth_inst; + + /* PCIE L0 to recovery state transition accumulated count */ + uint64_t pcie_l0_to_recov_count_acc; + + /* PCIE replay accumulated count */ + uint64_t pcie_replay_count_acc; + + /* PCIE replay rollover accumulated count */ + uint64_t pcie_replay_rover_count_acc; + + /* PCIE NAK sent accumulated count */ + uint32_t pcie_nak_sent_count_acc; + + /* PCIE NAK received accumulated count */ + uint32_t pcie_nak_rcvd_count_acc; + + /* XGMI accumulated data transfer size(KiloBytes) */ + uint64_t xgmi_read_data_acc[NUM_XGMI_LINKS]; + uint64_t xgmi_write_data_acc[NUM_XGMI_LINKS]; + + /* XGMI link status(active/inactive) */ + uint16_t xgmi_link_status[NUM_XGMI_LINKS]; + + uint16_t padding; + + /* PMFW attached timestamp (10ns resolution) */ + uint64_t firmware_timestamp; + + /* Current clocks (Mhz) */ + uint16_t current_gfxclk[MAX_GFX_CLKS]; + uint16_t current_socclk[MAX_CLKS]; + uint16_t current_vclk0[MAX_CLKS]; + uint16_t current_dclk0[MAX_CLKS]; + uint16_t current_uclk; + + /* Number of current partition */ + uint16_t num_partition; + + /* XCP metrics stats */ + struct amdgpu_xcp_metrics_v1_1 xcp_stats[NUM_XCP]; + + /* PCIE other end recovery counter */ + uint32_t pcie_lc_perf_other_end_recovery; +}; + /* * gpu_metrics_v2_0 is not recommended as it's not naturally aligned. * Use gpu_metrics_v2_1 or later instead. diff --git a/drivers/gpu/drm/amd/pm/powerplay/smumgr/vega12_smumgr.c b/drivers/gpu/drm/amd/pm/powerplay/smumgr/vega12_smumgr.c index b52ce135d84d..d3ff6a831ed5 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/smumgr/vega12_smumgr.c +++ b/drivers/gpu/drm/amd/pm/powerplay/smumgr/vega12_smumgr.c @@ -257,20 +257,18 @@ static int vega12_smu_init(struct pp_hwmgr *hwmgr) priv->smu_tables.entry[TABLE_WATERMARKS].size = sizeof(Watermarks_t); tools_size = 0x19000; - if (tools_size) { - ret = amdgpu_bo_create_kernel((struct amdgpu_device *)hwmgr->adev, - tools_size, - PAGE_SIZE, - AMDGPU_GEM_DOMAIN_VRAM, - &priv->smu_tables.entry[TABLE_PMSTATUSLOG].handle, - &priv->smu_tables.entry[TABLE_PMSTATUSLOG].mc_addr, - &priv->smu_tables.entry[TABLE_PMSTATUSLOG].table); - if (ret) - goto err1; + ret = amdgpu_bo_create_kernel((struct amdgpu_device *)hwmgr->adev, + tools_size, + PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, + &priv->smu_tables.entry[TABLE_PMSTATUSLOG].handle, + &priv->smu_tables.entry[TABLE_PMSTATUSLOG].mc_addr, + &priv->smu_tables.entry[TABLE_PMSTATUSLOG].table); + if (ret) + goto err1; - priv->smu_tables.entry[TABLE_PMSTATUSLOG].version = 0x01; - priv->smu_tables.entry[TABLE_PMSTATUSLOG].size = tools_size; - } + priv->smu_tables.entry[TABLE_PMSTATUSLOG].version = 0x01; + priv->smu_tables.entry[TABLE_PMSTATUSLOG].size = tools_size; /* allocate space for AVFS Fuse table */ ret = amdgpu_bo_create_kernel((struct amdgpu_device *)hwmgr->adev, diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index 64f917959576..b8355293518f 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -1704,7 +1704,9 @@ static int smu_smc_hw_setup(struct smu_context *smu) return ret; } - if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4) + if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5) + pcie_gen = 4; + else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4) pcie_gen = 3; else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3) pcie_gen = 2; @@ -1717,7 +1719,9 @@ static int smu_smc_hw_setup(struct smu_context *smu) * Bit 15:8: PCIE GEN, 0 to 3 corresponds to GEN1 to GEN4 * Bit 7:0: PCIE lane width, 1 to 7 corresponds is x1 to x32 */ - if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X16) + if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X32) + pcie_width = 7; + else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X16) pcie_width = 6; else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X12) pcie_width = 5; diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0.h index 0546b02e198d..29a4583db873 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0.h @@ -53,7 +53,7 @@ #define CTF_OFFSET_MEM 5 extern const int decoded_link_speed[5]; -extern const int decoded_link_width[7]; +extern const int decoded_link_width[8]; #define DECODE_GEN_SPEED(gen_speed_idx) (decoded_link_speed[gen_speed_idx]) #define DECODE_LANE_WIDTH(lane_width_idx) (decoded_link_width[lane_width_idx]) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c index 4b36c230e43a..12125303bb79 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c @@ -1344,8 +1344,12 @@ static int arcturus_get_power_limit(struct smu_context *smu, *default_power_limit = power_limit; if (max_power_limit) *max_power_limit = power_limit; + /** + * No lower bound is imposed on the limit. Any unreasonable limit set + * will result in frequent throttling. + */ if (min_power_limit) - *min_power_limit = power_limit; + *min_power_limit = 0; return 0; } diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c index fa30a9e1f27a..ab3c93ddce46 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c @@ -96,7 +96,6 @@ MODULE_FIRMWARE("amdgpu/smu_13_0_14.bin"); #define PCIE_LC_SPEED_CNTL__LC_CURRENT_DATA_RATE_MASK 0xE0 #define PCIE_LC_SPEED_CNTL__LC_CURRENT_DATA_RATE__SHIFT 0x5 #define LINK_SPEED_MAX 4 - #define SMU_13_0_6_DSCLK_THRESHOLD 140 #define MCA_BANK_IPID(_ip, _hwid, _type) \ @@ -370,7 +369,7 @@ static int smu_v13_0_6_tables_init(struct smu_context *smu) return -ENOMEM; smu_table->metrics_time = 0; - smu_table->gpu_metrics_table_size = sizeof(struct gpu_metrics_v1_6); + smu_table->gpu_metrics_table_size = sizeof(struct gpu_metrics_v1_7); smu_table->gpu_metrics_table = kzalloc(smu_table->gpu_metrics_table_size, GFP_KERNEL); if (!smu_table->gpu_metrics_table) { @@ -2321,8 +2320,8 @@ static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table { bool per_inst, smu_13_0_6_per_inst, smu_13_0_14_per_inst, apu_per_inst; struct smu_table_context *smu_table = &smu->smu_table; - struct gpu_metrics_v1_6 *gpu_metrics = - (struct gpu_metrics_v1_6 *)smu_table->gpu_metrics_table; + struct gpu_metrics_v1_7 *gpu_metrics = + (struct gpu_metrics_v1_7 *)smu_table->gpu_metrics_table; bool flag = smu_v13_0_6_is_unified_metrics(smu); int ret = 0, xcc_id, inst, i, j, k, idx; struct amdgpu_device *adev = smu->adev; @@ -2341,7 +2340,7 @@ static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table metrics_a = (MetricsTableA_t *)metrics_x; - smu_cmn_init_soft_gpu_metrics(gpu_metrics, 1, 6); + smu_cmn_init_soft_gpu_metrics(gpu_metrics, 1, 7); gpu_metrics->temperature_hotspot = SMUQ10_ROUND(GET_METRIC_FIELD(MaxSocketTemperature, flag)); @@ -2448,6 +2447,9 @@ static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table SMUQ10_ROUND(GET_METRIC_FIELD(XgmiReadDataSizeAcc, flag)[i]); gpu_metrics->xgmi_write_data_acc[i] = SMUQ10_ROUND(GET_METRIC_FIELD(XgmiWriteDataSizeAcc, flag)[i]); + ret = amdgpu_get_xgmi_link_status(adev, i); + if (ret >= 0) + gpu_metrics->xgmi_link_status[i] = ret; } gpu_metrics->num_partition = adev->xcp_mgr->num_xcps; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c index c5d3e25cc967..4fd0354bd312 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c @@ -2147,6 +2147,8 @@ static ssize_t smu_v13_0_7_get_gpu_metrics(struct smu_context *smu, gpu_metrics->average_dclk1_frequency = metrics->AverageDclk1Frequency; gpu_metrics->current_gfxclk = metrics->CurrClock[PPCLK_GFXCLK]; + gpu_metrics->current_socclk = metrics->CurrClock[PPCLK_SOCCLK]; + gpu_metrics->current_uclk = metrics->CurrClock[PPCLK_UCLK]; gpu_metrics->current_vclk0 = metrics->CurrClock[PPCLK_VCLK_0]; gpu_metrics->current_dclk0 = metrics->CurrClock[PPCLK_DCLK_0]; gpu_metrics->current_vclk1 = metrics->CurrClock[PPCLK_VCLK_1]; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c index ecb0164d533e..a87040cb2f2e 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c @@ -49,7 +49,7 @@ #define regMP1_SMN_IH_SW_INT_CTRL_mp1_14_0_0_BASE_IDX 0 const int decoded_link_speed[5] = {1, 2, 3, 4, 5}; -const int decoded_link_width[7] = {0, 1, 2, 4, 8, 12, 16}; +const int decoded_link_width[8] = {0, 1, 2, 4, 8, 12, 16, 32}; /* * DO NOT use these for err/warn/info/debug messages. * Use dev_err, dev_warn, dev_info and dev_dbg instead. diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c index 59b369eff30f..687a0f5ac94f 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c @@ -1173,13 +1173,15 @@ static int smu_v14_0_2_print_clk_levels(struct smu_context *smu, (pcie_table->pcie_gen[i] == 0) ? "2.5GT/s," : (pcie_table->pcie_gen[i] == 1) ? "5.0GT/s," : (pcie_table->pcie_gen[i] == 2) ? "8.0GT/s," : - (pcie_table->pcie_gen[i] == 3) ? "16.0GT/s," : "", + (pcie_table->pcie_gen[i] == 3) ? "16.0GT/s," : + (pcie_table->pcie_gen[i] == 4) ? "32.0GT/s," : "", (pcie_table->pcie_lane[i] == 1) ? "x1" : (pcie_table->pcie_lane[i] == 2) ? "x2" : (pcie_table->pcie_lane[i] == 3) ? "x4" : (pcie_table->pcie_lane[i] == 4) ? "x8" : (pcie_table->pcie_lane[i] == 5) ? "x12" : - (pcie_table->pcie_lane[i] == 6) ? "x16" : "", + (pcie_table->pcie_lane[i] == 6) ? "x16" : + (pcie_table->pcie_lane[i] == 7) ? "x32" : "", pcie_table->clk_freq[i], (gen_speed == DECODE_GEN_SPEED(pcie_table->pcie_gen[i])) && (lane_width == DECODE_LANE_WIDTH(pcie_table->pcie_lane[i])) ? @@ -1463,15 +1465,35 @@ static int smu_v14_0_2_update_pcie_parameters(struct smu_context *smu, struct smu_14_0_dpm_context *dpm_context = smu->smu_dpm.dpm_context; struct smu_14_0_pcie_table *pcie_table = &dpm_context->dpm_tables.pcie_table; + int num_of_levels = pcie_table->num_of_link_levels; uint32_t smu_pcie_arg; int ret, i; - for (i = 0; i < pcie_table->num_of_link_levels; i++) { - if (pcie_table->pcie_gen[i] > pcie_gen_cap) + if (!num_of_levels) + return 0; + + if (!(smu->adev->pm.pp_feature & PP_PCIE_DPM_MASK)) { + if (pcie_table->pcie_gen[num_of_levels - 1] < pcie_gen_cap) + pcie_gen_cap = pcie_table->pcie_gen[num_of_levels - 1]; + + if (pcie_table->pcie_lane[num_of_levels - 1] < pcie_width_cap) + pcie_width_cap = pcie_table->pcie_lane[num_of_levels - 1]; + + /* Force all levels to use the same settings */ + for (i = 0; i < num_of_levels; i++) { pcie_table->pcie_gen[i] = pcie_gen_cap; - if (pcie_table->pcie_lane[i] > pcie_width_cap) pcie_table->pcie_lane[i] = pcie_width_cap; + } + } else { + for (i = 0; i < num_of_levels; i++) { + if (pcie_table->pcie_gen[i] > pcie_gen_cap) + pcie_table->pcie_gen[i] = pcie_gen_cap; + if (pcie_table->pcie_lane[i] > pcie_width_cap) + pcie_table->pcie_lane[i] = pcie_width_cap; + } + } + for (i = 0; i < num_of_levels; i++) { smu_pcie_arg = i << 16; smu_pcie_arg |= pcie_table->pcie_gen[i] << 8; smu_pcie_arg |= pcie_table->pcie_lane[i]; @@ -2753,7 +2775,6 @@ static const struct pptable_funcs smu_v14_0_2_ppt_funcs = { .get_unique_id = smu_v14_0_2_get_unique_id, .get_power_limit = smu_v14_0_2_get_power_limit, .set_power_limit = smu_v14_0_2_set_power_limit, - .set_power_source = smu_v14_0_set_power_source, .get_power_profile_mode = smu_v14_0_2_get_power_profile_mode, .set_power_profile_mode = smu_v14_0_2_set_power_profile_mode, .run_btc = smu_v14_0_run_btc, diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c index f1ab1a6bb467..dbbd3759bff3 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c @@ -1081,6 +1081,9 @@ void smu_cmn_init_soft_gpu_metrics(void *table, uint8_t frev, uint8_t crev) case METRICS_VERSION(1, 6): structure_size = sizeof(struct gpu_metrics_v1_6); break; + case METRICS_VERSION(1, 7): + structure_size = sizeof(struct gpu_metrics_v1_7); + break; case METRICS_VERSION(2, 0): structure_size = sizeof(struct gpu_metrics_v2_0); break; diff --git a/drivers/gpu/drm/i915/display/intel_hdcp.c b/drivers/gpu/drm/i915/display/intel_hdcp.c index f6d42ec6949e..f57e4dba2873 100644 --- a/drivers/gpu/drm/i915/display/intel_hdcp.c +++ b/drivers/gpu/drm/i915/display/intel_hdcp.c @@ -1503,6 +1503,8 @@ static int hdcp2_deauthenticate_port(struct intel_connector *connector) static int hdcp2_authentication_key_exchange(struct intel_connector *connector) { struct intel_display *display = to_intel_display(connector); + struct intel_digital_port *dig_port = + intel_attached_dig_port(connector); struct intel_hdcp *hdcp = &connector->hdcp; union { struct hdcp2_ake_init ake_init; @@ -1513,30 +1515,36 @@ static int hdcp2_authentication_key_exchange(struct intel_connector *connector) } msgs; const struct intel_hdcp_shim *shim = hdcp->shim; size_t size; - int ret, i; + int ret, i, max_retries; /* Init for seq_num */ hdcp->seq_num_v = 0; hdcp->seq_num_m = 0; + if (intel_encoder_is_dp(&dig_port->base) || + intel_encoder_is_mst(&dig_port->base)) + max_retries = 10; + else + max_retries = 1; + ret = hdcp2_prepare_ake_init(connector, &msgs.ake_init); if (ret < 0) return ret; /* * Retry the first read and write to downstream at least 10 times - * with a 50ms delay if not hdcp2 capable(dock decides to stop advertising - * hdcp2 capability for some reason). The reason being that - * during suspend resume dock usually keeps the HDCP2 registers inaccesible - * causing AUX error. This wouldn't be a big problem if the userspace - * just kept retrying with some delay while it continues to play low - * value content but most userpace applications end up throwing an error - * when it receives one from KMD. This makes sure we give the dock - * and the sink devices to complete its power cycle and then try HDCP - * authentication. The values of 10 and delay of 50ms was decided based - * on multiple trial and errors. + * with a 50ms delay if not hdcp2 capable for DP/DPMST encoders + * (dock decides to stop advertising hdcp2 capability for some reason). + * The reason being that during suspend resume dock usually keeps the + * HDCP2 registers inaccesible causing AUX error. This wouldn't be a + * big problem if the userspace just kept retrying with some delay while + * it continues to play low value content but most userpace applications + * end up throwing an error when it receives one from KMD. This makes + * sure we give the dock and the sink devices to complete its power cycle + * and then try HDCP authentication. The values of 10 and delay of 50ms + * was decided based on multiple trial and errors. */ - for (i = 0; i < 10; i++) { + for (i = 0; i < max_retries; i++) { if (!intel_hdcp2_get_capability(connector)) { msleep(50); continue; diff --git a/drivers/gpu/drm/radeon/radeon_audio.c b/drivers/gpu/drm/radeon/radeon_audio.c index 47aa06a9a942..5b69cc8011b4 100644 --- a/drivers/gpu/drm/radeon/radeon_audio.c +++ b/drivers/gpu/drm/radeon/radeon_audio.c @@ -760,16 +760,20 @@ static int radeon_audio_component_get_eld(struct device *kdev, int port, if (!rdev->audio.enabled || !rdev->mode_info.mode_config_initialized) return 0; - list_for_each_entry(encoder, &rdev_to_drm(rdev)->mode_config.encoder_list, head) { + list_for_each_entry(connector, &dev->mode_config.connector_list, head) { + const struct drm_connector_helper_funcs *connector_funcs = + connector->helper_private; + encoder = connector_funcs->best_encoder(connector); + + if (!encoder) + continue; + if (!radeon_encoder_is_digital(encoder)) continue; radeon_encoder = to_radeon_encoder(encoder); dig = radeon_encoder->enc_priv; if (!dig->pin || dig->pin->id != port) continue; - connector = radeon_get_connector_for_encoder(encoder); - if (!connector) - continue; *enabled = true; ret = drm_eld_size(connector->eld); memcpy(buf, connector->eld, min(max_bytes, ret)); diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c index f9c73c55f04f..f9996304d943 100644 --- a/drivers/gpu/drm/radeon/radeon_connectors.c +++ b/drivers/gpu/drm/radeon/radeon_connectors.c @@ -1255,16 +1255,6 @@ radeon_dvi_detect(struct drm_connector *connector, bool force) goto exit; } } - - if (dret && radeon_connector->hpd.hpd != RADEON_HPD_NONE && - !radeon_hpd_sense(rdev, radeon_connector->hpd.hpd) && - connector->connector_type == DRM_MODE_CONNECTOR_HDMIA) { - DRM_DEBUG_KMS("EDID is readable when HPD disconnected\n"); - schedule_delayed_work(&rdev->hotplug_work, msecs_to_jiffies(1000)); - ret = connector_status_disconnected; - goto exit; - } - if (dret) { radeon_connector->detected_by_load = false; radeon_connector_free_edid(connector); diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c index 23d6d1a2586d..5e958cc223f4 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ b/drivers/gpu/drm/radeon/radeon_drv.c @@ -248,10 +248,9 @@ int radeon_cik_support = 1; MODULE_PARM_DESC(cik_support, "CIK support (1 = enabled (default), 0 = disabled)"); module_param_named(cik_support, radeon_cik_support, int, 0444); -static struct pci_device_id pciidlist[] = { +static const struct pci_device_id pciidlist[] = { radeon_PCI_IDS }; - MODULE_DEVICE_TABLE(pci, pciidlist); static const struct drm_driver kms_driver; diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c index 69d0c12fa419..616d25c8c2de 100644 --- a/drivers/gpu/drm/radeon/radeon_ttm.c +++ b/drivers/gpu/drm/radeon/radeon_ttm.c @@ -219,8 +219,7 @@ static int radeon_bo_move(struct ttm_buffer_object *bo, bool evict, if (old_mem->mem_type == TTM_PL_TT && new_mem->mem_type == TTM_PL_SYSTEM) { radeon_ttm_tt_unbind(bo->bdev, bo->ttm); - ttm_resource_free(bo, &bo->resource); - ttm_bo_assign_mem(bo, new_mem); + ttm_bo_move_null(bo, new_mem); goto out; } if (rdev->ring[radeon_copy_ring_index(rdev)].ready && diff --git a/drivers/gpu/drm/rockchip/dw_hdmi_qp-rockchip.c b/drivers/gpu/drm/rockchip/dw_hdmi_qp-rockchip.c index 9c796ee4c303..c8b362cc2b95 100644 --- a/drivers/gpu/drm/rockchip/dw_hdmi_qp-rockchip.c +++ b/drivers/gpu/drm/rockchip/dw_hdmi_qp-rockchip.c @@ -82,7 +82,7 @@ static void dw_hdmi_qp_rockchip_encoder_enable(struct drm_encoder *encoder) * comment in rk_hdptx_phy_power_on() from * drivers/phy/rockchip/phy-rockchip-samsung-hdptx.c */ - phy_set_bus_width(hdmi->phy, rate / 100); + phy_set_bus_width(hdmi->phy, div_u64(rate, 100)); } } diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c index d2679c5d976b..0b0cd6aa1d9f 100644 --- a/drivers/gpu/drm/xe/xe_devcoredump.c +++ b/drivers/gpu/drm/xe/xe_devcoredump.c @@ -23,6 +23,7 @@ #include "xe_guc_submit.h" #include "xe_hw_engine.h" #include "xe_module.h" +#include "xe_pm.h" #include "xe_sched_job.h" #include "xe_vm.h" @@ -158,8 +159,11 @@ static void xe_devcoredump_deferred_snap_work(struct work_struct *work) { struct xe_devcoredump_snapshot *ss = container_of(work, typeof(*ss), work); struct xe_devcoredump *coredump = container_of(ss, typeof(*coredump), snapshot); + struct xe_device *xe = coredump_to_xe(coredump); unsigned int fw_ref; + xe_pm_runtime_get(xe); + /* keep going if fw fails as we still want to save the memory and SW data */ fw_ref = xe_force_wake_get(gt_to_fw(ss->gt), XE_FORCEWAKE_ALL); if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) @@ -168,6 +172,8 @@ static void xe_devcoredump_deferred_snap_work(struct work_struct *work) xe_guc_exec_queue_snapshot_capture_delayed(ss->ge); xe_force_wake_put(gt_to_fw(ss->gt), fw_ref); + xe_pm_runtime_put(xe); + /* Calculate devcoredump size */ ss->read.size = __xe_devcoredump_read(NULL, INT_MAX, coredump); diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 0e2dd691bdae..06d6db8b50f9 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -350,7 +350,8 @@ struct xe_device *xe_device_create(struct pci_dev *pdev, INIT_LIST_HEAD(&xe->pinned.external_vram); INIT_LIST_HEAD(&xe->pinned.evicted); - xe->preempt_fence_wq = alloc_ordered_workqueue("xe-preempt-fence-wq", 0); + xe->preempt_fence_wq = alloc_ordered_workqueue("xe-preempt-fence-wq", + WQ_MEM_RECLAIM); xe->ordered_wq = alloc_ordered_workqueue("xe-ordered-wq", 0); xe->unordered_wq = alloc_workqueue("xe-unordered-wq", 0, 0); xe->destroy_wq = alloc_workqueue("xe-destroy-wq", 0, 0); diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 9a8564ea06b5..6f4a9812b4f4 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -767,17 +767,19 @@ static void disable_scheduling_deregister(struct xe_guc *guc, struct xe_exec_queue *q) { MAKE_SCHED_CONTEXT_ACTION(q, DISABLE); - struct xe_device *xe = guc_to_xe(guc); int ret; set_min_preemption_timeout(guc, q); smp_rmb(); - ret = wait_event_timeout(guc->ct.wq, !exec_queue_pending_enable(q) || - xe_guc_read_stopped(guc), HZ * 5); + ret = wait_event_timeout(guc->ct.wq, + (!exec_queue_pending_enable(q) && + !exec_queue_pending_disable(q)) || + xe_guc_read_stopped(guc), + HZ * 5); if (!ret) { struct xe_gpu_scheduler *sched = &q->guc->sched; - drm_warn(&xe->drm, "Pending enable failed to respond"); + xe_gt_warn(q->gt, "Pending enable/disable failed to respond\n"); xe_sched_submission_start(sched); xe_gt_reset_async(q->gt); xe_sched_tdr_queue_imm(sched); @@ -1099,7 +1101,8 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) * modifying state */ ret = wait_event_timeout(guc->ct.wq, - !exec_queue_pending_enable(q) || + (!exec_queue_pending_enable(q) && + !exec_queue_pending_disable(q)) || xe_guc_read_stopped(guc), HZ * 5); if (!ret || xe_guc_read_stopped(guc)) goto trigger_reset; @@ -1328,8 +1331,8 @@ static void __guc_exec_queue_process_msg_suspend(struct xe_sched_msg *msg) if (guc_exec_queue_allowed_to_change_state(q) && !exec_queue_suspended(q) && exec_queue_enabled(q)) { - wait_event(guc->ct.wq, q->guc->resume_time != RESUME_PENDING || - xe_guc_read_stopped(guc)); + wait_event(guc->ct.wq, (q->guc->resume_time != RESUME_PENDING || + xe_guc_read_stopped(guc)) && !exec_queue_pending_disable(q)); if (!xe_guc_read_stopped(guc)) { s64 since_resume_ms = @@ -1866,16 +1869,29 @@ static void handle_sched_done(struct xe_guc *guc, struct xe_exec_queue *q, xe_gt_assert(guc_to_gt(guc), runnable_state == 0); xe_gt_assert(guc_to_gt(guc), exec_queue_pending_disable(q)); - clear_exec_queue_pending_disable(q); if (q->guc->suspend_pending) { suspend_fence_signal(q); + clear_exec_queue_pending_disable(q); } else { if (exec_queue_banned(q) || check_timeout) { smp_wmb(); wake_up_all(&guc->ct.wq); } - if (!check_timeout) + if (!check_timeout && exec_queue_destroyed(q)) { + /* + * Make sure to clear the pending_disable only + * after sampling the destroyed state. We want + * to ensure we don't trigger the unregister too + * early with something intending to only + * disable scheduling. The caller doing the + * destroy must wait for an ongoing + * pending_disable before marking as destroyed. + */ + clear_exec_queue_pending_disable(q); deregister_exec_queue(guc, q); + } else { + clear_exec_queue_pending_disable(q); + } } } } diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index cfd31ae49cc1..1b97d90aadda 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -209,7 +209,8 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, num_entries * XE_PAGE_SIZE, ttm_bo_type_kernel, XE_BO_FLAG_VRAM_IF_DGFX(tile) | - XE_BO_FLAG_PINNED); + XE_BO_FLAG_PINNED | + XE_BO_FLAG_PAGETABLE); if (IS_ERR(bo)) return PTR_ERR(bo); @@ -1350,6 +1351,7 @@ __xe_migrate_update_pgtables(struct xe_migrate *m, /* For sysmem PTE's, need to map them in our hole.. */ if (!IS_DGFX(xe)) { + u16 pat_index = xe->pat.idx[XE_CACHE_WB]; u32 ptes, ofs; ppgtt_ofs = NUM_KERNEL_PDE - 1; @@ -1409,7 +1411,7 @@ __xe_migrate_update_pgtables(struct xe_migrate *m, pt_bo->update_index = current_update; addr = vm->pt_ops->pte_encode_bo(pt_bo, 0, - XE_CACHE_WB, 0); + pat_index, 0); bb->cs[bb->len++] = lower_32_bits(addr); bb->cs[bb->len++] = upper_32_bits(addr); } diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index e6640283893f..6b7f77425c7f 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -174,7 +174,7 @@ static const struct xe_graphics_desc graphics_xelpg = { GENMASK(XE_HW_ENGINE_CCS3, XE_HW_ENGINE_CCS0) static const struct xe_graphics_desc graphics_xe2 = { - .name = "Xe2_LPG / Xe2_HPG", + .name = "Xe2_LPG / Xe2_HPG / Xe3_LPG", XE2_GFX_FEATURES, }; diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c index a90480c6aecf..42f5bebd09e5 100644 --- a/drivers/gpu/drm/xe/xe_sync.c +++ b/drivers/gpu/drm/xe/xe_sync.c @@ -87,8 +87,12 @@ static void user_fence_worker(struct work_struct *w) drm_dbg(&ufence->xe->drm, "mmget_not_zero() failed, ufence wasn't signaled\n"); } - wake_up_all(&ufence->xe->ufence_wq); + /* + * Wake up waiters only after updating the ufence state, allowing the UMD + * to safely reuse the same ufence without encountering -EBUSY errors. + */ WRITE_ONCE(ufence->signalled, 1); + wake_up_all(&ufence->xe->ufence_wq); user_fence_put(ufence); } |