From e5eef23e267c72521d81f23f7f82d1f523d4a253 Mon Sep 17 00:00:00 2001 From: Harry Wentland Date: Fri, 13 Jan 2023 11:24:08 -0500 Subject: drm/display: Don't block HDR_OUTPUT_METADATA on unknown EOTF MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The EDID of an HDR display defines EOTFs that are supported by the display and can be set in the HDR metadata infoframe. Userspace is expected to read the EDID and set an appropriate HDR_OUTPUT_METADATA. In drm_parse_hdr_metadata_block the kernel reads the supported EOTFs from the EDID and stores them in the drm_connector->hdr_sink_metadata. While doing so it also filters the EOTFs to the EOTFs the kernel knows about. When an HDR_OUTPUT_METADATA is set it then checks to make sure the EOTF is a supported EOTF. In cases where the kernel doesn't know about a new EOTF this check will fail, even if the EDID advertises support. Since it is expected that userspace reads the EDID to understand what the display supports it doesn't make sense for DRM to block an HDR_OUTPUT_METADATA if it contains an EOTF the kernel doesn't understand. This comes with the added benefit of future-proofing metadata support. If the spec defines a new EOTF there is no need to update DRM and an compositor can immediately make use of it. Bug: https://gitlab.freedesktop.org/wayland/weston/-/issues/609 v2: Distinguish EOTFs defind in kernel and ones defined in EDID in the commit description (Pekka) v3: Rebase; drm_hdmi_infoframe_set_hdr_metadata moved to drm_hdmi_helper.c Signed-off-by: Harry Wentland Cc: Pekka Paalanen Cc: Sebastian Wick Cc: Vitaly.Prosyak@amd.com Cc: Uma Shankar Cc: Ville Syrjälä Cc: Joshua Ashton Cc: Jani Nikula Cc: dri-devel@lists.freedesktop.org Cc: amd-gfx@lists.freedesktop.org Acked-by: Pekka Paalanen Reviewed-By: Joshua Ashton Link: https://patchwork.freedesktop.org/patch/msgid/20230113162428.33874-2-harry.wentland@amd.com Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/display/drm_hdmi_helper.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/display/drm_hdmi_helper.c b/drivers/gpu/drm/display/drm_hdmi_helper.c index 0264abe55278..faf5e9efa7d3 100644 --- a/drivers/gpu/drm/display/drm_hdmi_helper.c +++ b/drivers/gpu/drm/display/drm_hdmi_helper.c @@ -44,10 +44,8 @@ int drm_hdmi_infoframe_set_hdr_metadata(struct hdmi_drm_infoframe *frame, /* Sink EOTF is Bit map while infoframe is absolute values */ if (!is_eotf_supported(hdr_metadata->hdmi_metadata_type1.eotf, - connector->hdr_sink_metadata.hdmi_type1.eotf)) { - DRM_DEBUG_KMS("EOTF Not Supported\n"); - return -EINVAL; - } + connector->hdr_sink_metadata.hdmi_type1.eotf)) + DRM_DEBUG_KMS("Unknown EOTF %d\n", hdr_metadata->hdmi_metadata_type1.eotf); err = hdmi_drm_infoframe_init(frame); if (err < 0) -- cgit v1.2.3 From 7d386975f6a495902e679a3a250a7456d7e54765 Mon Sep 17 00:00:00 2001 From: Harry Wentland Date: Fri, 13 Jan 2023 11:24:09 -0500 Subject: drm/connector: print max_requested_bpc in state debugfs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is useful to understand the bpc defaults and support of a driver. Signed-off-by: Harry Wentland Cc: Pekka Paalanen Cc: Sebastian Wick Cc: Vitaly.Prosyak@amd.com Cc: Uma Shankar Cc: Ville Syrjälä Cc: Joshua Ashton Cc: Jani Nikula Cc: dri-devel@lists.freedesktop.org Cc: amd-gfx@lists.freedesktop.org Reviewed-By: Joshua Ashton Link: https://patchwork.freedesktop.org/patch/msgid/20230113162428.33874-3-harry.wentland@amd.com Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/drm_atomic.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c index 5457c02ca1ab..fed41800fea7 100644 --- a/drivers/gpu/drm/drm_atomic.c +++ b/drivers/gpu/drm/drm_atomic.c @@ -1070,6 +1070,7 @@ static void drm_atomic_connector_print_state(struct drm_printer *p, drm_printf(p, "connector[%u]: %s\n", connector->base.id, connector->name); drm_printf(p, "\tcrtc=%s\n", state->crtc ? state->crtc->name : "(null)"); drm_printf(p, "\tself_refresh_aware=%d\n", state->self_refresh_aware); + drm_printf(p, "\tmax_requested_bpc=%d\n", state->max_requested_bpc); if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK) if (state->writeback_job && state->writeback_job->fb) -- cgit v1.2.3 From 06630fb9fcd761254a8d8b53dd6f859b3ecf3707 Mon Sep 17 00:00:00 2001 From: Candice Li Date: Fri, 24 Feb 2023 17:26:33 +0800 Subject: drm/amdgpu: Support umc node harvest config on umc v8_10 Don't need to query error count and error address on harvest umc nodes. v2: Fix code bug, use active_mask instead of harvsest_config and remove unnecessary argument in LOOP macro. v3: Leave adev->gmc.num_umc unchanged. Signed-off-by: Candice Li Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 10 +++++++++- drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h | 7 +++++-- drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c | 1 - drivers/gpu/drm/amd/amdgpu/umc_v8_10.h | 4 ++-- 4 files changed, 16 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index b719852daa07..1a3cb53d2e0d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -543,6 +543,7 @@ static void amdgpu_discovery_read_from_harvest_table(struct amdgpu_device *adev, struct harvest_table *harvest_info; u16 offset; int i; + uint32_t umc_harvest_config = 0; bhdr = (struct binary_header *)adev->mman.discovery_bin; offset = le16_to_cpu(bhdr->table_list[HARVEST_INFO].offset); @@ -570,12 +571,17 @@ static void amdgpu_discovery_read_from_harvest_table(struct amdgpu_device *adev, adev->harvest_ip_mask |= AMD_HARVEST_IP_DMU_MASK; break; case UMC_HWID: + umc_harvest_config |= + 1 << (le16_to_cpu(harvest_info->list[i].number_instance)); (*umc_harvest_count)++; break; default: break; } } + + adev->umc.active_mask = ((1 << adev->umc.node_inst_num) - 1) & + ~umc_harvest_config; } /* ================================================== */ @@ -1156,8 +1162,10 @@ static int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev) AMDGPU_MAX_SDMA_INSTANCES); } - if (le16_to_cpu(ip->hw_id) == UMC_HWID) + if (le16_to_cpu(ip->hw_id) == UMC_HWID) { adev->gmc.num_umc++; + adev->umc.node_inst_num++; + } for (k = 0; k < num_base_address; k++) { /* diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h index f2bf979af588..36e19336f3b3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h @@ -42,7 +42,7 @@ #define LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) LOOP_UMC_INST((umc_inst)) LOOP_UMC_CH_INST((ch_inst)) #define LOOP_UMC_NODE_INST(node_inst) \ - for ((node_inst) = 0; (node_inst) < adev->umc.node_inst_num; (node_inst)++) + for_each_set_bit((node_inst), &(adev->umc.active_mask), adev->umc.node_inst_num) #define LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst) \ LOOP_UMC_NODE_INST((node_inst)) LOOP_UMC_INST_AND_CH((umc_inst), (ch_inst)) @@ -69,7 +69,7 @@ struct amdgpu_umc { /* number of umc instance with memory map register access */ uint32_t umc_inst_num; - /*number of umc node instance with memory map register access*/ + /* Total number of umc node instance including harvest one */ uint32_t node_inst_num; /* UMC regiser per channel offset */ @@ -82,6 +82,9 @@ struct amdgpu_umc { const struct amdgpu_umc_funcs *funcs; struct amdgpu_umc_ras *ras; + + /* active mask for umc node instance */ + unsigned long active_mask; }; int amdgpu_umc_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c index 85e0afc3d4f7..af7b3ba1ca00 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c @@ -567,7 +567,6 @@ static void gmc_v11_0_set_umc_funcs(struct amdgpu_device *adev) case IP_VERSION(8, 10, 0): adev->umc.channel_inst_num = UMC_V8_10_CHANNEL_INSTANCE_NUM; adev->umc.umc_inst_num = UMC_V8_10_UMC_INSTANCE_NUM; - adev->umc.node_inst_num = adev->gmc.num_umc; adev->umc.max_ras_err_cnt_per_query = UMC_V8_10_TOTAL_CHANNEL_NUM(adev); adev->umc.channel_offs = UMC_V8_10_PER_CHANNEL_OFFSET; adev->umc.retire_unit = UMC_V8_10_NA_COL_2BITS_POWER_OF_2_NUM; diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v8_10.h b/drivers/gpu/drm/amd/amdgpu/umc_v8_10.h index 25eaf4af5fcf..c6dfd433fec7 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v8_10.h +++ b/drivers/gpu/drm/amd/amdgpu/umc_v8_10.h @@ -31,9 +31,9 @@ /* number of umc instance with memory map register access */ #define UMC_V8_10_UMC_INSTANCE_NUM 2 -/* Total channel instances for all umc nodes */ +/* Total channel instances for all available umc nodes */ #define UMC_V8_10_TOTAL_CHANNEL_NUM(adev) \ - (UMC_V8_10_CHANNEL_INSTANCE_NUM * UMC_V8_10_UMC_INSTANCE_NUM * (adev)->umc.node_inst_num) + (UMC_V8_10_CHANNEL_INSTANCE_NUM * UMC_V8_10_UMC_INSTANCE_NUM * (adev)->gmc.num_umc) /* UMC regiser per channel offset */ #define UMC_V8_10_PER_CHANNEL_OFFSET 0x400 -- cgit v1.2.3 From c53899138c99236482a3c25d674f44723336afa3 Mon Sep 17 00:00:00 2001 From: Candice Li Date: Mon, 16 Jan 2023 16:23:21 +0800 Subject: drm/amd/pm: Enable ecc_info table support for smu v13_0_10 Support EccInfoTable which includes umc ras error count and error address. Signed-off-by: Candice Li Reviewed-by: Evan Quan Reviewed-by: Stanley.Yang Signed-off-by: Alex Deucher --- .../gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 75 ++++++++++++++++++++++ 1 file changed, 75 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c index 923a9fb3c887..27448ffe60a4 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c @@ -46,6 +46,7 @@ #include "asic_reg/mp/mp_13_0_0_sh_mask.h" #include "smu_cmn.h" #include "amdgpu_ras.h" +#include "umc_v8_10.h" /* * DO NOT use these for err/warn/info/debug messages. @@ -90,6 +91,12 @@ #define DEBUGSMC_MSG_Mode1Reset 2 +/* + * SMU_v13_0_10 supports ECCTABLE since version 80.34.0, + * use this to check ECCTABLE feature whether support + */ +#define SUPPORT_ECCTABLE_SMU_13_0_10_VERSION 0x00502200 + static struct cmn2asic_msg_mapping smu_v13_0_0_message_map[SMU_MSG_MAX_COUNT] = { MSG_MAP(TestMessage, PPSMC_MSG_TestMessage, 1), MSG_MAP(GetSmuVersion, PPSMC_MSG_GetSmuVersion, 1), @@ -229,6 +236,7 @@ static struct cmn2asic_mapping smu_v13_0_0_table_map[SMU_TABLE_COUNT] = { TAB_MAP(ACTIVITY_MONITOR_COEFF), [SMU_TABLE_COMBO_PPTABLE] = {1, TABLE_COMBO_PPTABLE}, TAB_MAP(I2C_COMMANDS), + TAB_MAP(ECCINFO), }; static struct cmn2asic_mapping smu_v13_0_0_pwr_src_map[SMU_POWER_SOURCE_COUNT] = { @@ -462,6 +470,8 @@ static int smu_v13_0_0_tables_init(struct smu_context *smu) AMDGPU_GEM_DOMAIN_VRAM); SMU_TABLE_INIT(tables, SMU_TABLE_COMBO_PPTABLE, MP0_MP1_DATA_REGION_SIZE_COMBOPPTABLE, PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM); + SMU_TABLE_INIT(tables, SMU_TABLE_ECCINFO, sizeof(EccInfoTable_t), + PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM); smu_table->metrics_table = kzalloc(sizeof(SmuMetricsExternal_t), GFP_KERNEL); if (!smu_table->metrics_table) @@ -477,8 +487,14 @@ static int smu_v13_0_0_tables_init(struct smu_context *smu) if (!smu_table->watermarks_table) goto err2_out; + smu_table->ecc_table = kzalloc(tables[SMU_TABLE_ECCINFO].size, GFP_KERNEL); + if (!smu_table->ecc_table) + goto err3_out; + return 0; +err3_out: + kfree(smu_table->watermarks_table); err2_out: kfree(smu_table->gpu_metrics_table); err1_out: @@ -2036,6 +2052,64 @@ static int smu_v13_0_0_send_bad_mem_channel_flag(struct smu_context *smu, return ret; } +static int smu_v13_0_0_check_ecc_table_support(struct smu_context *smu) +{ + struct amdgpu_device *adev = smu->adev; + uint32_t if_version = 0xff, smu_version = 0xff; + int ret = 0; + + ret = smu_cmn_get_smc_version(smu, &if_version, &smu_version); + if (ret) + return -EOPNOTSUPP; + + if ((adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 10)) && + (smu_version >= SUPPORT_ECCTABLE_SMU_13_0_10_VERSION)) + return ret; + else + return -EOPNOTSUPP; +} + +static ssize_t smu_v13_0_0_get_ecc_info(struct smu_context *smu, + void *table) +{ + struct smu_table_context *smu_table = &smu->smu_table; + struct amdgpu_device *adev = smu->adev; + EccInfoTable_t *ecc_table = NULL; + struct ecc_info_per_ch *ecc_info_per_channel = NULL; + int i, ret = 0; + struct umc_ecc_info *eccinfo = (struct umc_ecc_info *)table; + + ret = smu_v13_0_0_check_ecc_table_support(smu); + if (ret) + return ret; + + ret = smu_cmn_update_table(smu, + SMU_TABLE_ECCINFO, + 0, + smu_table->ecc_table, + false); + if (ret) { + dev_info(adev->dev, "Failed to export SMU ecc table!\n"); + return ret; + } + + ecc_table = (EccInfoTable_t *)smu_table->ecc_table; + + for (i = 0; i < UMC_V8_10_TOTAL_CHANNEL_NUM(adev); i++) { + ecc_info_per_channel = &(eccinfo->ecc[i]); + ecc_info_per_channel->ce_count_lo_chip = + ecc_table->EccInfo[i].ce_count_lo_chip; + ecc_info_per_channel->ce_count_hi_chip = + ecc_table->EccInfo[i].ce_count_hi_chip; + ecc_info_per_channel->mca_umc_status = + ecc_table->EccInfo[i].mca_umc_status; + ecc_info_per_channel->mca_umc_addr = + ecc_table->EccInfo[i].mca_umc_addr; + } + + return ret; +} + static const struct pptable_funcs smu_v13_0_0_ppt_funcs = { .get_allowed_feature_mask = smu_v13_0_0_get_allowed_feature_mask, .set_default_dpm_table = smu_v13_0_0_set_default_dpm_table, @@ -2111,6 +2185,7 @@ static const struct pptable_funcs smu_v13_0_0_ppt_funcs = { .send_hbm_bad_pages_num = smu_v13_0_0_smu_send_bad_mem_page_num, .send_hbm_bad_channel_flag = smu_v13_0_0_send_bad_mem_channel_flag, .gpo_control = smu_v13_0_gpo_control, + .get_ecc_info = smu_v13_0_0_get_ecc_info, }; void smu_v13_0_0_set_ppt_funcs(struct smu_context *smu) -- cgit v1.2.3 From 2d99a7ec25cf456cd3680eb314d6454138e5aa64 Mon Sep 17 00:00:00 2001 From: Swapnil Patel Date: Wed, 1 Mar 2023 14:33:33 -0500 Subject: drm/amd/display: Update clock table to include highest clock setting [Why] Currently, the clk manager matches SocVoltage with voltage from fused settings (dfPstate clock table). And then corresponding clocks are selected. However in certain situations, this leads to clk manager not including at least one entry with highest supported clock setting. [How] Update the clk manager to include at least one entry with highest supported clock setting. Reviewed-by: Pavle Kotarac Acked-by: Qingqing Zhuo Signed-off-by: Swapnil Patel Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.c index 24715ca2fa94..01383aac6b41 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.c @@ -529,6 +529,19 @@ static struct clk_bw_params vg_bw_params = { }; +static uint32_t find_max_clk_value(const uint32_t clocks[], uint32_t num_clocks) +{ + uint32_t max = 0; + int i; + + for (i = 0; i < num_clocks; ++i) { + if (clocks[i] > max) + max = clocks[i]; + } + + return max; +} + static unsigned int find_dcfclk_for_voltage(const struct vg_dpm_clocks *clock_table, unsigned int voltage) { @@ -572,12 +585,16 @@ static void vg_clk_mgr_helper_populate_bw_params( bw_params->clk_table.num_entries = j + 1; - for (i = 0; i < bw_params->clk_table.num_entries; i++, j--) { + for (i = 0; i < bw_params->clk_table.num_entries - 1; i++, j--) { bw_params->clk_table.entries[i].fclk_mhz = clock_table->DfPstateTable[j].fclk; bw_params->clk_table.entries[i].memclk_mhz = clock_table->DfPstateTable[j].memclk; bw_params->clk_table.entries[i].voltage = clock_table->DfPstateTable[j].voltage; bw_params->clk_table.entries[i].dcfclk_mhz = find_dcfclk_for_voltage(clock_table, clock_table->DfPstateTable[j].voltage); } + bw_params->clk_table.entries[i].fclk_mhz = clock_table->DfPstateTable[j].fclk; + bw_params->clk_table.entries[i].memclk_mhz = clock_table->DfPstateTable[j].memclk; + bw_params->clk_table.entries[i].voltage = clock_table->DfPstateTable[j].voltage; + bw_params->clk_table.entries[i].dcfclk_mhz = find_max_clk_value(clock_table->DcfClocks, VG_NUM_DCFCLK_DPM_LEVELS); bw_params->vram_type = bios_info->memory_type; bw_params->num_channels = bios_info->ma_channel_number; -- cgit v1.2.3 From 93bb18d2a873d2fa9625c8ea927723660a868b95 Mon Sep 17 00:00:00 2001 From: lyndonli Date: Thu, 2 Mar 2023 14:18:12 +0800 Subject: drm/amdgpu: Fix call trace warning and hang when removing amdgpu device MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On GPUs with RAS enabled, below call trace and hang are observed when shutting down device. v2: use DRM device unplugged flag instead of shutdown flag as the check to prevent memory wipe in shutdown stage. [ +0.000000] RIP: 0010:amdgpu_vram_mgr_fini+0x18d/0x1c0 [amdgpu] [ +0.000001] PKRU: 55555554 [ +0.000001] Call Trace: [ +0.000001] [ +0.000002] amdgpu_ttm_fini+0x140/0x1c0 [amdgpu] [ +0.000183] amdgpu_bo_fini+0x27/0xa0 [amdgpu] [ +0.000184] gmc_v11_0_sw_fini+0x2b/0x40 [amdgpu] [ +0.000163] amdgpu_device_fini_sw+0xb6/0x510 [amdgpu] [ +0.000152] amdgpu_driver_release_kms+0x16/0x30 [amdgpu] [ +0.000090] drm_dev_release+0x28/0x50 [drm] [ +0.000016] devm_drm_dev_init_release+0x38/0x60 [drm] [ +0.000011] devm_action_release+0x15/0x20 [ +0.000003] release_nodes+0x40/0xc0 [ +0.000001] devres_release_all+0x9e/0xe0 [ +0.000001] device_unbind_cleanup+0x12/0x80 [ +0.000003] device_release_driver_internal+0xff/0x160 [ +0.000001] driver_detach+0x4a/0x90 [ +0.000001] bus_remove_driver+0x6c/0xf0 [ +0.000001] driver_unregister+0x31/0x50 [ +0.000001] pci_unregister_driver+0x40/0x90 [ +0.000003] amdgpu_exit+0x15/0x120 [amdgpu] Signed-off-by: lyndonli Reviewed-by: Guchun Chen Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index e3e1ed4314dd..6c7d672412b2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -1315,7 +1315,7 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object *bo) if (!bo->resource || bo->resource->mem_type != TTM_PL_VRAM || !(abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE) || - adev->in_suspend || adev->shutdown) + adev->in_suspend || drm_dev_is_unplugged(adev_to_drm(adev))) return; if (WARN_ON_ONCE(!dma_resv_trylock(bo->base.resv))) -- cgit v1.2.3 From 1717cc5f2962a4652c76ed3858b499ccae6c277c Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Wed, 1 Mar 2023 09:36:06 -0600 Subject: drm/amd: Fix initialization mistake for NBIO 7.3.0 The same strapping initialization issue that happened on NBIO 7.5.1 appears to be happening on NBIO 7.3.0. Apply the same fix to 7.3.0 as well. Note: This workaround relies upon the integrated GPU being enabled in BIOS. If the integrated GPU is disabled in BIOS a different workaround will be required. Reported-by: Thomas Glanzmann Cc: Basavaraj Natikar Link: https://lore.kernel.org/linux-usb/Y%2Fz9GdHjPyF2rNG3@glanzmann.de/T/#u Signed-off-by: Mario Limonciello Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/nbio_v7_2.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_2.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_2.c index 4b0d563c6522..4ef1fa4603c8 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_2.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_2.c @@ -382,11 +382,6 @@ static void nbio_v7_2_init_registers(struct amdgpu_device *adev) if (def != data) WREG32_PCIE_PORT(SOC15_REG_OFFSET(NBIO, 0, regBIF1_PCIE_MST_CTRL_3), data); break; - case IP_VERSION(7, 5, 1): - data = RREG32_SOC15(NBIO, 0, regRCC_DEV2_EPF0_STRAP2); - data &= ~RCC_DEV2_EPF0_STRAP2__STRAP_NO_SOFT_RESET_DEV2_F0_MASK; - WREG32_SOC15(NBIO, 0, regRCC_DEV2_EPF0_STRAP2, data); - fallthrough; default: def = data = RREG32_PCIE_PORT(SOC15_REG_OFFSET(NBIO, 0, regPCIE_CONFIG_CNTL)); data = REG_SET_FIELD(data, PCIE_CONFIG_CNTL, @@ -399,6 +394,15 @@ static void nbio_v7_2_init_registers(struct amdgpu_device *adev) break; } + switch (adev->ip_versions[NBIO_HWIP][0]) { + case IP_VERSION(7, 3, 0): + case IP_VERSION(7, 5, 1): + data = RREG32_SOC15(NBIO, 0, regRCC_DEV2_EPF0_STRAP2); + data &= ~RCC_DEV2_EPF0_STRAP2__STRAP_NO_SOFT_RESET_DEV2_F0_MASK; + WREG32_SOC15(NBIO, 0, regRCC_DEV2_EPF0_STRAP2, data); + break; + } + if (amdgpu_sriov_vf(adev)) adev->rmmio_remap.reg_offset = SOC15_REG_OFFSET(NBIO, 0, regBIF_BX_PF0_HDP_MEM_COHERENCY_FLUSH_CNTL) << 2; -- cgit v1.2.3 From 20534dbcc7b7bfb447279cdcfb0d88ee3b779a18 Mon Sep 17 00:00:00 2001 From: Shashank Sharma Date: Mon, 27 Feb 2023 15:42:28 +0100 Subject: drm/amdgpu: fix return value check in kfd This patch fixes a return value check in kfd doorbell handling. This function should return 0(error) only when the ida_simple_get returns < 0(error), return > 0 is a success case. Cc: Felix Kuehling Cc: Alex Deucher Fixes: 16f0013157bf ("drm/amdkfd: Allocate doorbells only when needed") Acked-by: Christian Koenig Reviewed-by: Felix Kuehling Signed-off-by: Shashank Sharma Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c index cbef2e147da5..38c9e1ca6691 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c @@ -280,7 +280,7 @@ phys_addr_t kfd_get_process_doorbells(struct kfd_process_device *pdd) if (!pdd->doorbell_index) { int r = kfd_alloc_process_doorbells(pdd->dev, &pdd->doorbell_index); - if (r) + if (r < 0) return 0; } -- cgit v1.2.3 From 8879ec6dfdcdcca7718eeb4a584805eb205288bf Mon Sep 17 00:00:00 2001 From: lyndonli Date: Fri, 3 Mar 2023 14:55:05 +0800 Subject: drm/amdgpu: Fix the warning info when removing amdgpu device MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Actually, the drm_dev_enter in psp_cmd_submit_buf does not protect anything. If DRM device is unplugged, it will always check the condition in WARN_ON. So drop drm_dev_enter and drm_dev_exit in psp_cmd_submit_buf. When removing amdgpu, the calling order is as follows: amdgpu_pci_remove drm_dev_unplug amdgpu_driver_unload_kms amdgpu_device_fini_hw amdgpu_device_ip_fini_early psp_hw_fini psp_ras_terminate psp_ta_unloadye psp_cmd_submit_buf [ 4507.740388] Call Trace: [ 4507.740389] [ 4507.740391] psp_ta_unload+0x44/0x70 [amdgpu] [ 4507.740485] psp_ras_terminate+0x4d/0x70 [amdgpu] [ 4507.740575] psp_hw_fini+0x28/0xa0 [amdgpu] [ 4507.740662] amdgpu_device_fini_hw+0x328/0x442 [amdgpu] [ 4507.740791] amdgpu_driver_unload_kms+0x51/0x60 [amdgpu] [ 4507.740875] amdgpu_pci_remove+0x5a/0x140 [amdgpu] [ 4507.740962] ? _raw_spin_unlock_irqrestore+0x27/0x43 [ 4507.740965] ? __pm_runtime_resume+0x60/0x90 [ 4507.740968] pci_device_remove+0x39/0xb0 [ 4507.740971] device_remove+0x46/0x70 [ 4507.740972] device_release_driver_internal+0xd1/0x160 [ 4507.740974] driver_detach+0x4a/0x90 [ 4507.740975] bus_remove_driver+0x6c/0xf0 [ 4507.740976] driver_unregister+0x31/0x50 [ 4507.740977] pci_unregister_driver+0x40/0x90 [ 4507.740978] amdgpu_exit+0x15/0x120 [amdgpu] v2: fix commit message style issue Signed-off-by: lyndonli Reviewed-by: Guchun Chen Acked-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 28fe6d941054..3f5d13035aff 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -602,27 +602,14 @@ psp_cmd_submit_buf(struct psp_context *psp, struct psp_gfx_cmd_resp *cmd, uint64_t fence_mc_addr) { int ret; - int index, idx; + int index; int timeout = 20000; bool ras_intr = false; bool skip_unsupport = false; - bool dev_entered; if (psp->adev->no_hw_access) return 0; - dev_entered = drm_dev_enter(adev_to_drm(psp->adev), &idx); - /* - * We allow sending PSP messages LOAD_ASD and UNLOAD_TA without acquiring - * a lock in drm_dev_enter during driver unload because we must call - * drm_dev_unplug as the beginning of unload driver sequence . It is very - * crucial that userspace can't access device instances anymore. - */ - if (!dev_entered) - WARN_ON(psp->cmd_buf_mem->cmd_id != GFX_CMD_ID_LOAD_ASD && - psp->cmd_buf_mem->cmd_id != GFX_CMD_ID_UNLOAD_TA && - psp->cmd_buf_mem->cmd_id != GFX_CMD_ID_INVOKE_CMD); - memset(psp->cmd_buf_mem, 0, PSP_CMD_BUFFER_SIZE); memcpy(psp->cmd_buf_mem, cmd, sizeof(struct psp_gfx_cmd_resp)); @@ -686,8 +673,6 @@ psp_cmd_submit_buf(struct psp_context *psp, } exit: - if (dev_entered) - drm_dev_exit(idx); return ret; } -- cgit v1.2.3 From 0dcdf8498eae2727bb33cef3576991dc841d4343 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 6 Mar 2023 10:34:20 -0500 Subject: drm/amdgpu: fix error checking in amdgpu_read_mm_registers for soc15 Properly skip non-existent registers as well. Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/2442 Reviewed-by: Hawking Zhang Reviewed-by: Evan Quan Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/soc15.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 7cd17dda32ce..2eddd7f6cd41 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -439,8 +439,9 @@ static int soc15_read_register(struct amdgpu_device *adev, u32 se_num, *value = 0; for (i = 0; i < ARRAY_SIZE(soc15_allowed_read_registers); i++) { en = &soc15_allowed_read_registers[i]; - if (adev->reg_offset[en->hwip][en->inst] && - reg_offset != (adev->reg_offset[en->hwip][en->inst][en->seg] + if (!adev->reg_offset[en->hwip][en->inst]) + continue; + else if (reg_offset != (adev->reg_offset[en->hwip][en->inst][en->seg] + en->reg_offset)) continue; -- cgit v1.2.3 From 2915e43a033a778816fa4bc621f033576796521e Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 6 Mar 2023 10:35:34 -0500 Subject: drm/amdgpu: fix error checking in amdgpu_read_mm_registers for soc21 Properly skip non-existent registers as well. Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/2442 Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/soc21.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c index 620f7409825d..9df2236007ab 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc21.c +++ b/drivers/gpu/drm/amd/amdgpu/soc21.c @@ -291,9 +291,10 @@ static int soc21_read_register(struct amdgpu_device *adev, u32 se_num, *value = 0; for (i = 0; i < ARRAY_SIZE(soc21_allowed_read_registers); i++) { en = &soc21_allowed_read_registers[i]; - if (adev->reg_offset[en->hwip][en->inst] && - reg_offset != (adev->reg_offset[en->hwip][en->inst][en->seg] - + en->reg_offset)) + if (!adev->reg_offset[en->hwip][en->inst]) + continue; + else if (reg_offset != (adev->reg_offset[en->hwip][en->inst][en->seg] + + en->reg_offset)) continue; *value = soc21_get_register_value(adev, -- cgit v1.2.3 From b42fee5e0b44344cfe4c38e61341ee250362c83f Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 7 Mar 2023 08:59:13 -0500 Subject: drm/amdgpu: fix error checking in amdgpu_read_mm_registers for nv Properly skip non-existent registers as well. Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/2442 Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/nv.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index d972025f0d20..855d390c41de 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -444,9 +444,10 @@ static int nv_read_register(struct amdgpu_device *adev, u32 se_num, *value = 0; for (i = 0; i < ARRAY_SIZE(nv_allowed_read_registers); i++) { en = &nv_allowed_read_registers[i]; - if (adev->reg_offset[en->hwip][en->inst] && - reg_offset != (adev->reg_offset[en->hwip][en->inst][en->seg] - + en->reg_offset)) + if (!adev->reg_offset[en->hwip][en->inst]) + continue; + else if (reg_offset != (adev->reg_offset[en->hwip][en->inst][en->seg] + + en->reg_offset)) continue; *value = nv_get_register_value(adev, -- cgit v1.2.3 From 6ce2ea07c5ff0a8188eab0e5cd1f0e4899b36835 Mon Sep 17 00:00:00 2001 From: Veerabadhran Gopalakrishnan Date: Wed, 8 Mar 2023 19:33:53 +0530 Subject: drm/amdgpu/soc21: Add video cap query support for VCN_4_0_4 Added the video capability query support for VCN version 4_0_4 Signed-off-by: Veerabadhran Gopalakrishnan Reviewed-by: Leo Liu Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.1.x --- drivers/gpu/drm/amd/amdgpu/soc21.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c index 9df2236007ab..061793d390cc 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc21.c +++ b/drivers/gpu/drm/amd/amdgpu/soc21.c @@ -111,6 +111,7 @@ static int soc21_query_video_codecs(struct amdgpu_device *adev, bool encode, switch (adev->ip_versions[UVD_HWIP][0]) { case IP_VERSION(4, 0, 0): case IP_VERSION(4, 0, 2): + case IP_VERSION(4, 0, 4): if (adev->vcn.harvest_config & AMDGPU_VCN_HARVEST_VCN0) { if (encode) *codecs = &vcn_4_0_0_video_codecs_encode_vcn1; -- cgit v1.2.3