diff options
131 files changed, 2493 insertions, 725 deletions
diff --git a/Documentation/devicetree/bindings/display/msm/gmu.txt b/Documentation/devicetree/bindings/display/msm/gmu.txt index 90af5b0a56a9..bf9c7a2a495c 100644 --- a/Documentation/devicetree/bindings/display/msm/gmu.txt +++ b/Documentation/devicetree/bindings/display/msm/gmu.txt @@ -31,6 +31,10 @@ Required properties: - iommus: phandle to the adreno iommu - operating-points-v2: phandle to the OPP operating points +Optional properties: +- sram: phandle to the On Chip Memory (OCMEM) that's present on some Snapdragon + SoCs. See Documentation/devicetree/bindings/sram/qcom,ocmem.yaml. + Example: / { @@ -63,3 +67,50 @@ Example: operating-points-v2 = <&gmu_opp_table>; }; }; + +a3xx example with OCMEM support: + +/ { + ... + + gpu: adreno@fdb00000 { + compatible = "qcom,adreno-330.2", + "qcom,adreno"; + reg = <0xfdb00000 0x10000>; + reg-names = "kgsl_3d0_reg_memory"; + interrupts = <GIC_SPI 33 IRQ_TYPE_LEVEL_HIGH>; + interrupt-names = "kgsl_3d0_irq"; + clock-names = "core", + "iface", + "mem_iface"; + clocks = <&mmcc OXILI_GFX3D_CLK>, + <&mmcc OXILICX_AHB_CLK>, + <&mmcc OXILICX_AXI_CLK>; + sram = <&gmu_sram>; + power-domains = <&mmcc OXILICX_GDSC>; + operating-points-v2 = <&gpu_opp_table>; + iommus = <&gpu_iommu 0>; + }; + + ocmem@fdd00000 { + compatible = "qcom,msm8974-ocmem"; + + reg = <0xfdd00000 0x2000>, + <0xfec00000 0x180000>; + reg-names = "ctrl", + "mem"; + + clocks = <&rpmcc RPM_SMD_OCMEMGX_CLK>, + <&mmcc OCMEMCX_OCMEMNOC_CLK>; + clock-names = "core", + "iface"; + + #address-cells = <1>; + #size-cells = <1>; + + gmu_sram: gmu-sram@0 { + reg = <0x0 0x100000>; + ranges = <0 0 0xfec00000 0x100000>; + }; + }; +}; diff --git a/Documentation/devicetree/bindings/display/msm/mdp5.txt b/Documentation/devicetree/bindings/display/msm/mdp5.txt index 4e11338548aa..43d11279c925 100644 --- a/Documentation/devicetree/bindings/display/msm/mdp5.txt +++ b/Documentation/devicetree/bindings/display/msm/mdp5.txt @@ -76,6 +76,8 @@ Required properties: Optional properties: - clock-names: the following clocks are optional: * "lut" + * "tbu" + * "tbu_rt" Example: diff --git a/Documentation/devicetree/bindings/sram/qcom,ocmem.yaml b/Documentation/devicetree/bindings/sram/qcom,ocmem.yaml new file mode 100644 index 000000000000..222990f9923c --- /dev/null +++ b/Documentation/devicetree/bindings/sram/qcom,ocmem.yaml @@ -0,0 +1,96 @@ +# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/sram/qcom,ocmem.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: On Chip Memory (OCMEM) that is present on some Qualcomm Snapdragon SoCs. + +maintainers: + - Brian Masney <masneyb@onstation.org> + +description: | + The On Chip Memory (OCMEM) is typically used by the GPU, camera/video, and + audio components on some Snapdragon SoCs. + +properties: + compatible: + const: qcom,msm8974-ocmem + + reg: + items: + - description: Control registers + - description: OCMEM address range + + reg-names: + items: + - const: ctrl + - const: mem + + clocks: + items: + - description: Core clock + - description: Interface clock + + clock-names: + items: + - const: core + - const: iface + + '#address-cells': + const: 1 + + '#size-cells': + const: 1 + +required: + - compatible + - reg + - reg-names + - clocks + - clock-names + - '#address-cells' + - '#size-cells' + +patternProperties: + "^.+-sram$": + type: object + description: A region of reserved memory. + + properties: + reg: + maxItems: 1 + + ranges: + maxItems: 1 + + required: + - reg + - ranges + +examples: + - | + #include <dt-bindings/clock/qcom,rpmcc.h> + #include <dt-bindings/clock/qcom,mmcc-msm8974.h> + + ocmem: ocmem@fdd00000 { + compatible = "qcom,msm8974-ocmem"; + + reg = <0xfdd00000 0x2000>, + <0xfec00000 0x180000>; + reg-names = "ctrl", + "mem"; + + clocks = <&rpmcc RPM_SMD_OCMEMGX_CLK>, + <&mmcc OCMEMCX_OCMEMNOC_CLK>; + clock-names = "core", + "iface"; + + #address-cells = <1>; + #size-cells = <1>; + + gmu-sram@0 { + reg = <0x0 0x100000>; + ranges = <0 0 0xfec00000 0x100000>; + }; + }; diff --git a/MAINTAINERS b/MAINTAINERS index 741e3f433f6e..71c4ca8b278f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -856,7 +856,6 @@ S: Maintained F: drivers/i2c/busses/i2c-amd-mp2* AMD POWERPLAY -M: Rex Zhu <rex.zhu@amd.com> M: Evan Quan <evan.quan@amd.com> L: amd-gfx@lists.freedesktop.org S: Supported diff --git a/arch/ia64/include/asm/agp.h b/arch/ia64/include/asm/agp.h index 2b451c4496da..0261507dc264 100644 --- a/arch/ia64/include/asm/agp.h +++ b/arch/ia64/include/asm/agp.h @@ -14,8 +14,8 @@ * in coherent mode, which lets us map the AGP memory as normal (write-back) memory * (unlike x86, where it gets mapped "write-coalescing"). */ -#define map_page_into_agp(page) /* nothing */ -#define unmap_page_from_agp(page) /* nothing */ +#define map_page_into_agp(page) do { } while (0) +#define unmap_page_from_agp(page) do { } while (0) #define flush_agp_cache() mb() /* GATT allocation. Returns/accepts GATT kernel virtual address. */ diff --git a/drivers/char/agp/frontend.c b/drivers/char/agp/frontend.c index f6955888e676..47098648502d 100644 --- a/drivers/char/agp/frontend.c +++ b/drivers/char/agp/frontend.c @@ -102,14 +102,13 @@ agp_segment_priv *agp_find_seg_in_client(const struct agp_client *client, int size, pgprot_t page_prot) { struct agp_segment_priv *seg; - int num_segments, i; + int i; off_t pg_start; size_t pg_count; pg_start = offset / 4096; pg_count = size / 4096; seg = *(client->segments); - num_segments = client->num_segments; for (i = 0; i < client->num_segments; i++) { if ((seg[i].pg_start == pg_start) && diff --git a/drivers/char/agp/generic.c b/drivers/char/agp/generic.c index df1edb5ec0ad..ab154a75acf0 100644 --- a/drivers/char/agp/generic.c +++ b/drivers/char/agp/generic.c @@ -207,6 +207,7 @@ EXPORT_SYMBOL(agp_free_memory); /** * agp_allocate_memory - allocate a group of pages of a certain type. * + * @bridge: an agp_bridge_data struct allocated for the AGP host bridge. * @page_count: size_t argument of the number of pages * @type: u32 argument of the type of memory to be allocated. * @@ -355,6 +356,7 @@ EXPORT_SYMBOL_GPL(agp_num_entries); /** * agp_copy_info - copy bridge state information * + * @bridge: an agp_bridge_data struct allocated for the AGP host bridge. * @info: agp_kern_info pointer. The caller should insure that this pointer is valid. * * This function copies information about the agp bridge device and the state of @@ -850,7 +852,6 @@ int agp_generic_create_gatt_table(struct agp_bridge_data *bridge) { char *table; char *table_end; - int size; int page_order; int num_entries; int i; @@ -864,25 +865,22 @@ int agp_generic_create_gatt_table(struct agp_bridge_data *bridge) table = NULL; i = bridge->aperture_size_idx; temp = bridge->current_size; - size = page_order = num_entries = 0; + page_order = num_entries = 0; if (bridge->driver->size_type != FIXED_APER_SIZE) { do { switch (bridge->driver->size_type) { case U8_APER_SIZE: - size = A_SIZE_8(temp)->size; page_order = A_SIZE_8(temp)->page_order; num_entries = A_SIZE_8(temp)->num_entries; break; case U16_APER_SIZE: - size = A_SIZE_16(temp)->size; page_order = A_SIZE_16(temp)->page_order; num_entries = A_SIZE_16(temp)->num_entries; break; case U32_APER_SIZE: - size = A_SIZE_32(temp)->size; page_order = A_SIZE_32(temp)->page_order; num_entries = A_SIZE_32(temp)->num_entries; break; @@ -890,7 +888,7 @@ int agp_generic_create_gatt_table(struct agp_bridge_data *bridge) case FIXED_APER_SIZE: case LVL2_APER_SIZE: default: - size = page_order = num_entries = 0; + page_order = num_entries = 0; break; } @@ -920,7 +918,6 @@ int agp_generic_create_gatt_table(struct agp_bridge_data *bridge) } } while (!table && (i < bridge->driver->num_aperture_sizes)); } else { - size = ((struct aper_size_info_fixed *) temp)->size; page_order = ((struct aper_size_info_fixed *) temp)->page_order; num_entries = ((struct aper_size_info_fixed *) temp)->num_entries; table = alloc_gatt_pages(page_order); @@ -1282,6 +1279,7 @@ EXPORT_SYMBOL(agp_generic_destroy_page); /** * agp_enable - initialise the agp point-to-point connection. * + * @bridge: an agp_bridge_data struct allocated for the AGP host bridge. * @mode: agp mode register value to configure with. */ void agp_enable(struct agp_bridge_data *bridge, u32 mode) diff --git a/drivers/firmware/qcom_scm-32.c b/drivers/firmware/qcom_scm-32.c index 215061c581e1..5d90b7f5ab5a 100644 --- a/drivers/firmware/qcom_scm-32.c +++ b/drivers/firmware/qcom_scm-32.c @@ -442,6 +442,41 @@ int __qcom_scm_hdcp_req(struct device *dev, struct qcom_scm_hdcp_req *req, req, req_cnt * sizeof(*req), resp, sizeof(*resp)); } +int __qcom_scm_ocmem_lock(struct device *dev, u32 id, u32 offset, u32 size, + u32 mode) +{ + struct ocmem_tz_lock { + __le32 id; + __le32 offset; + __le32 size; + __le32 mode; + } request; + + request.id = cpu_to_le32(id); + request.offset = cpu_to_le32(offset); + request.size = cpu_to_le32(size); + request.mode = cpu_to_le32(mode); + + return qcom_scm_call(dev, QCOM_SCM_OCMEM_SVC, QCOM_SCM_OCMEM_LOCK_CMD, + &request, sizeof(request), NULL, 0); +} + +int __qcom_scm_ocmem_unlock(struct device *dev, u32 id, u32 offset, u32 size) +{ + struct ocmem_tz_unlock { + __le32 id; + __le32 offset; + __le32 size; + } request; + + request.id = cpu_to_le32(id); + request.offset = cpu_to_le32(offset); + request.size = cpu_to_le32(size); + + return qcom_scm_call(dev, QCOM_SCM_OCMEM_SVC, QCOM_SCM_OCMEM_UNLOCK_CMD, + &request, sizeof(request), NULL, 0); +} + void __qcom_scm_init(void) { } @@ -582,7 +617,22 @@ int __qcom_scm_assign_mem(struct device *dev, phys_addr_t mem_region, int __qcom_scm_restore_sec_cfg(struct device *dev, u32 device_id, u32 spare) { - return -ENODEV; + struct msm_scm_sec_cfg { + __le32 id; + __le32 ctx_bank_num; + } cfg; + int ret, scm_ret = 0; + + cfg.id = cpu_to_le32(device_id); + cfg.ctx_bank_num = cpu_to_le32(spare); + + ret = qcom_scm_call(dev, QCOM_SCM_SVC_MP, QCOM_SCM_RESTORE_SEC_CFG, + &cfg, sizeof(cfg), &scm_ret, sizeof(scm_ret)); + + if (ret || scm_ret) + return ret ? ret : -EINVAL; + + return 0; } int __qcom_scm_iommu_secure_ptbl_size(struct device *dev, u32 spare, diff --git a/drivers/firmware/qcom_scm-64.c b/drivers/firmware/qcom_scm-64.c index 91d5ad7cf58b..c3a3d9874def 100644 --- a/drivers/firmware/qcom_scm-64.c +++ b/drivers/firmware/qcom_scm-64.c @@ -241,6 +241,18 @@ int __qcom_scm_hdcp_req(struct device *dev, struct qcom_scm_hdcp_req *req, return ret; } +int __qcom_scm_ocmem_lock(struct device *dev, uint32_t id, uint32_t offset, + uint32_t size, uint32_t mode) +{ + return -ENOTSUPP; +} + +int __qcom_scm_ocmem_unlock(struct device *dev, uint32_t id, uint32_t offset, + uint32_t size) +{ + return -ENOTSUPP; +} + void __qcom_scm_init(void) { u64 cmd; diff --git a/drivers/firmware/qcom_scm.c b/drivers/firmware/qcom_scm.c index 4802ab170fe5..27c1d98a34e6 100644 --- a/drivers/firmware/qcom_scm.c +++ b/drivers/firmware/qcom_scm.c @@ -192,6 +192,46 @@ bool qcom_scm_pas_supported(u32 peripheral) EXPORT_SYMBOL(qcom_scm_pas_supported); /** + * qcom_scm_ocmem_lock_available() - is OCMEM lock/unlock interface available + */ +bool qcom_scm_ocmem_lock_available(void) +{ + return __qcom_scm_is_call_available(__scm->dev, QCOM_SCM_OCMEM_SVC, + QCOM_SCM_OCMEM_LOCK_CMD); +} +EXPORT_SYMBOL(qcom_scm_ocmem_lock_available); + +/** + * qcom_scm_ocmem_lock() - call OCMEM lock interface to assign an OCMEM + * region to the specified initiator + * + * @id: tz initiator id + * @offset: OCMEM offset + * @size: OCMEM size + * @mode: access mode (WIDE/NARROW) + */ +int qcom_scm_ocmem_lock(enum qcom_scm_ocmem_client id, u32 offset, u32 size, + u32 mode) +{ + return __qcom_scm_ocmem_lock(__scm->dev, id, offset, size, mode); +} +EXPORT_SYMBOL(qcom_scm_ocmem_lock); + +/** + * qcom_scm_ocmem_unlock() - call OCMEM unlock interface to release an OCMEM + * region from the specified initiator + * + * @id: tz initiator id + * @offset: OCMEM offset + * @size: OCMEM size + */ +int qcom_scm_ocmem_unlock(enum qcom_scm_ocmem_client id, u32 offset, u32 size) +{ + return __qcom_scm_ocmem_unlock(__scm->dev, id, offset, size); +} +EXPORT_SYMBOL(qcom_scm_ocmem_unlock); + +/** * qcom_scm_pas_init_image() - Initialize peripheral authentication service * state machine for a given peripheral, using the * metadata @@ -327,6 +367,19 @@ static const struct reset_control_ops qcom_scm_pas_reset_ops = { .deassert = qcom_scm_pas_reset_deassert, }; +/** + * qcom_scm_restore_sec_cfg_available() - Check if secure environment + * supports restore security config interface. + * + * Return true if restore-cfg interface is supported, false if not. + */ +bool qcom_scm_restore_sec_cfg_available(void) +{ + return __qcom_scm_is_call_available(__scm->dev, QCOM_SCM_SVC_MP, + QCOM_SCM_RESTORE_SEC_CFG); +} +EXPORT_SYMBOL(qcom_scm_restore_sec_cfg_available); + int qcom_scm_restore_sec_cfg(u32 device_id, u32 spare) { return __qcom_scm_restore_sec_cfg(__scm->dev, device_id, spare); diff --git a/drivers/firmware/qcom_scm.h b/drivers/firmware/qcom_scm.h index 99506bd873c0..ef293ee67ec1 100644 --- a/drivers/firmware/qcom_scm.h +++ b/drivers/firmware/qcom_scm.h @@ -42,6 +42,15 @@ extern int __qcom_scm_hdcp_req(struct device *dev, extern void __qcom_scm_init(void); +#define QCOM_SCM_OCMEM_SVC 0xf +#define QCOM_SCM_OCMEM_LOCK_CMD 0x1 +#define QCOM_SCM_OCMEM_UNLOCK_CMD 0x2 + +extern int __qcom_scm_ocmem_lock(struct device *dev, u32 id, u32 offset, + u32 size, u32 mode); +extern int __qcom_scm_ocmem_unlock(struct device *dev, u32 id, u32 offset, + u32 size); + #define QCOM_SCM_SVC_PIL 0x2 #define QCOM_SCM_PAS_INIT_IMAGE_CMD 0x1 #define QCOM_SCM_PAS_MEM_SETUP_CMD 0x2 diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index 1168351267fd..bfdadc3667e0 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -95,6 +95,7 @@ config DRM_KMS_FB_HELPER config DRM_DEBUG_DP_MST_TOPOLOGY_REFS bool "Enable refcount backtrace history in the DP MST helpers" + depends on STACKTRACE_SUPPORT select STACKDEPOT depends on DRM_KMS_HELPER depends on DEBUG_KERNEL diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index ae6f5446262c..12dbcfaa34b8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -105,11 +105,24 @@ void amdgpu_amdkfd_gpuvm_init_mem_limits(void) (kfd_mem_limit.max_ttm_mem_limit >> 20)); } +/* Estimate page table size needed to represent a given memory size + * + * With 4KB pages, we need one 8 byte PTE for each 4KB of memory + * (factor 512, >> 9). With 2MB pages, we need one 8 byte PTE for 2MB + * of memory (factor 256K, >> 18). ROCm user mode tries to optimize + * for 2MB pages for TLB efficiency. However, small allocations and + * fragmented system memory still need some 4KB pages. We choose a + * compromise that should work in most cases without reserving too + * much memory for page tables unnecessarily (factor 16K, >> 14). + */ +#define ESTIMATE_PT_SIZE(mem_size) ((mem_size) >> 14) + static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev, uint64_t size, u32 domain, bool sg) { + uint64_t reserved_for_pt = + ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size); size_t acc_size, system_mem_needed, ttm_mem_needed, vram_needed; - uint64_t reserved_for_pt = amdgpu_amdkfd_total_mem_size >> 9; int ret = 0; acc_size = ttm_bo_dma_acc_size(&adev->mman.bdev, size, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 58f6b3b92831..4f76beafb2fa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3109,7 +3109,9 @@ void amdgpu_device_fini(struct amdgpu_device *adev) int r; DRM_INFO("amdgpu: finishing device.\n"); + flush_delayed_work(&adev->delayed_init_work); adev->shutdown = true; + /* disable all interrupts */ amdgpu_irq_disable_all(adev); if (adev->mode_info.mode_config_initialized){ @@ -3127,7 +3129,6 @@ void amdgpu_device_fini(struct amdgpu_device *adev) adev->firmware.gpu_info_fw = NULL; } adev->accel_working = false; - cancel_delayed_work_sync(&adev->delayed_init_work); /* free i2c buses */ if (!amdgpu_device_has_dc_support(adev)) amdgpu_i2c_fini(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index d2dd59a95e8a..3cadb0b76f22 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c @@ -514,7 +514,7 @@ uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev, * Also, don't allow GTT domain if the BO doens't have USWC falg set. */ if (adev->asic_type >= CHIP_CARRIZO && - adev->asic_type <= CHIP_RAVEN && + adev->asic_type < CHIP_RAVEN && (adev->flags & AMD_IS_APU) && (bo_flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC) && amdgpu_bo_support_uswc(bo_flags) && diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 557be89421a8..0ffc9447b573 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -998,10 +998,10 @@ static const struct pci_device_id pciidlist[] = { {0x1002, 0x731B, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI10}, {0x1002, 0x731F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI10}, /* Navi14 */ - {0x1002, 0x7340, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14|AMD_EXP_HW_SUPPORT}, - {0x1002, 0x7341, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14|AMD_EXP_HW_SUPPORT}, - {0x1002, 0x7347, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14|AMD_EXP_HW_SUPPORT}, - {0x1002, 0x734F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14|AMD_EXP_HW_SUPPORT}, + {0x1002, 0x7340, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14}, + {0x1002, 0x7341, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14}, + {0x1002, 0x7347, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14}, + {0x1002, 0x734F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14}, /* Renoir */ {0x1002, 0x1636, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RENOIR|AMD_IS_APU|AMD_EXP_HW_SUPPORT}, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index c9d1fada6188..e00b46180d2e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -454,8 +454,6 @@ void amdgpu_gfx_mqd_sw_fini(struct amdgpu_device *adev) } ring = &adev->gfx.kiq.ring; - if (adev->asic_type >= CHIP_NAVI10 && amdgpu_async_gfx_ring) - kfree(adev->gfx.me.mqd_backup[AMDGPU_MAX_GFX_RINGS]); kfree(adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS]); amdgpu_bo_free_kernel(&ring->mqd_obj, &ring->mqd_gpu_addr, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index a74ecd449775..0ae0a2715b0d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -225,7 +225,7 @@ struct amdgpu_me { uint32_t num_me; uint32_t num_pipe_per_me; uint32_t num_queue_per_pipe; - void *mqd_backup[AMDGPU_MAX_GFX_RINGS + 1]; + void *mqd_backup[AMDGPU_MAX_GFX_RINGS]; /* These are the resources for which amdgpu takes ownership */ DECLARE_BITMAP(queue_bitmap, AMDGPU_MAX_GFX_QUEUES); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h index 406736a1bd3d..b499a3de8bb6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h @@ -77,6 +77,7 @@ struct amdgpu_gmc_fault { struct amdgpu_vmhub { uint32_t ctx0_ptb_addr_lo32; uint32_t ctx0_ptb_addr_hi32; + uint32_t vm_inv_eng0_sem; uint32_t vm_inv_eng0_req; uint32_t vm_inv_eng0_ack; uint32_t vm_context0_cntl; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 82c46c4faaad..b6db28a570c2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -655,15 +655,19 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file return -ENOMEM; alloc_size = info->read_mmr_reg.count * sizeof(*regs); - for (i = 0; i < info->read_mmr_reg.count; i++) + amdgpu_gfx_off_ctrl(adev, false); + for (i = 0; i < info->read_mmr_reg.count; i++) { if (amdgpu_asic_read_register(adev, se_num, sh_num, info->read_mmr_reg.dword_offset + i, ®s[i])) { DRM_DEBUG_KMS("unallowed offset %#x\n", info->read_mmr_reg.dword_offset + i); kfree(regs); + amdgpu_gfx_off_ctrl(adev, true); return -EFAULT; } + } + amdgpu_gfx_off_ctrl(adev, true); n = copy_to_user(out, regs, min(size, alloc_size)); kfree(regs); return n ? -EFAULT : 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index bbe9ac7e843f..44be3a45b25e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -567,7 +567,9 @@ static int psp_xgmi_initialize(struct psp_context *psp) struct ta_xgmi_shared_memory *xgmi_cmd; int ret; - if (!psp->adev->psp.ta_fw) + if (!psp->adev->psp.ta_fw || + !psp->adev->psp.ta_xgmi_ucode_size || + !psp->adev->psp.ta_xgmi_start_addr) return -ENOENT; if (!psp->xgmi_context.initialized) { @@ -756,6 +758,12 @@ static int psp_ras_terminate(struct psp_context *psp) { int ret; + /* + * TODO: bypass the terminate in sriov for now + */ + if (amdgpu_sriov_vf(psp->adev)) + return 0; + if (!psp->ras.ras_initialized) return 0; @@ -777,6 +785,18 @@ static int psp_ras_initialize(struct psp_context *psp) { int ret; + /* + * TODO: bypass the initialize in sriov for now + */ + if (amdgpu_sriov_vf(psp->adev)) + return 0; + + if (!psp->adev->psp.ta_ras_ucode_size || + !psp->adev->psp.ta_ras_start_addr) { + dev_warn(psp->adev->dev, "RAS: ras ta ucode is not available\n"); + return 0; + } + if (!psp->ras.ras_initialized) { ret = psp_ras_init_shared_buf(psp); if (ret) @@ -866,6 +886,18 @@ static int psp_hdcp_initialize(struct psp_context *psp) { int ret; + /* + * TODO: bypass the initialize in sriov for now + */ + if (amdgpu_sriov_vf(psp->adev)) + return 0; + + if (!psp->adev->psp.ta_hdcp_ucode_size || + !psp->adev->psp.ta_hdcp_start_addr) { + dev_warn(psp->adev->dev, "HDCP: hdcp ta ucode is not available\n"); + return 0; + } + if (!psp->hdcp_context.hdcp_initialized) { ret = psp_hdcp_init_shared_buf(psp); if (ret) @@ -948,6 +980,12 @@ static int psp_hdcp_terminate(struct psp_context *psp) { int ret; + /* + * TODO: bypass the terminate in sriov for now + */ + if (amdgpu_sriov_vf(psp->adev)) + return 0; + if (!psp->hdcp_context.hdcp_initialized) return 0; @@ -1039,6 +1077,18 @@ static int psp_dtm_initialize(struct psp_context *psp) { int ret; + /* + * TODO: bypass the initialize in sriov for now + */ + if (amdgpu_sriov_vf(psp->adev)) + return 0; + + if (!psp->adev->psp.ta_dtm_ucode_size || + !psp->adev->psp.ta_dtm_start_addr) { + dev_warn(psp->adev->dev, "DTM: dtm ta ucode is not available\n"); + return 0; + } + if (!psp->dtm_context.dtm_initialized) { ret = psp_dtm_init_shared_buf(psp); if (ret) @@ -1091,6 +1141,12 @@ static int psp_dtm_terminate(struct psp_context *psp) { int ret; + /* + * TODO: bypass the terminate in sriov for now + */ + if (amdgpu_sriov_vf(psp->adev)) + return 0; + if (!psp->dtm_context.dtm_initialized) return 0; @@ -1411,7 +1467,10 @@ out: || ucode->ucode_id == AMDGPU_UCODE_ID_SDMA5 || ucode->ucode_id == AMDGPU_UCODE_ID_SDMA6 || ucode->ucode_id == AMDGPU_UCODE_ID_SDMA7 - || ucode->ucode_id == AMDGPU_UCODE_ID_RLC_G)) + || ucode->ucode_id == AMDGPU_UCODE_ID_RLC_G + || ucode->ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL + || ucode->ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM + || ucode->ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM)) /*skip ucode loading in SRIOV VF */ continue; @@ -1428,8 +1487,8 @@ out: return ret; /* Start rlc autoload after psp recieved all the gfx firmware */ - if (psp->autoload_supported && ucode->ucode_id == - AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM) { + if (psp->autoload_supported && ucode->ucode_id == (amdgpu_sriov_vf(adev) ? + AMDGPU_UCODE_ID_CP_MEC2 : AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM)) { ret = psp_rlc_autoload(psp); if (ret) { DRM_ERROR("Failed to start rlc autoload\n"); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c index 7de16c0c2f20..2a8e04895595 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c @@ -27,7 +27,8 @@ #include <linux/bits.h> #include "smu_v11_0_i2c.h" -#define EEPROM_I2C_TARGET_ADDR 0xA0 +#define EEPROM_I2C_TARGET_ADDR_ARCTURUS 0xA8 +#define EEPROM_I2C_TARGET_ADDR_VEGA20 0xA0 /* * The 2 macros bellow represent the actual size in bytes that @@ -83,7 +84,7 @@ static int __update_table_header(struct amdgpu_ras_eeprom_control *control, { int ret = 0; struct i2c_msg msg = { - .addr = EEPROM_I2C_TARGET_ADDR, + .addr = 0, .flags = 0, .len = EEPROM_ADDRESS_SIZE + EEPROM_TABLE_HEADER_SIZE, .buf = buff, @@ -93,6 +94,8 @@ static int __update_table_header(struct amdgpu_ras_eeprom_control *control, *(uint16_t *)buff = EEPROM_HDR_START; __encode_table_header_to_buff(&control->tbl_hdr, buff + EEPROM_ADDRESS_SIZE); + msg.addr = control->i2c_address; + ret = i2c_transfer(&control->eeprom_accessor, &msg, 1); if (ret < 1) DRM_ERROR("Failed to write EEPROM table header, ret:%d", ret); @@ -203,7 +206,7 @@ int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control) unsigned char buff[EEPROM_ADDRESS_SIZE + EEPROM_TABLE_HEADER_SIZE] = { 0 }; struct amdgpu_ras_eeprom_table_header *hdr = &control->tbl_hdr; struct i2c_msg msg = { - .addr = EEPROM_I2C_TARGET_ADDR, + .addr = 0, .flags = I2C_M_RD, .len = EEPROM_ADDRESS_SIZE + EEPROM_TABLE_HEADER_SIZE, .buf = buff, @@ -213,10 +216,12 @@ int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control) switch (adev->asic_type) { case CHIP_VEGA20: + control->i2c_address = EEPROM_I2C_TARGET_ADDR_VEGA20; ret = smu_v11_0_i2c_eeprom_control_init(&control->eeprom_accessor); break; case CHIP_ARCTURUS: + control->i2c_address = EEPROM_I2C_TARGET_ADDR_ARCTURUS; ret = smu_i2c_eeprom_init(&adev->smu, &control->eeprom_accessor); break; @@ -229,6 +234,8 @@ int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control) return ret; } + msg.addr = control->i2c_address; + /* Read/Create table header from EEPROM address 0 */ ret = i2c_transfer(&control->eeprom_accessor, &msg, 1); if (ret < 1) { @@ -408,8 +415,8 @@ int amdgpu_ras_eeprom_process_recods(struct amdgpu_ras_eeprom_control *control, * Update bits 16,17 of EEPROM address in I2C address by setting them * to bits 1,2 of Device address byte */ - msg->addr = EEPROM_I2C_TARGET_ADDR | - ((control->next_addr & EEPROM_ADDR_MSB_MASK) >> 15); + msg->addr = control->i2c_address | + ((control->next_addr & EEPROM_ADDR_MSB_MASK) >> 15); msg->flags = write ? 0 : I2C_M_RD; msg->len = EEPROM_ADDRESS_SIZE + EEPROM_TABLE_RECORD_SIZE; msg->buf = buff; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h index 622269957c1b..ca78f812d436 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h @@ -50,6 +50,7 @@ struct amdgpu_ras_eeprom_control { struct mutex tbl_mutex; bool bus_locked; uint32_t tbl_byte_sum; + uint16_t i2c_address; // 8-bit represented address }; /* diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c index c8793e6cc3c5..6373bfb47d55 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c @@ -124,13 +124,12 @@ int amdgpu_gfx_rlc_init_sr(struct amdgpu_device *adev, u32 dws) */ int amdgpu_gfx_rlc_init_csb(struct amdgpu_device *adev) { - volatile u32 *dst_ptr; u32 dws; int r; /* allocate clear state block */ adev->gfx.rlc.clear_state_size = dws = adev->gfx.rlc.funcs->get_csb_size(adev); - r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE, + r = amdgpu_bo_create_kernel(adev, dws * 4, PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, &adev->gfx.rlc.clear_state_obj, &adev->gfx.rlc.clear_state_gpu_addr, @@ -141,13 +140,6 @@ int amdgpu_gfx_rlc_init_csb(struct amdgpu_device *adev) return r; } - /* set up the cs buffer */ - dst_ptr = adev->gfx.rlc.cs_ptr; - adev->gfx.rlc.funcs->get_csb_buffer(adev, dst_ptr); - amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj); - amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj); - amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); - return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h index f940526c5889..63e734a125fb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h @@ -473,7 +473,7 @@ TRACE_EVENT(amdgpu_ib_pipe_sync, TP_PROTO(struct amdgpu_job *sched_job, struct dma_fence *fence), TP_ARGS(sched_job, fence), TP_STRUCT__entry( - __string(ring, sched_job->base.sched->name); + __string(ring, sched_job->base.sched->name) __field(uint64_t, id) __field(struct dma_fence *, fence) __field(uint64_t, ctx) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index b4dd89af6f09..e324bfe6c58f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c @@ -299,6 +299,7 @@ int amdgpu_uvd_sw_fini(struct amdgpu_device *adev) { int i, j; + cancel_delayed_work_sync(&adev->uvd.idle_work); drm_sched_entity_destroy(&adev->uvd.entity); for (j = 0; j < adev->uvd.num_uvd_inst; ++j) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index 703677f2ff6f..46b590af2fd2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -216,6 +216,7 @@ int amdgpu_vce_sw_fini(struct amdgpu_device *adev) if (adev->vce.vcpu_bo == NULL) return 0; + cancel_delayed_work_sync(&adev->vce.idle_work); drm_sched_entity_destroy(&adev->vce.entity); amdgpu_bo_free_kernel(&adev->vce.vcpu_bo, &adev->vce.gpu_addr, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index 3199e4a5ff12..9d870444d7d6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -193,6 +193,8 @@ int amdgpu_vcn_sw_fini(struct amdgpu_device *adev) { int i, j; + cancel_delayed_work_sync(&adev->vcn.idle_work); + if (adev->vcn.indirect_sram) { amdgpu_bo_free_kernel(&adev->vcn.dpg_sram_bo, &adev->vcn.dpg_sram_gpu_addr, diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c index 2d64d270725d..b22a10b2d201 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik.c +++ b/drivers/gpu/drm/amd/amdgpu/cik.c @@ -1346,10 +1346,13 @@ static int cik_asic_reset(struct amdgpu_device *adev) { int r; - if (cik_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) + if (cik_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) { + if (!adev->in_suspend) + amdgpu_inc_vram_lost(adev); r = smu7_asic_baco_reset(adev); - else + } else { r = cik_asic_pci_config_reset(adev); + } return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index a93dd3dc0902..f2c1b026397b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -690,59 +690,61 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name); - err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev); - if (err) - goto out; - err = amdgpu_ucode_validate(adev->gfx.rlc_fw); - rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; - version_major = le16_to_cpu(rlc_hdr->header.header_version_major); - version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor); - if (version_major == 2 && version_minor == 1) - adev->gfx.rlc.is_rlc_v2_1 = true; - - adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version); - adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version); - adev->gfx.rlc.save_and_restore_offset = + if (!amdgpu_sriov_vf(adev)) { + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name); + err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev); + if (err) + goto out; + err = amdgpu_ucode_validate(adev->gfx.rlc_fw); + rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; + version_major = le16_to_cpu(rlc_hdr->header.header_version_major); + version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor); + if (version_major == 2 && version_minor == 1) + adev->gfx.rlc.is_rlc_v2_1 = true; + + adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version); + adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version); + adev->gfx.rlc.save_and_restore_offset = le32_to_cpu(rlc_hdr->save_and_restore_offset); - adev->gfx.rlc.clear_state_descriptor_offset = + adev->gfx.rlc.clear_state_descriptor_offset = le32_to_cpu(rlc_hdr->clear_state_descriptor_offset); - adev->gfx.rlc.avail_scratch_ram_locations = + adev->gfx.rlc.avail_scratch_ram_locations = le32_to_cpu(rlc_hdr->avail_scratch_ram_locations); - adev->gfx.rlc.reg_restore_list_size = + adev->gfx.rlc.reg_restore_list_size = le32_to_cpu(rlc_hdr->reg_restore_list_size); - adev->gfx.rlc.reg_list_format_start = + adev->gfx.rlc.reg_list_format_start = le32_to_cpu(rlc_hdr->reg_list_format_start); - adev->gfx.rlc.reg_list_format_separate_start = + adev->gfx.rlc.reg_list_format_separate_start = le32_to_cpu(rlc_hdr->reg_list_format_separate_start); - adev->gfx.rlc.starting_offsets_start = + adev->gfx.rlc.starting_offsets_start = le32_to_cpu(rlc_hdr->starting_offsets_start); - adev->gfx.rlc.reg_list_format_size_bytes = + adev->gfx.rlc.reg_list_format_size_bytes = le32_to_cpu(rlc_hdr->reg_list_format_size_bytes); - adev->gfx.rlc.reg_list_size_bytes = + adev->gfx.rlc.reg_list_size_bytes = le32_to_cpu(rlc_hdr->reg_list_size_bytes); - adev->gfx.rlc.register_list_format = + adev->gfx.rlc.register_list_format = kmalloc(adev->gfx.rlc.reg_list_format_size_bytes + - adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL); - if (!adev->gfx.rlc.register_list_format) { - err = -ENOMEM; - goto out; - } + adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL); + if (!adev->gfx.rlc.register_list_format) { + err = -ENOMEM; + goto out; + } - tmp = (unsigned int *)((uintptr_t)rlc_hdr + - le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes)); - for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++) - adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]); + tmp = (unsigned int *)((uintptr_t)rlc_hdr + + le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes)); + for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++) + adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]); - adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i; + adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i; - tmp = (unsigned int *)((uintptr_t)rlc_hdr + - le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes)); - for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++) - adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]); + tmp = (unsigned int *)((uintptr_t)rlc_hdr + + le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes)); + for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++) + adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]); - if (adev->gfx.rlc.is_rlc_v2_1) - gfx_v10_0_init_rlc_ext_microcode(adev); + if (adev->gfx.rlc.is_rlc_v2_1) + gfx_v10_0_init_rlc_ext_microcode(adev); + } snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec%s.bin", chip_name, wks); err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev); @@ -993,39 +995,6 @@ static int gfx_v10_0_rlc_init(struct amdgpu_device *adev) return 0; } -static int gfx_v10_0_csb_vram_pin(struct amdgpu_device *adev) -{ - int r; - - r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false); - if (unlikely(r != 0)) - return r; - - r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, - AMDGPU_GEM_DOMAIN_VRAM); - if (!r) - adev->gfx.rlc.clear_state_gpu_addr = - amdgpu_bo_gpu_offset(adev->gfx.rlc.clear_state_obj); - - amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); - - return r; -} - -static void gfx_v10_0_csb_vram_unpin(struct amdgpu_device *adev) -{ - int r; - - if (!adev->gfx.rlc.clear_state_obj) - return; - - r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true); - if (likely(r == 0)) { - amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj); - amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); - } -} - static void gfx_v10_0_mec_fini(struct amdgpu_device *adev) { amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL); @@ -1785,27 +1754,18 @@ static void gfx_v10_0_enable_gui_idle_interrupt(struct amdgpu_device *adev, WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp); } -static void gfx_v10_0_init_csb(struct amdgpu_device *adev) +static int gfx_v10_0_init_csb(struct amdgpu_device *adev) { + adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr); + /* csib */ WREG32_SOC15(GC, 0, mmRLC_CSIB_ADDR_HI, adev->gfx.rlc.clear_state_gpu_addr >> 32); WREG32_SOC15(GC, 0, mmRLC_CSIB_ADDR_LO, adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc); WREG32_SOC15(GC, 0, mmRLC_CSIB_LENGTH, adev->gfx.rlc.clear_state_size); -} - -static void gfx_v10_0_init_pg(struct amdgpu_device *adev) -{ - int i; - gfx_v10_0_init_csb(adev); - - for (i = 0; i < adev->num_vmhubs; i++) - amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0); - - /* TODO: init power gating */ - return; + return 0; } void gfx_v10_0_rlc_stop(struct amdgpu_device *adev) @@ -1900,18 +1860,16 @@ static int gfx_v10_0_rlc_resume(struct amdgpu_device *adev) { int r; - if (amdgpu_sriov_vf(adev)) - return 0; - if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + r = gfx_v10_0_wait_for_rlc_autoload_complete(adev); if (r) return r; - gfx_v10_0_init_pg(adev); - /* enable RLC SRM */ - gfx_v10_0_rlc_enable_srm(adev); + gfx_v10_0_init_csb(adev); + if (!amdgpu_sriov_vf(adev)) /* enable RLC SRM */ + gfx_v10_0_rlc_enable_srm(adev); } else { adev->gfx.rlc.funcs->stop(adev); @@ -1933,7 +1891,8 @@ static int gfx_v10_0_rlc_resume(struct amdgpu_device *adev) return r; } - gfx_v10_0_init_pg(adev); + gfx_v10_0_init_csb(adev); + adev->gfx.rlc.funcs->start(adev); if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) { @@ -2400,7 +2359,7 @@ static int gfx_v10_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev) return 0; } -static void gfx_v10_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable) +static int gfx_v10_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable) { int i; u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL); @@ -2413,7 +2372,17 @@ static void gfx_v10_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable) adev->gfx.gfx_ring[i].sched.ready = false; } WREG32_SOC15(GC, 0, mmCP_ME_CNTL, tmp); - udelay(50); + + for (i = 0; i < adev->usec_timeout; i++) { + if (RREG32_SOC15(GC, 0, mmCP_STAT) == 0) + break; + udelay(1); + } + + if (i >= adev->usec_timeout) + DRM_ERROR("failed to %s cp gfx\n", enable ? "unhalt" : "halt"); + + return 0; } static int gfx_v10_0_cp_gfx_load_pfp_microcode(struct amdgpu_device *adev) @@ -2784,7 +2753,7 @@ static int gfx_v10_0_cp_gfx_resume(struct amdgpu_device *adev) /* Init gfx ring 0 for pipe 0 */ mutex_lock(&adev->srbm_mutex); gfx_v10_0_cp_gfx_switch_pipe(adev, PIPE_ID0); - mutex_unlock(&adev->srbm_mutex); + /* Set ring buffer size */ ring = &adev->gfx.gfx_ring[0]; rb_bufsz = order_base_2(ring->ring_size / 8); @@ -2822,11 +2791,11 @@ static int gfx_v10_0_cp_gfx_resume(struct amdgpu_device *adev) WREG32_SOC15(GC, 0, mmCP_RB_ACTIVE, 1); gfx_v10_0_cp_gfx_set_doorbell(adev, ring); + mutex_unlock(&adev->srbm_mutex); /* Init gfx ring 1 for pipe 1 */ mutex_lock(&adev->srbm_mutex); gfx_v10_0_cp_gfx_switch_pipe(adev, PIPE_ID1); - mutex_unlock(&adev->srbm_mutex); ring = &adev->gfx.gfx_ring[1]; rb_bufsz = order_base_2(ring->ring_size / 8); tmp = REG_SET_FIELD(0, CP_RB1_CNTL, RB_BUFSZ, rb_bufsz); @@ -2856,6 +2825,7 @@ static int gfx_v10_0_cp_gfx_resume(struct amdgpu_device *adev) WREG32_SOC15(GC, 0, mmCP_RB1_ACTIVE, 1); gfx_v10_0_cp_gfx_set_doorbell(adev, ring); + mutex_unlock(&adev->srbm_mutex); /* Switch to pipe 0 */ mutex_lock(&adev->srbm_mutex); @@ -3114,6 +3084,7 @@ static int gfx_v10_0_gfx_init_queue(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; struct v10_gfx_mqd *mqd = ring->mqd_ptr; + int mqd_idx = ring - &adev->gfx.gfx_ring[0]; if (!adev->in_gpu_reset && !adev->in_suspend) { memset((void *)mqd, 0, sizeof(*mqd)); @@ -3125,12 +3096,12 @@ static int gfx_v10_0_gfx_init_queue(struct amdgpu_ring *ring) #endif nv_grbm_select(adev, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); - if (adev->gfx.me.mqd_backup[AMDGPU_MAX_GFX_RINGS]) - memcpy(adev->gfx.me.mqd_backup[AMDGPU_MAX_GFX_RINGS], mqd, sizeof(*mqd)); + if (adev->gfx.me.mqd_backup[mqd_idx]) + memcpy(adev->gfx.me.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); } else if (adev->in_gpu_reset) { /* reset mqd with the backup copy */ - if (adev->gfx.me.mqd_backup[AMDGPU_MAX_GFX_RINGS]) - memcpy(mqd, adev->gfx.me.mqd_backup[AMDGPU_MAX_GFX_RINGS], sizeof(*mqd)); + if (adev->gfx.me.mqd_backup[mqd_idx]) + memcpy(mqd, adev->gfx.me.mqd_backup[mqd_idx], sizeof(*mqd)); /* reset the ring */ ring->wptr = 0; adev->wb.wb[ring->wptr_offs] = 0; @@ -3733,10 +3704,6 @@ static int gfx_v10_0_hw_init(void *handle) int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - r = gfx_v10_0_csb_vram_pin(adev); - if (r) - return r; - if (!amdgpu_emu_mode) gfx_v10_0_init_golden_registers(adev); @@ -3819,12 +3786,11 @@ static int gfx_v10_0_hw_fini(void *handle) if (amdgpu_gfx_disable_kcq(adev)) DRM_ERROR("KCQ disable failed\n"); if (amdgpu_sriov_vf(adev)) { - pr_debug("For SRIOV client, shouldn't do anything.\n"); + gfx_v10_0_cp_gfx_enable(adev, false); return 0; } gfx_v10_0_cp_enable(adev, false); gfx_v10_0_enable_gui_idle_interrupt(adev, false); - gfx_v10_0_csb_vram_unpin(adev); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index 791ba398f007..d92e92e5d50b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -4554,6 +4554,8 @@ static int gfx_v7_0_hw_init(void *handle) gfx_v7_0_constants_init(adev); + /* init CSB */ + adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr); /* init rlc */ r = adev->gfx.rlc.funcs->resume(adev); if (r) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index ffbde9136372..983db77999e7 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -1321,39 +1321,6 @@ static int gfx_v8_0_rlc_init(struct amdgpu_device *adev) return 0; } -static int gfx_v8_0_csb_vram_pin(struct amdgpu_device *adev) -{ - int r; - - r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false); - if (unlikely(r != 0)) - return r; - - r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, - AMDGPU_GEM_DOMAIN_VRAM); - if (!r) - adev->gfx.rlc.clear_state_gpu_addr = - amdgpu_bo_gpu_offset(adev->gfx.rlc.clear_state_obj); - - amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); - - return r; -} - -static void gfx_v8_0_csb_vram_unpin(struct amdgpu_device *adev) -{ - int r; - - if (!adev->gfx.rlc.clear_state_obj) - return; - - r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true); - if (likely(r == 0)) { - amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj); - amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); - } -} - static void gfx_v8_0_mec_fini(struct amdgpu_device *adev) { amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL); @@ -3917,6 +3884,7 @@ static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev, static void gfx_v8_0_init_csb(struct amdgpu_device *adev) { + adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr); /* csib */ WREG32(mmRLC_CSIB_ADDR_HI, adev->gfx.rlc.clear_state_gpu_addr >> 32); @@ -4837,10 +4805,6 @@ static int gfx_v8_0_hw_init(void *handle) gfx_v8_0_init_golden_registers(adev); gfx_v8_0_constants_init(adev); - r = gfx_v8_0_csb_vram_pin(adev); - if (r) - return r; - r = adev->gfx.rlc.funcs->resume(adev); if (r) return r; @@ -4958,8 +4922,6 @@ static int gfx_v8_0_hw_fini(void *handle) pr_err("rlc is busy, skip halt rlc\n"); amdgpu_gfx_rlc_exit_safe_mode(adev); - gfx_v8_0_csb_vram_unpin(adev); - return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 3ebd5c20dfd3..66328ffa395a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -704,6 +704,7 @@ static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fffffff, 0xb90f5b1), SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135), SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_CONFIG, 0xffffffff, 0x011A0000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_FIFO_SIZES, 0xffffffff, 0x00000f00), }; static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] = @@ -1051,8 +1052,13 @@ static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev) case CHIP_VEGA20: break; case CHIP_RAVEN: - if (!(adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8) - &&((adev->gfx.rlc_fw_version != 106 && + /* Disable GFXOFF on original raven. There are combinations + * of sbios and platforms that are not stable. + */ + if (!(adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8)) + adev->pm.pp_feature &= ~PP_GFXOFF_MASK; + else if (!(adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8) + &&((adev->gfx.rlc_fw_version != 106 && adev->gfx.rlc_fw_version < 531) || (adev->gfx.rlc_fw_version == 53815) || (adev->gfx.rlc_feature_version < 1) || @@ -1689,39 +1695,6 @@ static int gfx_v9_0_rlc_init(struct amdgpu_device *adev) return 0; } -static int gfx_v9_0_csb_vram_pin(struct amdgpu_device *adev) -{ - int r; - - r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false); - if (unlikely(r != 0)) - return r; - - r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, - AMDGPU_GEM_DOMAIN_VRAM); - if (!r) - adev->gfx.rlc.clear_state_gpu_addr = - amdgpu_bo_gpu_offset(adev->gfx.rlc.clear_state_obj); - - amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); - - return r; -} - -static void gfx_v9_0_csb_vram_unpin(struct amdgpu_device *adev) -{ - int r; - - if (!adev->gfx.rlc.clear_state_obj) - return; - - r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true); - if (likely(r == 0)) { - amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj); - amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); - } -} - static void gfx_v9_0_mec_fini(struct amdgpu_device *adev) { amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL); @@ -2409,6 +2382,7 @@ static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev, static void gfx_v9_0_init_csb(struct amdgpu_device *adev) { + adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr); /* csib */ WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI), adev->gfx.rlc.clear_state_gpu_addr >> 32); @@ -3700,10 +3674,6 @@ static int gfx_v9_0_hw_init(void *handle) gfx_v9_0_constants_init(adev); - r = gfx_v9_0_csb_vram_pin(adev); - if (r) - return r; - r = adev->gfx.rlc.funcs->resume(adev); if (r) return r; @@ -3785,8 +3755,6 @@ static int gfx_v9_0_hw_fini(void *handle) gfx_v9_0_cp_enable(adev, false); adev->gfx.rlc.funcs->stop(adev); - gfx_v9_0_csb_vram_unpin(adev); - return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c index 9ec4297e61e5..e91bd7945777 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c @@ -367,6 +367,8 @@ void gfxhub_v1_0_init(struct amdgpu_device *adev) hub->ctx0_ptb_addr_hi32 = SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32); + hub->vm_inv_eng0_sem = + SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG0_SEM); hub->vm_inv_eng0_req = SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG0_REQ); hub->vm_inv_eng0_ack = diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_1.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_1.c index 5e9ab8eb214a..c0ab71df0d90 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_1.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_1.c @@ -33,16 +33,31 @@ int gfxhub_v1_1_get_xgmi_info(struct amdgpu_device *adev) u32 xgmi_lfb_cntl = RREG32_SOC15(GC, 0, mmMC_VM_XGMI_LFB_CNTL); u32 max_region = REG_GET_FIELD(xgmi_lfb_cntl, MC_VM_XGMI_LFB_CNTL, PF_MAX_REGION); + u32 max_num_physical_nodes = 0; + u32 max_physical_node_id = 0; + + switch (adev->asic_type) { + case CHIP_VEGA20: + max_num_physical_nodes = 4; + max_physical_node_id = 3; + break; + case CHIP_ARCTURUS: + max_num_physical_nodes = 8; + max_physical_node_id = 7; + break; + default: + return -EINVAL; + } /* PF_MAX_REGION=0 means xgmi is disabled */ if (max_region) { adev->gmc.xgmi.num_physical_nodes = max_region + 1; - if (adev->gmc.xgmi.num_physical_nodes > 4) + if (adev->gmc.xgmi.num_physical_nodes > max_num_physical_nodes) return -EINVAL; adev->gmc.xgmi.physical_node_id = REG_GET_FIELD(xgmi_lfb_cntl, MC_VM_XGMI_LFB_CNTL, PF_LFB_REGION); - if (adev->gmc.xgmi.physical_node_id > 3) + if (adev->gmc.xgmi.physical_node_id > max_physical_node_id) return -EINVAL; adev->gmc.xgmi.node_segment_size = REG_GET_FIELD( RREG32_SOC15(GC, 0, mmMC_VM_XGMI_LFB_SIZE), diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c index b4f32d853ca1..b70c7b483c24 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c @@ -356,6 +356,8 @@ void gfxhub_v2_0_init(struct amdgpu_device *adev) hub->ctx0_ptb_addr_hi32 = SOC15_REG_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32); + hub->vm_inv_eng0_sem = + SOC15_REG_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_SEM); hub->vm_inv_eng0_req = SOC15_REG_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_REQ); hub->vm_inv_eng0_ack = diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index 27f68d32bfec..232469507446 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -235,6 +235,29 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid, const unsigned eng = 17; unsigned int i; + spin_lock(&adev->gmc.invalidate_lock); + /* + * It may lose gpuvm invalidate acknowldege state across power-gating + * off cycle, add semaphore acquire before invalidation and semaphore + * release after invalidation to avoid entering power gated state + * to WA the Issue + */ + + /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */ + if (vmhub == AMDGPU_MMHUB_0 || + vmhub == AMDGPU_MMHUB_1) { + for (i = 0; i < adev->usec_timeout; i++) { + /* a read return value of 1 means semaphore acuqire */ + tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_sem + eng); + if (tmp & 0x1) + break; + udelay(1); + } + + if (i >= adev->usec_timeout) + DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n"); + } + WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp); /* @@ -254,6 +277,17 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid, udelay(1); } + /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */ + if (vmhub == AMDGPU_MMHUB_0 || + vmhub == AMDGPU_MMHUB_1) + /* + * add semaphore release after invalidation, + * write with 0 means semaphore release + */ + WREG32_NO_KIQ(hub->vm_inv_eng0_sem + eng, 0); + + spin_unlock(&adev->gmc.invalidate_lock); + if (i < adev->usec_timeout) return; @@ -292,7 +326,8 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, if (!adev->mman.buffer_funcs_enabled || !adev->ib_pool_ready || - adev->in_gpu_reset) { + adev->in_gpu_reset || + ring->sched.ready == false) { gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_GFXHUB_0, 0); mutex_unlock(&adev->mman.gtt_window_lock); return; @@ -338,6 +373,20 @@ static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, uint32_t req = gmc_v10_0_get_invalidate_req(vmid, 0); unsigned eng = ring->vm_inv_eng; + /* + * It may lose gpuvm invalidate acknowldege state across power-gating + * off cycle, add semaphore acquire before invalidation and semaphore + * release after invalidation to avoid entering power gated state + * to WA the Issue + */ + + /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */ + if (ring->funcs->vmhub == AMDGPU_MMHUB_0 || + ring->funcs->vmhub == AMDGPU_MMHUB_1) + /* a read return value of 1 means semaphore acuqire */ + amdgpu_ring_emit_reg_wait(ring, + hub->vm_inv_eng0_sem + eng, 0x1, 0x1); + amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 + (2 * vmid), lower_32_bits(pd_addr)); @@ -348,6 +397,15 @@ static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, hub->vm_inv_eng0_ack + eng, req, 1 << vmid); + /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */ + if (ring->funcs->vmhub == AMDGPU_MMHUB_0 || + ring->funcs->vmhub == AMDGPU_MMHUB_1) + /* + * add semaphore release after invalidation, + * write with 0 means semaphore release + */ + amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_sem + eng, 0); + return pd_addr; } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 9f2a893871ec..3c355fb5d2b4 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -459,6 +459,29 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, } spin_lock(&adev->gmc.invalidate_lock); + + /* + * It may lose gpuvm invalidate acknowldege state across power-gating + * off cycle, add semaphore acquire before invalidation and semaphore + * release after invalidation to avoid entering power gated state + * to WA the Issue + */ + + /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */ + if (vmhub == AMDGPU_MMHUB_0 || + vmhub == AMDGPU_MMHUB_1) { + for (j = 0; j < adev->usec_timeout; j++) { + /* a read return value of 1 means semaphore acuqire */ + tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_sem + eng); + if (tmp & 0x1) + break; + udelay(1); + } + + if (j >= adev->usec_timeout) + DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n"); + } + WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp); /* @@ -474,7 +497,18 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, break; udelay(1); } + + /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */ + if (vmhub == AMDGPU_MMHUB_0 || + vmhub == AMDGPU_MMHUB_1) + /* + * add semaphore release after invalidation, + * write with 0 means semaphore release + */ + WREG32_NO_KIQ(hub->vm_inv_eng0_sem + eng, 0); + spin_unlock(&adev->gmc.invalidate_lock); + if (j < adev->usec_timeout) return; @@ -489,6 +523,20 @@ static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, uint32_t req = gmc_v9_0_get_invalidate_req(vmid, 0); unsigned eng = ring->vm_inv_eng; + /* + * It may lose gpuvm invalidate acknowldege state across power-gating + * off cycle, add semaphore acquire before invalidation and semaphore + * release after invalidation to avoid entering power gated state + * to WA the Issue + */ + + /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */ + if (ring->funcs->vmhub == AMDGPU_MMHUB_0 || + ring->funcs->vmhub == AMDGPU_MMHUB_1) + /* a read return value of 1 means semaphore acuqire */ + amdgpu_ring_emit_reg_wait(ring, + hub->vm_inv_eng0_sem + eng, 0x1, 0x1); + amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 + (2 * vmid), lower_32_bits(pd_addr)); @@ -499,6 +547,15 @@ static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, hub->vm_inv_eng0_ack + eng, req, 1 << vmid); + /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */ + if (ring->funcs->vmhub == AMDGPU_MMHUB_0 || + ring->funcs->vmhub == AMDGPU_MMHUB_1) + /* + * add semaphore release after invalidation, + * write with 0 means semaphore release + */ + amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_sem + eng, 0); + return pd_addr; } diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c index 6965e1e6fa9e..28105e4af507 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c @@ -420,6 +420,8 @@ void mmhub_v1_0_init(struct amdgpu_device *adev) hub->ctx0_ptb_addr_hi32 = SOC15_REG_OFFSET(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32); + hub->vm_inv_eng0_sem = + SOC15_REG_OFFSET(MMHUB, 0, mmVM_INVALIDATE_ENG0_SEM); hub->vm_inv_eng0_req = SOC15_REG_OFFSET(MMHUB, 0, mmVM_INVALIDATE_ENG0_REQ); hub->vm_inv_eng0_ack = diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c index 945533634711..a7cb185d639a 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c @@ -348,6 +348,8 @@ void mmhub_v2_0_init(struct amdgpu_device *adev) hub->ctx0_ptb_addr_hi32 = SOC15_REG_OFFSET(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32); + hub->vm_inv_eng0_sem = + SOC15_REG_OFFSET(MMHUB, 0, mmMMVM_INVALIDATE_ENG0_SEM); hub->vm_inv_eng0_req = SOC15_REG_OFFSET(MMHUB, 0, mmMMVM_INVALIDATE_ENG0_REQ); hub->vm_inv_eng0_ack = diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c index 2c5adfe803a2..66efe2f7bd76 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c @@ -504,6 +504,10 @@ void mmhub_v9_4_init(struct amdgpu_device *adev) SOC15_REG_OFFSET(MMHUB, 0, mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32) + i * MMHUB_INSTANCE_REGISTER_OFFSET; + hub[i]->vm_inv_eng0_sem = + SOC15_REG_OFFSET(MMHUB, 0, + mmVML2VC0_VM_INVALIDATE_ENG0_SEM) + + i * MMHUB_INSTANCE_REGISTER_OFFSET; hub[i]->vm_inv_eng0_req = SOC15_REG_OFFSET(MMHUB, 0, mmVML2VC0_VM_INVALIDATE_ENG0_REQ) + diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index af68f9815f28..0ba66bef5746 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -40,6 +40,7 @@ #include "gc/gc_10_1_0_sh_mask.h" #include "hdp/hdp_5_0_0_offset.h" #include "hdp/hdp_5_0_0_sh_mask.h" +#include "smuio/smuio_11_0_0_offset.h" #include "soc15.h" #include "soc15_common.h" @@ -156,8 +157,27 @@ static bool nv_read_disabled_bios(struct amdgpu_device *adev) static bool nv_read_bios_from_rom(struct amdgpu_device *adev, u8 *bios, u32 length_bytes) { - /* TODO: will implement it when SMU header is available */ - return false; + u32 *dw_ptr; + u32 i, length_dw; + + if (bios == NULL) + return false; + if (length_bytes == 0) + return false; + /* APU vbios image is part of sbios image */ + if (adev->flags & AMD_IS_APU) + return false; + + dw_ptr = (u32 *)bios; + length_dw = ALIGN(length_bytes, 4) / 4; + + /* set rom index to 0 */ + WREG32(SOC15_REG_OFFSET(SMUIO, 0, mmROM_INDEX), 0); + /* read out the rom data */ + for (i = 0; i < length_dw; i++) + dw_ptr[i] = RREG32(SOC15_REG_OFFSET(SMUIO, 0, mmROM_DATA)); + + return true; } static struct soc15_allowed_register_entry nv_allowed_read_registers[] = { diff --git a/drivers/gpu/drm/amd/amdgpu/si_ih.c b/drivers/gpu/drm/amd/amdgpu/si_ih.c index 57bb5f9e08b2..88ae27a5a03d 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/si_ih.c @@ -64,7 +64,8 @@ static int si_ih_irq_init(struct amdgpu_device *adev) u32 interrupt_cntl, ih_cntl, ih_rb_cntl; si_ih_disable_interrupts(adev); - WREG32(INTERRUPT_CNTL2, adev->irq.ih.gpu_addr >> 8); + /* set dummy read address to dummy page address */ + WREG32(INTERRUPT_CNTL2, adev->dummy_page_addr >> 8); interrupt_cntl = RREG32(INTERRUPT_CNTL); interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE; interrupt_cntl &= ~IH_REQ_NONSNOOP_EN; diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.h b/drivers/gpu/drm/amd/amdgpu/soc15.h index 9af6c6ffbfa2..57af489a5de3 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15.h @@ -28,8 +28,8 @@ #include "nbio_v7_0.h" #include "nbio_v7_4.h" -#define SOC15_FLUSH_GPU_TLB_NUM_WREG 4 -#define SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT 1 +#define SOC15_FLUSH_GPU_TLB_NUM_WREG 6 +#define SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT 3 extern const struct amd_ip_funcs soc15_common_ip_funcs; diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c index 03083e5f731a..93edf9193a7b 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c @@ -293,7 +293,7 @@ static int vcn_v2_5_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct amdgpu_ring *ring; - int i; + int i, j; for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { if (adev->vcn.harvest_config & (1 << i)) @@ -305,8 +305,8 @@ static int vcn_v2_5_hw_fini(void *handle) ring->sched.ready = false; - for (i = 0; i < adev->vcn.num_enc_rings; ++i) { - ring = &adev->vcn.inst[i].ring_enc[i]; + for (j = 0; j < adev->vcn.num_enc_rings; ++j) { + ring = &adev->vcn.inst[i].ring_enc[j]; ring->sched.ready = false; } diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index 78e5cdc0c058..f1b171e30774 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -783,10 +783,13 @@ static int vi_asic_reset(struct amdgpu_device *adev) { int r; - if (vi_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) + if (vi_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) { + if (!adev->in_suspend) + amdgpu_inc_vram_lost(adev); r = smu7_asic_baco_reset(adev); - else + } else { r = vi_asic_pci_config_reset(adev); + } return r; } diff --git a/drivers/gpu/drm/amd/amdkfd/Kconfig b/drivers/gpu/drm/amd/amdkfd/Kconfig index a1a35d4d594b..ba0e68057a89 100644 --- a/drivers/gpu/drm/amd/amdkfd/Kconfig +++ b/drivers/gpu/drm/amd/amdkfd/Kconfig @@ -5,7 +5,7 @@ config HSA_AMD bool "HSA kernel driver for AMD GPU devices" - depends on DRM_AMDGPU && (X86_64 || ARM64) + depends on DRM_AMDGPU && (X86_64 || ARM64 || PPC64) imply AMD_IOMMU_V2 if X86_64 select MMU_NOTIFIER help diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index caba9ecac723..7aac9568d3be 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -719,7 +719,7 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) */ if (adev->flags & AMD_IS_APU && adev->asic_type >= CHIP_CARRIZO && - adev->asic_type <= CHIP_RAVEN) + adev->asic_type < CHIP_RAVEN) init_data.flags.gpu_vm_support = true; if (amdgpu_dc_feature_mask & DC_FBC_MASK) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index 49cf39711dfc..2bf8534c18fb 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -36,7 +36,9 @@ #include "dc_link_ddc.h" #include "i2caux_interface.h" - +#if defined(CONFIG_DEBUG_FS) +#include "amdgpu_dm_debugfs.h" +#endif /* #define TRACE_DPCD */ #ifdef TRACE_DPCD @@ -147,6 +149,12 @@ amdgpu_dm_mst_connector_late_register(struct drm_connector *connector) to_amdgpu_dm_connector(connector); struct drm_dp_mst_port *port = amdgpu_dm_connector->port; +#if defined(CONFIG_DEBUG_FS) + connector_debugfs_init(amdgpu_dm_connector); + amdgpu_dm_connector->debugfs_dpcd_address = 0; + amdgpu_dm_connector->debugfs_dpcd_size = 0; +#endif + return drm_dp_mst_connector_late_register(connector, port); } diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c index 55a520a63712..778f186b3a05 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c @@ -342,7 +342,8 @@ bool dm_pp_get_clock_levels_by_type( if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->get_clock_by_type) { if (adev->powerplay.pp_funcs->get_clock_by_type(pp_handle, dc_to_pp_clock_type(clk_type), &pp_clks)) { - /* Error in pplib. Provide default values. */ + /* Error in pplib. Provide default values. */ + get_default_clock_levels(clk_type, dc_clks); return true; } } else if (adev->smu.ppt_funcs && adev->smu.ppt_funcs->get_clock_by_type) { diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c index 921a36668ced..ac8c18fadefc 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c @@ -1037,6 +1037,25 @@ void dcn20_pipe_control_lock( if (pipe->plane_state != NULL) flip_immediate = pipe->plane_state->flip_immediate; + if (flip_immediate && lock) { + const int TIMEOUT_FOR_FLIP_PENDING = 100000; + int i; + + for (i = 0; i < TIMEOUT_FOR_FLIP_PENDING; ++i) { + if (!pipe->plane_res.hubp->funcs->hubp_is_flip_pending(pipe->plane_res.hubp)) + break; + udelay(1); + } + + if (pipe->bottom_pipe != NULL) { + for (i = 0; i < TIMEOUT_FOR_FLIP_PENDING; ++i) { + if (!pipe->bottom_pipe->plane_res.hubp->funcs->hubp_is_flip_pending(pipe->bottom_pipe->plane_res.hubp)) + break; + udelay(1); + } + } + } + /* In flip immediate and pipe splitting case, we need to use GSL * for synchronization. Only do setup on locking and on flip type change. */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c index bbd1c98564be..09793336d84f 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c @@ -157,6 +157,74 @@ struct _vcs_dpi_ip_params_st dcn2_0_ip = { .xfc_fill_constant_bytes = 0, }; +struct _vcs_dpi_ip_params_st dcn2_0_nv14_ip = { + .odm_capable = 1, + .gpuvm_enable = 0, + .hostvm_enable = 0, + .gpuvm_max_page_table_levels = 4, + .hostvm_max_page_table_levels = 4, + .hostvm_cached_page_table_levels = 0, + .num_dsc = 5, + .rob_buffer_size_kbytes = 168, + .det_buffer_size_kbytes = 164, + .dpte_buffer_size_in_pte_reqs_luma = 84, + .dpte_buffer_size_in_pte_reqs_chroma = 42,//todo + .dpp_output_buffer_pixels = 2560, + .opp_output_buffer_lines = 1, + .pixel_chunk_size_kbytes = 8, + .pte_enable = 1, + .max_page_table_levels = 4, + .pte_chunk_size_kbytes = 2, + .meta_chunk_size_kbytes = 2, + .writeback_chunk_size_kbytes = 2, + .line_buffer_size_bits = 789504, + .is_line_buffer_bpp_fixed = 0, + .line_buffer_fixed_bpp = 0, + .dcc_supported = true, + .max_line_buffer_lines = 12, + .writeback_luma_buffer_size_kbytes = 12, + .writeback_chroma_buffer_size_kbytes = 8, + .writeback_chroma_line_buffer_width_pixels = 4, + .writeback_max_hscl_ratio = 1, + .writeback_max_vscl_ratio = 1, + .writeback_min_hscl_ratio = 1, + .writeback_min_vscl_ratio = 1, + .writeback_max_hscl_taps = 12, + .writeback_max_vscl_taps = 12, + .writeback_line_buffer_luma_buffer_size = 0, + .writeback_line_buffer_chroma_buffer_size = 14643, + .cursor_buffer_size = 8, + .cursor_chunk_size = 2, + .max_num_otg = 5, + .max_num_dpp = 5, + .max_num_wb = 1, + .max_dchub_pscl_bw_pix_per_clk = 4, + .max_pscl_lb_bw_pix_per_clk = 2, + .max_lb_vscl_bw_pix_per_clk = 4, + .max_vscl_hscl_bw_pix_per_clk = 4, + .max_hscl_ratio = 8, + .max_vscl_ratio = 8, + .hscl_mults = 4, + .vscl_mults = 4, + .max_hscl_taps = 8, + .max_vscl_taps = 8, + .dispclk_ramp_margin_percent = 1, + .underscan_factor = 1.10, + .min_vblank_lines = 32, // + .dppclk_delay_subtotal = 77, // + .dppclk_delay_scl_lb_only = 16, + .dppclk_delay_scl = 50, + .dppclk_delay_cnvc_formatter = 8, + .dppclk_delay_cnvc_cursor = 6, + .dispclk_delay_subtotal = 87, // + .dcfclk_cstate_latency = 10, // SRExitTime + .max_inter_dcn_tile_repeaters = 8, + .xfc_supported = true, + .xfc_fill_bw_overhead_percent = 10.0, + .xfc_fill_constant_bytes = 0, + .ptoi_supported = 0 +}; + struct _vcs_dpi_soc_bounding_box_st dcn2_0_soc = { /* Defaults that get patched on driver load from firmware. */ .clock_limits = { @@ -854,6 +922,8 @@ static const struct resource_caps res_cap_nv14 = { .num_pll = 5, .num_dwb = 1, .num_ddc = 5, + .num_vmid = 16, + .num_dsc = 5, }; static const struct dc_debug_options debug_defaults_drv = { @@ -3212,6 +3282,10 @@ static struct _vcs_dpi_soc_bounding_box_st *get_asic_rev_soc_bb( static struct _vcs_dpi_ip_params_st *get_asic_rev_ip_params( uint32_t hw_internal_rev) { + /* NV14 */ + if (ASICREV_IS_NAVI14_M(hw_internal_rev)) + return &dcn2_0_nv14_ip; + /* NV12 and NV10 */ return &dcn2_0_ip; } diff --git a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c index 1e2da4d37567..5ff7ccedfbed 100644 --- a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c @@ -591,10 +591,18 @@ int smu_sys_set_pp_table(struct smu_context *smu, void *buf, size_t size) smu_table->power_play_table = smu_table->hardcode_pptable; smu_table->power_play_table_size = size; + /* + * Special hw_fini action(for Navi1x, the DPMs disablement will be + * skipped) may be needed for custom pptable uploading. + */ + smu->uploading_custom_pp_table = true; + ret = smu_reset(smu); if (ret) pr_info("smu reset failed, ret = %d\n", ret); + smu->uploading_custom_pp_table = false; + failed: mutex_unlock(&smu->mutex); return ret; @@ -719,6 +727,7 @@ static int smu_set_funcs(struct amdgpu_device *adev) switch (adev->asic_type) { case CHIP_VEGA20: + adev->pm.pp_feature &= ~PP_GFXOFF_MASK; vega20_set_ppt_funcs(smu); break; case CHIP_NAVI10: @@ -727,6 +736,7 @@ static int smu_set_funcs(struct amdgpu_device *adev) navi10_set_ppt_funcs(smu); break; case CHIP_ARCTURUS: + adev->pm.pp_feature &= ~PP_GFXOFF_MASK; arcturus_set_ppt_funcs(smu); /* OD is not supported on Arcturus */ smu->od_enabled =false; @@ -1068,10 +1078,6 @@ static int smu_smc_table_hw_init(struct smu_context *smu, return ret; if (adev->asic_type != CHIP_ARCTURUS) { - ret = smu_override_pcie_parameters(smu); - if (ret) - return ret; - ret = smu_notify_display_change(smu); if (ret) return ret; @@ -1100,6 +1106,12 @@ static int smu_smc_table_hw_init(struct smu_context *smu, return ret; } + if (adev->asic_type != CHIP_ARCTURUS) { + ret = smu_override_pcie_parameters(smu); + if (ret) + return ret; + } + ret = smu_set_default_od_settings(smu, initialize); if (ret) return ret; @@ -1109,7 +1121,7 @@ static int smu_smc_table_hw_init(struct smu_context *smu, if (ret) return ret; - ret = smu_get_power_limit(smu, &smu->default_power_limit, true, false); + ret = smu_get_power_limit(smu, &smu->default_power_limit, false, false); if (ret) return ret; } @@ -1293,10 +1305,25 @@ static int smu_hw_fini(void *handle) return ret; } - ret = smu_stop_dpms(smu); - if (ret) { - pr_warn("Fail to stop Dpms!\n"); - return ret; + /* + * For custom pptable uploading, skip the DPM features + * disable process on Navi1x ASICs. + * - As the gfx related features are under control of + * RLC on those ASICs. RLC reinitialization will be + * needed to reenable them. That will cost much more + * efforts. + * + * - SMU firmware can handle the DPM reenablement + * properly. + */ + if (!smu->uploading_custom_pp_table || + !((adev->asic_type >= CHIP_NAVI10) && + (adev->asic_type <= CHIP_NAVI12))) { + ret = smu_stop_dpms(smu); + if (ret) { + pr_warn("Fail to stop Dpms!\n"); + return ret; + } } kfree(table_context->driver_pptable); @@ -2511,3 +2538,22 @@ int smu_get_dpm_clock_table(struct smu_context *smu, return ret; } + +uint32_t smu_get_pptable_power_limit(struct smu_context *smu) +{ + uint32_t ret = 0; + + if (smu->ppt_funcs->get_pptable_power_limit) + ret = smu->ppt_funcs->get_pptable_power_limit(smu); + + return ret; +} + +int smu_send_smc_msg(struct smu_context *smu, + enum smu_message_type msg) +{ + int ret; + + ret = smu_send_smc_msg_with_param(smu, msg, 0); + return ret; +} diff --git a/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c b/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c index 3099ac256bd3..ce3566ca3e24 100644 --- a/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c +++ b/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c @@ -1261,15 +1261,14 @@ arcturus_get_profiling_clk_mask(struct smu_context *smu, static int arcturus_get_power_limit(struct smu_context *smu, uint32_t *limit, - bool asic_default) + bool cap) { PPTable_t *pptable = smu->smu_table.driver_pptable; uint32_t asic_default_power_limit = 0; int ret = 0; int power_src; - if (!smu->default_power_limit || - !smu->power_limit) { + if (!smu->power_limit) { if (smu_feature_is_enabled(smu, SMU_FEATURE_PPT_BIT)) { power_src = smu_power_get_index(smu, SMU_POWER_SOURCE_AC); if (power_src < 0) @@ -1292,17 +1291,11 @@ static int arcturus_get_power_limit(struct smu_context *smu, pptable->SocketPowerLimitAc[PPT_THROTTLER_PPT0]; } - if (smu->od_enabled) { - asic_default_power_limit *= (100 + smu->smu_table.TDPODLimit); - asic_default_power_limit /= 100; - } - - smu->default_power_limit = asic_default_power_limit; smu->power_limit = asic_default_power_limit; } - if (asic_default) - *limit = smu->default_power_limit; + if (cap) + *limit = smu_v11_0_get_max_power_limit(smu); else *limit = smu->power_limit; @@ -2070,6 +2063,13 @@ static void arcturus_i2c_eeprom_control_fini(struct i2c_adapter *control) i2c_del_adapter(control); } +static uint32_t arcturus_get_pptable_power_limit(struct smu_context *smu) +{ + PPTable_t *pptable = smu->smu_table.driver_pptable; + + return pptable->SocketPowerLimitAc[PPT_THROTTLER_PPT0]; +} + static const struct pptable_funcs arcturus_ppt_funcs = { /* translate smu index into arcturus specific index */ .get_smu_msg_index = arcturus_get_smu_msg_index, @@ -2130,7 +2130,6 @@ static const struct pptable_funcs arcturus_ppt_funcs = { .set_tool_table_location = smu_v11_0_set_tool_table_location, .notify_memory_pool_location = smu_v11_0_notify_memory_pool_location, .system_features_control = smu_v11_0_system_features_control, - .send_smc_msg = smu_v11_0_send_msg, .send_smc_msg_with_param = smu_v11_0_send_msg_with_param, .read_smc_arg = smu_v11_0_read_arg, .init_display_count = smu_v11_0_init_display_count, @@ -2160,6 +2159,7 @@ static const struct pptable_funcs arcturus_ppt_funcs = { .get_dpm_ultimate_freq = smu_v11_0_get_dpm_ultimate_freq, .set_soft_freq_limited_range = smu_v11_0_set_soft_freq_limited_range, .override_pcie_parameters = smu_v11_0_override_pcie_parameters, + .get_pptable_power_limit = arcturus_get_pptable_power_limit, }; void arcturus_set_ppt_funcs(struct smu_context *smu) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c index a24beaa4fb01..d2909c91d65b 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c @@ -81,6 +81,8 @@ static void hwmgr_init_workload_prority(struct pp_hwmgr *hwmgr) int hwmgr_early_init(struct pp_hwmgr *hwmgr) { + struct amdgpu_device *adev; + if (!hwmgr) return -EINVAL; @@ -94,8 +96,11 @@ int hwmgr_early_init(struct pp_hwmgr *hwmgr) hwmgr_init_workload_prority(hwmgr); hwmgr->gfxoff_state_changed_by_workload = false; + adev = hwmgr->adev; + switch (hwmgr->chip_family) { case AMDGPU_FAMILY_CI: + adev->pm.pp_feature &= ~PP_GFXOFF_MASK; hwmgr->smumgr_funcs = &ci_smu_funcs; ci_set_asic_special_caps(hwmgr); hwmgr->feature_mask &= ~(PP_VBI_TIME_SUPPORT_MASK | @@ -106,12 +111,14 @@ int hwmgr_early_init(struct pp_hwmgr *hwmgr) smu7_init_function_pointers(hwmgr); break; case AMDGPU_FAMILY_CZ: + adev->pm.pp_feature &= ~PP_GFXOFF_MASK; hwmgr->od_enabled = false; hwmgr->smumgr_funcs = &smu8_smu_funcs; hwmgr->feature_mask &= ~PP_GFXOFF_MASK; smu8_init_function_pointers(hwmgr); break; case AMDGPU_FAMILY_VI: + adev->pm.pp_feature &= ~PP_GFXOFF_MASK; hwmgr->feature_mask &= ~PP_GFXOFF_MASK; switch (hwmgr->chip_id) { case CHIP_TOPAZ: @@ -153,6 +160,7 @@ int hwmgr_early_init(struct pp_hwmgr *hwmgr) case AMDGPU_FAMILY_AI: switch (hwmgr->chip_id) { case CHIP_VEGA10: + adev->pm.pp_feature &= ~PP_GFXOFF_MASK; hwmgr->feature_mask &= ~PP_GFXOFF_MASK; hwmgr->smumgr_funcs = &vega10_smu_funcs; vega10_hwmgr_init(hwmgr); @@ -162,6 +170,7 @@ int hwmgr_early_init(struct pp_hwmgr *hwmgr) vega12_hwmgr_init(hwmgr); break; case CHIP_VEGA20: + adev->pm.pp_feature &= ~PP_GFXOFF_MASK; hwmgr->feature_mask &= ~PP_GFXOFF_MASK; hwmgr->smumgr_funcs = &vega20_smu_funcs; vega20_hwmgr_init(hwmgr); diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c index c805c6fcdba2..f73dff68e799 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c @@ -3477,18 +3477,31 @@ static int smu7_get_pp_table_entry(struct pp_hwmgr *hwmgr, static int smu7_get_gpu_power(struct pp_hwmgr *hwmgr, u32 *query) { + struct amdgpu_device *adev = hwmgr->adev; int i; u32 tmp = 0; if (!query) return -EINVAL; - smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_GetCurrPkgPwr, 0); - tmp = cgs_read_register(hwmgr->device, mmSMC_MSG_ARG_0); - *query = tmp; + /* + * PPSMC_MSG_GetCurrPkgPwr is not supported on: + * - Hawaii + * - Bonaire + * - Fiji + * - Tonga + */ + if ((adev->asic_type != CHIP_HAWAII) && + (adev->asic_type != CHIP_BONAIRE) && + (adev->asic_type != CHIP_FIJI) && + (adev->asic_type != CHIP_TONGA)) { + smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_GetCurrPkgPwr, 0); + tmp = cgs_read_register(hwmgr->device, mmSMC_MSG_ARG_0); + *query = tmp; - if (tmp != 0) - return 0; + if (tmp != 0) + return 0; + } smum_send_msg_to_smc(hwmgr, PPSMC_MSG_PmStatusLogStart); cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, @@ -3969,6 +3982,13 @@ static int smu7_set_power_state_tasks(struct pp_hwmgr *hwmgr, const void *input) "Failed to populate and upload SCLK MCLK DPM levels!", result = tmp_result); + /* + * If a custom pp table is loaded, set DPMTABLE_OD_UPDATE_VDDC flag. + * That effectively disables AVFS feature. + */ + if (hwmgr->hardcode_pp_table != NULL) + data->need_update_smu7_dpm_table |= DPMTABLE_OD_UPDATE_VDDC; + tmp_result = smu7_update_avfs(hwmgr); PP_ASSERT_WITH_CODE((0 == tmp_result), "Failed to update avfs voltages!", diff --git a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h index 8120e7587585..ac9758305ab3 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h +++ b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h @@ -261,7 +261,6 @@ struct smu_table_context struct smu_table *tables; struct smu_table memory_pool; uint8_t thermal_controller_type; - uint16_t TDPODLimit; void *overdrive_table; }; @@ -391,6 +390,7 @@ struct smu_context uint32_t smc_if_version; + bool uploading_custom_pp_table; }; struct i2c_adapter; @@ -497,8 +497,8 @@ struct pptable_funcs { int (*notify_memory_pool_location)(struct smu_context *smu); int (*set_last_dcef_min_deep_sleep_clk)(struct smu_context *smu); int (*system_features_control)(struct smu_context *smu, bool en); - int (*send_smc_msg)(struct smu_context *smu, uint16_t msg); - int (*send_smc_msg_with_param)(struct smu_context *smu, uint16_t msg, uint32_t param); + int (*send_smc_msg_with_param)(struct smu_context *smu, + enum smu_message_type msg, uint32_t param); int (*read_smc_arg)(struct smu_context *smu, uint32_t *arg); int (*init_display_count)(struct smu_context *smu, uint32_t count); int (*set_allowed_mask)(struct smu_context *smu); @@ -548,6 +548,7 @@ struct pptable_funcs { int (*get_dpm_ultimate_freq)(struct smu_context *smu, enum smu_clk_type clk_type, uint32_t *min, uint32_t *max); int (*set_soft_freq_limited_range)(struct smu_context *smu, enum smu_clk_type clk_type, uint32_t min, uint32_t max); int (*override_pcie_parameters)(struct smu_context *smu); + uint32_t (*get_pptable_power_limit)(struct smu_context *smu); }; int smu_load_microcode(struct smu_context *smu); @@ -717,4 +718,6 @@ int smu_get_uclk_dpm_states(struct smu_context *smu, int smu_get_dpm_clock_table(struct smu_context *smu, struct dpm_clocks *clock_table); +uint32_t smu_get_pptable_power_limit(struct smu_context *smu); + #endif diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0.h b/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0.h index fd6ec9033d06..719844257713 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0.h +++ b/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0.h @@ -48,6 +48,8 @@ #define SMU11_TOOL_SIZE 0x19000 +#define MAX_PCIE_CONF 2 + #define CLK_MAP(clk, index) \ [SMU_##clk] = {1, (index)} @@ -88,6 +90,11 @@ struct smu_11_0_dpm_table { uint32_t max; /* MHz */ }; +struct smu_11_0_pcie_table { + uint8_t pcie_gen[MAX_PCIE_CONF]; + uint8_t pcie_lane[MAX_PCIE_CONF]; +}; + struct smu_11_0_dpm_tables { struct smu_11_0_dpm_table soc_table; struct smu_11_0_dpm_table gfx_table; @@ -100,6 +107,7 @@ struct smu_11_0_dpm_tables { struct smu_11_0_dpm_table display_table; struct smu_11_0_dpm_table phy_table; struct smu_11_0_dpm_table fclk_table; + struct smu_11_0_pcie_table pcie_table; }; struct smu_11_0_dpm_context { @@ -169,10 +177,9 @@ int smu_v11_0_notify_memory_pool_location(struct smu_context *smu); int smu_v11_0_system_features_control(struct smu_context *smu, bool en); -int smu_v11_0_send_msg(struct smu_context *smu, uint16_t msg); - int -smu_v11_0_send_msg_with_param(struct smu_context *smu, uint16_t msg, +smu_v11_0_send_msg_with_param(struct smu_context *smu, + enum smu_message_type msg, uint32_t param); int smu_v11_0_read_arg(struct smu_context *smu, uint32_t *arg); @@ -250,4 +257,8 @@ int smu_v11_0_set_soft_freq_limited_range(struct smu_context *smu, enum smu_clk_ int smu_v11_0_override_pcie_parameters(struct smu_context *smu); +int smu_v11_0_set_default_od_settings(struct smu_context *smu, bool initialize, size_t overdrive_table_size); + +uint32_t smu_v11_0_get_max_power_limit(struct smu_context *smu); + #endif diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0_pptable.h b/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0_pptable.h index 86cdc3393eac..b2f96a101124 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0_pptable.h +++ b/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0_pptable.h @@ -141,7 +141,9 @@ struct smu_11_0_powerplay_table struct smu_11_0_power_saving_clock_table power_saving_clock; struct smu_11_0_overdrive_table overdrive_table; +#ifndef SMU_11_0_PARTIAL_PPTABLE PPTable_t smc_pptable; //PPTable_t in smu11_driver_if.h +#endif } __attribute__((packed)); #endif diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu_v12_0.h b/drivers/gpu/drm/amd/powerplay/inc/smu_v12_0.h index 9b9f5df0911c..9d81d789c713 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/smu_v12_0.h +++ b/drivers/gpu/drm/amd/powerplay/inc/smu_v12_0.h @@ -44,10 +44,9 @@ int smu_v12_0_read_arg(struct smu_context *smu, uint32_t *arg); int smu_v12_0_wait_for_response(struct smu_context *smu); -int smu_v12_0_send_msg(struct smu_context *smu, uint16_t msg); - int -smu_v12_0_send_msg_with_param(struct smu_context *smu, uint16_t msg, +smu_v12_0_send_msg_with_param(struct smu_context *smu, + enum smu_message_type msg, uint32_t param); int smu_v12_0_check_fw_status(struct smu_context *smu); diff --git a/drivers/gpu/drm/amd/powerplay/navi10_ppt.c b/drivers/gpu/drm/amd/powerplay/navi10_ppt.c index 354f70978f82..4a14fd1f9fd5 100644 --- a/drivers/gpu/drm/amd/powerplay/navi10_ppt.c +++ b/drivers/gpu/drm/amd/powerplay/navi10_ppt.c @@ -36,6 +36,7 @@ #include "navi10_ppt.h" #include "smu_v11_0_pptable.h" #include "smu_v11_0_ppsmc.h" +#include "nbio/nbio_7_4_sh_mask.h" #include "asic_reg/mp/mp_11_0_sh_mask.h" @@ -599,6 +600,7 @@ static int navi10_set_default_dpm_table(struct smu_context *smu) struct smu_table_context *table_context = &smu->smu_table; struct smu_11_0_dpm_context *dpm_context = smu_dpm->dpm_context; PPTable_t *driver_ppt = NULL; + int i; driver_ppt = table_context->driver_pptable; @@ -629,6 +631,11 @@ static int navi10_set_default_dpm_table(struct smu_context *smu) dpm_context->dpm_tables.phy_table.min = driver_ppt->FreqTablePhyclk[0]; dpm_context->dpm_tables.phy_table.max = driver_ppt->FreqTablePhyclk[NUM_PHYCLK_DPM_LEVELS - 1]; + for (i = 0; i < MAX_PCIE_CONF; i++) { + dpm_context->dpm_tables.pcie_table.pcie_gen[i] = driver_ppt->PcieGenSpeed[i]; + dpm_context->dpm_tables.pcie_table.pcie_lane[i] = driver_ppt->PcieLaneCount[i]; + } + return 0; } @@ -691,13 +698,29 @@ static bool navi10_is_support_fine_grained_dpm(struct smu_context *smu, enum smu return dpm_desc->SnapToDiscrete == 0 ? true : false; } +static inline bool navi10_od_feature_is_supported(struct smu_11_0_overdrive_table *od_table, enum SMU_11_0_ODFEATURE_ID feature) +{ + return od_table->cap[feature]; +} + + static int navi10_print_clk_levels(struct smu_context *smu, enum smu_clk_type clk_type, char *buf) { + uint16_t *curve_settings; int i, size = 0, ret = 0; uint32_t cur_value = 0, value = 0, count = 0; uint32_t freq_values[3] = {0}; uint32_t mark_index = 0; + struct smu_table_context *table_context = &smu->smu_table; + uint32_t gen_speed, lane_width; + struct smu_dpm_context *smu_dpm = &smu->smu_dpm; + struct smu_11_0_dpm_context *dpm_context = smu_dpm->dpm_context; + struct amdgpu_device *adev = smu->adev; + PPTable_t *pptable = (PPTable_t *)table_context->driver_pptable; + OverDriveTable_t *od_table = + (OverDriveTable_t *)table_context->overdrive_table; + struct smu_11_0_overdrive_table *od_settings = smu->od_settings; switch (clk_type) { case SMU_GFXCLK: @@ -748,6 +771,69 @@ static int navi10_print_clk_levels(struct smu_context *smu, } break; + case SMU_PCIE: + gen_speed = (RREG32_PCIE(smnPCIE_LC_SPEED_CNTL) & + PSWUSP0_PCIE_LC_SPEED_CNTL__LC_CURRENT_DATA_RATE_MASK) + >> PSWUSP0_PCIE_LC_SPEED_CNTL__LC_CURRENT_DATA_RATE__SHIFT; + lane_width = (RREG32_PCIE(smnPCIE_LC_LINK_WIDTH_CNTL) & + PCIE_LC_LINK_WIDTH_CNTL__LC_LINK_WIDTH_RD_MASK) + >> PCIE_LC_LINK_WIDTH_CNTL__LC_LINK_WIDTH_RD__SHIFT; + for (i = 0; i < NUM_LINK_LEVELS; i++) + size += sprintf(buf + size, "%d: %s %s %dMhz %s\n", i, + (dpm_context->dpm_tables.pcie_table.pcie_gen[i] == 0) ? "2.5GT/s," : + (dpm_context->dpm_tables.pcie_table.pcie_gen[i] == 1) ? "5.0GT/s," : + (dpm_context->dpm_tables.pcie_table.pcie_gen[i] == 2) ? "8.0GT/s," : + (dpm_context->dpm_tables.pcie_table.pcie_gen[i] == 3) ? "16.0GT/s," : "", + (dpm_context->dpm_tables.pcie_table.pcie_lane[i] == 1) ? "x1" : + (dpm_context->dpm_tables.pcie_table.pcie_lane[i] == 2) ? "x2" : + (dpm_context->dpm_tables.pcie_table.pcie_lane[i] == 3) ? "x4" : + (dpm_context->dpm_tables.pcie_table.pcie_lane[i] == 4) ? "x8" : + (dpm_context->dpm_tables.pcie_table.pcie_lane[i] == 5) ? "x12" : + (dpm_context->dpm_tables.pcie_table.pcie_lane[i] == 6) ? "x16" : "", + pptable->LclkFreq[i], + (gen_speed == dpm_context->dpm_tables.pcie_table.pcie_gen[i]) && + (lane_width == dpm_context->dpm_tables.pcie_table.pcie_lane[i]) ? + "*" : ""); + break; + case SMU_OD_SCLK: + if (!smu->od_enabled || !od_table || !od_settings) + break; + if (!navi10_od_feature_is_supported(od_settings, SMU_11_0_ODFEATURE_GFXCLK_LIMITS)) + break; + size += sprintf(buf + size, "OD_SCLK:\n"); + size += sprintf(buf + size, "0: %uMhz\n1: %uMhz\n", od_table->GfxclkFmin, od_table->GfxclkFmax); + break; + case SMU_OD_MCLK: + if (!smu->od_enabled || !od_table || !od_settings) + break; + if (!navi10_od_feature_is_supported(od_settings, SMU_11_0_ODFEATURE_UCLK_MAX)) + break; + size += sprintf(buf + size, "OD_MCLK:\n"); + size += sprintf(buf + size, "0: %uMHz\n", od_table->UclkFmax); + break; + case SMU_OD_VDDC_CURVE: + if (!smu->od_enabled || !od_table || !od_settings) + break; + if (!navi10_od_feature_is_supported(od_settings, SMU_11_0_ODFEATURE_GFXCLK_CURVE)) + break; + size += sprintf(buf + size, "OD_VDDC_CURVE:\n"); + for (i = 0; i < 3; i++) { + switch (i) { + case 0: + curve_settings = &od_table->GfxclkFreq1; + break; + case 1: + curve_settings = &od_table->GfxclkFreq2; + break; + case 2: + curve_settings = &od_table->GfxclkFreq3; + break; + default: + break; + } + size += sprintf(buf + size, "%d: %uMHz @ %umV\n", i, curve_settings[0], curve_settings[1] / NAVI10_VOLTAGE_SCALE); + } + break; default: break; } @@ -773,6 +859,12 @@ static int navi10_force_clk_levels(struct smu_context *smu, case SMU_UCLK: case SMU_DCEFCLK: case SMU_FCLK: + /* There is only 2 levels for fine grained DPM */ + if (navi10_is_support_fine_grained_dpm(smu, clk_type)) { + soft_max_level = (soft_max_level >= 1 ? 1 : 0); + soft_min_level = (soft_min_level >= 1 ? 1 : 0); + } + ret = smu_get_dpm_freq_by_index(smu, clk_type, soft_min_level, &min_freq); if (ret) return size; @@ -1582,17 +1674,22 @@ static int navi10_display_disable_memory_clock_switch(struct smu_context *smu, return ret; } +static uint32_t navi10_get_pptable_power_limit(struct smu_context *smu) +{ + PPTable_t *pptable = smu->smu_table.driver_pptable; + return pptable->SocketPowerLimitAc[PPT_THROTTLER_PPT0]; +} + static int navi10_get_power_limit(struct smu_context *smu, uint32_t *limit, - bool asic_default) + bool cap) { PPTable_t *pptable = smu->smu_table.driver_pptable; uint32_t asic_default_power_limit = 0; int ret = 0; int power_src; - if (!smu->default_power_limit || - !smu->power_limit) { + if (!smu->power_limit) { if (smu_feature_is_enabled(smu, SMU_FEATURE_PPT_BIT)) { power_src = smu_power_get_index(smu, SMU_POWER_SOURCE_AC); if (power_src < 0) @@ -1615,17 +1712,11 @@ static int navi10_get_power_limit(struct smu_context *smu, pptable->SocketPowerLimitAc[PPT_THROTTLER_PPT0]; } - if (smu->od_enabled) { - asic_default_power_limit *= (100 + smu->smu_table.TDPODLimit); - asic_default_power_limit /= 100; - } - - smu->default_power_limit = asic_default_power_limit; smu->power_limit = asic_default_power_limit; } - if (asic_default) - *limit = smu->default_power_limit; + if (cap) + *limit = smu_v11_0_get_max_power_limit(smu); else *limit = smu->power_limit; @@ -1640,6 +1731,9 @@ static int navi10_update_pcie_parameters(struct smu_context *smu, int ret, i; uint32_t smu_pcie_arg; + struct smu_dpm_context *smu_dpm = &smu->smu_dpm; + struct smu_11_0_dpm_context *dpm_context = smu_dpm->dpm_context; + for (i = 0; i < NUM_LINK_LEVELS; i++) { smu_pcie_arg = (i << 16) | ((pptable->PcieGenSpeed[i] <= pcie_gen_cap) ? (pptable->PcieGenSpeed[i] << 8) : @@ -1648,11 +1742,260 @@ static int navi10_update_pcie_parameters(struct smu_context *smu, ret = smu_send_smc_msg_with_param(smu, SMU_MSG_OverridePcieParameters, smu_pcie_arg); + + if (ret) + return ret; + + if (pptable->PcieGenSpeed[i] > pcie_gen_cap) + dpm_context->dpm_tables.pcie_table.pcie_gen[i] = pcie_gen_cap; + if (pptable->PcieLaneCount[i] > pcie_width_cap) + dpm_context->dpm_tables.pcie_table.pcie_lane[i] = pcie_width_cap; + } + + return 0; +} + +static inline void navi10_dump_od_table(OverDriveTable_t *od_table) { + pr_debug("OD: Gfxclk: (%d, %d)\n", od_table->GfxclkFmin, od_table->GfxclkFmax); + pr_debug("OD: Gfx1: (%d, %d)\n", od_table->GfxclkFreq1, od_table->GfxclkVolt1); + pr_debug("OD: Gfx2: (%d, %d)\n", od_table->GfxclkFreq2, od_table->GfxclkVolt2); + pr_debug("OD: Gfx3: (%d, %d)\n", od_table->GfxclkFreq3, od_table->GfxclkVolt3); + pr_debug("OD: UclkFmax: %d\n", od_table->UclkFmax); + pr_debug("OD: OverDrivePct: %d\n", od_table->OverDrivePct); +} + +static int navi10_od_setting_check_range(struct smu_11_0_overdrive_table *od_table, enum SMU_11_0_ODSETTING_ID setting, uint32_t value) +{ + if (value < od_table->min[setting]) { + pr_warn("OD setting (%d, %d) is less than the minimum allowed (%d)\n", setting, value, od_table->min[setting]); + return -EINVAL; + } + if (value > od_table->max[setting]) { + pr_warn("OD setting (%d, %d) is greater than the maximum allowed (%d)\n", setting, value, od_table->max[setting]); + return -EINVAL; + } + return 0; +} + +static int navi10_setup_od_limits(struct smu_context *smu) { + struct smu_11_0_overdrive_table *overdrive_table = NULL; + struct smu_11_0_powerplay_table *powerplay_table = NULL; + + if (!smu->smu_table.power_play_table) { + pr_err("powerplay table uninitialized!\n"); + return -ENOENT; + } + powerplay_table = (struct smu_11_0_powerplay_table *)smu->smu_table.power_play_table; + overdrive_table = &powerplay_table->overdrive_table; + if (!smu->od_settings) { + smu->od_settings = kmemdup(overdrive_table, sizeof(struct smu_11_0_overdrive_table), GFP_KERNEL); + } else { + memcpy(smu->od_settings, overdrive_table, sizeof(struct smu_11_0_overdrive_table)); + } + return 0; +} + +static int navi10_set_default_od_settings(struct smu_context *smu, bool initialize) { + OverDriveTable_t *od_table; + int ret = 0; + + ret = smu_v11_0_set_default_od_settings(smu, initialize, sizeof(OverDriveTable_t)); + if (ret) + return ret; + + if (initialize) { + ret = navi10_setup_od_limits(smu); + if (ret) { + pr_err("Failed to retrieve board OD limits\n"); + return ret; + } + } + od_table = (OverDriveTable_t *)smu->smu_table.overdrive_table; + if (od_table) { + navi10_dump_od_table(od_table); + } + + return ret; +} + +static int navi10_od_edit_dpm_table(struct smu_context *smu, enum PP_OD_DPM_TABLE_COMMAND type, long input[], uint32_t size) { + int i; + int ret = 0; + struct smu_table_context *table_context = &smu->smu_table; + OverDriveTable_t *od_table; + struct smu_11_0_overdrive_table *od_settings; + enum SMU_11_0_ODSETTING_ID freq_setting, voltage_setting; + uint16_t *freq_ptr, *voltage_ptr; + od_table = (OverDriveTable_t *)table_context->overdrive_table; + + if (!smu->od_enabled) { + pr_warn("OverDrive is not enabled!\n"); + return -EINVAL; + } + + if (!smu->od_settings) { + pr_err("OD board limits are not set!\n"); + return -ENOENT; + } + + od_settings = smu->od_settings; + + switch (type) { + case PP_OD_EDIT_SCLK_VDDC_TABLE: + if (!navi10_od_feature_is_supported(od_settings, SMU_11_0_ODFEATURE_GFXCLK_LIMITS)) { + pr_warn("GFXCLK_LIMITS not supported!\n"); + return -ENOTSUPP; + } + if (!table_context->overdrive_table) { + pr_err("Overdrive is not initialized\n"); + return -EINVAL; + } + for (i = 0; i < size; i += 2) { + if (i + 2 > size) { + pr_info("invalid number of input parameters %d\n", size); + return -EINVAL; + } + switch (input[i]) { + case 0: + freq_setting = SMU_11_0_ODSETTING_GFXCLKFMIN; + freq_ptr = &od_table->GfxclkFmin; + if (input[i + 1] > od_table->GfxclkFmax) { + pr_info("GfxclkFmin (%ld) must be <= GfxclkFmax (%u)!\n", + input[i + 1], + od_table->GfxclkFmin); + return -EINVAL; + } + break; + case 1: + freq_setting = SMU_11_0_ODSETTING_GFXCLKFMAX; + freq_ptr = &od_table->GfxclkFmax; + if (input[i + 1] < od_table->GfxclkFmin) { + pr_info("GfxclkFmax (%ld) must be >= GfxclkFmin (%u)!\n", + input[i + 1], + od_table->GfxclkFmax); + return -EINVAL; + } + break; + default: + pr_info("Invalid SCLK_VDDC_TABLE index: %ld\n", input[i]); + pr_info("Supported indices: [0:min,1:max]\n"); + return -EINVAL; + } + ret = navi10_od_setting_check_range(od_settings, freq_setting, input[i + 1]); + if (ret) + return ret; + *freq_ptr = input[i + 1]; + } + break; + case PP_OD_EDIT_MCLK_VDDC_TABLE: + if (!navi10_od_feature_is_supported(od_settings, SMU_11_0_ODFEATURE_UCLK_MAX)) { + pr_warn("UCLK_MAX not supported!\n"); + return -ENOTSUPP; + } + if (size < 2) { + pr_info("invalid number of parameters: %d\n", size); + return -EINVAL; + } + if (input[0] != 1) { + pr_info("Invalid MCLK_VDDC_TABLE index: %ld\n", input[0]); + pr_info("Supported indices: [1:max]\n"); + return -EINVAL; + } + ret = navi10_od_setting_check_range(od_settings, SMU_11_0_ODSETTING_UCLKFMAX, input[1]); + if (ret) + return ret; + od_table->UclkFmax = input[1]; + break; + case PP_OD_COMMIT_DPM_TABLE: + navi10_dump_od_table(od_table); + ret = smu_update_table(smu, SMU_TABLE_OVERDRIVE, 0, (void *)od_table, true); + if (ret) { + pr_err("Failed to import overdrive table!\n"); + return ret; + } + // no lock needed because smu_od_edit_dpm_table has it + ret = smu_handle_task(smu, smu->smu_dpm.dpm_level, + AMD_PP_TASK_READJUST_POWER_STATE, + false); + if (ret) { + return ret; + } + break; + case PP_OD_EDIT_VDDC_CURVE: + if (!navi10_od_feature_is_supported(od_settings, SMU_11_0_ODFEATURE_GFXCLK_CURVE)) { + pr_warn("GFXCLK_CURVE not supported!\n"); + return -ENOTSUPP; + } + if (size < 3) { + pr_info("invalid number of parameters: %d\n", size); + return -EINVAL; + } + if (!od_table) { + pr_info("Overdrive is not initialized\n"); + return -EINVAL; + } + + switch (input[0]) { + case 0: + freq_setting = SMU_11_0_ODSETTING_VDDGFXCURVEFREQ_P1; + voltage_setting = SMU_11_0_ODSETTING_VDDGFXCURVEVOLTAGE_P1; + freq_ptr = &od_table->GfxclkFreq1; + voltage_ptr = &od_table->GfxclkVolt1; + break; + case 1: + freq_setting = SMU_11_0_ODSETTING_VDDGFXCURVEFREQ_P2; + voltage_setting = SMU_11_0_ODSETTING_VDDGFXCURVEVOLTAGE_P2; + freq_ptr = &od_table->GfxclkFreq2; + voltage_ptr = &od_table->GfxclkVolt2; + break; + case 2: + freq_setting = SMU_11_0_ODSETTING_VDDGFXCURVEFREQ_P3; + voltage_setting = SMU_11_0_ODSETTING_VDDGFXCURVEVOLTAGE_P3; + freq_ptr = &od_table->GfxclkFreq3; + voltage_ptr = &od_table->GfxclkVolt3; + break; + default: + pr_info("Invalid VDDC_CURVE index: %ld\n", input[0]); + pr_info("Supported indices: [0, 1, 2]\n"); + return -EINVAL; + } + ret = navi10_od_setting_check_range(od_settings, freq_setting, input[1]); + if (ret) + return ret; + // Allow setting zero to disable the OverDrive VDDC curve + if (input[2] != 0) { + ret = navi10_od_setting_check_range(od_settings, voltage_setting, input[2]); + if (ret) + return ret; + *freq_ptr = input[1]; + *voltage_ptr = ((uint16_t)input[2]) * NAVI10_VOLTAGE_SCALE; + pr_debug("OD: set curve %ld: (%d, %d)\n", input[0], *freq_ptr, *voltage_ptr); + } else { + // If setting 0, disable all voltage curve settings + od_table->GfxclkVolt1 = 0; + od_table->GfxclkVolt2 = 0; + od_table->GfxclkVolt3 = 0; + } + navi10_dump_od_table(od_table); + break; + default: + return -ENOSYS; + } return ret; } +static int navi10_run_btc(struct smu_context *smu) +{ + int ret = 0; + + ret = smu_send_smc_msg(smu, SMU_MSG_RunBtc); + if (ret) + pr_err("RunBtc failed!\n"); + + return ret; +} static const struct pptable_funcs navi10_ppt_funcs = { .tables_init = navi10_tables_init, @@ -1712,7 +2055,6 @@ static const struct pptable_funcs navi10_ppt_funcs = { .set_tool_table_location = smu_v11_0_set_tool_table_location, .notify_memory_pool_location = smu_v11_0_notify_memory_pool_location, .system_features_control = smu_v11_0_system_features_control, - .send_smc_msg = smu_v11_0_send_msg, .send_smc_msg_with_param = smu_v11_0_send_msg_with_param, .read_smc_arg = smu_v11_0_read_arg, .init_display_count = smu_v11_0_init_display_count, @@ -1742,6 +2084,10 @@ static const struct pptable_funcs navi10_ppt_funcs = { .get_dpm_ultimate_freq = smu_v11_0_get_dpm_ultimate_freq, .set_soft_freq_limited_range = smu_v11_0_set_soft_freq_limited_range, .override_pcie_parameters = smu_v11_0_override_pcie_parameters, + .set_default_od_settings = navi10_set_default_od_settings, + .od_edit_dpm_table = navi10_od_edit_dpm_table, + .get_pptable_power_limit = navi10_get_pptable_power_limit, + .run_btc = navi10_run_btc, }; void navi10_set_ppt_funcs(struct smu_context *smu) diff --git a/drivers/gpu/drm/amd/powerplay/navi10_ppt.h b/drivers/gpu/drm/amd/powerplay/navi10_ppt.h index a37e37c5f105..ec03c7992f6d 100644 --- a/drivers/gpu/drm/amd/powerplay/navi10_ppt.h +++ b/drivers/gpu/drm/amd/powerplay/navi10_ppt.h @@ -33,6 +33,11 @@ #define NAVI14_UMD_PSTATE_PEAK_XTX_GFXCLK (1717) #define NAVI14_UMD_PSTATE_PEAK_XL_GFXCLK (1448) +#define NAVI10_VOLTAGE_SCALE (4) + +#define smnPCIE_LC_SPEED_CNTL 0x11140290 +#define smnPCIE_LC_LINK_WIDTH_CNTL 0x11140288 + extern void navi10_set_ppt_funcs(struct smu_context *smu); #endif diff --git a/drivers/gpu/drm/amd/powerplay/renoir_ppt.c b/drivers/gpu/drm/amd/powerplay/renoir_ppt.c index 04daf7e9fe05..977bdd962e98 100644 --- a/drivers/gpu/drm/amd/powerplay/renoir_ppt.c +++ b/drivers/gpu/drm/amd/powerplay/renoir_ppt.c @@ -697,7 +697,6 @@ static const struct pptable_funcs renoir_ppt_funcs = { .check_fw_version = smu_v12_0_check_fw_version, .powergate_sdma = smu_v12_0_powergate_sdma, .powergate_vcn = smu_v12_0_powergate_vcn, - .send_smc_msg = smu_v12_0_send_msg, .send_smc_msg_with_param = smu_v12_0_send_msg_with_param, .read_smc_arg = smu_v12_0_read_arg, .set_gfx_cgpg = smu_v12_0_set_gfx_cgpg, diff --git a/drivers/gpu/drm/amd/powerplay/smu_internal.h b/drivers/gpu/drm/amd/powerplay/smu_internal.h index 8bcda7871309..8872f8b2d502 100644 --- a/drivers/gpu/drm/amd/powerplay/smu_internal.h +++ b/drivers/gpu/drm/amd/powerplay/smu_internal.h @@ -75,8 +75,8 @@ #define smu_set_default_od_settings(smu, initialize) \ ((smu)->ppt_funcs->set_default_od_settings ? (smu)->ppt_funcs->set_default_od_settings((smu), (initialize)) : 0) -#define smu_send_smc_msg(smu, msg) \ - ((smu)->ppt_funcs->send_smc_msg? (smu)->ppt_funcs->send_smc_msg((smu), (msg)) : 0) +int smu_send_smc_msg(struct smu_context *smu, enum smu_message_type msg); + #define smu_send_smc_msg_with_param(smu, msg, param) \ ((smu)->ppt_funcs->send_smc_msg_with_param? (smu)->ppt_funcs->send_smc_msg_with_param((smu), (msg), (param)) : 0) #define smu_read_smc_arg(smu, arg) \ diff --git a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c index e859bb1132ac..e4268a627eff 100644 --- a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c +++ b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c @@ -24,6 +24,8 @@ #include <linux/module.h> #include <linux/pci.h> +#define SMU_11_0_PARTIAL_PPTABLE + #include "pp_debug.h" #include "amdgpu.h" #include "amdgpu_smu.h" @@ -31,6 +33,7 @@ #include "atomfirmware.h" #include "amdgpu_atomfirmware.h" #include "smu_v11_0.h" +#include "smu_v11_0_pptable.h" #include "soc15_common.h" #include "atom.h" #include "amd_pcie.h" @@ -87,36 +90,11 @@ static int smu_v11_0_wait_for_response(struct smu_context *smu) return RREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_90) == 0x1 ? 0 : -EIO; } -int smu_v11_0_send_msg(struct smu_context *smu, uint16_t msg) -{ - struct amdgpu_device *adev = smu->adev; - int ret = 0, index = 0; - - index = smu_msg_get_index(smu, msg); - if (index < 0) - return index; - - smu_v11_0_wait_for_response(smu); - - WREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_90, 0); - - smu_v11_0_send_msg_without_waiting(smu, (uint16_t)index); - - ret = smu_v11_0_wait_for_response(smu); - - if (ret) - pr_err("failed send message: %10s (%d) response %#x\n", - smu_get_message_name(smu, msg), index, ret); - - return ret; - -} - int -smu_v11_0_send_msg_with_param(struct smu_context *smu, uint16_t msg, +smu_v11_0_send_msg_with_param(struct smu_context *smu, + enum smu_message_type msg, uint32_t param) { - struct amdgpu_device *adev = smu->adev; int ret = 0, index = 0; @@ -1045,13 +1023,44 @@ int smu_v11_0_init_max_sustainable_clocks(struct smu_context *smu) return 0; } +uint32_t smu_v11_0_get_max_power_limit(struct smu_context *smu) { + uint32_t od_limit, max_power_limit; + struct smu_11_0_powerplay_table *powerplay_table = NULL; + struct smu_table_context *table_context = &smu->smu_table; + powerplay_table = table_context->power_play_table; + + max_power_limit = smu_get_pptable_power_limit(smu); + + if (!max_power_limit) { + // If we couldn't get the table limit, fall back on first-read value + if (!smu->default_power_limit) + smu->default_power_limit = smu->power_limit; + max_power_limit = smu->default_power_limit; + } + + if (smu->od_enabled) { + od_limit = le32_to_cpu(powerplay_table->overdrive_table.max[SMU_11_0_ODSETTING_POWERPERCENTAGE]); + + pr_debug("ODSETTING_POWERPERCENTAGE: %d (default: %d)\n", od_limit, smu->default_power_limit); + + max_power_limit *= (100 + od_limit); + max_power_limit /= 100; + } + + return max_power_limit; +} + int smu_v11_0_set_power_limit(struct smu_context *smu, uint32_t n) { int ret = 0; + uint32_t max_power_limit; + + max_power_limit = smu_v11_0_get_max_power_limit(smu); - if (n > smu->default_power_limit) { - pr_err("New power limit is over the max allowed %d\n", - smu->default_power_limit); + if (n > max_power_limit) { + pr_err("New power limit (%d) is over the max allowed %d\n", + n, + max_power_limit); return -EINVAL; } @@ -1779,3 +1788,30 @@ int smu_v11_0_override_pcie_parameters(struct smu_context *smu) return ret; } + +int smu_v11_0_set_default_od_settings(struct smu_context *smu, bool initialize, size_t overdrive_table_size) +{ + struct smu_table_context *table_context = &smu->smu_table; + int ret = 0; + + if (initialize) { + if (table_context->overdrive_table) { + return -EINVAL; + } + table_context->overdrive_table = kzalloc(overdrive_table_size, GFP_KERNEL); + if (!table_context->overdrive_table) { + return -ENOMEM; + } + ret = smu_update_table(smu, SMU_TABLE_OVERDRIVE, 0, table_context->overdrive_table, false); + if (ret) { + pr_err("Failed to export overdrive table!\n"); + return ret; + } + } + ret = smu_update_table(smu, SMU_TABLE_OVERDRIVE, 0, table_context->overdrive_table, true); + if (ret) { + pr_err("Failed to import overdrive table!\n"); + return ret; + } + return ret; +} diff --git a/drivers/gpu/drm/amd/powerplay/smu_v12_0.c b/drivers/gpu/drm/amd/powerplay/smu_v12_0.c index 139dd737eaa5..094cfc46adac 100644 --- a/drivers/gpu/drm/amd/powerplay/smu_v12_0.c +++ b/drivers/gpu/drm/amd/powerplay/smu_v12_0.c @@ -77,33 +77,9 @@ int smu_v12_0_wait_for_response(struct smu_context *smu) return RREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_90) == 0x1 ? 0 : -EIO; } -int smu_v12_0_send_msg(struct smu_context *smu, uint16_t msg) -{ - struct amdgpu_device *adev = smu->adev; - int ret = 0, index = 0; - - index = smu_msg_get_index(smu, msg); - if (index < 0) - return index; - - smu_v12_0_wait_for_response(smu); - - WREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_90, 0); - - smu_v12_0_send_msg_without_waiting(smu, (uint16_t)index); - - ret = smu_v12_0_wait_for_response(smu); - - if (ret) - pr_err("Failed to send message 0x%x, response 0x%x\n", index, - ret); - - return ret; - -} - int -smu_v12_0_send_msg_with_param(struct smu_context *smu, uint16_t msg, +smu_v12_0_send_msg_with_param(struct smu_context *smu, + enum smu_message_type msg, uint32_t param) { struct amdgpu_device *adev = smu->adev; diff --git a/drivers/gpu/drm/amd/powerplay/vega20_ppt.c b/drivers/gpu/drm/amd/powerplay/vega20_ppt.c index 5b21386f558d..60b9ff097142 100644 --- a/drivers/gpu/drm/amd/powerplay/vega20_ppt.c +++ b/drivers/gpu/drm/amd/powerplay/vega20_ppt.c @@ -466,7 +466,6 @@ static int vega20_store_powerplay_table(struct smu_context *smu) sizeof(PPTable_t)); table_context->thermal_controller_type = powerplay_table->ucThermalControllerType; - table_context->TDPODLimit = le32_to_cpu(powerplay_table->OverDrive8Table.ODSettingsMax[ATOM_VEGA20_ODSETTING_POWERPERCENTAGE]); return 0; } @@ -3232,7 +3231,6 @@ static const struct pptable_funcs vega20_ppt_funcs = { .set_tool_table_location = smu_v11_0_set_tool_table_location, .notify_memory_pool_location = smu_v11_0_notify_memory_pool_location, .system_features_control = smu_v11_0_system_features_control, - .send_smc_msg = smu_v11_0_send_msg, .send_smc_msg_with_param = smu_v11_0_send_msg_with_param, .read_smc_arg = smu_v11_0_read_arg, .init_display_count = smu_v11_0_init_display_count, diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c index ae5809a1f19a..273dd80fabf3 100644 --- a/drivers/gpu/drm/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/drm_dp_mst_topology.c @@ -3176,9 +3176,11 @@ int drm_dp_update_payload_part1(struct drm_dp_mst_topology_mgr *mgr) drm_dp_mst_topology_put_port(port); } - for (i = 0; i < mgr->max_payloads; i++) { - if (mgr->payloads[i].payload_state != DP_PAYLOAD_DELETE_LOCAL) + for (i = 0; i < mgr->max_payloads; /* do nothing */) { + if (mgr->payloads[i].payload_state != DP_PAYLOAD_DELETE_LOCAL) { + i++; continue; + } DRM_DEBUG_KMS("removing payload %d\n", i); for (j = i; j < mgr->max_payloads - 1; j++) { diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c index 2f2b889096b0..000fa4a1899f 100644 --- a/drivers/gpu/drm/drm_gem.c +++ b/drivers/gpu/drm/drm_gem.c @@ -1105,21 +1105,33 @@ int drm_gem_mmap_obj(struct drm_gem_object *obj, unsigned long obj_size, if (obj_size < vma->vm_end - vma->vm_start) return -EINVAL; + /* Take a ref for this mapping of the object, so that the fault + * handler can dereference the mmap offset's pointer to the object. + * This reference is cleaned up by the corresponding vm_close + * (which should happen whether the vma was created by this call, or + * by a vm_open due to mremap or partial unmap or whatever). + */ + drm_gem_object_get(obj); + if (obj->funcs && obj->funcs->mmap) { /* Remove the fake offset */ vma->vm_pgoff -= drm_vma_node_start(&obj->vma_node); ret = obj->funcs->mmap(obj, vma); - if (ret) + if (ret) { + drm_gem_object_put_unlocked(obj); return ret; + } WARN_ON(!(vma->vm_flags & VM_DONTEXPAND)); } else { if (obj->funcs && obj->funcs->vm_ops) vma->vm_ops = obj->funcs->vm_ops; else if (dev->driver->gem_vm_ops) vma->vm_ops = dev->driver->gem_vm_ops; - else + else { + drm_gem_object_put_unlocked(obj); return -EINVAL; + } vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP; vma->vm_page_prot = pgprot_writecombine(vm_get_page_prot(vma->vm_flags)); @@ -1128,14 +1140,6 @@ int drm_gem_mmap_obj(struct drm_gem_object *obj, unsigned long obj_size, vma->vm_private_data = obj; - /* Take a ref for this mapping of the object, so that the fault - * handler can dereference the mmap offset's pointer to the object. - * This reference is cleaned up by the corresponding vm_close - * (which should happen whether the vma was created by this call, or - * by a vm_open due to mremap or partial unmap or whatever). - */ - drm_gem_object_get(obj); - return 0; } EXPORT_SYMBOL(drm_gem_mmap_obj); diff --git a/drivers/gpu/drm/drm_gem_ttm_helper.c b/drivers/gpu/drm/drm_gem_ttm_helper.c index 7412bfc5c05a..605a8a3da7f9 100644 --- a/drivers/gpu/drm/drm_gem_ttm_helper.c +++ b/drivers/gpu/drm/drm_gem_ttm_helper.c @@ -64,8 +64,19 @@ int drm_gem_ttm_mmap(struct drm_gem_object *gem, struct vm_area_struct *vma) { struct ttm_buffer_object *bo = drm_gem_ttm_of_gem(gem); + int ret; - return ttm_bo_mmap_obj(vma, bo); + ret = ttm_bo_mmap_obj(vma, bo); + if (ret < 0) + return ret; + + /* + * ttm has its own object refcounting, so drop gem reference + * to avoid double accounting counting. + */ + drm_gem_object_put_unlocked(gem); + + return 0; } EXPORT_SYMBOL(drm_gem_ttm_mmap); diff --git a/drivers/gpu/drm/mgag200/mgag200_drv.c b/drivers/gpu/drm/mgag200/mgag200_drv.c index 397f8b0a9af8..d43951caeea0 100644 --- a/drivers/gpu/drm/mgag200/mgag200_drv.c +++ b/drivers/gpu/drm/mgag200/mgag200_drv.c @@ -30,6 +30,8 @@ module_param_named(modeset, mgag200_modeset, int, 0400); static struct drm_driver driver; static const struct pci_device_id pciidlist[] = { + { PCI_VENDOR_ID_MATROX, 0x522, PCI_VENDOR_ID_SUN, 0x4852, 0, 0, + G200_SE_A | MGAG200_FLAG_HW_BUG_NO_STARTADD}, { PCI_VENDOR_ID_MATROX, 0x522, PCI_ANY_ID, PCI_ANY_ID, 0, 0, G200_SE_A }, { PCI_VENDOR_ID_MATROX, 0x524, PCI_ANY_ID, PCI_ANY_ID, 0, 0, G200_SE_B }, { PCI_VENDOR_ID_MATROX, 0x530, PCI_ANY_ID, PCI_ANY_ID, 0, 0, G200_EV }, @@ -60,6 +62,35 @@ static void mga_pci_remove(struct pci_dev *pdev) DEFINE_DRM_GEM_FOPS(mgag200_driver_fops); +static bool mgag200_pin_bo_at_0(const struct mga_device *mdev) +{ + return mdev->flags & MGAG200_FLAG_HW_BUG_NO_STARTADD; +} + +int mgag200_driver_dumb_create(struct drm_file *file, + struct drm_device *dev, + struct drm_mode_create_dumb *args) +{ + struct mga_device *mdev = dev->dev_private; + unsigned long pg_align; + + if (WARN_ONCE(!dev->vram_mm, "VRAM MM not initialized")) + return -EINVAL; + + pg_align = 0ul; + + /* + * Aligning scanout buffers to the size of the video ram forces + * placement at offset 0. Works around a bug where HW does not + * respect 'startadd' field. + */ + if (mgag200_pin_bo_at_0(mdev)) + pg_align = PFN_UP(mdev->mc.vram_size); + + return drm_gem_vram_fill_create_dumb(file, dev, &dev->vram_mm->bdev, + pg_align, false, args); +} + static struct drm_driver driver = { .driver_features = DRIVER_GEM | DRIVER_MODESET, .load = mgag200_driver_load, @@ -71,7 +102,10 @@ static struct drm_driver driver = { .major = DRIVER_MAJOR, .minor = DRIVER_MINOR, .patchlevel = DRIVER_PATCHLEVEL, - DRM_GEM_VRAM_DRIVER + .debugfs_init = drm_vram_mm_debugfs_init, + .dumb_create = mgag200_driver_dumb_create, + .dumb_map_offset = drm_gem_vram_driver_dumb_mmap_offset, + .gem_prime_mmap = drm_gem_prime_mmap, }; static struct pci_driver mgag200_pci_driver = { diff --git a/drivers/gpu/drm/mgag200/mgag200_drv.h b/drivers/gpu/drm/mgag200/mgag200_drv.h index 0ea9a525e57d..aa32aad222c2 100644 --- a/drivers/gpu/drm/mgag200/mgag200_drv.h +++ b/drivers/gpu/drm/mgag200/mgag200_drv.h @@ -150,6 +150,12 @@ enum mga_type { G200_EW3, }; +/* HW does not handle 'startadd' field correct. */ +#define MGAG200_FLAG_HW_BUG_NO_STARTADD (1ul << 8) + +#define MGAG200_TYPE_MASK (0x000000ff) +#define MGAG200_FLAG_MASK (0x00ffff00) + #define IS_G200_SE(mdev) (mdev->type == G200_SE_A || mdev->type == G200_SE_B) struct mga_device { @@ -181,6 +187,18 @@ struct mga_device { u32 unique_rev_id; }; +static inline enum mga_type +mgag200_type_from_driver_data(kernel_ulong_t driver_data) +{ + return (enum mga_type)(driver_data & MGAG200_TYPE_MASK); +} + +static inline unsigned long +mgag200_flags_from_driver_data(kernel_ulong_t driver_data) +{ + return driver_data & MGAG200_FLAG_MASK; +} + /* mgag200_mode.c */ int mgag200_modeset_init(struct mga_device *mdev); void mgag200_modeset_fini(struct mga_device *mdev); diff --git a/drivers/gpu/drm/mgag200/mgag200_main.c b/drivers/gpu/drm/mgag200/mgag200_main.c index 5f74aabcd3df..e1bc5b0aa774 100644 --- a/drivers/gpu/drm/mgag200/mgag200_main.c +++ b/drivers/gpu/drm/mgag200/mgag200_main.c @@ -94,7 +94,8 @@ static int mgag200_device_init(struct drm_device *dev, struct mga_device *mdev = dev->dev_private; int ret, option; - mdev->type = flags; + mdev->flags = mgag200_flags_from_driver_data(flags); + mdev->type = mgag200_type_from_driver_data(flags); /* Hardcode the number of CRTCs to 1 */ mdev->num_crtc = 1; diff --git a/drivers/gpu/drm/msm/Kconfig b/drivers/gpu/drm/msm/Kconfig index e9160ce39cbb..6deaa7d01654 100644 --- a/drivers/gpu/drm/msm/Kconfig +++ b/drivers/gpu/drm/msm/Kconfig @@ -7,6 +7,7 @@ config DRM_MSM depends on OF && COMMON_CLK depends on MMU depends on INTERCONNECT || !INTERCONNECT + depends on QCOM_OCMEM || QCOM_OCMEM=n select QCOM_MDT_LOADER if ARCH_QCOM select REGULATOR select DRM_KMS_HELPER diff --git a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c index 5f7e98028eaf..7ad14937fcdf 100644 --- a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c @@ -6,10 +6,6 @@ * Copyright (c) 2014 The Linux Foundation. All rights reserved. */ -#ifdef CONFIG_MSM_OCMEM -# include <mach/ocmem.h> -#endif - #include "a3xx_gpu.h" #define A3XX_INT0_MASK \ @@ -195,9 +191,9 @@ static int a3xx_hw_init(struct msm_gpu *gpu) gpu_write(gpu, REG_A3XX_RBBM_GPR0_CTL, 0x00000000); /* Set the OCMEM base address for A330, etc */ - if (a3xx_gpu->ocmem_hdl) { + if (a3xx_gpu->ocmem.hdl) { gpu_write(gpu, REG_A3XX_RB_GMEM_BASE_ADDR, - (unsigned int)(a3xx_gpu->ocmem_base >> 14)); + (unsigned int)(a3xx_gpu->ocmem.base >> 14)); } /* Turn on performance counters: */ @@ -318,10 +314,7 @@ static void a3xx_destroy(struct msm_gpu *gpu) adreno_gpu_cleanup(adreno_gpu); -#ifdef CONFIG_MSM_OCMEM - if (a3xx_gpu->ocmem_base) - ocmem_free(OCMEM_GRAPHICS, a3xx_gpu->ocmem_hdl); -#endif + adreno_gpu_ocmem_cleanup(&a3xx_gpu->ocmem); kfree(a3xx_gpu); } @@ -494,17 +487,10 @@ struct msm_gpu *a3xx_gpu_init(struct drm_device *dev) /* if needed, allocate gmem: */ if (adreno_is_a330(adreno_gpu)) { -#ifdef CONFIG_MSM_OCMEM - /* TODO this is different/missing upstream: */ - struct ocmem_buf *ocmem_hdl = - ocmem_allocate(OCMEM_GRAPHICS, adreno_gpu->gmem); - - a3xx_gpu->ocmem_hdl = ocmem_hdl; - a3xx_gpu->ocmem_base = ocmem_hdl->addr; - adreno_gpu->gmem = ocmem_hdl->len; - DBG("using %dK of OCMEM at 0x%08x", adreno_gpu->gmem / 1024, - a3xx_gpu->ocmem_base); -#endif + ret = adreno_gpu_ocmem_init(&adreno_gpu->base.pdev->dev, + adreno_gpu, &a3xx_gpu->ocmem); + if (ret) + goto fail; } if (!gpu->aspace) { diff --git a/drivers/gpu/drm/msm/adreno/a3xx_gpu.h b/drivers/gpu/drm/msm/adreno/a3xx_gpu.h index 5dc33e5ea53b..c555fb13e0d7 100644 --- a/drivers/gpu/drm/msm/adreno/a3xx_gpu.h +++ b/drivers/gpu/drm/msm/adreno/a3xx_gpu.h @@ -19,8 +19,7 @@ struct a3xx_gpu { struct adreno_gpu base; /* if OCMEM is used for GMEM: */ - uint32_t ocmem_base; - void *ocmem_hdl; + struct adreno_ocmem ocmem; }; #define to_a3xx_gpu(x) container_of(x, struct a3xx_gpu, base) diff --git a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c index ab2b752566d8..b01388a9e89e 100644 --- a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c @@ -2,9 +2,6 @@ /* Copyright (c) 2014 The Linux Foundation. All rights reserved. */ #include "a4xx_gpu.h" -#ifdef CONFIG_MSM_OCMEM -# include <soc/qcom/ocmem.h> -#endif #define A4XX_INT0_MASK \ (A4XX_INT0_RBBM_AHB_ERROR | \ @@ -188,7 +185,7 @@ static int a4xx_hw_init(struct msm_gpu *gpu) (1 << 30) | 0xFFFF); gpu_write(gpu, REG_A4XX_RB_GMEM_BASE_ADDR, - (unsigned int)(a4xx_gpu->ocmem_base >> 14)); + (unsigned int)(a4xx_gpu->ocmem.base >> 14)); /* Turn on performance counters: */ gpu_write(gpu, REG_A4XX_RBBM_PERFCTR_CTL, 0x01); @@ -318,10 +315,7 @@ static void a4xx_destroy(struct msm_gpu *gpu) adreno_gpu_cleanup(adreno_gpu); -#ifdef CONFIG_MSM_OCMEM - if (a4xx_gpu->ocmem_base) - ocmem_free(OCMEM_GRAPHICS, a4xx_gpu->ocmem_hdl); -#endif + adreno_gpu_ocmem_cleanup(&a4xx_gpu->ocmem); kfree(a4xx_gpu); } @@ -578,17 +572,10 @@ struct msm_gpu *a4xx_gpu_init(struct drm_device *dev) /* if needed, allocate gmem: */ if (adreno_is_a4xx(adreno_gpu)) { -#ifdef CONFIG_MSM_OCMEM - /* TODO this is different/missing upstream: */ - struct ocmem_buf *ocmem_hdl = - ocmem_allocate(OCMEM_GRAPHICS, adreno_gpu->gmem); - - a4xx_gpu->ocmem_hdl = ocmem_hdl; - a4xx_gpu->ocmem_base = ocmem_hdl->addr; - adreno_gpu->gmem = ocmem_hdl->len; - DBG("using %dK of OCMEM at 0x%08x", adreno_gpu->gmem / 1024, - a4xx_gpu->ocmem_base); -#endif + ret = adreno_gpu_ocmem_init(dev->dev, adreno_gpu, + &a4xx_gpu->ocmem); + if (ret) + goto fail; } if (!gpu->aspace) { diff --git a/drivers/gpu/drm/msm/adreno/a4xx_gpu.h b/drivers/gpu/drm/msm/adreno/a4xx_gpu.h index d506311ee240..a01448cba2ea 100644 --- a/drivers/gpu/drm/msm/adreno/a4xx_gpu.h +++ b/drivers/gpu/drm/msm/adreno/a4xx_gpu.h @@ -16,8 +16,7 @@ struct a4xx_gpu { struct adreno_gpu base; /* if OCMEM is used for GMEM: */ - uint32_t ocmem_base; - void *ocmem_hdl; + struct adreno_ocmem ocmem; }; #define to_a4xx_gpu(x) container_of(x, struct a4xx_gpu, base) diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c index e9c55d1d6c04..b02e2042547f 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c @@ -353,6 +353,9 @@ static int a5xx_me_init(struct msm_gpu *gpu) * 2D mode 3 draw */ OUT_RING(ring, 0x0000000B); + } else if (adreno_is_a510(adreno_gpu)) { + /* Workaround for token and syncs */ + OUT_RING(ring, 0x00000001); } else { /* No workarounds enabled */ OUT_RING(ring, 0x00000000); @@ -568,15 +571,24 @@ static int a5xx_hw_init(struct msm_gpu *gpu) 0x00100000 + adreno_gpu->gmem - 1); gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MAX_HI, 0x00000000); - gpu_write(gpu, REG_A5XX_CP_MEQ_THRESHOLDS, 0x40); - if (adreno_is_a530(adreno_gpu)) - gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x40); - if (adreno_is_a540(adreno_gpu)) - gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x400); - gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_2, 0x80000060); - gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_1, 0x40201B16); - - gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL, (0x400 << 11 | 0x300 << 22)); + if (adreno_is_a510(adreno_gpu)) { + gpu_write(gpu, REG_A5XX_CP_MEQ_THRESHOLDS, 0x20); + gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x20); + gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_2, 0x40000030); + gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_1, 0x20100D0A); + gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL, + (0x200 << 11 | 0x200 << 22)); + } else { + gpu_write(gpu, REG_A5XX_CP_MEQ_THRESHOLDS, 0x40); + if (adreno_is_a530(adreno_gpu)) + gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x40); + if (adreno_is_a540(adreno_gpu)) + gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x400); + gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_2, 0x80000060); + gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_1, 0x40201B16); + gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL, + (0x400 << 11 | 0x300 << 22)); + } if (adreno_gpu->info->quirks & ADRENO_QUIRK_TWO_PASS_USE_WFI) gpu_rmw(gpu, REG_A5XX_PC_DBG_ECO_CNTL, 0, (1 << 8)); @@ -589,6 +601,19 @@ static int a5xx_hw_init(struct msm_gpu *gpu) /* Enable ME/PFP split notification */ gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL1, 0xA6FFFFFF); + /* + * In A5x, CCU can send context_done event of a particular context to + * UCHE which ultimately reaches CP even when there is valid + * transaction of that context inside CCU. This can let CP to program + * config registers, which will make the "valid transaction" inside + * CCU to be interpreted differently. This can cause gpu fault. This + * bug is fixed in latest A510 revision. To enable this bug fix - + * bit[11] of RB_DBG_ECO_CNTL need to be set to 0, default is 1 + * (disable). For older A510 version this bit is unused. + */ + if (adreno_is_a510(adreno_gpu)) + gpu_rmw(gpu, REG_A5XX_RB_DBG_ECO_CNTL, (1 << 11), 0); + /* Enable HWCG */ a5xx_set_hwcg(gpu, true); @@ -635,7 +660,7 @@ static int a5xx_hw_init(struct msm_gpu *gpu) /* UCHE */ gpu_write(gpu, REG_A5XX_CP_PROTECT(16), ADRENO_PROTECT_RW(0xE80, 16)); - if (adreno_is_a530(adreno_gpu)) + if (adreno_is_a530(adreno_gpu) || adreno_is_a510(adreno_gpu)) gpu_write(gpu, REG_A5XX_CP_PROTECT(17), ADRENO_PROTECT_RW(0x10000, 0x8000)); @@ -679,7 +704,8 @@ static int a5xx_hw_init(struct msm_gpu *gpu) a5xx_preempt_hw_init(gpu); - a5xx_gpmu_ucode_init(gpu); + if (!adreno_is_a510(adreno_gpu)) + a5xx_gpmu_ucode_init(gpu); ret = a5xx_ucode_init(gpu); if (ret) @@ -712,7 +738,8 @@ static int a5xx_hw_init(struct msm_gpu *gpu) } /* - * Try to load a zap shader into the secure world. If successful + * If the chip that we are using does support loading one, then + * try to load a zap shader into the secure world. If successful * we can use the CP to switch out of secure mode. If not then we * have no resource but to try to switch ourselves out manually. If we * guessed wrong then access to the RBBM_SECVID_TRUST_CNTL register will @@ -1066,6 +1093,7 @@ static void a5xx_dump(struct msm_gpu *gpu) static int a5xx_pm_resume(struct msm_gpu *gpu) { + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); int ret; /* Turn on the core power */ @@ -1073,6 +1101,15 @@ static int a5xx_pm_resume(struct msm_gpu *gpu) if (ret) return ret; + if (adreno_is_a510(adreno_gpu)) { + /* Halt the sp_input_clk at HM level */ + gpu_write(gpu, REG_A5XX_RBBM_CLOCK_CNTL, 0x00000055); + a5xx_set_hwcg(gpu, true); + /* Turn on sp_input_clk at HM level */ + gpu_rmw(gpu, REG_A5XX_RBBM_CLOCK_CNTL, 0xff, 0); + return 0; + } + /* Turn the RBCCU domain first to limit the chances of voltage droop */ gpu_write(gpu, REG_A5XX_GPMU_RBCCU_POWER_CNTL, 0x778000); @@ -1101,9 +1138,17 @@ static int a5xx_pm_resume(struct msm_gpu *gpu) static int a5xx_pm_suspend(struct msm_gpu *gpu) { + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + u32 mask = 0xf; + + /* A510 has 3 XIN ports in VBIF */ + if (adreno_is_a510(adreno_gpu)) + mask = 0x7; + /* Clear the VBIF pipe before shutting down */ - gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, 0xF); - spin_until((gpu_read(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL1) & 0xF) == 0xF); + gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, mask); + spin_until((gpu_read(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL1) & + mask) == mask); gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, 0); @@ -1289,7 +1334,7 @@ static void a5xx_gpu_state_destroy(struct kref *kref) kfree(a5xx_state); } -int a5xx_gpu_state_put(struct msm_gpu_state *state) +static int a5xx_gpu_state_put(struct msm_gpu_state *state) { if (IS_ERR_OR_NULL(state)) return 1; @@ -1299,8 +1344,8 @@ int a5xx_gpu_state_put(struct msm_gpu_state *state) #if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP) -void a5xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state, - struct drm_printer *p) +static void a5xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state, + struct drm_printer *p) { int i, j; u32 pos = 0; diff --git a/drivers/gpu/drm/msm/adreno/a5xx_power.c b/drivers/gpu/drm/msm/adreno/a5xx_power.c index a3a06db675ba..321a8061fd32 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_power.c +++ b/drivers/gpu/drm/msm/adreno/a5xx_power.c @@ -297,6 +297,10 @@ int a5xx_power_init(struct msm_gpu *gpu) struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); int ret; + /* Not all A5xx chips have a GPMU */ + if (adreno_is_a510(adreno_gpu)) + return 0; + /* Set up the limits management */ if (adreno_is_a530(adreno_gpu)) a530_lm_setup(gpu); @@ -326,6 +330,9 @@ void a5xx_gpmu_ucode_init(struct msm_gpu *gpu) unsigned int *data, *ptr, *cmds; unsigned int cmds_size; + if (adreno_is_a510(adreno_gpu)) + return; + if (a5xx_gpu->gpmu_bo) return; diff --git a/drivers/gpu/drm/msm/adreno/adreno_device.c b/drivers/gpu/drm/msm/adreno/adreno_device.c index 0888e0df660d..fbbdf86504f5 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_device.c +++ b/drivers/gpu/drm/msm/adreno/adreno_device.c @@ -115,6 +115,21 @@ static const struct adreno_info gpulist[] = { .inactive_period = DRM_MSM_INACTIVE_PERIOD, .init = a4xx_gpu_init, }, { + .rev = ADRENO_REV(5, 1, 0, ANY_ID), + .revn = 510, + .name = "A510", + .fw = { + [ADRENO_FW_PM4] = "a530_pm4.fw", + [ADRENO_FW_PFP] = "a530_pfp.fw", + }, + .gmem = SZ_256K, + /* + * Increase inactive period to 250 to avoid bouncing + * the GDSC which appears to make it grumpy + */ + .inactive_period = 250, + .init = a5xx_gpu_init, + }, { .rev = ADRENO_REV(5, 3, 0, 2), .revn = 530, .name = "A530", diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c index 048c8be426f3..0783e4b5486a 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c @@ -14,6 +14,7 @@ #include <linux/pm_opp.h> #include <linux/slab.h> #include <linux/soc/qcom/mdt_loader.h> +#include <soc/qcom/ocmem.h> #include "adreno_gpu.h" #include "msm_gem.h" #include "msm_mmu.h" @@ -893,6 +894,45 @@ static int adreno_get_pwrlevels(struct device *dev, return 0; } +int adreno_gpu_ocmem_init(struct device *dev, struct adreno_gpu *adreno_gpu, + struct adreno_ocmem *adreno_ocmem) +{ + struct ocmem_buf *ocmem_hdl; + struct ocmem *ocmem; + + ocmem = of_get_ocmem(dev); + if (IS_ERR(ocmem)) { + if (PTR_ERR(ocmem) == -ENODEV) { + /* + * Return success since either the ocmem property was + * not specified in device tree, or ocmem support is + * not compiled into the kernel. + */ + return 0; + } + + return PTR_ERR(ocmem); + } + + ocmem_hdl = ocmem_allocate(ocmem, OCMEM_GRAPHICS, adreno_gpu->gmem); + if (IS_ERR(ocmem_hdl)) + return PTR_ERR(ocmem_hdl); + + adreno_ocmem->ocmem = ocmem; + adreno_ocmem->base = ocmem_hdl->addr; + adreno_ocmem->hdl = ocmem_hdl; + adreno_gpu->gmem = ocmem_hdl->len; + + return 0; +} + +void adreno_gpu_ocmem_cleanup(struct adreno_ocmem *adreno_ocmem) +{ + if (adreno_ocmem && adreno_ocmem->base) + ocmem_free(adreno_ocmem->ocmem, OCMEM_GRAPHICS, + adreno_ocmem->hdl); +} + int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev, struct adreno_gpu *adreno_gpu, const struct adreno_gpu_funcs *funcs, int nr_rings) diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.h b/drivers/gpu/drm/msm/adreno/adreno_gpu.h index c7441fb8313e..e71a7570ef72 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.h +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.h @@ -126,6 +126,12 @@ struct adreno_gpu { }; #define to_adreno_gpu(x) container_of(x, struct adreno_gpu, base) +struct adreno_ocmem { + struct ocmem *ocmem; + unsigned long base; + void *hdl; +}; + /* platform config data (ie. from DT, or pdata) */ struct adreno_platform_config { struct adreno_rev rev; @@ -206,6 +212,11 @@ static inline int adreno_is_a430(struct adreno_gpu *gpu) return gpu->revn == 430; } +static inline int adreno_is_a510(struct adreno_gpu *gpu) +{ + return gpu->revn == 510; +} + static inline int adreno_is_a530(struct adreno_gpu *gpu) { return gpu->revn == 530; @@ -236,6 +247,10 @@ void adreno_dump(struct msm_gpu *gpu); void adreno_wait_ring(struct msm_ringbuffer *ring, uint32_t ndwords); struct msm_ringbuffer *adreno_active_ring(struct msm_gpu *gpu); +int adreno_gpu_ocmem_init(struct device *dev, struct adreno_gpu *adreno_gpu, + struct adreno_ocmem *ocmem); +void adreno_gpu_ocmem_cleanup(struct adreno_ocmem *ocmem); + int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev, struct adreno_gpu *gpu, const struct adreno_gpu_funcs *funcs, int nr_rings); diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_core_irq.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_core_irq.c index cdbea38b8697..f1bc6a1af7a7 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_core_irq.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_core_irq.c @@ -55,8 +55,7 @@ static void dpu_core_irq_callback_handler(void *arg, int irq_idx) int dpu_core_irq_idx_lookup(struct dpu_kms *dpu_kms, enum dpu_intr_type intr_type, u32 instance_idx) { - if (!dpu_kms || !dpu_kms->hw_intr || - !dpu_kms->hw_intr->ops.irq_idx_lookup) + if (!dpu_kms->hw_intr || !dpu_kms->hw_intr->ops.irq_idx_lookup) return -EINVAL; return dpu_kms->hw_intr->ops.irq_idx_lookup(intr_type, @@ -73,7 +72,7 @@ static int _dpu_core_irq_enable(struct dpu_kms *dpu_kms, int irq_idx) unsigned long irq_flags; int ret = 0, enable_count; - if (!dpu_kms || !dpu_kms->hw_intr || + if (!dpu_kms->hw_intr || !dpu_kms->irq_obj.enable_counts || !dpu_kms->irq_obj.irq_counts) { DPU_ERROR("invalid params\n"); @@ -114,7 +113,7 @@ int dpu_core_irq_enable(struct dpu_kms *dpu_kms, int *irq_idxs, u32 irq_count) { int i, ret = 0, counts; - if (!dpu_kms || !irq_idxs || !irq_count) { + if (!irq_idxs || !irq_count) { DPU_ERROR("invalid params\n"); return -EINVAL; } @@ -138,7 +137,7 @@ static int _dpu_core_irq_disable(struct dpu_kms *dpu_kms, int irq_idx) { int ret = 0, enable_count; - if (!dpu_kms || !dpu_kms->hw_intr || !dpu_kms->irq_obj.enable_counts) { + if (!dpu_kms->hw_intr || !dpu_kms->irq_obj.enable_counts) { DPU_ERROR("invalid params\n"); return -EINVAL; } @@ -169,7 +168,7 @@ int dpu_core_irq_disable(struct dpu_kms *dpu_kms, int *irq_idxs, u32 irq_count) { int i, ret = 0, counts; - if (!dpu_kms || !irq_idxs || !irq_count) { + if (!irq_idxs || !irq_count) { DPU_ERROR("invalid params\n"); return -EINVAL; } @@ -186,7 +185,7 @@ int dpu_core_irq_disable(struct dpu_kms *dpu_kms, int *irq_idxs, u32 irq_count) u32 dpu_core_irq_read(struct dpu_kms *dpu_kms, int irq_idx, bool clear) { - if (!dpu_kms || !dpu_kms->hw_intr || + if (!dpu_kms->hw_intr || !dpu_kms->hw_intr->ops.get_interrupt_status) return 0; @@ -205,7 +204,7 @@ int dpu_core_irq_register_callback(struct dpu_kms *dpu_kms, int irq_idx, { unsigned long irq_flags; - if (!dpu_kms || !dpu_kms->irq_obj.irq_cb_tbl) { + if (!dpu_kms->irq_obj.irq_cb_tbl) { DPU_ERROR("invalid params\n"); return -EINVAL; } @@ -240,7 +239,7 @@ int dpu_core_irq_unregister_callback(struct dpu_kms *dpu_kms, int irq_idx, { unsigned long irq_flags; - if (!dpu_kms || !dpu_kms->irq_obj.irq_cb_tbl) { + if (!dpu_kms->irq_obj.irq_cb_tbl) { DPU_ERROR("invalid params\n"); return -EINVAL; } @@ -274,8 +273,7 @@ int dpu_core_irq_unregister_callback(struct dpu_kms *dpu_kms, int irq_idx, static void dpu_clear_all_irqs(struct dpu_kms *dpu_kms) { - if (!dpu_kms || !dpu_kms->hw_intr || - !dpu_kms->hw_intr->ops.clear_all_irqs) + if (!dpu_kms->hw_intr || !dpu_kms->hw_intr->ops.clear_all_irqs) return; dpu_kms->hw_intr->ops.clear_all_irqs(dpu_kms->hw_intr); @@ -283,8 +281,7 @@ static void dpu_clear_all_irqs(struct dpu_kms *dpu_kms) static void dpu_disable_all_irqs(struct dpu_kms *dpu_kms) { - if (!dpu_kms || !dpu_kms->hw_intr || - !dpu_kms->hw_intr->ops.disable_all_irqs) + if (!dpu_kms->hw_intr || !dpu_kms->hw_intr->ops.disable_all_irqs) return; dpu_kms->hw_intr->ops.disable_all_irqs(dpu_kms->hw_intr); @@ -343,18 +340,8 @@ void dpu_debugfs_core_irq_init(struct dpu_kms *dpu_kms, void dpu_core_irq_preinstall(struct dpu_kms *dpu_kms) { - struct msm_drm_private *priv; int i; - if (!dpu_kms->dev) { - DPU_ERROR("invalid drm device\n"); - return; - } else if (!dpu_kms->dev->dev_private) { - DPU_ERROR("invalid device private\n"); - return; - } - priv = dpu_kms->dev->dev_private; - pm_runtime_get_sync(&dpu_kms->pdev->dev); dpu_clear_all_irqs(dpu_kms); dpu_disable_all_irqs(dpu_kms); @@ -379,18 +366,8 @@ void dpu_core_irq_preinstall(struct dpu_kms *dpu_kms) void dpu_core_irq_uninstall(struct dpu_kms *dpu_kms) { - struct msm_drm_private *priv; int i; - if (!dpu_kms->dev) { - DPU_ERROR("invalid drm device\n"); - return; - } else if (!dpu_kms->dev->dev_private) { - DPU_ERROR("invalid device private\n"); - return; - } - priv = dpu_kms->dev->dev_private; - pm_runtime_get_sync(&dpu_kms->pdev->dev); for (i = 0; i < dpu_kms->irq_obj.total_irqs; i++) if (atomic_read(&dpu_kms->irq_obj.enable_counts[i]) || diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.c index 09a49b59bb5b..11f2bebe3869 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.c @@ -32,18 +32,7 @@ enum dpu_perf_mode { static struct dpu_kms *_dpu_crtc_get_kms(struct drm_crtc *crtc) { struct msm_drm_private *priv; - - if (!crtc->dev || !crtc->dev->dev_private) { - DPU_ERROR("invalid device\n"); - return NULL; - } - priv = crtc->dev->dev_private; - if (!priv || !priv->kms) { - DPU_ERROR("invalid kms\n"); - return NULL; - } - return to_dpu_kms(priv->kms); } @@ -116,7 +105,7 @@ int dpu_core_perf_crtc_check(struct drm_crtc *crtc, } kms = _dpu_crtc_get_kms(crtc); - if (!kms || !kms->catalog) { + if (!kms->catalog) { DPU_ERROR("invalid parameters\n"); return 0; } @@ -215,7 +204,6 @@ static int _dpu_core_perf_crtc_update_bus(struct dpu_kms *kms, void dpu_core_perf_crtc_release_bw(struct drm_crtc *crtc) { struct dpu_crtc *dpu_crtc; - struct dpu_crtc_state *dpu_cstate; struct dpu_kms *kms; if (!crtc) { @@ -224,13 +212,12 @@ void dpu_core_perf_crtc_release_bw(struct drm_crtc *crtc) } kms = _dpu_crtc_get_kms(crtc); - if (!kms || !kms->catalog) { + if (!kms->catalog) { DPU_ERROR("invalid kms\n"); return; } dpu_crtc = to_dpu_crtc(crtc); - dpu_cstate = to_dpu_crtc_state(crtc->state); if (atomic_dec_return(&kms->bandwidth_ref) > 0) return; @@ -287,7 +274,6 @@ int dpu_core_perf_crtc_update(struct drm_crtc *crtc, u64 clk_rate = 0; struct dpu_crtc *dpu_crtc; struct dpu_crtc_state *dpu_cstate; - struct msm_drm_private *priv; struct dpu_kms *kms; int ret; @@ -297,11 +283,10 @@ int dpu_core_perf_crtc_update(struct drm_crtc *crtc, } kms = _dpu_crtc_get_kms(crtc); - if (!kms || !kms->catalog) { + if (!kms->catalog) { DPU_ERROR("invalid kms\n"); return -EINVAL; } - priv = kms->dev->dev_private; dpu_crtc = to_dpu_crtc(crtc); dpu_cstate = to_dpu_crtc_state(crtc->state); diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c index ce59adff06aa..f197dce54576 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c @@ -266,11 +266,20 @@ enum dpu_intf_mode dpu_crtc_get_intf_mode(struct drm_crtc *crtc) { struct drm_encoder *encoder; - if (!crtc || !crtc->dev) { + if (!crtc) { DPU_ERROR("invalid crtc\n"); return INTF_MODE_NONE; } + /* + * TODO: This function is called from dpu debugfs and as part of atomic + * check. When called from debugfs, the crtc->mutex must be held to + * read crtc->state. However reading crtc->state from atomic check isn't + * allowed (unless you have a good reason, a big comment, and a deep + * understanding of how the atomic/modeset locks work (<- and this is + * probably not possible)). So we'll keep the WARN_ON here for now, but + * really we need to figure out a better way to track our operating mode + */ WARN_ON(!drm_modeset_is_locked(&crtc->mutex)); /* TODO: Returns the first INTF_MODE, could there be multiple values? */ @@ -694,7 +703,7 @@ static void dpu_crtc_disable(struct drm_crtc *crtc, unsigned long flags; bool release_bandwidth = false; - if (!crtc || !crtc->dev || !crtc->dev->dev_private || !crtc->state) { + if (!crtc || !crtc->state) { DPU_ERROR("invalid crtc\n"); return; } @@ -766,7 +775,7 @@ static void dpu_crtc_enable(struct drm_crtc *crtc, struct msm_drm_private *priv; bool request_bandwidth; - if (!crtc || !crtc->dev || !crtc->dev->dev_private) { + if (!crtc) { DPU_ERROR("invalid crtc\n"); return; } @@ -1288,13 +1297,8 @@ struct drm_crtc *dpu_crtc_init(struct drm_device *dev, struct drm_plane *plane, { struct drm_crtc *crtc = NULL; struct dpu_crtc *dpu_crtc = NULL; - struct msm_drm_private *priv = NULL; - struct dpu_kms *kms = NULL; int i; - priv = dev->dev_private; - kms = to_dpu_kms(priv->kms); - dpu_crtc = kzalloc(sizeof(*dpu_crtc), GFP_KERNEL); if (!dpu_crtc) return ERR_PTR(-ENOMEM); diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c index d82ea994063f..f96e142c4361 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c @@ -645,11 +645,6 @@ static void _dpu_encoder_update_vsync_source(struct dpu_encoder_virt *dpu_enc, priv = drm_enc->dev->dev_private; dpu_kms = to_dpu_kms(priv->kms); - if (!dpu_kms) { - DPU_ERROR("invalid dpu_kms\n"); - return; - } - hw_mdptop = dpu_kms->hw_mdp; if (!hw_mdptop) { DPU_ERROR("invalid mdptop\n"); @@ -735,8 +730,7 @@ static int dpu_encoder_resource_control(struct drm_encoder *drm_enc, struct msm_drm_private *priv; bool is_vid_mode = false; - if (!drm_enc || !drm_enc->dev || !drm_enc->dev->dev_private || - !drm_enc->crtc) { + if (!drm_enc || !drm_enc->dev || !drm_enc->crtc) { DPU_ERROR("invalid parameters\n"); return -EINVAL; } @@ -1092,17 +1086,13 @@ static void _dpu_encoder_virt_enable_helper(struct drm_encoder *drm_enc) struct msm_drm_private *priv; struct dpu_kms *dpu_kms; - if (!drm_enc || !drm_enc->dev || !drm_enc->dev->dev_private) { + if (!drm_enc || !drm_enc->dev) { DPU_ERROR("invalid parameters\n"); return; } priv = drm_enc->dev->dev_private; dpu_kms = to_dpu_kms(priv->kms); - if (!dpu_kms) { - DPU_ERROR("invalid dpu_kms\n"); - return; - } dpu_enc = to_dpu_encoder_virt(drm_enc); if (!dpu_enc || !dpu_enc->cur_master) { @@ -1184,7 +1174,6 @@ static void dpu_encoder_virt_disable(struct drm_encoder *drm_enc) struct dpu_encoder_virt *dpu_enc = NULL; struct msm_drm_private *priv; struct dpu_kms *dpu_kms; - struct drm_display_mode *mode; int i = 0; if (!drm_enc) { @@ -1193,9 +1182,6 @@ static void dpu_encoder_virt_disable(struct drm_encoder *drm_enc) } else if (!drm_enc->dev) { DPU_ERROR("invalid dev\n"); return; - } else if (!drm_enc->dev->dev_private) { - DPU_ERROR("invalid dev_private\n"); - return; } dpu_enc = to_dpu_encoder_virt(drm_enc); @@ -1204,8 +1190,6 @@ static void dpu_encoder_virt_disable(struct drm_encoder *drm_enc) mutex_lock(&dpu_enc->enc_lock); dpu_enc->enabled = false; - mode = &drm_enc->crtc->state->adjusted_mode; - priv = drm_enc->dev->dev_private; dpu_kms = to_dpu_kms(priv->kms); @@ -1734,8 +1718,7 @@ static void dpu_encoder_vsync_event_handler(struct timer_list *t) struct msm_drm_private *priv; struct msm_drm_thread *event_thread; - if (!drm_enc->dev || !drm_enc->dev->dev_private || - !drm_enc->crtc) { + if (!drm_enc->dev || !drm_enc->crtc) { DPU_ERROR("invalid parameters\n"); return; } @@ -1914,8 +1897,6 @@ static int _dpu_encoder_debugfs_status_open(struct inode *inode, static int _dpu_encoder_init_debugfs(struct drm_encoder *drm_enc) { struct dpu_encoder_virt *dpu_enc = to_dpu_encoder_virt(drm_enc); - struct msm_drm_private *priv; - struct dpu_kms *dpu_kms; int i; static const struct file_operations debugfs_status_fops = { @@ -1927,14 +1908,11 @@ static int _dpu_encoder_init_debugfs(struct drm_encoder *drm_enc) char name[DPU_NAME_SIZE]; - if (!drm_enc->dev || !drm_enc->dev->dev_private) { + if (!drm_enc->dev) { DPU_ERROR("invalid encoder or kms\n"); return -EINVAL; } - priv = drm_enc->dev->dev_private; - dpu_kms = to_dpu_kms(priv->kms); - snprintf(name, DPU_NAME_SIZE, "encoder%u", drm_enc->base.id); /* create overall sub-directory for the encoder */ @@ -2042,9 +2020,8 @@ static int dpu_encoder_setup_display(struct dpu_encoder_virt *dpu_enc, enum dpu_intf_type intf_type; struct dpu_enc_phys_init_params phys_params; - if (!dpu_enc || !dpu_kms) { - DPU_ERROR("invalid arg(s), enc %d kms %d\n", - dpu_enc != 0, dpu_kms != 0); + if (!dpu_enc) { + DPU_ERROR("invalid arg(s), enc %d\n", dpu_enc != 0); return -EINVAL; } @@ -2133,14 +2110,12 @@ static void dpu_encoder_frame_done_timeout(struct timer_list *t) struct dpu_encoder_virt *dpu_enc = from_timer(dpu_enc, t, frame_done_timer); struct drm_encoder *drm_enc = &dpu_enc->base; - struct msm_drm_private *priv; u32 event; - if (!drm_enc->dev || !drm_enc->dev->dev_private) { + if (!drm_enc->dev) { DPU_ERROR("invalid parameters\n"); return; } - priv = drm_enc->dev->dev_private; if (!dpu_enc->frame_busy_mask[0] || !dpu_enc->crtc_frame_event_cb) { DRM_DEBUG_KMS("id:%u invalid timeout frame_busy_mask=%lu\n", diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_cmd.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_cmd.c index 2923b63d95fe..047960949fbb 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_cmd.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_cmd.c @@ -124,13 +124,11 @@ static void dpu_encoder_phys_cmd_pp_rd_ptr_irq(void *arg, int irq_idx) static void dpu_encoder_phys_cmd_ctl_start_irq(void *arg, int irq_idx) { struct dpu_encoder_phys *phys_enc = arg; - struct dpu_encoder_phys_cmd *cmd_enc; if (!phys_enc || !phys_enc->hw_ctl) return; DPU_ATRACE_BEGIN("ctl_start_irq"); - cmd_enc = to_dpu_encoder_phys_cmd(phys_enc); atomic_add_unless(&phys_enc->pending_ctlstart_cnt, -1, 0); @@ -316,13 +314,9 @@ end: static void dpu_encoder_phys_cmd_irq_control(struct dpu_encoder_phys *phys_enc, bool enable) { - struct dpu_encoder_phys_cmd *cmd_enc; - if (!phys_enc) return; - cmd_enc = to_dpu_encoder_phys_cmd(phys_enc); - trace_dpu_enc_phys_cmd_irq_ctrl(DRMID(phys_enc->parent), phys_enc->hw_pp->idx - PINGPONG_0, enable, atomic_read(&phys_enc->vblank_refcount)); @@ -355,7 +349,6 @@ static void dpu_encoder_phys_cmd_tearcheck_config( struct drm_display_mode *mode; bool tc_enable = true; u32 vsync_hz; - struct msm_drm_private *priv; struct dpu_kms *dpu_kms; if (!phys_enc || !phys_enc->hw_pp) { @@ -373,11 +366,6 @@ static void dpu_encoder_phys_cmd_tearcheck_config( } dpu_kms = phys_enc->dpu_kms; - if (!dpu_kms || !dpu_kms->dev || !dpu_kms->dev->dev_private) { - DPU_ERROR("invalid device\n"); - return; - } - priv = dpu_kms->dev->dev_private; /* * TE default: dsi byte clock calculated base on 70 fps; @@ -650,13 +638,10 @@ static int dpu_encoder_phys_cmd_wait_for_tx_complete( struct dpu_encoder_phys *phys_enc) { int rc; - struct dpu_encoder_phys_cmd *cmd_enc; if (!phys_enc) return -EINVAL; - cmd_enc = to_dpu_encoder_phys_cmd(phys_enc); - rc = _dpu_encoder_phys_cmd_wait_for_idle(phys_enc); if (rc) { DRM_ERROR("failed wait_for_idle: id:%u ret:%d intf:%d\n", diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c index b9c84fb4d4a1..3123ef873cdf 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c @@ -374,7 +374,7 @@ static void dpu_encoder_phys_vid_mode_set( struct drm_display_mode *mode, struct drm_display_mode *adj_mode) { - if (!phys_enc || !phys_enc->dpu_kms) { + if (!phys_enc) { DPU_ERROR("invalid encoder/kms\n"); return; } @@ -566,16 +566,13 @@ static void dpu_encoder_phys_vid_prepare_for_kickoff( static void dpu_encoder_phys_vid_disable(struct dpu_encoder_phys *phys_enc) { - struct msm_drm_private *priv; unsigned long lock_flags; int ret; - if (!phys_enc || !phys_enc->parent || !phys_enc->parent->dev || - !phys_enc->parent->dev->dev_private) { + if (!phys_enc || !phys_enc->parent || !phys_enc->parent->dev) { DPU_ERROR("invalid encoder/device\n"); return; } - priv = phys_enc->parent->dev->dev_private; if (!phys_enc->hw_intf || !phys_enc->hw_ctl) { DPU_ERROR("invalid hw_intf %d hw_ctl %d\n", diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c index 58b0485dc375..6c92f0fbeac9 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c @@ -30,10 +30,6 @@ #define CREATE_TRACE_POINTS #include "dpu_trace.h" -static const char * const iommu_ports[] = { - "mdp_0", -}; - /* * To enable overall DRM driver logging * # echo 0x2 > /sys/module/drm/parameters/debug @@ -68,16 +64,14 @@ static int _dpu_danger_signal_status(struct seq_file *s, bool danger_status) { struct dpu_kms *kms = (struct dpu_kms *)s->private; - struct msm_drm_private *priv; struct dpu_danger_safe_status status; int i; - if (!kms->dev || !kms->dev->dev_private || !kms->hw_mdp) { + if (!kms->hw_mdp) { DPU_ERROR("invalid arg(s)\n"); return 0; } - priv = kms->dev->dev_private; memset(&status, 0, sizeof(struct dpu_danger_safe_status)); pm_runtime_get_sync(&kms->pdev->dev); @@ -153,13 +147,7 @@ static int _dpu_debugfs_show_regset32(struct seq_file *s, void *data) return 0; dev = dpu_kms->dev; - if (!dev) - return 0; - priv = dev->dev_private; - if (!priv) - return 0; - base = dpu_kms->mmio + regset->offset; /* insert padding spaces, if needed */ @@ -280,7 +268,6 @@ static void dpu_kms_prepare_commit(struct msm_kms *kms, struct drm_atomic_state *state) { struct dpu_kms *dpu_kms; - struct msm_drm_private *priv; struct drm_device *dev; struct drm_crtc *crtc; struct drm_crtc_state *crtc_state; @@ -292,10 +279,6 @@ static void dpu_kms_prepare_commit(struct msm_kms *kms, dpu_kms = to_dpu_kms(kms); dev = dpu_kms->dev; - if (!dev || !dev->dev_private) - return; - priv = dev->dev_private; - /* Call prepare_commit for all affected encoders */ for_each_new_crtc_in_state(state, crtc, crtc_state, i) { drm_for_each_encoder_mask(encoder, crtc->dev, @@ -333,7 +316,6 @@ void dpu_kms_encoder_enable(struct drm_encoder *encoder) if (funcs && funcs->commit) funcs->commit(encoder); - WARN_ON(!drm_modeset_is_locked(&dev->mode_config.connection_mutex)); drm_for_each_crtc(crtc, dev) { if (!(crtc->state->encoder_mask & drm_encoder_mask(encoder))) continue; @@ -464,16 +446,6 @@ static void _dpu_kms_drm_obj_destroy(struct dpu_kms *dpu_kms) struct msm_drm_private *priv; int i; - if (!dpu_kms) { - DPU_ERROR("invalid dpu_kms\n"); - return; - } else if (!dpu_kms->dev) { - DPU_ERROR("invalid dev\n"); - return; - } else if (!dpu_kms->dev->dev_private) { - DPU_ERROR("invalid dev_private\n"); - return; - } priv = dpu_kms->dev->dev_private; for (i = 0; i < priv->num_crtcs; i++) @@ -505,7 +477,6 @@ static int _dpu_kms_drm_obj_init(struct dpu_kms *dpu_kms) int primary_planes_idx = 0, cursor_planes_idx = 0, i, ret; int max_crtc_count; - dev = dpu_kms->dev; priv = dev->dev_private; catalog = dpu_kms->catalog; @@ -585,8 +556,6 @@ static void _dpu_kms_hw_destroy(struct dpu_kms *dpu_kms) int i; dev = dpu_kms->dev; - if (!dev) - return; if (dpu_kms->hw_intr) dpu_hw_intr_destroy(dpu_kms->hw_intr); @@ -725,8 +694,7 @@ static void _dpu_kms_mmu_destroy(struct dpu_kms *dpu_kms) mmu = dpu_kms->base.aspace->mmu; - mmu->funcs->detach(mmu, (const char **)iommu_ports, - ARRAY_SIZE(iommu_ports)); + mmu->funcs->detach(mmu); msm_gem_address_space_put(dpu_kms->base.aspace); dpu_kms->base.aspace = NULL; @@ -752,8 +720,7 @@ static int _dpu_kms_mmu_init(struct dpu_kms *dpu_kms) return PTR_ERR(aspace); } - ret = aspace->mmu->funcs->attach(aspace->mmu, iommu_ports, - ARRAY_SIZE(iommu_ports)); + ret = aspace->mmu->funcs->attach(aspace->mmu); if (ret) { DPU_ERROR("failed to attach iommu %d\n", ret); msm_gem_address_space_put(aspace); @@ -803,16 +770,7 @@ static int dpu_kms_hw_init(struct msm_kms *kms) dpu_kms = to_dpu_kms(kms); dev = dpu_kms->dev; - if (!dev) { - DPU_ERROR("invalid device\n"); - return rc; - } - priv = dev->dev_private; - if (!priv) { - DPU_ERROR("invalid private data\n"); - return rc; - } atomic_set(&dpu_kms->bandwidth_ref, 0); @@ -974,7 +932,7 @@ struct msm_kms *dpu_kms_init(struct drm_device *dev) struct dpu_kms *dpu_kms; int irq; - if (!dev || !dev->dev_private) { + if (!dev) { DPU_ERROR("drm device node invalid\n"); return ERR_PTR(-EINVAL); } @@ -1064,11 +1022,6 @@ static int __maybe_unused dpu_runtime_suspend(struct device *dev) struct dss_module_power *mp = &dpu_kms->mp; ddev = dpu_kms->dev; - if (!ddev) { - DPU_ERROR("invalid drm_device\n"); - return rc; - } - rc = msm_dss_enable_clk(mp->clk_config, mp->num_clk, false); if (rc) DPU_ERROR("clock disable failed rc:%d\n", rc); @@ -1086,11 +1039,6 @@ static int __maybe_unused dpu_runtime_resume(struct device *dev) struct dss_module_power *mp = &dpu_kms->mp; ddev = dpu_kms->dev; - if (!ddev) { - DPU_ERROR("invalid drm_device\n"); - return rc; - } - rc = msm_dss_enable_clk(mp->clk_config, mp->num_clk, true); if (rc) { DPU_ERROR("clock enable failed rc:%d\n", rc); diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h index 959d03e007fa..c6169e7df19d 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h @@ -139,10 +139,6 @@ struct vsync_info { #define to_dpu_kms(x) container_of(x, struct dpu_kms, base) -/* get struct msm_kms * from drm_device * */ -#define ddev_to_msm_kms(D) ((D) && (D)->dev_private ? \ - ((struct msm_drm_private *)((D)->dev_private))->kms : NULL) - /** * Debugfs functions - extra helper functions for debugfs support * diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_vbif.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_vbif.c index 8d24b79fd400..991f4c8f8a12 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_vbif.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_vbif.c @@ -154,10 +154,6 @@ void dpu_vbif_set_ot_limit(struct dpu_kms *dpu_kms, u32 ot_lim; int ret, i; - if (!dpu_kms) { - DPU_ERROR("invalid arguments\n"); - return; - } mdp = dpu_kms->hw_mdp; for (i = 0; i < ARRAY_SIZE(dpu_kms->hw_vbif); i++) { @@ -214,7 +210,7 @@ void dpu_vbif_set_qos_remap(struct dpu_kms *dpu_kms, const struct dpu_vbif_qos_tbl *qos_tbl; int i; - if (!dpu_kms || !params || !dpu_kms->hw_mdp) { + if (!params || !dpu_kms->hw_mdp) { DPU_ERROR("invalid arguments\n"); return; } diff --git a/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c b/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c index 50711ccc8691..dda05436f716 100644 --- a/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c +++ b/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c @@ -157,10 +157,6 @@ static long mdp4_round_pixclk(struct msm_kms *kms, unsigned long rate, } } -static const char * const iommu_ports[] = { - "mdp_port0_cb0", "mdp_port1_cb0", -}; - static void mdp4_destroy(struct msm_kms *kms) { struct mdp4_kms *mdp4_kms = to_mdp4_kms(to_mdp_kms(kms)); @@ -172,8 +168,7 @@ static void mdp4_destroy(struct msm_kms *kms) drm_gem_object_put_unlocked(mdp4_kms->blank_cursor_bo); if (aspace) { - aspace->mmu->funcs->detach(aspace->mmu, - iommu_ports, ARRAY_SIZE(iommu_ports)); + aspace->mmu->funcs->detach(aspace->mmu); msm_gem_address_space_put(aspace); } @@ -524,8 +519,7 @@ struct msm_kms *mdp4_kms_init(struct drm_device *dev) kms->aspace = aspace; - ret = aspace->mmu->funcs->attach(aspace->mmu, iommu_ports, - ARRAY_SIZE(iommu_ports)); + ret = aspace->mmu->funcs->attach(aspace->mmu); if (ret) goto fail; } else { diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_cfg.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_cfg.c index f6e71ff539ca..1f48f64539a2 100644 --- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_cfg.c +++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_cfg.c @@ -14,7 +14,7 @@ struct mdp5_cfg_handler { /* mdp5_cfg must be exposed (used in mdp5.xml.h) */ const struct mdp5_cfg_hw *mdp5_cfg = NULL; -const struct mdp5_cfg_hw msm8x74v1_config = { +static const struct mdp5_cfg_hw msm8x74v1_config = { .name = "msm8x74v1", .mdp = { .count = 1, @@ -98,7 +98,7 @@ const struct mdp5_cfg_hw msm8x74v1_config = { .max_clk = 200000000, }; -const struct mdp5_cfg_hw msm8x74v2_config = { +static const struct mdp5_cfg_hw msm8x74v2_config = { .name = "msm8x74", .mdp = { .count = 1, @@ -180,7 +180,7 @@ const struct mdp5_cfg_hw msm8x74v2_config = { .max_clk = 200000000, }; -const struct mdp5_cfg_hw apq8084_config = { +static const struct mdp5_cfg_hw apq8084_config = { .name = "apq8084", .mdp = { .count = 1, @@ -275,7 +275,7 @@ const struct mdp5_cfg_hw apq8084_config = { .max_clk = 320000000, }; -const struct mdp5_cfg_hw msm8x16_config = { +static const struct mdp5_cfg_hw msm8x16_config = { .name = "msm8x16", .mdp = { .count = 1, @@ -342,7 +342,7 @@ const struct mdp5_cfg_hw msm8x16_config = { .max_clk = 320000000, }; -const struct mdp5_cfg_hw msm8x94_config = { +static const struct mdp5_cfg_hw msm8x94_config = { .name = "msm8x94", .mdp = { .count = 1, @@ -437,7 +437,7 @@ const struct mdp5_cfg_hw msm8x94_config = { .max_clk = 400000000, }; -const struct mdp5_cfg_hw msm8x96_config = { +static const struct mdp5_cfg_hw msm8x96_config = { .name = "msm8x96", .mdp = { .count = 1, @@ -545,7 +545,104 @@ const struct mdp5_cfg_hw msm8x96_config = { .max_clk = 412500000, }; -const struct mdp5_cfg_hw msm8917_config = { +const struct mdp5_cfg_hw msm8x76_config = { + .name = "msm8x76", + .mdp = { + .count = 1, + .caps = MDP_CAP_SMP | + MDP_CAP_DSC | + MDP_CAP_SRC_SPLIT | + 0, + }, + .ctl = { + .count = 3, + .base = { 0x01000, 0x01200, 0x01400 }, + .flush_hw_mask = 0xffffffff, + }, + .smp = { + .mmb_count = 10, + .mmb_size = 10240, + .clients = { + [SSPP_VIG0] = 1, [SSPP_VIG1] = 9, + [SSPP_DMA0] = 4, + [SSPP_RGB0] = 7, [SSPP_RGB1] = 8, + }, + }, + .pipe_vig = { + .count = 2, + .base = { 0x04000, 0x06000 }, + .caps = MDP_PIPE_CAP_HFLIP | + MDP_PIPE_CAP_VFLIP | + MDP_PIPE_CAP_SCALE | + MDP_PIPE_CAP_CSC | + MDP_PIPE_CAP_DECIMATION | + MDP_PIPE_CAP_SW_PIX_EXT | + 0, + }, + .pipe_rgb = { + .count = 2, + .base = { 0x14000, 0x16000 }, + .caps = MDP_PIPE_CAP_HFLIP | + MDP_PIPE_CAP_VFLIP | + MDP_PIPE_CAP_DECIMATION | + MDP_PIPE_CAP_SW_PIX_EXT | + 0, + }, + .pipe_dma = { + .count = 1, + .base = { 0x24000 }, + .caps = MDP_PIPE_CAP_HFLIP | + MDP_PIPE_CAP_VFLIP | + MDP_PIPE_CAP_SW_PIX_EXT | + 0, + }, + .pipe_cursor = { + .count = 1, + .base = { 0x440DC }, + .caps = MDP_PIPE_CAP_HFLIP | + MDP_PIPE_CAP_VFLIP | + MDP_PIPE_CAP_SW_PIX_EXT | + MDP_PIPE_CAP_CURSOR | + 0, + }, + .lm = { + .count = 2, + .base = { 0x44000, 0x45000 }, + .instances = { + { .id = 0, .pp = 0, .dspp = 0, + .caps = MDP_LM_CAP_DISPLAY, }, + { .id = 1, .pp = -1, .dspp = -1, + .caps = MDP_LM_CAP_WB }, + }, + .nb_stages = 8, + .max_width = 2560, + .max_height = 0xFFFF, + }, + .dspp = { + .count = 1, + .base = { 0x54000 }, + + }, + .pp = { + .count = 3, + .base = { 0x70000, 0x70800, 0x72000 }, + }, + .dsc = { + .count = 2, + .base = { 0x80000, 0x80400 }, + }, + .intf = { + .base = { 0x6a000, 0x6a800, 0x6b000 }, + .connect = { + [0] = INTF_DISABLED, + [1] = INTF_DSI, + [2] = INTF_DSI, + }, + }, + .max_clk = 360000000, +}; + +static const struct mdp5_cfg_hw msm8917_config = { .name = "msm8917", .mdp = { .count = 1, @@ -630,7 +727,7 @@ const struct mdp5_cfg_hw msm8917_config = { .max_clk = 320000000, }; -const struct mdp5_cfg_hw msm8998_config = { +static const struct mdp5_cfg_hw msm8998_config = { .name = "msm8998", .mdp = { .count = 1, @@ -745,6 +842,7 @@ static const struct mdp5_cfg_handler cfg_handlers_v1[] = { { .revision = 6, .config = { .hw = &msm8x16_config } }, { .revision = 9, .config = { .hw = &msm8x94_config } }, { .revision = 7, .config = { .hw = &msm8x96_config } }, + { .revision = 11, .config = { .hw = &msm8x76_config } }, { .revision = 15, .config = { .hw = &msm8917_config } }, }; diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c index eb0b4b7dc7cc..05cc04f729d6 100644 --- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c +++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c @@ -214,7 +214,6 @@ static void blend_setup(struct drm_crtc *crtc) struct mdp5_pipeline *pipeline = &mdp5_cstate->pipeline; struct mdp5_kms *mdp5_kms = get_kms(crtc); struct drm_plane *plane; - const struct mdp5_cfg_hw *hw_cfg; struct mdp5_plane_state *pstate, *pstates[STAGE_MAX + 1] = {NULL}; const struct mdp_format *format; struct mdp5_hw_mixer *mixer = pipeline->mixer; @@ -232,8 +231,6 @@ static void blend_setup(struct drm_crtc *crtc) u32 val; #define blender(stage) ((stage) - STAGE0) - hw_cfg = mdp5_cfg_get_hw_config(mdp5_kms->cfg); - spin_lock_irqsave(&mdp5_crtc->lm_lock, flags); /* ctl could be released already when we are shutting down: */ diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c index 91cd76a2bab1..e43ecd4be10a 100644 --- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c +++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c @@ -19,10 +19,6 @@ #include "msm_mmu.h" #include "mdp5_kms.h" -static const char *iommu_ports[] = { - "mdp_0", -}; - static int mdp5_hw_init(struct msm_kms *kms) { struct mdp5_kms *mdp5_kms = to_mdp5_kms(to_mdp_kms(kms)); @@ -233,8 +229,7 @@ static void mdp5_kms_destroy(struct msm_kms *kms) mdp5_pipe_destroy(mdp5_kms->hwpipes[i]); if (aspace) { - aspace->mmu->funcs->detach(aspace->mmu, - iommu_ports, ARRAY_SIZE(iommu_ports)); + aspace->mmu->funcs->detach(aspace->mmu); msm_gem_address_space_put(aspace); } } @@ -314,6 +309,10 @@ int mdp5_disable(struct mdp5_kms *mdp5_kms) mdp5_kms->enable_count--; WARN_ON(mdp5_kms->enable_count < 0); + if (mdp5_kms->tbu_rt_clk) + clk_disable_unprepare(mdp5_kms->tbu_rt_clk); + if (mdp5_kms->tbu_clk) + clk_disable_unprepare(mdp5_kms->tbu_clk); clk_disable_unprepare(mdp5_kms->ahb_clk); clk_disable_unprepare(mdp5_kms->axi_clk); clk_disable_unprepare(mdp5_kms->core_clk); @@ -334,6 +333,10 @@ int mdp5_enable(struct mdp5_kms *mdp5_kms) clk_prepare_enable(mdp5_kms->core_clk); if (mdp5_kms->lut_clk) clk_prepare_enable(mdp5_kms->lut_clk); + if (mdp5_kms->tbu_clk) + clk_prepare_enable(mdp5_kms->tbu_clk); + if (mdp5_kms->tbu_rt_clk) + clk_prepare_enable(mdp5_kms->tbu_rt_clk); return 0; } @@ -466,14 +469,11 @@ static int modeset_init(struct mdp5_kms *mdp5_kms) { struct drm_device *dev = mdp5_kms->dev; struct msm_drm_private *priv = dev->dev_private; - const struct mdp5_cfg_hw *hw_cfg; unsigned int num_crtcs; int i, ret, pi = 0, ci = 0; struct drm_plane *primary[MAX_BASES] = { NULL }; struct drm_plane *cursor[MAX_BASES] = { NULL }; - hw_cfg = mdp5_cfg_get_hw_config(mdp5_kms->cfg); - /* * Construct encoders and modeset initialize connector devices * for each external display interface. @@ -737,8 +737,7 @@ struct msm_kms *mdp5_kms_init(struct drm_device *dev) kms->aspace = aspace; - ret = aspace->mmu->funcs->attach(aspace->mmu, iommu_ports, - ARRAY_SIZE(iommu_ports)); + ret = aspace->mmu->funcs->attach(aspace->mmu); if (ret) { DRM_DEV_ERROR(&pdev->dev, "failed to attach iommu: %d\n", ret); @@ -974,6 +973,8 @@ static int mdp5_init(struct platform_device *pdev, struct drm_device *dev) /* optional clocks: */ get_clk(pdev, &mdp5_kms->lut_clk, "lut", false); + get_clk(pdev, &mdp5_kms->tbu_clk, "tbu", false); + get_clk(pdev, &mdp5_kms->tbu_rt_clk, "tbu_rt", false); /* we need to set a default rate before enabling. Set a safe * rate first, then figure out hw revision, and then set a diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.h b/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.h index d1bf4fdfc815..128866742593 100644 --- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.h +++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.h @@ -53,6 +53,8 @@ struct mdp5_kms { struct clk *ahb_clk; struct clk *core_clk; struct clk *lut_clk; + struct clk *tbu_clk; + struct clk *tbu_rt_clk; struct clk *vsync_clk; /* diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_smp.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_smp.c index b31cfb554fa2..d7fa2c49e741 100644 --- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_smp.c +++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_smp.c @@ -121,7 +121,6 @@ uint32_t mdp5_smp_calculate(struct mdp5_smp *smp, struct mdp5_kms *mdp5_kms = get_kms(smp); int rev = mdp5_cfg_get_hw_rev(mdp5_kms->cfg); int i, hsub, nplanes, nlines; - u32 fmt = format->base.pixel_format; uint32_t blkcfg = 0; nplanes = info->num_planes; @@ -135,7 +134,6 @@ uint32_t mdp5_smp_calculate(struct mdp5_smp *smp, * them together, writes to SMP using a single client. */ if ((rev > 0) && (format->chroma_sample > CHROMA_FULL)) { - fmt = DRM_FORMAT_NV24; nplanes = 2; /* if decimation is enabled, HW decimates less on the diff --git a/drivers/gpu/drm/msm/dsi/dsi_cfg.c b/drivers/gpu/drm/msm/dsi/dsi_cfg.c index b7b7c1a9164a..86ad3fdf207d 100644 --- a/drivers/gpu/drm/msm/dsi/dsi_cfg.c +++ b/drivers/gpu/drm/msm/dsi/dsi_cfg.c @@ -66,6 +66,26 @@ static const struct msm_dsi_config msm8916_dsi_cfg = { .num_dsi = 1, }; +static const char * const dsi_8976_bus_clk_names[] = { + "mdp_core", "iface", "bus", +}; + +static const struct msm_dsi_config msm8976_dsi_cfg = { + .io_offset = DSI_6G_REG_SHIFT, + .reg_cfg = { + .num = 3, + .regs = { + {"gdsc", -1, -1}, + {"vdda", 100000, 100}, /* 1.2 V */ + {"vddio", 100000, 100}, /* 1.8 V */ + }, + }, + .bus_clk_names = dsi_8976_bus_clk_names, + .num_bus_clks = ARRAY_SIZE(dsi_8976_bus_clk_names), + .io_start = { 0x1a94000, 0x1a96000 }, + .num_dsi = 2, +}; + static const struct msm_dsi_config msm8994_dsi_cfg = { .io_offset = DSI_6G_REG_SHIFT, .reg_cfg = { @@ -147,7 +167,7 @@ static const struct msm_dsi_config sdm845_dsi_cfg = { .num_dsi = 2, }; -const static struct msm_dsi_host_cfg_ops msm_dsi_v2_host_ops = { +static const struct msm_dsi_host_cfg_ops msm_dsi_v2_host_ops = { .link_clk_enable = dsi_link_clk_enable_v2, .link_clk_disable = dsi_link_clk_disable_v2, .clk_init_ver = dsi_clk_init_v2, @@ -158,7 +178,7 @@ const static struct msm_dsi_host_cfg_ops msm_dsi_v2_host_ops = { .calc_clk_rate = dsi_calc_clk_rate_v2, }; -const static struct msm_dsi_host_cfg_ops msm_dsi_6g_host_ops = { +static const struct msm_dsi_host_cfg_ops msm_dsi_6g_host_ops = { .link_clk_enable = dsi_link_clk_enable_6g, .link_clk_disable = dsi_link_clk_disable_6g, .clk_init_ver = NULL, @@ -169,7 +189,7 @@ const static struct msm_dsi_host_cfg_ops msm_dsi_6g_host_ops = { .calc_clk_rate = dsi_calc_clk_rate_6g, }; -const static struct msm_dsi_host_cfg_ops msm_dsi_6g_v2_host_ops = { +static const struct msm_dsi_host_cfg_ops msm_dsi_6g_v2_host_ops = { .link_clk_enable = dsi_link_clk_enable_6g, .link_clk_disable = dsi_link_clk_disable_6g, .clk_init_ver = dsi_clk_init_6g_v2, @@ -197,6 +217,8 @@ static const struct msm_dsi_cfg_handler dsi_cfg_handlers[] = { &msm8916_dsi_cfg, &msm_dsi_6g_host_ops}, {MSM_DSI_VER_MAJOR_6G, MSM_DSI_6G_VER_MINOR_V1_4_1, &msm8996_dsi_cfg, &msm_dsi_6g_host_ops}, + {MSM_DSI_VER_MAJOR_6G, MSM_DSI_6G_VER_MINOR_V1_4_2, + &msm8976_dsi_cfg, &msm_dsi_6g_host_ops}, {MSM_DSI_VER_MAJOR_6G, MSM_DSI_6G_VER_MINOR_V2_2_0, &msm8998_dsi_cfg, &msm_dsi_6g_v2_host_ops}, {MSM_DSI_VER_MAJOR_6G, MSM_DSI_6G_VER_MINOR_V2_2_1, diff --git a/drivers/gpu/drm/msm/dsi/dsi_cfg.h b/drivers/gpu/drm/msm/dsi/dsi_cfg.h index e2b7a7dfbe49..50a37ceb6a25 100644 --- a/drivers/gpu/drm/msm/dsi/dsi_cfg.h +++ b/drivers/gpu/drm/msm/dsi/dsi_cfg.h @@ -17,6 +17,7 @@ #define MSM_DSI_6G_VER_MINOR_V1_3 0x10030000 #define MSM_DSI_6G_VER_MINOR_V1_3_1 0x10030001 #define MSM_DSI_6G_VER_MINOR_V1_4_1 0x10040001 +#define MSM_DSI_6G_VER_MINOR_V1_4_2 0x10040002 #define MSM_DSI_6G_VER_MINOR_V2_2_0 0x20000000 #define MSM_DSI_6G_VER_MINOR_V2_2_1 0x20020001 diff --git a/drivers/gpu/drm/msm/dsi/dsi_host.c b/drivers/gpu/drm/msm/dsi/dsi_host.c index 1e7b1be25bb0..458cec82ae13 100644 --- a/drivers/gpu/drm/msm/dsi/dsi_host.c +++ b/drivers/gpu/drm/msm/dsi/dsi_host.c @@ -1293,14 +1293,13 @@ static int dsi_cmd_dma_tx(struct msm_dsi_host *msm_host, int len) static int dsi_cmd_dma_rx(struct msm_dsi_host *msm_host, u8 *buf, int rx_byte, int pkt_size) { - u32 *lp, *temp, data; + u32 *temp, data; int i, j = 0, cnt; u32 read_cnt; u8 reg[16]; int repeated_bytes = 0; int buf_offset = buf - msm_host->rx_buf; - lp = (u32 *)buf; temp = (u32 *)reg; cnt = (rx_byte + 3) >> 2; if (cnt > 4) diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c index 3522863a4984..b0cfa67d2a57 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c @@ -145,7 +145,7 @@ int msm_dsi_dphy_timing_calc_v2(struct msm_dsi_dphy_timing *timing, { const unsigned long bit_rate = clk_req->bitclk_rate; const unsigned long esc_rate = clk_req->escclk_rate; - s32 ui, ui_x8, lpx; + s32 ui, ui_x8; s32 tmax, tmin; s32 pcnt0 = 50; s32 pcnt1 = 50; @@ -175,7 +175,6 @@ int msm_dsi_dphy_timing_calc_v2(struct msm_dsi_dphy_timing *timing, ui = mult_frac(NSEC_PER_MSEC, coeff, bit_rate / 1000); ui_x8 = ui << 3; - lpx = mult_frac(NSEC_PER_MSEC, coeff, esc_rate / 1000); temp = S_DIV_ROUND_UP(38 * coeff - val_ckln * ui, ui_x8); tmin = max_t(s32, temp, 0); @@ -262,7 +261,7 @@ int msm_dsi_dphy_timing_calc_v3(struct msm_dsi_dphy_timing *timing, { const unsigned long bit_rate = clk_req->bitclk_rate; const unsigned long esc_rate = clk_req->escclk_rate; - s32 ui, ui_x8, lpx; + s32 ui, ui_x8; s32 tmax, tmin; s32 pcnt0 = 50; s32 pcnt1 = 50; @@ -284,7 +283,6 @@ int msm_dsi_dphy_timing_calc_v3(struct msm_dsi_dphy_timing *timing, ui = mult_frac(NSEC_PER_MSEC, coeff, bit_rate / 1000); ui_x8 = ui << 3; - lpx = mult_frac(NSEC_PER_MSEC, coeff, esc_rate / 1000); temp = S_DIV_ROUND_UP(38 * coeff, ui_x8); tmin = max_t(s32, temp, 0); @@ -485,6 +483,8 @@ static const struct of_device_id dsi_phy_dt_match[] = { #ifdef CONFIG_DRM_MSM_DSI_28NM_PHY { .compatible = "qcom,dsi-phy-28nm-hpm", .data = &dsi_phy_28nm_hpm_cfgs }, + { .compatible = "qcom,dsi-phy-28nm-hpm-fam-b", + .data = &dsi_phy_28nm_hpm_famb_cfgs }, { .compatible = "qcom,dsi-phy-28nm-lp", .data = &dsi_phy_28nm_lp_cfgs }, #endif diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h index c4069ce6afe6..24b294ed3059 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h @@ -40,6 +40,7 @@ struct msm_dsi_phy_cfg { }; extern const struct msm_dsi_phy_cfg dsi_phy_28nm_hpm_cfgs; +extern const struct msm_dsi_phy_cfg dsi_phy_28nm_hpm_famb_cfgs; extern const struct msm_dsi_phy_cfg dsi_phy_28nm_lp_cfgs; extern const struct msm_dsi_phy_cfg dsi_phy_20nm_cfgs; extern const struct msm_dsi_phy_cfg dsi_phy_28nm_8960_cfgs; diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm.c index b3f678f6c2aa..c3c580cfd8b1 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm.c +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm.c @@ -39,15 +39,10 @@ static void dsi_28nm_dphy_set_timing(struct msm_dsi_phy *phy, DSI_28nm_PHY_TIMING_CTRL_11_TRIG3_CMD(0)); } -static void dsi_28nm_phy_regulator_ctrl(struct msm_dsi_phy *phy, bool enable) +static void dsi_28nm_phy_regulator_enable_dcdc(struct msm_dsi_phy *phy) { void __iomem *base = phy->reg_base; - if (!enable) { - dsi_phy_write(base + REG_DSI_28nm_PHY_REGULATOR_CAL_PWR_CFG, 0); - return; - } - dsi_phy_write(base + REG_DSI_28nm_PHY_REGULATOR_CTRL_0, 0x0); dsi_phy_write(base + REG_DSI_28nm_PHY_REGULATOR_CAL_PWR_CFG, 1); dsi_phy_write(base + REG_DSI_28nm_PHY_REGULATOR_CTRL_5, 0); @@ -56,6 +51,39 @@ static void dsi_28nm_phy_regulator_ctrl(struct msm_dsi_phy *phy, bool enable) dsi_phy_write(base + REG_DSI_28nm_PHY_REGULATOR_CTRL_1, 0x9); dsi_phy_write(base + REG_DSI_28nm_PHY_REGULATOR_CTRL_0, 0x7); dsi_phy_write(base + REG_DSI_28nm_PHY_REGULATOR_CTRL_4, 0x20); + dsi_phy_write(phy->base + REG_DSI_28nm_PHY_LDO_CNTRL, 0x00); +} + +static void dsi_28nm_phy_regulator_enable_ldo(struct msm_dsi_phy *phy) +{ + void __iomem *base = phy->reg_base; + + dsi_phy_write(base + REG_DSI_28nm_PHY_REGULATOR_CTRL_0, 0x0); + dsi_phy_write(base + REG_DSI_28nm_PHY_REGULATOR_CAL_PWR_CFG, 0); + dsi_phy_write(base + REG_DSI_28nm_PHY_REGULATOR_CTRL_5, 0x7); + dsi_phy_write(base + REG_DSI_28nm_PHY_REGULATOR_CTRL_3, 0); + dsi_phy_write(base + REG_DSI_28nm_PHY_REGULATOR_CTRL_2, 0x1); + dsi_phy_write(base + REG_DSI_28nm_PHY_REGULATOR_CTRL_1, 0x1); + dsi_phy_write(base + REG_DSI_28nm_PHY_REGULATOR_CTRL_4, 0x20); + + if (phy->cfg->type == MSM_DSI_PHY_28NM_LP) + dsi_phy_write(phy->base + REG_DSI_28nm_PHY_LDO_CNTRL, 0x05); + else + dsi_phy_write(phy->base + REG_DSI_28nm_PHY_LDO_CNTRL, 0x0d); +} + +static void dsi_28nm_phy_regulator_ctrl(struct msm_dsi_phy *phy, bool enable) +{ + if (!enable) { + dsi_phy_write(phy->reg_base + + REG_DSI_28nm_PHY_REGULATOR_CAL_PWR_CFG, 0); + return; + } + + if (phy->regulator_ldo_mode) + dsi_28nm_phy_regulator_enable_ldo(phy); + else + dsi_28nm_phy_regulator_enable_dcdc(phy); } static int dsi_28nm_phy_enable(struct msm_dsi_phy *phy, int src_pll_id, @@ -77,8 +105,6 @@ static int dsi_28nm_phy_enable(struct msm_dsi_phy *phy, int src_pll_id, dsi_28nm_phy_regulator_ctrl(phy, true); - dsi_phy_write(base + REG_DSI_28nm_PHY_LDO_CNTRL, 0x00); - dsi_28nm_dphy_set_timing(phy, timing); dsi_phy_write(base + REG_DSI_28nm_PHY_CTRL_1, 0x00); @@ -142,6 +168,24 @@ const struct msm_dsi_phy_cfg dsi_phy_28nm_hpm_cfgs = { .num_dsi_phy = 2, }; +const struct msm_dsi_phy_cfg dsi_phy_28nm_hpm_famb_cfgs = { + .type = MSM_DSI_PHY_28NM_HPM, + .src_pll_truthtable = { {true, true}, {false, true} }, + .reg_cfg = { + .num = 1, + .regs = { + {"vddio", 100000, 100}, + }, + }, + .ops = { + .enable = dsi_28nm_phy_enable, + .disable = dsi_28nm_phy_disable, + .init = msm_dsi_phy_init_common, + }, + .io_start = { 0x1a94400, 0x1a96400 }, + .num_dsi_phy = 2, +}; + const struct msm_dsi_phy_cfg dsi_phy_28nm_lp_cfgs = { .type = MSM_DSI_PHY_28NM_LP, .src_pll_truthtable = { {true, true}, {true, true} }, diff --git a/drivers/gpu/drm/msm/hdmi/hdmi_phy.c b/drivers/gpu/drm/msm/hdmi/hdmi_phy.c index 1697e61f9c2f..8a38d4b95102 100644 --- a/drivers/gpu/drm/msm/hdmi/hdmi_phy.c +++ b/drivers/gpu/drm/msm/hdmi/hdmi_phy.c @@ -29,8 +29,12 @@ static int msm_hdmi_phy_resource_init(struct hdmi_phy *phy) reg = devm_regulator_get(dev, cfg->reg_names[i]); if (IS_ERR(reg)) { ret = PTR_ERR(reg); - DRM_DEV_ERROR(dev, "failed to get phy regulator: %s (%d)\n", - cfg->reg_names[i], ret); + if (ret != -EPROBE_DEFER) { + DRM_DEV_ERROR(dev, + "failed to get phy regulator: %s (%d)\n", + cfg->reg_names[i], ret); + } + return ret; } diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c index a052364a5d74..18f3a5c53ffb 100644 --- a/drivers/gpu/drm/msm/msm_gpu.c +++ b/drivers/gpu/drm/msm/msm_gpu.c @@ -16,6 +16,7 @@ #include <linux/pm_opp.h> #include <linux/devfreq.h> #include <linux/devcoredump.h> +#include <linux/sched/task.h> /* * Power Management: @@ -838,7 +839,7 @@ msm_gpu_create_address_space(struct msm_gpu *gpu, struct platform_device *pdev, return ERR_CAST(aspace); } - ret = aspace->mmu->funcs->attach(aspace->mmu, NULL, 0); + ret = aspace->mmu->funcs->attach(aspace->mmu); if (ret) { msm_gem_address_space_put(aspace); return ERR_PTR(ret); @@ -995,8 +996,7 @@ void msm_gpu_cleanup(struct msm_gpu *gpu) msm_gem_kernel_put(gpu->memptrs_bo, gpu->aspace, false); if (!IS_ERR_OR_NULL(gpu->aspace)) { - gpu->aspace->mmu->funcs->detach(gpu->aspace->mmu, - NULL, 0); + gpu->aspace->mmu->funcs->detach(gpu->aspace->mmu); msm_gem_address_space_put(gpu->aspace); } } diff --git a/drivers/gpu/drm/msm/msm_gpummu.c b/drivers/gpu/drm/msm/msm_gpummu.c index 34f643a0c28a..34980d8eb7ad 100644 --- a/drivers/gpu/drm/msm/msm_gpummu.c +++ b/drivers/gpu/drm/msm/msm_gpummu.c @@ -21,14 +21,12 @@ struct msm_gpummu { #define GPUMMU_PAGE_SIZE SZ_4K #define TABLE_SIZE (sizeof(uint32_t) * GPUMMU_VA_RANGE / GPUMMU_PAGE_SIZE) -static int msm_gpummu_attach(struct msm_mmu *mmu, const char * const *names, - int cnt) +static int msm_gpummu_attach(struct msm_mmu *mmu) { return 0; } -static void msm_gpummu_detach(struct msm_mmu *mmu, const char * const *names, - int cnt) +static void msm_gpummu_detach(struct msm_mmu *mmu) { } diff --git a/drivers/gpu/drm/msm/msm_iommu.c b/drivers/gpu/drm/msm/msm_iommu.c index 8c95c31e2b12..ad58cfe5998e 100644 --- a/drivers/gpu/drm/msm/msm_iommu.c +++ b/drivers/gpu/drm/msm/msm_iommu.c @@ -23,16 +23,14 @@ static int msm_fault_handler(struct iommu_domain *domain, struct device *dev, return 0; } -static int msm_iommu_attach(struct msm_mmu *mmu, const char * const *names, - int cnt) +static int msm_iommu_attach(struct msm_mmu *mmu) { struct msm_iommu *iommu = to_msm_iommu(mmu); return iommu_attach_device(iommu->domain, mmu->dev); } -static void msm_iommu_detach(struct msm_mmu *mmu, const char * const *names, - int cnt) +static void msm_iommu_detach(struct msm_mmu *mmu) { struct msm_iommu *iommu = to_msm_iommu(mmu); diff --git a/drivers/gpu/drm/msm/msm_mmu.h b/drivers/gpu/drm/msm/msm_mmu.h index 871d56303697..67a623f14319 100644 --- a/drivers/gpu/drm/msm/msm_mmu.h +++ b/drivers/gpu/drm/msm/msm_mmu.h @@ -10,8 +10,8 @@ #include <linux/iommu.h> struct msm_mmu_funcs { - int (*attach)(struct msm_mmu *mmu, const char * const *names, int cnt); - void (*detach)(struct msm_mmu *mmu, const char * const *names, int cnt); + int (*attach)(struct msm_mmu *mmu); + void (*detach)(struct msm_mmu *mmu); int (*map)(struct msm_mmu *mmu, uint64_t iova, struct sg_table *sgt, unsigned len, int prot); int (*unmap)(struct msm_mmu *mmu, uint64_t iova, unsigned len); diff --git a/drivers/gpu/drm/msm/msm_rd.c b/drivers/gpu/drm/msm/msm_rd.c index c7832a951039..af7ceb246c7c 100644 --- a/drivers/gpu/drm/msm/msm_rd.c +++ b/drivers/gpu/drm/msm/msm_rd.c @@ -298,7 +298,7 @@ void msm_rd_debugfs_cleanup(struct msm_drm_private *priv) static void snapshot_buf(struct msm_rd_state *rd, struct msm_gem_submit *submit, int idx, - uint64_t iova, uint32_t size) + uint64_t iova, uint32_t size, bool full) { struct msm_gem_object *obj = submit->bos[idx].obj; unsigned offset = 0; @@ -318,6 +318,9 @@ static void snapshot_buf(struct msm_rd_state *rd, rd_write_section(rd, RD_GPUADDR, (uint32_t[3]){ iova, size, iova >> 32 }, 12); + if (!full) + return; + /* But only dump the contents of buffers marked READ */ if (!(submit->bos[idx].flags & MSM_SUBMIT_BO_READ)) return; @@ -381,18 +384,21 @@ void msm_rd_dump_submit(struct msm_rd_state *rd, struct msm_gem_submit *submit, rd_write_section(rd, RD_CMD, msg, ALIGN(n, 4)); for (i = 0; i < submit->nr_bos; i++) - if (should_dump(submit, i)) - snapshot_buf(rd, submit, i, 0, 0); + snapshot_buf(rd, submit, i, 0, 0, should_dump(submit, i)); for (i = 0; i < submit->nr_cmds; i++) { - uint64_t iova = submit->cmd[i].iova; uint32_t szd = submit->cmd[i].size; /* in dwords */ /* snapshot cmdstream bo's (if we haven't already): */ if (!should_dump(submit, i)) { snapshot_buf(rd, submit, submit->cmd[i].idx, - submit->cmd[i].iova, szd * 4); + submit->cmd[i].iova, szd * 4, true); } + } + + for (i = 0; i < submit->nr_cmds; i++) { + uint64_t iova = submit->cmd[i].iova; + uint32_t szd = submit->cmd[i].size; /* in dwords */ switch (submit->cmd[i].type) { case MSM_SUBMIT_CMD_IB_TARGET_BUF: diff --git a/drivers/gpu/drm/omapdrm/omap_gem.c b/drivers/gpu/drm/omapdrm/omap_gem.c index e518d93ca6df..d08ae95ecc0a 100644 --- a/drivers/gpu/drm/omapdrm/omap_gem.c +++ b/drivers/gpu/drm/omapdrm/omap_gem.c @@ -843,9 +843,13 @@ fail: */ static void omap_gem_unpin_locked(struct drm_gem_object *obj) { + struct omap_drm_private *priv = obj->dev->dev_private; struct omap_gem_object *omap_obj = to_omap_bo(obj); int ret; + if (omap_gem_is_contiguous(omap_obj) || !priv->has_dmm) + return; + if (refcount_dec_and_test(&omap_obj->dma_addr_cnt)) { ret = tiler_unpin(omap_obj->block); if (ret) { diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index daff9a2af3be..acabeaf28732 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -6965,8 +6965,8 @@ static int cik_irq_init(struct radeon_device *rdev) } /* setup interrupt control */ - /* XXX this should actually be a bus address, not an MC address. same on older asics */ - WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8); + /* set dummy read address to dummy page address */ + WREG32(INTERRUPT_CNTL2, rdev->dummy_page.addr >> 8); interrupt_cntl = RREG32(INTERRUPT_CNTL); /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c index 7089dfc8c2a9..110fb38004b1 100644 --- a/drivers/gpu/drm/radeon/r100.c +++ b/drivers/gpu/drm/radeon/r100.c @@ -1826,8 +1826,8 @@ static int r100_packet0_check(struct radeon_cs_parser *p, track->textures[i].use_pitch = 1; } else { track->textures[i].use_pitch = 0; - track->textures[i].width = 1 << ((idx_value >> RADEON_TXFORMAT_WIDTH_SHIFT) & RADEON_TXFORMAT_WIDTH_MASK); - track->textures[i].height = 1 << ((idx_value >> RADEON_TXFORMAT_HEIGHT_SHIFT) & RADEON_TXFORMAT_HEIGHT_MASK); + track->textures[i].width = 1 << ((idx_value & RADEON_TXFORMAT_WIDTH_MASK) >> RADEON_TXFORMAT_WIDTH_SHIFT); + track->textures[i].height = 1 << ((idx_value & RADEON_TXFORMAT_HEIGHT_MASK) >> RADEON_TXFORMAT_HEIGHT_SHIFT); } if (idx_value & RADEON_TXFORMAT_CUBIC_MAP_ENABLE) track->textures[i].tex_coord_type = 2; diff --git a/drivers/gpu/drm/radeon/r200.c b/drivers/gpu/drm/radeon/r200.c index 840401413c58..f5f2ffea5ab2 100644 --- a/drivers/gpu/drm/radeon/r200.c +++ b/drivers/gpu/drm/radeon/r200.c @@ -476,8 +476,8 @@ int r200_packet0_check(struct radeon_cs_parser *p, track->textures[i].use_pitch = 1; } else { track->textures[i].use_pitch = 0; - track->textures[i].width = 1 << ((idx_value >> RADEON_TXFORMAT_WIDTH_SHIFT) & RADEON_TXFORMAT_WIDTH_MASK); - track->textures[i].height = 1 << ((idx_value >> RADEON_TXFORMAT_HEIGHT_SHIFT) & RADEON_TXFORMAT_HEIGHT_MASK); + track->textures[i].width = 1 << ((idx_value & RADEON_TXFORMAT_WIDTH_MASK) >> RADEON_TXFORMAT_WIDTH_SHIFT); + track->textures[i].height = 1 << ((idx_value & RADEON_TXFORMAT_HEIGHT_MASK) >> RADEON_TXFORMAT_HEIGHT_SHIFT); } if (idx_value & R200_TXFORMAT_LOOKUP_DISABLE) track->textures[i].lookup_disable = true; diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index e937cc01910d..033bc466a862 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -3696,8 +3696,8 @@ int r600_irq_init(struct radeon_device *rdev) } /* setup interrupt control */ - /* set dummy read address to ring address */ - WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8); + /* set dummy read address to dummy page address */ + WREG32(INTERRUPT_CNTL2, rdev->dummy_page.addr >> 8); interrupt_cntl = RREG32(INTERRUPT_CNTL); /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c index 41f08321a361..fd74e2611185 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ b/drivers/gpu/drm/radeon/radeon_drv.c @@ -379,10 +379,6 @@ radeon_pci_remove(struct pci_dev *pdev) static void radeon_pci_shutdown(struct pci_dev *pdev) { -#ifdef CONFIG_PPC64 - struct drm_device *ddev = pci_get_drvdata(pdev); -#endif - /* if we are running in a VM, make sure the device * torn down properly on reboot/shutdown */ @@ -390,13 +386,14 @@ radeon_pci_shutdown(struct pci_dev *pdev) radeon_pci_remove(pdev); #ifdef CONFIG_PPC64 - /* Some adapters need to be suspended before a + /* + * Some adapters need to be suspended before a * shutdown occurs in order to prevent an error * during kexec. * Make this power specific becauase it breaks * some non-power boards. */ - radeon_suspend_kms(ddev, true, true, false); + radeon_suspend_kms(pci_get_drvdata(pdev), true, true, false); #endif } diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c index 05894d198a79..1d8efb0eefdb 100644 --- a/drivers/gpu/drm/radeon/si.c +++ b/drivers/gpu/drm/radeon/si.c @@ -5997,8 +5997,8 @@ static int si_irq_init(struct radeon_device *rdev) } /* setup interrupt control */ - /* set dummy read address to ring address */ - WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8); + /* set dummy read address to dummy page address */ + WREG32(INTERRUPT_CNTL2, rdev->dummy_page.addr >> 8); interrupt_cntl = RREG32(INTERRUPT_CNTL); /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN diff --git a/drivers/gpu/drm/tegra/dc.c b/drivers/gpu/drm/tegra/dc.c index 5b1f9ff97576..714af052fbef 100644 --- a/drivers/gpu/drm/tegra/dc.c +++ b/drivers/gpu/drm/tegra/dc.c @@ -837,16 +837,15 @@ static int tegra_cursor_atomic_check(struct drm_plane *plane, static void tegra_cursor_atomic_update(struct drm_plane *plane, struct drm_plane_state *old_state) { - struct tegra_bo *bo = tegra_fb_get_plane(plane->state->fb, 0); + struct tegra_plane_state *state = to_tegra_plane_state(plane->state); struct tegra_dc *dc = to_tegra_dc(plane->state->crtc); - struct drm_plane_state *state = plane->state; u32 value = CURSOR_CLIP_DISPLAY; /* rien ne va plus */ if (!plane->state->crtc || !plane->state->fb) return; - switch (state->crtc_w) { + switch (plane->state->crtc_w) { case 32: value |= CURSOR_SIZE_32x32; break; @@ -864,16 +863,16 @@ static void tegra_cursor_atomic_update(struct drm_plane *plane, break; default: - WARN(1, "cursor size %ux%u not supported\n", state->crtc_w, - state->crtc_h); + WARN(1, "cursor size %ux%u not supported\n", + plane->state->crtc_w, plane->state->crtc_h); return; } - value |= (bo->iova >> 10) & 0x3fffff; + value |= (state->iova[0] >> 10) & 0x3fffff; tegra_dc_writel(dc, value, DC_DISP_CURSOR_START_ADDR); #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT - value = (bo->iova >> 32) & 0x3; + value = (state->iova[0] >> 32) & 0x3; tegra_dc_writel(dc, value, DC_DISP_CURSOR_START_ADDR_HI); #endif @@ -892,7 +891,8 @@ static void tegra_cursor_atomic_update(struct drm_plane *plane, tegra_dc_writel(dc, value, DC_DISP_BLEND_CURSOR_CONTROL); /* position the cursor */ - value = (state->crtc_y & 0x3fff) << 16 | (state->crtc_x & 0x3fff); + value = (plane->state->crtc_y & 0x3fff) << 16 | + (plane->state->crtc_x & 0x3fff); tegra_dc_writel(dc, value, DC_DISP_CURSOR_POSITION); } @@ -2017,7 +2017,7 @@ static int tegra_dc_init(struct host1x_client *client) dev_warn(dc->dev, "failed to allocate syncpoint\n"); err = host1x_client_iommu_attach(client); - if (err < 0) { + if (err < 0 && err != -ENODEV) { dev_err(client->dev, "failed to attach to domain: %d\n", err); return err; } diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c index 56e5e7a5c108..f455ce71e85d 100644 --- a/drivers/gpu/drm/tegra/drm.c +++ b/drivers/gpu/drm/tegra/drm.c @@ -920,10 +920,8 @@ int host1x_client_iommu_attach(struct host1x_client *client) if (tegra->domain) { group = iommu_group_get(client->dev); - if (!group) { - dev_err(client->dev, "failed to get IOMMU group\n"); + if (!group) return -ENODEV; - } if (domain != tegra->domain) { err = iommu_attach_group(tegra->domain, group); @@ -1243,6 +1241,9 @@ static int host1x_drm_remove(struct host1x_device *dev) drm_atomic_helper_shutdown(drm); drm_mode_config_cleanup(drm); + if (tegra->hub) + tegra_display_hub_cleanup(tegra->hub); + err = host1x_device_exit(dev); if (err < 0) dev_err(&dev->dev, "host1x device cleanup failed: %d\n", err); diff --git a/drivers/gpu/drm/tegra/gem.c b/drivers/gpu/drm/tegra/gem.c index 746dae32c484..bc15b430156d 100644 --- a/drivers/gpu/drm/tegra/gem.c +++ b/drivers/gpu/drm/tegra/gem.c @@ -27,6 +27,29 @@ static void tegra_bo_put(struct host1x_bo *bo) drm_gem_object_put_unlocked(&obj->gem); } +/* XXX move this into lib/scatterlist.c? */ +static int sg_alloc_table_from_sg(struct sg_table *sgt, struct scatterlist *sg, + unsigned int nents, gfp_t gfp_mask) +{ + struct scatterlist *dst; + unsigned int i; + int err; + + err = sg_alloc_table(sgt, nents, gfp_mask); + if (err < 0) + return err; + + dst = sgt->sgl; + + for (i = 0; i < nents; i++) { + sg_set_page(dst, sg_page(sg), sg->length, 0); + dst = sg_next(dst); + sg = sg_next(sg); + } + + return 0; +} + static struct sg_table *tegra_bo_pin(struct device *dev, struct host1x_bo *bo, dma_addr_t *phys) { @@ -52,11 +75,31 @@ static struct sg_table *tegra_bo_pin(struct device *dev, struct host1x_bo *bo, return ERR_PTR(-ENOMEM); if (obj->pages) { + /* + * If the buffer object was allocated from the explicit IOMMU + * API code paths, construct an SG table from the pages. + */ err = sg_alloc_table_from_pages(sgt, obj->pages, obj->num_pages, 0, obj->gem.size, GFP_KERNEL); if (err < 0) goto free; + } else if (obj->sgt) { + /* + * If the buffer object already has an SG table but no pages + * were allocated for it, it means the buffer was imported and + * the SG table needs to be copied to avoid overwriting any + * other potential users of the original SG table. + */ + err = sg_alloc_table_from_sg(sgt, obj->sgt->sgl, obj->sgt->nents, + GFP_KERNEL); + if (err < 0) + goto free; } else { + /* + * If the buffer object had no pages allocated and if it was + * not imported, it had to be allocated with the DMA API, so + * the DMA API helper can be used. + */ err = dma_get_sgtable(dev, sgt, obj->vaddr, obj->iova, obj->gem.size); if (err < 0) @@ -397,13 +440,6 @@ static struct tegra_bo *tegra_bo_import(struct drm_device *drm, err = tegra_bo_iommu_map(tegra, bo); if (err < 0) goto detach; - } else { - if (bo->sgt->nents > 1) { - err = -EINVAL; - goto detach; - } - - bo->iova = sg_dma_address(bo->sgt->sgl); } bo->gem.import_attach = attach; diff --git a/drivers/gpu/drm/tegra/hub.c b/drivers/gpu/drm/tegra/hub.c index 2b4082d0bc9e..47d985ac7cd7 100644 --- a/drivers/gpu/drm/tegra/hub.c +++ b/drivers/gpu/drm/tegra/hub.c @@ -605,11 +605,8 @@ static struct tegra_display_hub_state * tegra_display_hub_get_state(struct tegra_display_hub *hub, struct drm_atomic_state *state) { - struct drm_device *drm = dev_get_drvdata(hub->client.parent); struct drm_private_state *priv; - WARN_ON(!drm_modeset_is_locked(&drm->mode_config.connection_mutex)); - priv = drm_atomic_get_private_obj_state(state, &hub->base); if (IS_ERR(priv)) return ERR_CAST(priv); diff --git a/drivers/gpu/drm/tegra/plane.c b/drivers/gpu/drm/tegra/plane.c index 163b590be224..cadcdd9ea427 100644 --- a/drivers/gpu/drm/tegra/plane.c +++ b/drivers/gpu/drm/tegra/plane.c @@ -129,6 +129,17 @@ static int tegra_dc_pin(struct tegra_dc *dc, struct tegra_plane_state *state) goto unpin; } + /* + * The display controller needs contiguous memory, so + * fail if the buffer is discontiguous and we fail to + * map its SG table to a single contiguous chunk of + * I/O virtual memory. + */ + if (err > 1) { + err = -EINVAL; + goto unpin; + } + state->iova[i] = sg_dma_address(sgt->sgl); state->sgt[i] = sgt; } else { diff --git a/drivers/gpu/drm/tegra/sor.c b/drivers/gpu/drm/tegra/sor.c index 615cb319fa8b..a68d3b36b972 100644 --- a/drivers/gpu/drm/tegra/sor.c +++ b/drivers/gpu/drm/tegra/sor.c @@ -3912,8 +3912,7 @@ static int tegra_sor_remove(struct platform_device *pdev) return 0; } -#ifdef CONFIG_PM -static int tegra_sor_suspend(struct device *dev) +static int tegra_sor_runtime_suspend(struct device *dev) { struct tegra_sor *sor = dev_get_drvdata(dev); int err; @@ -3935,7 +3934,7 @@ static int tegra_sor_suspend(struct device *dev) return 0; } -static int tegra_sor_resume(struct device *dev) +static int tegra_sor_runtime_resume(struct device *dev) { struct tegra_sor *sor = dev_get_drvdata(dev); int err; @@ -3967,10 +3966,39 @@ static int tegra_sor_resume(struct device *dev) return 0; } -#endif + +static int tegra_sor_suspend(struct device *dev) +{ + struct tegra_sor *sor = dev_get_drvdata(dev); + int err; + + if (sor->hdmi_supply) { + err = regulator_disable(sor->hdmi_supply); + if (err < 0) + return err; + } + + return 0; +} + +static int tegra_sor_resume(struct device *dev) +{ + struct tegra_sor *sor = dev_get_drvdata(dev); + int err; + + if (sor->hdmi_supply) { + err = regulator_enable(sor->hdmi_supply); + if (err < 0) + return err; + } + + return 0; +} static const struct dev_pm_ops tegra_sor_pm_ops = { - SET_RUNTIME_PM_OPS(tegra_sor_suspend, tegra_sor_resume, NULL) + SET_RUNTIME_PM_OPS(tegra_sor_runtime_suspend, tegra_sor_runtime_resume, + NULL) + SET_SYSTEM_SLEEP_PM_OPS(tegra_sor_suspend, tegra_sor_resume) }; struct platform_driver tegra_sor_driver = { diff --git a/drivers/gpu/drm/tegra/vic.c b/drivers/gpu/drm/tegra/vic.c index 9444ba183990..3526c2892ddb 100644 --- a/drivers/gpu/drm/tegra/vic.c +++ b/drivers/gpu/drm/tegra/vic.c @@ -167,7 +167,7 @@ static int vic_init(struct host1x_client *client) int err; err = host1x_client_iommu_attach(client); - if (err < 0) { + if (err < 0 && err != -ENODEV) { dev_err(vic->dev, "failed to attach to domain: %d\n", err); return err; } @@ -386,13 +386,14 @@ static const struct vic_config vic_t194_config = { .supports_sid = true, }; -static const struct of_device_id vic_match[] = { +static const struct of_device_id tegra_vic_of_match[] = { { .compatible = "nvidia,tegra124-vic", .data = &vic_t124_config }, { .compatible = "nvidia,tegra210-vic", .data = &vic_t210_config }, { .compatible = "nvidia,tegra186-vic", .data = &vic_t186_config }, { .compatible = "nvidia,tegra194-vic", .data = &vic_t194_config }, { }, }; +MODULE_DEVICE_TABLE(of, tegra_vic_of_match); static int vic_probe(struct platform_device *pdev) { @@ -516,7 +517,7 @@ static const struct dev_pm_ops vic_pm_ops = { struct platform_driver tegra_vic_driver = { .driver = { .name = "tegra-vic", - .of_match_table = vic_match, + .of_match_table = tegra_vic_of_match, .pm = &vic_pm_ops }, .probe = vic_probe, diff --git a/drivers/soc/qcom/Kconfig b/drivers/soc/qcom/Kconfig index 661e47acc354..adbf3bfae805 100644 --- a/drivers/soc/qcom/Kconfig +++ b/drivers/soc/qcom/Kconfig @@ -74,6 +74,16 @@ config QCOM_MDT_LOADER tristate select QCOM_SCM +config QCOM_OCMEM + tristate "Qualcomm On Chip Memory (OCMEM) driver" + depends on ARCH_QCOM + select QCOM_SCM + help + The On Chip Memory (OCMEM) allocator allows various clients to + allocate memory from OCMEM based on performance, latency and power + requirements. This is typically used by the GPU, camera/video, and + audio components on some Snapdragon SoCs. + config QCOM_PM bool "Qualcomm Power Management" depends on ARCH_QCOM && !ARM64 diff --git a/drivers/soc/qcom/Makefile b/drivers/soc/qcom/Makefile index 162788701a77..aab333720bfd 100644 --- a/drivers/soc/qcom/Makefile +++ b/drivers/soc/qcom/Makefile @@ -6,6 +6,7 @@ obj-$(CONFIG_QCOM_COMMAND_DB) += cmd-db.o obj-$(CONFIG_QCOM_GLINK_SSR) += glink_ssr.o obj-$(CONFIG_QCOM_GSBI) += qcom_gsbi.o obj-$(CONFIG_QCOM_MDT_LOADER) += mdt_loader.o +obj-$(CONFIG_QCOM_OCMEM) += ocmem.o obj-$(CONFIG_QCOM_PM) += spm.o obj-$(CONFIG_QCOM_QMI_HELPERS) += qmi_helpers.o qmi_helpers-y += qmi_encdec.o qmi_interface.o diff --git a/drivers/soc/qcom/ocmem.c b/drivers/soc/qcom/ocmem.c new file mode 100644 index 000000000000..7f9e9944d1ea --- /dev/null +++ b/drivers/soc/qcom/ocmem.c @@ -0,0 +1,433 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * The On Chip Memory (OCMEM) allocator allows various clients to allocate + * memory from OCMEM based on performance, latency and power requirements. + * This is typically used by the GPU, camera/video, and audio components on + * some Snapdragon SoCs. + * + * Copyright (C) 2019 Brian Masney <masneyb@onstation.org> + * Copyright (C) 2015 Red Hat. Author: Rob Clark <robdclark@gmail.com> + */ + +#include <linux/bitfield.h> +#include <linux/clk.h> +#include <linux/io.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/of_device.h> +#include <linux/platform_device.h> +#include <linux/qcom_scm.h> +#include <linux/sizes.h> +#include <linux/slab.h> +#include <linux/types.h> +#include <soc/qcom/ocmem.h> + +enum region_mode { + WIDE_MODE = 0x0, + THIN_MODE, + MODE_DEFAULT = WIDE_MODE, +}; + +enum ocmem_macro_state { + PASSTHROUGH = 0, + PERI_ON = 1, + CORE_ON = 2, + CLK_OFF = 4, +}; + +struct ocmem_region { + bool interleaved; + enum region_mode mode; + unsigned int num_macros; + enum ocmem_macro_state macro_state[4]; + unsigned long macro_size; + unsigned long region_size; +}; + +struct ocmem_config { + uint8_t num_regions; + unsigned long macro_size; +}; + +struct ocmem { + struct device *dev; + const struct ocmem_config *config; + struct resource *memory; + void __iomem *mmio; + unsigned int num_ports; + unsigned int num_macros; + bool interleaved; + struct ocmem_region *regions; + unsigned long active_allocations; +}; + +#define OCMEM_MIN_ALIGN SZ_64K +#define OCMEM_MIN_ALLOC SZ_64K + +#define OCMEM_REG_HW_VERSION 0x00000000 +#define OCMEM_REG_HW_PROFILE 0x00000004 + +#define OCMEM_REG_REGION_MODE_CTL 0x00001000 +#define OCMEM_REGION_MODE_CTL_REG0_THIN 0x00000001 +#define OCMEM_REGION_MODE_CTL_REG1_THIN 0x00000002 +#define OCMEM_REGION_MODE_CTL_REG2_THIN 0x00000004 +#define OCMEM_REGION_MODE_CTL_REG3_THIN 0x00000008 + +#define OCMEM_REG_GFX_MPU_START 0x00001004 +#define OCMEM_REG_GFX_MPU_END 0x00001008 + +#define OCMEM_HW_PROFILE_NUM_PORTS(val) FIELD_PREP(0x0000000f, (val)) +#define OCMEM_HW_PROFILE_NUM_MACROS(val) FIELD_PREP(0x00003f00, (val)) + +#define OCMEM_HW_PROFILE_LAST_REGN_HALFSIZE 0x00010000 +#define OCMEM_HW_PROFILE_INTERLEAVING 0x00020000 +#define OCMEM_REG_GEN_STATUS 0x0000000c + +#define OCMEM_REG_PSGSC_STATUS 0x00000038 +#define OCMEM_REG_PSGSC_CTL(i0) (0x0000003c + 0x1*(i0)) + +#define OCMEM_PSGSC_CTL_MACRO0_MODE(val) FIELD_PREP(0x00000007, (val)) +#define OCMEM_PSGSC_CTL_MACRO1_MODE(val) FIELD_PREP(0x00000070, (val)) +#define OCMEM_PSGSC_CTL_MACRO2_MODE(val) FIELD_PREP(0x00000700, (val)) +#define OCMEM_PSGSC_CTL_MACRO3_MODE(val) FIELD_PREP(0x00007000, (val)) + +#define OCMEM_CLK_CORE_IDX 0 +static struct clk_bulk_data ocmem_clks[] = { + { + .id = "core", + }, + { + .id = "iface", + }, +}; + +static inline void ocmem_write(struct ocmem *ocmem, u32 reg, u32 data) +{ + writel(data, ocmem->mmio + reg); +} + +static inline u32 ocmem_read(struct ocmem *ocmem, u32 reg) +{ + return readl(ocmem->mmio + reg); +} + +static void update_ocmem(struct ocmem *ocmem) +{ + uint32_t region_mode_ctrl = 0x0; + int i; + + if (!qcom_scm_ocmem_lock_available()) { + for (i = 0; i < ocmem->config->num_regions; i++) { + struct ocmem_region *region = &ocmem->regions[i]; + + if (region->mode == THIN_MODE) + region_mode_ctrl |= BIT(i); + } + + dev_dbg(ocmem->dev, "ocmem_region_mode_control %x\n", + region_mode_ctrl); + ocmem_write(ocmem, OCMEM_REG_REGION_MODE_CTL, region_mode_ctrl); + } + + for (i = 0; i < ocmem->config->num_regions; i++) { + struct ocmem_region *region = &ocmem->regions[i]; + u32 data; + + data = OCMEM_PSGSC_CTL_MACRO0_MODE(region->macro_state[0]) | + OCMEM_PSGSC_CTL_MACRO1_MODE(region->macro_state[1]) | + OCMEM_PSGSC_CTL_MACRO2_MODE(region->macro_state[2]) | + OCMEM_PSGSC_CTL_MACRO3_MODE(region->macro_state[3]); + + ocmem_write(ocmem, OCMEM_REG_PSGSC_CTL(i), data); + } +} + +static unsigned long phys_to_offset(struct ocmem *ocmem, + unsigned long addr) +{ + if (addr < ocmem->memory->start || addr >= ocmem->memory->end) + return 0; + + return addr - ocmem->memory->start; +} + +static unsigned long device_address(struct ocmem *ocmem, + enum ocmem_client client, + unsigned long addr) +{ + WARN_ON(client != OCMEM_GRAPHICS); + + /* TODO: gpu uses phys_to_offset, but others do not.. */ + return phys_to_offset(ocmem, addr); +} + +static void update_range(struct ocmem *ocmem, struct ocmem_buf *buf, + enum ocmem_macro_state mstate, enum region_mode rmode) +{ + unsigned long offset = 0; + int i, j; + + for (i = 0; i < ocmem->config->num_regions; i++) { + struct ocmem_region *region = &ocmem->regions[i]; + + if (buf->offset <= offset && offset < buf->offset + buf->len) + region->mode = rmode; + + for (j = 0; j < region->num_macros; j++) { + if (buf->offset <= offset && + offset < buf->offset + buf->len) + region->macro_state[j] = mstate; + + offset += region->macro_size; + } + } + + update_ocmem(ocmem); +} + +struct ocmem *of_get_ocmem(struct device *dev) +{ + struct platform_device *pdev; + struct device_node *devnode; + + devnode = of_parse_phandle(dev->of_node, "sram", 0); + if (!devnode || !devnode->parent) { + dev_err(dev, "Cannot look up sram phandle\n"); + return ERR_PTR(-ENODEV); + } + + pdev = of_find_device_by_node(devnode->parent); + if (!pdev) { + dev_err(dev, "Cannot find device node %s\n", devnode->name); + return ERR_PTR(-EPROBE_DEFER); + } + + return platform_get_drvdata(pdev); +} +EXPORT_SYMBOL(of_get_ocmem); + +struct ocmem_buf *ocmem_allocate(struct ocmem *ocmem, enum ocmem_client client, + unsigned long size) +{ + struct ocmem_buf *buf; + int ret; + + /* TODO: add support for other clients... */ + if (WARN_ON(client != OCMEM_GRAPHICS)) + return ERR_PTR(-ENODEV); + + if (size < OCMEM_MIN_ALLOC || !IS_ALIGNED(size, OCMEM_MIN_ALIGN)) + return ERR_PTR(-EINVAL); + + if (test_and_set_bit_lock(BIT(client), &ocmem->active_allocations)) + return ERR_PTR(-EBUSY); + + buf = kzalloc(sizeof(*buf), GFP_KERNEL); + if (!buf) { + ret = -ENOMEM; + goto err_unlock; + } + + buf->offset = 0; + buf->addr = device_address(ocmem, client, buf->offset); + buf->len = size; + + update_range(ocmem, buf, CORE_ON, WIDE_MODE); + + if (qcom_scm_ocmem_lock_available()) { + ret = qcom_scm_ocmem_lock(QCOM_SCM_OCMEM_GRAPHICS_ID, + buf->offset, buf->len, WIDE_MODE); + if (ret) { + dev_err(ocmem->dev, "could not lock: %d\n", ret); + ret = -EINVAL; + goto err_kfree; + } + } else { + ocmem_write(ocmem, OCMEM_REG_GFX_MPU_START, buf->offset); + ocmem_write(ocmem, OCMEM_REG_GFX_MPU_END, + buf->offset + buf->len); + } + + dev_dbg(ocmem->dev, "using %ldK of OCMEM at 0x%08lx for client %d\n", + size / 1024, buf->addr, client); + + return buf; + +err_kfree: + kfree(buf); +err_unlock: + clear_bit_unlock(BIT(client), &ocmem->active_allocations); + + return ERR_PTR(ret); +} +EXPORT_SYMBOL(ocmem_allocate); + +void ocmem_free(struct ocmem *ocmem, enum ocmem_client client, + struct ocmem_buf *buf) +{ + /* TODO: add support for other clients... */ + if (WARN_ON(client != OCMEM_GRAPHICS)) + return; + + update_range(ocmem, buf, CLK_OFF, MODE_DEFAULT); + + if (qcom_scm_ocmem_lock_available()) { + int ret; + + ret = qcom_scm_ocmem_unlock(QCOM_SCM_OCMEM_GRAPHICS_ID, + buf->offset, buf->len); + if (ret) + dev_err(ocmem->dev, "could not unlock: %d\n", ret); + } else { + ocmem_write(ocmem, OCMEM_REG_GFX_MPU_START, 0x0); + ocmem_write(ocmem, OCMEM_REG_GFX_MPU_END, 0x0); + } + + kfree(buf); + + clear_bit_unlock(BIT(client), &ocmem->active_allocations); +} +EXPORT_SYMBOL(ocmem_free); + +static int ocmem_dev_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + unsigned long reg, region_size; + int i, j, ret, num_banks; + struct resource *res; + struct ocmem *ocmem; + + if (!qcom_scm_is_available()) + return -EPROBE_DEFER; + + ocmem = devm_kzalloc(dev, sizeof(*ocmem), GFP_KERNEL); + if (!ocmem) + return -ENOMEM; + + ocmem->dev = dev; + ocmem->config = device_get_match_data(dev); + + ret = devm_clk_bulk_get(dev, ARRAY_SIZE(ocmem_clks), ocmem_clks); + if (ret) { + if (ret != -EPROBE_DEFER) + dev_err(dev, "Unable to get clocks\n"); + + return ret; + } + + res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "ctrl"); + ocmem->mmio = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(ocmem->mmio)) { + dev_err(&pdev->dev, "Failed to ioremap ocmem_ctrl resource\n"); + return PTR_ERR(ocmem->mmio); + } + + ocmem->memory = platform_get_resource_byname(pdev, IORESOURCE_MEM, + "mem"); + if (!ocmem->memory) { + dev_err(dev, "Could not get mem region\n"); + return -ENXIO; + } + + /* The core clock is synchronous with graphics */ + WARN_ON(clk_set_rate(ocmem_clks[OCMEM_CLK_CORE_IDX].clk, 1000) < 0); + + ret = clk_bulk_prepare_enable(ARRAY_SIZE(ocmem_clks), ocmem_clks); + if (ret) { + dev_info(ocmem->dev, "Failed to enable clocks\n"); + return ret; + } + + if (qcom_scm_restore_sec_cfg_available()) { + dev_dbg(dev, "configuring scm\n"); + ret = qcom_scm_restore_sec_cfg(QCOM_SCM_OCMEM_DEV_ID, 0); + if (ret) { + dev_err(dev, "Could not enable secure configuration\n"); + goto err_clk_disable; + } + } + + reg = ocmem_read(ocmem, OCMEM_REG_HW_PROFILE); + ocmem->num_ports = OCMEM_HW_PROFILE_NUM_PORTS(reg); + ocmem->num_macros = OCMEM_HW_PROFILE_NUM_MACROS(reg); + ocmem->interleaved = !!(reg & OCMEM_HW_PROFILE_INTERLEAVING); + + num_banks = ocmem->num_ports / 2; + region_size = ocmem->config->macro_size * num_banks; + + dev_info(dev, "%u ports, %u regions, %u macros, %sinterleaved\n", + ocmem->num_ports, ocmem->config->num_regions, + ocmem->num_macros, ocmem->interleaved ? "" : "not "); + + ocmem->regions = devm_kcalloc(dev, ocmem->config->num_regions, + sizeof(struct ocmem_region), GFP_KERNEL); + if (!ocmem->regions) { + ret = -ENOMEM; + goto err_clk_disable; + } + + for (i = 0; i < ocmem->config->num_regions; i++) { + struct ocmem_region *region = &ocmem->regions[i]; + + if (WARN_ON(num_banks > ARRAY_SIZE(region->macro_state))) { + ret = -EINVAL; + goto err_clk_disable; + } + + region->mode = MODE_DEFAULT; + region->num_macros = num_banks; + + if (i == (ocmem->config->num_regions - 1) && + reg & OCMEM_HW_PROFILE_LAST_REGN_HALFSIZE) { + region->macro_size = ocmem->config->macro_size / 2; + region->region_size = region_size / 2; + } else { + region->macro_size = ocmem->config->macro_size; + region->region_size = region_size; + } + + for (j = 0; j < ARRAY_SIZE(region->macro_state); j++) + region->macro_state[j] = CLK_OFF; + } + + platform_set_drvdata(pdev, ocmem); + + return 0; + +err_clk_disable: + clk_bulk_disable_unprepare(ARRAY_SIZE(ocmem_clks), ocmem_clks); + return ret; +} + +static int ocmem_dev_remove(struct platform_device *pdev) +{ + clk_bulk_disable_unprepare(ARRAY_SIZE(ocmem_clks), ocmem_clks); + + return 0; +} + +static const struct ocmem_config ocmem_8974_config = { + .num_regions = 3, + .macro_size = SZ_128K, +}; + +static const struct of_device_id ocmem_of_match[] = { + { .compatible = "qcom,msm8974-ocmem", .data = &ocmem_8974_config }, + { } +}; + +MODULE_DEVICE_TABLE(of, ocmem_of_match); + +static struct platform_driver ocmem_driver = { + .probe = ocmem_dev_probe, + .remove = ocmem_dev_remove, + .driver = { + .name = "ocmem", + .of_match_table = ocmem_of_match, + }, +}; + +module_platform_driver(ocmem_driver); + +MODULE_DESCRIPTION("On Chip Memory (OCMEM) allocator for some Snapdragon SoCs"); +MODULE_LICENSE("GPL v2"); diff --git a/include/linux/agpgart.h b/include/linux/agpgart.h index c6b61ca97053..21b34a96cfd8 100644 --- a/include/linux/agpgart.h +++ b/include/linux/agpgart.h @@ -30,8 +30,6 @@ #include <linux/agp_backend.h> #include <uapi/linux/agpgart.h> -#define AGPGART_MINOR 175 - struct agp_info { struct agp_version version; /* version of the driver */ u32 bridge_id; /* bridge vendor/device */ diff --git a/include/linux/miscdevice.h b/include/linux/miscdevice.h index 3247a3dc7934..6f2ca42152a0 100644 --- a/include/linux/miscdevice.h +++ b/include/linux/miscdevice.h @@ -33,6 +33,7 @@ #define SGI_MMTIMER 153 #define STORE_QUEUE_MINOR 155 /* unused */ #define I2O_MINOR 166 +#define AGPGART_MINOR 175 #define HWRNG_MINOR 183 #define MICROCODE_MINOR 184 #define IRNET_MINOR 187 diff --git a/include/linux/qcom_scm.h b/include/linux/qcom_scm.h index 2d5eff506e13..04382e1798e4 100644 --- a/include/linux/qcom_scm.h +++ b/include/linux/qcom_scm.h @@ -24,6 +24,26 @@ struct qcom_scm_vmperm { int perm; }; +enum qcom_scm_ocmem_client { + QCOM_SCM_OCMEM_UNUSED_ID = 0x0, + QCOM_SCM_OCMEM_GRAPHICS_ID, + QCOM_SCM_OCMEM_VIDEO_ID, + QCOM_SCM_OCMEM_LP_AUDIO_ID, + QCOM_SCM_OCMEM_SENSORS_ID, + QCOM_SCM_OCMEM_OTHER_OS_ID, + QCOM_SCM_OCMEM_DEBUG_ID, +}; + +enum qcom_scm_sec_dev_id { + QCOM_SCM_MDSS_DEV_ID = 1, + QCOM_SCM_OCMEM_DEV_ID = 5, + QCOM_SCM_PCIE0_DEV_ID = 11, + QCOM_SCM_PCIE1_DEV_ID = 12, + QCOM_SCM_GFX_DEV_ID = 18, + QCOM_SCM_UFS_DEV_ID = 19, + QCOM_SCM_ICE_DEV_ID = 20, +}; + #define QCOM_SCM_VMID_HLOS 0x3 #define QCOM_SCM_VMID_MSS_MSA 0xF #define QCOM_SCM_VMID_WLAN 0x18 @@ -41,6 +61,11 @@ extern bool qcom_scm_is_available(void); extern bool qcom_scm_hdcp_available(void); extern int qcom_scm_hdcp_req(struct qcom_scm_hdcp_req *req, u32 req_cnt, u32 *resp); +extern bool qcom_scm_ocmem_lock_available(void); +extern int qcom_scm_ocmem_lock(enum qcom_scm_ocmem_client id, u32 offset, + u32 size, u32 mode); +extern int qcom_scm_ocmem_unlock(enum qcom_scm_ocmem_client id, u32 offset, + u32 size); extern bool qcom_scm_pas_supported(u32 peripheral); extern int qcom_scm_pas_init_image(u32 peripheral, const void *metadata, size_t size); @@ -55,6 +80,7 @@ extern int qcom_scm_assign_mem(phys_addr_t mem_addr, size_t mem_sz, extern void qcom_scm_cpu_power_down(u32 flags); extern u32 qcom_scm_get_version(void); extern int qcom_scm_set_remote_state(u32 state, u32 id); +extern bool qcom_scm_restore_sec_cfg_available(void); extern int qcom_scm_restore_sec_cfg(u32 device_id, u32 spare); extern int qcom_scm_iommu_secure_ptbl_size(u32 spare, size_t *size); extern int qcom_scm_iommu_secure_ptbl_init(u64 addr, u32 size, u32 spare); diff --git a/include/soc/qcom/ocmem.h b/include/soc/qcom/ocmem.h new file mode 100644 index 000000000000..02a8bc2677b1 --- /dev/null +++ b/include/soc/qcom/ocmem.h @@ -0,0 +1,65 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * The On Chip Memory (OCMEM) allocator allows various clients to allocate + * memory from OCMEM based on performance, latency and power requirements. + * This is typically used by the GPU, camera/video, and audio components on + * some Snapdragon SoCs. + * + * Copyright (C) 2019 Brian Masney <masneyb@onstation.org> + * Copyright (C) 2015 Red Hat. Author: Rob Clark <robdclark@gmail.com> + */ + +#include <linux/device.h> +#include <linux/err.h> + +#ifndef __OCMEM_H__ +#define __OCMEM_H__ + +enum ocmem_client { + /* GMEM clients */ + OCMEM_GRAPHICS = 0x0, + /* + * TODO add more once ocmem_allocate() is clever enough to + * deal with multiple clients. + */ + OCMEM_CLIENT_MAX, +}; + +struct ocmem; + +struct ocmem_buf { + unsigned long offset; + unsigned long addr; + unsigned long len; +}; + +#if IS_ENABLED(CONFIG_QCOM_OCMEM) + +struct ocmem *of_get_ocmem(struct device *dev); +struct ocmem_buf *ocmem_allocate(struct ocmem *ocmem, enum ocmem_client client, + unsigned long size); +void ocmem_free(struct ocmem *ocmem, enum ocmem_client client, + struct ocmem_buf *buf); + +#else /* IS_ENABLED(CONFIG_QCOM_OCMEM) */ + +static inline struct ocmem *of_get_ocmem(struct device *dev) +{ + return ERR_PTR(-ENODEV); +} + +static inline struct ocmem_buf *ocmem_allocate(struct ocmem *ocmem, + enum ocmem_client client, + unsigned long size) +{ + return ERR_PTR(-ENODEV); +} + +static inline void ocmem_free(struct ocmem *ocmem, enum ocmem_client client, + struct ocmem_buf *buf) +{ +} + +#endif /* IS_ENABLED(CONFIG_QCOM_OCMEM) */ + +#endif /* __OCMEM_H__ */ |