From ced8c5176b0d7721639c4b29af78c7f60410effb Mon Sep 17 00:00:00 2001 From: Anatoliy Klymenko Date: Fri, 26 Apr 2024 12:27:56 -0700 Subject: drm: xlnx: zynqmp_dpsub: Fix few function comments Fix arguments description for zynqmp_disp_layer_find_live_format() and zynqmp_disp_layer_set_live_format(). Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202404260616.KFGDpCDN-lkp@intel.com/ Signed-off-by: Anatoliy Klymenko Signed-off-by: Tomi Valkeinen Fixes: 1b5151bd3a2e ("drm: xlnx: zynqmp_dpsub: Set input live format") Link: https://patchwork.freedesktop.org/patch/msgid/20240426-dp-live-fmt-fix-v3-1-e904b5ae51d7@amd.com (cherry picked from commit 87f36e03c0f1d69245ad295309418e982c88fbe7) Signed-off-by: Thomas Zimmermann --- drivers/gpu/drm/xlnx/zynqmp_disp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/xlnx/zynqmp_disp.c b/drivers/gpu/drm/xlnx/zynqmp_disp.c index 13157da0089e..423f5f4943cc 100644 --- a/drivers/gpu/drm/xlnx/zynqmp_disp.c +++ b/drivers/gpu/drm/xlnx/zynqmp_disp.c @@ -940,7 +940,7 @@ zynqmp_disp_layer_find_format(struct zynqmp_disp_layer *layer, * zynqmp_disp_layer_find_live_format - Find format information for given * media bus format * @layer: The layer - * @drm_fmt: Media bus format to search + * @media_bus_format: Media bus format to search * * Search display subsystem format information corresponding to the given media * bus format @media_bus_format for the @layer, and return a pointer to the @@ -1117,7 +1117,7 @@ void zynqmp_disp_layer_set_format(struct zynqmp_disp_layer *layer, /** * zynqmp_disp_layer_set_live_format - Set the live video layer format * @layer: The layer - * @info: The format info + * @media_bus_format: Media bus format to set * * NOTE: This function should not be used to set format for non-live video * layer. Use zynqmp_disp_layer_set_format() instead. 
-- cgit v1.2.3 From 713a75079f37b92835db48b27699e540657e3c5a Mon Sep 17 00:00:00 2001 From: Anatoliy Klymenko Date: Fri, 26 Apr 2024 12:27:57 -0700 Subject: drm: xlnx: zynqmp_dpsub: Fix compilation error Fix W=1 clang 19 compilation error in zynqmp_disp_layer_drm_formats(). Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202404260946.4oZXvHD2-lkp@intel.com/ Signed-off-by: Anatoliy Klymenko Signed-off-by: Tomi Valkeinen Fixes: b0f0469ab662 ("drm: xlnx: zynqmp_dpsub: Anounce supported input formats") Link: https://patchwork.freedesktop.org/patch/msgid/20240426-dp-live-fmt-fix-v3-2-e904b5ae51d7@amd.com (cherry picked from commit c72211751870ffa2cff5d91834059456cfa7cbd5) Signed-off-by: Thomas Zimmermann --- drivers/gpu/drm/xlnx/zynqmp_disp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/xlnx/zynqmp_disp.c b/drivers/gpu/drm/xlnx/zynqmp_disp.c index 423f5f4943cc..c9fb432d4cbd 100644 --- a/drivers/gpu/drm/xlnx/zynqmp_disp.c +++ b/drivers/gpu/drm/xlnx/zynqmp_disp.c @@ -981,7 +981,7 @@ u32 *zynqmp_disp_layer_drm_formats(struct zynqmp_disp_layer *layer, unsigned int i; u32 *formats; - if (WARN_ON(!layer->mode == ZYNQMP_DPSUB_LAYER_NONLIVE)) { + if (WARN_ON(layer->mode != ZYNQMP_DPSUB_LAYER_NONLIVE)) { *num_formats = 0; return NULL; } -- cgit v1.2.3 From d2143297579f12ea22479d403d955819838e7e67 Mon Sep 17 00:00:00 2001 From: Antonino Maniscalco Date: Thu, 2 May 2024 18:51:54 +0200 Subject: drm/panthor: Fix tiler OOM handling to allow incremental rendering If the kernel couldn't allocate memory because we reached the maximum number of chunks but no render passes are in flight (panthor_heap_grow() returning -ENOMEM), we should defer the OOM handling to the FW by returning a NULL chunk. 
The FW will then call the tiler OOM exception handler, which is supposed to implement incremental rendering (execute an intermediate fragment job to flush the pending primitives, release the tiler memory that was used to store those primitives, and start over from where it stopped). Instead of checking for both ENOMEM and EBUSY, make panthor_heap_grow() return ENOMEM no matter the reason of this allocation failure, the FW doesn't care anyway. v3: - Add R-bs v2: - Make panthor_heap_grow() return -ENOMEM for all kind of allocation failures - Document the panthor_heap_grow() semantics Fixes: de8548813824 ("drm/panthor: Add the scheduler logical block") Signed-off-by: Antonino Maniscalco Signed-off-by: Boris Brezillon Reviewed-by: Liviu Dudau Reviewed-by: Steven Price Link: https://patchwork.freedesktop.org/patch/msgid/20240502165158.1458959-2-boris.brezillon@collabora.com --- drivers/gpu/drm/panthor/panthor_heap.c | 12 ++++++++---- drivers/gpu/drm/panthor/panthor_sched.c | 7 ++++++- 2 files changed, 14 insertions(+), 5 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/panthor/panthor_heap.c b/drivers/gpu/drm/panthor/panthor_heap.c index 143fa35f2e74..c3c0ba744937 100644 --- a/drivers/gpu/drm/panthor/panthor_heap.c +++ b/drivers/gpu/drm/panthor/panthor_heap.c @@ -410,6 +410,13 @@ out_unlock: * @renderpasses_in_flight: Number of render passes currently in-flight. * @pending_frag_count: Number of fragment jobs waiting for execution/completion. * @new_chunk_gpu_va: Pointer used to return the chunk VA. 
+ * + * Return: + * - 0 if a new heap was allocated + * - -ENOMEM if the tiler context reached the maximum number of chunks + * or if too many render passes are in-flight + * or if the allocation failed + * - -EINVAL if any of the arguments passed to panthor_heap_grow() is invalid */ int panthor_heap_grow(struct panthor_heap_pool *pool, u64 heap_gpu_va, @@ -439,10 +446,7 @@ int panthor_heap_grow(struct panthor_heap_pool *pool, * handler provided by the userspace driver, if any). */ if (renderpasses_in_flight > heap->target_in_flight || - (pending_frag_count > 0 && heap->chunk_count >= heap->max_chunks)) { - ret = -EBUSY; - goto out_unlock; - } else if (heap->chunk_count >= heap->max_chunks) { + heap->chunk_count >= heap->max_chunks) { ret = -ENOMEM; goto out_unlock; } diff --git a/drivers/gpu/drm/panthor/panthor_sched.c b/drivers/gpu/drm/panthor/panthor_sched.c index 7f16a4a14e9a..c126251c5ba7 100644 --- a/drivers/gpu/drm/panthor/panthor_sched.c +++ b/drivers/gpu/drm/panthor/panthor_sched.c @@ -1385,7 +1385,12 @@ static int group_process_tiler_oom(struct panthor_group *group, u32 cs_id) pending_frag_count, &new_chunk_va); } - if (ret && ret != -EBUSY) { + /* If the heap context doesn't have memory for us, we want to let the + * FW try to reclaim memory by waiting for fragment jobs to land or by + * executing the tiler OOM exception handler, which is supposed to + * implement incremental rendering. + */ + if (ret && ret != -ENOMEM) { drm_warn(&ptdev->base, "Failed to extend the tiler heap\n"); group->fatal_queues |= BIT(cs_id); sched_queue_delayed_work(sched, tick, 0); -- cgit v1.2.3 From e3193f0fbd6d83510ff6879ac248f42a7c0fefe7 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Thu, 2 May 2024 18:51:55 +0200 Subject: drm/panthor: Make sure the tiler initial/max chunks are consistent It doesn't make sense to have a maximum number of chunks smaller than the initial number of chunks attached to the context. 
Fix the uAPI header to reflect the new constraint, and mention the undocumented "initial_chunk_count > 0" constraint while at it. v3: - Add R-b v2: - Fix the check Fixes: 9cca48fa4f89 ("drm/panthor: Add the heap logical block") Signed-off-by: Boris Brezillon Reviewed-by: Liviu Dudau Reviewed-by: Steven Price Link: https://patchwork.freedesktop.org/patch/msgid/20240502165158.1458959-3-boris.brezillon@collabora.com --- drivers/gpu/drm/panthor/panthor_heap.c | 3 +++ include/uapi/drm/panthor_drm.h | 8 ++++++-- 2 files changed, 9 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/panthor/panthor_heap.c b/drivers/gpu/drm/panthor/panthor_heap.c index c3c0ba744937..3be86ec383d6 100644 --- a/drivers/gpu/drm/panthor/panthor_heap.c +++ b/drivers/gpu/drm/panthor/panthor_heap.c @@ -281,6 +281,9 @@ int panthor_heap_create(struct panthor_heap_pool *pool, if (initial_chunk_count == 0) return -EINVAL; + if (initial_chunk_count > max_chunks) + return -EINVAL; + if (hweight32(chunk_size) != 1 || chunk_size < SZ_256K || chunk_size > SZ_2M) return -EINVAL; diff --git a/include/uapi/drm/panthor_drm.h b/include/uapi/drm/panthor_drm.h index dadb05ab1235..5db80a0682d5 100644 --- a/include/uapi/drm/panthor_drm.h +++ b/include/uapi/drm/panthor_drm.h @@ -895,13 +895,17 @@ struct drm_panthor_tiler_heap_create { /** @vm_id: VM ID the tiler heap should be mapped to */ __u32 vm_id; - /** @initial_chunk_count: Initial number of chunks to allocate. */ + /** @initial_chunk_count: Initial number of chunks to allocate. Must be at least one. */ __u32 initial_chunk_count; /** @chunk_size: Chunk size. Must be a power of two at least 256KB large. */ __u32 chunk_size; - /** @max_chunks: Maximum number of chunks that can be allocated. */ + /** + * @max_chunks: Maximum number of chunks that can be allocated. + * + * Must be at least @initial_chunk_count. 
+ */ __u32 max_chunks; /** -- cgit v1.2.3 From 69a429905ceccad547e4a532b08f9d32c7f3422a Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Thu, 2 May 2024 18:51:56 +0200 Subject: drm/panthor: Relax the constraints on the tiler chunk size The field used to store the chunk size is 12 bits wide, and the encoding is chunk_size = chunk_header.chunk_size << 12, which gives us a theoretical [4k:8M] range. This range is further limited by implementation constraints, and all known implementations seem to impose a [128k:8M] range, so do the same here. We also relax the power-of-two constraint, which doesn't seem to exist on v10. This will allow userspace to fine-tune initial/max tiler memory on memory-constrained devices. v4: - Actually fix the range in the kerneldoc v3: - Add R-bs - Fix valid range in the kerneldoc v2: - Turn the power-of-two constraint into a page-aligned constraint to allow fine-tune of the initial/max heap memory size - Fix the panthor_heap_create() kerneldoc Fixes: 9cca48fa4f89 ("drm/panthor: Add the heap logical block") Signed-off-by: Boris Brezillon Reviewed-by: Liviu Dudau Reviewed-by: Steven Price Link: https://patchwork.freedesktop.org/patch/msgid/20240502165158.1458959-4-boris.brezillon@collabora.com --- drivers/gpu/drm/panthor/panthor_heap.c | 8 ++++---- include/uapi/drm/panthor_drm.h | 6 +++++- 2 files changed, 9 insertions(+), 5 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/panthor/panthor_heap.c b/drivers/gpu/drm/panthor/panthor_heap.c index 3be86ec383d6..b0fc5b9ee847 100644 --- a/drivers/gpu/drm/panthor/panthor_heap.c +++ b/drivers/gpu/drm/panthor/panthor_heap.c @@ -253,8 +253,8 @@ int panthor_heap_destroy(struct panthor_heap_pool *pool, u32 handle) * @pool: Pool to instantiate the heap context from. * @initial_chunk_count: Number of chunk allocated at initialization time. * Must be at least 1. - * @chunk_size: The size of each chunk. Must be a power of two between 256k - * and 2M. + * @chunk_size: The size of each chunk. 
Must be page-aligned and lie in the + * [128k:8M] range. * @max_chunks: Maximum number of chunks that can be allocated. * @target_in_flight: Maximum number of in-flight render passes. * @heap_ctx_gpu_va: Pointer holding the GPU address of the allocated heap @@ -284,8 +284,8 @@ int panthor_heap_create(struct panthor_heap_pool *pool, if (initial_chunk_count > max_chunks) return -EINVAL; - if (hweight32(chunk_size) != 1 || - chunk_size < SZ_256K || chunk_size > SZ_2M) + if (!IS_ALIGNED(chunk_size, PAGE_SIZE) || + chunk_size < SZ_128K || chunk_size > SZ_8M) return -EINVAL; down_read(&pool->lock); diff --git a/include/uapi/drm/panthor_drm.h b/include/uapi/drm/panthor_drm.h index 5db80a0682d5..b8220d2e698f 100644 --- a/include/uapi/drm/panthor_drm.h +++ b/include/uapi/drm/panthor_drm.h @@ -898,7 +898,11 @@ struct drm_panthor_tiler_heap_create { /** @initial_chunk_count: Initial number of chunks to allocate. Must be at least one. */ __u32 initial_chunk_count; - /** @chunk_size: Chunk size. Must be a power of two at least 256KB large. */ + /** + * @chunk_size: Chunk size. + * + * Must be page-aligned and lie in the [128k:8M] range. + */ __u32 chunk_size; /** -- cgit v1.2.3 From 8e43b1e537d4fb313efac1b5d0d01db0fe35f695 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Thu, 2 May 2024 18:51:57 +0200 Subject: drm/panthor: Fix an off-by-one in the heap context retrieval logic The heap ID is used to index the heap context pool, and allocating in the [1:MAX_HEAPS_PER_POOL] leads to an off-by-one. This was originally to avoid returning a zero heap handle, but given the handle is formed with (vm_id << 16) | heap_id, with vm_id > 0, we already can't end up with a valid heap handle that's zero. 
v4: - s/XA_FLAGS_ALLOC1/XA_FLAGS_ALLOC/ v3: - Allocate in the [0:MAX_HEAPS_PER_POOL-1] range v2: - New patch Fixes: 9cca48fa4f89 ("drm/panthor: Add the heap logical block") Reported-by: Eric Smith Signed-off-by: Boris Brezillon Tested-by: Eric Smith Reviewed-by: Steven Price Reviewed-by: Liviu Dudau Link: https://patchwork.freedesktop.org/patch/msgid/20240502165158.1458959-5-boris.brezillon@collabora.com --- drivers/gpu/drm/panthor/panthor_heap.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/panthor/panthor_heap.c b/drivers/gpu/drm/panthor/panthor_heap.c index b0fc5b9ee847..95a1c6c9f35e 100644 --- a/drivers/gpu/drm/panthor/panthor_heap.c +++ b/drivers/gpu/drm/panthor/panthor_heap.c @@ -323,7 +323,8 @@ int panthor_heap_create(struct panthor_heap_pool *pool, if (!pool->vm) { ret = -EINVAL; } else { - ret = xa_alloc(&pool->xa, &id, heap, XA_LIMIT(1, MAX_HEAPS_PER_POOL), GFP_KERNEL); + ret = xa_alloc(&pool->xa, &id, heap, + XA_LIMIT(0, MAX_HEAPS_PER_POOL - 1), GFP_KERNEL); if (!ret) { void *gpu_ctx = panthor_get_heap_ctx(pool, id); @@ -543,7 +544,7 @@ panthor_heap_pool_create(struct panthor_device *ptdev, struct panthor_vm *vm) pool->vm = vm; pool->ptdev = ptdev; init_rwsem(&pool->lock); - xa_init_flags(&pool->xa, XA_FLAGS_ALLOC1); + xa_init_flags(&pool->xa, XA_FLAGS_ALLOC); kref_init(&pool->refcount); pool->gpu_contexts = panthor_kernel_bo_create(ptdev, vm, bosize, -- cgit v1.2.3 From 2b2a26b3314210585ca6d552a421921a3936713b Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Thu, 2 May 2024 20:38:09 +0200 Subject: drm/panthor: Force an immediate reset on unrecoverable faults If the FW reports an unrecoverable fault, we need to reset the GPU before we can start re-using it again. 
Signed-off-by: Boris Brezillon Reviewed-by: Steven Price Reviewed-by: Liviu Dudau Link: https://patchwork.freedesktop.org/patch/msgid/20240502183813.1612017-2-boris.brezillon@collabora.com --- drivers/gpu/drm/panthor/panthor_device.c | 1 + drivers/gpu/drm/panthor/panthor_device.h | 1 + drivers/gpu/drm/panthor/panthor_sched.c | 11 ++++++++++- 3 files changed, 12 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/panthor/panthor_device.c b/drivers/gpu/drm/panthor/panthor_device.c index 75276cbeba20..4c5b54e7abb7 100644 --- a/drivers/gpu/drm/panthor/panthor_device.c +++ b/drivers/gpu/drm/panthor/panthor_device.c @@ -293,6 +293,7 @@ static const struct panthor_exception_info panthor_exception_infos[] = { PANTHOR_EXCEPTION(ACTIVE), PANTHOR_EXCEPTION(CS_RES_TERM), PANTHOR_EXCEPTION(CS_CONFIG_FAULT), + PANTHOR_EXCEPTION(CS_UNRECOVERABLE), PANTHOR_EXCEPTION(CS_ENDPOINT_FAULT), PANTHOR_EXCEPTION(CS_BUS_FAULT), PANTHOR_EXCEPTION(CS_INSTR_INVALID), diff --git a/drivers/gpu/drm/panthor/panthor_device.h b/drivers/gpu/drm/panthor/panthor_device.h index 2fdd671b38fd..e388c0472ba7 100644 --- a/drivers/gpu/drm/panthor/panthor_device.h +++ b/drivers/gpu/drm/panthor/panthor_device.h @@ -216,6 +216,7 @@ enum drm_panthor_exception_type { DRM_PANTHOR_EXCEPTION_CS_RES_TERM = 0x0f, DRM_PANTHOR_EXCEPTION_MAX_NON_FAULT = 0x3f, DRM_PANTHOR_EXCEPTION_CS_CONFIG_FAULT = 0x40, + DRM_PANTHOR_EXCEPTION_CS_UNRECOVERABLE = 0x41, DRM_PANTHOR_EXCEPTION_CS_ENDPOINT_FAULT = 0x44, DRM_PANTHOR_EXCEPTION_CS_BUS_FAULT = 0x48, DRM_PANTHOR_EXCEPTION_CS_INSTR_INVALID = 0x49, diff --git a/drivers/gpu/drm/panthor/panthor_sched.c b/drivers/gpu/drm/panthor/panthor_sched.c index c126251c5ba7..e455e8445582 100644 --- a/drivers/gpu/drm/panthor/panthor_sched.c +++ b/drivers/gpu/drm/panthor/panthor_sched.c @@ -1281,7 +1281,16 @@ cs_slot_process_fatal_event_locked(struct panthor_device *ptdev, if (group) group->fatal_queues |= BIT(cs_id); - sched_queue_delayed_work(sched, tick, 0); + if 
(CS_EXCEPTION_TYPE(fatal) == DRM_PANTHOR_EXCEPTION_CS_UNRECOVERABLE) { + /* If this exception is unrecoverable, queue a reset, and make + * sure we stop scheduling groups until the reset has happened. + */ + panthor_device_schedule_reset(ptdev); + cancel_delayed_work(&sched->tick_work); + } else { + sched_queue_delayed_work(sched, tick, 0); + } + drm_warn(&ptdev->base, "CSG slot %d CS slot: %d\n" "CS_FATAL.EXCEPTION_TYPE: 0x%x (%s)\n" -- cgit v1.2.3 From ff60c8da0aaf7ecf5f4d48bebeb3c1f52b2088dd Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Thu, 2 May 2024 20:38:10 +0200 Subject: drm/panthor: Keep a ref to the VM at the panthor_kernel_bo level Avoids use-after-free situations when panthor_fw_unplug() is called and the kernel BO was mapped to the FW VM. Signed-off-by: Boris Brezillon Reviewed-by: Steven Price Reviewed-by: Liviu Dudau Link: https://patchwork.freedesktop.org/patch/msgid/20240502183813.1612017-3-boris.brezillon@collabora.com --- drivers/gpu/drm/panthor/panthor_fw.c | 4 ++-- drivers/gpu/drm/panthor/panthor_gem.c | 8 +++++--- drivers/gpu/drm/panthor/panthor_gem.h | 8 ++++++-- drivers/gpu/drm/panthor/panthor_heap.c | 8 ++++---- drivers/gpu/drm/panthor/panthor_sched.c | 11 +++++------ 5 files changed, 22 insertions(+), 17 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/panthor/panthor_fw.c b/drivers/gpu/drm/panthor/panthor_fw.c index fedf9627453f..394e00bd75bb 100644 --- a/drivers/gpu/drm/panthor/panthor_fw.c +++ b/drivers/gpu/drm/panthor/panthor_fw.c @@ -453,7 +453,7 @@ panthor_fw_alloc_queue_iface_mem(struct panthor_device *ptdev, ret = panthor_kernel_bo_vmap(mem); if (ret) { - panthor_kernel_bo_destroy(panthor_fw_vm(ptdev), mem); + panthor_kernel_bo_destroy(mem); return ERR_PTR(ret); } @@ -1134,7 +1134,7 @@ void panthor_fw_unplug(struct panthor_device *ptdev) panthor_fw_stop(ptdev); list_for_each_entry(section, &ptdev->fw->sections, node) - panthor_kernel_bo_destroy(panthor_fw_vm(ptdev), section->mem); + 
panthor_kernel_bo_destroy(section->mem); /* We intentionally don't call panthor_vm_idle() and let * panthor_mmu_unplug() release the AS we acquired with diff --git a/drivers/gpu/drm/panthor/panthor_gem.c b/drivers/gpu/drm/panthor/panthor_gem.c index d6483266d0c2..38f560864879 100644 --- a/drivers/gpu/drm/panthor/panthor_gem.c +++ b/drivers/gpu/drm/panthor/panthor_gem.c @@ -26,18 +26,18 @@ static void panthor_gem_free_object(struct drm_gem_object *obj) /** * panthor_kernel_bo_destroy() - Destroy a kernel buffer object - * @vm: The VM this BO was mapped to. * @bo: Kernel buffer object to destroy. If NULL or an ERR_PTR(), the destruction * is skipped. */ -void panthor_kernel_bo_destroy(struct panthor_vm *vm, - struct panthor_kernel_bo *bo) +void panthor_kernel_bo_destroy(struct panthor_kernel_bo *bo) { + struct panthor_vm *vm; int ret; if (IS_ERR_OR_NULL(bo)) return; + vm = bo->vm; panthor_kernel_bo_vunmap(bo); if (drm_WARN_ON(bo->obj->dev, @@ -53,6 +53,7 @@ void panthor_kernel_bo_destroy(struct panthor_vm *vm, drm_gem_object_put(bo->obj); out_free_bo: + panthor_vm_put(vm); kfree(bo); } @@ -106,6 +107,7 @@ panthor_kernel_bo_create(struct panthor_device *ptdev, struct panthor_vm *vm, if (ret) goto err_free_va; + kbo->vm = panthor_vm_get(vm); bo->exclusive_vm_root_gem = panthor_vm_root_gem(vm); drm_gem_object_get(bo->exclusive_vm_root_gem); bo->base.base.resv = bo->exclusive_vm_root_gem->resv; diff --git a/drivers/gpu/drm/panthor/panthor_gem.h b/drivers/gpu/drm/panthor/panthor_gem.h index 3bccba394d00..e43021cf6d45 100644 --- a/drivers/gpu/drm/panthor/panthor_gem.h +++ b/drivers/gpu/drm/panthor/panthor_gem.h @@ -61,6 +61,11 @@ struct panthor_kernel_bo { */ struct drm_gem_object *obj; + /** + * @vm: VM this private buffer is attached to. + */ + struct panthor_vm *vm; + /** * @va_node: VA space allocated to this GEM. 
*/ @@ -136,7 +141,6 @@ panthor_kernel_bo_create(struct panthor_device *ptdev, struct panthor_vm *vm, size_t size, u32 bo_flags, u32 vm_map_flags, u64 gpu_va); -void panthor_kernel_bo_destroy(struct panthor_vm *vm, - struct panthor_kernel_bo *bo); +void panthor_kernel_bo_destroy(struct panthor_kernel_bo *bo); #endif /* __PANTHOR_GEM_H__ */ diff --git a/drivers/gpu/drm/panthor/panthor_heap.c b/drivers/gpu/drm/panthor/panthor_heap.c index 95a1c6c9f35e..3796a9eb22af 100644 --- a/drivers/gpu/drm/panthor/panthor_heap.c +++ b/drivers/gpu/drm/panthor/panthor_heap.c @@ -127,7 +127,7 @@ static void panthor_free_heap_chunk(struct panthor_vm *vm, heap->chunk_count--; mutex_unlock(&heap->lock); - panthor_kernel_bo_destroy(vm, chunk->bo); + panthor_kernel_bo_destroy(chunk->bo); kfree(chunk); } @@ -183,7 +183,7 @@ static int panthor_alloc_heap_chunk(struct panthor_device *ptdev, return 0; err_destroy_bo: - panthor_kernel_bo_destroy(vm, chunk->bo); + panthor_kernel_bo_destroy(chunk->bo); err_free_chunk: kfree(chunk); @@ -395,7 +395,7 @@ int panthor_heap_return_chunk(struct panthor_heap_pool *pool, mutex_unlock(&heap->lock); if (removed) { - panthor_kernel_bo_destroy(pool->vm, chunk->bo); + panthor_kernel_bo_destroy(chunk->bo); kfree(chunk); ret = 0; } else { @@ -595,7 +595,7 @@ void panthor_heap_pool_destroy(struct panthor_heap_pool *pool) drm_WARN_ON(&pool->ptdev->base, panthor_heap_destroy_locked(pool, i)); if (!IS_ERR_OR_NULL(pool->gpu_contexts)) - panthor_kernel_bo_destroy(pool->vm, pool->gpu_contexts); + panthor_kernel_bo_destroy(pool->gpu_contexts); /* Reflects the fact the pool has been destroyed. 
*/ pool->vm = NULL; diff --git a/drivers/gpu/drm/panthor/panthor_sched.c b/drivers/gpu/drm/panthor/panthor_sched.c index e455e8445582..9308596e0812 100644 --- a/drivers/gpu/drm/panthor/panthor_sched.c +++ b/drivers/gpu/drm/panthor/panthor_sched.c @@ -826,8 +826,8 @@ static void group_free_queue(struct panthor_group *group, struct panthor_queue * panthor_queue_put_syncwait_obj(queue); - panthor_kernel_bo_destroy(group->vm, queue->ringbuf); - panthor_kernel_bo_destroy(panthor_fw_vm(group->ptdev), queue->iface.mem); + panthor_kernel_bo_destroy(queue->ringbuf); + panthor_kernel_bo_destroy(queue->iface.mem); kfree(queue); } @@ -837,15 +837,14 @@ static void group_release_work(struct work_struct *work) struct panthor_group *group = container_of(work, struct panthor_group, release_work); - struct panthor_device *ptdev = group->ptdev; u32 i; for (i = 0; i < group->queue_count; i++) group_free_queue(group, group->queues[i]); - panthor_kernel_bo_destroy(panthor_fw_vm(ptdev), group->suspend_buf); - panthor_kernel_bo_destroy(panthor_fw_vm(ptdev), group->protm_suspend_buf); - panthor_kernel_bo_destroy(group->vm, group->syncobjs); + panthor_kernel_bo_destroy(group->suspend_buf); + panthor_kernel_bo_destroy(group->protm_suspend_buf); + panthor_kernel_bo_destroy(group->syncobjs); panthor_vm_put(group->vm); kfree(group); -- cgit v1.2.3 From a257e8182261da48b7c34615f2752f8a78ac108b Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Thu, 2 May 2024 20:38:11 +0200 Subject: drm/panthor: Reset the FW VM to NULL on unplug This way we get NULL derefs instead of use-after-free if the FW VM is referenced after the device has been unplugged. 
Signed-off-by: Boris Brezillon Reviewed-by: Steven Price Acked-by: Liviu Dudau Link: https://patchwork.freedesktop.org/patch/msgid/20240502183813.1612017-4-boris.brezillon@collabora.com --- drivers/gpu/drm/panthor/panthor_fw.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers') diff --git a/drivers/gpu/drm/panthor/panthor_fw.c b/drivers/gpu/drm/panthor/panthor_fw.c index 394e00bd75bb..857f3f11258a 100644 --- a/drivers/gpu/drm/panthor/panthor_fw.c +++ b/drivers/gpu/drm/panthor/panthor_fw.c @@ -1142,6 +1142,7 @@ void panthor_fw_unplug(struct panthor_device *ptdev) * state to keep the active_refcnt balanced. */ panthor_vm_put(ptdev->fw->vm); + ptdev->fw->vm = NULL; panthor_gpu_power_off(ptdev, L2, ptdev->gpu_info.l2_present, 20000); } -- cgit v1.2.3 From 3ce4322b1a3a40ca175b16fc54cf22b041ecfd4b Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Thu, 2 May 2024 20:38:12 +0200 Subject: drm/panthor: Call panthor_sched_post_reset() even if the reset failed We need to undo what was done in panthor_sched_pre_reset() even if the reset failed. We just flag all previously running groups as terminated when that happens to unblock things. 
Signed-off-by: Boris Brezillon Reviewed-by: Steven Price Reviewed-by: Liviu Dudau Link: https://patchwork.freedesktop.org/patch/msgid/20240502183813.1612017-5-boris.brezillon@collabora.com --- drivers/gpu/drm/panthor/panthor_device.c | 7 +------ drivers/gpu/drm/panthor/panthor_sched.c | 19 ++++++++++++++----- drivers/gpu/drm/panthor/panthor_sched.h | 2 +- 3 files changed, 16 insertions(+), 12 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/panthor/panthor_device.c b/drivers/gpu/drm/panthor/panthor_device.c index 4c5b54e7abb7..4082c8f2951d 100644 --- a/drivers/gpu/drm/panthor/panthor_device.c +++ b/drivers/gpu/drm/panthor/panthor_device.c @@ -129,13 +129,8 @@ static void panthor_device_reset_work(struct work_struct *work) panthor_gpu_l2_power_on(ptdev); panthor_mmu_post_reset(ptdev); ret = panthor_fw_post_reset(ptdev); - if (ret) - goto out_dev_exit; - atomic_set(&ptdev->reset.pending, 0); - panthor_sched_post_reset(ptdev); - -out_dev_exit: + panthor_sched_post_reset(ptdev, ret != 0); drm_dev_exit(cookie); if (ret) { diff --git a/drivers/gpu/drm/panthor/panthor_sched.c b/drivers/gpu/drm/panthor/panthor_sched.c index 9308596e0812..79ffcbc41d78 100644 --- a/drivers/gpu/drm/panthor/panthor_sched.c +++ b/drivers/gpu/drm/panthor/panthor_sched.c @@ -2733,15 +2733,22 @@ void panthor_sched_pre_reset(struct panthor_device *ptdev) mutex_unlock(&sched->reset.lock); } -void panthor_sched_post_reset(struct panthor_device *ptdev) +void panthor_sched_post_reset(struct panthor_device *ptdev, bool reset_failed) { struct panthor_scheduler *sched = ptdev->scheduler; struct panthor_group *group, *group_tmp; mutex_lock(&sched->reset.lock); - list_for_each_entry_safe(group, group_tmp, &sched->reset.stopped_groups, run_node) + list_for_each_entry_safe(group, group_tmp, &sched->reset.stopped_groups, run_node) { + /* Consider all previously running group as terminated if the + * reset failed. 
+ */ + if (reset_failed) + group->state = PANTHOR_CS_GROUP_TERMINATED; + panthor_group_start(group); + } /* We're done resetting the GPU, clear the reset.in_progress bit so we can * kick the scheduler. @@ -2749,9 +2756,11 @@ void panthor_sched_post_reset(struct panthor_device *ptdev) atomic_set(&sched->reset.in_progress, false); mutex_unlock(&sched->reset.lock); - sched_queue_delayed_work(sched, tick, 0); - - sched_queue_work(sched, sync_upd); + /* No need to queue a tick and update syncs if the reset failed. */ + if (!reset_failed) { + sched_queue_delayed_work(sched, tick, 0); + sched_queue_work(sched, sync_upd); + } } static void group_sync_upd_work(struct work_struct *work) diff --git a/drivers/gpu/drm/panthor/panthor_sched.h b/drivers/gpu/drm/panthor/panthor_sched.h index 66438b1f331f..3a30d2328b30 100644 --- a/drivers/gpu/drm/panthor/panthor_sched.h +++ b/drivers/gpu/drm/panthor/panthor_sched.h @@ -40,7 +40,7 @@ void panthor_group_pool_destroy(struct panthor_file *pfile); int panthor_sched_init(struct panthor_device *ptdev); void panthor_sched_unplug(struct panthor_device *ptdev); void panthor_sched_pre_reset(struct panthor_device *ptdev); -void panthor_sched_post_reset(struct panthor_device *ptdev); +void panthor_sched_post_reset(struct panthor_device *ptdev, bool reset_failed); void panthor_sched_suspend(struct panthor_device *ptdev); void panthor_sched_resume(struct panthor_device *ptdev); -- cgit v1.2.3 From 959314c438caf1b62d787f02d54a193efda38880 Mon Sep 17 00:00:00 2001 From: Mohamed Ahmed Date: Thu, 9 May 2024 23:43:52 +0300 Subject: drm/nouveau: use tile_mode and pte_kind for VM_BIND bo allocations Allow PTE kind and tile mode on BO create with VM_BIND, and add a GETPARAM to indicate this change. This is needed to support modifiers in NVK and ensure correctness when dealing with the nouveau GL driver. 
The userspace modifiers implementation this is for can be found here: https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24795 Fixes: b88baab82871 ("drm/nouveau: implement new VM_BIND uAPI") Signed-off-by: Mohamed Ahmed Reviewed-by: Faith Ekstrand Signed-off-by: Danilo Krummrich Link: https://patchwork.freedesktop.org/patch/msgid/20240509204352.7597-1-mohamedahmedegypt2001@gmail.com --- drivers/gpu/drm/nouveau/nouveau_abi16.c | 3 +++ drivers/gpu/drm/nouveau/nouveau_bo.c | 44 ++++++++++++++------------------- include/uapi/drm/nouveau_drm.h | 7 ++++++ 3 files changed, 29 insertions(+), 25 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/nouveau/nouveau_abi16.c b/drivers/gpu/drm/nouveau/nouveau_abi16.c index f465fe93b1f7..d56909071de6 100644 --- a/drivers/gpu/drm/nouveau/nouveau_abi16.c +++ b/drivers/gpu/drm/nouveau/nouveau_abi16.c @@ -272,6 +272,9 @@ nouveau_abi16_ioctl_getparam(ABI16_IOCTL_ARGS) getparam->value = (u64)ttm_resource_manager_usage(vram_mgr); break; } + case NOUVEAU_GETPARAM_HAS_VMA_TILEMODE: + getparam->value = 1; + break; default: NV_PRINTK(dbg, cli, "unknown parameter %lld\n", getparam->param); return -EINVAL; diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index 1e2d28fd10dc..70fb003a6666 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -241,28 +241,28 @@ nouveau_bo_alloc(struct nouveau_cli *cli, u64 *size, int *align, u32 domain, } nvbo->contig = !(tile_flags & NOUVEAU_GEM_TILE_NONCONTIG); - if (!nouveau_cli_uvmm(cli) || internal) { - /* for BO noVM allocs, don't assign kinds */ - if (cli->device.info.family >= NV_DEVICE_INFO_V0_FERMI) { - nvbo->kind = (tile_flags & 0x0000ff00) >> 8; - if (!nvif_mmu_kind_valid(mmu, nvbo->kind)) { - kfree(nvbo); - return ERR_PTR(-EINVAL); - } - nvbo->comp = mmu->kind[nvbo->kind] != nvbo->kind; - } else if (cli->device.info.family >= NV_DEVICE_INFO_V0_TESLA) { - nvbo->kind = (tile_flags & 0x00007f00) >> 8; - 
nvbo->comp = (tile_flags & 0x00030000) >> 16; - if (!nvif_mmu_kind_valid(mmu, nvbo->kind)) { - kfree(nvbo); - return ERR_PTR(-EINVAL); - } - } else { - nvbo->zeta = (tile_flags & 0x00000007); + if (cli->device.info.family >= NV_DEVICE_INFO_V0_FERMI) { + nvbo->kind = (tile_flags & 0x0000ff00) >> 8; + if (!nvif_mmu_kind_valid(mmu, nvbo->kind)) { + kfree(nvbo); + return ERR_PTR(-EINVAL); + } + + nvbo->comp = mmu->kind[nvbo->kind] != nvbo->kind; + } else if (cli->device.info.family >= NV_DEVICE_INFO_V0_TESLA) { + nvbo->kind = (tile_flags & 0x00007f00) >> 8; + nvbo->comp = (tile_flags & 0x00030000) >> 16; + if (!nvif_mmu_kind_valid(mmu, nvbo->kind)) { + kfree(nvbo); + return ERR_PTR(-EINVAL); } - nvbo->mode = tile_mode; + } else { + nvbo->zeta = (tile_flags & 0x00000007); + } + nvbo->mode = tile_mode; + if (!nouveau_cli_uvmm(cli) || internal) { /* Determine the desirable target GPU page size for the buffer. */ for (i = 0; i < vmm->page_nr; i++) { /* Because we cannot currently allow VMM maps to fail @@ -304,12 +304,6 @@ nouveau_bo_alloc(struct nouveau_cli *cli, u64 *size, int *align, u32 domain, } nvbo->page = vmm->page[pi].shift; } else { - /* reject other tile flags when in VM mode. */ - if (tile_mode) - return ERR_PTR(-EINVAL); - if (tile_flags & ~NOUVEAU_GEM_TILE_NONCONTIG) - return ERR_PTR(-EINVAL); - /* Determine the desirable target GPU page size for the buffer. */ for (i = 0; i < vmm->page_nr; i++) { /* Because we cannot currently allow VMM maps to fail diff --git a/include/uapi/drm/nouveau_drm.h b/include/uapi/drm/nouveau_drm.h index 8ad8d1cd1566..dd87f8f30793 100644 --- a/include/uapi/drm/nouveau_drm.h +++ b/include/uapi/drm/nouveau_drm.h @@ -68,6 +68,13 @@ extern "C" { */ #define NOUVEAU_GETPARAM_VRAM_USED 19 +/* + * NOUVEAU_GETPARAM_HAS_VMA_TILEMODE + * + * Query whether tile mode and PTE kind are accepted with VM allocs or not. 
+ */ +#define NOUVEAU_GETPARAM_HAS_VMA_TILEMODE 20 + struct drm_nouveau_getparam { __u64 param; __u64 value; -- cgit v1.2.3 From 5a5a10d9db77939a22e1d65fc0a4ba6b5d8f4fce Mon Sep 17 00:00:00 2001 From: Arunpravin Paneer Selvam Date: Fri, 17 May 2024 20:03:05 +0530 Subject: drm/buddy: Fix the warn on's during force merge Move the fallback and block incompatible checks above, so that we don't unnecessarily split the blocks and leave them unmerged. This resolves the unnecessary warn on's thrown during force_merge call. v2:(Matthew) - Move the fallback and block incompatible checks above the contains check. Signed-off-by: Arunpravin Paneer Selvam Reviewed-by: Matthew Auld Fixes: 96950929eb23 ("drm/buddy: Implement tracking clear page feature") Link: https://patchwork.kernel.org/project/dri-devel/patch/20240517135015.17565-1-Arunpravin.PaneerSelvam@amd.com/ Signed-off-by: Dave Airlie Link: https://patchwork.freedesktop.org/patch/msgid/20240517143305.17894-1-Arunpravin.PaneerSelvam@amd.com --- drivers/gpu/drm/drm_buddy.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c index 1daf778cf6fa..94f8c34fc293 100644 --- a/drivers/gpu/drm/drm_buddy.c +++ b/drivers/gpu/drm/drm_buddy.c @@ -524,11 +524,11 @@ __alloc_range_bias(struct drm_buddy *mm, continue; } + if (!fallback && block_incompatible(block, flags)) + continue; + if (contains(start, end, block_start, block_end) && order == drm_buddy_block_order(block)) { - if (!fallback && block_incompatible(block, flags)) - continue; - /* * Find the free block within the range. 
*/ -- cgit v1.2.3 From 20da948e3a807c67f0efe4f665e64728be370f3d Mon Sep 17 00:00:00 2001 From: Liu Ying Date: Mon, 13 May 2024 16:02:43 +0800 Subject: drm/bridge: adv7511: Attach next bridge without creating connector The connector is created by either this ADV7511 bridge driver or any DRM device driver/previous bridge driver, so this ADV7511 bridge driver should not let the next bridge driver create connector. If the next bridge is a HDMI connector, the next bridge driver would fail to attach bridge from display_connector_attach() without the DRM_BRIDGE_ATTACH_NO_CONNECTOR flag. Add that flag to drm_bridge_attach() function call in adv7511_bridge_attach() to fix the issue. This fixes the issue where the HDMI connector bridge fails to attach to the previous ADV7535 bridge on i.MX8MP EVK platform: [ 2.216442] [drm:drm_bridge_attach] *ERROR* failed to attach bridge /hdmi-connector to encoder None-37: -22 [ 2.220675] mmc1: SDHCI controller on 30b50000.mmc [30b50000.mmc] using ADMA [ 2.226262] [drm:drm_bridge_attach] *ERROR* failed to attach bridge /soc@0/bus@30800000/i2c@30a30000/hdmi@3d to encoder None-37: -22 [ 2.245204] [drm:drm_bridge_attach] *ERROR* failed to attach bridge /soc@0/bus@32c00000/dsi@32e60000 to encoder None-37: -22 [ 2.256445] imx-lcdif 32e80000.display-controller: error -EINVAL: Failed to attach bridge for endpoint0 [ 2.265850] imx-lcdif 32e80000.display-controller: error -EINVAL: Cannot connect bridge [ 2.274009] imx-lcdif 32e80000.display-controller: probe with driver imx-lcdif failed with error -22 Fixes: 14b3cdbd0e5b ("drm/bridge: adv7511: make it honour next bridge in DT") Signed-off-by: Liu Ying Reviewed-by: Dmitry Baryshkov Reviewed-by: Robert Foss Acked-by: Sui Jingfeng Reviewed-by: Laurent Pinchart Link: https://patchwork.freedesktop.org/patch/msgid/20240513080243.3952292-1-victor.liu@nxp.com Signed-off-by: Dmitry Baryshkov --- drivers/gpu/drm/bridge/adv7511/adv7511_drv.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 
'drivers') diff --git a/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c b/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c index dd21b81bd28f..66ccb61e2a66 100644 --- a/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c +++ b/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c @@ -953,7 +953,8 @@ static int adv7511_bridge_attach(struct drm_bridge *bridge, int ret = 0; if (adv->next_bridge) { - ret = drm_bridge_attach(bridge->encoder, adv->next_bridge, bridge, flags); + ret = drm_bridge_attach(bridge->encoder, adv->next_bridge, bridge, + flags | DRM_BRIDGE_ATTACH_NO_CONNECTOR); if (ret) return ret; } -- cgit v1.2.3 From 2a705f3e49d20b59cd9e5cc3061b2d92ebe1e5f0 Mon Sep 17 00:00:00 2001 From: Lang Yu Date: Thu, 11 Apr 2024 17:14:17 +0800 Subject: drm/amdkfd: handle duplicate BOs in reserve_bo_and_cond_vms Observed on gfx8 ASIC where KFD_IOC_ALLOC_MEM_FLAGS_AQL_QUEUE_MEM is used. Two attachments use the same VM, root PD would be locked twice. [ 57.910418] Call Trace: [ 57.793726] ? reserve_bo_and_cond_vms+0x111/0x1c0 [amdgpu] [ 57.793820] amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu+0x6c/0x1c0 [amdgpu] [ 57.793923] ? idr_get_next_ul+0xbe/0x100 [ 57.793933] kfd_process_device_free_bos+0x7e/0xf0 [amdgpu] [ 57.794041] kfd_process_wq_release+0x2ae/0x3c0 [amdgpu] [ 57.794141] ? process_scheduled_works+0x29c/0x580 [ 57.794147] process_scheduled_works+0x303/0x580 [ 57.794157] ? __pfx_worker_thread+0x10/0x10 [ 57.794160] worker_thread+0x1a2/0x370 [ 57.794165] ? __pfx_worker_thread+0x10/0x10 [ 57.794167] kthread+0x11b/0x150 [ 57.794172] ? __pfx_kthread+0x10/0x10 [ 57.794177] ret_from_fork+0x3d/0x60 [ 57.794181] ? 
__pfx_kthread+0x10/0x10 [ 57.794184] ret_from_fork_asm+0x1b/0x30 Signed-off-by: Lang Yu Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index e4d4e55c08ad..0535b07987d9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -1188,7 +1188,8 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem, int ret; ctx->sync = &mem->sync; - drm_exec_init(&ctx->exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0); + drm_exec_init(&ctx->exec, DRM_EXEC_INTERRUPTIBLE_WAIT | + DRM_EXEC_IGNORE_DUPLICATES, 0); drm_exec_until_all_locked(&ctx->exec) { ctx->n_vms = 0; list_for_each_entry(entry, &mem->attachments, list) { -- cgit v1.2.3 From eb853413d02c8d9b27942429b261a9eef228f005 Mon Sep 17 00:00:00 2001 From: Lang Yu Date: Fri, 26 Apr 2024 14:56:35 +0800 Subject: drm/amdkfd: Let VRAM allocations go to GTT domain on small APUs Small APUs(i.e., consumer, embedded products) usually have a small carveout device memory which can't satisfy most compute workloads memory allocation requirements. We can't even run a Basic MNIST Example with a default 512MB carveout. https://github.com/pytorch/examples/tree/main/mnist. Error Log: "torch.cuda.OutOfMemoryError: HIP out of memory. Tried to allocate 84.00 MiB. GPU 0 has a total capacity of 512.00 MiB of which 0 bytes is free. Of the allocated memory 103.83 MiB is allocated by PyTorch, and 22.17 MiB is reserved by PyTorch but unallocated" Though we can change BIOS settings to enlarge carveout size, which is inflexible and may bring complaint. On the other hand, the memory resource can't be effectively used between host and device. The solution is MI300A approach, i.e., let VRAM allocations go to GTT. 
Then device and host can flexibly and effectively share memory resource. v2: Report local_mem_size_private as 0. (Felix) Signed-off-by: Lang Yu Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 5 +++++ drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 20 +++++++++++--------- drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 2 +- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 6 ++++-- drivers/gpu/drm/amd/amdkfd/kfd_svm.h | 3 ++- 5 files changed, 23 insertions(+), 13 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 7ba05f030dd1..e3738d417245 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -455,6 +455,9 @@ void amdgpu_amdkfd_get_local_mem_info(struct amdgpu_device *adev, else mem_info->local_mem_size_private = KFD_XCP_MEMORY_SIZE(adev, xcp->id); + } else if (adev->flags & AMD_IS_APU) { + mem_info->local_mem_size_public = (ttm_tt_pages_limit() << PAGE_SHIFT); + mem_info->local_mem_size_private = 0; } else { mem_info->local_mem_size_public = adev->gmc.visible_vram_size; mem_info->local_mem_size_private = adev->gmc.real_vram_size - @@ -824,6 +827,8 @@ u64 amdgpu_amdkfd_xcp_memory_size(struct amdgpu_device *adev, int xcp_id) } do_div(tmp, adev->xcp_mgr->num_xcp_per_mem_partition); return ALIGN_DOWN(tmp, PAGE_SIZE); + } else if (adev->flags & AMD_IS_APU) { + return (ttm_tt_pages_limit() << PAGE_SHIFT); } else { return adev->gmc.real_vram_size; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 0535b07987d9..8975cf41a91a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -196,7 +196,7 @@ int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev, return -EINVAL; vram_size = KFD_XCP_MEMORY_SIZE(adev, xcp_id); - if (adev->gmc.is_app_apu) { + if 
(adev->gmc.is_app_apu || adev->flags & AMD_IS_APU) { system_mem_needed = size; ttm_mem_needed = size; } @@ -232,7 +232,8 @@ int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev, "adev reference can't be null when vram is used"); if (adev && xcp_id >= 0) { adev->kfd.vram_used[xcp_id] += vram_needed; - adev->kfd.vram_used_aligned[xcp_id] += adev->gmc.is_app_apu ? + adev->kfd.vram_used_aligned[xcp_id] += + (adev->gmc.is_app_apu || adev->flags & AMD_IS_APU) ? vram_needed : ALIGN(vram_needed, VRAM_AVAILABLITY_ALIGN); } @@ -260,7 +261,7 @@ void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev, if (adev) { adev->kfd.vram_used[xcp_id] -= size; - if (adev->gmc.is_app_apu) { + if (adev->gmc.is_app_apu || adev->flags & AMD_IS_APU) { adev->kfd.vram_used_aligned[xcp_id] -= size; kfd_mem_limit.system_mem_used -= size; kfd_mem_limit.ttm_mem_used -= size; @@ -889,7 +890,7 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem, * if peer device has large BAR. In contrast, access over xGMI is * allowed for both small and large BAR configurations of peer device */ - if ((adev != bo_adev && !adev->gmc.is_app_apu) && + if ((adev != bo_adev && !(adev->gmc.is_app_apu || adev->flags & AMD_IS_APU)) && ((mem->domain == AMDGPU_GEM_DOMAIN_VRAM) || (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) || (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP))) { @@ -1657,7 +1658,7 @@ size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev, - atomic64_read(&adev->vram_pin_size) - reserved_for_pt; - if (adev->gmc.is_app_apu) { + if (adev->gmc.is_app_apu || adev->flags & AMD_IS_APU) { system_mem_available = no_system_mem_limit ? 
kfd_mem_limit.max_system_mem_limit : kfd_mem_limit.max_system_mem_limit - @@ -1705,7 +1706,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( if (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) { domain = alloc_domain = AMDGPU_GEM_DOMAIN_VRAM; - if (adev->gmc.is_app_apu) { + if (adev->gmc.is_app_apu || adev->flags & AMD_IS_APU) { domain = AMDGPU_GEM_DOMAIN_GTT; alloc_domain = AMDGPU_GEM_DOMAIN_GTT; alloc_flags = 0; @@ -1952,7 +1953,7 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( if (size) { if (!is_imported && (mem->bo->preferred_domains == AMDGPU_GEM_DOMAIN_VRAM || - (adev->gmc.is_app_apu && + ((adev->gmc.is_app_apu || adev->flags & AMD_IS_APU) && mem->bo->preferred_domains == AMDGPU_GEM_DOMAIN_GTT))) *size = bo_size; else @@ -2374,8 +2375,9 @@ static int import_obj_create(struct amdgpu_device *adev, (*mem)->dmabuf = dma_buf; (*mem)->bo = bo; (*mem)->va = va; - (*mem)->domain = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) && !adev->gmc.is_app_apu ? - AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT; + (*mem)->domain = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) && + !(adev->gmc.is_app_apu || adev->flags & AMD_IS_APU) ? 
+ AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT; (*mem)->mapped_to_gpu_memory = 0; (*mem)->process_info = avm->process_info; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c index 4bcfbeac48fb..4816fcb9803a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c @@ -1023,7 +1023,7 @@ int kgd2kfd_init_zone_device(struct amdgpu_device *adev) if (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(9, 0, 1)) return -EINVAL; - if (adev->gmc.is_app_apu) + if (adev->gmc.is_app_apu || adev->flags & AMD_IS_APU) return 0; pgmap = &kfddev->pgmap; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 386875e6eb96..069b81eeea03 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -2619,7 +2619,8 @@ svm_range_best_restore_location(struct svm_range *prange, return -1; } - if (node->adev->gmc.is_app_apu) + if (node->adev->gmc.is_app_apu || + node->adev->flags & AMD_IS_APU) return 0; if (prange->preferred_loc == gpuid || @@ -3337,7 +3338,8 @@ svm_range_best_prefetch_location(struct svm_range *prange) goto out; } - if (bo_node->adev->gmc.is_app_apu) { + if (bo_node->adev->gmc.is_app_apu || + bo_node->adev->flags & AMD_IS_APU) { best_loc = 0; goto out; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h index 026863a0abcd..9c37bd0567ef 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h @@ -201,7 +201,8 @@ void svm_range_list_lock_and_flush_work(struct svm_range_list *svms, struct mm_s * is initialized to not 0 when page migration register device memory. 
*/ #define KFD_IS_SVM_API_SUPPORTED(adev) ((adev)->kfd.pgmap.type != 0 ||\ - (adev)->gmc.is_app_apu) + (adev)->gmc.is_app_apu ||\ + ((adev)->flags & AMD_IS_APU)) void svm_range_bo_unref_async(struct svm_range_bo *svm_bo); -- cgit v1.2.3 From 5a507b7d2be15fddb95bf8dee01110b723e2bcd9 Mon Sep 17 00:00:00 2001 From: Wayne Lin Date: Thu, 7 Mar 2024 14:29:57 +0800 Subject: drm/mst: Fix NULL pointer dereference at drm_dp_add_payload_part2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [Why] Commit: - commit 5aa1dfcdf0a4 ("drm/mst: Refactor the flow for payload allocation/removement") accidentally overwrote the commit - commit 54d217406afe ("drm: use mgr->dev in drm_dbg_kms in drm_dp_add_payload_part2") which caused a regression. [How] Recover the original NULL fix and remove the unnecessary input parameter 'state' for drm_dp_add_payload_part2(). Fixes: 5aa1dfcdf0a4 ("drm/mst: Refactor the flow for payload allocation/removement") Reported-by: Leon Weiß Link: https://lore.kernel.org/r/38c253ea42072cc825dc969ac4e6b9b600371cc8.camel@ruhr-uni-bochum.de/ Cc: lyude@redhat.com Cc: imre.deak@intel.com Cc: stable@vger.kernel.org Cc: regressions@lists.linux.dev Reviewed-by: Harry Wentland Acked-by: Jani Nikula Signed-off-by: Wayne Lin Signed-off-by: Alex Deucher Link: https://patchwork.freedesktop.org/patch/msgid/20240307062957.2323620-1-Wayne.Lin@amd.com (cherry picked from commit 4545614c1d8da603e57b60dd66224d81b6ffc305) --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c | 2 +- drivers/gpu/drm/display/drm_dp_mst_topology.c | 4 +--- drivers/gpu/drm/i915/display/intel_dp_mst.c | 2 +- drivers/gpu/drm/nouveau/dispnv50/disp.c | 2 +- include/drm/display/drm_dp_mst_helper.h | 1 - 5 files changed, 4 insertions(+), 7 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c index c27063305a13..2c36f3d00ca2 100644 --- 
a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c @@ -363,7 +363,7 @@ void dm_helpers_dp_mst_send_payload_allocation( mst_state = to_drm_dp_mst_topology_state(mst_mgr->base.state); new_payload = drm_atomic_get_mst_payload_state(mst_state, aconnector->mst_output_port); - ret = drm_dp_add_payload_part2(mst_mgr, mst_state->base.state, new_payload); + ret = drm_dp_add_payload_part2(mst_mgr, new_payload); if (ret) { amdgpu_dm_set_mst_status(&aconnector->mst_status, diff --git a/drivers/gpu/drm/display/drm_dp_mst_topology.c b/drivers/gpu/drm/display/drm_dp_mst_topology.c index 3577786b5db2..7f8e1cfbe19d 100644 --- a/drivers/gpu/drm/display/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/display/drm_dp_mst_topology.c @@ -3421,7 +3421,6 @@ EXPORT_SYMBOL(drm_dp_remove_payload_part2); /** * drm_dp_add_payload_part2() - Execute payload update part 2 * @mgr: Manager to use. - * @state: The global atomic state * @payload: The payload to update * * If @payload was successfully assigned a starting time slot by drm_dp_add_payload_part1(), this @@ -3430,14 +3429,13 @@ EXPORT_SYMBOL(drm_dp_remove_payload_part2); * Returns: 0 on success, negative error code on failure. 
*/ int drm_dp_add_payload_part2(struct drm_dp_mst_topology_mgr *mgr, - struct drm_atomic_state *state, struct drm_dp_mst_atomic_payload *payload) { int ret = 0; /* Skip failed payloads */ if (payload->payload_allocation_status != DRM_DP_MST_PAYLOAD_ALLOCATION_DFP) { - drm_dbg_kms(state->dev, "Part 1 of payload creation for %s failed, skipping part 2\n", + drm_dbg_kms(mgr->dev, "Part 1 of payload creation for %s failed, skipping part 2\n", payload->port->connector->name); return -EIO; } diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index c772ba19c547..715d2f59f565 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -1241,7 +1241,7 @@ static void intel_mst_enable_dp(struct intel_atomic_state *state, if (first_mst_stream) intel_ddi_wait_for_fec_status(encoder, pipe_config, true); - drm_dp_add_payload_part2(&intel_dp->mst_mgr, &state->base, + drm_dp_add_payload_part2(&intel_dp->mst_mgr, drm_atomic_get_mst_payload_state(mst_state, connector->port)); if (DISPLAY_VER(dev_priv) >= 12) diff --git a/drivers/gpu/drm/nouveau/dispnv50/disp.c b/drivers/gpu/drm/nouveau/dispnv50/disp.c index 0c3d88ad0b0e..88728a0b2c25 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/disp.c +++ b/drivers/gpu/drm/nouveau/dispnv50/disp.c @@ -915,7 +915,7 @@ nv50_msto_cleanup(struct drm_atomic_state *state, msto->disabled = false; drm_dp_remove_payload_part2(mgr, new_mst_state, old_payload, new_payload); } else if (msto->enabled) { - drm_dp_add_payload_part2(mgr, state, new_payload); + drm_dp_add_payload_part2(mgr, new_payload); msto->enabled = false; } } diff --git a/include/drm/display/drm_dp_mst_helper.h b/include/drm/display/drm_dp_mst_helper.h index 3546b58a121b..cfe096389d94 100644 --- a/include/drm/display/drm_dp_mst_helper.h +++ b/include/drm/display/drm_dp_mst_helper.h @@ -871,7 +871,6 @@ int drm_dp_add_payload_part1(struct drm_dp_mst_topology_mgr *mgr, struct 
drm_dp_mst_topology_state *mst_state, struct drm_dp_mst_atomic_payload *payload); int drm_dp_add_payload_part2(struct drm_dp_mst_topology_mgr *mgr, - struct drm_atomic_state *state, struct drm_dp_mst_atomic_payload *payload); void drm_dp_remove_payload_part1(struct drm_dp_mst_topology_mgr *mgr, struct drm_dp_mst_topology_state *mst_state, -- cgit v1.2.3 From e64e8f7c178e5228e0b2dbb504b9dc75953a319f Mon Sep 17 00:00:00 2001 From: Li Ma Date: Mon, 20 May 2024 18:43:55 +0800 Subject: drm/amdgpu/atomfirmware: add intergrated info v2.3 table [Why] The vram width value is 0 because the integratedsysteminfo table in VBIOS has been updated to 2.3. [How] Driver needs a new integrated info v2.3 table too. Then the vram width value will be correct. Signed-off-by: Li Ma Reviewed-by: Yifan Zhang Acked-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c | 15 +++++++++ drivers/gpu/drm/amd/include/atomfirmware.h | 43 ++++++++++++++++++++++++ 2 files changed, 58 insertions(+) (limited to 'drivers') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c index a6d64bdbbb14..108003bdf1e9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c @@ -212,6 +212,7 @@ union igp_info { struct atom_integrated_system_info_v1_11 v11; struct atom_integrated_system_info_v1_12 v12; struct atom_integrated_system_info_v2_1 v21; + struct atom_integrated_system_info_v2_3 v23; }; union umc_info { @@ -360,6 +361,20 @@ amdgpu_atomfirmware_get_vram_info(struct amdgpu_device *adev, if (vram_type) *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type); break; + case 3: + mem_channel_number = igp_info->v23.umachannelnumber; + if (!mem_channel_number) + mem_channel_number = 1; + mem_type = igp_info->v23.memorytype; + if (mem_type == LpDdr5MemType) + mem_channel_width = 32; + else + mem_channel_width = 64; + if 
(vram_width) + *vram_width = mem_channel_number * mem_channel_width; + if (vram_type) + *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type); + break; default: return -EINVAL; } diff --git a/drivers/gpu/drm/amd/include/atomfirmware.h b/drivers/gpu/drm/amd/include/atomfirmware.h index af3eebb4c9bc..1acb2d2c5597 100644 --- a/drivers/gpu/drm/amd/include/atomfirmware.h +++ b/drivers/gpu/drm/amd/include/atomfirmware.h @@ -1657,6 +1657,49 @@ struct atom_integrated_system_info_v2_2 uint32_t reserved4[189]; }; +struct uma_carveout_option { + char optionName[29]; //max length of string is 28chars + '\0'. Current design is for "minimum", "Medium", "High". This makes entire struct size 64bits + uint8_t memoryCarvedGb; //memory carved out with setting + uint8_t memoryRemainingGb; //memory remaining on system + union { + struct _flags { + uint8_t Auto : 1; + uint8_t Custom : 1; + uint8_t Reserved : 6; + } flags; + uint8_t all8; + } uma_carveout_option_flags; +}; + +struct atom_integrated_system_info_v2_3 { + struct atom_common_table_header table_header; + uint32_t vbios_misc; // enum of atom_system_vbiosmisc_def + uint32_t gpucapinfo; // enum of atom_system_gpucapinf_def + uint32_t system_config; + uint32_t cpucapinfo; + uint16_t gpuclk_ss_percentage; // unit of 0.001%, 1000 mean 1% + uint16_t gpuclk_ss_type; + uint16_t dpphy_override; // bit vector, enum of atom_sysinfo_dpphy_override_def + uint8_t memorytype; // enum of atom_dmi_t17_mem_type_def, APU memory type indication. 
+ uint8_t umachannelnumber; // number of memory channels + uint8_t htc_hyst_limit; + uint8_t htc_tmp_limit; + uint8_t reserved1; // dp_ss_control + uint8_t gpu_package_id; + struct edp_info_table edp1_info; + struct edp_info_table edp2_info; + uint32_t reserved2[8]; + struct atom_external_display_connection_info extdispconninfo; + uint8_t UMACarveoutVersion; + uint8_t UMACarveoutIndexMax; + uint8_t UMACarveoutTypeDefault; + uint8_t UMACarveoutIndexDefault; + uint8_t UMACarveoutType; //Auto or Custom + uint8_t UMACarveoutIndex; + struct uma_carveout_option UMASizeControlOption[20]; + uint8_t reserved3[110]; +}; + // system_config enum atom_system_vbiosmisc_def{ INTEGRATED_SYSTEM_INFO__GET_EDID_CALLBACK_FUNC_SUPPORT = 0x01, -- cgit v1.2.3