From 46bcb0a1214ac6677df8660ac0f6bdf1eff27e8f Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 9 Oct 2024 17:05:10 +0100 Subject: drm/xe/guc: Fix inverted logic on snapshot->copy check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently the check to see if snapshot->copy has been allocated is inverted and ends up dereferencing snapshot->copy when free'ing objects in the array when it is null or not free'ing the objects when snapshot->copy is allocated. Fix this by using the correct non-null pointer check logic. Fixes: d8ce1a977226 ("drm/xe/guc: Use a two stage dump for GuC logs and add more info") Signed-off-by: Colin Ian King Reviewed-by: John Harrison Signed-off-by: Thomas Hellström Link: https://patchwork.freedesktop.org/patch/msgid/20241009160510.372195-1-colin.i.king@gmail.com --- drivers/gpu/drm/xe/xe_guc_log.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/xe/xe_guc_log.c b/drivers/gpu/drm/xe/xe_guc_log.c index 93921f04153f..cc70f448d879 100644 --- a/drivers/gpu/drm/xe/xe_guc_log.c +++ b/drivers/gpu/drm/xe/xe_guc_log.c @@ -122,7 +122,7 @@ void xe_guc_log_snapshot_free(struct xe_guc_log_snapshot *snapshot) if (!snapshot) return; - if (!snapshot->copy) { + if (snapshot->copy) { for (i = 0; i < snapshot->num_chunks; i++) kfree(snapshot->copy[i]); kfree(snapshot->copy); -- cgit v1.2.3 From a4de6beb83fc5adee788518350247c629568901e Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Wed, 9 Oct 2024 22:43:57 +0300 Subject: drm/xe/display: Separate the d3cold and non-d3cold runtime PM handling For clarity separate the d3cold and non-d3cold runtime PM handling. The only change in behavior is disabling polling later during runtime resume. This shouldn't make a difference, since the poll disabling is handled from a work, which could run at any point wrt. the runtime resume handler. The work will also require a runtime PM reference, syncing it with the resume handler. Cc: Rodrigo Vivi Reviewed-by: Jonathan Cavitt Signed-off-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20241009194358.1321200-4-imre.deak@intel.com --- drivers/gpu/drm/xe/display/xe_display.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/xe/display/xe_display.c b/drivers/gpu/drm/xe/display/xe_display.c index 26b2cae11d46..9ef75a20c3c0 100644 --- a/drivers/gpu/drm/xe/display/xe_display.c +++ b/drivers/gpu/drm/xe/display/xe_display.c @@ -341,6 +341,9 @@ static void __xe_display_pm_suspend(struct xe_device *xe, bool runtime) intel_opregion_suspend(display, s2idle ? PCI_D1 : PCI_D3cold); intel_dmc_suspend(xe); + + if (runtime && has_display(xe)) + intel_hpd_poll_enable(xe); } void xe_display_pm_suspend(struct xe_device *xe) @@ -383,8 +386,10 @@ void xe_display_pm_runtime_suspend(struct xe_device *xe) if (!xe->info.probe_display) return; - if (xe->d3cold.allowed) + if (xe->d3cold.allowed) { __xe_display_pm_suspend(xe, true); + return; + } intel_hpd_poll_enable(xe); } @@ -447,9 +452,11 @@ static void __xe_display_pm_resume(struct xe_device *xe, bool runtime) intel_display_driver_resume(xe); drm_kms_helper_poll_enable(&xe->drm); intel_display_driver_enable_user_access(xe); - intel_hpd_poll_disable(xe); } + if (has_display(xe)) + intel_hpd_poll_disable(xe); + intel_opregion_resume(display); intel_fbdev_set_suspend(&xe->drm, FBINFO_STATE_RUNNING, false); @@ -467,10 +474,12 @@ void xe_display_pm_runtime_resume(struct xe_device *xe) if (!xe->info.probe_display) return; - intel_hpd_poll_disable(xe); - - if (xe->d3cold.allowed) + if (xe->d3cold.allowed) { __xe_display_pm_resume(xe, true); + return; + } + + intel_hpd_poll_disable(xe); } -- cgit v1.2.3 From bbc4a30de095f0349d3c278500345a1b620d495e Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Wed, 9 Oct 2024 22:43:58 +0300 Subject: drm/xe/display: Add missing HPD interrupt enabling during non-d3cold RPM resume Atm the display HPD interrupts that got disabled during runtime suspend, are re-enabled only if d3cold is enabled. Fix things by also re-enabling the interrupts if d3cold is disabled. Cc: Rodrigo Vivi Reviewed-by: Jonathan Cavitt Signed-off-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20241009194358.1321200-5-imre.deak@intel.com --- drivers/gpu/drm/xe/display/xe_display.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers') diff --git a/drivers/gpu/drm/xe/display/xe_display.c b/drivers/gpu/drm/xe/display/xe_display.c index 9ef75a20c3c0..5c6b74c36b60 100644 --- a/drivers/gpu/drm/xe/display/xe_display.c +++ b/drivers/gpu/drm/xe/display/xe_display.c @@ -479,6 +479,7 @@ void xe_display_pm_runtime_resume(struct xe_device *xe) return; } + intel_hpd_init(xe); intel_hpd_poll_disable(xe); } -- cgit v1.2.3 From 90521df5fc43980e4575bd8c5b1cb62afe1a9f5f Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 2 Oct 2024 17:16:56 -0700 Subject: drm/xe: Take job list lock in xe_sched_add_pending_job A fragile micro optimization in xe_sched_add_pending_job relied on both the GPU scheduler being stopped and fence signaling stopped to safely add a job to the pending list without the job list lock in xe_sched_add_pending_job. Remove this optimization and just take the job list lock. Fixes: 7ddb9403dd74 ("drm/xe: Sample ctx timestamp to determine if jobs have timed out") Signed-off-by: Matthew Brost Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20241003001657.3517883-2-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_gpu_scheduler.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers') diff --git a/drivers/gpu/drm/xe/xe_gpu_scheduler.h b/drivers/gpu/drm/xe/xe_gpu_scheduler.h index 5ad5629a6c60..64b2ae6839db 100644 --- a/drivers/gpu/drm/xe/xe_gpu_scheduler.h +++ b/drivers/gpu/drm/xe/xe_gpu_scheduler.h @@ -63,7 +63,9 @@ xe_sched_invalidate_job(struct xe_sched_job *job, int threshold) static inline void xe_sched_add_pending_job(struct xe_gpu_scheduler *sched, struct xe_sched_job *job) { + spin_lock(&sched->base.job_list_lock); list_add(&job->drm.list, &sched->base.pending_list); + spin_unlock(&sched->base.job_list_lock); } static inline -- cgit v1.2.3 From ea2f6a77d0c40d97f4a4dc93fee4afe15d94926d Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 2 Oct 2024 17:16:57 -0700 Subject: drm/xe: Don't free job in TDR Freeing job in TDR is not safe as TDR can pass the run_job thread resulting in UAF. It is only safe for free job to naturally be called by the scheduler. Rather free job in TDR, add to pending list. Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/2811 Cc: Matthew Auld Fixes: e275d61c5f3f ("drm/xe/guc: Handle timing out of signaled jobs gracefully") Signed-off-by: Matthew Brost Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20241003001657.3517883-3-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_guc_submit.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index c1ebc693a617..0e5649b394b6 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -1105,10 +1105,13 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) /* * TDR has fired before free job worker. Common if exec queue - * immediately closed after last fence signaled. + * immediately closed after last fence signaled. Add back to pending + * list so job can be freed and kick scheduler ensuring free job is not + * lost. */ if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &job->fence->flags)) { - guc_exec_queue_free_job(drm_job); + xe_sched_add_pending_job(sched, job); + xe_sched_submission_start(sched); return DRM_GPU_SCHED_STAT_NOMINAL; } -- cgit v1.2.3 From b8b1163248759ba18509f7443a2d19b15b4c1df8 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 11 Sep 2024 08:26:22 -0700 Subject: drm/xe: Use bookkeep slots for external BO's in exec IOCTL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix external BO's dma-resv usage in exec IOCTL using bookkeep slots rather than write slots. This leaves syncing to user space rather than the KMD blindly enforcing write semantics on every external BO. Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Cc: José Roberto de Souza Cc: Kenneth Graunke Cc: Paulo Zanoni Reported-by: Simona Vetter Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/2673 Signed-off-by: Matthew Brost Reviewed-by: José Roberto de Souza Reviewed-by: Kenneth Graunke Link: https://patchwork.freedesktop.org/patch/msgid/20240911152622.903058-1-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_exec.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c index 7b38485817dc..f23ac1e2ed88 100644 --- a/drivers/gpu/drm/xe/xe_exec.c +++ b/drivers/gpu/drm/xe/xe_exec.c @@ -41,11 +41,6 @@ * user knows an exec writes to a BO and reads from the BO in the next exec, it * is the user's responsibility to pass in / out fence between the two execs). * - * Implicit dependencies for external BOs are handled by using the dma-buf - * implicit dependency uAPI (TODO: add link). To make this works each exec must - * install the job's fence into the DMA_RESV_USAGE_WRITE slot of every external - * BO mapped in the VM. - * * We do not allow a user to trigger a bind at exec time rather we have a VM * bind IOCTL which uses the same in / out fence interface as exec. In that * sense, a VM bind is basically the same operation as an exec from the user @@ -59,8 +54,8 @@ * behind any pending kernel operations on any external BOs in VM or any BOs * private to the VM. This is accomplished by the rebinds waiting on BOs * DMA_RESV_USAGE_KERNEL slot (kernel ops) and kernel ops waiting on all BOs - * slots (inflight execs are in the DMA_RESV_USAGE_BOOKING for private BOs and - * in DMA_RESV_USAGE_WRITE for external BOs). + * slots (inflight execs are in the DMA_RESV_USAGE_BOOKKEEP for private BOs and + * for external BOs). * * Rebinds / dma-resv usage applies to non-compute mode VMs only as for compute * mode VMs we use preempt fences and a rebind worker (TODO: add link). @@ -304,7 +299,8 @@ retry: xe_sched_job_arm(job); if (!xe_vm_in_lr_mode(vm)) drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, &job->drm.s_fence->finished, - DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_WRITE); + DMA_RESV_USAGE_BOOKKEEP, + DMA_RESV_USAGE_BOOKKEEP); for (i = 0; i < num_syncs; i++) { xe_sync_entry_signal(&syncs[i], &job->drm.s_fence->finished); -- cgit v1.2.3 From 9d559cdcb21f42188d4c3ff3b4fe42b240f4af5d Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Thu, 10 Oct 2024 20:56:16 -0700 Subject: drm/xe/query: Increase timestamp width Starting with Xe2 the timestamp is a full 64 bit counter, contrary to the 36 bit that was available before. Although 36 should be sufficient for any reasonable delta calculation (for Xe2, of about 30min), it's surprising to userspace to get something truncated. Also if the timestamp being compared to is coming from the GPU and the application is not careful enough to apply the width there, a delta calculation would be wrong. Extend it to full 64-bits starting with Xe2. v2: Expand width=64 to media gt, as it's just a wrong tagging in the spec - empirical tests show it goes beyond 36 bits and match the engines for the main gt Bspec: 60411 Cc: Szymon Morek Reviewed-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20241011035618.1057602-1-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_query.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c index 158629971eab..07093af0b32e 100644 --- a/drivers/gpu/drm/xe/xe_query.c +++ b/drivers/gpu/drm/xe/xe_query.c @@ -164,7 +164,11 @@ query_engine_cycles(struct xe_device *xe, cpu_clock); xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL); - resp.width = 36; + + if (GRAPHICS_VER(xe) >= 20) + resp.width = 64; + else + resp.width = 36; /* Only write to the output fields of user query */ if (put_user(resp.cpu_timestamp, &query_ptr->cpu_timestamp)) -- cgit v1.2.3 From 5c84985b07acc0fefd2d619c0bb03eed18f769b5 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Thu, 10 Oct 2024 20:56:17 -0700 Subject: drm/xe/query: Move timestamp reg to hwe_read_timestamp() __read_timestamps() is actually reading the timestamp from a certain hwe. Use it as parameter, move register declarations to be inside that function and rename it. Reviewed-by: Sai Teja Pottumuttu Reviewed-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20241011035618.1057602-2-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_query.c | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c index 07093af0b32e..69307ff4146a 100644 --- a/drivers/gpu/drm/xe/xe_query.c +++ b/drivers/gpu/drm/xe/xe_query.c @@ -85,16 +85,13 @@ static __ktime_func_t __clock_id_to_func(clockid_t clk_id) } static void -__read_timestamps(struct xe_gt *gt, - struct xe_reg lower_reg, - struct xe_reg upper_reg, - u64 *engine_ts, - u64 *cpu_ts, - u64 *cpu_delta, - __ktime_func_t cpu_clock) +hwe_read_timestamp(struct xe_hw_engine *hwe, u64 *engine_ts, u64 *cpu_ts, + u64 *cpu_delta, __ktime_func_t cpu_clock) { - struct xe_mmio *mmio = >->mmio; + struct xe_mmio *mmio = &hwe->gt->mmio; u32 upper, lower, old_upper, loop = 0; + struct xe_reg upper_reg = RING_TIMESTAMP_UDW(hwe->mmio_base), + lower_reg = RING_TIMESTAMP(hwe->mmio_base); upper = xe_mmio_read32(mmio, upper_reg); do { @@ -155,13 +152,8 @@ query_engine_cycles(struct xe_device *xe, if (xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL)) return -EIO; - __read_timestamps(gt, - RING_TIMESTAMP(hwe->mmio_base), - RING_TIMESTAMP_UDW(hwe->mmio_base), - &resp.engine_cycles, - &resp.cpu_timestamp, - &resp.cpu_delta, - cpu_clock); + hwe_read_timestamp(hwe, &resp.engine_cycles, &resp.cpu_timestamp, + &resp.cpu_delta, cpu_clock); xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL); -- cgit v1.2.3 From 735be7acc52fe8f9e29c4327de0993f2c946acba Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Thu, 10 Oct 2024 20:56:18 -0700 Subject: drm/xe/query: Tidy up error EFAULT returns Move the error handling together in a single branch since all of them are doing similar thing and return the same error. Reviewed-by: Sai Teja Pottumuttu Reviewed-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20241011035618.1057602-3-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_query.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c index 69307ff4146a..5093a243e9fe 100644 --- a/drivers/gpu/drm/xe/xe_query.c +++ b/drivers/gpu/drm/xe/xe_query.c @@ -163,16 +163,10 @@ query_engine_cycles(struct xe_device *xe, resp.width = 36; /* Only write to the output fields of user query */ - if (put_user(resp.cpu_timestamp, &query_ptr->cpu_timestamp)) - return -EFAULT; - - if (put_user(resp.cpu_delta, &query_ptr->cpu_delta)) - return -EFAULT; - - if (put_user(resp.engine_cycles, &query_ptr->engine_cycles)) - return -EFAULT; - - if (put_user(resp.width, &query_ptr->width)) + if (put_user(resp.cpu_timestamp, &query_ptr->cpu_timestamp) || + put_user(resp.cpu_delta, &query_ptr->cpu_delta) || + put_user(resp.engine_cycles, &query_ptr->engine_cycles) || + put_user(resp.width, &query_ptr->width)) return -EFAULT; return 0; -- cgit v1.2.3 From ec7e6a1d527755fc3c7a3303eaa5577aac5cf6be Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Fri, 11 Oct 2024 17:10:29 +0200 Subject: drm/xe/ufence: ufence can be signaled right after wait_woken do_comapre() can return success after a timedout wait_woken() which was treated as -ETIME. The loop calling wait_woken() sets correct err so there is no need to re-evaluate err. v2: Remove entire check that reevaluate err at the end(Matt) Fixes: e670f0b4ef24 ("drm/xe/uapi: Return correct error code for xe_wait_user_fence_ioctl") Link: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/1630 Cc: stable@vger.kernel.org # v6.8+ Cc: Bommu Krishnaiah Cc: Matthew Auld Cc: Matthew Brost Reviewed-by: Matthew Brost Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20241011151029.4160630-1-nirmoy.das@intel.com Signed-off-by: Nirmoy Das --- drivers/gpu/drm/xe/xe_wait_user_fence.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/xe/xe_wait_user_fence.c b/drivers/gpu/drm/xe/xe_wait_user_fence.c index d46fa8374980..f5deb81eba01 100644 --- a/drivers/gpu/drm/xe/xe_wait_user_fence.c +++ b/drivers/gpu/drm/xe/xe_wait_user_fence.c @@ -169,9 +169,6 @@ int xe_wait_user_fence_ioctl(struct drm_device *dev, void *data, args->timeout = 0; } - if (!timeout && !(err < 0)) - err = -ETIME; - if (q) xe_exec_queue_put(q); -- cgit v1.2.3 From 26f69e88dcc95fffc62ed2aea30ad7b1fdf31fdb Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Fri, 11 Oct 2024 14:36:34 +0100 Subject: drm/xe/xe_sync: initialise ufence.signalled We can incorrectly think that the fence has signalled, if we get a non-zero value here from the kmalloc, which is quite plausible. Just use kzalloc to prevent stuff like this. Fixes: 977e5b82e090 ("drm/xe: Expose user fence from xe_sync_entry") Signed-off-by: Matthew Auld Cc: Mika Kuoppala Cc: Matthew Brost Cc: Nirmoy Das Cc: # v6.10+ Reviewed-by: Nirmoy Das Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20241011133633.388008-2-matthew.auld@intel.com --- drivers/gpu/drm/xe/xe_sync.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c index bb3c2a830362..c6cf227ead40 100644 --- a/drivers/gpu/drm/xe/xe_sync.c +++ b/drivers/gpu/drm/xe/xe_sync.c @@ -58,7 +58,7 @@ static struct xe_user_fence *user_fence_create(struct xe_device *xe, u64 addr, if (!access_ok(ptr, sizeof(*ptr))) return ERR_PTR(-EFAULT); - ufence = kmalloc(sizeof(*ufence), GFP_KERNEL); + ufence = kzalloc(sizeof(*ufence), GFP_KERNEL); if (!ufence) return ERR_PTR(-ENOMEM); -- cgit v1.2.3 From 3ad86ae1da97d0091f673f08846848714f6dd745 Mon Sep 17 00:00:00 2001 From: Juha-Pekka Heikkila Date: Wed, 9 Oct 2024 18:19:46 +0300 Subject: drm/xe: add interface to request physical alignment for buffer objects Add xe_bo_create_pin_map_at_aligned() which augment xe_bo_create_pin_map_at() with alignment parameter allowing to pass required alignemnt if it differ from default. Signed-off-by: Juha-Pekka Heikkila Reviewed-by: Jonathan Cavitt Signed-off-by: Mika Kahola Link: https://patchwork.freedesktop.org/patch/msgid/20241009151947.2240099-2-juhapekka.heikkila@gmail.com --- .../xe/compat-i915-headers/gem/i915_gem_stolen.h | 2 +- drivers/gpu/drm/xe/xe_bo.c | 29 +++++++++++++++++----- drivers/gpu/drm/xe/xe_bo.h | 8 +++++- drivers/gpu/drm/xe/xe_bo_types.h | 5 ++++ drivers/gpu/drm/xe/xe_ggtt.c | 2 +- 5 files changed, 37 insertions(+), 9 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_stolen.h b/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_stolen.h index cb6c7598824b..9c4cf050059a 100644 --- a/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_stolen.h +++ b/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_stolen.h @@ -29,7 +29,7 @@ static inline int i915_gem_stolen_insert_node_in_range(struct xe_device *xe, bo = xe_bo_create_locked_range(xe, xe_device_get_root_tile(xe), NULL, size, start, end, - ttm_bo_type_kernel, flags); + ttm_bo_type_kernel, flags, 0); if (IS_ERR(bo)) { err = PTR_ERR(bo); bo = NULL; diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 5e8f60a8d431..d5d30a0ff1e7 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -1454,7 +1454,8 @@ static struct xe_bo * __xe_bo_create_locked(struct xe_device *xe, struct xe_tile *tile, struct xe_vm *vm, size_t size, u64 start, u64 end, - u16 cpu_caching, enum ttm_bo_type type, u32 flags) + u16 cpu_caching, enum ttm_bo_type type, u32 flags, + u64 alignment) { struct xe_bo *bo = NULL; int err; @@ -1483,6 +1484,8 @@ __xe_bo_create_locked(struct xe_device *xe, if (IS_ERR(bo)) return bo; + bo->min_align = alignment; + /* * Note that instead of taking a reference no the drm_gpuvm_resv_bo(), * to ensure the shared resv doesn't disappear under the bo, the bo @@ -1523,16 +1526,18 @@ struct xe_bo * xe_bo_create_locked_range(struct xe_device *xe, struct xe_tile *tile, struct xe_vm *vm, size_t size, u64 start, u64 end, - enum ttm_bo_type type, u32 flags) + enum ttm_bo_type type, u32 flags, u64 alignment) { - return __xe_bo_create_locked(xe, tile, vm, size, start, end, 0, type, flags); + return __xe_bo_create_locked(xe, tile, vm, size, start, end, 0, type, + flags, alignment); } struct xe_bo *xe_bo_create_locked(struct xe_device *xe, struct xe_tile *tile, struct xe_vm *vm, size_t size, enum ttm_bo_type type, u32 flags) { - return __xe_bo_create_locked(xe, tile, vm, size, 0, ~0ULL, 0, type, flags); + return __xe_bo_create_locked(xe, tile, vm, size, 0, ~0ULL, 0, type, + flags, 0); } struct xe_bo *xe_bo_create_user(struct xe_device *xe, struct xe_tile *tile, @@ -1542,7 +1547,7 @@ struct xe_bo *xe_bo_create_user(struct xe_device *xe, struct xe_tile *tile, { struct xe_bo *bo = __xe_bo_create_locked(xe, tile, vm, size, 0, ~0ULL, cpu_caching, ttm_bo_type_device, - flags | XE_BO_FLAG_USER); + flags | XE_BO_FLAG_USER, 0); if (!IS_ERR(bo)) xe_bo_unlock_vm_held(bo); @@ -1565,6 +1570,17 @@ struct xe_bo *xe_bo_create_pin_map_at(struct xe_device *xe, struct xe_tile *tile struct xe_vm *vm, size_t size, u64 offset, enum ttm_bo_type type, u32 flags) +{ + return xe_bo_create_pin_map_at_aligned(xe, tile, vm, size, offset, + type, flags, 0); +} + +struct xe_bo *xe_bo_create_pin_map_at_aligned(struct xe_device *xe, + struct xe_tile *tile, + struct xe_vm *vm, + size_t size, u64 offset, + enum ttm_bo_type type, u32 flags, + u64 alignment) { struct xe_bo *bo; int err; @@ -1576,7 +1592,8 @@ struct xe_bo *xe_bo_create_pin_map_at(struct xe_device *xe, struct xe_tile *tile flags |= XE_BO_FLAG_GGTT; bo = xe_bo_create_locked_range(xe, tile, vm, size, start, end, type, - flags | XE_BO_FLAG_NEEDS_CPU_ACCESS); + flags | XE_BO_FLAG_NEEDS_CPU_ACCESS, + alignment); if (IS_ERR(bo)) return bo; diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h index 6e4be52306df..41624159a291 100644 --- a/drivers/gpu/drm/xe/xe_bo.h +++ b/drivers/gpu/drm/xe/xe_bo.h @@ -77,7 +77,7 @@ struct xe_bo * xe_bo_create_locked_range(struct xe_device *xe, struct xe_tile *tile, struct xe_vm *vm, size_t size, u64 start, u64 end, - enum ttm_bo_type type, u32 flags); + enum ttm_bo_type type, u32 flags, u64 alignment); struct xe_bo *xe_bo_create_locked(struct xe_device *xe, struct xe_tile *tile, struct xe_vm *vm, size_t size, enum ttm_bo_type type, u32 flags); @@ -94,6 +94,12 @@ struct xe_bo *xe_bo_create_pin_map(struct xe_device *xe, struct xe_tile *tile, struct xe_bo *xe_bo_create_pin_map_at(struct xe_device *xe, struct xe_tile *tile, struct xe_vm *vm, size_t size, u64 offset, enum ttm_bo_type type, u32 flags); +struct xe_bo *xe_bo_create_pin_map_at_aligned(struct xe_device *xe, + struct xe_tile *tile, + struct xe_vm *vm, + size_t size, u64 offset, + enum ttm_bo_type type, u32 flags, + u64 alignment); struct xe_bo *xe_bo_create_from_data(struct xe_device *xe, struct xe_tile *tile, const void *data, size_t size, enum ttm_bo_type type, u32 flags); diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h index 2ed558ac2264..35372c46edfa 100644 --- a/drivers/gpu/drm/xe/xe_bo_types.h +++ b/drivers/gpu/drm/xe/xe_bo_types.h @@ -76,6 +76,11 @@ struct xe_bo { /** @vram_userfault_link: Link into @mem_access.vram_userfault.list */ struct list_head vram_userfault_link; + + /** @min_align: minimum alignment needed for this BO if different + * from default + */ + u64 min_align; }; #define intel_bo_to_drm_bo(bo) (&(bo)->ttm.base) diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index 47bfd9d2635d..1b3178226987 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -603,7 +603,7 @@ static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, u64 start, u64 end) { int err; - u64 alignment = XE_PAGE_SIZE; + u64 alignment = bo->min_align > 0 ? bo->min_align : XE_PAGE_SIZE; if (xe_bo_is_vram(bo) && ggtt->flags & XE_GGTT_FLAGS_64K) alignment = SZ_64K; -- cgit v1.2.3 From b0228a337de88db809e2c7f9d6c18fccc9d85c69 Mon Sep 17 00:00:00 2001 From: Juha-Pekka Heikkila Date: Wed, 9 Oct 2024 18:19:47 +0300 Subject: drm/xe/display: align framebuffers according to hw requirements Align framebuffers in memory according to hw requirements instead of default page size alignment. Signed-off-by: Juha-Pekka Heikkila Reviewed-by: Jonathan Cavitt Signed-off-by: Mika Kahola Link: https://patchwork.freedesktop.org/patch/msgid/20241009151947.2240099-3-juhapekka.heikkila@gmail.com --- drivers/gpu/drm/xe/display/xe_fb_pin.c | 57 +++++++++++++++++++++------------- 1 file changed, 35 insertions(+), 22 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/xe/display/xe_fb_pin.c b/drivers/gpu/drm/xe/display/xe_fb_pin.c index b58fc4ba2aac..9194993910e3 100644 --- a/drivers/gpu/drm/xe/display/xe_fb_pin.c +++ b/drivers/gpu/drm/xe/display/xe_fb_pin.c @@ -79,7 +79,8 @@ write_dpt_remapped(struct xe_bo *bo, struct iosys_map *map, u32 *dpt_ofs, static int __xe_pin_fb_vma_dpt(const struct intel_framebuffer *fb, const struct i915_gtt_view *view, - struct i915_vma *vma) + struct i915_vma *vma, + u64 physical_alignment) { struct xe_device *xe = to_xe_device(fb->base.dev); struct xe_tile *tile0 = xe_device_get_root_tile(xe); @@ -98,23 +99,29 @@ static int __xe_pin_fb_vma_dpt(const struct intel_framebuffer *fb, XE_PAGE_SIZE); if (IS_DGFX(xe)) - dpt = xe_bo_create_pin_map(xe, tile0, NULL, dpt_size, - ttm_bo_type_kernel, - XE_BO_FLAG_VRAM0 | - XE_BO_FLAG_GGTT | - XE_BO_FLAG_PAGETABLE); + dpt = xe_bo_create_pin_map_at_aligned(xe, tile0, NULL, + dpt_size, ~0ull, + ttm_bo_type_kernel, + XE_BO_FLAG_VRAM0 | + XE_BO_FLAG_GGTT | + XE_BO_FLAG_PAGETABLE, + physical_alignment); else - dpt = xe_bo_create_pin_map(xe, tile0, NULL, dpt_size, - ttm_bo_type_kernel, - XE_BO_FLAG_STOLEN | - XE_BO_FLAG_GGTT | - XE_BO_FLAG_PAGETABLE); + dpt = xe_bo_create_pin_map_at_aligned(xe, tile0, NULL, + dpt_size, ~0ull, + ttm_bo_type_kernel, + XE_BO_FLAG_STOLEN | + XE_BO_FLAG_GGTT | + XE_BO_FLAG_PAGETABLE, + physical_alignment); if (IS_ERR(dpt)) - dpt = xe_bo_create_pin_map(xe, tile0, NULL, dpt_size, - ttm_bo_type_kernel, - XE_BO_FLAG_SYSTEM | - XE_BO_FLAG_GGTT | - XE_BO_FLAG_PAGETABLE); + dpt = xe_bo_create_pin_map_at_aligned(xe, tile0, NULL, + dpt_size, ~0ull, + ttm_bo_type_kernel, + XE_BO_FLAG_SYSTEM | + XE_BO_FLAG_GGTT | + XE_BO_FLAG_PAGETABLE, + physical_alignment); if (IS_ERR(dpt)) return PTR_ERR(dpt); @@ -183,7 +190,8 @@ write_ggtt_rotated(struct xe_bo *bo, struct xe_ggtt *ggtt, u32 *ggtt_ofs, u32 bo static int __xe_pin_fb_vma_ggtt(const struct intel_framebuffer *fb, const struct i915_gtt_view *view, - struct i915_vma *vma) + struct i915_vma *vma, + u64 physical_alignment) { struct xe_bo *bo = intel_fb_obj(&fb->base); struct xe_device *xe = to_xe_device(fb->base.dev); @@ -264,7 +272,8 @@ out: } static struct i915_vma *__xe_pin_fb_vma(const struct intel_framebuffer *fb, - const struct i915_gtt_view *view) + const struct i915_gtt_view *view, + u64 physical_alignment) { struct drm_device *dev = fb->base.dev; struct xe_device *xe = to_xe_device(dev); @@ -312,9 +321,9 @@ static struct i915_vma *__xe_pin_fb_vma(const struct intel_framebuffer *fb, vma->bo = bo; if (intel_fb_uses_dpt(&fb->base)) - ret = __xe_pin_fb_vma_dpt(fb, view, vma); + ret = __xe_pin_fb_vma_dpt(fb, view, vma, physical_alignment); else - ret = __xe_pin_fb_vma_ggtt(fb, view, vma); + ret = __xe_pin_fb_vma_ggtt(fb, view, vma, physical_alignment); if (ret) goto err_unpin; @@ -355,7 +364,7 @@ intel_fb_pin_to_ggtt(const struct drm_framebuffer *fb, { *out_flags = 0; - return __xe_pin_fb_vma(to_intel_framebuffer(fb), view); + return __xe_pin_fb_vma(to_intel_framebuffer(fb), view, phys_alignment); } void intel_fb_unpin_vma(struct i915_vma *vma, unsigned long flags) @@ -368,11 +377,15 @@ int intel_plane_pin_fb(struct intel_plane_state *plane_state) struct drm_framebuffer *fb = plane_state->hw.fb; struct xe_bo *bo = intel_fb_obj(fb); struct i915_vma *vma; + struct intel_framebuffer *intel_fb = to_intel_framebuffer(fb); + struct intel_plane *plane = to_intel_plane(plane_state->uapi.plane); + u64 phys_alignment = plane->min_alignment(plane, fb, 0); /* We reject creating !SCANOUT fb's, so this is weird.. */ drm_WARN_ON(bo->ttm.base.dev, !(bo->flags & XE_BO_FLAG_SCANOUT)); - vma = __xe_pin_fb_vma(to_intel_framebuffer(fb), &plane_state->view.gtt); + vma = __xe_pin_fb_vma(intel_fb, &plane_state->view.gtt, phys_alignment); + if (IS_ERR(vma)) return PTR_ERR(vma); -- cgit v1.2.3 From 73e8e2f9a358caa005ed6e52dcb7fa2bca59d132 Mon Sep 17 00:00:00 2001 From: Juha-Pekka Heikkila Date: Mon, 7 Oct 2024 21:28:41 +0300 Subject: drm/i915/display: Don't allow tile4 framebuffer to do hflip on display20 or greater On display ver 20 onwards tile4 is not supported with horizontal flip Bspec: 69853 Signed-off-by: Juha-Pekka Heikkila Reviewed-by: Sai Teja Pottumuttu Signed-off-by: Mika Kahola Link: https://patchwork.freedesktop.org/patch/msgid/20241007182841.2104740-1-juhapekka.heikkila@gmail.com --- drivers/gpu/drm/i915/display/intel_fb.c | 13 +++++++++++++ drivers/gpu/drm/i915/display/intel_fb.h | 1 + drivers/gpu/drm/i915/display/skl_universal_plane.c | 11 +++++++++++ 3 files changed, 25 insertions(+) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/display/intel_fb.c b/drivers/gpu/drm/i915/display/intel_fb.c index 5be7bb43e2e0..35557d98d7a7 100644 --- a/drivers/gpu/drm/i915/display/intel_fb.c +++ b/drivers/gpu/drm/i915/display/intel_fb.c @@ -438,6 +438,19 @@ bool intel_fb_needs_64k_phys(u64 modifier) INTEL_PLANE_CAP_NEED64K_PHYS); } +/** + * intel_fb_is_tile4_modifier: Check if a modifier is a tile4 modifier type + * @modifier: Modifier to check + * + * Returns: + * Returns %true if @modifier is a tile4 modifier. + */ +bool intel_fb_is_tile4_modifier(u64 modifier) +{ + return plane_caps_contain_any(lookup_modifier(modifier)->plane_caps, + INTEL_PLANE_CAP_TILING_4); +} + static bool check_modifier_display_ver_range(const struct intel_modifier_desc *md, u8 display_ver_from, u8 display_ver_until) { diff --git a/drivers/gpu/drm/i915/display/intel_fb.h b/drivers/gpu/drm/i915/display/intel_fb.h index 10de437e8ef8..827be3f7934c 100644 --- a/drivers/gpu/drm/i915/display/intel_fb.h +++ b/drivers/gpu/drm/i915/display/intel_fb.h @@ -35,6 +35,7 @@ bool intel_fb_is_ccs_modifier(u64 modifier); bool intel_fb_is_rc_ccs_cc_modifier(u64 modifier); bool intel_fb_is_mc_ccs_modifier(u64 modifier); bool intel_fb_needs_64k_phys(u64 modifier); +bool intel_fb_is_tile4_modifier(u64 modifier); bool intel_fb_is_ccs_aux_plane(const struct drm_framebuffer *fb, int color_plane); int intel_fb_rc_ccs_cc_plane(const struct drm_framebuffer *fb); diff --git a/drivers/gpu/drm/i915/display/skl_universal_plane.c b/drivers/gpu/drm/i915/display/skl_universal_plane.c index 17d4c880ecc4..c8720d31d101 100644 --- a/drivers/gpu/drm/i915/display/skl_universal_plane.c +++ b/drivers/gpu/drm/i915/display/skl_universal_plane.c @@ -1591,6 +1591,17 @@ static int skl_plane_check_fb(const struct intel_crtc_state *crtc_state, return -EINVAL; } + /* + * Display20 onward tile4 hflip is not supported + */ + if (rotation & DRM_MODE_REFLECT_X && + intel_fb_is_tile4_modifier(fb->modifier) && + DISPLAY_VER(dev_priv) >= 20) { + drm_dbg_kms(&dev_priv->drm, + "horizontal flip is not supported with tile4 surface formats\n"); + return -EINVAL; + } + if (drm_rotation_90_or_270(rotation)) { if (!intel_fb_supports_90_270_rotation(to_intel_framebuffer(fb))) { drm_dbg_kms(&dev_priv->drm, -- cgit v1.2.3