From 84a1ed5e67565b09b8fd22a26754d2897de55ce0 Mon Sep 17 00:00:00 2001 From: Francois Dugast Date: Thu, 22 Feb 2024 18:23:56 -0500 Subject: drm/xe/uapi: Remove unused flags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Those cases missed in previous uAPI cleanups were mostly accidentally brought in from i915 or created to exercise the possibilities of gpuvm but they are not used by userspace yet, so let's remove them. They can still be brought back later if needed. v2: - Fix XE_VM_FLAG_FAULT_MODE support in xe_lrc.c (Brian Welty) - Leave DRM_XE_VM_BIND_OP_UNMAP_ALL (José Roberto de Souza) - Ensure invalid flag values are rejected (Rodrigo Vivi) v3: Rebase after removal of persistent exec_queues (Francois Dugast) v4: Rodrigo: Rebase after the new dumpable flag. Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Cc: Thomas Hellström Cc: Rodrigo Vivi Signed-off-by: Francois Dugast Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240222232356.175431-1-rodrigo.vivi@intel.com --- drivers/gpu/drm/xe/xe_exec_queue.c | 94 +------------------------------- drivers/gpu/drm/xe/xe_exec_queue_types.h | 10 ---- drivers/gpu/drm/xe/xe_lrc.c | 10 +--- drivers/gpu/drm/xe/xe_vm.c | 12 +--- drivers/gpu/drm/xe/xe_vm_types.h | 4 -- include/uapi/drm/xe_drm.h | 19 ------- 6 files changed, 6 insertions(+), 143 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index 4bb8f897bf15..11e150f4c0c1 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -354,91 +354,6 @@ static int exec_queue_set_timeslice(struct xe_device *xe, struct xe_exec_queue * return 0; } -static int exec_queue_set_preemption_timeout(struct xe_device *xe, - struct xe_exec_queue *q, u64 value, - bool create) -{ - u32 min = 0, max = 0; - - xe_exec_queue_get_prop_minmax(q->hwe->eclass, - XE_EXEC_QUEUE_PREEMPT_TIMEOUT, &min, &max); - - if (xe_exec_queue_enforce_schedule_limit() && - !xe_hw_engine_timeout_in_range(value, min, max)) - return -EINVAL; - - if (!create) - return q->ops->set_preempt_timeout(q, value); - - q->sched_props.preempt_timeout_us = value; - return 0; -} - -static int exec_queue_set_job_timeout(struct xe_device *xe, struct xe_exec_queue *q, - u64 value, bool create) -{ - u32 min = 0, max = 0; - - if (XE_IOCTL_DBG(xe, !create)) - return -EINVAL; - - xe_exec_queue_get_prop_minmax(q->hwe->eclass, - XE_EXEC_QUEUE_JOB_TIMEOUT, &min, &max); - - if (xe_exec_queue_enforce_schedule_limit() && - !xe_hw_engine_timeout_in_range(value, min, max)) - return -EINVAL; - - q->sched_props.job_timeout_ms = value; - - return 0; -} - -static int exec_queue_set_acc_trigger(struct xe_device *xe, struct xe_exec_queue *q, - u64 value, bool create) -{ - if (XE_IOCTL_DBG(xe, !create)) - return -EINVAL; - - if (XE_IOCTL_DBG(xe, !xe->info.has_usm)) - return -EINVAL; - - q->usm.acc_trigger = value; - - return 0; -} - -static int exec_queue_set_acc_notify(struct xe_device *xe, struct xe_exec_queue *q, - u64 value, bool create) -{ - if (XE_IOCTL_DBG(xe, !create)) - return -EINVAL; - - if (XE_IOCTL_DBG(xe, !xe->info.has_usm)) - return -EINVAL; - - q->usm.acc_notify = value; - - return 0; -} - -static int exec_queue_set_acc_granularity(struct xe_device *xe, struct xe_exec_queue *q, - u64 value, bool create) -{ - if (XE_IOCTL_DBG(xe, !create)) - return -EINVAL; - - if (XE_IOCTL_DBG(xe, !xe->info.has_usm)) - return -EINVAL; - - if (value > 
DRM_XE_ACC_GRANULARITY_64M) - return -EINVAL; - - q->usm.acc_granularity = value; - - return 0; -} - typedef int (*xe_exec_queue_set_property_fn)(struct xe_device *xe, struct xe_exec_queue *q, u64 value, bool create); @@ -446,11 +361,6 @@ typedef int (*xe_exec_queue_set_property_fn)(struct xe_device *xe, static const xe_exec_queue_set_property_fn exec_queue_set_property_funcs[] = { [DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY] = exec_queue_set_priority, [DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE] = exec_queue_set_timeslice, - [DRM_XE_EXEC_QUEUE_SET_PROPERTY_PREEMPTION_TIMEOUT] = exec_queue_set_preemption_timeout, - [DRM_XE_EXEC_QUEUE_SET_PROPERTY_JOB_TIMEOUT] = exec_queue_set_job_timeout, - [DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_TRIGGER] = exec_queue_set_acc_trigger, - [DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_NOTIFY] = exec_queue_set_acc_notify, - [DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_GRANULARITY] = exec_queue_set_acc_granularity, }; static int exec_queue_user_ext_set_property(struct xe_device *xe, @@ -469,7 +379,9 @@ static int exec_queue_user_ext_set_property(struct xe_device *xe, if (XE_IOCTL_DBG(xe, ext.property >= ARRAY_SIZE(exec_queue_set_property_funcs)) || - XE_IOCTL_DBG(xe, ext.pad)) + XE_IOCTL_DBG(xe, ext.pad) || + XE_IOCTL_DBG(xe, ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY && + ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE)) return -EINVAL; idx = array_index_nospec(ext.property, ARRAY_SIZE(exec_queue_set_property_funcs)); diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h index c40240e88068..62b3d9d1d7cd 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue_types.h +++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h @@ -141,16 +141,6 @@ struct xe_exec_queue { spinlock_t lock; } compute; - /** @usm: unified shared memory state */ - struct { - /** @usm.acc_trigger: access counter trigger */ - u32 acc_trigger; - /** @usm.acc_notify: access counter notify */ - u32 acc_notify; - /** @usm.acc_granularity: access counter granularity */ - u32 acc_granularity; - } usm; - /** @ops: submission backend exec queue operations */ const struct xe_exec_queue_ops *ops; diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index 8c85e90220de..7ad853b0788a 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -706,8 +706,6 @@ static void xe_lrc_set_ppgtt(struct xe_lrc *lrc, struct xe_vm *vm) #define PVC_CTX_ASID (0x2e + 1) #define PVC_CTX_ACC_CTR_THOLD (0x2a + 1) -#define ACC_GRANULARITY_S 20 -#define ACC_NOTIFY_S 16 int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, struct xe_exec_queue *q, struct xe_vm *vm, u32 ring_size) @@ -778,13 +776,7 @@ int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, xe_lrc_write_ctx_reg(lrc, CTX_RING_CTL, RING_CTL_SIZE(lrc->ring.size) | RING_VALID); if (xe->info.has_asid && vm) - xe_lrc_write_ctx_reg(lrc, PVC_CTX_ASID, - (q->usm.acc_granularity << - ACC_GRANULARITY_S) | vm->usm.asid); - if (xe->info.has_usm && vm) - xe_lrc_write_ctx_reg(lrc, PVC_CTX_ACC_CTR_THOLD, - (q->usm.acc_notify << ACC_NOTIFY_S) | - q->usm.acc_trigger); + xe_lrc_write_ctx_reg(lrc, PVC_CTX_ASID, vm->usm.asid); lrc->desc = LRC_VALID; lrc->desc |= LRC_LEGACY_64B_CONTEXT << LRC_ADDRESSING_MODE_SHIFT; diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index e3bde897f6e8..db3f049a47dc 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -2131,10 +2131,6 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo, struct xe_vma_op *op = 
gpuva_op_to_vma_op(__op); if (__op->op == DRM_GPUVA_OP_MAP) { - op->map.immediate = - flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE; - op->map.read_only = - flags & DRM_XE_VM_BIND_FLAG_READONLY; op->map.is_null = flags & DRM_XE_VM_BIND_FLAG_NULL; op->map.dumpable = flags & DRM_XE_VM_BIND_FLAG_DUMPABLE; op->map.pat_index = pat_index; @@ -2329,8 +2325,6 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q, switch (op->base.op) { case DRM_GPUVA_OP_MAP: { - flags |= op->map.read_only ? - VMA_CREATE_FLAG_READ_ONLY : 0; flags |= op->map.is_null ? VMA_CREATE_FLAG_IS_NULL : 0; flags |= op->map.dumpable ? @@ -2475,7 +2469,7 @@ static int op_execute(struct drm_exec *exec, struct xe_vm *vm, case DRM_GPUVA_OP_MAP: err = xe_vm_bind(vm, vma, op->q, xe_vma_bo(vma), op->syncs, op->num_syncs, - op->map.immediate || !xe_vm_in_fault_mode(vm), + !xe_vm_in_fault_mode(vm), op->flags & XE_VMA_OP_FIRST, op->flags & XE_VMA_OP_LAST); break; @@ -2750,9 +2744,7 @@ static int vm_bind_ioctl_ops_execute(struct xe_vm *vm, return 0; } -#define SUPPORTED_FLAGS \ - (DRM_XE_VM_BIND_FLAG_READONLY | \ - DRM_XE_VM_BIND_FLAG_IMMEDIATE | DRM_XE_VM_BIND_FLAG_NULL | \ +#define SUPPORTED_FLAGS (DRM_XE_VM_BIND_FLAG_NULL | \ DRM_XE_VM_BIND_FLAG_DUMPABLE) #define XE_64K_PAGE_MASK 0xffffull #define ALL_DRM_XE_SYNCS_FLAGS (DRM_XE_SYNCS_FLAG_WAIT_FOR_OP) diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index 7d4f810f9c04..3fce50b91256 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -294,10 +294,6 @@ struct xe_vm { struct xe_vma_op_map { /** @vma: VMA to map */ struct xe_vma *vma; - /** @immediate: Immediate bind */ - bool immediate; - /** @read_only: Read only */ - bool read_only; /** @is_null: is NULL binding */ bool is_null; /** @dumpable: whether BO is dumped on GPU hang */ diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 2fefec9c0e94..538a3ac95c54 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -862,10 +862,6 @@ struct drm_xe_vm_destroy { * - %DRM_XE_VM_BIND_OP_PREFETCH * * and the @flags can be: - * - %DRM_XE_VM_BIND_FLAG_READONLY - * - %DRM_XE_VM_BIND_FLAG_IMMEDIATE - Valid on a faulting VM only, do the - * MAP operation immediately rather than deferring the MAP to the page - * fault handler. * - %DRM_XE_VM_BIND_FLAG_NULL - When the NULL flag is set, the page * tables are setup with a special bit which indicates writes are * dropped and all reads return zero. 
In the future, the NULL flags @@ -958,8 +954,6 @@ struct drm_xe_vm_bind_op { /** @op: Bind operation to perform */ __u32 op; -#define DRM_XE_VM_BIND_FLAG_READONLY (1 << 0) -#define DRM_XE_VM_BIND_FLAG_IMMEDIATE (1 << 1) #define DRM_XE_VM_BIND_FLAG_NULL (1 << 2) #define DRM_XE_VM_BIND_FLAG_DUMPABLE (1 << 3) /** @flags: Bind flags */ @@ -1076,19 +1070,6 @@ struct drm_xe_exec_queue_create { #define DRM_XE_EXEC_QUEUE_EXTENSION_SET_PROPERTY 0 #define DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY 0 #define DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE 1 -#define DRM_XE_EXEC_QUEUE_SET_PROPERTY_PREEMPTION_TIMEOUT 2 -#define DRM_XE_EXEC_QUEUE_SET_PROPERTY_JOB_TIMEOUT 4 -#define DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_TRIGGER 5 -#define DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_NOTIFY 6 -#define DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_GRANULARITY 7 -/* Monitor 128KB contiguous region with 4K sub-granularity */ -#define DRM_XE_ACC_GRANULARITY_128K 0 -/* Monitor 2MB contiguous region with 64KB sub-granularity */ -#define DRM_XE_ACC_GRANULARITY_2M 1 -/* Monitor 16MB contiguous region with 512KB sub-granularity */ -#define DRM_XE_ACC_GRANULARITY_16M 2 -/* Monitor 64MB contiguous region with 2M sub-granularity */ -#define DRM_XE_ACC_GRANULARITY_64M 3 /** @extensions: Pointer to the first extension struct, if any */ __u64 extensions; -- cgit v1.2.3 From ddadc7120d4be7a40a9745924339c472c5850d14 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 22 Feb 2024 15:20:19 -0800 Subject: drm/xe: Fix execlist splat Although execlist submission is not supported it should be kept in a basic working state as it can be used for very early hardware bring up. Fix the below splat. WARNING: CPU: 3 PID: 11 at drivers/gpu/drm/xe/xe_execlist.c:217 execlist_run_job+0x1c2/0x220 [xe] Modules linked in: xe drm_kunit_helpers drm_gpuvm drm_ttm_helper ttm drm_exec drm_suballoc_helper drm_buddy gpu_sched mei_pxp mei_hdcp wmi_bmof x86_pkg_temp_thermal coretemp crct10dif_pclmul crc32_pclmul snd_hda_intel ghash_clmulni_intel snd_intel_dspcfg snd_hda_codec snd_hwdep snd_hda_core video snd_pcm mei_me mei wmi fuse e1000e i2c_i801 ptp i2c_smbus pps_core intel_lpss_pci CPU: 3 PID: 11 Comm: kworker/u16:0 Tainted: G U 6.8.0-rc3-guc+ #1046 Hardware name: Intel Corporation Tiger Lake Client Platform/TigerLake U DDR4 SODIMM RVP, BIOS TGLSFWI1.R00.3243.A01.2006102133 06/10/2020 Workqueue: rcs0 drm_sched_run_job_work [gpu_sched] RIP: 0010:execlist_run_job+0x1c2/0x220 [xe] Code: 8b f8 03 00 00 4c 89 39 e9 e2 fe ff ff 49 8d 7d 20 be ff ff ff ff e8 ed fd a6 e1 85 c0 0f 85 e1 fe ff ff 0f 0b e9 da fe ff ff <0f> 0b 0f 0b 41 83 fc 03 0f 86 8a fe ff ff 0f 0b e9 83 fe ff ff be RSP: 0018:ffffc9000013bdb8 EFLAGS: 00010246 RAX: ffff888105021a00 RBX: ffff888105078400 RCX: 0000000000000000 RDX: 0000000000000001 RSI: ffffc9000013bd14 RDI: ffffc90001609090 RBP: ffff88811e3f0040 R08: 0000000000000088 R09: 00000000ffffff81 R10: 0000000000000001 R11: ffff88810c10c000 R12: 00000000fffffffe R13: ffff888109b72c28 R14: ffff8881050784a0 R15: ffff888105078408 FS: 0000000000000000(0000) GS:ffff88849f980000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000563459d130f8 CR3: 000000000563a001 CR4: 0000000000f70ef0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: ? __warn+0x7f/0x170 ? execlist_run_job+0x1c2/0x220 [xe] ? report_bug+0x1c7/0x1d0 ? handle_bug+0x3c/0x70 ? exc_invalid_op+0x18/0x70 ? asm_exc_invalid_op+0x1a/0x20 ? 
execlist_run_job+0x1c2/0x220 [xe] ? execlist_run_job+0x2c/0x220 [xe] drm_sched_run_job_work+0x246/0x3f0 [gpu_sched] ? process_one_work+0x18d/0x4e0 process_one_work+0x1f7/0x4e0 worker_thread+0x1da/0x3e0 ? __pfx_worker_thread+0x10/0x10 kthread+0xfe/0x130 ? __pfx_kthread+0x10/0x10 ret_from_fork+0x2c/0x50 ? __pfx_kthread+0x10/0x10 ret_from_fork_asm+0x1b/0x30 Fixes: 9b9529ce379a ("drm/xe: Rename engine to exec_queue") Signed-off-by: Matthew Brost Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240222232021.3911545-2-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_execlist.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_execlist.c b/drivers/gpu/drm/xe/xe_execlist.c index 1788e78caf5c..dece2785933c 100644 --- a/drivers/gpu/drm/xe/xe_execlist.c +++ b/drivers/gpu/drm/xe/xe_execlist.c @@ -212,7 +212,7 @@ static void xe_execlist_port_wake_locked(struct xe_execlist_port *port, static void xe_execlist_make_active(struct xe_execlist_exec_queue *exl) { struct xe_execlist_port *port = exl->port; - enum xe_exec_queue_priority priority = exl->active_priority; + enum xe_exec_queue_priority priority = exl->q->sched_props.priority; XE_WARN_ON(priority == XE_EXEC_QUEUE_PRIORITY_UNSET); XE_WARN_ON(priority < 0); -- cgit v1.2.3 From 3121fed0c51beb8ea7b18ab2ceff1ac9e358ac53 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 22 Feb 2024 15:20:20 -0800 Subject: drm/xe: Cleanup some layering in GGTT xe_ggtt.c touched GuC layers which is incorrect. Call into xe_gt_tlb_invalidation layer instead. Cc: Rodrigo Vivi Signed-off-by: Matthew Brost Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240222232021.3911545-3-matthew.brost@intel.com --- drivers/gpu/drm/xe/regs/xe_guc_regs.h | 7 +++++ drivers/gpu/drm/xe/xe_ggtt.c | 37 +++++--------------------- drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c | 41 ++++++++++++++++++++++++++++- drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h | 2 +- 4 files changed, 54 insertions(+), 33 deletions(-) diff --git a/drivers/gpu/drm/xe/regs/xe_guc_regs.h b/drivers/gpu/drm/xe/regs/xe_guc_regs.h index 92320bbc9d3d..4e7f809d2b00 100644 --- a/drivers/gpu/drm/xe/regs/xe_guc_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_guc_regs.h @@ -110,6 +110,13 @@ #define MED_VF_SW_FLAG(n) XE_REG(0x190310 + (n) * 4) #define MED_VF_SW_FLAG_COUNT 4 +#define GUC_TLB_INV_CR XE_REG(0xcee8) +#define GUC_TLB_INV_CR_INVALIDATE REG_BIT(0) +#define PVC_GUC_TLB_INV_DESC0 XE_REG(0xcf7c) +#define PVC_GUC_TLB_INV_DESC0_VALID REG_BIT(0) +#define PVC_GUC_TLB_INV_DESC1 XE_REG(0xcf80) +#define PVC_GUC_TLB_INV_DESC1_INVALIDATE REG_BIT(6) + /* GuC Interrupt Vector */ #define GUC_INTR_GUC2HOST REG_BIT(15) #define GUC_INTR_EXEC_ERROR REG_BIT(14) diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index ab96edb058d6..5d46958e3144 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -19,7 +19,6 @@ #include "xe_gt_printk.h" #include "xe_gt_tlb_invalidation.h" #include "xe_map.h" -#include "xe_mmio.h" #include "xe_sriov.h" #include "xe_wopcm.h" @@ -249,15 +248,11 @@ err: return err; } -#define GUC_TLB_INV_CR XE_REG(0xcee8) -#define GUC_TLB_INV_CR_INVALIDATE REG_BIT(0) -#define PVC_GUC_TLB_INV_DESC0 XE_REG(0xcf7c) -#define PVC_GUC_TLB_INV_DESC0_VALID REG_BIT(0) -#define PVC_GUC_TLB_INV_DESC1 XE_REG(0xcf80) -#define PVC_GUC_TLB_INV_DESC1_INVALIDATE REG_BIT(6) - static void ggtt_invalidate_gt_tlb(struct xe_gt *gt) { + struct xe_device *xe = gt_to_xe(gt); + int err; + if (!gt) return; @@ -267,29 
+262,9 @@ static void ggtt_invalidate_gt_tlb(struct xe_gt *gt) * and GuC are accessible. */ xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); - - /* TODO: vfunc for GuC vs. non-GuC */ - - if (gt->uc.guc.submission_state.enabled) { - int seqno; - - seqno = xe_gt_tlb_invalidation_guc(gt); - xe_gt_assert(gt, seqno > 0); - if (seqno > 0) - xe_gt_tlb_invalidation_wait(gt, seqno); - } else if (xe_device_uc_enabled(gt_to_xe(gt))) { - struct xe_device *xe = gt_to_xe(gt); - - if (xe->info.platform == XE_PVC || GRAPHICS_VER(xe) >= 20) { - xe_mmio_write32(gt, PVC_GUC_TLB_INV_DESC1, - PVC_GUC_TLB_INV_DESC1_INVALIDATE); - xe_mmio_write32(gt, PVC_GUC_TLB_INV_DESC0, - PVC_GUC_TLB_INV_DESC0_VALID); - } else - xe_mmio_write32(gt, GUC_TLB_INV_CR, - GUC_TLB_INV_CR_INVALIDATE); - } - + err = xe_gt_tlb_invalidation_ggtt(gt); + if (err) + drm_warn(&xe->drm, "xe_gt_tlb_invalidation_ggtt error=%d", err); xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); } diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c index e3a4131ebb58..a7b1f7cfec87 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c @@ -11,7 +11,9 @@ #include "xe_gt_printk.h" #include "xe_guc.h" #include "xe_guc_ct.h" +#include "xe_mmio.h" #include "xe_trace.h" +#include "regs/xe_guc_regs.h" #define TLB_TIMEOUT (HZ / 4) @@ -210,7 +212,7 @@ static int send_tlb_invalidation(struct xe_guc *guc, * Return: Seqno which can be passed to xe_gt_tlb_invalidation_wait on success, * negative error code on error. */ -int xe_gt_tlb_invalidation_guc(struct xe_gt *gt) +static int xe_gt_tlb_invalidation_guc(struct xe_gt *gt) { u32 action[] = { XE_GUC_ACTION_TLB_INVALIDATION, @@ -222,6 +224,43 @@ int xe_gt_tlb_invalidation_guc(struct xe_gt *gt) ARRAY_SIZE(action)); } +/** + * xe_gt_tlb_invalidation_ggtt - Issue a TLB invalidation on this GT for the GGTT + * @gt: graphics tile + * + * Issue a TLB invalidation for the GGTT. Completion of TLB invalidation is + * synchronous. 
+ * + * Return: 0 on success, negative error code on error + */ +int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt) +{ + struct xe_device *xe = gt_to_xe(gt); + + if (xe_guc_ct_enabled(&gt->uc.guc.ct) && + gt->uc.guc.submission_state.enabled) { + int seqno; + + seqno = xe_gt_tlb_invalidation_guc(gt); + if (seqno <= 0) + return seqno; + + xe_gt_tlb_invalidation_wait(gt, seqno); + } else if (xe_device_uc_enabled(xe)) { + if (xe->info.platform == XE_PVC || GRAPHICS_VER(xe) >= 20) { + xe_mmio_write32(gt, PVC_GUC_TLB_INV_DESC1, + PVC_GUC_TLB_INV_DESC1_INVALIDATE); + xe_mmio_write32(gt, PVC_GUC_TLB_INV_DESC0, + PVC_GUC_TLB_INV_DESC0_VALID); + } else { + xe_mmio_write32(gt, GUC_TLB_INV_CR, + GUC_TLB_INV_CR_INVALIDATE); + } + } + + return 0; +} + /** * xe_gt_tlb_invalidation_vma - Issue a TLB invalidation on this GT for a VMA * @gt: graphics tile diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h index b333c1709397..fbb743d80d2c 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h @@ -16,7 +16,7 @@ struct xe_vma; int xe_gt_tlb_invalidation_init(struct xe_gt *gt); void xe_gt_tlb_invalidation_reset(struct xe_gt *gt); -int xe_gt_tlb_invalidation_guc(struct xe_gt *gt); +int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt); int xe_gt_tlb_invalidation_vma(struct xe_gt *gt, struct xe_gt_tlb_invalidation_fence *fence, struct xe_vma *vma); -- cgit v1.2.3 From a9e483dda3efa5b9aae5d9eef94d2c3a878d9bea Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 22 Feb 2024 15:20:21 -0800 Subject: drm/xe: Don't support execlists in xe_gt_tlb_invalidation layer The xe_gt_tlb_invalidation layer implements TLB invalidations for a GuC backend. Simply return if in execlists mode. A follow up may properly implement the xe_gt_tlb_invalidation layer for both GuC and execlists. Fixes: a9351846d945 ("drm/xe: Break of TLB invalidation into its own file") Cc: Rodrigo Vivi Signed-off-by: Matthew Brost Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240222232021.3911545-4-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c index a7b1f7cfec87..f29ee1ccfa71 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c @@ -287,6 +287,14 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt, xe_gt_assert(gt, vma); + /* Execlists not supported */ + if (gt_to_xe(gt)->info.force_execlist) { + if (fence) + __invalidation_fence_signal(fence); + + return 0; + } + action[len++] = XE_GUC_ACTION_TLB_INVALIDATION; action[len++] = 0; /* seqno, replaced in send_tlb_invalidation */ if (!xe->info.has_range_tlb_invalidation) { @@ -355,6 +363,10 @@ int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno) struct xe_guc *guc = &gt->uc.guc; int ret; + /* Execlists not supported */ + if (gt_to_xe(gt)->info.force_execlist) + return 0; + /* * XXX: See above, this algorithm only works if seqno are always in * order -- cgit v1.2.3 From 30c399529f4c64e9671cba832630629d9cd08bf3 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Thu, 22 Feb 2024 11:39:24 -0500 Subject: drm/xe: Document Xe PM component Replace outdated information with a proper PM documentation. Already establish the rules for the runtime PM get and put that Xe needs to follow. Also add missing function documentation to all the "exported" functions.
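As a minimal sketch of the get/put rule this documentation establishes (the entry point and do_hw_work() below are hypothetical; only xe_pm_runtime_get()/xe_pm_runtime_put() are real Xe PM functions):

	/* Take the runtime PM reference at the outer bound of the operation
	 * (IOCTL, sysfs, debugfs, dma-buf sharing, GPU execution), not deep
	 * inside the memory management paths.
	 */
	static int example_entry_point(struct xe_device *xe)
	{
		int ret;

		xe_pm_runtime_get(xe);	/* wake the device, take a reference */
		ret = do_hw_work(xe);	/* hypothetical inner work touching HW */
		xe_pm_runtime_put(xe);	/* mark last busy, drop the reference */

		return ret;
	}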
v2: updated after Francois' feedback. s/grater/greater (Matt) v3: detach D3 from runtime_pm remove opportunistic S0iX (Anshuman) Cc: Matthew Auld Cc: Anshuman Gupta Acked-by: Francois Dugast Reviewed-by: Francois Dugast #v2 Signed-off-by: Rodrigo Vivi Reviewed-by: Anshuman Gupta Link: https://patchwork.freedesktop.org/patch/msgid/20240222163937.138342-1-rodrigo.vivi@intel.com --- drivers/gpu/drm/xe/xe_pm.c | 109 ++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 98 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c index ab283e9a8b4e..ae220f21acb1 100644 --- a/drivers/gpu/drm/xe/xe_pm.c +++ b/drivers/gpu/drm/xe/xe_pm.c @@ -25,21 +25,47 @@ /** * DOC: Xe Power Management * - * Xe PM shall be guided by the simplicity. - * Use the simplest hook options whenever possible. - * Let's not reinvent the runtime_pm references and hooks. - * Shall have a clear separation of display and gt underneath this component. + * Xe PM implements the main routines for both system level suspend states and + * for the opportunistic runtime suspend states. * - * What's next: + * System Level Suspend (S-States) - In general this is OS initiated suspend + * driven by ACPI for achieving S0ix (a.k.a. S2idle, freeze), S3 (suspend to ram), + * S4 (disk). The main functions here are `xe_pm_suspend` and `xe_pm_resume`. They + * are the main point for the suspend to and resume from these states. * - * For now s2idle and s3 are only working in integrated devices. The next step - * is to iterate through all VRAM's BO backing them up into the system memory - * before allowing the system suspend. + * PCI Device Suspend (D-States) - This is the opportunistic PCIe device low power + * state D3, controlled by the PCI subsystem and ACPI with the help from the + * runtime_pm infrastructure. + * PCI D3 is special and can mean D3hot, where Vcc power is on for keeping memory + * alive and quicker low latency resume or D3Cold where Vcc power is off for + * better power savings. + * The Vcc control of PCI hierarchy can only be controlled at the PCI root port + * level, while the device driver can be behind multiple bridges/switches and + * paired with other devices. For this reason, the PCI subsystem cannot perform + * the transition towards D3Cold. The lowest runtime PM possible from the PCI + * subsystem is D3hot. Then, if all these paired devices in the same root port + * are in D3hot, ACPI will assist here and run its own methods (_PR3 and _OFF) + * to perform the transition from D3hot to D3cold. Xe may disallow this + * transition by calling pci_d3cold_disable(root_pdev) before going to runtime + * suspend. It will be based on runtime conditions such as VRAM usage for a + * quick and low latency resume for instance. * - * Also runtime_pm needs to be here from the beginning. + * Runtime PM - This infrastructure provided by the Linux kernel allows the + * device drivers to indicate when the can be runtime suspended, so the device + * could be put at D3 (if supported), or allow deeper package sleep states + * (PC-states), and/or other low level power states. Xe PM component provides + * `xe_pm_runtime_suspend` and `xe_pm_runtime_resume` functions that PCI + * subsystem will call before transition to/from runtime suspend. * - * RC6/RPS are also critical PM features. Let's start with GuCRC and GuC SLPC - * and no wait boost. Frequency optimizations should come on a next stage. + * Also, Xe PM provides get and put functions that Xe driver will use to + * indicate activity. 
In order to avoid locking complications with the memory + * management, whenever possible, these get and put functions needs to be called + * from the higher/outer levels. + * The main cases that need to be protected from the outer levels are: IOCTL, + * sysfs, debugfs, dma-buf sharing, GPU execution. + * + * This component is not responsible for GT idleness (RC6) nor GT frequency + * management (RPS). */ /** @@ -178,6 +204,12 @@ void xe_pm_init_early(struct xe_device *xe) drmm_mutex_init(&xe->drm, &xe->mem_access.vram_userfault.lock); } +/** + * xe_pm_init - Initialize Xe Power Management + * @xe: xe device instance + * + * This component is responsible for System and Device sleep states. + */ void xe_pm_init(struct xe_device *xe) { /* For now suspend/resume is only allowed with GuC */ @@ -196,6 +228,10 @@ void xe_pm_init(struct xe_device *xe) xe_pm_runtime_init(xe); } +/** + * xe_pm_runtime_fini - Finalize Runtime PM + * @xe: xe device instance + */ void xe_pm_runtime_fini(struct xe_device *xe) { struct device *dev = xe->drm.dev; @@ -225,6 +261,12 @@ struct task_struct *xe_pm_read_callback_task(struct xe_device *xe) return READ_ONCE(xe->pm_callback_task); } +/** + * xe_pm_runtime_suspend - Prepare our device for D3hot/D3Cold + * @xe: xe device instance + * + * Returns 0 for success, negative error code otherwise. + */ int xe_pm_runtime_suspend(struct xe_device *xe) { struct xe_bo *bo, *on; @@ -290,6 +332,12 @@ out: return err; } +/** + * xe_pm_runtime_resume - Waking up from D3hot/D3Cold + * @xe: xe device instance + * + * Returns 0 for success, negative error code otherwise. + */ int xe_pm_runtime_resume(struct xe_device *xe) { struct xe_gt *gt; @@ -341,22 +389,47 @@ out: return err; } +/** + * xe_pm_runtime_get - Get a runtime_pm reference and resume synchronously + * @xe: xe device instance + * + * Returns: Any number greater than or equal to 0 for success, negative error + * code otherwise. + */ int xe_pm_runtime_get(struct xe_device *xe) { return pm_runtime_get_sync(xe->drm.dev); } +/** + * xe_pm_runtime_put - Put the runtime_pm reference back and mark as idle + * @xe: xe device instance + * + * Returns: Any number greater than or equal to 0 for success, negative error + * code otherwise. + */ int xe_pm_runtime_put(struct xe_device *xe) { pm_runtime_mark_last_busy(xe->drm.dev); return pm_runtime_put(xe->drm.dev); } +/** + * xe_pm_runtime_get_if_active - Get a runtime_pm reference if device active + * @xe: xe device instance + * + * Returns: Any number greater than or equal to 0 for success, negative error + * code otherwise. + */ int xe_pm_runtime_get_if_active(struct xe_device *xe) { return pm_runtime_get_if_active(xe->drm.dev, true); } +/** + * xe_pm_assert_unbounded_bridge - Disable PM on unbounded pcie parent bridge + * @xe: xe device instance + */ void xe_pm_assert_unbounded_bridge(struct xe_device *xe) { struct pci_dev *pdev = to_pci_dev(xe->drm.dev); @@ -371,6 +444,13 @@ void xe_pm_assert_unbounded_bridge(struct xe_device *xe) } } +/** + * xe_pm_set_vram_threshold - Set a vram threshold for allowing/blocking D3Cold + * @xe: xe device instance + * @threshold: VRAM size in bites for the D3cold threshold + * + * Returns 0 for success, negative error code otherwise. 
+ */ int xe_pm_set_vram_threshold(struct xe_device *xe, u32 threshold) { struct ttm_resource_manager *man; @@ -395,6 +475,13 @@ int xe_pm_set_vram_threshold(struct xe_device *xe, u32 threshold) return 0; } +/** + * xe_pm_d3cold_allowed_toggle - Check conditions to toggle d3cold.allowed + * @xe: xe device instance + * + * To be called during runtime_pm idle callback. + * Check for all the D3Cold conditions ahead of runtime suspend. + */ void xe_pm_d3cold_allowed_toggle(struct xe_device *xe) { struct ttm_resource_manager *man; -- cgit v1.2.3 From 0f9d886f0bf93394b09b1dfe5397f7842c0f48b9 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Thu, 22 Feb 2024 11:39:25 -0500 Subject: drm/xe: Convert mem_access assertion towards the runtime_pm state The mem_access helpers are going away and getting replaced by direct calls of the xe_pm_runtime_{get,put} functions. However, an assertion with a warning splat is desired when we hit the worst case of a memory access with the device really in the 'suspended' state. Also, this needs to be the first step. Otherwise, the upcoming conversion would be really noise with warn splats of missing mem_access gets. v2: Minor doc changes as suggested by Matt Reviewed-by: Matthew Auld Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240222163937.138342-2-rodrigo.vivi@intel.com --- drivers/gpu/drm/xe/xe_device.c | 13 ++++++++++++- drivers/gpu/drm/xe/xe_pm.c | 16 ++++++++++++++++ drivers/gpu/drm/xe/xe_pm.h | 1 + 3 files changed, 29 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index ca85e81fdb44..049bbca6ed56 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -620,9 +620,20 @@ bool xe_device_mem_access_ongoing(struct xe_device *xe) return atomic_read(&xe->mem_access.ref); } +/** + * xe_device_assert_mem_access - Inspect the current runtime_pm state. + * @xe: xe device instance + * + * To be used before any kind of memory access. It will splat a debug warning + * if the device is currently sleeping. But it doesn't guarantee in any way + * that the device is going to remain awake. Xe PM runtime get and put + * functions might be added to the outer bound of the memory access, while + * this check is intended for inner usage to splat some warning if the worst + * case has just happened. + */ void xe_device_assert_mem_access(struct xe_device *xe) { - XE_WARN_ON(!xe_device_mem_access_ongoing(xe)); + XE_WARN_ON(xe_pm_runtime_suspended(xe)); } bool xe_device_mem_access_get_if_ongoing(struct xe_device *xe) diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c index ae220f21acb1..5289d8c0c3b1 100644 --- a/drivers/gpu/drm/xe/xe_pm.c +++ b/drivers/gpu/drm/xe/xe_pm.c @@ -261,6 +261,22 @@ struct task_struct *xe_pm_read_callback_task(struct xe_device *xe) return READ_ONCE(xe->pm_callback_task); } +/** + * xe_pm_runtime_suspended - Check if runtime_pm state is suspended + * @xe: xe device instance + * + * This does not provide any guarantee that the device is going to remain + * suspended as it might be racing with the runtime state transitions. + * It can be used only as a non-reliable assertion, to ensure that we are not in + * the sleep state while trying to access some memory for instance. + * + * Returns true if PCI device is suspended, false otherwise. 
+ */ +bool xe_pm_runtime_suspended(struct xe_device *xe) +{ + return pm_runtime_suspended(xe->drm.dev); +} + /** * xe_pm_runtime_suspend - Prepare our device for D3hot/D3Cold * @xe: xe device instance diff --git a/drivers/gpu/drm/xe/xe_pm.h b/drivers/gpu/drm/xe/xe_pm.h index 64a97c6726a7..75919eba1746 100644 --- a/drivers/gpu/drm/xe/xe_pm.h +++ b/drivers/gpu/drm/xe/xe_pm.h @@ -23,6 +23,7 @@ int xe_pm_resume(struct xe_device *xe); void xe_pm_init_early(struct xe_device *xe); void xe_pm_init(struct xe_device *xe); void xe_pm_runtime_fini(struct xe_device *xe); +bool xe_pm_runtime_suspended(struct xe_device *xe); int xe_pm_runtime_suspend(struct xe_device *xe); int xe_pm_runtime_resume(struct xe_device *xe); int xe_pm_runtime_get(struct xe_device *xe); -- cgit v1.2.3 From 23cf006beac3db89f946a52c962cd16c82066c5c Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Thu, 22 Feb 2024 11:39:26 -0500 Subject: drm/xe: Runtime PM wake on every IOCTL Let's ensure our PCI device is awaken on every IOCTL entry. Let's increase the runtime_pm protection and start moving that to the outer bounds. v2: minor typo fix and renaming function to make it clear that is intended to be used by ioctl only. (Matt) v3: Make it NULL if CONFIG_COMPAT is not selected. Cc: Matthew Auld Reviewed-by: Francois Dugast Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240222163937.138342-3-rodrigo.vivi@intel.com --- drivers/gpu/drm/xe/xe_device.c | 37 +++++++++++++++++++++++++++++++++++-- drivers/gpu/drm/xe/xe_pm.c | 15 +++++++++++++++ drivers/gpu/drm/xe/xe_pm.h | 1 + 3 files changed, 51 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 049bbca6ed56..919ad88f0495 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -136,15 +136,48 @@ static const struct drm_ioctl_desc xe_ioctls[] = { DRM_RENDER_ALLOW), }; +static long xe_drm_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + struct drm_file *file_priv = file->private_data; + struct xe_device *xe = to_xe_device(file_priv->minor->dev); + long ret; + + ret = xe_pm_runtime_get_ioctl(xe); + if (ret >= 0) + ret = drm_ioctl(file, cmd, arg); + xe_pm_runtime_put(xe); + + return ret; +} + +#ifdef CONFIG_COMPAT +static long xe_drm_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + struct drm_file *file_priv = file->private_data; + struct xe_device *xe = to_xe_device(file_priv->minor->dev); + long ret; + + ret = xe_pm_runtime_get_ioctl(xe); + if (ret >= 0) + ret = drm_compat_ioctl(file, cmd, arg); + xe_pm_runtime_put(xe); + + return ret; +} +#else +/* similarly to drm_compat_ioctl, let's it be assigned to .compat_ioct unconditionally */ +#define xe_drm_compat_ioctl NULL +#endif + static const struct file_operations xe_driver_fops = { .owner = THIS_MODULE, .open = drm_open, .release = drm_release_noglobal, - .unlocked_ioctl = drm_ioctl, + .unlocked_ioctl = xe_drm_ioctl, .mmap = drm_gem_mmap, .poll = drm_poll, .read = drm_read, - .compat_ioctl = drm_compat_ioctl, + .compat_ioctl = xe_drm_compat_ioctl, .llseek = noop_llseek, #ifdef CONFIG_PROC_FS .show_fdinfo = drm_show_fdinfo, diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c index 5289d8c0c3b1..b5511e3c3153 100644 --- a/drivers/gpu/drm/xe/xe_pm.c +++ b/drivers/gpu/drm/xe/xe_pm.c @@ -430,6 +430,21 @@ int xe_pm_runtime_put(struct xe_device *xe) return pm_runtime_put(xe->drm.dev); } +/** + * xe_pm_runtime_get_ioctl - Get a runtime_pm reference before ioctl + * @xe: 
xe device instance + * + * Returns: Any number greater than or equal to 0 for success, negative error + * code otherwise. + */ +int xe_pm_runtime_get_ioctl(struct xe_device *xe) +{ + if (WARN_ON(xe_pm_read_callback_task(xe) == current)) + return -ELOOP; + + return pm_runtime_get_sync(xe->drm.dev); +} + /** * xe_pm_runtime_get_if_active - Get a runtime_pm reference if device active * @xe: xe device instance diff --git a/drivers/gpu/drm/xe/xe_pm.h b/drivers/gpu/drm/xe/xe_pm.h index 75919eba1746..7f5884babb29 100644 --- a/drivers/gpu/drm/xe/xe_pm.h +++ b/drivers/gpu/drm/xe/xe_pm.h @@ -27,6 +27,7 @@ bool xe_pm_runtime_suspended(struct xe_device *xe); int xe_pm_runtime_suspend(struct xe_device *xe); int xe_pm_runtime_resume(struct xe_device *xe); int xe_pm_runtime_get(struct xe_device *xe); +int xe_pm_runtime_get_ioctl(struct xe_device *xe); int xe_pm_runtime_put(struct xe_device *xe); int xe_pm_runtime_get_if_active(struct xe_device *xe); void xe_pm_assert_unbounded_bridge(struct xe_device *xe); -- cgit v1.2.3 From 6a0784e6001f42b87af21ab63b3935ef4d6c6407 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Thu, 22 Feb 2024 11:39:27 -0500 Subject: drm/xe: Convert kunit tests from mem_access to xe_pm_runtime Let's convert the kunit tests that are currently relying on xe_device_mem_access_{get,put} towards the direct xe_pm_runtime_{get,put}. While doing this we need to move the get/put calls towards the outer bounds of the tests to ensure consistency with the other usages of pm_runtime on the regular paths. v2: include xe_pm.h in tests/xe_mocs.c and sort the include block while at it. Reviewed-by: Matthew Auld Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240222163937.138342-4-rodrigo.vivi@intel.com --- drivers/gpu/drm/xe/tests/xe_bo.c | 8 ++++---- drivers/gpu/drm/xe/tests/xe_migrate.c | 7 +++++-- drivers/gpu/drm/xe/tests/xe_mocs.c | 19 +++++++++++++------ 3 files changed, 22 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c index 3436fd9cf2b2..0926a1c2eb86 100644 --- a/drivers/gpu/drm/xe/tests/xe_bo.c +++ b/drivers/gpu/drm/xe/tests/xe_bo.c @@ -163,7 +163,7 @@ static int ccs_test_run_device(struct xe_device *xe) return 0; } - xe_device_mem_access_get(xe); + xe_pm_runtime_get(xe); for_each_tile(tile, xe, id) { /* For igfx run only for primary tile */ @@ -172,7 +172,7 @@ static int ccs_test_run_device(struct xe_device *xe) ccs_test_run_tile(xe, tile, test); } - xe_device_mem_access_put(xe); + xe_pm_runtime_put(xe); return 0; } @@ -335,12 +335,12 @@ static int evict_test_run_device(struct xe_device *xe) return 0; } - xe_device_mem_access_get(xe); + xe_pm_runtime_get(xe); for_each_tile(tile, xe, id) evict_test_run_tile(xe, tile, test); - xe_device_mem_access_put(xe); + xe_pm_runtime_put(xe); return 0; } diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c index a6523df0f1d3..ce531498f57f 100644 --- a/drivers/gpu/drm/xe/tests/xe_migrate.c +++ b/drivers/gpu/drm/xe/tests/xe_migrate.c @@ -10,6 +10,7 @@ #include "tests/xe_pci_test.h" #include "xe_pci.h" +#include "xe_pm.h" static bool sanity_fence_failed(struct xe_device *xe, struct dma_fence *fence, const char *str, struct kunit *test) @@ -423,17 +424,19 @@ static int migrate_test_run_device(struct xe_device *xe) struct xe_tile *tile; int id; + xe_pm_runtime_get(xe); + for_each_tile(tile, xe, id) { struct xe_migrate *m = tile->migrate; kunit_info(test, "Testing tile id %d.\n", id); xe_vm_lock(m->q->vm, true); - 
xe_device_mem_access_get(xe); xe_migrate_sanity_test(m, test); - xe_device_mem_access_put(xe); xe_vm_unlock(m->q->vm); } + xe_pm_runtime_put(xe); + return 0; } diff --git a/drivers/gpu/drm/xe/tests/xe_mocs.c b/drivers/gpu/drm/xe/tests/xe_mocs.c index df5c36b70ab4..df0cbb2ddcb5 100644 --- a/drivers/gpu/drm/xe/tests/xe_mocs.c +++ b/drivers/gpu/drm/xe/tests/xe_mocs.c @@ -10,10 +10,11 @@ #include "tests/xe_pci_test.h" #include "tests/xe_test.h" -#include "xe_pci.h" +#include "xe_device.h" #include "xe_gt.h" #include "xe_mocs.h" -#include "xe_device.h" +#include "xe_pci.h" +#include "xe_pm.h" struct live_mocs { struct xe_mocs_info table; @@ -45,7 +46,6 @@ static void read_l3cc_table(struct xe_gt *gt, struct kunit *test = xe_cur_kunit(); - xe_device_mem_access_get(gt_to_xe(gt)); ret = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); KUNIT_ASSERT_EQ_MSG(test, ret, 0, "Forcewake Failed.\n"); mocs_dbg(&gt_to_xe(gt)->drm, "L3CC entries:%d\n", info->n_entries); @@ -65,7+65,6 @@ static void read_l3cc_table(struct xe_gt *gt, XELP_LNCFCMOCS(i).addr); } xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); - xe_device_mem_access_put(gt_to_xe(gt)); } static void read_mocs_table(struct xe_gt *gt, @@ -80,7 +79,6 @@ static void read_mocs_table(struct xe_gt *gt, struct kunit *test = xe_cur_kunit(); - xe_device_mem_access_get(gt_to_xe(gt)); ret = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); KUNIT_ASSERT_EQ_MSG(test, ret, 0, "Forcewake Failed.\n"); mocs_dbg(&gt_to_xe(gt)->drm, "Global MOCS entries:%d\n", info->n_entries); @@ -100,7 +98,6 @@ static void read_mocs_table(struct xe_gt *gt, XELP_GLOBAL_MOCS(i).addr); } xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); - xe_device_mem_access_put(gt_to_xe(gt)); } static int mocs_kernel_test_run_device(struct xe_device *xe) @@ -113,6 +110,8 @@ static int mocs_kernel_test_run_device(struct xe_device *xe) unsigned int flags; int id; + xe_pm_runtime_get(xe); + for_each_gt(gt, xe, id) { flags = live_mocs_init(&mocs, gt); if (flags & HAS_GLOBAL_MOCS) @@ -120,6 +119,9 @@ static int mocs_kernel_test_run_device(struct xe_device *xe) if (flags & HAS_LNCF_MOCS) read_l3cc_table(gt, &mocs.table); } + + xe_pm_runtime_put(xe); + return 0; } @@ -139,6 +141,8 @@ static int mocs_reset_test_run_device(struct xe_device *xe) int id; struct kunit *test = xe_cur_kunit(); + xe_pm_runtime_get(xe); + for_each_gt(gt, xe, id) { flags = live_mocs_init(&mocs, gt); kunit_info(test, "mocs_reset_test before reset\n"); @@ -156,6 +160,9 @@ static int mocs_reset_test_run_device(struct xe_device *xe) if (flags & HAS_LNCF_MOCS) read_l3cc_table(gt, &mocs.table); } + + xe_pm_runtime_put(xe); + return 0; } -- cgit v1.2.3 From 3f0e14651ab0ca340c4b6f77b2ea615605fcf8f8 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Thu, 22 Feb 2024 11:39:28 -0500 Subject: drm/xe: Runtime PM wake on every sysfs call Let's ensure our PCI device is awaken on every sysfs call. Let's increase the runtime_pm protection and start moving that to the outer bounds. For now, for the files with small number of attr functions, let's only call the runtime pm functions directly. For the hw_engines entries with many files, let's add the sysfs_ops wrapper.
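A rough sketch of the wrapper idea for classes with many attributes, simplified from the xe_hw_engine_class_sysfs_ops show path added in the diff below (kobj_to_xe() is the helper this patch introduces):

	static ssize_t wrapped_show(struct kobject *kobj, struct attribute *attr,
				    char *buf)
	{
		struct kobj_attribute *kattr =
			container_of(attr, struct kobj_attribute, attr);
		ssize_t ret = -EIO;

		if (kattr->show) {
			/* one wake/sleep point covers every attr of the kobject */
			xe_pm_runtime_get(kobj_to_xe(kobj));
			ret = kattr->show(kobj, kattr, buf);
			xe_pm_runtime_put(kobj_to_xe(kobj));
		}

		return ret;
	}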
Reviewed-by: Matthew Auld Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240222163937.138342-5-rodrigo.vivi@intel.com --- drivers/gpu/drm/xe/xe_device_sysfs.c | 4 ++ drivers/gpu/drm/xe/xe_gt_freq.c | 38 ++++++++++++++++-- drivers/gpu/drm/xe/xe_gt_idle.c | 23 ++++++++++- drivers/gpu/drm/xe/xe_gt_throttle_sysfs.c | 3 ++ drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c | 58 ++++++++++++++++++++++++++- drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.h | 7 ++++ drivers/gpu/drm/xe/xe_tile_sysfs.c | 1 + 7 files changed, 129 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_device_sysfs.c b/drivers/gpu/drm/xe/xe_device_sysfs.c index 99113a5a2b84..e47c8ad1bb17 100644 --- a/drivers/gpu/drm/xe/xe_device_sysfs.c +++ b/drivers/gpu/drm/xe/xe_device_sysfs.c @@ -35,7 +35,9 @@ vram_d3cold_threshold_show(struct device *dev, if (!xe) return -EINVAL; + xe_pm_runtime_get(xe); ret = sysfs_emit(buf, "%d\n", xe->d3cold.vram_threshold); + xe_pm_runtime_put(xe); return ret; } @@ -58,7 +60,9 @@ vram_d3cold_threshold_store(struct device *dev, struct device_attribute *attr, drm_dbg(&xe->drm, "vram_d3cold_threshold: %u\n", vram_d3cold_threshold); + xe_pm_runtime_get(xe); ret = xe_pm_set_vram_threshold(xe, vram_d3cold_threshold); + xe_pm_runtime_put(xe); return ret ?: count; } diff --git a/drivers/gpu/drm/xe/xe_gt_freq.c b/drivers/gpu/drm/xe/xe_gt_freq.c index e5b0f4ecdbe8..32b9a743629c 100644 --- a/drivers/gpu/drm/xe/xe_gt_freq.c +++ b/drivers/gpu/drm/xe/xe_gt_freq.c @@ -15,6 +15,7 @@ #include "xe_gt_sysfs.h" #include "xe_gt_throttle_sysfs.h" #include "xe_guc_pc.h" +#include "xe_pm.h" /** * DOC: Xe GT Frequency Management @@ -49,12 +50,23 @@ dev_to_pc(struct device *dev) return &kobj_to_gt(dev->kobj.parent)->uc.guc.pc; } +static struct xe_device * +dev_to_xe(struct device *dev) +{ + return gt_to_xe(kobj_to_gt(dev->kobj.parent)); +} + static ssize_t act_freq_show(struct device *dev, struct device_attribute *attr, char *buf) { struct xe_guc_pc *pc = dev_to_pc(dev); + u32 freq; + + xe_pm_runtime_get(dev_to_xe(dev)); + freq = xe_guc_pc_get_act_freq(pc); + xe_pm_runtime_put(dev_to_xe(dev)); - return sysfs_emit(buf, "%d\n", xe_guc_pc_get_act_freq(pc)); + return sysfs_emit(buf, "%d\n", freq); } static DEVICE_ATTR_RO(act_freq); @@ -65,7 +77,9 @@ static ssize_t cur_freq_show(struct device *dev, u32 freq; ssize_t ret; + xe_pm_runtime_get(dev_to_xe(dev)); ret = xe_guc_pc_get_cur_freq(pc, &freq); + xe_pm_runtime_put(dev_to_xe(dev)); if (ret) return ret; @@ -77,8 +91,13 @@ static ssize_t rp0_freq_show(struct device *dev, struct device_attribute *attr, char *buf) { struct xe_guc_pc *pc = dev_to_pc(dev); + u32 freq; + + xe_pm_runtime_get(dev_to_xe(dev)); + freq = xe_guc_pc_get_rp0_freq(pc); + xe_pm_runtime_put(dev_to_xe(dev)); - return sysfs_emit(buf, "%d\n", xe_guc_pc_get_rp0_freq(pc)); + return sysfs_emit(buf, "%d\n", freq); } static DEVICE_ATTR_RO(rp0_freq); @@ -86,8 +105,13 @@ static ssize_t rpe_freq_show(struct device *dev, struct device_attribute *attr, char *buf) { struct xe_guc_pc *pc = dev_to_pc(dev); + u32 freq; + + xe_pm_runtime_get(dev_to_xe(dev)); + freq = xe_guc_pc_get_rpe_freq(pc); + xe_pm_runtime_put(dev_to_xe(dev)); - return sysfs_emit(buf, "%d\n", xe_guc_pc_get_rpe_freq(pc)); + return sysfs_emit(buf, "%d\n", freq); } static DEVICE_ATTR_RO(rpe_freq); @@ -107,7 +131,9 @@ static ssize_t min_freq_show(struct device *dev, u32 freq; ssize_t ret; + xe_pm_runtime_get(dev_to_xe(dev)); ret = xe_guc_pc_get_min_freq(pc, &freq); + xe_pm_runtime_put(dev_to_xe(dev)); if (ret) return ret; 
@@ -125,7 +151,9 @@ static ssize_t min_freq_store(struct device *dev, struct device_attribute *attr, if (ret) return ret; + xe_pm_runtime_get(dev_to_xe(dev)); ret = xe_guc_pc_set_min_freq(pc, freq); + xe_pm_runtime_put(dev_to_xe(dev)); if (ret) return ret; @@ -140,7 +168,9 @@ static ssize_t max_freq_show(struct device *dev, u32 freq; ssize_t ret; + xe_pm_runtime_get(dev_to_xe(dev)); ret = xe_guc_pc_get_max_freq(pc, &freq); + xe_pm_runtime_put(dev_to_xe(dev)); if (ret) return ret; @@ -158,7 +188,9 @@ static ssize_t max_freq_store(struct device *dev, struct device_attribute *attr, if (ret) return ret; + xe_pm_runtime_get(dev_to_xe(dev)); ret = xe_guc_pc_set_max_freq(pc, freq); + xe_pm_runtime_put(dev_to_xe(dev)); if (ret) return ret; diff --git a/drivers/gpu/drm/xe/xe_gt_idle.c b/drivers/gpu/drm/xe/xe_gt_idle.c index 9fcae65b6469..2984680de3f9 100644 --- a/drivers/gpu/drm/xe/xe_gt_idle.c +++ b/drivers/gpu/drm/xe/xe_gt_idle.c @@ -12,6 +12,7 @@ #include "xe_guc_pc.h" #include "regs/xe_gt_regs.h" #include "xe_mmio.h" +#include "xe_pm.h" /** * DOC: Xe GT Idle @@ -40,6 +41,15 @@ static struct xe_guc_pc *gtidle_to_pc(struct xe_gt_idle *gtidle) return &gtidle_to_gt(gtidle)->uc.guc.pc; } +static struct xe_device * +pc_to_xe(struct xe_guc_pc *pc) +{ + struct xe_guc *guc = container_of(pc, struct xe_guc, pc); + struct xe_gt *gt = container_of(guc, struct xe_gt, uc.guc); + + return gt_to_xe(gt); +} + static const char *gt_idle_state_to_string(enum xe_gt_idle_state state) { switch (state) { @@ -86,8 +96,14 @@ static ssize_t name_show(struct device *dev, struct device_attribute *attr, char *buff) { struct xe_gt_idle *gtidle = dev_to_gtidle(dev); + struct xe_guc_pc *pc = gtidle_to_pc(gtidle); + ssize_t ret; + + xe_pm_runtime_get(pc_to_xe(pc)); + ret = sysfs_emit(buff, "%s\n", gtidle->name); + xe_pm_runtime_put(pc_to_xe(pc)); - return sysfs_emit(buff, "%s\n", gtidle->name); + return ret; } static DEVICE_ATTR_RO(name); @@ -98,7 +114,9 @@ static ssize_t idle_status_show(struct device *dev, struct xe_gt_idle *gtidle = dev_to_gtidle(dev); struct xe_guc_pc *pc = gtidle_to_pc(gtidle); enum xe_gt_idle_state state; + xe_pm_runtime_get(pc_to_xe(pc)); state = gtidle->idle_status(pc); + xe_pm_runtime_put(pc_to_xe(pc)); return sysfs_emit(buff, "%s\n", gt_idle_state_to_string(state)); } @@ -111,7 +129,10 @@ static ssize_t idle_residency_ms_show(struct device *dev, struct xe_guc_pc *pc = gtidle_to_pc(gtidle); u64 residency; + xe_pm_runtime_get(pc_to_xe(pc)); residency = gtidle->idle_residency(pc); + xe_pm_runtime_put(pc_to_xe(pc)); + return sysfs_emit(buff, "%llu\n", get_residency_ms(gtidle, residency)); } static DEVICE_ATTR_RO(idle_residency_ms); diff --git a/drivers/gpu/drm/xe/xe_gt_throttle_sysfs.c b/drivers/gpu/drm/xe/xe_gt_throttle_sysfs.c index 63d640591a52..9c33045ff1ef 100644 --- a/drivers/gpu/drm/xe/xe_gt_throttle_sysfs.c +++ b/drivers/gpu/drm/xe/xe_gt_throttle_sysfs.c @@ -11,6 +11,7 @@ #include "xe_gt_sysfs.h" #include "xe_gt_throttle_sysfs.h" #include "xe_mmio.h" +#include "xe_pm.h" /** * DOC: Xe GT Throttle @@ -38,10 +39,12 @@ static u32 read_perf_limit_reasons(struct xe_gt *gt) { u32 reg; + xe_pm_runtime_get(gt_to_xe(gt)); if (xe_gt_is_media_type(gt)) reg = xe_mmio_read32(gt, MTL_MEDIA_PERF_LIMIT_REASONS); else reg = xe_mmio_read32(gt, GT0_PERF_LIMIT_REASONS); + xe_pm_runtime_put(gt_to_xe(gt)); return reg; } diff --git a/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c index 2345fb42fa39..9e23ca7f45ad 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c +++ b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c
@@ -9,6 +9,7 @@ #include "xe_gt.h" #include "xe_hw_engine_class_sysfs.h" +#include "xe_pm.h" #define MAX_ENGINE_CLASS_NAME_LEN 16 static int xe_add_hw_engine_class_defaults(struct xe_device *xe, @@ -513,6 +514,7 @@ kobj_xe_hw_engine_class(struct xe_device *xe, struct kobject *parent, char *name kobject_put(&keclass->base); return NULL; } + keclass->xe = xe; err = drmm_add_action_or_reset(&xe->drm, kobj_xe_hw_engine_class_fini, &keclass->base); @@ -567,9 +569,63 @@ static void xe_hw_engine_sysfs_kobj_release(struct kobject *kobj) kfree(kobj); } +#include "xe_pm.h" + +static inline struct xe_device *pdev_to_xe_device(struct pci_dev *pdev) +{ + return pci_get_drvdata(pdev); +} + +static inline struct xe_device *to_xe_device(const struct drm_device *dev) +{ + return container_of(dev, struct xe_device, drm); +} + +static ssize_t xe_hw_engine_class_sysfs_attr_show(struct kobject *kobj, + struct attribute *attr, + char *buf) +{ + struct xe_device *xe = kobj_to_xe(kobj); + struct kobj_attribute *kattr; + ssize_t ret = -EIO; + + kattr = container_of(attr, struct kobj_attribute, attr); + if (kattr->show) { + xe_pm_runtime_get(xe); + ret = kattr->show(kobj, kattr, buf); + xe_pm_runtime_put(xe); + } + + return ret; +} + +static ssize_t xe_hw_engine_class_sysfs_attr_store(struct kobject *kobj, + struct attribute *attr, + const char *buf, + size_t count) +{ + struct xe_device *xe = kobj_to_xe(kobj); + struct kobj_attribute *kattr; + ssize_t ret = -EIO; + + kattr = container_of(attr, struct kobj_attribute, attr); + if (kattr->store) { + xe_pm_runtime_get(xe); + ret = kattr->store(kobj, kattr, buf, count); + xe_pm_runtime_put(xe); + } + + return ret; +} + +static const struct sysfs_ops xe_hw_engine_class_sysfs_ops = { + .show = xe_hw_engine_class_sysfs_attr_show, + .store = xe_hw_engine_class_sysfs_attr_store, +}; + static const struct kobj_type xe_hw_engine_sysfs_kobj_type = { .release = xe_hw_engine_sysfs_kobj_release, - .sysfs_ops = &kobj_sysfs_ops, + .sysfs_ops = &xe_hw_engine_class_sysfs_ops, }; static void hw_engine_class_sysfs_fini(struct drm_device *drm, void *arg) diff --git a/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.h b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.h index ec5ba673b314..28a0d7c909c0 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.h +++ b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.h @@ -26,6 +26,8 @@ struct kobj_eclass { struct kobject base; /** @eclass: A pointer to the hw engine class interface */ struct xe_hw_engine_class_intf *eclass; + /** @xe: A pointer to the xe device */ + struct xe_device *xe; }; static inline struct xe_hw_engine_class_intf *kobj_to_eclass(struct kobject *kobj) @@ -33,4 +35,9 @@ static inline struct xe_hw_engine_class_intf *kobj_to_eclass(struct kobject *kob return container_of(kobj, struct kobj_eclass, base)->eclass; } +static inline struct xe_device *kobj_to_xe(struct kobject *kobj) +{ + return container_of(kobj, struct kobj_eclass, base)->xe; +} + #endif diff --git a/drivers/gpu/drm/xe/xe_tile_sysfs.c b/drivers/gpu/drm/xe/xe_tile_sysfs.c index 0662968d7bcb..237a0761d3ad 100644 --- a/drivers/gpu/drm/xe/xe_tile_sysfs.c +++ b/drivers/gpu/drm/xe/xe_tile_sysfs.c @@ -7,6 +7,7 @@ #include #include +#include "xe_pm.h" #include "xe_tile.h" #include "xe_tile_sysfs.h" #include "xe_vram_freq.h" -- cgit v1.2.3 From 1e941c9881ec20f6d0173bcd344a605bb89cb121 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Thu, 22 Feb 2024 11:39:29 -0500 Subject: drm/xe: Remove mem_access from guc_pc calls We are now protected by init, sysfs, or removal and don't need 
these mem_access protections around GuC_PC anymore. Reviewed-by: Matthew Auld Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240222163937.138342-6-rodrigo.vivi@intel.com --- drivers/gpu/drm/xe/xe_guc_pc.c | 64 +++++++----------------------------------- 1 file changed, 10 insertions(+), 54 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c index 2839d685631b..f4b031b8d9de 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.c +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -381,8 +381,6 @@ u32 xe_guc_pc_get_act_freq(struct xe_guc_pc *pc) struct xe_device *xe = gt_to_xe(gt); u32 freq; - xe_device_mem_access_get(gt_to_xe(gt)); - /* When in RC6, actual frequency reported will be 0. */ if (GRAPHICS_VERx100(xe) >= 1270) { freq = xe_mmio_read32(gt, MTL_MIRROR_TARGET_WP1); @@ -394,8 +392,6 @@ u32 xe_guc_pc_get_act_freq(struct xe_guc_pc *pc) freq = decode_freq(freq); - xe_device_mem_access_put(gt_to_xe(gt)); - return freq; } @@ -412,14 +408,13 @@ int xe_guc_pc_get_cur_freq(struct xe_guc_pc *pc, u32 *freq) struct xe_gt *gt = pc_to_gt(pc); int ret; - xe_device_mem_access_get(gt_to_xe(gt)); /* * GuC SLPC plays with cur freq request when GuCRC is enabled * Block RC6 for a more reliable read. */ ret = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); if (ret) - goto out; + return ret; *freq = xe_mmio_read32(gt, RPNSWREQ); @@ -427,9 +422,7 @@ int xe_guc_pc_get_cur_freq(struct xe_guc_pc *pc, u32 *freq) *freq = decode_freq(*freq); XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); -out: - xe_device_mem_access_put(gt_to_xe(gt)); - return ret; + return 0; } /** @@ -451,12 +444,7 @@ u32 xe_guc_pc_get_rp0_freq(struct xe_guc_pc *pc) */ u32 xe_guc_pc_get_rpe_freq(struct xe_guc_pc *pc) { - struct xe_gt *gt = pc_to_gt(pc); - struct xe_device *xe = gt_to_xe(gt); - - xe_device_mem_access_get(xe); pc_update_rp_values(pc); - xe_device_mem_access_put(xe); return pc->rpe_freq; } @@ -485,7 +473,6 @@ int xe_guc_pc_get_min_freq(struct xe_guc_pc *pc, u32 *freq) struct xe_gt *gt = pc_to_gt(pc); int ret; - xe_device_mem_access_get(pc_to_xe(pc)); mutex_lock(&pc->freq_lock); if (!pc->freq_ready) { /* Might be in the middle of a gt reset */ @@ -511,7 +498,6 @@ fw: XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); out: mutex_unlock(&pc->freq_lock); - xe_device_mem_access_put(pc_to_xe(pc)); return ret; } @@ -528,7 +514,6 @@ int xe_guc_pc_set_min_freq(struct xe_guc_pc *pc, u32 freq) { int ret; - xe_device_mem_access_get(pc_to_xe(pc)); mutex_lock(&pc->freq_lock); if (!pc->freq_ready) { /* Might be in the middle of a gt reset */ @@ -544,8 +529,6 @@ int xe_guc_pc_set_min_freq(struct xe_guc_pc *pc, u32 freq) out: mutex_unlock(&pc->freq_lock); - xe_device_mem_access_put(pc_to_xe(pc)); - return ret; } @@ -561,7 +544,6 @@ int xe_guc_pc_get_max_freq(struct xe_guc_pc *pc, u32 *freq) { int ret; - xe_device_mem_access_get(pc_to_xe(pc)); mutex_lock(&pc->freq_lock); if (!pc->freq_ready) { /* Might be in the middle of a gt reset */ @@ -577,7 +559,6 @@ int xe_guc_pc_get_max_freq(struct xe_guc_pc *pc, u32 *freq) out: mutex_unlock(&pc->freq_lock); - xe_device_mem_access_put(pc_to_xe(pc)); return ret; } @@ -594,7 +575,6 @@ int xe_guc_pc_set_max_freq(struct xe_guc_pc *pc, u32 freq) { int ret; - xe_device_mem_access_get(pc_to_xe(pc)); mutex_lock(&pc->freq_lock); if (!pc->freq_ready) { /* Might be in the middle of a gt reset */ @@ -610,7 +590,6 @@ int xe_guc_pc_set_max_freq(struct xe_guc_pc *pc, u32 freq) out: mutex_unlock(&pc->freq_lock); - 
xe_device_mem_access_put(pc_to_xe(pc)); return ret; } @@ -623,8 +602,6 @@ enum xe_gt_idle_state xe_guc_pc_c_status(struct xe_guc_pc *pc) struct xe_gt *gt = pc_to_gt(pc); u32 reg, gt_c_state; - xe_device_mem_access_get(gt_to_xe(gt)); - if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1270) { reg = xe_mmio_read32(gt, MTL_MIRROR_TARGET_WP1); gt_c_state = REG_FIELD_GET(MTL_CC_MASK, reg); @@ -633,8 +610,6 @@ enum xe_gt_idle_state xe_guc_pc_c_status(struct xe_guc_pc *pc) gt_c_state = REG_FIELD_GET(RCN_MASK, reg); } - xe_device_mem_access_put(gt_to_xe(gt)); - switch (gt_c_state) { case GT_C6: return GT_IDLE_C6; @@ -654,9 +629,7 @@ u64 xe_guc_pc_rc6_residency(struct xe_guc_pc *pc) struct xe_gt *gt = pc_to_gt(pc); u32 reg; - xe_device_mem_access_get(gt_to_xe(gt)); reg = xe_mmio_read32(gt, GT_GFX_RC6); - xe_device_mem_access_put(gt_to_xe(gt)); return reg; } @@ -670,9 +643,7 @@ u64 xe_guc_pc_mc6_residency(struct xe_guc_pc *pc) struct xe_gt *gt = pc_to_gt(pc); u64 reg; - xe_device_mem_access_get(gt_to_xe(gt)); reg = xe_mmio_read32(gt, MTL_MEDIA_MC6); - xe_device_mem_access_put(gt_to_xe(gt)); return reg; } @@ -801,23 +772,19 @@ int xe_guc_pc_gucrc_disable(struct xe_guc_pc *pc) if (xe->info.skip_guc_pc) return 0; - xe_device_mem_access_get(pc_to_xe(pc)); - ret = pc_action_setup_gucrc(pc, XE_GUCRC_HOST_CONTROL); if (ret) - goto out; + return ret; ret = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); if (ret) - goto out; + return ret; xe_gt_idle_disable_c6(gt); XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); -out: - xe_device_mem_access_put(pc_to_xe(pc)); - return ret; + return 0; } static void pc_init_pcode_freq(struct xe_guc_pc *pc) @@ -870,11 +837,9 @@ int xe_guc_pc_start(struct xe_guc_pc *pc) xe_gt_assert(gt, xe_device_uc_enabled(xe)); - xe_device_mem_access_get(pc_to_xe(pc)); - ret = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); if (ret) - goto out_fail_force_wake; + return ret; if (xe->info.skip_guc_pc) { if (xe->info.platform != XE_PVC) @@ -914,8 +879,6 @@ int xe_guc_pc_start(struct xe_guc_pc *pc) out: XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); -out_fail_force_wake: - xe_device_mem_access_put(pc_to_xe(pc)); return ret; } @@ -928,12 +891,9 @@ int xe_guc_pc_stop(struct xe_guc_pc *pc) struct xe_device *xe = pc_to_xe(pc); int ret; - xe_device_mem_access_get(pc_to_xe(pc)); - if (xe->info.skip_guc_pc) { xe_gt_idle_disable_c6(pc_to_gt(pc)); - ret = 0; - goto out; + return 0; } mutex_lock(&pc->freq_lock); @@ -942,16 +902,14 @@ int xe_guc_pc_stop(struct xe_guc_pc *pc) ret = pc_action_shutdown(pc); if (ret) - goto out; + return ret; if (wait_for_pc_state(pc, SLPC_GLOBAL_STATE_NOT_RUNNING)) { drm_err(&pc_to_xe(pc)->drm, "GuC PC Shutdown failed\n"); - ret = -EIO; + return -EIO; } -out: - xe_device_mem_access_put(pc_to_xe(pc)); - return ret; + return 0; } /** @@ -965,9 +923,7 @@ static void xe_guc_pc_fini(struct drm_device *drm, void *arg) struct xe_device *xe = pc_to_xe(pc); if (xe->info.skip_guc_pc) { - xe_device_mem_access_get(xe); xe_gt_idle_disable_c6(pc_to_gt(pc)); - xe_device_mem_access_put(xe); return; } -- cgit v1.2.3 From 5b2b3a0fbb287d862cb39469a0f4826d8073b30e Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Thu, 22 Feb 2024 11:39:30 -0500 Subject: drm/xe: Runtime PM wake on every debugfs call Let's ensure our PCI device is awaken on every debugfs call. Let's increase the runtime_pm protection and start moving that to the outer bounds. Also let's remove the mem_access_{get,put} from where they are not needed anymore. 
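A minimal sketch of the shape applied to each debugfs callback below (the example node is hypothetical; node_to_gt() and gt_to_xe() are the real helpers used in these hunks):

	static int example_debugfs_show(struct seq_file *m, void *data)
	{
		struct xe_gt *gt = node_to_gt(m->private);

		/* wake the device for the duration of the dump, then allow suspend */
		xe_pm_runtime_get(gt_to_xe(gt));
		/* ... read HW state and print it into m ... */
		xe_pm_runtime_put(gt_to_xe(gt));

		return 0;
	}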
Reviewed-by: Matthew Auld Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240222163937.138342-7-rodrigo.vivi@intel.com --- drivers/gpu/drm/xe/xe_debugfs.c | 10 ++++--- drivers/gpu/drm/xe/xe_gt_debugfs.c | 53 +++++++++++++++++++++++++++++++++---- drivers/gpu/drm/xe/xe_guc_debugfs.c | 9 ++++--- drivers/gpu/drm/xe/xe_huc_debugfs.c | 5 ++-- drivers/gpu/drm/xe/xe_ttm_sys_mgr.c | 5 +++- 5 files changed, 66 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_debugfs.c b/drivers/gpu/drm/xe/xe_debugfs.c index 01db5b27bec5..8abdf3c17e1d 100644 --- a/drivers/gpu/drm/xe/xe_debugfs.c +++ b/drivers/gpu/drm/xe/xe_debugfs.c @@ -12,6 +12,7 @@ #include "xe_bo.h" #include "xe_device.h" #include "xe_gt_debugfs.h" +#include "xe_pm.h" #include "xe_step.h" #ifdef CONFIG_DRM_XE_DEBUG @@ -37,6 +38,8 @@ static int info(struct seq_file *m, void *data) struct xe_gt *gt; u8 id; + xe_pm_runtime_get(xe); + drm_printf(&p, "graphics_verx100 %d\n", xe->info.graphics_verx100); drm_printf(&p, "media_verx100 %d\n", xe->info.media_verx100); drm_printf(&p, "stepping G:%s M:%s D:%s B:%s\n", @@ -63,6 +66,7 @@ static int info(struct seq_file *m, void *data) gt->info.engine_mask); } + xe_pm_runtime_put(xe); return 0; } @@ -76,8 +80,7 @@ static int forcewake_open(struct inode *inode, struct file *file) struct xe_gt *gt; u8 id; - xe_device_mem_access_get(xe); - + xe_pm_runtime_get(xe); for_each_gt(gt, xe, id) XE_WARN_ON(xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL)); @@ -92,8 +95,7 @@ static int forcewake_release(struct inode *inode, struct file *file) for_each_gt(gt, xe, id) XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); - - xe_device_mem_access_put(xe); + xe_pm_runtime_put(xe); return 0; } diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c index c4b67cf09f8f..6b4dc2927727 100644 --- a/drivers/gpu/drm/xe/xe_gt_debugfs.c +++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c @@ -18,6 +18,7 @@ #include "xe_lrc.h" #include "xe_macros.h" #include "xe_pat.h" +#include "xe_pm.h" #include "xe_reg_sr.h" #include "xe_reg_whitelist.h" #include "xe_uc_debugfs.h" @@ -37,10 +38,10 @@ static int hw_engines(struct seq_file *m, void *data) enum xe_hw_engine_id id; int err; - xe_device_mem_access_get(xe); + xe_pm_runtime_get(xe); err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); if (err) { - xe_device_mem_access_put(xe); + xe_pm_runtime_put(xe); return err; } @@ -48,7 +49,7 @@ static int hw_engines(struct seq_file *m, void *data) xe_hw_engine_print(hwe, &p); err = xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL); - xe_device_mem_access_put(xe); + xe_pm_runtime_put(xe); if (err) return err; @@ -59,18 +60,23 @@ static int force_reset(struct seq_file *m, void *data) { struct xe_gt *gt = node_to_gt(m->private); + xe_pm_runtime_get(gt_to_xe(gt)); xe_gt_reset_async(gt); + xe_pm_runtime_put(gt_to_xe(gt)); return 0; } static int sa_info(struct seq_file *m, void *data) { - struct xe_tile *tile = gt_to_tile(node_to_gt(m->private)); + struct xe_gt *gt = node_to_gt(m->private); + struct xe_tile *tile = gt_to_tile(gt); struct drm_printer p = drm_seq_file_printer(m); + xe_pm_runtime_get(gt_to_xe(gt)); drm_suballoc_dump_debug_info(&tile->mem.kernel_bb_pool->base, &p, tile->mem.kernel_bb_pool->gpu_addr); + xe_pm_runtime_put(gt_to_xe(gt)); return 0; } @@ -80,7 +86,9 @@ static int topology(struct seq_file *m, void *data) struct xe_gt *gt = node_to_gt(m->private); struct drm_printer p = drm_seq_file_printer(m); + xe_pm_runtime_get(gt_to_xe(gt)); xe_gt_topology_dump(gt, &p); 
+ xe_pm_runtime_put(gt_to_xe(gt)); return 0; } @@ -90,7 +98,9 @@ static int steering(struct seq_file *m, void *data) struct xe_gt *gt = node_to_gt(m->private); struct drm_printer p = drm_seq_file_printer(m); + xe_pm_runtime_get(gt_to_xe(gt)); xe_gt_mcr_steering_dump(gt, &p); + xe_pm_runtime_put(gt_to_xe(gt)); return 0; } @@ -99,8 +109,13 @@ static int ggtt(struct seq_file *m, void *data) { struct xe_gt *gt = node_to_gt(m->private); struct drm_printer p = drm_seq_file_printer(m); + int ret; + + xe_pm_runtime_get(gt_to_xe(gt)); + ret = xe_ggtt_dump(gt_to_tile(gt)->mem.ggtt, &p); + xe_pm_runtime_put(gt_to_xe(gt)); - return xe_ggtt_dump(gt_to_tile(gt)->mem.ggtt, &p); + return ret; } static int register_save_restore(struct seq_file *m, void *data) @@ -110,6 +125,8 @@ static int register_save_restore(struct seq_file *m, void *data) struct xe_hw_engine *hwe; enum xe_hw_engine_id id; + xe_pm_runtime_get(gt_to_xe(gt)); + xe_reg_sr_dump(&gt->reg_sr, &p); drm_printf(&p, "\n"); @@ -127,6 +144,8 @@ static int register_save_restore(struct seq_file *m, void *data) for_each_hw_engine(hwe, gt, id) xe_reg_whitelist_dump(&hwe->reg_whitelist, &p); + xe_pm_runtime_put(gt_to_xe(gt)); + return 0; } @@ -135,7 +154,9 @@ static int workarounds(struct seq_file *m, void *data) struct xe_gt *gt = node_to_gt(m->private); struct drm_printer p = drm_seq_file_printer(m); + xe_pm_runtime_get(gt_to_xe(gt)); xe_wa_dump(gt, &p); + xe_pm_runtime_put(gt_to_xe(gt)); return 0; } @@ -145,48 +166,70 @@ static int pat(struct seq_file *m, void *data) struct xe_gt *gt = node_to_gt(m->private); struct drm_printer p = drm_seq_file_printer(m); + xe_pm_runtime_get(gt_to_xe(gt)); xe_pat_dump(gt, &p); + xe_pm_runtime_put(gt_to_xe(gt)); return 0; } static int rcs_default_lrc(struct seq_file *m, void *data) { + struct xe_gt *gt = node_to_gt(m->private); struct drm_printer p = drm_seq_file_printer(m); + xe_pm_runtime_get(gt_to_xe(gt)); xe_lrc_dump_default(&p, node_to_gt(m->private), XE_ENGINE_CLASS_RENDER); + xe_pm_runtime_put(gt_to_xe(gt)); + return 0; } static int ccs_default_lrc(struct seq_file *m, void *data) { + struct xe_gt *gt = node_to_gt(m->private); struct drm_printer p = drm_seq_file_printer(m); + xe_pm_runtime_get(gt_to_xe(gt)); xe_lrc_dump_default(&p, node_to_gt(m->private), XE_ENGINE_CLASS_COMPUTE); + xe_pm_runtime_put(gt_to_xe(gt)); + return 0; } static int bcs_default_lrc(struct seq_file *m, void *data) { + struct xe_gt *gt = node_to_gt(m->private); struct drm_printer p = drm_seq_file_printer(m); + xe_pm_runtime_get(gt_to_xe(gt)); xe_lrc_dump_default(&p, node_to_gt(m->private), XE_ENGINE_CLASS_COPY); + xe_pm_runtime_put(gt_to_xe(gt)); + return 0; } static int vcs_default_lrc(struct seq_file *m, void *data) { + struct xe_gt *gt = node_to_gt(m->private); struct drm_printer p = drm_seq_file_printer(m); + xe_pm_runtime_get(gt_to_xe(gt)); xe_lrc_dump_default(&p, node_to_gt(m->private), XE_ENGINE_CLASS_VIDEO_DECODE); + xe_pm_runtime_put(gt_to_xe(gt)); + return 0; } static int vecs_default_lrc(struct seq_file *m, void *data) { + struct xe_gt *gt = node_to_gt(m->private); struct drm_printer p = drm_seq_file_printer(m); + xe_pm_runtime_get(gt_to_xe(gt)); xe_lrc_dump_default(&p, node_to_gt(m->private), XE_ENGINE_CLASS_VIDEO_ENHANCE); + xe_pm_runtime_put(gt_to_xe(gt)); + return 0; } diff --git a/drivers/gpu/drm/xe/xe_guc_debugfs.c b/drivers/gpu/drm/xe/xe_guc_debugfs.c index ffd7d53bcc42..d3822cbea273 100644 --- a/drivers/gpu/drm/xe/xe_guc_debugfs.c +++ b/drivers/gpu/drm/xe/xe_guc_debugfs.c @@ -14,6 +14,7 @@ #include "xe_guc_ct.h" #include
"xe_guc_log.h" #include "xe_macros.h" +#include "xe_pm.h" static struct xe_guc *node_to_guc(struct drm_info_node *node) { @@ -26,9 +27,9 @@ static int guc_info(struct seq_file *m, void *data) struct xe_device *xe = guc_to_xe(guc); struct drm_printer p = drm_seq_file_printer(m); - xe_device_mem_access_get(xe); + xe_pm_runtime_get(xe); xe_guc_print_info(guc, &p); - xe_device_mem_access_put(xe); + xe_pm_runtime_put(xe); return 0; } @@ -39,9 +40,9 @@ static int guc_log(struct seq_file *m, void *data) struct xe_device *xe = guc_to_xe(guc); struct drm_printer p = drm_seq_file_printer(m); - xe_device_mem_access_get(xe); + xe_pm_runtime_get(xe); xe_guc_log_print(&guc->log, &p); - xe_device_mem_access_put(xe); + xe_pm_runtime_put(xe); return 0; } diff --git a/drivers/gpu/drm/xe/xe_huc_debugfs.c b/drivers/gpu/drm/xe/xe_huc_debugfs.c index 18585a7eeb9d..3a888a40188b 100644 --- a/drivers/gpu/drm/xe/xe_huc_debugfs.c +++ b/drivers/gpu/drm/xe/xe_huc_debugfs.c @@ -12,6 +12,7 @@ #include "xe_gt.h" #include "xe_huc.h" #include "xe_macros.h" +#include "xe_pm.h" static struct xe_gt * huc_to_gt(struct xe_huc *huc) @@ -36,9 +37,9 @@ static int huc_info(struct seq_file *m, void *data) struct xe_device *xe = huc_to_xe(huc); struct drm_printer p = drm_seq_file_printer(m); - xe_device_mem_access_get(xe); + xe_pm_runtime_get(xe); xe_huc_print_info(huc, &p); - xe_device_mem_access_put(xe); + xe_pm_runtime_put(xe); return 0; } diff --git a/drivers/gpu/drm/xe/xe_ttm_sys_mgr.c b/drivers/gpu/drm/xe/xe_ttm_sys_mgr.c index 3e1fa0c832ca..9844a8edbfe1 100644 --- a/drivers/gpu/drm/xe/xe_ttm_sys_mgr.c +++ b/drivers/gpu/drm/xe/xe_ttm_sys_mgr.c @@ -73,7 +73,10 @@ static void xe_ttm_sys_mgr_del(struct ttm_resource_manager *man, static void xe_ttm_sys_mgr_debug(struct ttm_resource_manager *man, struct drm_printer *printer) { - + /* + * This function is called by debugfs entry and would require + * pm_runtime_{get,put} wrappers around any operation. + */ } static const struct ttm_resource_manager_func xe_ttm_sys_mgr_func = { -- cgit v1.2.3 From db5a5a8338347ef43bb53ccab730cde326d030e8 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Thu, 22 Feb 2024 11:39:31 -0500 Subject: drm/xe: Replace dma_buf mem_access per direct xe_pm_runtime calls Continue on the path to entirely remove mem_access helpers in favour of the direct xe_pm_runtime calls. This item is one of the direct outer bounds of the protection. 
Reviewed-by: Matthew Auld Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240222163937.138342-8-rodrigo.vivi@intel.com --- drivers/gpu/drm/xe/xe_dma_buf.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_dma_buf.c b/drivers/gpu/drm/xe/xe_dma_buf.c index da2627ed6ae7..5b26af21e029 100644 --- a/drivers/gpu/drm/xe/xe_dma_buf.c +++ b/drivers/gpu/drm/xe/xe_dma_buf.c @@ -16,6 +16,7 @@ #include "tests/xe_test.h" #include "xe_bo.h" #include "xe_device.h" +#include "xe_pm.h" #include "xe_ttm_vram_mgr.h" #include "xe_vm.h" @@ -33,7 +34,7 @@ static int xe_dma_buf_attach(struct dma_buf *dmabuf, if (!attach->peer2peer && !xe_bo_can_migrate(gem_to_xe_bo(obj), XE_PL_TT)) return -EOPNOTSUPP; - xe_device_mem_access_get(to_xe_device(obj->dev)); + xe_pm_runtime_get(to_xe_device(obj->dev)); return 0; } @@ -42,7 +43,7 @@ static void xe_dma_buf_detach(struct dma_buf *dmabuf, { struct drm_gem_object *obj = attach->dmabuf->priv; - xe_device_mem_access_put(to_xe_device(obj->dev)); + xe_pm_runtime_put(to_xe_device(obj->dev)); } static int xe_dma_buf_pin(struct dma_buf_attachment *attach) -- cgit v1.2.3 From 5a2a90847450f193f681886db56c3ace5a945785 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Thu, 22 Feb 2024 11:39:32 -0500 Subject: drm/xe: Convert hwmon from mem_access to xe_pm_runtime calls Continue the work to kill the mem_access in favor of a pure runtime pm. Reviewed-by: Matthew Auld Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240222163937.138342-9-rodrigo.vivi@intel.com --- drivers/gpu/drm/xe/xe_hwmon.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_hwmon.c b/drivers/gpu/drm/xe/xe_hwmon.c index b82233a41606..a256af8c2012 100644 --- a/drivers/gpu/drm/xe/xe_hwmon.c +++ b/drivers/gpu/drm/xe/xe_hwmon.c @@ -18,6 +18,7 @@ #include "xe_pcode.h" #include "xe_pcode_api.h" #include "xe_sriov.h" +#include "xe_pm.h" enum xe_hwmon_reg { REG_PKG_RAPL_LIMIT, @@ -266,7 +267,7 @@ xe_hwmon_power1_max_interval_show(struct device *dev, struct device_attribute *a u32 x, y, x_w = 2; /* 2 bits */ u64 r, tau4, out; - xe_device_mem_access_get(gt_to_xe(hwmon->gt)); + xe_pm_runtime_get(gt_to_xe(hwmon->gt)); mutex_lock(&hwmon->hwmon_lock); @@ -275,7 +276,7 @@ xe_hwmon_power1_max_interval_show(struct device *dev, struct device_attribute *a mutex_unlock(&hwmon->hwmon_lock); - xe_device_mem_access_put(gt_to_xe(hwmon->gt)); + xe_pm_runtime_put(gt_to_xe(hwmon->gt)); x = REG_FIELD_GET(PKG_PWR_LIM_1_TIME_X, r); y = REG_FIELD_GET(PKG_PWR_LIM_1_TIME_Y, r); @@ -354,7 +355,7 @@ xe_hwmon_power1_max_interval_store(struct device *dev, struct device_attribute * rxy = REG_FIELD_PREP(PKG_PWR_LIM_1_TIME_X, x) | REG_FIELD_PREP(PKG_PWR_LIM_1_TIME_Y, y); - xe_device_mem_access_get(gt_to_xe(hwmon->gt)); + xe_pm_runtime_get(gt_to_xe(hwmon->gt)); mutex_lock(&hwmon->hwmon_lock); @@ -363,7 +364,7 @@ xe_hwmon_power1_max_interval_store(struct device *dev, struct device_attribute * mutex_unlock(&hwmon->hwmon_lock); - xe_device_mem_access_put(gt_to_xe(hwmon->gt)); + xe_pm_runtime_put(gt_to_xe(hwmon->gt)); return count; } @@ -384,12 +385,12 @@ static umode_t xe_hwmon_attributes_visible(struct kobject *kobj, struct xe_hwmon *hwmon = dev_get_drvdata(dev); int ret = 0; - xe_device_mem_access_get(gt_to_xe(hwmon->gt)); + xe_pm_runtime_get(gt_to_xe(hwmon->gt)); if (attr == &sensor_dev_attr_power1_max_interval.dev_attr.attr) ret = xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT) ? 
attr->mode : 0; - xe_device_mem_access_put(gt_to_xe(hwmon->gt)); + xe_pm_runtime_put(gt_to_xe(hwmon->gt)); return ret; } @@ -610,7 +611,7 @@ xe_hwmon_is_visible(const void *drvdata, enum hwmon_sensor_types type, struct xe_hwmon *hwmon = (struct xe_hwmon *)drvdata; int ret; - xe_device_mem_access_get(gt_to_xe(hwmon->gt)); + xe_pm_runtime_get(gt_to_xe(hwmon->gt)); switch (type) { case hwmon_power: @@ -630,7 +631,7 @@ xe_hwmon_is_visible(const void *drvdata, enum hwmon_sensor_types type, break; } - xe_device_mem_access_put(gt_to_xe(hwmon->gt)); + xe_pm_runtime_put(gt_to_xe(hwmon->gt)); return ret; } @@ -642,7 +643,7 @@ xe_hwmon_read(struct device *dev, enum hwmon_sensor_types type, u32 attr, struct xe_hwmon *hwmon = dev_get_drvdata(dev); int ret; - xe_device_mem_access_get(gt_to_xe(hwmon->gt)); + xe_pm_runtime_get(gt_to_xe(hwmon->gt)); switch (type) { case hwmon_power: @@ -662,7 +663,7 @@ xe_hwmon_read(struct device *dev, enum hwmon_sensor_types type, u32 attr, break; } - xe_device_mem_access_put(gt_to_xe(hwmon->gt)); + xe_pm_runtime_put(gt_to_xe(hwmon->gt)); return ret; } @@ -674,7 +675,7 @@ xe_hwmon_write(struct device *dev, enum hwmon_sensor_types type, u32 attr, struct xe_hwmon *hwmon = dev_get_drvdata(dev); int ret; - xe_device_mem_access_get(gt_to_xe(hwmon->gt)); + xe_pm_runtime_get(gt_to_xe(hwmon->gt)); switch (type) { case hwmon_power: @@ -688,7 +689,7 @@ xe_hwmon_write(struct device *dev, enum hwmon_sensor_types type, u32 attr, break; } - xe_device_mem_access_put(gt_to_xe(hwmon->gt)); + xe_pm_runtime_put(gt_to_xe(hwmon->gt)); return ret; } -- cgit v1.2.3 From f9d9f94cfb25c9be40a6f51fbfdd1131dfc55ded Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Thu, 22 Feb 2024 11:39:33 -0500 Subject: drm/xe: Remove useless mem_access protection for query ioctls Every IOCTL is already protected on its outer bounds by xe_pm_runtime_{get,put} calls, so we can now remove these. Reviewed-by: Matthew Auld Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240222163937.138342-10-rodrigo.vivi@intel.com --- drivers/gpu/drm/xe/xe_query.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c index 92bb06c0586e..f1876b556ab4 100644 --- a/drivers/gpu/drm/xe/xe_query.c +++ b/drivers/gpu/drm/xe/xe_query.c @@ -147,7 +147,6 @@ query_engine_cycles(struct xe_device *xe, if (!hwe) return -EINVAL; - xe_device_mem_access_get(xe); xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); __read_timestamps(gt, @@ -159,7 +158,6 @@ query_engine_cycles(struct xe_device *xe, cpu_clock); xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL); - xe_device_mem_access_put(xe); resp.width = 36; /* Only write to the output fields of user query */ @@ -433,9 +431,7 @@ static int query_hwconfig(struct xe_device *xe, if (!hwconfig) return -ENOMEM; - xe_device_mem_access_get(xe); xe_guc_hwconfig_copy(&gt->uc.guc, hwconfig); - xe_device_mem_access_put(xe); if (copy_to_user(query_ptr, hwconfig, size)) { kfree(hwconfig); -- cgit v1.2.3 From 48fef288070b31a6a04ed13c929f1d15ebf52fe5 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Thu, 22 Feb 2024 11:39:34 -0500 Subject: drm/xe: Convert gsc_work from mem_access to xe_pm_runtime Let's directly use xe_pm_runtime_{get,put} instead of the mem_access helpers that are going away soon.
Reviewed-by: Matthew Auld Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240222163937.138342-11-rodrigo.vivi@intel.com --- drivers/gpu/drm/xe/xe_gsc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gsc.c b/drivers/gpu/drm/xe/xe_gsc.c index a61994292c43..d9aa815a5bc2 100644 --- a/drivers/gpu/drm/xe/xe_gsc.c +++ b/drivers/gpu/drm/xe/xe_gsc.c @@ -21,6 +21,7 @@ #include "xe_huc.h" #include "xe_map.h" #include "xe_mmio.h" +#include "xe_pm.h" #include "xe_sched_job.h" #include "xe_uc_fw.h" #include "xe_wa.h" @@ -285,7 +286,7 @@ static void gsc_work(struct work_struct *work) gsc->work_actions = 0; spin_unlock_irq(&gsc->lock); - xe_device_mem_access_get(xe); + xe_pm_runtime_get(xe); xe_force_wake_get(gt_to_fw(gt), XE_FW_GSC); if (actions & GSC_ACTION_FW_LOAD) { @@ -300,7 +301,7 @@ static void gsc_work(struct work_struct *work) xe_gsc_proxy_request_handler(gsc); xe_force_wake_put(gt_to_fw(gt), XE_FW_GSC); - xe_device_mem_access_put(xe); + xe_pm_runtime_put(xe); } int xe_gsc_init(struct xe_gsc *gsc) -- cgit v1.2.3 From 5d118681d341fbf470d06bf1938d996d3bed6a1d Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Thu, 22 Feb 2024 11:39:35 -0500 Subject: drm/xe: Remove mem_access from suspend and resume functions At these points, we are sure that the device is awake in D0. Likely in the middle of the transition, but awake. So, these extra protections are useless. Let's remove them and continue with the killing of xe_device_mem_access. Reviewed-by: Matthew Auld Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240222163937.138342-12-rodrigo.vivi@intel.com --- drivers/gpu/drm/xe/xe_gt.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index b75f0bf0a9a1..e0c79351a131 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -711,13 +711,11 @@ void xe_gt_reset_async(struct xe_gt *gt) void xe_gt_suspend_prepare(struct xe_gt *gt) { - xe_device_mem_access_get(gt_to_xe(gt)); XE_WARN_ON(xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL)); xe_uc_stop_prepare(&gt->uc); XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); - xe_device_mem_access_put(gt_to_xe(gt)); } int xe_gt_suspend(struct xe_gt *gt) @@ -726,7 +724,6 @@ int xe_gt_suspend(struct xe_gt *gt) xe_gt_sanitize(gt); - xe_device_mem_access_get(gt_to_xe(gt)); err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); if (err) goto err_msg; @@ -736,7 +733,6 @@ int xe_gt_suspend(struct xe_gt *gt) goto err_force_wake; XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); - xe_device_mem_access_put(gt_to_xe(gt)); xe_gt_info(gt, "suspended\n"); return 0; @@ -744,7 +740,6 @@ err_force_wake: XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); err_msg: - xe_device_mem_access_put(gt_to_xe(gt)); xe_gt_err(gt, "suspend failed (%pe)\n", ERR_PTR(err)); return err; @@ -754,7 +749,6 @@ int xe_gt_resume(struct xe_gt *gt) { int err; - xe_device_mem_access_get(gt_to_xe(gt)); err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); if (err) goto err_msg; @@ -764,7 +758,6 @@ int xe_gt_resume(struct xe_gt *gt) goto err_force_wake; XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); - xe_device_mem_access_put(gt_to_xe(gt)); xe_gt_info(gt, "resumed\n"); return 0; @@ -772,7 +765,6 @@ err_force_wake: XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); err_msg: -
xe_device_mem_access_put(gt_to_xe(gt)); xe_gt_err(gt, "resume failed (%pe)\n", ERR_PTR(err)); return err; -- cgit v1.2.3 From 23be0e6e8316aaab6acfe15f035960914f820a2a Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Thu, 22 Feb 2024 11:39:36 -0500 Subject: drm/xe: Convert gt_reset from mem_access to xe_pm_runtime We need to ensure that the device is in D0 on any kind of GT reset. We are likely already protected by outer bounds like exec, but if exec/sched ref gets dropped on a hang, we might transition to D3 before we are able to perform the gt_reset and recover. Suggested-by: Matthew Brost Reviewed-by: Matthew Auld Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240222163937.138342-13-rodrigo.vivi@intel.com --- drivers/gpu/drm/xe/xe_gt.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index e0c79351a131..45646d3aea2d 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -43,6 +43,7 @@ #include "xe_migrate.h" #include "xe_mmio.h" #include "xe_pat.h" +#include "xe_pm.h" #include "xe_mocs.h" #include "xe_reg_sr.h" #include "xe_ring_ops.h" @@ -644,9 +645,9 @@ static int gt_reset(struct xe_gt *gt) goto err_fail; } + xe_pm_runtime_get(gt_to_xe(gt)); xe_gt_sanitize(gt); - xe_device_mem_access_get(gt_to_xe(gt)); err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); if (err) goto err_msg; @@ -670,8 +671,8 @@ static int gt_reset(struct xe_gt *gt) goto err_out; err = xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL); - xe_device_mem_access_put(gt_to_xe(gt)); XE_WARN_ON(err); + xe_pm_runtime_put(gt_to_xe(gt)); xe_gt_info(gt, "reset done\n"); @@ -681,7 +682,7 @@ err_out: XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); err_msg: XE_WARN_ON(xe_uc_start(&gt->uc)); - xe_device_mem_access_put(gt_to_xe(gt)); + xe_pm_runtime_put(gt_to_xe(gt)); err_fail: xe_gt_err(gt, "reset failed (%pe)\n", ERR_PTR(err)); -- cgit v1.2.3 From 1732391763634b9e877670ecf2e2268d7323ee15 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Thu, 22 Feb 2024 11:39:37 -0500 Subject: drm/xe: Remove useless mem_access on PAT dumps PAT dumps are already protected by the xe_pm_runtime_{get,put} around the debugfs call. So, these can be removed.
Reviewed-by: Matthew Auld Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240222163937.138342-14-rodrigo.vivi@intel.com --- drivers/gpu/drm/xe/xe_pat.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_pat.c b/drivers/gpu/drm/xe/xe_pat.c index e148934d554b..66d8e3dd8237 100644 --- a/drivers/gpu/drm/xe/xe_pat.c +++ b/drivers/gpu/drm/xe/xe_pat.c @@ -174,7 +174,6 @@ static void xelp_dump(struct xe_gt *gt, struct drm_printer *p) struct xe_device *xe = gt_to_xe(gt); int i, err; - xe_device_mem_access_get(xe); err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); if (err) goto err_fw; @@ -192,7 +191,6 @@ static void xelp_dump(struct xe_gt *gt, struct drm_printer *p) err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); err_fw: xe_assert(xe, !err); - xe_device_mem_access_put(xe); } static const struct xe_pat_ops xelp_pat_ops = { @@ -205,7 +203,6 @@ static void xehp_dump(struct xe_gt *gt, struct drm_printer *p) struct xe_device *xe = gt_to_xe(gt); int i, err; - xe_device_mem_access_get(xe); err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); if (err) goto err_fw; @@ -225,7 +222,6 @@ static void xehp_dump(struct xe_gt *gt, struct drm_printer *p) err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); err_fw: xe_assert(xe, !err); - xe_device_mem_access_put(xe); } static const struct xe_pat_ops xehp_pat_ops = { @@ -238,7 +234,6 @@ static void xehpc_dump(struct xe_gt *gt, struct drm_printer *p) struct xe_device *xe = gt_to_xe(gt); int i, err; - xe_device_mem_access_get(xe); err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); if (err) goto err_fw; @@ -256,7 +251,6 @@ static void xehpc_dump(struct xe_gt *gt, struct drm_printer *p) err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); err_fw: xe_assert(xe, !err); - xe_device_mem_access_put(xe); } static const struct xe_pat_ops xehpc_pat_ops = { @@ -269,7 +263,6 @@ static void xelpg_dump(struct xe_gt *gt, struct drm_printer *p) struct xe_device *xe = gt_to_xe(gt); int i, err; - xe_device_mem_access_get(xe); err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); if (err) goto err_fw; @@ -292,7 +285,6 @@ static void xelpg_dump(struct xe_gt *gt, struct drm_printer *p) err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); err_fw: xe_assert(xe, !err); - xe_device_mem_access_put(xe); } /* @@ -325,7 +317,6 @@ static void xe2_dump(struct xe_gt *gt, struct drm_printer *p) int i, err; u32 pat; - xe_device_mem_access_get(xe); err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); if (err) goto err_fw; @@ -370,7 +361,6 @@ static void xe2_dump(struct xe_gt *gt, struct drm_printer *p) err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); err_fw: xe_assert(xe, !err); - xe_device_mem_access_put(xe); } static const struct xe_pat_ops xe2_pat_ops = { -- cgit v1.2.3 From b2121f2bd2232cd0556b2182078d159d81497885 Mon Sep 17 00:00:00 2001 From: Francois Dugast Date: Thu, 8 Feb 2024 10:35:39 -0800 Subject: drm/xe: Extend uAPI to query HuC micro-controller firmware version MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The infrastructure to query GuC firmware version is already in place. It is extended with a new micro-controller type to query the HuC firmware version. It can be used from user space to know if HuC is running.
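For reference, the userspace side would look roughly like the sketch below. This is an untested illustration, not libdrm code; it assumes the existing DRM_XE_DEVICE_QUERY_UC_FW_VERSION query id and an already-open xe device fd:

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include <drm/xe_drm.h>

    static void print_huc_version(int fd)
    {
            struct drm_xe_query_uc_fw_version resp;
            struct drm_xe_device_query query;

            memset(&resp, 0, sizeof(resp));
            resp.uc_type = XE_QUERY_UC_TYPE_HUC;

            memset(&query, 0, sizeof(query));
            query.query = DRM_XE_DEVICE_QUERY_UC_FW_VERSION; /* assumed query id */
            query.size = sizeof(resp);
            query.data = (uintptr_t)&resp;

            /* The kernel returns -ENODEV when the HuC firmware is not running */
            if (ioctl(fd, DRM_IOCTL_XE_DEVICE_QUERY, &query) == 0)
                    printf("HuC %u.%u.%u\n", resp.major_ver, resp.minor_ver,
                           resp.patch_ver);
    }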
Cc: John Harrison Cc: Francois Dugast Cc: Lucas De Marchi Signed-off-by: Francois Dugast Signed-off-by: José Roberto de Souza Reviewed-by: Rodrigo Vivi Reviewed-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20240208183539.185095-2-jose.souza@intel.com --- drivers/gpu/drm/xe/xe_query.c | 38 ++++++++++++++++++++++++++++++++++---- include/uapi/drm/xe_drm.h | 1 + 2 files changed, 35 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c index f1876b556ab4..a6a20a6dd360 100644 --- a/drivers/gpu/drm/xe/xe_query.c +++ b/drivers/gpu/drm/xe/xe_query.c @@ -540,14 +540,44 @@ query_uc_fw_version(struct xe_device *xe, struct drm_xe_device_query *query) version = &guc->fw.versions.found[XE_UC_FW_VER_COMPATIBILITY]; break; } + case XE_QUERY_UC_TYPE_HUC: { + struct xe_gt *media_gt = NULL; + struct xe_huc *huc; + + if (MEDIA_VER(xe) >= 13) { + struct xe_tile *tile; + u8 gt_id; + + for_each_tile(tile, xe, gt_id) { + if (tile->media_gt) { + media_gt = tile->media_gt; + break; + } + } + } else { + media_gt = xe->tiles[0].primary_gt; + } + + if (!media_gt) + break; + + huc = &media_gt->uc.huc; + if (huc->fw.status == XE_UC_FIRMWARE_RUNNING) + version = &huc->fw.versions.found[XE_UC_FW_VER_RELEASE]; + break; + } default: return -EINVAL; } - resp.branch_ver = 0; - resp.major_ver = version->major; - resp.minor_ver = version->minor; - resp.patch_ver = version->patch; + if (version) { + resp.branch_ver = 0; + resp.major_ver = version->major; + resp.minor_ver = version->minor; + resp.patch_ver = version->patch; + } else { + return -ENODEV; + } if (copy_to_user(query_ptr, &resp, size)) return -EFAULT; diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 538a3ac95c54..2fc19177d2b0 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -583,6 +583,7 @@ struct drm_xe_query_engine_cycles { struct drm_xe_query_uc_fw_version { /** @uc_type: The micro-controller type to query firmware version */ #define XE_QUERY_UC_TYPE_GUC_SUBMISSION 0 +#define XE_QUERY_UC_TYPE_HUC 1 __u16 uc_type; /** @pad: MBZ */ -- cgit v1.2.3 From 35ed1d2bfff7b1969e7f99f3641a83ea54f037e2 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Mon, 26 Feb 2024 07:55:54 -0800 Subject: drm/xe: Use vmalloc for array of bind allocation in bind IOCTL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use vmalloc in an effort to allow a user to pass in a large number of binds in an IOCTL (mesa use case). Also use array allocations rather than open coding the size calculation.
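Schematically, the allocation change is the pattern below; a sketch of the idiom rather than the full driver context:

    /* Before: open-coded size computation, kmalloc-only backing. The
     * multiplication can overflow, and a large num_binds may fail for
     * want of contiguous kernel memory.
     */
    bind_ops = kmalloc(sizeof(struct drm_xe_vm_bind_op) * args->num_binds,
                       GFP_KERNEL);

    /* After: overflow-checked array allocation that may fall back to
     * vmalloc, with the memory accounted to the caller's cgroup.
     */
    bind_ops = kvmalloc_array(args->num_binds,
                              sizeof(struct drm_xe_vm_bind_op),
                              GFP_KERNEL | __GFP_ACCOUNT);

    /* kvfree() works for both kmalloc- and vmalloc-backed memory */
    kvfree(bind_ops);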
v2: Use __GFP_ACCOUNT for allocations (Thomas) Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Signed-off-by: Matthew Brost Reviewed-by: Thomas Hellström Link: https://patchwork.freedesktop.org/patch/msgid/20240226155554.103384-1-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_vm.c | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index db3f049a47dc..4154ef75e369 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -2770,8 +2770,9 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe, u64 __user *bind_user = u64_to_user_ptr(args->vector_of_binds); - *bind_ops = kmalloc(sizeof(struct drm_xe_vm_bind_op) * - args->num_binds, GFP_KERNEL); + *bind_ops = kvmalloc_array(args->num_binds, + sizeof(struct drm_xe_vm_bind_op), + GFP_KERNEL | __GFP_ACCOUNT); if (!*bind_ops) return -ENOMEM; @@ -2861,7 +2862,7 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe, free_bind_ops: if (args->num_binds > 1) - kfree(*bind_ops); + kvfree(*bind_ops); return err; } @@ -2949,13 +2950,15 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) } if (args->num_binds) { - bos = kcalloc(args->num_binds, sizeof(*bos), GFP_KERNEL); + bos = kvcalloc(args->num_binds, sizeof(*bos), + GFP_KERNEL | __GFP_ACCOUNT); if (!bos) { err = -ENOMEM; goto release_vm_lock; } - ops = kcalloc(args->num_binds, sizeof(*ops), GFP_KERNEL); + ops = kvcalloc(args->num_binds, sizeof(*ops), + GFP_KERNEL | __GFP_ACCOUNT); if (!ops) { err = -ENOMEM; goto release_vm_lock; } @@ -3096,10 +3099,10 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) for (i = 0; bos && i < args->num_binds; ++i) xe_bo_put(bos[i]); - kfree(bos); - kfree(ops); + kvfree(bos); + kvfree(ops); if (args->num_binds > 1) - kfree(bind_ops); + kvfree(bind_ops); return err; @@ -3123,10 +3126,10 @@ put_exec_queue: if (q) xe_exec_queue_put(q); free_objs: - kfree(bos); - kfree(ops); + kvfree(bos); + kvfree(ops); if (args->num_binds > 1) - kfree(bind_ops); + kvfree(bind_ops); return err; } -- cgit v1.2.3 From ba6bbdc6eaef92998ec7f323c9e1211d344d2556 Mon Sep 17 00:00:00 2001 From: Paulo Zanoni Date: Wed, 14 Feb 2024 16:53:53 -0800 Subject: drm/xe: get rid of MAX_BINDS Mesa has been issuing a single bind operation per ioctl since xe.ko changed to GPUVA due to xe.ko bug #746. If I change Mesa to try again to issue every single bind operation it can in the same ioctl, it hits the MAX_BINDS assertion when running Vulkan conformance tests. Test dEQP-VK.sparse_resources.transfer_queue.3d.rgba32i.1024_128_8 issues 960 bind operations in a single ioctl; it's the most I could find in the conformance suite. I don't see a reason to keep the MAX_BINDS restriction: it doesn't seem to be preventing any specific issue. If the number is too big for the memory allocations, then those will fail. Nothing related to num_binds seems to be using the stack. Let's just get rid of it.
Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Testcase: dEQP-VK.sparse_resources.transfer_queue.3d.rgba32i.1024_128_8 References: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/746 Cc: Matthew Brost Signed-off-by: Paulo Zanoni Reviewed-by: Matthew Brost Signed-off-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240215005353.1295420-1-paulo.r.zanoni@intel.com --- drivers/gpu/drm/xe/xe_vm.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 4154ef75e369..4e9ceb1817f9 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -2749,8 +2749,6 @@ static int vm_bind_ioctl_ops_execute(struct xe_vm *vm, #define XE_64K_PAGE_MASK 0xffffull #define ALL_DRM_XE_SYNCS_FLAGS (DRM_XE_SYNCS_FLAG_WAIT_FOR_OP) -#define MAX_BINDS 512 /* FIXME: Picking random upper limit */ - static int vm_bind_ioctl_check_args(struct xe_device *xe, struct drm_xe_vm_bind *args, struct drm_xe_vm_bind_op **bind_ops) @@ -2762,8 +2760,7 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe, XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) return -EINVAL; - if (XE_IOCTL_DBG(xe, args->extensions) || - XE_IOCTL_DBG(xe, args->num_binds > MAX_BINDS)) + if (XE_IOCTL_DBG(xe, args->extensions)) return -EINVAL; if (args->num_binds > 1) { -- cgit v1.2.3 From e275d61c5f3ffc250b2a9601d36fbd11b4db774b Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Fri, 23 Feb 2024 12:46:59 -0800 Subject: drm/xe/guc: Handle timing out of signaled jobs gracefully MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Timing out of signaled jobs can happen during regular operations (e.g. an exec queue closed immediately after last fence signaled). The TDR can run before the worker which frees jobs. Rather than running through the TDR if a signaled job is found, simply free it without any debug messages. Cc: Thomas Hellström Reported-by: José Roberto de Souza Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/1271 Signed-off-by: Matthew Brost Reviewed-by: Thomas Hellström Tested-by: José Roberto de Souza Reviewed-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20240223204659.40750-1-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_guc_submit.c | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index ff77bc8da1b2..29748e40555f 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -929,20 +929,26 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) int err = -ETIME; int i = 0; - if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &job->fence->flags)) { - drm_notice(&xe->drm, "Timedout job: seqno=%u, guc_id=%d, flags=0x%lx", - xe_sched_job_seqno(job), q->guc->id, q->flags); - xe_gt_WARN(q->gt, q->flags & EXEC_QUEUE_FLAG_KERNEL, - "Kernel-submitted job timed out\n"); - xe_gt_WARN(q->gt, q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q), - "VM job timed out on non-killed execqueue\n"); - - simple_error_capture(q); - xe_devcoredump(job); - } else { - drm_dbg(&xe->drm, "Timedout signaled job: seqno=%u, guc_id=%d, flags=0x%lx", - xe_sched_job_seqno(job), q->guc->id, q->flags); + /* + * TDR has fired before free job worker. Common if exec queue + * immediately closed after last fence signaled.
+ */ + if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &job->fence->flags)) { + guc_exec_queue_free_job(drm_job); + + return DRM_GPU_SCHED_STAT_NOMINAL; } + + drm_notice(&xe->drm, "Timedout job: seqno=%u, guc_id=%d, flags=0x%lx", + xe_sched_job_seqno(job), q->guc->id, q->flags); + xe_gt_WARN(q->gt, q->flags & EXEC_QUEUE_FLAG_KERNEL, + "Kernel-submitted job timed out\n"); + xe_gt_WARN(q->gt, q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q), + "VM job timed out on non-killed execqueue\n"); + + simple_error_capture(q); + xe_devcoredump(job); + trace_xe_sched_job_timedout(job); /* Kill the run_job entry point */ -- cgit v1.2.3 From 977e5b82e0901480bc201342d39f855fc0a2ef47 Mon Sep 17 00:00:00 2001 From: Mika Kuoppala Date: Thu, 15 Feb 2024 20:11:51 +0200 Subject: drm/xe: Expose user fence from xe_sync_entry MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit By allowing getting reference to user fence, we can control the lifetime outside of sync entries. This is needed to allow vma to track the associated user fence that was provided with bind ioctl. v2: xe_user_fence can be kept opaque (Jani, Matt) v3: indent fix (Matt) Cc: Thomas Hellström Cc: Matthew Brost Cc: Jani Nikula Signed-off-by: Mika Kuoppala Reviewed-by: Matthew Brost Signed-off-by: Thomas Hellström Link: https://patchwork.freedesktop.org/patch/msgid/20240215181152.450082-2-mika.kuoppala@linux.intel.com --- drivers/gpu/drm/xe/xe_sync.c | 58 +++++++++++++++++++++++++++++++------- drivers/gpu/drm/xe/xe_sync.h | 4 +++ drivers/gpu/drm/xe/xe_sync_types.h | 2 +- 3 files changed, 53 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c index aab92bee1d7c..02c9577fe418 100644 --- a/drivers/gpu/drm/xe/xe_sync.c +++ b/drivers/gpu/drm/xe/xe_sync.c @@ -19,7 +19,7 @@ #include "xe_macros.h" #include "xe_sched_job_types.h" -struct user_fence { +struct xe_user_fence { struct xe_device *xe; struct kref refcount; struct dma_fence_cb cb; @@ -27,31 +27,32 @@ struct user_fence { struct mm_struct *mm; u64 __user *addr; u64 value; + int signalled; }; static void user_fence_destroy(struct kref *kref) { - struct user_fence *ufence = container_of(kref, struct user_fence, + struct xe_user_fence *ufence = container_of(kref, struct xe_user_fence, refcount); mmdrop(ufence->mm); kfree(ufence); } -static void user_fence_get(struct user_fence *ufence) +static void user_fence_get(struct xe_user_fence *ufence) { kref_get(&ufence->refcount); } -static void user_fence_put(struct user_fence *ufence) +static void user_fence_put(struct xe_user_fence *ufence) { kref_put(&ufence->refcount, user_fence_destroy); } -static struct user_fence *user_fence_create(struct xe_device *xe, u64 addr, - u64 value) +static struct xe_user_fence *user_fence_create(struct xe_device *xe, u64 addr, + u64 value) { - struct user_fence *ufence; + struct xe_user_fence *ufence; ufence = kmalloc(sizeof(*ufence), GFP_KERNEL); if (!ufence) @@ -69,7 +70,7 @@ static struct user_fence *user_fence_create(struct xe_device *xe, u64 addr, static void user_fence_worker(struct work_struct *w) { - struct user_fence *ufence = container_of(w, struct user_fence, worker); + struct xe_user_fence *ufence = container_of(w, struct xe_user_fence, worker); if (mmget_not_zero(ufence->mm)) { kthread_use_mm(ufence->mm); @@ -80,10 +81,11 @@ static void user_fence_worker(struct work_struct *w) } wake_up_all(&ufence->xe->ufence_wq); + WRITE_ONCE(ufence->signalled, 1); user_fence_put(ufence); } -static void kick_ufence(struct user_fence 
*ufence, struct dma_fence *fence) +static void kick_ufence(struct xe_user_fence *ufence, struct dma_fence *fence) { INIT_WORK(&ufence->worker, user_fence_worker); queue_work(ufence->xe->ordered_wq, &ufence->worker); @@ -92,7 +94,7 @@ static void kick_ufence(struct user_fence *ufence, struct dma_fence *fence) static void user_fence_cb(struct dma_fence *fence, struct dma_fence_cb *cb) { - struct user_fence *ufence = container_of(cb, struct user_fence, cb); + struct xe_user_fence *ufence = container_of(cb, struct xe_user_fence, cb); kick_ufence(ufence, fence); } @@ -340,3 +342,39 @@ err_out: return ERR_PTR(-ENOMEM); } + +/** + * xe_sync_ufence_get() - Get user fence from sync + * @sync: input sync + * + * Get a user fence reference from sync. + * + * Return: xe_user_fence pointer with reference + */ +struct xe_user_fence *xe_sync_ufence_get(struct xe_sync_entry *sync) +{ + user_fence_get(sync->ufence); + + return sync->ufence; +} + +/** + * xe_sync_ufence_put() - Put user fence reference + * @ufence: user fence reference + * + */ +void xe_sync_ufence_put(struct xe_user_fence *ufence) +{ + user_fence_put(ufence); +} + +/** + * xe_sync_ufence_get_status() - Get user fence status + * @ufence: user fence + * + * Return: 1 if signalled, 0 not signalled, <0 on error + */ +int xe_sync_ufence_get_status(struct xe_user_fence *ufence) +{ + return READ_ONCE(ufence->signalled); +} diff --git a/drivers/gpu/drm/xe/xe_sync.h b/drivers/gpu/drm/xe/xe_sync.h index f43cdcaca6c5..0fd0d51208e6 100644 --- a/drivers/gpu/drm/xe/xe_sync.h +++ b/drivers/gpu/drm/xe/xe_sync.h @@ -38,4 +38,8 @@ static inline bool xe_sync_is_ufence(struct xe_sync_entry *sync) return !!sync->ufence; } +struct xe_user_fence *xe_sync_ufence_get(struct xe_sync_entry *sync); +void xe_sync_ufence_put(struct xe_user_fence *ufence); +int xe_sync_ufence_get_status(struct xe_user_fence *ufence); + #endif diff --git a/drivers/gpu/drm/xe/xe_sync_types.h b/drivers/gpu/drm/xe/xe_sync_types.h index 852db5e7884f..30ac3f51993b 100644 --- a/drivers/gpu/drm/xe/xe_sync_types.h +++ b/drivers/gpu/drm/xe/xe_sync_types.h @@ -18,7 +18,7 @@ struct xe_sync_entry { struct drm_syncobj *syncobj; struct dma_fence *fence; struct dma_fence_chain *chain_fence; - struct user_fence *ufence; + struct xe_user_fence *ufence; u64 addr; u64 timeline_value; u32 type; -- cgit v1.2.3 From 158900ade92cce5ab85a06d618eb51e6c7ffb28a Mon Sep 17 00:00:00 2001 From: Mika Kuoppala Date: Thu, 15 Feb 2024 20:11:52 +0200 Subject: drm/xe: Deny unbinds if uapi ufence pending MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If a user fence was provided for MAP in vm_bind_ioctl and it has still not been signalled, deny UNMAP of said vma with EBUSY as long as the unsignalled fence exists. This guarantees that MAP vs UNMAP sequences won't escape under the radar if we ever want to track the client's state wrt completed and accessible MAPs, by means of intercepting the ufence release signalling.
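From a userspace point of view, the consequence is that an unbind can now fail with EBUSY until the bind's user fence has signalled. A hedged sketch of the expected handling follows; both helpers are illustrative stand-ins, not real API:

    int ret;

    do {
            ret = xe_vm_unbind(fd, vm_id, addr, range); /* hypothetical wrapper */
            if (ret == -EBUSY)
                    /* hypothetical wait on the bind's user fence address */
                    wait_user_fence(fd, ufence_addr, ufence_value);
    } while (ret == -EBUSY);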
v2: find ufence with num_fences > 1 (Matt) v3: careful on clearing vma ufence (Matt) Link: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/1159 Cc: Thomas Hellström Cc: Matthew Brost Cc: Joonas Lahtinen Signed-off-by: Mika Kuoppala Reviewed-by: Matthew Brost Signed-off-by: Thomas Hellström Link: https://patchwork.freedesktop.org/patch/msgid/20240215181152.450082-3-mika.kuoppala@linux.intel.com --- drivers/gpu/drm/xe/xe_vm.c | 37 +++++++++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_vm_types.h | 7 +++++++ 2 files changed, 44 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 4e9ceb1817f9..d28260351af2 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -903,6 +903,11 @@ static void xe_vma_destroy_late(struct xe_vma *vma) struct xe_device *xe = vm->xe; bool read_only = xe_vma_read_only(vma); + if (vma->ufence) { + xe_sync_ufence_put(vma->ufence); + vma->ufence = NULL; + } + if (xe_vma_is_userptr(vma)) { struct xe_userptr *userptr = &to_userptr_vma(vma)->userptr; @@ -1622,6 +1627,16 @@ xe_vm_unbind_vma(struct xe_vma *vma, struct xe_exec_queue *q, trace_xe_vma_unbind(vma); + if (vma->ufence) { + struct xe_user_fence * const f = vma->ufence; + + if (!xe_sync_ufence_get_status(f)) + return ERR_PTR(-EBUSY); + + vma->ufence = NULL; + xe_sync_ufence_put(f); + } + if (number_tiles > 1) { fences = kmalloc_array(number_tiles, sizeof(*fences), GFP_KERNEL); @@ -1755,6 +1770,21 @@ err_fences: return ERR_PTR(err); } +static struct xe_user_fence * +find_ufence_get(struct xe_sync_entry *syncs, u32 num_syncs) +{ + unsigned int i; + + for (i = 0; i < num_syncs; i++) { + struct xe_sync_entry *e = &syncs[i]; + + if (xe_sync_is_ufence(e)) + return xe_sync_ufence_get(e); + } + + return NULL; +} + static int __xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, struct xe_exec_queue *q, struct xe_sync_entry *syncs, u32 num_syncs, bool immediate, bool first_op, @@ -1762,9 +1792,16 @@ static int __xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, { struct dma_fence *fence; struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, q); + struct xe_user_fence *ufence; xe_vm_assert_held(vm); + ufence = find_ufence_get(syncs, num_syncs); + if (vma->ufence && ufence) + xe_sync_ufence_put(vma->ufence); + + vma->ufence = ufence ?: vma->ufence; + if (immediate) { fence = xe_vm_bind_vma(vma, q, syncs, num_syncs, first_op, last_op); diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index 3fce50b91256..4c4a569bde3d 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -19,6 +19,7 @@ struct xe_bo; struct xe_sync_entry; +struct xe_user_fence; struct xe_vm; #define XE_VMA_READ_ONLY DRM_GPUVA_USERBITS @@ -105,6 +106,12 @@ struct xe_vma { * @pat_index: The pat index to use when encoding the PTEs for this vma. */ u16 pat_index; + + /** + * @ufence: The user fence that was provided with MAP. + * Needs to be signalled before UNMAP can be processed. + */ + struct xe_user_fence *ufence; }; /** -- cgit v1.2.3 From 0e6fec6da25167a568fbaeb8401d8172069124ad Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 26 Feb 2024 13:46:36 +0100 Subject: drm/xe/kunit: fix link failure with built-in xe When the driver is built-in but the tests are in loadable modules, the helpers don't actually get put into the driver: ERROR: modpost: "xe_kunit_helper_alloc_xe_device" [drivers/gpu/drm/xe/tests/xe_test.ko] undefined! 
Change the Makefile to ensure they are always part of the driver even when the rest of the kunit tests are in loadable modules. Fixes: 5095d13d758b ("drm/xe/kunit: Define helper functions to allocate fake xe device") Signed-off-by: Arnd Bergmann Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240226124736.1272949-1-arnd@kernel.org Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/Kconfig | 1 + drivers/gpu/drm/xe/Kconfig.debug | 1 - drivers/gpu/drm/xe/Makefile | 6 ++++-- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/xe/Kconfig b/drivers/gpu/drm/xe/Kconfig index 0e31dfb8989e..1a556d087e63 100644 --- a/drivers/gpu/drm/xe/Kconfig +++ b/drivers/gpu/drm/xe/Kconfig @@ -10,6 +10,7 @@ config DRM_XE select DRM_BUDDY select DRM_EXEC select DRM_KMS_HELPER + select DRM_KUNIT_TEST_HELPERS if DRM_XE_KUNIT_TEST != n select DRM_PANEL select DRM_SUBALLOC_HELPER select DRM_DISPLAY_DP_HELPER diff --git a/drivers/gpu/drm/xe/Kconfig.debug b/drivers/gpu/drm/xe/Kconfig.debug index 549065f57a78..df02e5d17d26 100644 --- a/drivers/gpu/drm/xe/Kconfig.debug +++ b/drivers/gpu/drm/xe/Kconfig.debug @@ -76,7 +76,6 @@ config DRM_XE_KUNIT_TEST depends on DRM_XE && KUNIT && DEBUG_FS default KUNIT_ALL_TESTS select DRM_EXPORT_FOR_TESTS if m - select DRM_KUNIT_TEST_HELPERS help Choose this option to allow the driver to perform selftests under the kunit framework diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index c531210695db..1a59c15f4d66 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -158,8 +158,10 @@ xe-$(CONFIG_PCI_IOV) += \ xe_lmtt_2l.o \ xe_lmtt_ml.o -xe-$(CONFIG_DRM_XE_KUNIT_TEST) += \ - tests/xe_kunit_helpers.o +# include helpers for tests even when XE is built-in +ifdef CONFIG_DRM_XE_KUNIT_TEST +xe-y += tests/xe_kunit_helpers.o +endif # i915 Display compat #defines and #includes subdir-ccflags-$(CONFIG_DRM_XE_DISPLAY) += \ -- cgit v1.2.3 From f5d3983366c0b88ec388b3407b29c1c0862ee2b8 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 26 Feb 2024 13:46:37 +0100 Subject: drm/xe/mmio: fix build warning for BAR resize on 32-bit clang complains about a nonsensical test on builds with a 32-bit phys_addr_t, which means resizing will always fail: drivers/gpu/drm/xe/xe_mmio.c:109:23: error: result of comparison of constant 4294967296 with expression of type 'resource_size_t' (aka 'unsigned int') is always false [-Werror,-Wtautological-constant-out-of-range-compare] 109 | root_res->start > 0x100000000ull) | ~~~~~~~~~~~~~~~ ^ ~~~~~~~~~~~~~~ Previously, BAR resize was always disallowed on 32-bit kernels, but this apparently changed recently. Since 32-bit machines can in theory support PAE/LPAE for large address spaces, this may end up useful, so change the driver to shut up the warning but still work when phys_addr_t/resource_size_t is 64 bit wide. 
Fixes: 9a6e6c14bfde ("drm/xe/mmio: Use non-atomic writeq/readq variant for 32b") Fixes: 237412e45390 ("drm/xe: Enable 32bits build") Signed-off-by: Arnd Bergmann Reviewed-by: Lucas De Marchi Acked-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240226124736.1272949-2-arnd@kernel.org Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_mmio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index e3db3a178760..7ba2477452d7 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -106,7 +106,7 @@ static void xe_resize_vram_bar(struct xe_device *xe) pci_bus_for_each_resource(root, root_res, i) { if (root_res && root_res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) && - root_res->start > 0x100000000ull) + (u64)root_res->start > 0x100000000ul) break; } -- cgit v1.2.3 From 1408784b599927d2f361bac6dc5170d2ee275f17 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 26 Feb 2024 13:46:38 +0100 Subject: drm/xe/xe2: fix 64-bit division in pte_update_size This function does not build on 32-bit targets when the compiler fails to reduce DIV_ROUND_UP() into a shift: ld.lld: error: undefined symbol: __aeabi_uldivmod >>> referenced by xe_migrate.c >>> drivers/gpu/drm/xe/xe_migrate.o:(pte_update_size) in archive vmlinux.a There are two instances in this function. Change the first to use an open-coded shift with the same behavior, and the second one to a 32-bit calculation, which is sufficient here as the size is never more than 2^32 pages (16TB). Fixes: 237412e45390 ("drm/xe: Enable 32bits build") Signed-off-by: Arnd Bergmann Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240226124736.1272949-3-arnd@kernel.org Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_migrate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index a66fdf2d2991..ee1bb938c493 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -462,7 +462,7 @@ static u32 pte_update_size(struct xe_migrate *m, } else { /* Clip L0 to available size */ u64 size = min(*L0, (u64)avail_pts * SZ_2M); - u64 num_4k_pages = DIV_ROUND_UP(size, XE_PAGE_SIZE); + u32 num_4k_pages = (size + XE_PAGE_SIZE - 1) >> XE_PTE_SHIFT; *L0 = size; *L0_ofs = xe_migrate_vm_addr(pt_ofs, 0); -- cgit v1.2.3 From 4c47049d93b7a7fc2230cded84a6aec6bbd3d61e Mon Sep 17 00:00:00 2001 From: Zhanjun Dong Date: Tue, 27 Feb 2024 08:49:22 -0800 Subject: drm/xe/guc: Fix missing topology init MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit init_steering_dss needs the topology dss mask to be initialized beforehand.
Fixed by moving xe_gt_topology_init ahead of xe_gt_mcr_init. Fixes: bf8ec3c3e82c ("drm/xe: Initialize GuC earlier during probe") Cc: Michał Winiarski Signed-off-by: Zhanjun Dong Reviewed-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20240227164922.281346-2-zhanjun.dong@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_gt.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 45646d3aea2d..85408e7a932b 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -315,8 +315,6 @@ int xe_gt_init_early(struct xe_gt *gt) if (err) return err; - xe_gt_topology_init(gt); - err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); if (err) return err; @@ -503,6 +501,7 @@ int xe_gt_init_hwconfig(struct xe_gt *gt) if (err) goto out; + xe_gt_topology_init(gt); xe_gt_mcr_init(gt); xe_pat_init(gt); -- cgit v1.2.3 From 8034f6b070cc3716e81b1846f8a4ca5339c3f29b Mon Sep 17 00:00:00 2001 From: Priyanka Dandamudi Date: Wed, 21 Feb 2024 15:49:50 +0530 Subject: drm/xe/xe_trace: Add move_lacks_source detail to xe_bo_move trace MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add move_lacks_source detail to the xe_bo_move trace to make it readable whether a move is a migrate clear or a migrate copy. Cc: Thomas Hellström Signed-off-by: Priyanka Dandamudi Reviewed-by: Thomas Hellström Fixes: a0df2cc858c3 ("drm/xe/xe_bo_move: Enhance xe_bo_move trace") Signed-off-by: Thomas Hellström Link: https://patchwork.freedesktop.org/patch/msgid/20240221101950.1019312-1-priyanka.dandamudi@intel.com --- drivers/gpu/drm/xe/xe_bo.c | 2 +- drivers/gpu/drm/xe/xe_trace.h | 12 ++++++++---- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 76dfaf1cd200..6603a0ea79c5 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -736,7 +736,7 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, migrate = xe->tiles[0].migrate; xe_assert(xe, migrate); - trace_xe_bo_move(bo, new_mem->mem_type, old_mem_type); + trace_xe_bo_move(bo, new_mem->mem_type, old_mem_type, move_lacks_source); xe_device_mem_access_get(xe); if (xe_bo_is_pinned(bo) && !xe_bo_is_user(bo)) { diff --git a/drivers/gpu/drm/xe/xe_trace.h b/drivers/gpu/drm/xe/xe_trace.h index 3b97633d81d8..4ddc55527f9a 100644 --- a/drivers/gpu/drm/xe/xe_trace.h +++ b/drivers/gpu/drm/xe/xe_trace.h @@ -102,14 +102,16 @@ DEFINE_EVENT(xe_bo, xe_bo_cpu_fault, ); TRACE_EVENT(xe_bo_move, - TP_PROTO(struct xe_bo *bo, uint32_t new_placement, uint32_t old_placement), - TP_ARGS(bo, new_placement, old_placement), + TP_PROTO(struct xe_bo *bo, uint32_t new_placement, uint32_t old_placement, + bool move_lacks_source), + TP_ARGS(bo, new_placement, old_placement, move_lacks_source), TP_STRUCT__entry( __field(struct xe_bo *, bo) __field(size_t, size) __field(u32, new_placement) __field(u32, old_placement) __array(char, device_id, 12) + __field(bool, move_lacks_source) ), TP_fast_assign( @@ -118,9 +120,11 @@ TRACE_EVENT(xe_bo_move, __entry->new_placement = new_placement; __entry->old_placement = old_placement; strscpy(__entry->device_id, dev_name(xe_bo_device(__entry->bo)->drm.dev), 12); + __entry->move_lacks_source = move_lacks_source; ), - TP_printk("migrate object %p [size %zu] from %s to %s device_id:%s", - __entry->bo, __entry->size, xe_mem_type_to_name[__entry->old_placement], + TP_printk("move_lacks_source:%s, migrate object %p [size %zu]
from %s to %s device_id:%s", + __entry->move_lacks_source ? "yes" : "no", __entry->bo, __entry->size, + xe_mem_type_to_name[__entry->old_placement], xe_mem_type_to_name[__entry->new_placement], __entry->device_id) ); -- cgit v1.2.3 From e5f276dc1e4c6475d322bc4672c33ab74b068f3b Mon Sep 17 00:00:00 2001 From: Mika Kuoppala Date: Wed, 17 Jan 2024 13:09:08 +0200 Subject: drm/xe: Remove obsolete async_ops from struct xe_vm When sync binds were reworked and worker removed, async_ops became obsolete. Remove it. Fixes: f3e9b1f43458 ("drm/xe: Remove async worker and rework sync binds") Signed-off-by: Mika Kuoppala Reviewed-by: Francois Dugast Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240117110908.2362615-1-mika.kuoppala@linux.intel.com --- drivers/gpu/drm/xe/xe_vm_types.h | 24 ------------------------ 1 file changed, 24 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index 4c4a569bde3d..79b5cab57711 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -196,30 +196,6 @@ struct xe_vm { */ struct xe_range_fence_tree rftree[XE_MAX_TILES_PER_DEVICE]; - /** @async_ops: async VM operations (bind / unbinds) */ - struct { - /** @list: list of pending async VM ops */ - struct list_head pending; - /** @work: worker to execute async VM ops */ - struct work_struct work; - /** @lock: protects list of pending async VM ops and fences */ - spinlock_t lock; - /** @fence: fence state */ - struct { - /** @context: context of async fence */ - u64 context; - /** @seqno: seqno of async fence */ - u32 seqno; - } fence; - /** @error: error state for async VM ops */ - int error; - /** - * @munmap_rebind_inflight: an munmap style VM bind is in the - * middle of a set of ops which requires a rebind at the end. - */ - bool munmap_rebind_inflight; - } async_ops; - const struct xe_pt_ops *pt_ops; /** @userptr: user pointer state */ -- cgit v1.2.3 From b9b7db490892f1b8be0e1fe92d0022a14d504efb Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 22 Feb 2024 10:40:08 -0800 Subject: drm/xe: Add LRC parsing for more GPU instructions The LRCs on some of our newer platforms appear to contain a few GPU instructions that weren't handled in our LRC parser. Add the relevant instruction names and opcodes so that our debugfs LRC dumps will properly indicate what these are. 
Bspec: 55866, 64848, 46931 Signed-off-by: Matt Roper Reviewed-by: Ravi Kumar Vodapalli Link: https://patchwork.freedesktop.org/patch/msgid/20240222184009.6857-2-matthew.d.roper@intel.com --- .../drm/xe/instructions/xe_gfx_state_commands.h | 18 +++++++++++++ .../gpu/drm/xe/instructions/xe_gfxpipe_commands.h | 3 +++ drivers/gpu/drm/xe/instructions/xe_instr_defs.h | 1 + drivers/gpu/drm/xe/xe_lrc.c | 31 ++++++++++++++++++++++ 4 files changed, 53 insertions(+) create mode 100644 drivers/gpu/drm/xe/instructions/xe_gfx_state_commands.h diff --git a/drivers/gpu/drm/xe/instructions/xe_gfx_state_commands.h b/drivers/gpu/drm/xe/instructions/xe_gfx_state_commands.h new file mode 100644 index 000000000000..dca62af5a5d5 --- /dev/null +++ b/drivers/gpu/drm/xe/instructions/xe_gfx_state_commands.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2024 Intel Corporation + */ + +#ifndef _XE_GFX_STATE_COMMANDS_H_ +#define _XE_GFX_STATE_COMMANDS_H_ + +#include "instructions/xe_instr_defs.h" + +#define GFX_STATE_OPCODE REG_GENMASK(28, 26) + +#define GFX_STATE_CMD(opcode) \ + (XE_INSTR_GFX_STATE | REG_FIELD_PREP(GFX_STATE_OPCODE, opcode)) + +#define STATE_WRITE_INLINE GFX_STATE_CMD(0x0) + +#endif diff --git a/drivers/gpu/drm/xe/instructions/xe_gfxpipe_commands.h b/drivers/gpu/drm/xe/instructions/xe_gfxpipe_commands.h index 8e6dd061f2ae..31d28a67ef6a 100644 --- a/drivers/gpu/drm/xe/instructions/xe_gfxpipe_commands.h +++ b/drivers/gpu/drm/xe/instructions/xe_gfxpipe_commands.h @@ -47,6 +47,8 @@ #define GPGPU_CSR_BASE_ADDRESS GFXPIPE_COMMON_CMD(0x1, 0x4) #define STATE_COMPUTE_MODE GFXPIPE_COMMON_CMD(0x1, 0x5) #define CMD_3DSTATE_BTD GFXPIPE_COMMON_CMD(0x1, 0x6) +#define STATE_SYSTEM_MEM_FENCE_ADDRESS GFXPIPE_COMMON_CMD(0x1, 0x9) +#define STATE_CONTEXT_DATA_BASE_ADDRESS GFXPIPE_COMMON_CMD(0x1, 0xB) #define CMD_3DSTATE_VF_STATISTICS GFXPIPE_SINGLE_DW_CMD(0x0, 0xB) @@ -71,6 +73,7 @@ #define CMD_3DSTATE_WM GFXPIPE_3D_CMD(0x0, 0x14) #define CMD_3DSTATE_CONSTANT_VS GFXPIPE_3D_CMD(0x0, 0x15) #define CMD_3DSTATE_CONSTANT_GS GFXPIPE_3D_CMD(0x0, 0x16) +#define CMD_3DSTATE_CONSTANT_PS GFXPIPE_3D_CMD(0x0, 0x17) #define CMD_3DSTATE_SAMPLE_MASK GFXPIPE_3D_CMD(0x0, 0x18) #define CMD_3DSTATE_CONSTANT_HS GFXPIPE_3D_CMD(0x0, 0x19) #define CMD_3DSTATE_CONSTANT_DS GFXPIPE_3D_CMD(0x0, 0x1A) diff --git a/drivers/gpu/drm/xe/instructions/xe_instr_defs.h b/drivers/gpu/drm/xe/instructions/xe_instr_defs.h index 04179b2a48e1..fd2ce7ace510 100644 --- a/drivers/gpu/drm/xe/instructions/xe_instr_defs.h +++ b/drivers/gpu/drm/xe/instructions/xe_instr_defs.h @@ -17,6 +17,7 @@ #define XE_INSTR_MI REG_FIELD_PREP(XE_INSTR_CMD_TYPE, 0x0) #define XE_INSTR_GSC REG_FIELD_PREP(XE_INSTR_CMD_TYPE, 0x2) #define XE_INSTR_GFXPIPE REG_FIELD_PREP(XE_INSTR_CMD_TYPE, 0x3) +#define XE_INSTR_GFX_STATE REG_FIELD_PREP(XE_INSTR_CMD_TYPE, 0x4) /* * Most (but not all) instructions have a "length" field in the instruction diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index 7ad853b0788a..6c675d1bcb57 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -7,6 +7,7 @@ #include "instructions/xe_mi_commands.h" #include "instructions/xe_gfxpipe_commands.h" +#include "instructions/xe_gfx_state_commands.h" #include "regs/xe_engine_regs.h" #include "regs/xe_gpu_commands.h" #include "regs/xe_lrc_layout.h" @@ -1037,6 +1038,8 @@ static int dump_gfxpipe_command(struct drm_printer *p, MATCH(GPGPU_CSR_BASE_ADDRESS); MATCH(STATE_COMPUTE_MODE); MATCH3D(3DSTATE_BTD); + MATCH(STATE_SYSTEM_MEM_FENCE_ADDRESS); + 
MATCH(STATE_CONTEXT_DATA_BASE_ADDRESS); MATCH3D(3DSTATE_VF_STATISTICS); @@ -1061,6 +1064,7 @@ static int dump_gfxpipe_command(struct drm_printer *p, MATCH3D(3DSTATE_WM); MATCH3D(3DSTATE_CONSTANT_VS); MATCH3D(3DSTATE_CONSTANT_GS); + MATCH3D(3DSTATE_CONSTANT_PS); MATCH3D(3DSTATE_SAMPLE_MASK); MATCH3D(3DSTATE_CONSTANT_HS); MATCH3D(3DSTATE_CONSTANT_DS); @@ -1153,6 +1157,31 @@ static int dump_gfxpipe_command(struct drm_printer *p, } } +static int dump_gfx_state_command(struct drm_printer *p, + struct xe_gt *gt, + u32 *dw, + int remaining_dw) +{ + u32 numdw = instr_dw(*dw); + u32 opcode = REG_FIELD_GET(GFX_STATE_OPCODE, *dw); + + /* + * Make sure we haven't mis-parsed a number of dwords that exceeds the + * remaining size of the LRC. + */ + if (xe_gt_WARN_ON(gt, numdw > remaining_dw)) + numdw = remaining_dw; + + switch (*dw & (XE_INSTR_GFX_STATE | GFX_STATE_OPCODE)) { + MATCH(STATE_WRITE_INLINE); + + default: + drm_printf(p, "[%#010x] unknown GFX_STATE command (opcode=%#x), likely %d dwords\n", + *dw, opcode, numdw); + return numdw; + } +} + void xe_lrc_dump_default(struct drm_printer *p, struct xe_gt *gt, enum xe_engine_class hwe_class) @@ -1177,6 +1206,8 @@ void xe_lrc_dump_default(struct drm_printer *p, num_dw = dump_mi_command(p, gt, dw, remaining_dw); } else if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_GFXPIPE) { num_dw = dump_gfxpipe_command(p, gt, dw, remaining_dw); + } else if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_GFX_STATE) { + num_dw = dump_gfx_state_command(p, gt, dw, remaining_dw); } else { num_dw = min(instr_dw(*dw), remaining_dw); drm_printf(p, "[%#10x] Unknown instruction of type %#x, likely %d dwords\n", -- cgit v1.2.3 From d0a5fb2e0a5abeaad983c5c5c52b88ccef3aaae2 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Sat, 24 Feb 2024 16:14:48 -0800 Subject: drm/xe: Fix build error in xe_ggtt.c Need to include io-64-nonatomic-lo-hi.h for the writeq() function. Commit 3121fed0c51b ("drm/xe: Cleanup some layering in GGTT") removed the xe_mmio.h include and so lost the indirect include. Add it where it's needed. Fixes: 3121fed0c51b ("drm/xe: Cleanup some layering in GGTT") Closes: https://lore.kernel.org/oe-kbuild-all/202402241903.R5J8hKVI-lkp@intel.com/ Reported-by: kernel test robot Signed-off-by: Matthew Brost Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240225001448.81513-1-matthew.brost@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_ggtt.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index 5d46958e3144..717d0e76277a 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -5,6 +5,7 @@ #include "xe_ggtt.h" +#include <linux/io-64-nonatomic-lo-hi.h> #include <linux/sizes.h> #include <drm/drm_managed.h> -- cgit v1.2.3 From 25664e328f930811fd2e91f32d540a453bcf1334 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Tue, 27 Feb 2024 22:10:44 -0800 Subject: drm/xe/mocs: Refactor mocs/l3cc loop There's no reason to keep the assignment and condition in the same statement, particularly making use of the comma operator. Improve readability by doing each step in its own statement. This will make it easier to support an odd number of entries.
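Seen in isolation, the loop transformation is the one in this standalone C sketch, where a dummy get_entry() stands in for get_entry_control()/get_entry_l3cc(); both loops visit exactly the same indices:

#include <stdio.h>

#define N_ENTRIES 5

static unsigned int get_entry(unsigned int i)
{
        return i * 2;   /* stand-in for the real table lookup */
}

int main(void)
{
        unsigned int i, mocs;

        /* Before: the assignment hides in the loop condition behind the
         * comma operator, so the condition has a side effect. */
        for (i = 0; i < N_ENTRIES ? (mocs = get_entry(i)), 1 : 0; i++)
                printf("old [%u] = 0x%x\n", i, mocs);

        /* After: each step is an ordinary statement inside the body. */
        for (i = 0; i < N_ENTRIES; i++) {
                mocs = get_entry(i);
                printf("new [%u] = 0x%x\n", i, mocs);
        }

        return 0;
}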
Reviewed-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20240228061048.3661978-2-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/tests/xe_mocs.c | 17 +++++++++-------- drivers/gpu/drm/xe/xe_mocs.c | 19 +++++++++---------- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/xe/tests/xe_mocs.c b/drivers/gpu/drm/xe/tests/xe_mocs.c index df0cbb2ddcb5..7c91e01c47a3 100644 --- a/drivers/gpu/drm/xe/tests/xe_mocs.c +++ b/drivers/gpu/drm/xe/tests/xe_mocs.c @@ -49,11 +49,11 @@ static void read_l3cc_table(struct xe_gt *gt, ret = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); KUNIT_ASSERT_EQ_MSG(test, ret, 0, "Forcewake Failed.\n"); mocs_dbg(&gt_to_xe(gt)->drm, "L3CC entries:%d\n", info->n_entries); - for (i = 0; - i < (info->n_entries + 1) / 2 ? - (l3cc = l3cc_combine(get_entry_l3cc(info, 2 * i), - get_entry_l3cc(info, 2 * i + 1))), 1 : 0; - i++) { + + for (i = 0; i < (info->n_entries + 1) / 2; i++) { + l3cc = l3cc_combine(get_entry_l3cc(info, 2 * i), + get_entry_l3cc(info, 2 * i + 1)); + if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1250) reg_val = xe_gt_mcr_unicast_read_any(gt, XEHP_LNCFCMOCS(i)); else @@ -84,9 +84,10 @@ static void read_mocs_table(struct xe_gt *gt, mocs_dbg(&gt_to_xe(gt)->drm, "Global MOCS entries:%d\n", info->n_entries); drm_WARN_ONCE(&xe->drm, !info->unused_entries_index, "Unused entries index should have been defined\n"); - for (i = 0; - i < info->n_entries ? (mocs = get_entry_control(info, i)), 1 : 0; - i++) { + + for (i = 0; i < info->n_entries; i++) { + mocs = get_entry_control(info, i); + if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1250) reg_val = xe_gt_mcr_unicast_read_any(gt, XEHP_GLOBAL_MOCS(i)); else diff --git a/drivers/gpu/drm/xe/xe_mocs.c b/drivers/gpu/drm/xe/xe_mocs.c index 609d997b3e9b..001e4301c639 100644 --- a/drivers/gpu/drm/xe/xe_mocs.c +++ b/drivers/gpu/drm/xe/xe_mocs.c @@ -473,9 +473,9 @@ static void __init_mocs_table(struct xe_gt *gt, mocs_dbg(&gt_to_xe(gt)->drm, "entries:%d\n", info->n_entries); drm_WARN_ONCE(&xe->drm, !info->unused_entries_index, "Unused entries index should have been defined\n"); - for (i = 0; - i < info->n_entries ? (mocs = get_entry_control(info, i)), 1 : 0; - i++) { + for (i = 0; i < info->n_entries; i++) { + mocs = get_entry_control(info, i); + mocs_dbg(&gt_to_xe(gt)->drm, "GLOB_MOCS[%d] 0x%x 0x%x\n", i, XELP_GLOBAL_MOCS(i).addr, mocs); @@ -511,13 +511,12 @@ static void init_l3cc_table(struct xe_gt *gt, u32 l3cc; mocs_dbg(&gt_to_xe(gt)->drm, "entries:%d\n", info->n_entries); - for (i = 0; - i < (info->n_entries + 1) / 2 ? - (l3cc = l3cc_combine(get_entry_l3cc(info, 2 * i), - get_entry_l3cc(info, 2 * i + 1))), 1 : 0; - i++) { - mocs_dbg(&gt_to_xe(gt)->drm, "LNCFCMOCS[%d] 0x%x 0x%x\n", i, XELP_LNCFCMOCS(i).addr, - l3cc); + for (i = 0; i < (info->n_entries + 1) / 2; i++) { + l3cc = l3cc_combine(get_entry_l3cc(info, 2 * i), + get_entry_l3cc(info, 2 * i + 1)); + + mocs_dbg(&gt_to_xe(gt)->drm, "LNCFCMOCS[%d] 0x%x 0x%x\n", i, + XELP_LNCFCMOCS(i).addr, l3cc); if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1250) xe_gt_mcr_multicast_write(gt, XEHP_LNCFCMOCS(i), l3cc); -- cgit v1.2.3 From d40c6c45fbe7aeb7e773b786b4ec010d5d40367d Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Tue, 27 Feb 2024 22:10:45 -0800 Subject: drm/xe/mocs: Be explicit when logging number of entries Make sure to log whether the number of entries refers to l3cc or to mocs, so the message doesn't depend on the surrounding context.
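The ambiguity being removed is easy to reproduce in a standalone C sketch: two call sites emitting the same untagged format are indistinguishable once interleaved in a log, while a per-table prefix makes each line self-describing (all names invented):

#include <stdio.h>

/* Old style: identical, context-dependent output from both tables. */
static void init_mocs(int n)  { printf("entries:%d\n", n); }
static void init_l3cc(int n)  { printf("entries:%d\n", n); }

/* New style: the table name travels with the message. */
static void init_mocs_tagged(int n) { printf("mocs entries: %d\n", n); }
static void init_l3cc_tagged(int n) { printf("l3cc entries: %d\n", n); }

int main(void)
{
        init_mocs(64);
        init_l3cc(32);          /* which line was which? */
        init_mocs_tagged(64);
        init_l3cc_tagged(32);   /* unambiguous */
        return 0;
}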
Reviewed-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20240228061048.3661978-3-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_mocs.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_mocs.c b/drivers/gpu/drm/xe/xe_mocs.c index 001e4301c639..bdb755b96323 100644 --- a/drivers/gpu/drm/xe/xe_mocs.c +++ b/drivers/gpu/drm/xe/xe_mocs.c @@ -470,7 +470,7 @@ static void __init_mocs_table(struct xe_gt *gt, unsigned int i; u32 mocs; - mocs_dbg(&gt_to_xe(gt)->drm, "entries:%d\n", info->n_entries); + mocs_dbg(&gt_to_xe(gt)->drm, "mocs entries: %d\n", info->n_entries); drm_WARN_ONCE(&xe->drm, !info->unused_entries_index, "Unused entries index should have been defined\n"); for (i = 0; i < info->n_entries; i++) { @@ -510,7 +510,8 @@ static void init_l3cc_table(struct xe_gt *gt, unsigned int i; u32 l3cc; - mocs_dbg(&gt_to_xe(gt)->drm, "entries:%d\n", info->n_entries); + mocs_dbg(&gt_to_xe(gt)->drm, "l3cc entries: %d\n", info->n_entries); + for (i = 0; i < (info->n_entries + 1) / 2; i++) { l3cc = l3cc_combine(get_entry_l3cc(info, 2 * i), get_entry_l3cc(info, 2 * i + 1)); -- cgit v1.2.3 From 17c20e3b50c7fd03b7583d52f1a8b022dbd0750f Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Tue, 27 Feb 2024 22:10:46 -0800 Subject: drm/xe/mocs: Move warn/assertion up The warn-once in __init_mocs_table() to make sure there's an index set for unused entries is more of a sanity check that should be done as the first thing in that function. The kunit test replicates the same check, so also move it up and turn it into a failure condition for the test. Reviewed-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20240228061048.3661978-4-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/tests/xe_mocs.c | 5 +++-- drivers/gpu/drm/xe/xe_mocs.c | 4 +++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/xe/tests/xe_mocs.c b/drivers/gpu/drm/xe/tests/xe_mocs.c index 7c91e01c47a3..9787546e4224 100644 --- a/drivers/gpu/drm/xe/tests/xe_mocs.c +++ b/drivers/gpu/drm/xe/tests/xe_mocs.c @@ -79,11 +79,12 @@ static void read_mocs_table(struct xe_gt *gt, struct kunit *test = xe_cur_kunit(); + KUNIT_EXPECT_TRUE_MSG(test, info->unused_entries_index, + "Unused entries index should have been defined\n"); + ret = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); KUNIT_ASSERT_EQ_MSG(test, ret, 0, "Forcewake Failed.\n"); mocs_dbg(&gt_to_xe(gt)->drm, "Global MOCS entries:%d\n", info->n_entries); - drm_WARN_ONCE(&xe->drm, !info->unused_entries_index, - "Unused entries index should have been defined\n"); for (i = 0; i < info->n_entries; i++) { mocs = get_entry_control(info, i); diff --git a/drivers/gpu/drm/xe/xe_mocs.c b/drivers/gpu/drm/xe/xe_mocs.c index bdb755b96323..60ab34fd213f 100644 --- a/drivers/gpu/drm/xe/xe_mocs.c +++ b/drivers/gpu/drm/xe/xe_mocs.c @@ -470,9 +470,11 @@ static void __init_mocs_table(struct xe_gt *gt, unsigned int i; u32 mocs; - mocs_dbg(&gt_to_xe(gt)->drm, "mocs entries: %d\n", info->n_entries); drm_WARN_ONCE(&xe->drm, !info->unused_entries_index, "Unused entries index should have been defined\n"); + + mocs_dbg(&gt_to_xe(gt)->drm, "mocs entries: %d\n", info->n_entries); + for (i = 0; i < info->n_entries; i++) { mocs = get_entry_control(info, i); -- cgit v1.2.3 From 50a9ffe61f7da1e8a55a1802cb010902dad423c8 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Tue, 27 Feb 2024 22:10:47 -0800 Subject: drm/xe/mocs: Allow odd number of entries on test Refactor the mocs/l3cc kunit test
to support an odd number of entries. This switches from the "check the register value" approach to checking each entry value extracted from the register read. This makes the output easier to reason about and to cross-check against the bspec. Some code reordering and variable re-use was also done so the two functions follow more or less the same logic. Reviewed-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20240228061048.3661978-5-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/tests/xe_mocs.c | 67 +++++++++++++++++++++----------------- 1 file changed, 37 insertions(+), 30 deletions(-) diff --git a/drivers/gpu/drm/xe/tests/xe_mocs.c b/drivers/gpu/drm/xe/tests/xe_mocs.c index 9787546e4224..25dd93ff1606 100644 --- a/drivers/gpu/drm/xe/tests/xe_mocs.c +++ b/drivers/gpu/drm/xe/tests/xe_mocs.c @@ -39,30 +39,37 @@ static int live_mocs_init(struct live_mocs *arg, struct xe_gt *gt) static void read_l3cc_table(struct xe_gt *gt, const struct xe_mocs_info *info) { + struct kunit *test = xe_cur_kunit(); + struct xe_device *xe = gt_to_xe(gt); + u32 l3cc, l3cc_expected; unsigned int i; - u32 l3cc; u32 reg_val; u32 ret; - struct kunit *test = xe_cur_kunit(); - ret = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); KUNIT_ASSERT_EQ_MSG(test, ret, 0, "Forcewake Failed.\n"); - mocs_dbg(&gt_to_xe(gt)->drm, "L3CC entries:%d\n", info->n_entries); - for (i = 0; i < (info->n_entries + 1) / 2; i++) { - l3cc = l3cc_combine(get_entry_l3cc(info, 2 * i), - get_entry_l3cc(info, 2 * i + 1)); - - if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1250) - reg_val = xe_gt_mcr_unicast_read_any(gt, XEHP_LNCFCMOCS(i)); - else - reg_val = xe_mmio_read32(gt, XELP_LNCFCMOCS(i)); - mocs_dbg(&gt_to_xe(gt)->drm, "%d 0x%x 0x%x 0x%x\n", i, - XELP_LNCFCMOCS(i).addr, reg_val, l3cc); - if (reg_val != l3cc) - KUNIT_FAIL(test, "l3cc reg 0x%x has incorrect val.\n", - XELP_LNCFCMOCS(i).addr); + for (i = 0; i < info->n_entries; i++) { + if (!(i & 1)) { + if (GRAPHICS_VERx100(xe) >= 1250) + reg_val = xe_gt_mcr_unicast_read_any(gt, XEHP_LNCFCMOCS(i >> 1)); + else + reg_val = xe_mmio_read32(gt, XELP_LNCFCMOCS(i >> 1)); + + mocs_dbg(&xe->drm, "reg_val=0x%x\n", reg_val); + } else { + /* Just re-use value read on previous iteration */ + reg_val >>= 16; + } + + l3cc_expected = get_entry_l3cc(info, i); + l3cc = reg_val & 0xffff; + + mocs_dbg(&xe->drm, "[%u] expected=0x%x actual=0x%x\n", + i, l3cc_expected, l3cc); + + KUNIT_EXPECT_EQ_MSG(test, l3cc_expected, l3cc, + "l3cc idx=%u has incorrect val.\n", i); } xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); } @@ -70,35 +77,35 @@ static void read_l3cc_table(struct xe_gt *gt, static void read_mocs_table(struct xe_gt *gt, const struct xe_mocs_info *info) { + struct kunit *test = xe_cur_kunit(); struct xe_device *xe = gt_to_xe(gt); - + u32 mocs, mocs_expected; unsigned int i; - u32 mocs; u32 reg_val; u32 ret; - struct kunit *test = xe_cur_kunit(); - KUNIT_EXPECT_TRUE_MSG(test, info->unused_entries_index, "Unused entries index should have been defined\n"); ret = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); KUNIT_ASSERT_EQ_MSG(test, ret, 0, "Forcewake Failed.\n"); - mocs_dbg(&gt_to_xe(gt)->drm, "Global MOCS entries:%d\n", info->n_entries); for (i = 0; i < info->n_entries; i++) { - mocs = get_entry_control(info, i); - - if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1250) + if (GRAPHICS_VERx100(xe) >= 1250) reg_val = xe_gt_mcr_unicast_read_any(gt, XEHP_GLOBAL_MOCS(i)); else reg_val = xe_mmio_read32(gt, XELP_GLOBAL_MOCS(i)); - mocs_dbg(&gt_to_xe(gt)->drm, "%d 0x%x 0x%x 0x%x\n", i, - XELP_GLOBAL_MOCS(i).addr,
reg_val, mocs); if (reg_val != mocs) KUNIT_FAIL(test, "mocs reg 0x%x has incorrect val.\n", XELP_GLOBAL_MOCS(i).addr); + + mocs_expected = get_entry_control(info, i); + mocs = reg_val; + + mocs_dbg(&xe->drm, "[%u] expected=0x%x actual=0x%x\n", + i, mocs_expected, mocs); + + KUNIT_EXPECT_EQ_MSG(test, mocs_expected, mocs, + "mocs reg 0x%x has incorrect val.\n", i); } + xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); } -- cgit v1.2.3 From 67b7950333925db42efaf0616caf4e77decddda1 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Tue, 27 Feb 2024 22:10:48 -0800 Subject: drm/xe/mocs: Fix DG2 kunit LNCFCMOCS31[31:16] is read-only for DG2 and MTL, so it's not possible to verify what was set. While trying to set it doesn't cause any issue, reading it back later to check whether the value got correctly recorded causes the test to fail. Now that the test is reliable for an odd number of entries, reduce the entry count so the last entry is ignored. Bspec: 55267 Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/1253 Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/1233 Cc: Matt Roper Reviewed-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20240228061048.3661978-6-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_mocs.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_mocs.c b/drivers/gpu/drm/xe/xe_mocs.c index 60ab34fd213f..f56f630fa29d 100644 --- a/drivers/gpu/drm/xe/xe_mocs.c +++ b/drivers/gpu/drm/xe/xe_mocs.c @@ -72,7 +72,7 @@ struct xe_mocs_info { /* Helper defines */ #define XELP_NUM_MOCS_ENTRIES 64 /* 63-64 are reserved, but configured. */ #define PVC_NUM_MOCS_ENTRIES 3 -#define MTL_NUM_MOCS_ENTRIES 16 +#define MTL_NUM_MOCS_ENTRIES 16 #define XE2_NUM_MOCS_ENTRIES 16 /* (e)LLC caching options */ @@ -401,7 +401,11 @@ static unsigned int get_mocs_settings(struct xe_device *xe, info->size = ARRAY_SIZE(dg2_mocs_desc); info->table = dg2_mocs_desc; info->uc_index = 1; - info->n_entries = XELP_NUM_MOCS_ENTRIES; + /* + * Last entry is RO on hardware, don't bother with what was + * written when checking later + */ + info->n_entries = XELP_NUM_MOCS_ENTRIES - 1; info->unused_entries_index = 3; break; case XE_DG1: -- cgit v1.2.3 From 27b5a3f237fe66dbf2288c2b50973aee8a427e41 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 29 Feb 2024 20:10:36 -0800 Subject: drm/xe: Fix ref counting leak on page fault If a page fault occurs on a VM not in fault mode, a ref can be leaked. Fix this.
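The fix follows a common refcounting rule: decide whether the object qualifies inside the same critical section that takes the reference, so no reference is ever created for an object that will be rejected. A standalone C sketch with toy types, where a pthread mutex stands in for the xe usm lock:

#include <pthread.h>
#include <stddef.h>

struct obj {
        int refs;
        int usable;     /* stand-in for xe_vm_in_fault_mode() */
};

static pthread_mutex_t table_lock = PTHREAD_MUTEX_INITIALIZER;

/*
 * Take the reference only once the object is known to qualify; a caller
 * that gets NULL back holds no reference, so nothing can leak.
 */
static struct obj *lookup_get(struct obj *candidate)
{
        struct obj *found = NULL;

        pthread_mutex_lock(&table_lock);
        if (candidate && candidate->usable) {
                candidate->refs++;
                found = candidate;
        }
        pthread_mutex_unlock(&table_lock);

        return found;
}

int main(void)
{
        struct obj o = { .refs = 0, .usable = 0 };

        return lookup_get(&o) ? 1 : 0;  /* not usable: no ref taken */
}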
Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Signed-off-by: Matthew Brost Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20240301041036.238471-1-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_gt_pagefault.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c index c26e4fcca01e..73c535193a98 100644 --- a/drivers/gpu/drm/xe/xe_gt_pagefault.c +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c @@ -146,10 +146,12 @@ static int handle_pagefault(struct xe_gt *gt, struct pagefault *pf) /* ASID to VM */ mutex_lock(&xe->usm.lock); vm = xa_load(&xe->usm.asid_to_vm, pf->asid); - if (vm) + if (vm && xe_vm_in_fault_mode(vm)) xe_vm_get(vm); + else + vm = NULL; mutex_unlock(&xe->usm.lock); - if (!vm || !xe_vm_in_fault_mode(vm)) + if (!vm) return -EINVAL; retry_userptr: -- cgit v1.2.3 From 5224ed586ba7f9bba956655a1bfe5b75df7394d4 Mon Sep 17 00:00:00 2001 From: Dafna Hirschfeld Date: Sat, 2 Mar 2024 17:39:28 +0200 Subject: drm/xe: Replace 'grouped target' in Makefile with pattern rule Since 'grouped target' is used only in 'make' 4.3, it should be avoided. Replace it with 'multi-target pattern rule' which has the same behavior. Fixes: 9616e74b796c ("drm/xe: Add support for OOB workarounds") Signed-off-by: Dafna Hirschfeld Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240302153927.2602241-1-dhirschfeld@habana.ai [ reword commit message ] Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index 1a59c15f4d66..5a428ca00f10 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -42,7 +42,8 @@ generated_oob := $(obj)/generated/xe_wa_oob.c $(obj)/generated/xe_wa_oob.h quiet_cmd_wa_oob = GEN $(notdir $(generated_oob)) cmd_wa_oob = mkdir -p $(@D); $^ $(generated_oob) -$(generated_oob) &: $(obj)/xe_gen_wa_oob $(srctree)/$(src)/xe_wa_oob.rules +$(obj)/generated/%_wa_oob.c $(obj)/generated/%_wa_oob.h: $(obj)/xe_gen_wa_oob \ + $(srctree)/$(src)/xe_wa_oob.rules $(call cmd,wa_oob) uses_generated_oob := \ -- cgit v1.2.3 From 47058633d9c58d7da274a1de5e3588c17c7d7f56 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Tue, 27 Feb 2024 14:12:45 +0100 Subject: drm/xe: Move lrc snapshot capturing to xe_lrc.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This allows the dumping of HWSP and HW Context without exporting more functions. Changes since v1: - GFP_KERNEL -> GFP_NOWAIT. 
(Souza) Signed-off-by: Maarten Lankhorst Reviewed-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20240227131248.92910-1-maarten.lankhorst@linux.intel.com --- drivers/gpu/drm/xe/xe_guc_submit.c | 33 ++++++++--------------- drivers/gpu/drm/xe/xe_guc_submit_types.h | 13 +-------- drivers/gpu/drm/xe/xe_lrc.c | 45 ++++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_lrc.h | 4 +++ drivers/gpu/drm/xe/xe_lrc_types.h | 2 ++ 5 files changed, 63 insertions(+), 34 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 29748e40555f..ed5c811208b2 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -1820,21 +1820,14 @@ xe_guc_exec_queue_snapshot_capture(struct xe_sched_job *job) snapshot->sched_props.preempt_timeout_us = q->sched_props.preempt_timeout_us; - snapshot->lrc = kmalloc_array(q->width, sizeof(struct lrc_snapshot), + snapshot->lrc = kmalloc_array(q->width, sizeof(struct xe_lrc_snapshot *), GFP_ATOMIC); if (snapshot->lrc) { for (i = 0; i < q->width; ++i) { struct xe_lrc *lrc = q->lrc + i; - snapshot->lrc[i].context_desc = - lower_32_bits(xe_lrc_ggtt_addr(lrc)); - snapshot->lrc[i].head = xe_lrc_ring_head(lrc); - snapshot->lrc[i].tail.internal = lrc->ring.tail; - snapshot->lrc[i].tail.memory = - xe_lrc_read_ctx_reg(lrc, CTX_RING_TAIL); - snapshot->lrc[i].start_seqno = xe_lrc_start_seqno(lrc); - snapshot->lrc[i].seqno = xe_lrc_seqno(lrc); + snapshot->lrc[i] = xe_lrc_snapshot_capture(lrc); } } @@ -1900,18 +1893,9 @@ xe_guc_exec_queue_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snaps drm_printf(p, "\tPreempt timeout: %u (us)\n", snapshot->sched_props.preempt_timeout_us); - for (i = 0; snapshot->lrc && i < snapshot->width; ++i) { - drm_printf(p, "\tHW Context Desc: 0x%08x\n", - snapshot->lrc[i].context_desc); - drm_printf(p, "\tLRC Head: (memory) %u\n", - snapshot->lrc[i].head); - drm_printf(p, "\tLRC Tail: (internal) %u, (memory) %u\n", - snapshot->lrc[i].tail.internal, - snapshot->lrc[i].tail.memory); - drm_printf(p, "\tStart seqno: (memory) %d\n", - snapshot->lrc[i].start_seqno); - drm_printf(p, "\tSeqno: (memory) %d\n", snapshot->lrc[i].seqno); - } + for (i = 0; snapshot->lrc && i < snapshot->width; ++i) + xe_lrc_snapshot_print(snapshot->lrc[i], p); + drm_printf(p, "\tSchedule State: 0x%x\n", snapshot->schedule_state); drm_printf(p, "\tFlags: 0x%lx\n", snapshot->exec_queue_flags); @@ -1936,10 +1920,15 @@ xe_guc_exec_queue_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snaps */ void xe_guc_exec_queue_snapshot_free(struct xe_guc_submit_exec_queue_snapshot *snapshot) { + int i; if (!snapshot) return; - kfree(snapshot->lrc); + if (snapshot->lrc) { + for (i = 0; i < snapshot->width; i++) + xe_lrc_snapshot_free(snapshot->lrc[i]); + kfree(snapshot->lrc); + } kfree(snapshot->pending_list); kfree(snapshot); } diff --git a/drivers/gpu/drm/xe/xe_guc_submit_types.h b/drivers/gpu/drm/xe/xe_guc_submit_types.h index 72fc0f42b0a5..dc7456c34583 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit_types.h +++ b/drivers/gpu/drm/xe/xe_guc_submit_types.h @@ -61,17 +61,6 @@ struct guc_submit_parallel_scratch { u32 wq[WQ_SIZE / sizeof(u32)]; }; -struct lrc_snapshot { - u32 context_desc; - u32 head; - struct { - u32 internal; - u32 memory; - } tail; - u32 start_seqno; - u32 seqno; -}; - struct pending_list_snapshot { u32 seqno; bool fence; @@ -109,7 +98,7 @@ struct xe_guc_submit_exec_queue_snapshot { } sched_props; /** @lrc: LRC Snapshot */ - struct lrc_snapshot *lrc; + struct 
xe_lrc_snapshot **lrc; /** @schedule_state: Schedule State at the moment of Crash */ u32 schedule_state; diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index 6c675d1bcb57..046d954a26c3 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -32,6 +32,17 @@ #define ENGINE_CLASS_SHIFT 61 #define ENGINE_INSTANCE_SHIFT 48 +struct xe_lrc_snapshot { + u32 context_desc; + u32 head; + struct { + u32 internal; + u32 memory; + } tail; + u32 start_seqno; + u32 seqno; +}; + static struct xe_device * lrc_to_xe(struct xe_lrc *lrc) { @@ -1331,3 +1342,37 @@ void xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, struct xe_bb *b bb->len += num_dw; } } + +struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc) +{ + struct xe_lrc_snapshot *snapshot = kmalloc(sizeof(*snapshot), GFP_NOWAIT); + + if (!snapshot) + return NULL; + + snapshot->context_desc = lower_32_bits(xe_lrc_ggtt_addr(lrc)); + snapshot->head = xe_lrc_ring_head(lrc); + snapshot->tail.internal = lrc->ring.tail; + snapshot->tail.memory = xe_lrc_read_ctx_reg(lrc, CTX_RING_TAIL); + snapshot->start_seqno = xe_lrc_start_seqno(lrc); + snapshot->seqno = xe_lrc_seqno(lrc); + return snapshot; +} + +void xe_lrc_snapshot_print(struct xe_lrc_snapshot *snapshot, struct drm_printer *p) +{ + if (!snapshot) + return; + + drm_printf(p, "\tHW Context Desc: 0x%08x\n", snapshot->context_desc); + drm_printf(p, "\tLRC Head: (memory) %u\n", snapshot->head); + drm_printf(p, "\tLRC Tail: (internal) %u, (memory) %u\n", + snapshot->tail.internal, snapshot->tail.memory); + drm_printf(p, "\tStart seqno: (memory) %d\n", snapshot->start_seqno); + drm_printf(p, "\tSeqno: (memory) %d\n", snapshot->seqno); +} + +void xe_lrc_snapshot_free(struct xe_lrc_snapshot *snapshot) +{ + kfree(snapshot); +} diff --git a/drivers/gpu/drm/xe/xe_lrc.h b/drivers/gpu/drm/xe/xe_lrc.h index 28b1d3f404d4..d6dd4045d6d7 100644 --- a/drivers/gpu/drm/xe/xe_lrc.h +++ b/drivers/gpu/drm/xe/xe_lrc.h @@ -55,4 +55,8 @@ void xe_lrc_dump_default(struct drm_printer *p, void xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, struct xe_bb *bb); +struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc); +void xe_lrc_snapshot_print(struct xe_lrc_snapshot *snapshot, struct drm_printer *p); +void xe_lrc_snapshot_free(struct xe_lrc_snapshot *snapshot); + #endif diff --git a/drivers/gpu/drm/xe/xe_lrc_types.h b/drivers/gpu/drm/xe/xe_lrc_types.h index 24f20ed66fd1..b716df0dfb4e 100644 --- a/drivers/gpu/drm/xe/xe_lrc_types.h +++ b/drivers/gpu/drm/xe/xe_lrc_types.h @@ -43,4 +43,6 @@ struct xe_lrc { struct xe_hw_fence_ctx fence_ctx; }; +struct xe_lrc_snapshot; + #endif -- cgit v1.2.3 From 784b34100fc3c4a2c3c7f71009384bdb754f5517 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Tue, 27 Feb 2024 14:12:46 +0100 Subject: drm/xe: Add infrastructure for delayed LRC capture MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a xe_guc_exec_queue_snapshot_capture_delayed and xe_lrc_snapshot_capture_delayed function to capture the contents of LRC in the next patch. 
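The split being introduced, a cheap capture that is safe where the driver must not sleep, followed by a deferred phase that does the heavy copying from a worker, can be sketched in standalone C; all names and types here are invented, and plain malloc()/memcpy() stand in for the locked buffer-object copy the real code performs:

#include <stdlib.h>
#include <string.h>

struct snapshot {
        unsigned int head, tail;        /* cheap scalars, phase 1 */
        const void *source;             /* what phase 2 will copy */
        size_t size;
        void *deferred_copy;            /* filled in by phase 2 */
};

/* Phase 1: record only scalars, nothing that needs to block. */
static struct snapshot *snapshot_capture(const void *src, size_t size,
                                         unsigned int head, unsigned int tail)
{
        struct snapshot *s = malloc(sizeof(*s));

        if (s) {
                s->head = head;
                s->tail = tail;
                s->source = src;
                s->size = size;
                s->deferred_copy = NULL;
        }
        return s;
}

/* Phase 2: the expensive copy, run later from a worker. */
static void snapshot_capture_delayed(struct snapshot *s)
{
        if (!s || s->deferred_copy)
                return;
        s->deferred_copy = malloc(s->size);
        if (s->deferred_copy)
                memcpy(s->deferred_copy, s->source, s->size);
}

int main(void)
{
        char ring[64] = "ring contents";
        struct snapshot *s = snapshot_capture(ring, sizeof(ring), 0, 13);

        snapshot_capture_delayed(s);    /* would run in the worker */
        if (s)
                free(s->deferred_copy);
        free(s);
        return 0;
}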
Signed-off-by: Maarten Lankhorst Reviewed-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20240227131248.92910-2-maarten.lankhorst@linux.intel.com --- drivers/gpu/drm/xe/xe_devcoredump.c | 7 +++---- drivers/gpu/drm/xe/xe_guc_submit.c | 18 ++++++++++++++++++ drivers/gpu/drm/xe/xe_guc_submit.h | 2 ++ drivers/gpu/drm/xe/xe_lrc.c | 8 ++++++++ drivers/gpu/drm/xe/xe_lrc.h | 1 + drivers/gpu/drm/xe/xe_vm.c | 3 +++ 6 files changed, 35 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c index 68d3d623a05b..0fcd30680323 100644 --- a/drivers/gpu/drm/xe/xe_devcoredump.c +++ b/drivers/gpu/drm/xe/xe_devcoredump.c @@ -65,8 +65,8 @@ static void xe_devcoredump_deferred_snap_work(struct work_struct *work) struct xe_devcoredump_snapshot *ss = container_of(work, typeof(*ss), work); xe_force_wake_get(gt_to_fw(ss->gt), XE_FORCEWAKE_ALL); - if (ss->vm) - xe_vm_snapshot_capture_delayed(ss->vm); + xe_vm_snapshot_capture_delayed(ss->vm); + xe_guc_exec_queue_snapshot_capture_delayed(ss->ge); xe_force_wake_put(gt_to_fw(ss->gt), XE_FORCEWAKE_ALL); } @@ -196,8 +196,7 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump, coredump->snapshot.hwe[id] = xe_hw_engine_snapshot_capture(hwe); } - if (ss->vm) - queue_work(system_unbound_wq, &ss->work); + queue_work(system_unbound_wq, &ss->work); xe_force_wake_put(gt_to_fw(q->gt), XE_FORCEWAKE_ALL); dma_fence_end_signalling(cookie); diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index ed5c811208b2..19efdb2f881f 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -1865,6 +1865,24 @@ xe_guc_exec_queue_snapshot_capture(struct xe_sched_job *job) return snapshot; } +/** + * xe_guc_exec_queue_snapshot_capture_delayed - Take delayed part of snapshot of the GuC Engine. + * @snapshot: Previously captured snapshot of job. + * + * This captures some data that requires taking some locks, so it cannot be done in signaling path. + */ +void +xe_guc_exec_queue_snapshot_capture_delayed(struct xe_guc_submit_exec_queue_snapshot *snapshot) +{ + int i; + + if (!snapshot || !snapshot->lrc) + return; + + for (i = 0; i < snapshot->width; ++i) + xe_lrc_snapshot_capture_delayed(snapshot->lrc[i]); +} + /** * xe_guc_exec_queue_snapshot_print - Print out a given GuC Engine snapshot. * @snapshot: GuC Submit Engine snapshot object. 
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.h b/drivers/gpu/drm/xe/xe_guc_submit.h index 723dc2bd8df9..2f14dfd04722 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.h +++ b/drivers/gpu/drm/xe/xe_guc_submit.h @@ -29,6 +29,8 @@ int xe_guc_exec_queue_reset_failure_handler(struct xe_guc *guc, u32 *msg, u32 le struct xe_guc_submit_exec_queue_snapshot * xe_guc_exec_queue_snapshot_capture(struct xe_sched_job *job); void +xe_guc_exec_queue_snapshot_capture_delayed(struct xe_guc_submit_exec_queue_snapshot *snapshot); +void xe_guc_exec_queue_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snapshot, struct drm_printer *p); void diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index 046d954a26c3..3f32b1bd7e91 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -1359,6 +1359,14 @@ struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc) return snapshot; } +void xe_lrc_snapshot_capture_delayed(struct xe_lrc_snapshot *snapshot) +{ + if (!snapshot) + return; + + /* TODO: Copy status page */ +} + void xe_lrc_snapshot_print(struct xe_lrc_snapshot *snapshot, struct drm_printer *p) { if (!snapshot) diff --git a/drivers/gpu/drm/xe/xe_lrc.h b/drivers/gpu/drm/xe/xe_lrc.h index d6dd4045d6d7..d32fa31faa2c 100644 --- a/drivers/gpu/drm/xe/xe_lrc.h +++ b/drivers/gpu/drm/xe/xe_lrc.h @@ -56,6 +56,7 @@ void xe_lrc_dump_default(struct drm_printer *p, void xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, struct xe_bb *bb); struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc); +void xe_lrc_snapshot_capture_delayed(struct xe_lrc_snapshot *snapshot); void xe_lrc_snapshot_print(struct xe_lrc_snapshot *snapshot, struct drm_printer *p); void xe_lrc_snapshot_free(struct xe_lrc_snapshot *snapshot); diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index d28260351af2..643b3701a738 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -3377,6 +3377,9 @@ out_unlock: void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap) { + if (!snap) + return; + for (int i = 0; i < snap->num_snaps; i++) { struct xe_bo *bo = snap->snap[i].bo; struct iosys_map src; -- cgit v1.2.3 From 4d5242a003bb93c119e0346951a9938f60fecfb9 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Tue, 27 Feb 2024 14:12:47 +0100 Subject: drm/xe: Implement capture of HWSP and HWCTX MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Dump the HWCTX and HWSP as part of LRC capture. Changes since v1: - Use same layout for HWSP and HWCTX as VM bo's, to simplify dumping. 
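The dump added here packs the context image with the kernel's ascii85 helper, one 32-bit word per call. A standalone C sketch of the same encoding (zero compresses to "z", any other word becomes five base-85 digits offset from '!'; main() is just a driver for the example):

#include <stdint.h>
#include <stdio.h>

static const char *ascii85(uint32_t v, char out[6])
{
        int i;

        if (v == 0)
                return "z";     /* special case, as in ascii85_encode() */

        for (i = 4; i >= 0; i--) {
                out[i] = '!' + v % 85;
                v /= 85;
        }
        out[5] = '\0';
        return out;
}

int main(void)
{
        uint32_t words[] = { 0x0, 0xdeadbeef, 0x1 };
        char buf[6];
        unsigned int i;

        for (i = 0; i < 3; i++)
                printf("%s", ascii85(words[i], buf));
        printf("\n");
        return 0;
}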
Signed-off-by: Maarten Lankhorst Reviewed-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20240227131248.92910-3-maarten.lankhorst@linux.intel.com --- drivers/gpu/drm/xe/xe_lrc.c | 64 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 63 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index 3f32b1bd7e91..ff639fe9a181 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -5,6 +5,8 @@ #include "xe_lrc.h" +#include <linux/ascii85.h> + #include "instructions/xe_mi_commands.h" #include "instructions/xe_gfxpipe_commands.h" #include "instructions/xe_gfx_state_commands.h" @@ -33,6 +35,10 @@ #define ENGINE_INSTANCE_SHIFT 48 struct xe_lrc_snapshot { + struct xe_bo *lrc_bo; + void *lrc_snapshot; + unsigned long lrc_size, lrc_offset; + u32 context_desc; u32 head; struct { @@ -1356,19 +1362,47 @@ struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc) snapshot->tail.memory = xe_lrc_read_ctx_reg(lrc, CTX_RING_TAIL); snapshot->start_seqno = xe_lrc_start_seqno(lrc); snapshot->seqno = xe_lrc_seqno(lrc); + snapshot->lrc_bo = xe_bo_get(lrc->bo); + snapshot->lrc_offset = xe_lrc_pphwsp_offset(lrc); + snapshot->lrc_size = lrc->bo->size - snapshot->lrc_offset; + snapshot->lrc_snapshot = NULL; return snapshot; } void xe_lrc_snapshot_capture_delayed(struct xe_lrc_snapshot *snapshot) { + struct xe_bo *bo; + struct iosys_map src; + if (!snapshot) return; - /* TODO: Copy status page */ + bo = snapshot->lrc_bo; + snapshot->lrc_bo = NULL; + + snapshot->lrc_snapshot = kvmalloc(snapshot->lrc_size, GFP_KERNEL); + if (!snapshot->lrc_snapshot) + goto put_bo; + + dma_resv_lock(bo->ttm.base.resv, NULL); + if (!ttm_bo_vmap(&bo->ttm, &src)) { + xe_map_memcpy_from(xe_bo_device(bo), + snapshot->lrc_snapshot, &src, snapshot->lrc_offset, + snapshot->lrc_size); + ttm_bo_vunmap(&bo->ttm, &src); + } else { + kvfree(snapshot->lrc_snapshot); + snapshot->lrc_snapshot = NULL; + } + dma_resv_unlock(bo->ttm.base.resv); +put_bo: + xe_bo_put(bo); } void xe_lrc_snapshot_print(struct xe_lrc_snapshot *snapshot, struct drm_printer *p) { + unsigned long i; + if (!snapshot) return; @@ -1378,9 +1412,37 @@ void xe_lrc_snapshot_print(struct xe_lrc_snapshot *snapshot, struct drm_printer snapshot->tail.internal, snapshot->tail.memory); drm_printf(p, "\tStart seqno: (memory) %d\n", snapshot->start_seqno); drm_printf(p, "\tSeqno: (memory) %d\n", snapshot->seqno); + + if (!snapshot->lrc_snapshot) + return; + + drm_printf(p, "\t[HWSP].length: 0x%x\n", LRC_PPHWSP_SIZE); + drm_puts(p, "\t[HWSP].data: "); + for (i = 0; i < LRC_PPHWSP_SIZE; i += sizeof(u32)) { + u32 *val = snapshot->lrc_snapshot + i; + char dumped[ASCII85_BUFSZ]; + + drm_puts(p, ascii85_encode(*val, dumped)); + } + + drm_printf(p, "\n\t[HWCTX].length: 0x%lx\n", snapshot->lrc_size - LRC_PPHWSP_SIZE); + drm_puts(p, "\t[HWCTX].data: "); + for (; i < snapshot->lrc_size; i += sizeof(u32)) { + u32 *val = snapshot->lrc_snapshot + i; + char dumped[ASCII85_BUFSZ]; + + drm_puts(p, ascii85_encode(*val, dumped)); + } + drm_puts(p, "\n"); } void xe_lrc_snapshot_free(struct xe_lrc_snapshot *snapshot) { + if (!snapshot) + return; + + kvfree(snapshot->lrc_snapshot); + if (snapshot->lrc_bo) + xe_bo_put(snapshot->lrc_bo); kfree(snapshot); } -- cgit v1.2.3 From 3b85b7bcccf45f674d35c0bf65aa84153397380d Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Fri, 1 Mar 2024 13:05:23 -0500 Subject: drm/xe: Fix display runtime_pm handling i915's intel_runtime_pm_get_if_in_use actually calls the
pm_runtime_get_if_active() with ign_usage_count = false, but Xe was erroneously calling it with true because of the mem_access cases. This can lead to unnecessary references getting hold here and device never getting into the runtime suspended state. Let's use directly the 'if_in_use' function provided by linux/pm_runtime. Also, already start this new function protected from the runtime recursion, since runtime_pm will need to call for display functions for a proper D3Cold flow. v2: Update commit message based on Matt's feedback. Fix return condition of pm_runtime_get_if_in_use (Matt) Cc: Anshuman Gupta Cc: Matthew Auld Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20240301180526.643505-1-rodrigo.vivi@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h | 2 +- drivers/gpu/drm/xe/xe_pm.c | 17 +++++++++++++++++ drivers/gpu/drm/xe/xe_pm.h | 1 + 3 files changed, 19 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h index 420eba0e4be0..ad5864d1dd74 100644 --- a/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h +++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h @@ -177,7 +177,7 @@ static inline intel_wakeref_t intel_runtime_pm_get_if_in_use(struct xe_runtime_p { struct xe_device *xe = container_of(pm, struct xe_device, runtime_pm); - return xe_pm_runtime_get_if_active(xe); + return xe_pm_runtime_get_if_in_use(xe); } static inline void intel_runtime_pm_put_unchecked(struct xe_runtime_pm *pm) diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c index b5511e3c3153..7d7df78f3a2f 100644 --- a/drivers/gpu/drm/xe/xe_pm.c +++ b/drivers/gpu/drm/xe/xe_pm.c @@ -457,6 +457,23 @@ int xe_pm_runtime_get_if_active(struct xe_device *xe) return pm_runtime_get_if_active(xe->drm.dev, true); } +/** + * xe_pm_runtime_get_if_in_use - Get a runtime_pm reference and resume if needed + * @xe: xe device instance + * + * Returns: True if device is awake and the reference was taken, false otherwise. + */ +bool xe_pm_runtime_get_if_in_use(struct xe_device *xe) +{ + if (xe_pm_read_callback_task(xe) == current) { + /* The device is awake, grab the ref and move on */ + pm_runtime_get_noresume(xe->drm.dev); + return true; + } + + return pm_runtime_get_if_in_use(xe->drm.dev) > 0; +} + /** * xe_pm_assert_unbounded_bridge - Disable PM on unbounded pcie parent bridge * @xe: xe device instance diff --git a/drivers/gpu/drm/xe/xe_pm.h b/drivers/gpu/drm/xe/xe_pm.h index 7f5884babb29..bb4723a98405 100644 --- a/drivers/gpu/drm/xe/xe_pm.h +++ b/drivers/gpu/drm/xe/xe_pm.h @@ -30,6 +30,7 @@ int xe_pm_runtime_get(struct xe_device *xe); int xe_pm_runtime_get_ioctl(struct xe_device *xe); int xe_pm_runtime_put(struct xe_device *xe); int xe_pm_runtime_get_if_active(struct xe_device *xe); +bool xe_pm_runtime_get_if_in_use(struct xe_device *xe); void xe_pm_assert_unbounded_bridge(struct xe_device *xe); int xe_pm_set_vram_threshold(struct xe_device *xe, u32 threshold); void xe_pm_d3cold_allowed_toggle(struct xe_device *xe); -- cgit v1.2.3 From d6b4137822a1f8d1a6676c18dff551b394557b65 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Fri, 1 Mar 2024 13:05:24 -0500 Subject: drm/xe: Create a xe_pm_runtime_resume_and_get variant for display Introduce the resume and get to fulfill the display need for checking if the device was actually resumed (or it is awake) and the reference was taken. 
Then we can convert the remaining cases to a void function and have individual functions for individual cases. Also, already start this new function protected from the runtime recursion, since runtime_pm will need to call for display functions for a proper D3Cold flow. Cc: Anshuman Gupta Reviewed-by: Anshuman Gupta Link: https://patchwork.freedesktop.org/patch/msgid/20240301180526.643505-2-rodrigo.vivi@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h | 6 +----- drivers/gpu/drm/xe/xe_pm.c | 17 +++++++++++++++++ drivers/gpu/drm/xe/xe_pm.h | 1 + 3 files changed, 19 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h index ad5864d1dd74..fef969112b1d 100644 --- a/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h +++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h @@ -166,11 +166,7 @@ static inline intel_wakeref_t intel_runtime_pm_get(struct xe_runtime_pm *pm) { struct xe_device *xe = container_of(pm, struct xe_device, runtime_pm); - if (xe_pm_runtime_get(xe) < 0) { - xe_pm_runtime_put(xe); - return 0; - } - return 1; + return xe_pm_runtime_resume_and_get(xe); } static inline intel_wakeref_t intel_runtime_pm_get_if_in_use(struct xe_runtime_pm *pm) diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c index 7d7df78f3a2f..3e13a666fcc7 100644 --- a/drivers/gpu/drm/xe/xe_pm.c +++ b/drivers/gpu/drm/xe/xe_pm.c @@ -474,6 +474,23 @@ bool xe_pm_runtime_get_if_in_use(struct xe_device *xe) return pm_runtime_get_if_in_use(xe->drm.dev) > 0; } +/** + * xe_pm_runtime_resume_and_get - Resume, then get a runtime_pm ref if awake. + * @xe: xe device instance + * + * Returns: True if device is awake and the reference was taken, false otherwise. + */ +bool xe_pm_runtime_resume_and_get(struct xe_device *xe) +{ + if (xe_pm_read_callback_task(xe) == current) { + /* The device is awake, grab the ref and move on */ + pm_runtime_get_noresume(xe->drm.dev); + return true; + } + + return pm_runtime_resume_and_get(xe->drm.dev) >= 0; +} + /** * xe_pm_assert_unbounded_bridge - Disable PM on unbounded pcie parent bridge * @xe: xe device instance diff --git a/drivers/gpu/drm/xe/xe_pm.h b/drivers/gpu/drm/xe/xe_pm.h index bb4723a98405..29c20099a3f9 100644 --- a/drivers/gpu/drm/xe/xe_pm.h +++ b/drivers/gpu/drm/xe/xe_pm.h @@ -31,6 +31,7 @@ int xe_pm_runtime_get_ioctl(struct xe_device *xe); int xe_pm_runtime_put(struct xe_device *xe); int xe_pm_runtime_get_if_active(struct xe_device *xe); bool xe_pm_runtime_get_if_in_use(struct xe_device *xe); +bool xe_pm_runtime_resume_and_get(struct xe_device *xe); void xe_pm_assert_unbounded_bridge(struct xe_device *xe); int xe_pm_set_vram_threshold(struct xe_device *xe, u32 threshold); void xe_pm_d3cold_allowed_toggle(struct xe_device *xe); -- cgit v1.2.3 From 5c9da9fc64a1a6745175c7e77c3a0021a32560a5 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Fri, 1 Mar 2024 13:05:25 -0500 Subject: drm/xe: Convert xe_pm_runtime_{get, put} to void and protect from recursion With mem_access going away and pm_runtime getting called instead, we need to protect these against recursions. The put is asynchronous so there's no need to block it. However, for a proper balance, we need to ensure that the references are taken and restored regardless of the flow. So, let's convert them all to void and use some direct linux/pm_runtime functions. v2: Rebased and update commit message (Matt). 
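Reduced to its essence, the recursion guard used by these helpers can be sketched in standalone C: a usage counter plus a record of which thread is currently inside a PM callback. All names are invented, and comparing pthread_self() is a userspace stand-in for the kernel's check of the current task:

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

/* Toy runtime-PM core: a usage count plus the identity of the thread
 * currently running a suspend/resume callback, if any. */
static atomic_int usage_count;
static atomic_uintptr_t callback_task;

static bool in_pm_callback(void)
{
        /* Assumes pthread_t converts to an integer, true on Linux. */
        return atomic_load(&callback_task) == (uintptr_t)pthread_self();
}

static void rpm_get(void)
{
        atomic_fetch_add(&usage_count, 1);      /* always take the reference */

        if (in_pm_callback())
                return;                 /* resuming from here would recurse */

        /* trigger_hardware_resume() would go here */
}

static void rpm_put(void)
{
        /* The put side never needs to block: drop the reference and let
         * idling happen asynchronously. */
        atomic_fetch_sub(&usage_count, 1);
}

int main(void)
{
        rpm_get();
        rpm_put();
        return 0;
}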
Cc: Matthew Auld Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20240301180526.643505-3-rodrigo.vivi@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_pm.c | 25 ++++++++++++++----------- drivers/gpu/drm/xe/xe_pm.h | 4 ++-- 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c index 3e13a666fcc7..9fbb6f6c598a 100644 --- a/drivers/gpu/drm/xe/xe_pm.c +++ b/drivers/gpu/drm/xe/xe_pm.c @@ -408,26 +408,29 @@ out: /** * xe_pm_runtime_get - Get a runtime_pm reference and resume synchronously * @xe: xe device instance - * - * Returns: Any number greater than or equal to 0 for success, negative error - * code otherwise. */ -int xe_pm_runtime_get(struct xe_device *xe) +void xe_pm_runtime_get(struct xe_device *xe) { - return pm_runtime_get_sync(xe->drm.dev); + pm_runtime_get_noresume(xe->drm.dev); + + if (xe_pm_read_callback_task(xe) == current) + return; + + pm_runtime_resume(xe->drm.dev); } /** * xe_pm_runtime_put - Put the runtime_pm reference back and mark as idle * @xe: xe device instance - * - * Returns: Any number greater than or equal to 0 for success, negative error - * code otherwise. */ -int xe_pm_runtime_put(struct xe_device *xe) +void xe_pm_runtime_put(struct xe_device *xe) { - pm_runtime_mark_last_busy(xe->drm.dev); - return pm_runtime_put(xe->drm.dev); + if (xe_pm_read_callback_task(xe) == current) { + pm_runtime_put_noidle(xe->drm.dev); + } else { + pm_runtime_mark_last_busy(xe->drm.dev); + pm_runtime_put(xe->drm.dev); + } } /** diff --git a/drivers/gpu/drm/xe/xe_pm.h b/drivers/gpu/drm/xe/xe_pm.h index 29c20099a3f9..0cb38ca244fe 100644 --- a/drivers/gpu/drm/xe/xe_pm.h +++ b/drivers/gpu/drm/xe/xe_pm.h @@ -26,9 +26,9 @@ void xe_pm_runtime_fini(struct xe_device *xe); bool xe_pm_runtime_suspended(struct xe_device *xe); int xe_pm_runtime_suspend(struct xe_device *xe); int xe_pm_runtime_resume(struct xe_device *xe); -int xe_pm_runtime_get(struct xe_device *xe); +void xe_pm_runtime_get(struct xe_device *xe); int xe_pm_runtime_get_ioctl(struct xe_device *xe); -int xe_pm_runtime_put(struct xe_device *xe); +void xe_pm_runtime_put(struct xe_device *xe); int xe_pm_runtime_get_if_active(struct xe_device *xe); bool xe_pm_runtime_get_if_in_use(struct xe_device *xe); bool xe_pm_runtime_resume_and_get(struct xe_device *xe); -- cgit v1.2.3 From 198bc28d0a016831d788a2408cfe11fc09eee757 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Tue, 5 Mar 2024 09:35:03 -0800 Subject: drm/xe: Pipeline evict / restore of pinned BOs during suspend / resume Rather than waiting for each evict / restore of pinned BOs to complete, just wait once for the migrate exec queue to be idle during suspend / resume. Cc: Matthew Auld Signed-off-by: Matthew Brost Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20240305173503.285223-1-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_bo.c | 8 -------- drivers/gpu/drm/xe/xe_bo_evict.c | 2 +- 2 files changed, 1 insertion(+), 9 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 6603a0ea79c5..b89ac6db68a1 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -816,7 +816,6 @@ out: * @bo: The buffer object to move. * * On successful completion, the object memory will be moved to system memory. - * This function blocks until the object has been fully moved. * * This is needed for special handling of pinned VRAM objects during * suspend-resume.
@@ -873,9 +872,6 @@ int xe_bo_evict_pinned(struct xe_bo *bo) if (ret) goto err_res_free; - dma_resv_wait_timeout(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL, - false, MAX_SCHEDULE_TIMEOUT); - return 0; err_res_free: @@ -888,7 +884,6 @@ err_res_free: * @bo: The buffer object to move. * * On successful completion, the object memory will be moved back to VRAM. - * This function blocks until the object has been fully moved. * * This is needed for special handling of pinned VRAM objects during * suspend-resume. @@ -930,9 +925,6 @@ int xe_bo_restore_pinned(struct xe_bo *bo) if (ret) goto err_res_free; - dma_resv_wait_timeout(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL, - false, MAX_SCHEDULE_TIMEOUT); - return 0; err_res_free: diff --git a/drivers/gpu/drm/xe/xe_bo_evict.c b/drivers/gpu/drm/xe/xe_bo_evict.c index 7a264a9ca06e..630695088b96 100644 --- a/drivers/gpu/drm/xe/xe_bo_evict.c +++ b/drivers/gpu/drm/xe/xe_bo_evict.c @@ -220,7 +220,7 @@ int xe_bo_restore_user(struct xe_device *xe) list_splice_tail(&still_in_list, &xe->pinned.external_vram); spin_unlock(&xe->pinned.lock); - /* Wait for validate to complete */ + /* Wait for restore to complete */ for_each_tile(tile, xe, id) xe_tile_migrate_wait(tile); -- cgit v1.2.3 From 81f8729dbc0bbb21420d820e54253f804d88c530 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 29 Feb 2024 12:38:04 +0530 Subject: drm/xe/xelpg: Recognize graphics version 12.74 as Xe_LPG Graphics version 12.74 (which is technically called "Xe_LPG+") should be handled by the KMD the same as Xe_LPG versions 12.70/12.71. Only the workaround lists (handled in the next patch) will be a bit different. Bspec: 55420 Signed-off-by: Matt Roper Signed-off-by: Dnyaneshwar Bhadane Reviewed-by: Matt Atwood Link: https://patchwork.freedesktop.org/patch/msgid/20240229070806.3402641-2-dnyaneshwar.bhadane@intel.com --- drivers/gpu/drm/xe/xe_pci.c | 1 + drivers/gpu/drm/xe/xe_tuning.c | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 557f2d88a8c1..c401d4890386 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -343,6 +343,7 @@ __diag_pop(); static const struct gmdid_map graphics_ip_map[] = { { 1270, &graphics_xelpg }, { 1271, &graphics_xelpg }, + { 1274, &graphics_xelpg }, /* Xe_LPG+ */ { 2004, &graphics_xe2 }, }; diff --git a/drivers/gpu/drm/xe/xe_tuning.c b/drivers/gpu/drm/xe/xe_tuning.c index 5c83c75bc497..bb6db2817ada 100644 --- a/drivers/gpu/drm/xe/xe_tuning.c +++ b/drivers/gpu/drm/xe/xe_tuning.c @@ -50,7 +50,7 @@ static const struct xe_rtp_entry_sr gt_tunings[] = { static const struct xe_rtp_entry_sr engine_tunings[] = { { XE_RTP_NAME("Tuning: Set Indirect State Override"), - XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1271), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1274), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(SAMPLER_MODE, INDIRECT_STATE_BASE_ADDR_OVERRIDE)) }, @@ -88,7 +88,7 @@ static const struct xe_rtp_entry_sr lrc_tunings[] = { /* Xe_LPG */ { XE_RTP_NAME("Tuning: L3 cache"), - XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1271), ENGINE_CLASS(RENDER)), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1274), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(FIELD_SET(XEHP_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK, REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f))) }, -- cgit v1.2.3 From 70e860298478ecd0602ee436ded667356ecd6c9d Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 29 Feb 2024 12:38:05 +0530 Subject: drm/xe/xelpg: Extend some workarounds to graphics version 12.74 A handful of Xe_LPG
workarounds are also relevant to graphics version 12.74. Extend the graphics version range for these workarounds accordingly. Signed-off-by: Matt Roper Signed-off-by: Dnyaneshwar Bhadane Reviewed-by: Matt Atwood Link: https://patchwork.freedesktop.org/patch/msgid/20240229070806.3402641-3-dnyaneshwar.bhadane@intel.com --- drivers/gpu/drm/xe/tests/xe_wa_test.c | 1 + drivers/gpu/drm/xe/xe_wa.c | 8 ++++---- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/xe/tests/xe_wa_test.c b/drivers/gpu/drm/xe/tests/xe_wa_test.c index 44570d888355..9d0c715142b9 100644 --- a/drivers/gpu/drm/xe/tests/xe_wa_test.c +++ b/drivers/gpu/drm/xe/tests/xe_wa_test.c @@ -71,6 +71,7 @@ static const struct platform_test_case cases[] = { SUBPLATFORM_CASE(DG2, G12, A1), GMDID_CASE(METEORLAKE, 1270, A0, 1300, A0), GMDID_CASE(METEORLAKE, 1271, A0, 1300, A0), + GMDID_CASE(METEORLAKE, 1274, A0, 1300, A0), GMDID_CASE(LUNARLAKE, 2004, A0, 2000, A0), GMDID_CASE(LUNARLAKE, 2004, B0, 2000, A0), }; diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index a0264eedd443..bcc285f45a1e 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -173,11 +173,11 @@ static const struct xe_rtp_entry_sr gt_was[] = { XE_RTP_ACTIONS(CLR(MISCCPCTL, DOP_CLOCK_GATE_RENDER_ENABLE)) }, { XE_RTP_NAME("14018575942"), - XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1271)), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1274)), XE_RTP_ACTIONS(SET(COMP_MOD_CTRL, FORCE_MISS_FTLB)) }, { XE_RTP_NAME("22016670082"), - XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1271)), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1274)), XE_RTP_ACTIONS(SET(SQCNT1, ENFORCE_RAR)) }, @@ -397,7 +397,7 @@ static const struct xe_rtp_entry_sr engine_was[] = { /* Xe_LPG */ { XE_RTP_NAME("14017856879"), - XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1271), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1274), FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(ROW_CHICKEN3, DIS_FIX_EOT1_FLUSH)) }, @@ -537,7 +537,7 @@ static const struct xe_rtp_entry_sr lrc_was[] = { /* Xe_LPG */ { XE_RTP_NAME("18019271663"), - XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1271)), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1274)), XE_RTP_ACTIONS(SET(CACHE_MODE_1, MSAA_OPTIMIZATION_REDUC_DISABLE)) }, { XE_RTP_NAME("14019877138"), -- cgit v1.2.3 From 6a9b38cc68a03b948df1f3fcb907c7557cfc315c Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Tue, 5 Mar 2024 16:40:49 -0800 Subject: drm/xe/arl: Add Arrow Lake H support ARL-H uses the same media and display IP as MTL, and a version 12.74 graphics IP (referred to as Xe_LPG+). From a driver point of view, we should be able to just treat the whole platform as MTL and rely on GRAPHICS_VERx100 checks to handle any spots where ARL's Xe_LPG+ needs different handling from MTL's Xe_LPG (i.e., workarounds). v2: Resolve conflict and reorder PCI ids in sorted order v3: Append signed-off-by committer to this commit Bspec: 55420 Signed-off-by: Matt Roper Signed-off-by: Dnyaneshwar Bhadane Reviewed-by: Matt Atwood Link: https://patchwork.freedesktop.org/patch/msgid/20240229070806.3402641-4-dnyaneshwar.bhadane@intel.com --- include/drm/xe_pciids.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/include/drm/xe_pciids.h b/include/drm/xe_pciids.h index de1a344737bc..bc7cbef6e9d8 100644 --- a/include/drm/xe_pciids.h +++ b/include/drm/xe_pciids.h @@ -176,10 +176,13 @@ /* MTL / ARL */ #define XE_MTL_IDS(MACRO__, ...)
\ MACRO__(0x7D40, ## __VA_ARGS__), \ + MACRO__(0x7D41, ## __VA_ARGS__), \ MACRO__(0x7D45, ## __VA_ARGS__), \ + MACRO__(0x7D51, ## __VA_ARGS__), \ MACRO__(0x7D55, ## __VA_ARGS__), \ MACRO__(0x7D60, ## __VA_ARGS__), \ MACRO__(0x7D67, ## __VA_ARGS__), \ + MACRO__(0x7DD1, ## __VA_ARGS__), \ MACRO__(0x7DD5, ## __VA_ARGS__) #define XE_LNL_IDS(MACRO__, ...) \ -- cgit v1.2.3 From 27ee413bbc0b04146f4ee1c7444422bf18dafd47 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 29 Feb 2024 11:45:20 -0800 Subject: drm/xe: Do not grab forcewakes when issuing GGTT TLB invalidation via GuC Forcewakes are not required for communication with the GuC via CTB as it is a memory-based interface. Acquiring forcewakes takes considerable time. With that, do not grab a forcewake when issuing a GGTT TLB invalidation via the GuC. Cc: Maarten Lankhorst Cc: Lucas De Marchi Cc: Matt Roper Signed-off-by: Matthew Brost Reviewed-by: Maarten Lankhorst Link: https://patchwork.freedesktop.org/patch/msgid/20240229194520.200642-1-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_ggtt.c | 7 ------- drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c | 2 ++ 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index 717d0e76277a..5e739513ab0a 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -257,16 +257,9 @@ static void ggtt_invalidate_gt_tlb(struct xe_gt *gt) if (!gt) return; - /* - * Invalidation can happen when there's no in-flight work keeping the - * GT awake. We need to explicitly grab forcewake to ensure the GT - * and GuC are accessible. - */ - xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); err = xe_gt_tlb_invalidation_ggtt(gt); if (err) drm_warn(&xe->drm, "xe_gt_tlb_invalidation_ggtt error=%d", err); - xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); } void xe_ggtt_invalidate(struct xe_ggtt *ggtt) diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c index f29ee1ccfa71..a3c4ffba679d 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c @@ -247,6 +247,7 @@ int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt) xe_gt_tlb_invalidation_wait(gt, seqno); } else if (xe_device_uc_enabled(xe)) { + xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); if (xe->info.platform == XE_PVC || GRAPHICS_VER(xe) >= 20) { xe_mmio_write32(gt, PVC_GUC_TLB_INV_DESC1, PVC_GUC_TLB_INV_DESC1_INVALIDATE); @@ -256,6 +257,7 @@ int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt) xe_mmio_write32(gt, GUC_TLB_INV_CR, GUC_TLB_INV_CR_INVALIDATE); } + xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); } return 0; -- cgit v1.2.3 From c3fbdabd3181034e7c9b0c49bf8737499d21fafd Mon Sep 17 00:00:00 2001 From: Suraj Kandpal Date: Wed, 6 Mar 2024 08:17:42 +0530 Subject: drm/i915/hdcp: Move intel_hdcp_gsc_message def away from header file Move intel_hdcp_gsc_message definition into intel_hdcp_gsc.c so that intel_hdcp_gsc_message can be redefined for xe as needed.
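The move relies on the classic opaque-type pattern: the header keeps only a forward declaration, so each implementation is free to define the struct privately. A minimal standalone C illustration with invented names, both "files" shown inline:

/* widget.h: users only ever see the forward declaration */
struct widget;
struct widget *widget_create(int id);
int widget_id(const struct widget *w);

/* widget.c: the layout stays private to one translation unit, so a
 * different backend may define its own struct widget elsewhere. */
#include <stdlib.h>

struct widget {
        int id;                 /* backend-specific members go here */
};

struct widget *widget_create(int id)
{
        struct widget *w = malloc(sizeof(*w));

        if (w)
                w->id = id;
        return w;
}

int widget_id(const struct widget *w)
{
        return w->id;
}

int main(void)
{
        struct widget *w = widget_create(7);
        int id = w ? widget_id(w) : -1;

        free(w);
        return id == 7 ? 0 : 1;
}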
--v2 -Correct commit message to reflect what patch is actually doing [Arun] Signed-off-by: Suraj Kandpal Reviewed-by: Arun R Murthy Acked-by: Jani Nikula Signed-off-by: Mika Kahola Link: https://patchwork.freedesktop.org/patch/msgid/20240306024741.1858039-2-suraj.kandpal@intel.com --- drivers/gpu/drm/i915/display/intel_hdcp_gsc.c | 6 ++++++ drivers/gpu/drm/i915/display/intel_hdcp_gsc.h | 7 +------ 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_hdcp_gsc.c b/drivers/gpu/drm/i915/display/intel_hdcp_gsc.c index 302bff75b06c..35823e1f65d6 100644 --- a/drivers/gpu/drm/i915/display/intel_hdcp_gsc.c +++ b/drivers/gpu/drm/i915/display/intel_hdcp_gsc.c @@ -13,6 +13,12 @@ #include "intel_hdcp_gsc.h" #include "intel_hdcp_gsc_message.h" +struct intel_hdcp_gsc_message { + struct i915_vma *vma; + void *hdcp_cmd_in; + void *hdcp_cmd_out; +}; + bool intel_hdcp_gsc_cs_required(struct drm_i915_private *i915) { return DISPLAY_VER(i915) >= 14; diff --git a/drivers/gpu/drm/i915/display/intel_hdcp_gsc.h b/drivers/gpu/drm/i915/display/intel_hdcp_gsc.h index eba2057c5a9e..5f610df61cc9 100644 --- a/drivers/gpu/drm/i915/display/intel_hdcp_gsc.h +++ b/drivers/gpu/drm/i915/display/intel_hdcp_gsc.h @@ -10,12 +10,7 @@ #include struct drm_i915_private; - -struct intel_hdcp_gsc_message { - struct i915_vma *vma; - void *hdcp_cmd_in; - void *hdcp_cmd_out; -}; +struct intel_hdcp_gsc_message; bool intel_hdcp_gsc_cs_required(struct drm_i915_private *i915); ssize_t intel_hdcp_gsc_msg_send(struct drm_i915_private *i915, u8 *msg_in, -- cgit v1.2.3 From b8e7996f4126d89547ff410fe5ddbc7e47e80cee Mon Sep 17 00:00:00 2001 From: Suraj Kandpal Date: Wed, 6 Mar 2024 08:12:46 +0530 Subject: drm/xe/hdcp: Use xe_device struct Use xe_device struct instead of drm_i915_private so as to not cause confusion and comply with Xe standards as drm_i915_private is xe_device under the hood. --v2 -Fix commit message [Daniele] Signed-off-by: Suraj Kandpal Reviewed-by: Arun R Murthy Signed-off-by: Mika Kahola Link: https://patchwork.freedesktop.org/patch/msgid/20240306024247.1857881-4-suraj.kandpal@intel.com --- drivers/gpu/drm/xe/display/xe_hdcp_gsc.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c b/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c index 0f11a39333e2..5d1d0054b578 100644 --- a/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c +++ b/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c @@ -3,30 +3,31 @@ * Copyright 2023, Intel Corporation. 
*/ -#include "i915_drv.h" +#include #include "intel_hdcp_gsc.h" +#include "xe_device_types.h" -bool intel_hdcp_gsc_cs_required(struct drm_i915_private *i915) +bool intel_hdcp_gsc_cs_required(struct xe_device *xe) { return true; } -bool intel_hdcp_gsc_check_status(struct drm_i915_private *i915) +bool intel_hdcp_gsc_check_status(struct xe_device *xe) { return false; } -int intel_hdcp_gsc_init(struct drm_i915_private *i915) +int intel_hdcp_gsc_init(struct xe_device *xe) { - drm_info(&i915->drm, "HDCP support not yet implemented\n"); + drm_dbg_kms(&xe->drm, "HDCP support not yet implemented\n"); return -ENODEV; } -void intel_hdcp_gsc_fini(struct drm_i915_private *i915) +void intel_hdcp_gsc_fini(struct xe_device *xe) { } -ssize_t intel_hdcp_gsc_msg_send(struct drm_i915_private *i915, u8 *msg_in, +ssize_t intel_hdcp_gsc_msg_send(struct xe_device *xe, u8 *msg_in, size_t msg_in_len, u8 *msg_out, size_t msg_out_len) { -- cgit v1.2.3 From 4af50beb4e0f9e6aed9cd53436c099f1dba826f1 Mon Sep 17 00:00:00 2001 From: Suraj Kandpal Date: Wed, 6 Mar 2024 08:12:47 +0530 Subject: drm/xe: Use gsc_proxy_init_done to check proxy status Expose gsc_proxy_init_done so that we can check if gsc proxy has been initialized or not. --v2 -Check if GSC FW is enabled before taking forcewake ref [Daniele] --v3 -Directly call proxy check function inside if condition Signed-off-by: Suraj Kandpal Reviewed-by: Arun R Murthy Reviewed-by: Daniele Ceraolo Spurio Signed-off-by: Mika Kahola Link: https://patchwork.freedesktop.org/patch/msgid/20240306024247.1857881-5-suraj.kandpal@intel.com --- drivers/gpu/drm/xe/display/xe_hdcp_gsc.c | 29 ++++++++++++++++++++++++++++- drivers/gpu/drm/xe/xe_gsc_proxy.c | 4 ++-- drivers/gpu/drm/xe/xe_gsc_proxy.h | 1 + 3 files changed, 31 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c b/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c index 5d1d0054b578..3af5a86db3aa 100644 --- a/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c +++ b/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c @@ -4,8 +4,14 @@ */ #include + #include "intel_hdcp_gsc.h" #include "xe_device_types.h" +#include "xe_device.h" +#include "xe_gt.h" +#include "xe_gsc_proxy.h" +#include "xe_pm.h" +#include "xe_uc_fw.h" bool intel_hdcp_gsc_cs_required(struct xe_device *xe) { @@ -14,7 +20,28 @@ bool intel_hdcp_gsc_cs_required(struct xe_device *xe) bool intel_hdcp_gsc_check_status(struct xe_device *xe) { - return false; + struct xe_tile *tile = xe_device_get_root_tile(xe); + struct xe_gt *gt = tile->media_gt; + bool ret = true; + + if (!xe_uc_fw_is_enabled(&gt->uc.gsc.fw)) + return false; + + xe_pm_runtime_get(xe); + if (xe_force_wake_get(gt_to_fw(gt), XE_FW_GSC)) { + drm_dbg_kms(&xe->drm, + "failed to get forcewake to check proxy status\n"); + ret = false; + goto out; + } + + if (!xe_gsc_proxy_init_done(&gt->uc.gsc)) + ret = false; + + xe_force_wake_put(gt_to_fw(gt), XE_FW_GSC); +out: + xe_pm_runtime_put(xe); + return ret; } int intel_hdcp_gsc_init(struct xe_device *xe) diff --git a/drivers/gpu/drm/xe/xe_gsc_proxy.c b/drivers/gpu/drm/xe/xe_gsc_proxy.c index 309ef80e3b95..1ced6b4d4946 100644 --- a/drivers/gpu/drm/xe/xe_gsc_proxy.c +++ b/drivers/gpu/drm/xe/xe_gsc_proxy.c @@ -66,7 +66,7 @@ static inline struct xe_device *kdev_to_xe(struct device *kdev) return dev_get_drvdata(kdev); } -static bool gsc_proxy_init_done(struct xe_gsc *gsc) +bool xe_gsc_proxy_init_done(struct xe_gsc *gsc) { struct xe_gt *gt = gsc_to_gt(gsc); u32 fwsts1 = xe_mmio_read32(gt, HECI_FWSTS1(MTL_GSC_HECI1_BASE)); @@ -528,7 +528,7 @@ int xe_gsc_proxy_start(struct
xe_gsc *gsc) if (err) return err; - if (!gsc_proxy_init_done(gsc)) { + if (!xe_gsc_proxy_init_done(gsc)) { xe_gt_err(gsc_to_gt(gsc), "GSC FW reports proxy init not completed\n"); return -EIO; } diff --git a/drivers/gpu/drm/xe/xe_gsc_proxy.h b/drivers/gpu/drm/xe/xe_gsc_proxy.h index 908f9441f093..c511ade6b863 100644 --- a/drivers/gpu/drm/xe/xe_gsc_proxy.h +++ b/drivers/gpu/drm/xe/xe_gsc_proxy.h @@ -11,6 +11,7 @@ struct xe_gsc; int xe_gsc_proxy_init(struct xe_gsc *gsc); +bool xe_gsc_proxy_init_done(struct xe_gsc *gsc); void xe_gsc_proxy_remove(struct xe_gsc *gsc); int xe_gsc_proxy_start(struct xe_gsc *gsc); -- cgit v1.2.3 From 152f2df954d8703f8bb4807603d279fd8f173071 Mon Sep 17 00:00:00 2001 From: Suraj Kandpal Date: Wed, 6 Mar 2024 08:12:48 +0530 Subject: drm/xe/hdcp: Enable HDCP for XE Enable HDCP for Xe by defining functions which take care of interaction of HDCP as a client with the GSC CS interface. Add intel_hdcp_gsc_message to Makefile and add corresponding changes to xe_hdcp_gsc.c to make it build. --v2 -add kfree at appropriate place [Daniele] -remove useless define [Daniele] -move host session logic to xe_gsc_submit.c [Daniele] -call xe_gsc_check_and_update_pending directly in an if condition [Daniele] -use xe_device instead of drm_i915_private [Daniele] --v3 -use xe prefix for newly exposed function [Daniele] -remove client specific defines from intel_gsc_mtl_header [Daniele] -add missing kfree() [Daniele] -have NULL check for hdcp_message in finish function [Daniele] -dont have too many variable declarations in the same line [Daniele] --v4 -don't point the hdcp_message structure in xe_device to anything until it properly gets initialized [Daniele] --v5 -Squash commits for buildability --v6 -Order includes alphabetically [Lucas] Signed-off-by: Suraj Kandpal Reviewed-by: Arun R Murthy Signed-off-by: Mika Kahola Link: https://patchwork.freedesktop.org/patch/msgid/20240306024247.1857881-6-suraj.kandpal@intel.com --- drivers/gpu/drm/xe/Makefile | 1 + drivers/gpu/drm/xe/display/xe_hdcp_gsc.c | 202 ++++++++++++++++++++++++++++++- drivers/gpu/drm/xe/xe_gsc_submit.c | 15 +++ drivers/gpu/drm/xe/xe_gsc_submit.h | 1 + 4 files changed, 214 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index 5a428ca00f10..3c3e67885559 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -257,6 +257,7 @@ xe-$(CONFIG_DRM_XE_DISPLAY) += \ i915-display/intel_global_state.o \ i915-display/intel_gmbus.o \ i915-display/intel_hdcp.o \ + i915-display/intel_hdcp_gsc_message.o \ i915-display/intel_hdmi.o \ i915-display/intel_hotplug.o \ i915-display/intel_hotplug_irq.o \ diff --git a/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c b/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c index 3af5a86db3aa..dcde1d0ac1f8 100644 --- a/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c +++ b/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c @@ -4,15 +4,32 @@ */ #include +#include +#include +#include "abi/gsc_command_header_abi.h" #include "intel_hdcp_gsc.h" -#include "xe_device_types.h" +#include "intel_hdcp_gsc_message.h" +#include "xe_bo.h" #include "xe_device.h" -#include "xe_gt.h" +#include "xe_device_types.h" #include "xe_gsc_proxy.h" +#include "xe_gsc_submit.h" +#include "xe_gt.h" +#include "xe_map.h" #include "xe_pm.h" #include "xe_uc_fw.h" +#define HECI_MEADDRESS_HDCP 18 + +struct intel_hdcp_gsc_message { + struct xe_bo *hdcp_bo; + u64 hdcp_cmd_in; + u64 hdcp_cmd_out; +}; + +#define HDCP_GSC_HEADER_SIZE sizeof(struct intel_gsc_mtl_header) + bool 
intel_hdcp_gsc_cs_required(struct xe_device *xe) { return true; @@ -44,19 +61,194 @@ out: return ret; } +/* This function helps allocate memory for the command that we will send to gsc cs */ +static int intel_hdcp_gsc_initialize_message(struct xe_device *xe, + struct intel_hdcp_gsc_message *hdcp_message) +{ + struct xe_bo *bo = NULL; + u64 cmd_in, cmd_out; + int ret = 0; + + /* allocate object of two page for HDCP command memory and store it */ + xe_device_mem_access_get(xe); + bo = xe_bo_create_pin_map(xe, xe_device_get_root_tile(xe), NULL, PAGE_SIZE * 2, + ttm_bo_type_kernel, + XE_BO_CREATE_SYSTEM_BIT | + XE_BO_CREATE_GGTT_BIT); + + if (IS_ERR(bo)) { + drm_err(&xe->drm, "Failed to allocate bo for HDCP streaming command!\n"); + ret = PTR_ERR(bo); + goto out; + } + + cmd_in = xe_bo_ggtt_addr(bo); + cmd_out = cmd_in + PAGE_SIZE; + xe_map_memset(xe, &bo->vmap, 0, 0, bo->size); + + hdcp_message->hdcp_bo = bo; + hdcp_message->hdcp_cmd_in = cmd_in; + hdcp_message->hdcp_cmd_out = cmd_out; +out: + xe_device_mem_access_put(xe); + return ret; +} + +static int intel_hdcp_gsc_hdcp2_init(struct xe_device *xe) +{ + struct intel_hdcp_gsc_message *hdcp_message; + int ret; + + hdcp_message = kzalloc(sizeof(*hdcp_message), GFP_KERNEL); + + if (!hdcp_message) + return -ENOMEM; + + /* + * NOTE: No need to lock the comp mutex here as it is already + * going to be taken before this function called + */ + ret = intel_hdcp_gsc_initialize_message(xe, hdcp_message); + if (ret) { + drm_err(&xe->drm, "Could not initialize hdcp_message\n"); + kfree(hdcp_message); + return ret; + } + + xe->display.hdcp.hdcp_message = hdcp_message; + return ret; +} + +static const struct i915_hdcp_ops gsc_hdcp_ops = { + .initiate_hdcp2_session = intel_hdcp_gsc_initiate_session, + .verify_receiver_cert_prepare_km = + intel_hdcp_gsc_verify_receiver_cert_prepare_km, + .verify_hprime = intel_hdcp_gsc_verify_hprime, + .store_pairing_info = intel_hdcp_gsc_store_pairing_info, + .initiate_locality_check = intel_hdcp_gsc_initiate_locality_check, + .verify_lprime = intel_hdcp_gsc_verify_lprime, + .get_session_key = intel_hdcp_gsc_get_session_key, + .repeater_check_flow_prepare_ack = + intel_hdcp_gsc_repeater_check_flow_prepare_ack, + .verify_mprime = intel_hdcp_gsc_verify_mprime, + .enable_hdcp_authentication = intel_hdcp_gsc_enable_authentication, + .close_hdcp_session = intel_hdcp_gsc_close_session, +}; + int intel_hdcp_gsc_init(struct xe_device *xe) { - drm_dbg_kms(&xe->drm, "HDCP support not yet implemented\n"); - return -ENODEV; + struct i915_hdcp_arbiter *data; + int ret; + + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + + mutex_lock(&xe->display.hdcp.hdcp_mutex); + xe->display.hdcp.arbiter = data; + xe->display.hdcp.arbiter->hdcp_dev = xe->drm.dev; + xe->display.hdcp.arbiter->ops = &gsc_hdcp_ops; + ret = intel_hdcp_gsc_hdcp2_init(xe); + if (ret) + kfree(data); + + mutex_unlock(&xe->display.hdcp.hdcp_mutex); + + return ret; } void intel_hdcp_gsc_fini(struct xe_device *xe) { + struct intel_hdcp_gsc_message *hdcp_message = + xe->display.hdcp.hdcp_message; + + if (!hdcp_message) + return; + + xe_bo_unpin_map_no_vm(hdcp_message->hdcp_bo); + kfree(hdcp_message); +} + +static int xe_gsc_send_sync(struct xe_device *xe, + struct intel_hdcp_gsc_message *hdcp_message, + u32 msg_size_in, u32 msg_size_out, + u32 addr_out_off) +{ + struct xe_gt *gt = hdcp_message->hdcp_bo->tile->media_gt; + struct iosys_map *map = &hdcp_message->hdcp_bo->vmap; + struct xe_gsc *gsc = &gt->uc.gsc; + int ret; + + ret =
xe_gsc_pkt_submit_kernel(gsc, hdcp_message->hdcp_cmd_in, msg_size_in, + hdcp_message->hdcp_cmd_out, msg_size_out); + if (ret) { + drm_err(&xe->drm, "failed to send gsc HDCP msg (%d)\n", ret); + return ret; + } + + if (xe_gsc_check_and_update_pending(xe, map, 0, map, addr_out_off)) + return -EAGAIN; + + ret = xe_gsc_read_out_header(xe, map, addr_out_off, + sizeof(struct hdcp_cmd_header), NULL); + + return ret; } ssize_t intel_hdcp_gsc_msg_send(struct xe_device *xe, u8 *msg_in, size_t msg_in_len, u8 *msg_out, size_t msg_out_len) { - return -ENODEV; + const size_t max_msg_size = PAGE_SIZE - HDCP_GSC_HEADER_SIZE; + struct intel_hdcp_gsc_message *hdcp_message; + u64 host_session_id; + u32 msg_size_in, msg_size_out; + u32 addr_out_off, addr_in_wr_off = 0; + int ret, tries = 0; + + if (msg_in_len > max_msg_size || msg_out_len > max_msg_size) { + ret = -ENOSPC; + goto out; + } + + msg_size_in = msg_in_len + HDCP_GSC_HEADER_SIZE; + msg_size_out = msg_out_len + HDCP_GSC_HEADER_SIZE; + hdcp_message = xe->display.hdcp.hdcp_message; + addr_out_off = PAGE_SIZE; + + host_session_id = xe_gsc_create_host_session_id(); + xe_device_mem_access_get(xe); + addr_in_wr_off = xe_gsc_emit_header(xe, &hdcp_message->hdcp_bo->vmap, + addr_in_wr_off, HECI_MEADDRESS_HDCP, + host_session_id, msg_in_len); + xe_map_memcpy_to(xe, &hdcp_message->hdcp_bo->vmap, addr_in_wr_off, + msg_in, msg_in_len); + /* + * Keep sending request in case the pending bit is set no need to add + * message handle as we are using same address hence loc. of header is + * same and it will contain the message handle. we will send the message + * 20 times each message 50 ms apart + */ + do { + ret = xe_gsc_send_sync(xe, hdcp_message, msg_size_in, msg_size_out, + addr_out_off); + + /* Only try again if gsc says so */ + if (ret != -EAGAIN) + break; + + msleep(50); + + } while (++tries < 20); + + if (ret) + goto out; + + xe_map_memcpy_from(xe, msg_out, &hdcp_message->hdcp_bo->vmap, + addr_out_off + HDCP_GSC_HEADER_SIZE, + msg_out_len); + +out: + xe_device_mem_access_put(xe); + return ret; } diff --git a/drivers/gpu/drm/xe/xe_gsc_submit.c b/drivers/gpu/drm/xe/xe_gsc_submit.c index 348994b271be..9a18f5667db3 100644 --- a/drivers/gpu/drm/xe/xe_gsc_submit.c +++ b/drivers/gpu/drm/xe/xe_gsc_submit.c @@ -40,6 +40,21 @@ gsc_to_gt(struct xe_gsc *gsc) return container_of(gsc, struct xe_gt, uc.gsc); } +/** + * xe_gsc_get_host_session_id - Creates a random 64 bit host_session id with + * bits 56-63 masked. 
+ * + * Returns: random host_session_id which can be used to send messages to gsc cs + */ +u64 xe_gsc_create_host_session_id(void) +{ + u64 host_session_id; + + get_random_bytes(&host_session_id, sizeof(u64)); + host_session_id &= ~HOST_SESSION_CLIENT_MASK; + return host_session_id; +} + /** * xe_gsc_emit_header - write the MTL GSC header in memory * @xe: the Xe device diff --git a/drivers/gpu/drm/xe/xe_gsc_submit.h b/drivers/gpu/drm/xe/xe_gsc_submit.h index 1939855031a6..1416b5745a4c 100644 --- a/drivers/gpu/drm/xe/xe_gsc_submit.h +++ b/drivers/gpu/drm/xe/xe_gsc_submit.h @@ -28,4 +28,5 @@ int xe_gsc_read_out_header(struct xe_device *xe, int xe_gsc_pkt_submit_kernel(struct xe_gsc *gsc, u64 addr_in, u32 size_in, u64 addr_out, u32 size_out); +u64 xe_gsc_create_host_session_id(void); #endif -- cgit v1.2.3 From e89f4967d90c8fe9475913e5c0d9909948602ae1 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Mon, 4 Mar 2024 15:31:03 -0800 Subject: drm/xe: Drop WA 16015675438 With dynamic load-balancing disabled on the compute side, there's no reason left to enable WA 16015675438. Drop it from both PVC and DG2. Note that this can be done because now the driver always set a fixed partition of EUs during initialization via the ccs_mode configuration. Cc: Mateusz Jablonski Cc: Michal Mrozek Reviewed-by: Rodrigo Vivi Acked-by: Michal Mrozek Acked-by: Mateusz Jablonski Link: https://patchwork.freedesktop.org/patch/msgid/20240304233103.1687412-1-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_guc.c | 2 +- drivers/gpu/drm/xe/xe_wa.c | 11 ----------- drivers/gpu/drm/xe/xe_wa_oob.rules | 3 --- 3 files changed, 1 insertion(+), 15 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index 0d2a2dd13f11..caa86ccbe9e7 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -164,7 +164,7 @@ static u32 guc_ctl_wa_flags(struct xe_guc *guc) if (XE_WA(gt, 22012727170) || XE_WA(gt, 22012727685)) flags |= GUC_WA_CONTEXT_ISOLATION; - if ((XE_WA(gt, 16015675438) || XE_WA(gt, 18020744125)) && + if (XE_WA(gt, 18020744125) && !xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_RENDER)) flags |= GUC_WA_RCS_REGS_IN_CCS_REGS_LIST; diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index bcc285f45a1e..2460c570e628 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -328,12 +328,6 @@ static const struct xe_rtp_entry_sr engine_was[] = { FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(ROW_CHICKEN4, XEHP_DIS_BBL_SYSPIPE)) }, - { XE_RTP_NAME("16015675438"), - XE_RTP_RULES(PLATFORM(DG2), - FUNC(xe_rtp_match_first_render_or_compute)), - XE_RTP_ACTIONS(SET(FF_SLICE_CS_CHICKEN2(RENDER_RING_BASE), - PERF_FIX_BALANCING_CFE_DISABLE)) - }, { XE_RTP_NAME("18028616096"), XE_RTP_RULES(PLATFORM(DG2), FUNC(xe_rtp_match_first_render_or_compute)), @@ -383,11 +377,6 @@ static const struct xe_rtp_entry_sr engine_was[] = { XE_RTP_RULES(PLATFORM(PVC), FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(ROW_CHICKEN4, XEHP_DIS_BBL_SYSPIPE)) }, - { XE_RTP_NAME("16015675438"), - XE_RTP_RULES(PLATFORM(PVC), FUNC(xe_rtp_match_first_render_or_compute)), - XE_RTP_ACTIONS(SET(FF_SLICE_CS_CHICKEN2(RENDER_RING_BASE), - PERF_FIX_BALANCING_CFE_DISABLE)) - }, { XE_RTP_NAME("14014999345"), XE_RTP_RULES(PLATFORM(PVC), ENGINE_CLASS(COMPUTE), GRAPHICS_STEP(B0, C0)), diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules index b138cbd51bdb..48cdba1cbf95 100644 --- 
a/drivers/gpu/drm/xe/xe_wa_oob.rules +++ b/drivers/gpu/drm/xe/xe_wa_oob.rules @@ -4,9 +4,6 @@ 22011391025 PLATFORM(DG2) 22012727170 SUBPLATFORM(DG2, G11) 22012727685 SUBPLATFORM(DG2, G11) -16015675438 PLATFORM(PVC) - SUBPLATFORM(DG2, G10) - SUBPLATFORM(DG2, G12) 18020744125 PLATFORM(PVC) 1509372804 PLATFORM(PVC), GRAPHICS_STEP(A0, C0) 1409600907 GRAPHICS_VERSION_RANGE(1200, 1250) -- cgit v1.2.3 From 79f944eeddc9fcb4479844c57c495b621970c932 Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Fri, 23 Feb 2024 15:30:43 +0100 Subject: drm/xe: Remove unused 'create' parameter from queue property logic The 'create' parameter in exec_queue_user_extensions was always true. This commit removes the dead parameter and all the relevant dead code. v2: rebase. Signed-off-by: Nirmoy Das Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240223143043.22779-1-nirmoy.das@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_exec_queue.c | 30 +++++++++++------------------- 1 file changed, 11 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index 11e150f4c0c1..6a83bc57826a 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -31,7 +31,7 @@ enum xe_exec_queue_sched_prop { }; static int exec_queue_user_extensions(struct xe_device *xe, struct xe_exec_queue *q, - u64 extensions, int ext_number, bool create); + u64 extensions, int ext_number); static struct xe_exec_queue *__xe_exec_queue_alloc(struct xe_device *xe, struct xe_vm *vm, @@ -79,7 +79,7 @@ static struct xe_exec_queue *__xe_exec_queue_alloc(struct xe_device *xe, * may set q->usm, must come before xe_lrc_init(), * may overwrite q->sched_props, must come before q->ops->init() */ - err = exec_queue_user_extensions(xe, q, extensions, 0, true); + err = exec_queue_user_extensions(xe, q, extensions, 0); if (err) { kfree(q); return ERR_PTR(err); @@ -268,7 +268,7 @@ xe_exec_queue_device_get_max_priority(struct xe_device *xe) } static int exec_queue_set_priority(struct xe_device *xe, struct xe_exec_queue *q, - u64 value, bool create) + u64 value) { if (XE_IOCTL_DBG(xe, value > XE_EXEC_QUEUE_PRIORITY_HIGH)) return -EINVAL; @@ -276,9 +276,6 @@ static int exec_queue_set_priority(struct xe_device *xe, struct xe_exec_queue *q if (XE_IOCTL_DBG(xe, value > xe_exec_queue_device_get_max_priority(xe))) return -EPERM; - if (!create) - return q->ops->set_priority(q, value); - q->sched_props.priority = value; return 0; } @@ -336,7 +333,7 @@ xe_exec_queue_get_prop_minmax(struct xe_hw_engine_class_intf *eclass, } static int exec_queue_set_timeslice(struct xe_device *xe, struct xe_exec_queue *q, - u64 value, bool create) + u64 value) { u32 min = 0, max = 0; @@ -347,16 +344,13 @@ static int exec_queue_set_timeslice(struct xe_device *xe, struct xe_exec_queue * !xe_hw_engine_timeout_in_range(value, min, max)) return -EINVAL; - if (!create) - return q->ops->set_timeslice(q, value); - q->sched_props.timeslice_us = value; return 0; } typedef int (*xe_exec_queue_set_property_fn)(struct xe_device *xe, struct xe_exec_queue *q, - u64 value, bool create); + u64 value); static const xe_exec_queue_set_property_fn exec_queue_set_property_funcs[] = { [DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY] = exec_queue_set_priority, @@ -365,8 +359,7 @@ static const xe_exec_queue_set_property_fn exec_queue_set_property_funcs[] = { static int exec_queue_user_ext_set_property(struct xe_device *xe, struct xe_exec_queue *q, - u64 extension, - bool create) + u64 extension) { 
u64 __user *address = u64_to_user_ptr(extension); struct drm_xe_ext_set_property ext; @@ -388,13 +381,12 @@ static int exec_queue_user_ext_set_property(struct xe_device *xe, if (!exec_queue_set_property_funcs[idx]) return -EINVAL; - return exec_queue_set_property_funcs[idx](xe, q, ext.value, create); + return exec_queue_set_property_funcs[idx](xe, q, ext.value); } typedef int (*xe_exec_queue_user_extension_fn)(struct xe_device *xe, struct xe_exec_queue *q, - u64 extension, - bool create); + u64 extension); static const xe_exec_queue_set_property_fn exec_queue_user_extension_funcs[] = { [DRM_XE_EXEC_QUEUE_EXTENSION_SET_PROPERTY] = exec_queue_user_ext_set_property, @@ -402,7 +394,7 @@ static const xe_exec_queue_set_property_fn exec_queue_user_extension_funcs[] = { #define MAX_USER_EXTENSIONS 16 static int exec_queue_user_extensions(struct xe_device *xe, struct xe_exec_queue *q, - u64 extensions, int ext_number, bool create) + u64 extensions, int ext_number) { u64 __user *address = u64_to_user_ptr(extensions); struct drm_xe_user_extension ext; @@ -423,13 +415,13 @@ static int exec_queue_user_extensions(struct xe_device *xe, struct xe_exec_queue idx = array_index_nospec(ext.name, ARRAY_SIZE(exec_queue_user_extension_funcs)); - err = exec_queue_user_extension_funcs[idx](xe, q, extensions, create); + err = exec_queue_user_extension_funcs[idx](xe, q, extensions); if (XE_IOCTL_DBG(xe, err)) return err; if (ext.next_extension) return exec_queue_user_extensions(xe, q, ext.next_extension, - ++ext_number, create); + ++ext_number); return 0; } -- cgit v1.2.3 From af7b93d1d7eeeef674681ddea875be6a29857a5d Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Wed, 6 Mar 2024 15:31:10 -0500 Subject: drm/xe: Return immediately on tile_init failure There's no reason to proceed with applying workaround and initing sysfs if we are going to abort the probe upon failure. Fixes: e5a845fd8fa4 ("drm/xe: Add sysfs entry for tile") Cc: Lucas De Marchi Cc: Matt Roper Cc: Matthew Auld Reviewed-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20240306203110.146387-1-rodrigo.vivi@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_tile.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_tile.c b/drivers/gpu/drm/xe/xe_tile.c index 044c20881de7..0650b2fa75ef 100644 --- a/drivers/gpu/drm/xe/xe_tile.c +++ b/drivers/gpu/drm/xe/xe_tile.c @@ -167,9 +167,10 @@ int xe_tile_init_noalloc(struct xe_tile *tile) goto err_mem_access; tile->mem.kernel_bb_pool = xe_sa_bo_manager_init(tile, SZ_1M, 16); - if (IS_ERR(tile->mem.kernel_bb_pool)) + if (IS_ERR(tile->mem.kernel_bb_pool)) { err = PTR_ERR(tile->mem.kernel_bb_pool); - + goto err_mem_access; + } xe_wa_apply_tile_workarounds(tile); xe_tile_sysfs_init(tile); -- cgit v1.2.3 From 51e9ddc668c7d8b9ef9c0037fe4aa09387dbee2a Mon Sep 17 00:00:00 2001 From: Dawei Li Date: Sun, 4 Feb 2024 14:23:24 +0800 Subject: drm/xe: Declare __xe_lrc_*_ggtt_addr with __maybe__unused Kernel test robot reports building error: drivers/gpu/drm/xe/xe_lrc.c:544:1: error: unused function '__xe_lrc_regs_ggtt_addr' [-Werror,-Wunused-function] 544 | DECL_MAP_ADDR_HELPERS(regs) | ^~~~~~~~~~~~~~~~~~~~~~~~~~~ drivers/gpu/drm/xe/xe_lrc.c:536:19: note: expanded from macro 'DECL_MAP_ADDR_HELPERS' 536 | static inline u32 __xe_lrc_##elem##_ggtt_addr(struct xe_lrc *lrc) \ Declare __xe_lrc_*_ggtt_addr with __maybe_unused to address it. 
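As a minimal sketch of the fix (standalone C with made-up names, not the driver macro): a macro that stamps out families of static inline helpers trips clang's -Wunused-function for any instantiation that happens to have no caller, and __maybe_unused marks that as intentional.

/* Userspace stand-in for the kernel's <linux/compiler_attributes.h> macro */
#define __maybe_unused __attribute__((__unused__))

#define DECL_ADDR_HELPERS(elem)						\
static inline unsigned int elem##_offset(void)				\
{									\
	return 0x40;							\
}									\
static inline unsigned int __maybe_unused elem##_ggtt_addr(void)	\
{									\
	return 0x1000 + elem##_offset();				\
}

DECL_ADDR_HELPERS(regs)

int main(void)
{
	/* regs_ggtt_addr() is never called, but no longer warns */
	return regs_offset() == 0x40 ? 0 : 1;
}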
Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202402010928.g3j2aSBL-lkp@intel.com/ Signed-off-by: Dawei Li Link: https://patchwork.freedesktop.org/patch/msgid/20240204062324.3548268-1-dawei.li@shingroup.cn Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_lrc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index ff639fe9a181..3c4d31703207 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -655,7 +655,7 @@ static inline struct iosys_map __xe_lrc_##elem##_map(struct xe_lrc *lrc) \ iosys_map_incr(&map, __xe_lrc_##elem##_offset(lrc)); \ return map; \ } \ -static inline u32 __xe_lrc_##elem##_ggtt_addr(struct xe_lrc *lrc) \ +static inline u32 __maybe_unused __xe_lrc_##elem##_ggtt_addr(struct xe_lrc *lrc) \ { \ return xe_bo_ggtt_addr(lrc->bo) + __xe_lrc_##elem##_offset(lrc); \ } \ -- cgit v1.2.3 From 2a2e1107ef037bbeb29d7d78ae31825051fb174f Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Tue, 5 Mar 2024 21:19:59 -0800 Subject: drm/xe: Fix NULL check in xe_ggtt_init() The NULL check for GT is after calling gt_to_xe, fix it. Fixes: 3121fed0c51b ("drm/xe: Cleanup some layering in GGTT") Cc: Matthew Brost Signed-off-by: Matthew Brost Signed-off-by: Maarten Lankhorst Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240306052002.311196-2-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_ggtt.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index 5e739513ab0a..325337c38961 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -251,7 +251,6 @@ err: static void ggtt_invalidate_gt_tlb(struct xe_gt *gt) { - struct xe_device *xe = gt_to_xe(gt); int err; if (!gt) @@ -259,7 +258,7 @@ static void ggtt_invalidate_gt_tlb(struct xe_gt *gt) err = xe_gt_tlb_invalidation_ggtt(gt); if (err) - drm_warn(&xe->drm, "xe_gt_tlb_invalidation_ggtt error=%d", err); + drm_warn(&gt_to_xe(gt)->drm, "xe_gt_tlb_invalidation_ggtt error=%d", err); } void xe_ggtt_invalidate(struct xe_ggtt *ggtt) -- cgit v1.2.3 From 310f9d137c0e090f53c8d02e1c33b88de7086a6e Mon Sep 17 00:00:00 2001 From: Suraj Kandpal Date: Thu, 7 Mar 2024 10:25:34 +0530 Subject: drm/xe/gsc: Fix kernel doc for xe_gsc_create_host_session_id Fix documentation for xe_gsc_create_host_session_id which was xe_gsc_get_host_session_id. Fixes: 152f2df954d8 ("drm/xe/hdcp: Enable HDCP for XE") Signed-off-by: Suraj Kandpal Reviewed-by: Himal Prasad Ghimiray Signed-off-by: Uma Shankar Link: https://patchwork.freedesktop.org/patch/msgid/20240307045533.1867892-2-suraj.kandpal@intel.com --- drivers/gpu/drm/xe/xe_gsc_submit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_gsc_submit.c b/drivers/gpu/drm/xe/xe_gsc_submit.c index 9a18f5667db3..d34d03248843 100644 --- a/drivers/gpu/drm/xe/xe_gsc_submit.c +++ b/drivers/gpu/drm/xe/xe_gsc_submit.c @@ -41,7 +41,7 @@ gsc_to_gt(struct xe_gsc *gsc) } /** - * xe_gsc_get_host_session_id - Creates a random 64 bit host_session id with + * xe_gsc_create_host_session_id - Creates a random 64 bit host_session id with * bits 56-63 masked.
* * Returns: random host_session_id which can be used to send messages to gsc cs -- cgit v1.2.3 From fb4511c9e63b2d908497d87ae82e6da688c96b80 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 6 Mar 2024 11:21:27 -0800 Subject: drm/xe: Remove unused FF_SLICE_CS_CHICKEN2 Commit e89f4967d90c ("drm/xe: Drop WA 16015675438") removed the only user of that register and should have removed it. Remove it now. Reviewed-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20240306192128.1895603-1-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/regs/xe_engine_regs.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h index 0b1266c88a6a..a08528d9c76b 100644 --- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h @@ -104,9 +104,6 @@ #define FF_SLICE_CS_CHICKEN1(base) XE_REG((base) + 0xe0, XE_REG_OPTION_MASKED) #define FFSC_PERCTX_PREEMPT_CTRL REG_BIT(14) -#define FF_SLICE_CS_CHICKEN2(base) XE_REG((base) + 0xe4, XE_REG_OPTION_MASKED) -#define PERF_FIX_BALANCING_CFE_DISABLE REG_BIT(15) - #define CS_DEBUG_MODE1(base) XE_REG((base) + 0xec, XE_REG_OPTION_MASKED) #define FF_DOP_CLOCK_GATE_DISABLE REG_BIT(1) #define REPLAY_MODE_GRANULARITY REG_BIT(0) -- cgit v1.2.3 From 71e721485c77bd74a8af44f7907f72af1dec1af6 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 6 Mar 2024 11:21:28 -0800 Subject: drm/xe/pvc: Fix WA 18020744125 With the current state GUC_WA_RCS_REGS_IN_CCS_REGS_LIST could in theory be removed since there is no render register being added to the list of compute WAs. However the real issue is that 18020744125 is incomplete and not setting the RING_HWSTAM on render as it should. Writing this in RTP is a little more tricky as we want to write to another's engine base when the match happens: first compute engine and no render present. So use RING_HWSTAM(RENDER_RING_BASE) instead of the usual XE_RTP_ACTION_FLAG(ENGINE_BASE). Reviewed-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20240306192128.1895603-2-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_wa.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index 2460c570e628..54740d246310 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -377,6 +377,11 @@ static const struct xe_rtp_entry_sr engine_was[] = { XE_RTP_RULES(PLATFORM(PVC), FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(ROW_CHICKEN4, XEHP_DIS_BBL_SYSPIPE)) }, + { XE_RTP_NAME("18020744125"), + XE_RTP_RULES(PLATFORM(PVC), FUNC(xe_rtp_match_first_render_or_compute), + ENGINE_CLASS(COMPUTE)), + XE_RTP_ACTIONS(SET(RING_HWSTAM(RENDER_RING_BASE), ~0)) + }, { XE_RTP_NAME("14014999345"), XE_RTP_RULES(PLATFORM(PVC), ENGINE_CLASS(COMPUTE), GRAPHICS_STEP(B0, C0)), -- cgit v1.2.3 From 89d030804831c4075496629343ae3bb3ae8ff58f Mon Sep 17 00:00:00 2001 From: Suraj Kandpal Date: Fri, 8 Mar 2024 21:19:40 +0530 Subject: drm/xe/hdcp: Fix condition for hdcp gsc cs requirement Add condition for check of hdcp gsc cs requirement rather than assuming gsc cs to always be required when xe is loaded. 
It is not required for display version < 14 --v2 -Use display version in commit message [Lucas] Fixes: 152f2df954d8 ("drm/xe/hdcp: Enable HDCP for XE") Signed-off-by: Suraj Kandpal Acked-by: Jani Nikula Reviewed-by: Lucas De Marchi Signed-off-by: Uma Shankar Link: https://patchwork.freedesktop.org/patch/msgid/20240308154939.1940960-2-suraj.kandpal@intel.com --- drivers/gpu/drm/xe/display/xe_hdcp_gsc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c b/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c index dcde1d0ac1f8..25c73602ef55 100644 --- a/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c +++ b/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c @@ -32,7 +32,7 @@ struct intel_hdcp_gsc_message { bool intel_hdcp_gsc_cs_required(struct xe_device *xe) { - return true; + return DISPLAY_VER(xe) >= 14; } bool intel_hdcp_gsc_check_status(struct xe_device *xe) -- cgit v1.2.3 From 8e61e3192a77e1006c1c54c022173026f7afae2c Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Tue, 12 Mar 2024 14:12:25 -0700 Subject: drm/xe/uapi: Add IP version and stepping to GT list query For modern platforms (MTL and later), both kernel and userspace drivers are expected to apply GT programming and workarounds based on the IP version and stepping self-reported by the GT hardware via the GMD_ID registers. Since userspace drivers can't access these registers directly, pass along the version and stepping information via the GT list query. Note that the new query fields will remain 0's when running on pre-GMD_ID platforms. Userspace is expected to continue using PCI devid / revid on those older platforms. Although the hardware also has a GMD_ID register for display version/stepping, that value is intentionally *not* included anywhere in the Xe uapi. Display userspace should be using platform-agnostic APIs and auto-detecting platform capabilities rather than matching specific IP versions. v2: - s/revid/rev/ (Lucas) - Fix kerneldoc copy/paste mistakes Signed-off-by: Matt Roper Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240312211229.2871288-4-matthew.d.roper@intel.com --- drivers/gpu/drm/xe/xe_query.c | 8 ++++++++ include/uapi/drm/xe_drm.h | 10 +++++++++- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c index a6a20a6dd360..e80321b34918 100644 --- a/drivers/gpu/drm/xe/xe_query.c +++ b/drivers/gpu/drm/xe/xe_query.c @@ -12,6 +12,7 @@ #include #include "regs/xe_engine_regs.h" +#include "regs/xe_gt_regs.h" #include "xe_bo.h" #include "xe_device.h" #include "xe_exec_queue.h" @@ -401,6 +402,13 @@ static int query_gt_list(struct xe_device *xe, struct drm_xe_device_query *query BIT(gt_to_tile(gt)->id) << 1; gt_list->gt_list[id].far_mem_regions = xe->info.mem_region_mask ^ gt_list->gt_list[id].near_mem_regions; + + gt_list->gt_list[id].ip_ver_major = + REG_FIELD_GET(GMD_ID_ARCH_MASK, gt->info.gmdid); + gt_list->gt_list[id].ip_ver_minor = + REG_FIELD_GET(GMD_ID_RELEASE_MASK, gt->info.gmdid); + gt_list->gt_list[id].ip_ver_rev = + REG_FIELD_GET(GMD_ID_REVID, gt->info.gmdid); } if (copy_to_user(query_ptr, gt_list, size)) { diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 2fc19177d2b0..808ad1c308ec 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -459,8 +459,16 @@ struct drm_xe_gt { * by struct drm_xe_query_mem_regions' mem_class. 
*/ __u64 far_mem_regions; + /** @ip_ver_major: Graphics/media IP major version on GMD_ID platforms */ + __u16 ip_ver_major; + /** @ip_ver_minor: Graphics/media IP minor version on GMD_ID platforms */ + __u16 ip_ver_minor; + /** @ip_ver_rev: Graphics/media IP revision version on GMD_ID platforms */ + __u16 ip_ver_rev; + /** @pad2: MBZ */ + __u16 pad2; /** @reserved: Reserved */ - __u64 reserved[8]; + __u64 reserved[7]; }; /** -- cgit v1.2.3 From 521db22a1d70dbc596a07544a738416025b1b63c Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Tue, 12 Mar 2024 11:39:07 -0700 Subject: drm/xe: Invalidate userptr VMA on page pin fault MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rather than return an error to the user or ban the VM when userptr VMA page pin fails with -EFAULT, invalidate VMA mappings. This supports the UMD use case of freeing userptr while still having bindings. Now that non-faulting VMs can invalidate VMAs, drop the usm prefix for the tile_invalidated member. v2: - Fix build error (CI) v3: - Don't invalidate VMA if in fault mode, rather kill VM (Thomas) - Update commit message with tile_invalidated name change (Thomas) - Wait VM bookkeep slots with VM resv lock (Thomas) v4: - Move list_del_init(&userptr.repin_link) after error check (Thomas) - Assert not in fault mode (Matthew) Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Signed-off-by: Matthew Brost Reviewed-by: Thomas Hellström Link: https://patchwork.freedesktop.org/patch/msgid/20240312183907.933835-1-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_gt_pagefault.c | 4 ++-- drivers/gpu/drm/xe/xe_trace.h | 2 +- drivers/gpu/drm/xe/xe_vm.c | 32 ++++++++++++++++++++++++-------- drivers/gpu/drm/xe/xe_vm_types.h | 7 ++----- 4 files changed, 29 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c index 73c535193a98..241c294270d9 100644 --- a/drivers/gpu/drm/xe/xe_gt_pagefault.c +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c @@ -69,7 +69,7 @@ static bool access_is_atomic(enum access_type access_type) static bool vma_is_valid(struct xe_tile *tile, struct xe_vma *vma) { return BIT(tile->id) & vma->tile_present && - !(BIT(tile->id) & vma->usm.tile_invalidated); + !(BIT(tile->id) & vma->tile_invalidated); } static bool vma_matches(struct xe_vma *vma, u64 page_addr) @@ -226,7 +226,7 @@ retry_userptr: if (xe_vma_is_userptr(vma)) ret = xe_vma_userptr_check_repin(to_userptr_vma(vma)); - vma->usm.tile_invalidated &= ~BIT(tile->id); + vma->tile_invalidated &= ~BIT(tile->id); unlock_dma_resv: drm_exec_fini(&exec); diff --git a/drivers/gpu/drm/xe/xe_trace.h b/drivers/gpu/drm/xe/xe_trace.h index 4ddc55527f9a..846f14507d5f 100644 --- a/drivers/gpu/drm/xe/xe_trace.h +++ b/drivers/gpu/drm/xe/xe_trace.h @@ -468,7 +468,7 @@ DEFINE_EVENT(xe_vma, xe_vma_userptr_invalidate, TP_ARGS(vma) ); -DEFINE_EVENT(xe_vma, xe_vma_usm_invalidate, TP_PROTO(struct xe_vma *vma), TP_ARGS(vma) ); +DEFINE_EVENT(xe_vma, xe_vma_invalidate, TP_PROTO(struct xe_vma *vma), TP_ARGS(vma) ); diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 643b3701a738..cbb9b8935c90 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -708,6 +708,7 @@ int xe_vm_userptr_pin(struct xe_vm *vm) int err = 0; LIST_HEAD(tmp_evict); + xe_assert(vm->xe, !xe_vm_in_fault_mode(vm)); lockdep_assert_held_write(&vm->lock); /* Collect invalidated userptrs */ @@ -724,11 +725,27 @@ int xe_vm_userptr_pin(struct xe_vm *vm) list_for_each_entry_safe(uvma, next, &vm->userptr.repin_list,
userptr.repin_link) { err = xe_vma_userptr_pin_pages(uvma); - if (err < 0) - return err; + if (err == -EFAULT) { + list_del_init(&uvma->userptr.repin_link); - list_del_init(&uvma->userptr.repin_link); - list_move_tail(&uvma->vma.combined_links.rebind, &vm->rebind_list); + /* Wait for pending binds */ + xe_vm_lock(vm, false); + dma_resv_wait_timeout(xe_vm_resv(vm), + DMA_RESV_USAGE_BOOKKEEP, + false, MAX_SCHEDULE_TIMEOUT); + + err = xe_vm_invalidate_vma(&uvma->vma); + xe_vm_unlock(vm); + if (err) + return err; + } else { + if (err < 0) + return err; + + list_del_init(&uvma->userptr.repin_link); + list_move_tail(&uvma->vma.combined_links.rebind, + &vm->rebind_list); + } } return 0; @@ -2024,7 +2041,7 @@ static int xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma, return err; } - if (vma->tile_mask != (vma->tile_present & ~vma->usm.tile_invalidated)) { + if (vma->tile_mask != (vma->tile_present & ~vma->tile_invalidated)) { return xe_vm_bind(vm, vma, q, xe_vma_bo(vma), syncs, num_syncs, true, first_op, last_op); } else { @@ -3214,9 +3231,8 @@ int xe_vm_invalidate_vma(struct xe_vma *vma) u8 id; int ret; - xe_assert(xe, xe_vm_in_fault_mode(xe_vma_vm(vma))); xe_assert(xe, !xe_vma_is_null(vma)); - trace_xe_vma_usm_invalidate(vma); + trace_xe_vma_invalidate(vma); /* Check that we don't race with page-table updates */ if (IS_ENABLED(CONFIG_PROVE_LOCKING)) { @@ -3254,7 +3270,7 @@ int xe_vm_invalidate_vma(struct xe_vma *vma) } } - vma->usm.tile_invalidated = vma->tile_mask; + vma->tile_invalidated = vma->tile_mask; return 0; } diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index 79b5cab57711..ae5fb565f6bf 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -84,11 +84,8 @@ struct xe_vma { struct work_struct destroy_work; }; - /** @usm: unified shared memory state */ - struct { - /** @tile_invalidated: VMA has been invalidated */ - u8 tile_invalidated; - } usm; + /** @tile_invalidated: VMA has been invalidated */ + u8 tile_invalidated; /** @tile_mask: Tile mask of where to create binding for this VMA */ u8 tile_mask; -- cgit v1.2.3 From bb67dd0225a5f580ae66e25e5f18e8b471038fd0 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Tue, 12 Mar 2024 07:51:57 -0700 Subject: drm/xe: Group live kunit tests As was done for the normal kunit tests, group the live tests into a single module, xe_live_test.ko. 
Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240312145158.2295351-1-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/tests/Makefile | 3 ++- drivers/gpu/drm/xe/tests/xe_bo_test.c | 5 ----- drivers/gpu/drm/xe/tests/xe_dma_buf_test.c | 5 ----- drivers/gpu/drm/xe/tests/xe_live_test_mod.c | 10 ++++++++++ drivers/gpu/drm/xe/tests/xe_migrate_test.c | 5 ----- drivers/gpu/drm/xe/tests/xe_mocs_test.c | 5 ----- 6 files changed, 12 insertions(+), 21 deletions(-) create mode 100644 drivers/gpu/drm/xe/tests/xe_live_test_mod.c diff --git a/drivers/gpu/drm/xe/tests/Makefile b/drivers/gpu/drm/xe/tests/Makefile index 9d1d88af8b2f..8cf2367449d8 100644 --- a/drivers/gpu/drm/xe/tests/Makefile +++ b/drivers/gpu/drm/xe/tests/Makefile @@ -1,7 +1,8 @@ # SPDX-License-Identifier: GPL-2.0 # "live" kunit tests -obj-$(CONFIG_DRM_XE_KUNIT_TEST) += \ +obj-$(CONFIG_DRM_XE_KUNIT_TEST) += xe_live_test.o +xe_live_test-y = xe_live_test_mod.o \ xe_bo_test.o \ xe_dma_buf_test.o \ xe_migrate_test.o \ diff --git a/drivers/gpu/drm/xe/tests/xe_bo_test.c b/drivers/gpu/drm/xe/tests/xe_bo_test.c index f408f17f2164..a324cde77db8 100644 --- a/drivers/gpu/drm/xe/tests/xe_bo_test.c +++ b/drivers/gpu/drm/xe/tests/xe_bo_test.c @@ -19,8 +19,3 @@ static struct kunit_suite xe_bo_test_suite = { }; kunit_test_suite(xe_bo_test_suite); - -MODULE_AUTHOR("Intel Corporation"); -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("xe_bo kunit test"); -MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING); diff --git a/drivers/gpu/drm/xe/tests/xe_dma_buf_test.c b/drivers/gpu/drm/xe/tests/xe_dma_buf_test.c index 9f5a9cda8c0f..99cdb718b6c6 100644 --- a/drivers/gpu/drm/xe/tests/xe_dma_buf_test.c +++ b/drivers/gpu/drm/xe/tests/xe_dma_buf_test.c @@ -18,8 +18,3 @@ static struct kunit_suite xe_dma_buf_test_suite = { }; kunit_test_suite(xe_dma_buf_test_suite); - -MODULE_AUTHOR("Intel Corporation"); -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("xe_dma_buf kunit test"); -MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING); diff --git a/drivers/gpu/drm/xe/tests/xe_live_test_mod.c b/drivers/gpu/drm/xe/tests/xe_live_test_mod.c new file mode 100644 index 000000000000..eb1ea99a5a8b --- /dev/null +++ b/drivers/gpu/drm/xe/tests/xe_live_test_mod.c @@ -0,0 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright © 2023 Intel Corporation + */ +#include + +MODULE_AUTHOR("Intel Corporation"); +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("xe live kunit tests"); +MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING); diff --git a/drivers/gpu/drm/xe/tests/xe_migrate_test.c b/drivers/gpu/drm/xe/tests/xe_migrate_test.c index cf0c173b945f..eb0d8963419c 100644 --- a/drivers/gpu/drm/xe/tests/xe_migrate_test.c +++ b/drivers/gpu/drm/xe/tests/xe_migrate_test.c @@ -18,8 +18,3 @@ static struct kunit_suite xe_migrate_test_suite = { }; kunit_test_suite(xe_migrate_test_suite); - -MODULE_AUTHOR("Intel Corporation"); -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("xe_migrate kunit test"); -MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING); diff --git a/drivers/gpu/drm/xe/tests/xe_mocs_test.c b/drivers/gpu/drm/xe/tests/xe_mocs_test.c index ee40f31e1e12..6315886b659e 100644 --- a/drivers/gpu/drm/xe/tests/xe_mocs_test.c +++ b/drivers/gpu/drm/xe/tests/xe_mocs_test.c @@ -19,8 +19,3 @@ static struct kunit_suite xe_mocs_test_suite = { }; kunit_test_suite(xe_mocs_test_suite); - -MODULE_AUTHOR("Intel Corporation"); -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("xe_mocs kunit test"); -MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING); -- cgit v1.2.3 From 
e5a95c8c0a5094b0268d8f0cf69b888bc0fd6bbb Mon Sep 17 00:00:00 2001 From: Shuicheng Lin Date: Wed, 13 Mar 2024 02:50:52 +0000 Subject: drm/xe/xe_tracer: Align fence output format in ftrace log The fence print in xe_gt_tlb_invalidation_fence and xe_hw_fence is with "%p", change fence print in xe_sched_job to "%p" also. Cc: Lucas De Marchi Signed-off-by: Shuicheng Lin Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240313025052.1410833-1-shuicheng.lin@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_trace.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_trace.h b/drivers/gpu/drm/xe/xe_trace.h index 846f14507d5f..2d56cfc09e42 100644 --- a/drivers/gpu/drm/xe/xe_trace.h +++ b/drivers/gpu/drm/xe/xe_trace.h @@ -258,7 +258,7 @@ DECLARE_EVENT_CLASS(xe_sched_job, __field(u32, guc_state) __field(u32, flags) __field(int, error) - __field(u64, fence) + __field(struct dma_fence *, fence) __field(u64, batch_addr) ), @@ -269,11 +269,11 @@ DECLARE_EVENT_CLASS(xe_sched_job, atomic_read(&job->q->guc->state); __entry->flags = job->q->flags; __entry->error = job->fence->error; - __entry->fence = (unsigned long)job->fence; + __entry->fence = job->fence; __entry->batch_addr = (u64)job->batch_addr[0]; ), - TP_printk("fence=0x%016llx, seqno=%u, guc_id=%d, batch_addr=0x%012llx, guc_state=0x%x, flags=0x%x, error=%d", + TP_printk("fence=%p, seqno=%u, guc_id=%d, batch_addr=0x%012llx, guc_state=0x%x, flags=0x%x, error=%d", __entry->fence, __entry->seqno, __entry->guc_id, __entry->batch_addr, __entry->guc_state, __entry->flags, __entry->error) -- cgit v1.2.3 From 989d07ac6bb7d269e975f85e8f683f496faa0380 Mon Sep 17 00:00:00 2001 From: Himal Prasad Ghimiray Date: Wed, 13 Mar 2024 20:35:44 +0530 Subject: drm/xe: Return if kobj creation is failed Return after warning regarding kobj creation failure. Fixes: 4ae3aeab32d7 ("drm/xe: Add vram frequency sysfs attributes") Cc: Sujaritha Sundaresan Cc: Tejas Upadhyay Cc: Bommu Krishnaiah Reviewed-by: Tejas Upadhyay Signed-off-by: Himal Prasad Ghimiray Link: https://patchwork.freedesktop.org/patch/msgid/20240313150545.2830408-2-himal.prasad.ghimiray@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_vram_freq.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_vram_freq.c b/drivers/gpu/drm/xe/xe_vram_freq.c index 079cc283a186..c5f6b5a5d117 100644 --- a/drivers/gpu/drm/xe/xe_vram_freq.c +++ b/drivers/gpu/drm/xe/xe_vram_freq.c @@ -111,8 +111,10 @@ void xe_vram_freq_sysfs_init(struct xe_tile *tile) return; kobj = kobject_create_and_add("memory", tile->sysfs); - if (!kobj) + if (!kobj) { drm_warn(&xe->drm, "failed to add memory directory, err: %d\n", -ENOMEM); + return; + } err = sysfs_create_group(kobj, &freq_group_attrs); if (err) { -- cgit v1.2.3 From 023f5c8e904504a069c8faece034c5653646e68a Mon Sep 17 00:00:00 2001 From: Himal Prasad Ghimiray Date: Wed, 13 Mar 2024 20:35:45 +0530 Subject: drm/xe/xe_exec : In xe_exec_ioctl remove deadcode At label err_unlock_list the condition write_label will never be true. Remove the deadcode line for write_label true. Reported by static analyzer. 
Cc: Matthew Brost Cc: Tejas Upadhyay Cc: Bommu Krishnaiah Reviewed-by: Tejas Upadhyay Signed-off-by: Himal Prasad Ghimiray Link: https://patchwork.freedesktop.org/patch/msgid/20240313150545.2830408-3-himal.prasad.ghimiray@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_exec.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c index 952496c6260d..acc066e405d0 100644 --- a/drivers/gpu/drm/xe/xe_exec.c +++ b/drivers/gpu/drm/xe/xe_exec.c @@ -380,10 +380,7 @@ err_put_job: err_exec: drm_exec_fini(exec); err_unlock_list: - if (write_locked) - up_write(&vm->lock); - else - up_read(&vm->lock); + up_read(&vm->lock); if (err == -EAGAIN && !skip_retry) goto retry; err_syncs: -- cgit v1.2.3 From cf2cbbc76dc4272fa156f4991fab1024b5815451 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 13 Mar 2024 11:44:30 -0700 Subject: drm/xe: Use xe_assert in xe_device_assert_mem_access The implementation of xe_device_assert_mem_access has a non-zero cost. Use xe_assert rather than XE_WARN_ON so it will compile out in non-debug kernel builds (Kconfig CONFIG_DRM_XE_DEBUG=n). Cc: Rodrigo Vivi Signed-off-by: Matthew Brost Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240313184430.999397-1-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 919ad88f0495..b0bfe75eb59f 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -666,7 +666,7 @@ bool xe_device_mem_access_ongoing(struct xe_device *xe) */ void xe_device_assert_mem_access(struct xe_device *xe) { - XE_WARN_ON(xe_pm_runtime_suspended(xe)); + xe_assert(xe, !xe_pm_runtime_suspended(xe)); } bool xe_device_mem_access_get_if_ongoing(struct xe_device *xe) -- cgit v1.2.3 From 58480c1c912ff8146d067301a0d04cca318b4a66 Mon Sep 17 00:00:00 2001 From: José Roberto de Souza Date: Wed, 13 Mar 2024 10:13:18 -0700 Subject: drm/xe: Skip VMAs pin when requesting signal to the last XE_EXEC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Doing a XE_EXEC with num_batch_buffer == 0 causes the signals passed as arguments to be signaled when the last real XE_EXEC is completed. But to do that it was first pinning all VMAs in drm_gpuvm_exec_lock(); this patch removes this pinning as it is not required. This change also helps Mesa implement memory over-committing recovery, as it needs to unbind not-needed VMAs when the whole VM can't fit in GPU memory, but it can only do the unbinding when the last XE_EXEC is completed. So with this change Mesa can get the signal it wants without getting out-of-memory errors.
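For context, a rough userspace sketch of the flow this enables — assuming the drm_xe_exec/drm_xe_sync uAPI of this period (device, exec queue and syncobj setup omitted; field names should be checked against xe_drm.h):

#include <stdint.h>
#include <sys/ioctl.h>
#include <drm/xe_drm.h>

/* Submit no new work, only request a signal once the last real exec completes. */
static int xe_signal_last_exec(int fd, uint32_t exec_queue_id, uint32_t syncobj)
{
	struct drm_xe_sync sync = {
		.type = DRM_XE_SYNC_TYPE_SYNCOBJ,
		.flags = DRM_XE_SYNC_FLAG_SIGNAL,
		.handle = syncobj,
	};
	struct drm_xe_exec exec = {
		.exec_queue_id = exec_queue_id,
		.num_syncs = 1,
		.syncs = (uintptr_t)&sync,
		.num_batch_buffer = 0, /* the case this patch optimizes */
	};

	return ioctl(fd, DRM_IOCTL_XE_EXEC, &exec);
}

With the pinning dropped, such a call no longer touches the VM's VMAs, so issuing it cannot fail with out-of-memory even when the VM's bindings no longer all fit in GPU memory.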
Fixes: eb9702ad2986 ("drm/xe: Allow num_batch_buffer / num_binds == 0 in IOCTLs") Cc: Thomas Hellstrom Co-developed-by: Matthew Brost Signed-off-by: José Roberto de Souza Reviewed-by: Matthew Brost Signed-off-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240313171318.121066-1-jose.souza@intel.com --- drivers/gpu/drm/xe/xe_exec.c | 41 +++++++++++++++++++++++------------------ 1 file changed, 23 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c index acc066e405d0..7692ebfe7d47 100644 --- a/drivers/gpu/drm/xe/xe_exec.c +++ b/drivers/gpu/drm/xe/xe_exec.c @@ -235,6 +235,29 @@ retry: goto err_unlock_list; } + if (!args->num_batch_buffer) { + err = xe_vm_lock(vm, true); + if (err) + goto err_unlock_list; + + if (!xe_vm_in_lr_mode(vm)) { + struct dma_fence *fence; + + fence = xe_sync_in_fence_get(syncs, num_syncs, q, vm); + if (IS_ERR(fence)) { + err = PTR_ERR(fence); + goto err_unlock_list; + } + for (i = 0; i < num_syncs; i++) + xe_sync_entry_signal(&syncs[i], NULL, fence); + xe_exec_queue_last_fence_set(q, vm, fence); + dma_fence_put(fence); + } + + xe_vm_unlock(vm); + goto err_unlock_list; + } + vm_exec.vm = &vm->gpuvm; vm_exec.flags = DRM_EXEC_INTERRUPTIBLE_WAIT; if (xe_vm_in_lr_mode(vm)) { @@ -254,24 +277,6 @@ retry: goto err_exec; } - if (!args->num_batch_buffer) { - if (!xe_vm_in_lr_mode(vm)) { - struct dma_fence *fence; - - fence = xe_sync_in_fence_get(syncs, num_syncs, q, vm); - if (IS_ERR(fence)) { - err = PTR_ERR(fence); - goto err_exec; - } - for (i = 0; i < num_syncs; i++) - xe_sync_entry_signal(&syncs[i], NULL, fence); - xe_exec_queue_last_fence_set(q, vm, fence); - dma_fence_put(fence); - } - - goto err_exec; - } - if (xe_exec_queue_is_lr(q) && xe_exec_queue_ring_full(q)) { err = -EWOULDBLOCK; /* Aliased to -EAGAIN */ skip_retry = true; -- cgit v1.2.3 From 002d8f0b4f76aabbf8e00c538a124b91625d7260 Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Mon, 11 Mar 2024 16:11:59 +0100 Subject: drm/xe: Remove unused xe_bo->props struct MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Property struct is not being used so remove it and related dead code. 
Fixes: ddfa2d6a846a ("drm/xe/uapi: Kill VM_MADVISE IOCTL") Cc: Rodrigo Vivi Cc: Thomas Hellström Cc: intel-xe@lists.freedesktop.org Signed-off-by: Nirmoy Das Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240311151159.10036-1-nirmoy.das@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_bo.c | 59 ++++++---------------------------------- drivers/gpu/drm/xe/xe_bo_types.h | 19 ------------- 2 files changed, 9 insertions(+), 69 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index b89ac6db68a1..d7e0deb1b869 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -144,9 +144,6 @@ static void try_add_system(struct xe_device *xe, struct xe_bo *bo, .mem_type = XE_PL_TT, }; *c += 1; - - if (bo->props.preferred_mem_type == XE_BO_PROPS_INVALID) - bo->props.preferred_mem_type = XE_PL_TT; } } @@ -181,25 +178,15 @@ static void add_vram(struct xe_device *xe, struct xe_bo *bo, } places[*c] = place; *c += 1; - - if (bo->props.preferred_mem_type == XE_BO_PROPS_INVALID) - bo->props.preferred_mem_type = mem_type; } static void try_add_vram(struct xe_device *xe, struct xe_bo *bo, u32 bo_flags, u32 *c) { - if (bo->props.preferred_gt == XE_GT1) { - if (bo_flags & XE_BO_CREATE_VRAM1_BIT) - add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM1, c); - if (bo_flags & XE_BO_CREATE_VRAM0_BIT) - add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM0, c); - } else { - if (bo_flags & XE_BO_CREATE_VRAM0_BIT) - add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM0, c); - if (bo_flags & XE_BO_CREATE_VRAM1_BIT) - add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM1, c); - } + if (bo_flags & XE_BO_CREATE_VRAM0_BIT) + add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM0, c); + if (bo_flags & XE_BO_CREATE_VRAM1_BIT) + add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM1, c); } static void try_add_stolen(struct xe_device *xe, struct xe_bo *bo, @@ -223,17 +210,8 @@ static int __xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo, { u32 c = 0; - bo->props.preferred_mem_type = XE_BO_PROPS_INVALID; - - /* The order of placements should indicate preferred location */ - - if (bo->props.preferred_mem_class == DRM_XE_MEM_REGION_CLASS_SYSMEM) { - try_add_system(xe, bo, bo_flags, &c); - try_add_vram(xe, bo, bo_flags, &c); - } else { - try_add_vram(xe, bo, bo_flags, &c); - try_add_system(xe, bo, bo_flags, &c); - } + try_add_vram(xe, bo, bo_flags, &c); + try_add_system(xe, bo, bo_flags, &c); try_add_stolen(xe, bo, bo_flags, &c); if (!c) @@ -1118,13 +1096,6 @@ static void xe_gem_object_close(struct drm_gem_object *obj, } } -static bool should_migrate_to_system(struct xe_bo *bo) -{ - struct xe_device *xe = xe_bo_device(bo); - - return xe_device_in_fault_mode(xe) && bo->props.cpu_atomic; -} - static vm_fault_t xe_gem_fault(struct vm_fault *vmf) { struct ttm_buffer_object *tbo = vmf->vma->vm_private_data; @@ -1133,7 +1104,7 @@ static vm_fault_t xe_gem_fault(struct vm_fault *vmf) struct xe_bo *bo = ttm_to_xe_bo(tbo); bool needs_rpm = bo->flags & XE_BO_CREATE_VRAM_MASK; vm_fault_t ret; - int idx, r = 0; + int idx; if (needs_rpm) xe_device_mem_access_get(xe); @@ -1145,17 +1116,8 @@ static vm_fault_t xe_gem_fault(struct vm_fault *vmf) if (drm_dev_enter(ddev, &idx)) { trace_xe_bo_cpu_fault(bo); - if (should_migrate_to_system(bo)) { - r = xe_bo_migrate(bo, XE_PL_TT); - if (r == -EBUSY || r == -ERESTARTSYS || r == -EINTR) - ret = VM_FAULT_NOPAGE; - else if (r) - ret = VM_FAULT_SIGBUS; - } - if (!ret) - ret = 
ttm_bo_vm_fault_reserved(vmf, - vmf->vma->vm_page_prot, - TTM_BO_VM_NUM_PREFAULT); + ret = ttm_bo_vm_fault_reserved(vmf, vmf->vma->vm_page_prot, + TTM_BO_VM_NUM_PREFAULT); drm_dev_exit(idx); } else { ret = ttm_bo_vm_dummy_page(vmf, vmf->vma->vm_page_prot); @@ -1283,9 +1245,6 @@ struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo, bo->flags = flags; bo->cpu_caching = cpu_caching; bo->ttm.base.funcs = &xe_gem_object_funcs; - bo->props.preferred_mem_class = XE_BO_PROPS_INVALID; - bo->props.preferred_gt = XE_BO_PROPS_INVALID; - bo->props.preferred_mem_type = XE_BO_PROPS_INVALID; bo->ttm.priority = XE_BO_PRIORITY_NORMAL; INIT_LIST_HEAD(&bo->pinned_link); #ifdef CONFIG_PROC_FS diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h index 14ef13b7b421..86422e113d39 100644 --- a/drivers/gpu/drm/xe/xe_bo_types.h +++ b/drivers/gpu/drm/xe/xe_bo_types.h @@ -56,25 +56,6 @@ struct xe_bo { */ struct list_head client_link; #endif - /** @props: BO user controlled properties */ - struct { - /** @preferred_mem: preferred memory class for this BO */ - s16 preferred_mem_class; - /** @prefered_gt: preferred GT for this BO */ - s16 preferred_gt; - /** @preferred_mem_type: preferred memory type */ - s32 preferred_mem_type; - /** - * @cpu_atomic: the CPU expects to do atomics operations to - * this BO - */ - bool cpu_atomic; - /** - * @device_atomic: the device expects to do atomics operations - * to this BO - */ - bool device_atomic; - } props; /** @freed: List node for delayed put. */ struct llist_node freed; /** @created: Whether the bo has passed initial creation */ -- cgit v1.2.3 From 2c5b70f74d61438a071a19370e63c234d2bd8938 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Thu, 14 Mar 2024 12:15:55 +0000 Subject: drm/xe/guc_submit: use jiffies for job timeout drm_sched_init() expects jiffies for the timeout, but here we are passing the timeout in ms. Convert to jiffies instead. Fixes: eef55700f302 ("drm/xe: Add sysfs for default engine scheduler properties") Signed-off-by: Matthew Auld Cc: Matthew Brost Reviewed-by: Nirmoy Das Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240314121554.223229-2-matthew.auld@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_guc_submit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 19efdb2f881f..82c955a2a15c 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -1226,7 +1226,7 @@ static int guc_exec_queue_init(struct xe_exec_queue *q) init_waitqueue_head(&ge->suspend_wait); timeout = (q->vm && xe_vm_in_lr_mode(q->vm)) ? MAX_SCHEDULE_TIMEOUT : - q->sched_props.job_timeout_ms; + msecs_to_jiffies(q->sched_props.job_timeout_ms); err = xe_sched_init(&ge->sched, &drm_sched_ops, &xe_sched_ops, get_submit_wq(guc), q->lrc[0].ring.size / MAX_JOB_SIZE_BYTES, 64, -- cgit v1.2.3 From cc244ce531d4ef013d0d87e11141bb94d4235828 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Mon, 4 Mar 2024 06:56:34 -0800 Subject: drm/xe/gsc: Handle GSCCS ER interrupt Starting on Xe2, the GSCCS engine reset is a 2-step process. When the driver or the GuC hits the GDRST register, the CS is immediately reset and a success is reported, but the GSC shim continues its reset in the background. While the shim reset is ongoing, the CS is able to accept new context submission, but any commands that require the shim will be stalled until the reset is completed. 
This means that we can keep submitting to the GSCCS as long as we make sure that the preemption timeout is big enough to cover any delay introduced by the reset; since the GSC preempt timeout is not tunable at runtime, we only need to check that the value set in kconfig is big enough (and increase it if it isn't). When the shim reset completes, a specific CS interrupt is triggered, in response to which we need to check the GSCI_TIMER_STATUS register to see if the reset was successful or not. Note that the GSCI_TIMER_STATUS register is not power save/restored, so it gets reset on MC6 entry. However, a reset failure stops MC6, so in that scenario we're always guaranteed to find the correct value. Since we can't check the register within interrupt context, the existing GSC worker has been updated to handle it. The expected action to take on ER failure is to trigger a driver FLR, but we still don't support that, so for now we just print an error. A comment has been added to the code to keep track of the FLR requirement. v2: Add a check for the initial timeout value (Alan) Signed-off-by: Daniele Ceraolo Spurio Cc: Alan Previn Reviewed-by: Alan Previn Link: https://patchwork.freedesktop.org/patch/msgid/20240304145634.820684-1-daniele.ceraolospurio@intel.com --- drivers/gpu/drm/xe/regs/xe_gsc_regs.h | 7 ++++ drivers/gpu/drm/xe/regs/xe_gt_regs.h | 1 + drivers/gpu/drm/xe/xe_gsc.c | 63 +++++++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_gsc.h | 2 ++ drivers/gpu/drm/xe/xe_gsc_types.h | 1 + drivers/gpu/drm/xe/xe_hw_engine.c | 35 ++++++++++++++++++- drivers/gpu/drm/xe/xe_irq.c | 2 +- 7 files changed, 109 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/regs/xe_gsc_regs.h b/drivers/gpu/drm/xe/regs/xe_gsc_regs.h index 9886ec9cb08e..e2a925be137c 100644 --- a/drivers/gpu/drm/xe/regs/xe_gsc_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gsc_regs.h @@ -38,4 +38,11 @@ #define HECI_H_GS1(base) XE_REG((base) + 0xc4c) #define HECI_H_GS1_ER_PREP REG_BIT(0) +#define GSCI_TIMER_STATUS XE_REG(0x11ca28) +#define GSCI_TIMER_STATUS_VALUE REG_GENMASK(1, 0) +#define GSCI_TIMER_STATUS_RESET_IN_PROGRESS 0 +#define GSCI_TIMER_STATUS_TIMER_EXPIRED 1 +#define GSCI_TIMER_STATUS_RESET_COMPLETE 2 +#define GSCI_TIMER_STATUS_OUT_OF_RESET 3 + #endif diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index 15ac2d284d48..abb6e86fe367 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -486,6 +486,7 @@ #define XEHPC_BCS7_BCS8_INTR_MASK XE_REG(0x19011c) #define GT_WAIT_SEMAPHORE_INTERRUPT REG_BIT(11) #define GT_CONTEXT_SWITCH_INTERRUPT REG_BIT(8) +#define GSC_ER_COMPLETE REG_BIT(5) #define GT_RENDER_PIPECTL_NOTIFY_INTERRUPT REG_BIT(4) #define GT_CS_MASTER_ERROR_INTERRUPT REG_BIT(3) #define GT_RENDER_USER_INTERRUPT REG_BIT(0) diff --git a/drivers/gpu/drm/xe/xe_gsc.c b/drivers/gpu/drm/xe/xe_gsc.c index d9aa815a5bc2..09f6e7899921 100644 --- a/drivers/gpu/drm/xe/xe_gsc.c +++ b/drivers/gpu/drm/xe/xe_gsc.c @@ -27,6 +27,7 @@ #include "xe_wa.h" #include "instructions/xe_gsc_commands.h" #include "regs/xe_gsc_regs.h" +#include "regs/xe_gt_regs.h" static struct xe_gt * gsc_to_gt(struct xe_gsc *gsc) @@ -273,6 +274,44 @@ static int gsc_upload_and_init(struct xe_gsc *gsc) return 0; } +static int gsc_er_complete(struct xe_gt *gt) +{ + u32 er_status; + + if (!gsc_fw_is_loaded(gt)) + return 0; + + /* + * Starting on Xe2, the GSCCS engine reset is a 2-step process. 
When the + * driver or the GuC hit the GDRST register, the CS is immediately reset + * and a success is reported, but the GSC shim keeps resetting in the + * background. While the shim reset is ongoing, the CS is able to accept + * new context submission, but any commands that require the shim will + * be stalled until the reset is completed. This means that we can keep + * submitting to the GSCCS as long as we make sure that the preemption + * timeout is big enough to cover any delay introduced by the reset. + * When the shim reset completes, a specific CS interrupt is triggered, + * in response to which we need to check the GSCI_TIMER_STATUS register + * to see if the reset was successful or not. + * Note that the GSCI_TIMER_STATUS register is not power save/restored, + * so it gets reset on MC6 entry. However, a reset failure stops MC6, + * so in that scenario we're always guaranteed to find the correct + * value. + */ + er_status = xe_mmio_read32(gt, GSCI_TIMER_STATUS) & GSCI_TIMER_STATUS_VALUE; + + if (er_status == GSCI_TIMER_STATUS_TIMER_EXPIRED) { + /* + * XXX: we should trigger an FLR here, but we don't have support + * for that yet. + */ + xe_gt_err(gt, "GSC ER timed out!\n"); + return -EIO; + } + + return 0; +} + static void gsc_work(struct work_struct *work) { struct xe_gsc *gsc = container_of(work, typeof(*gsc), work); @@ -289,6 +328,12 @@ static void gsc_work(struct work_struct *work) xe_pm_runtime_get(xe); xe_force_wake_get(gt_to_fw(gt), XE_FW_GSC); + if (actions & GSC_ACTION_ER_COMPLETE) { + ret = gsc_er_complete(gt); + if (ret) + goto out; + } + if (actions & GSC_ACTION_FW_LOAD) { ret = gsc_upload_and_init(gsc); if (ret && ret != -EEXIST) @@ -300,10 +345,28 @@ static void gsc_work(struct work_struct *work) if (actions & GSC_ACTION_SW_PROXY) xe_gsc_proxy_request_handler(gsc); +out: xe_force_wake_put(gt_to_fw(gt), XE_FW_GSC); xe_pm_runtime_put(xe); } +void xe_gsc_hwe_irq_handler(struct xe_hw_engine *hwe, u16 intr_vec) +{ + struct xe_gt *gt = hwe->gt; + struct xe_gsc *gsc = &gt->uc.gsc; + + if (unlikely(!intr_vec)) + return; + + if (intr_vec & GSC_ER_COMPLETE) { + spin_lock(&gsc->lock); + gsc->work_actions |= GSC_ACTION_ER_COMPLETE; + spin_unlock(&gsc->lock); + + queue_work(gsc->wq, &gsc->work); + } +} + int xe_gsc_init(struct xe_gsc *gsc) { struct xe_gt *gt = gsc_to_gt(gsc); diff --git a/drivers/gpu/drm/xe/xe_gsc.h b/drivers/gpu/drm/xe/xe_gsc.h index c6fb32e3fd79..dd16e9b8b894 100644 --- a/drivers/gpu/drm/xe/xe_gsc.h +++ b/drivers/gpu/drm/xe/xe_gsc.h @@ -9,12 +9,14 @@ #include "xe_gsc_types.h" struct xe_gt; +struct xe_hw_engine; int xe_gsc_init(struct xe_gsc *gsc); int xe_gsc_init_post_hwconfig(struct xe_gsc *gsc); void xe_gsc_wait_for_worker_completion(struct xe_gsc *gsc); void xe_gsc_load_start(struct xe_gsc *gsc); void xe_gsc_remove(struct xe_gsc *gsc); +void xe_gsc_hwe_irq_handler(struct xe_hw_engine *hwe, u16 intr_vec); void xe_gsc_wa_14015076503(struct xe_gt *gt, bool prep); diff --git a/drivers/gpu/drm/xe/xe_gsc_types.h b/drivers/gpu/drm/xe/xe_gsc_types.h index 138d8cc0f19c..5926de20214c 100644 --- a/drivers/gpu/drm/xe/xe_gsc_types.h +++ b/drivers/gpu/drm/xe/xe_gsc_types.h @@ -47,6 +47,7 @@ struct xe_gsc { u32 work_actions; #define GSC_ACTION_FW_LOAD BIT(0) #define GSC_ACTION_SW_PROXY BIT(1) +#define GSC_ACTION_ER_COMPLETE BIT(2) /** @proxy: sub-structure containing the SW proxy-related variables */ struct { diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index b5e83ea172f3..2c5615130a38 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c
+++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -14,8 +14,10 @@ #include "xe_device.h" #include "xe_execlist.h" #include "xe_force_wake.h" +#include "xe_gsc.h" #include "xe_gt.h" #include "xe_gt_ccs_mode.h" +#include "xe_gt_printk.h" #include "xe_gt_topology.h" #include "xe_hw_fence.h" #include "xe_irq.h" @@ -463,6 +465,32 @@ static void hw_engine_init_early(struct xe_gt *gt, struct xe_hw_engine *hwe, hwe->eclass->sched_props.preempt_timeout_us = XE_HW_ENGINE_PREEMPT_TIMEOUT; hwe->eclass->sched_props.preempt_timeout_min = XE_HW_ENGINE_PREEMPT_TIMEOUT_MIN; hwe->eclass->sched_props.preempt_timeout_max = XE_HW_ENGINE_PREEMPT_TIMEOUT_MAX; + + /* + * The GSC engine can accept submissions while the GSC shim is + * being reset, during which time the submission is stalled. In + * the worst case, the shim reset can take up to the maximum GSC + * command execution time (250ms), so the request start can be + * delayed by that much; the request itself can take that long + * without being preemptible, which means worst case it can + * theoretically take up to 500ms for a preemption to go through + * on the GSC engine. Adding to that an extra 100ms as a safety + * margin, we get a minimum recommended timeout of 600ms. + * The preempt_timeout value can't be tuned for OTHER_CLASS + * because the class is reserved for kernel usage, so we just + * need to make sure that the starting value is above that + * threshold; since our default value (640ms) is greater than + * 600ms, the only way we can go below is via a kconfig setting. + * If that happens, log it in dmesg and update the value. + */ + if (hwe->class == XE_ENGINE_CLASS_OTHER) { + const u32 min_preempt_timeout = 600 * 1000; + if (hwe->eclass->sched_props.preempt_timeout_us < min_preempt_timeout) { + hwe->eclass->sched_props.preempt_timeout_us = min_preempt_timeout; + xe_gt_notice(gt, "Increasing preempt_timeout for GSC to 600ms\n"); + } + } + /* Record default props */ hwe->eclass->defaults = hwe->eclass->sched_props; } @@ -509,8 +537,13 @@ static int hw_engine_init(struct xe_gt *gt, struct xe_hw_engine *hwe, } } - if (xe_device_uc_enabled(xe)) + if (xe_device_uc_enabled(xe)) { + /* GSCCS has a special interrupt for reset */ + if (hwe->class == XE_ENGINE_CLASS_OTHER) + hwe->irq_handler = xe_gsc_hwe_irq_handler; + xe_hw_engine_enable_ring(hwe); + } /* We reserve the highest BCS instance for USM */ if (xe->info.has_usm && hwe->class == XE_ENGINE_CLASS_COPY) diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c index 2f5d179e0d00..2275ca35a3c7 100644 --- a/drivers/gpu/drm/xe/xe_irq.c +++ b/drivers/gpu/drm/xe/xe_irq.c @@ -187,7 +187,7 @@ void xe_irq_enable_hwe(struct xe_gt *gt) * GSCCS interrupts, but it has its own mask register. */ if (xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_OTHER)) { - gsc_mask = irqs; + gsc_mask = irqs | GSC_ER_COMPLETE; heci_mask = GSC_IRQ_INTF(1); } else if (HAS_HECI_GSCFI(xe)) { gsc_mask = GSC_IRQ_INTF(1); -- cgit v1.2.3 From 00e9062e8d2037c3ac5900eab7a504aa17cab406 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 14 Mar 2024 12:58:26 -0700 Subject: drm/xe/mocs: Determine MCR separately for primary/media GT in kunit test Although MOCS registers became multicast in graphics version 12.50 on the primary GT, this transition did not happen until version 20 on the media GT. Considering each GT independently is mostly important for MTL/ARL where the Xe_LPM+ IP has non-MCR MOCS registers, even though Xe_LPG IP has MCR registers. 
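For reference, the rule above reduces to a small per-GT predicate; a rough sketch of the logic (the patch's actual helper, regs_are_mcr(), appears in the xe_mocs.c diff below):

	/* sketch only: per-GT MCR rule as described in the commit message */
	if (xe_gt_is_media_type(gt))
		mcr = MEDIA_VER(xe) >= 20;		/* media GT: MCR only from version 20 */
	else
		mcr = GRAPHICS_VERx100(xe) >= 1250;	/* primary GT: MCR from 12.50 */
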
Bspec: 67789, 71186 Cc: Lucas De Marchi Signed-off-by: Matt Roper Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240314195825.3226856-3-matthew.d.roper@intel.com --- drivers/gpu/drm/xe/tests/xe_mocs.c | 4 ++-- drivers/gpu/drm/xe/xe_mocs.c | 14 ++++++++++++-- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/xe/tests/xe_mocs.c b/drivers/gpu/drm/xe/tests/xe_mocs.c index 25dd93ff1606..7fd99ba96a12 100644 --- a/drivers/gpu/drm/xe/tests/xe_mocs.c +++ b/drivers/gpu/drm/xe/tests/xe_mocs.c @@ -51,7 +51,7 @@ static void read_l3cc_table(struct xe_gt *gt, for (i = 0; i < info->n_entries; i++) { if (!(i & 1)) { - if (GRAPHICS_VERx100(xe) >= 1250) + if (regs_are_mcr(gt)) reg_val = xe_gt_mcr_unicast_read_any(gt, XEHP_LNCFCMOCS(i >> 1)); else reg_val = xe_mmio_read32(gt, XELP_LNCFCMOCS(i >> 1)); @@ -91,7 +91,7 @@ static void read_mocs_table(struct xe_gt *gt, KUNIT_ASSERT_EQ_MSG(test, ret, 0, "Forcewake Failed.\n"); for (i = 0; i < info->n_entries; i++) { - if (GRAPHICS_VERx100(xe) >= 1250) + if (regs_are_mcr(gt)) reg_val = xe_gt_mcr_unicast_read_any(gt, XEHP_GLOBAL_MOCS(i)); else reg_val = xe_mmio_read32(gt, XELP_GLOBAL_MOCS(i)); diff --git a/drivers/gpu/drm/xe/xe_mocs.c b/drivers/gpu/drm/xe/xe_mocs.c index f56f630fa29d..35ccd4ac00d2 100644 --- a/drivers/gpu/drm/xe/xe_mocs.c +++ b/drivers/gpu/drm/xe/xe_mocs.c @@ -466,6 +466,16 @@ static u32 get_entry_control(const struct xe_mocs_info *info, return info->table[info->unused_entries_index].control_value; } +static bool regs_are_mcr(struct xe_gt *gt) +{ + struct xe_device *xe = gt_to_xe(gt); + + if (xe_gt_is_media_type(gt)) + return MEDIA_VER(xe) >= 20; + else + return GRAPHICS_VERx100(xe) >= 1250; +} + static void __init_mocs_table(struct xe_gt *gt, const struct xe_mocs_info *info) { @@ -485,7 +495,7 @@ static void __init_mocs_table(struct xe_gt *gt, mocs_dbg(>_to_xe(gt)->drm, "GLOB_MOCS[%d] 0x%x 0x%x\n", i, XELP_GLOBAL_MOCS(i).addr, mocs); - if (GRAPHICS_VERx100(gt_to_xe(gt)) > 1250) + if (regs_are_mcr(gt)) xe_gt_mcr_multicast_write(gt, XEHP_GLOBAL_MOCS(i), mocs); else xe_mmio_write32(gt, XELP_GLOBAL_MOCS(i), mocs); @@ -525,7 +535,7 @@ static void init_l3cc_table(struct xe_gt *gt, mocs_dbg(>_to_xe(gt)->drm, "LNCFCMOCS[%d] 0x%x 0x%x\n", i, XELP_LNCFCMOCS(i).addr, l3cc); - if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1250) + if (regs_are_mcr(gt)) xe_gt_mcr_multicast_write(gt, XEHP_LNCFCMOCS(i), l3cc); else xe_mmio_write32(gt, XELP_LNCFCMOCS(i), l3cc); -- cgit v1.2.3 From 431547134ef7b9cff90d5a7ab95d145136f2bec7 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 14 Mar 2024 12:58:27 -0700 Subject: drm/xe/mocs: Clarify which GT is being operated on Switch the MOCS-related debug messages to use a GT-specific logging function and add ID/type output to the beginning of the MOCS kunit test to assist with debug when problems arise. 
Cc: Lucas De Marchi Signed-off-by: Matt Roper Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240314195825.3226856-4-matthew.d.roper@intel.com --- drivers/gpu/drm/xe/tests/xe_mocs.c | 10 +++++----- drivers/gpu/drm/xe/xe_mocs.c | 20 +++++++++----------- 2 files changed, 14 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/xe/tests/xe_mocs.c b/drivers/gpu/drm/xe/tests/xe_mocs.c index 7fd99ba96a12..1b8617075b37 100644 --- a/drivers/gpu/drm/xe/tests/xe_mocs.c +++ b/drivers/gpu/drm/xe/tests/xe_mocs.c @@ -29,6 +29,8 @@ static int live_mocs_init(struct live_mocs *arg, struct xe_gt *gt) flags = get_mocs_settings(gt_to_xe(gt), &arg->table); + kunit_info(test, "gt %d", gt->info.id); + kunit_info(test, "gt type %d", gt->info.type); kunit_info(test, "table size %d", arg->table.size); kunit_info(test, "table uc_index %d", arg->table.uc_index); kunit_info(test, "table n_entries %d", arg->table.n_entries); @@ -40,7 +42,6 @@ static void read_l3cc_table(struct xe_gt *gt, const struct xe_mocs_info *info) { struct kunit *test = xe_cur_kunit(); - struct xe_device *xe = gt_to_xe(gt); u32 l3cc, l3cc_expected; unsigned int i; u32 reg_val; @@ -56,7 +57,7 @@ static void read_l3cc_table(struct xe_gt *gt, else reg_val = xe_mmio_read32(gt, XELP_LNCFCMOCS(i >> 1)); - mocs_dbg(&xe->drm, "reg_val=0x%x\n", reg_val); + mocs_dbg(gt, "reg_val=0x%x\n", reg_val); } else { /* Just re-use value read on previous iteration */ reg_val >>= 16; @@ -65,7 +66,7 @@ static void read_l3cc_table(struct xe_gt *gt, l3cc_expected = get_entry_l3cc(info, i); l3cc = reg_val & 0xffff; - mocs_dbg(&xe->drm, "[%u] expected=0x%x actual=0x%x\n", + mocs_dbg(gt, "[%u] expected=0x%x actual=0x%x\n", i, l3cc_expected, l3cc); KUNIT_EXPECT_EQ_MSG(test, l3cc_expected, l3cc, @@ -78,7 +79,6 @@ static void read_mocs_table(struct xe_gt *gt, const struct xe_mocs_info *info) { struct kunit *test = xe_cur_kunit(); - struct xe_device *xe = gt_to_xe(gt); u32 mocs, mocs_expected; unsigned int i; u32 reg_val; @@ -99,7 +99,7 @@ static void read_mocs_table(struct xe_gt *gt, mocs_expected = get_entry_control(info, i); mocs = reg_val; - mocs_dbg(&xe->drm, "[%u] expected=0x%x actual=0x%x\n", + mocs_dbg(gt, "[%u] expected=0x%x actual=0x%x\n", i, mocs_expected, mocs); KUNIT_EXPECT_EQ_MSG(test, mocs_expected, mocs, diff --git a/drivers/gpu/drm/xe/xe_mocs.c b/drivers/gpu/drm/xe/xe_mocs.c index 35ccd4ac00d2..bff659d20062 100644 --- a/drivers/gpu/drm/xe/xe_mocs.c +++ b/drivers/gpu/drm/xe/xe_mocs.c @@ -17,10 +17,10 @@ #include "xe_step_types.h" #if IS_ENABLED(CONFIG_DRM_XE_DEBUG) -#define mocs_dbg drm_dbg +#define mocs_dbg xe_gt_dbg #else __printf(2, 3) -static inline void mocs_dbg(const struct drm_device *dev, +static inline void mocs_dbg(const struct xe_gt *gt, const char *format, ...) 
{ /* noop */ } #endif @@ -479,20 +479,18 @@ static bool regs_are_mcr(struct xe_gt *gt) static void __init_mocs_table(struct xe_gt *gt, const struct xe_mocs_info *info) { - struct xe_device *xe = gt_to_xe(gt); - unsigned int i; u32 mocs; - drm_WARN_ONCE(&xe->drm, !info->unused_entries_index, - "Unused entries index should have been defined\n"); + xe_gt_WARN_ONCE(gt, !info->unused_entries_index, + "Unused entries index should have been defined\n"); - mocs_dbg(&gt_to_xe(gt)->drm, "mocs entries: %d\n", info->n_entries); + mocs_dbg(gt, "mocs entries: %d\n", info->n_entries); for (i = 0; i < info->n_entries; i++) { mocs = get_entry_control(info, i); - mocs_dbg(&gt_to_xe(gt)->drm, "GLOB_MOCS[%d] 0x%x 0x%x\n", i, + mocs_dbg(gt, "GLOB_MOCS[%d] 0x%x 0x%x\n", i, XELP_GLOBAL_MOCS(i).addr, mocs); if (regs_are_mcr(gt)) @@ -526,13 +524,13 @@ static void init_l3cc_table(struct xe_gt *gt, unsigned int i; u32 l3cc; - mocs_dbg(&gt_to_xe(gt)->drm, "l3cc entries: %d\n", info->n_entries); + mocs_dbg(gt, "l3cc entries: %d\n", info->n_entries); for (i = 0; i < (info->n_entries + 1) / 2; i++) { l3cc = l3cc_combine(get_entry_l3cc(info, 2 * i), get_entry_l3cc(info, 2 * i + 1)); - mocs_dbg(&gt_to_xe(gt)->drm, "LNCFCMOCS[%d] 0x%x 0x%x\n", i, + mocs_dbg(gt, "LNCFCMOCS[%d] 0x%x 0x%x\n", i, XELP_LNCFCMOCS(i).addr, l3cc); if (regs_are_mcr(gt)) @@ -568,7 +566,7 @@ void xe_mocs_init(struct xe_gt *gt) * performed by the GuC. */ flags = get_mocs_settings(gt_to_xe(gt), &table); - mocs_dbg(&gt_to_xe(gt)->drm, "flag:0x%x\n", flags); + mocs_dbg(gt, "flag:0x%x\n", flags); if (flags & HAS_GLOBAL_MOCS) __init_mocs_table(gt, &table); -- cgit v1.2.3 From bde5d76785bcf62afcfd873504599539d4e6c014 Mon Sep 17 00:00:00 2001 From: Zhanjun Dong Date: Thu, 14 Mar 2024 14:07:35 -0700 Subject: drm/xe: Add helper macro to loop each DSS Add helper macro to loop each DSS. This is a precursor patch to allow for easier iteration through MCR registers and other per-DSS uses. Signed-off-by: Zhanjun Dong Reviewed-by: Michal Wajdeczko Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20240314210735.258553-2-zhanjun.dong@intel.com --- drivers/gpu/drm/xe/xe_gt_mcr.c | 34 ++++++++++++++++++++++++++++------ drivers/gpu/drm/xe/xe_gt_mcr.h | 14 ++++++++++++++ drivers/gpu/drm/xe/xe_gt_topology.c | 3 --- drivers/gpu/drm/xe/xe_gt_topology.h | 11 +++++++++++ drivers/gpu/drm/xe/xe_gt_types.h | 6 ++++-- 5 files changed, 57 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c index a7ab9ba645f9..866bbd26ba3f 100644 --- a/drivers/gpu/drm/xe/xe_gt_mcr.c +++ b/drivers/gpu/drm/xe/xe_gt_mcr.c @@ -6,6 +6,7 @@ #include "xe_gt_mcr.h" #include "regs/xe_gt_regs.h" +#include "xe_assert.h" #include "xe_gt.h" #include "xe_gt_topology.h" #include "xe_gt_types.h" @@ -294,14 +295,35 @@ static void init_steering_mslice(struct xe_gt *gt) gt->steering[LNCF].instance_target = 0; /* unused */ } -static void init_steering_dss(struct xe_gt *gt) +static unsigned int dss_per_group(struct xe_gt *gt) +{ + return gt_to_xe(gt)->info.platform == XE_PVC ?
8 : 4; +} + +/** + * xe_gt_mcr_get_dss_steering - Get the group/instance steering for a DSS + * @gt: GT structure + * @dss: DSS ID to obtain steering for + * @group: pointer to storage for steering group ID + * @instance: pointer to storage for steering instance ID + */ +void xe_gt_mcr_get_dss_steering(struct xe_gt *gt, unsigned int dss, u16 *group, u16 *instance) { - unsigned int dss = min(xe_dss_mask_group_ffs(gt->fuse_topo.g_dss_mask, 0, 0), - xe_dss_mask_group_ffs(gt->fuse_topo.c_dss_mask, 0, 0)); - unsigned int dss_per_grp = gt_to_xe(gt)->info.platform == XE_PVC ? 8 : 4; + int dss_per_grp = dss_per_group(gt); + + xe_gt_assert(gt, dss < XE_MAX_DSS_FUSE_BITS); + + *group = dss / dss_per_grp; + *instance = dss % dss_per_grp; +} - gt->steering[DSS].group_target = dss / dss_per_grp; - gt->steering[DSS].instance_target = dss % dss_per_grp; +static void init_steering_dss(struct xe_gt *gt) +{ + xe_gt_mcr_get_dss_steering(gt, + min(xe_dss_mask_group_ffs(gt->fuse_topo.g_dss_mask, 0, 0), + xe_dss_mask_group_ffs(gt->fuse_topo.c_dss_mask, 0, 0)), + &gt->steering[DSS].group_target, + &gt->steering[DSS].instance_target); } static void init_steering_oaddrm(struct xe_gt *gt) diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.h b/drivers/gpu/drm/xe/xe_gt_mcr.h index 27ca1bc880a0..a7f4ab1aa584 100644 --- a/drivers/gpu/drm/xe/xe_gt_mcr.h +++ b/drivers/gpu/drm/xe/xe_gt_mcr.h @@ -7,6 +7,7 @@ #define _XE_GT_MCR_H_ #include "regs/xe_reg_defs.h" +#include "xe_gt_topology.h" struct drm_printer; struct xe_gt; @@ -25,5 +26,18 @@ void xe_gt_mcr_multicast_write(struct xe_gt *gt, struct xe_reg_mcr mcr_reg, u32 value); void xe_gt_mcr_steering_dump(struct xe_gt *gt, struct drm_printer *p); +void xe_gt_mcr_get_dss_steering(struct xe_gt *gt, unsigned int dss, u16 *group, u16 *instance); + +/* + * Loop over each DSS and determine the group and instance IDs that + * should be used to steer MCR accesses toward this DSS. + * @dss: DSS ID to obtain steering for + * @gt: GT structure + * @group: steering group ID, data type: u16 + * @instance: steering instance ID, data type: u16 + */ +#define for_each_dss_steering(dss, gt, group, instance) \ + for_each_dss((dss), (gt)) \ + for_each_if((xe_gt_mcr_get_dss_steering((gt), (dss), &(group), &(instance)), true)) #endif /* _XE_GT_MCR_H_ */ diff --git a/drivers/gpu/drm/xe/xe_gt_topology.c b/drivers/gpu/drm/xe/xe_gt_topology.c index 5dc62fe1be49..f5773a14f3c8 100644 --- a/drivers/gpu/drm/xe/xe_gt_topology.c +++ b/drivers/gpu/drm/xe/xe_gt_topology.c @@ -11,9 +11,6 @@ #include "xe_gt.h" #include "xe_mmio.h" -#define XE_MAX_DSS_FUSE_BITS (32 * XE_MAX_DSS_FUSE_REGS) -#define XE_MAX_EU_FUSE_BITS (32 * XE_MAX_EU_FUSE_REGS) - static void load_dss_mask(struct xe_gt *gt, xe_dss_mask_t mask, int numregs, ...)
{ diff --git a/drivers/gpu/drm/xe/xe_gt_topology.h b/drivers/gpu/drm/xe/xe_gt_topology.h index d1b54fb52ea6..b3e357777a6e 100644 --- a/drivers/gpu/drm/xe/xe_gt_topology.h +++ b/drivers/gpu/drm/xe/xe_gt_topology.h @@ -8,6 +8,17 @@ #include "xe_gt_types.h" +/* + * Loop over each DSS with the bit is 1 in geometry or compute mask + * @dss: iterated DSS bit from the DSS mask + * @gt: GT structure + */ +#define for_each_dss(dss, gt) \ + for_each_or_bit((dss), \ + (gt)->fuse_topo.g_dss_mask, \ + (gt)->fuse_topo.c_dss_mask, \ + XE_MAX_DSS_FUSE_BITS) + struct drm_printer; void xe_gt_topology_init(struct xe_gt *gt); diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h index 70c615dd1498..f6da2ad9719f 100644 --- a/drivers/gpu/drm/xe/xe_gt_types.h +++ b/drivers/gpu/drm/xe/xe_gt_types.h @@ -25,10 +25,12 @@ enum xe_gt_type { }; #define XE_MAX_DSS_FUSE_REGS 3 +#define XE_MAX_DSS_FUSE_BITS (32 * XE_MAX_DSS_FUSE_REGS) #define XE_MAX_EU_FUSE_REGS 1 +#define XE_MAX_EU_FUSE_BITS (32 * XE_MAX_EU_FUSE_REGS) -typedef unsigned long xe_dss_mask_t[BITS_TO_LONGS(32 * XE_MAX_DSS_FUSE_REGS)]; -typedef unsigned long xe_eu_mask_t[BITS_TO_LONGS(32 * XE_MAX_EU_FUSE_REGS)]; +typedef unsigned long xe_dss_mask_t[BITS_TO_LONGS(XE_MAX_DSS_FUSE_BITS)]; +typedef unsigned long xe_eu_mask_t[BITS_TO_LONGS(XE_MAX_EU_FUSE_BITS)]; struct xe_mmio_range { u32 start; -- cgit v1.2.3 From 0322fa12f4513eca159f6f605282ccf5f4f2bbc3 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Thu, 14 Mar 2024 18:31:25 +0100 Subject: drm/xe: Assert size of the struct xe_reg MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We want to keep the struct xe_reg as small as possible. Make sure we don't accidentally change its size. Reviewed-by: Matt Roper Signed-off-by: Michal Wajdeczko Link: https://patchwork.freedesktop.org/patch/msgid/20240314173130.1177-2-michal.wajdeczko@intel.com Signed-off-by: Michał Winiarski --- drivers/gpu/drm/xe/regs/xe_reg_defs.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/xe/regs/xe_reg_defs.h b/drivers/gpu/drm/xe/regs/xe_reg_defs.h index c50e7650c09a..114b07db962f 100644 --- a/drivers/gpu/drm/xe/regs/xe_reg_defs.h +++ b/drivers/gpu/drm/xe/regs/xe_reg_defs.h @@ -6,6 +6,8 @@ #ifndef _XE_REG_DEFS_H_ #define _XE_REG_DEFS_H_ +#include <linux/build_bug.h> + #include "compat-i915-headers/i915_reg_defs.h" /** @@ -44,6 +46,7 @@ struct xe_reg { u32 raw; }; }; +static_assert(sizeof(struct xe_reg) == sizeof(u32)); /** * struct xe_reg_mcr - MCR register definition -- cgit v1.2.3 From 5a9481a86b7448d600829e6f14205a241a23a03c Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Thu, 14 Mar 2024 18:31:26 +0100 Subject: drm/xe: Define XE_REG_OPTION_VF MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We will tag registers that SR-IOV Virtual Functions can access. This will help us catch any invalid usage and/or provide custom replacement if available. Reviewed-by: Matt Roper Signed-off-by: Michal Wajdeczko Link: https://patchwork.freedesktop.org/patch/msgid/20240314173130.1177-3-michal.wajdeczko@intel.com Signed-off-by: Michał Winiarski --- drivers/gpu/drm/xe/regs/xe_reg_defs.h | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/gpu/drm/xe/regs/xe_reg_defs.h b/drivers/gpu/drm/xe/regs/xe_reg_defs.h index 114b07db962f..c89ef2b79a3f 100644 --- a/drivers/gpu/drm/xe/regs/xe_reg_defs.h +++ b/drivers/gpu/drm/xe/regs/xe_reg_defs.h @@ -37,6 +37,10 @@ struct xe_reg { * value can inspect it.
*/ u32 mcr:1; + /** + * @vf: register is accessible from the Virtual Function. + */ + u32 vf:1; /** * @ext: access MMIO extension space for current register. */ @@ -78,6 +82,13 @@ struct xe_reg_mcr { */ #define XE_REG_OPTION_MASKED .masked = 1 +/** + * XE_REG_OPTION_VF - Register is "VF" accessible. + * + * To be used with XE_REG() and XE_REG_INITIALIZER(). + */ +#define XE_REG_OPTION_VF .vf = 1 + /** * XE_REG_INITIALIZER - Initializer for xe_reg_t. * @r_: Register offset -- cgit v1.2.3 From cfe7267ef14bb2b658c135af504b0aa46ff8e9ab Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Thu, 14 Mar 2024 18:31:27 +0100 Subject: drm/xe: Mark VF accessible GuC registers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Only selected registers are available for Virtual Functions. Reviewed-by: Matt Roper Signed-off-by: Michal Wajdeczko Link: https://patchwork.freedesktop.org/patch/msgid/20240314173130.1177-4-michal.wajdeczko@intel.com Signed-off-by: Michał Winiarski --- drivers/gpu/drm/xe/regs/xe_guc_regs.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/xe/regs/xe_guc_regs.h b/drivers/gpu/drm/xe/regs/xe_guc_regs.h index 4e7f809d2b00..11682e675e0f 100644 --- a/drivers/gpu/drm/xe/regs/xe_guc_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_guc_regs.h @@ -100,14 +100,14 @@ #define GT_PM_CONFIG XE_REG(0x13816c) #define GT_DOORBELL_ENABLE REG_BIT(0) -#define GUC_HOST_INTERRUPT XE_REG(0x1901f0) +#define GUC_HOST_INTERRUPT XE_REG(0x1901f0, XE_REG_OPTION_VF) -#define VF_SW_FLAG(n) XE_REG(0x190240 + (n) * 4) +#define VF_SW_FLAG(n) XE_REG(0x190240 + (n) * 4, XE_REG_OPTION_VF) #define VF_SW_FLAG_COUNT 4 -#define MED_GUC_HOST_INTERRUPT XE_REG(0x190304) +#define MED_GUC_HOST_INTERRUPT XE_REG(0x190304, XE_REG_OPTION_VF) -#define MED_VF_SW_FLAG(n) XE_REG(0x190310 + (n) * 4) +#define MED_VF_SW_FLAG(n) XE_REG(0x190310 + (n) * 4, XE_REG_OPTION_VF) #define MED_VF_SW_FLAG_COUNT 4 #define GUC_TLB_INV_CR XE_REG(0xcee8) -- cgit v1.2.3 From 50707fdb6e366e156d6ea05afaf46223f9e00c3d Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Thu, 14 Mar 2024 18:31:28 +0100 Subject: drm/xe: Mark VF accessible global registers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Only selected registers are available for Virtual Functions. Reviewed-by: Matt Roper Signed-off-by: Michal Wajdeczko Link: https://patchwork.freedesktop.org/patch/msgid/20240314173130.1177-5-michal.wajdeczko@intel.com Signed-off-by: Michał Winiarski --- drivers/gpu/drm/xe/regs/xe_regs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/regs/xe_regs.h b/drivers/gpu/drm/xe/regs/xe_regs.h index 2c214bb9b671..722fb6dbb72e 100644 --- a/drivers/gpu/drm/xe/regs/xe_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_regs.h @@ -57,7 +57,7 @@ #define DG1_MSTR_IRQ REG_BIT(31) #define DG1_MSTR_TILE(t) REG_BIT(t) -#define GFX_MSTR_IRQ XE_REG(0x190010) +#define GFX_MSTR_IRQ XE_REG(0x190010, XE_REG_OPTION_VF) #define MASTER_IRQ REG_BIT(31) #define GU_MISC_IRQ REG_BIT(29) #define DISPLAY_IRQ REG_BIT(16) -- cgit v1.2.3 From 42b266be3228f03e402bc663aa8dee8fdca1d48b Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Thu, 14 Mar 2024 18:31:29 +0100 Subject: drm/xe: Mark VF accessible interrupt registers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Interrupt registers 1900xx are VF accessible but only until version 12.50 as on newer platforms VFs are using memory-based interrupts. 
To avoid complexity, we mark those registers with XE_REG_OPTION_VF unconditionally, as IRQ handling on newer VFs is different anyway. Signed-off-by: Michal Wajdeczko Cc: Matt Roper Reviewed-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20240314173130.1177-6-michal.wajdeczko@intel.com Signed-off-by: Michał Winiarski --- drivers/gpu/drm/xe/regs/xe_gt_regs.h | 40 +++++++++++++++++++++--------------- 1 file changed, 23 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index abb6e86fe367..95969935f58b 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -439,7 +439,13 @@ #define GT_PERF_STATUS XE_REG(0x1381b4) #define VOLTAGE_MASK REG_GENMASK(10, 0) -#define GT_INTR_DW(x) XE_REG(0x190018 + ((x) * 4)) +/* + * Note: Interrupt registers 1900xx are VF accessible only until version 12.50. + * On newer platforms, VFs are using memory-based interrupts instead. + * However, for simplicity we keep this XE_REG_OPTION_VF tag intact. + */ + +#define GT_INTR_DW(x) XE_REG(0x190018 + ((x) * 4), XE_REG_OPTION_VF) #define INTR_GSC REG_BIT(31) #define INTR_GUC REG_BIT(25) #define INTR_MGUC REG_BIT(24) @@ -450,16 +456,16 @@ #define INTR_VECS(x) REG_BIT(31 - (x)) #define INTR_VCS(x) REG_BIT(x) -#define RENDER_COPY_INTR_ENABLE XE_REG(0x190030) -#define VCS_VECS_INTR_ENABLE XE_REG(0x190034) -#define GUC_SG_INTR_ENABLE XE_REG(0x190038) +#define RENDER_COPY_INTR_ENABLE XE_REG(0x190030, XE_REG_OPTION_VF) +#define VCS_VECS_INTR_ENABLE XE_REG(0x190034, XE_REG_OPTION_VF) +#define GUC_SG_INTR_ENABLE XE_REG(0x190038, XE_REG_OPTION_VF) #define ENGINE1_MASK REG_GENMASK(31, 16) #define ENGINE0_MASK REG_GENMASK(15, 0) -#define GPM_WGBOXPERF_INTR_ENABLE XE_REG(0x19003c) -#define GUNIT_GSC_INTR_ENABLE XE_REG(0x190044) -#define CCS_RSVD_INTR_ENABLE XE_REG(0x190048) +#define GPM_WGBOXPERF_INTR_ENABLE XE_REG(0x19003c, XE_REG_OPTION_VF) +#define GUNIT_GSC_INTR_ENABLE XE_REG(0x190044, XE_REG_OPTION_VF) +#define CCS_RSVD_INTR_ENABLE XE_REG(0x190048, XE_REG_OPTION_VF) -#define INTR_IDENTITY_REG(x) XE_REG(0x190060 + ((x) * 4)) +#define INTR_IDENTITY_REG(x) XE_REG(0x190060 + ((x) * 4), XE_REG_OPTION_VF) #define INTR_DATA_VALID REG_BIT(31) #define INTR_ENGINE_INSTANCE(x) REG_FIELD_GET(GENMASK(25, 20), x) #define INTR_ENGINE_CLASS(x) REG_FIELD_GET(GENMASK(18, 16), x) @@ -468,16 +474,16 @@ #define OTHER_GSC_HECI2_INSTANCE 3 #define OTHER_GSC_INSTANCE 6 -#define IIR_REG_SELECTOR(x) XE_REG(0x190070 + ((x) * 4)) -#define RCS0_RSVD_INTR_MASK XE_REG(0x190090) -#define BCS_RSVD_INTR_MASK XE_REG(0x1900a0) -#define VCS0_VCS1_INTR_MASK XE_REG(0x1900a8) -#define VCS2_VCS3_INTR_MASK XE_REG(0x1900ac) -#define VECS0_VECS1_INTR_MASK XE_REG(0x1900d0) +#define IIR_REG_SELECTOR(x) XE_REG(0x190070 + ((x) * 4), XE_REG_OPTION_VF) +#define RCS0_RSVD_INTR_MASK XE_REG(0x190090, XE_REG_OPTION_VF) +#define BCS_RSVD_INTR_MASK XE_REG(0x1900a0, XE_REG_OPTION_VF) +#define VCS0_VCS1_INTR_MASK XE_REG(0x1900a8, XE_REG_OPTION_VF) +#define VCS2_VCS3_INTR_MASK XE_REG(0x1900ac, XE_REG_OPTION_VF) +#define VECS0_VECS1_INTR_MASK XE_REG(0x1900d0, XE_REG_OPTION_VF) #define HECI2_RSVD_INTR_MASK XE_REG(0x1900e4) -#define GUC_SG_INTR_MASK XE_REG(0x1900e8) -#define GPM_WGBOXPERF_INTR_MASK XE_REG(0x1900ec) -#define GUNIT_GSC_INTR_MASK XE_REG(0x1900f4) +#define GUC_SG_INTR_MASK XE_REG(0x1900e8, XE_REG_OPTION_VF) +#define GPM_WGBOXPERF_INTR_MASK XE_REG(0x1900ec, XE_REG_OPTION_VF) +#define GUNIT_GSC_INTR_MASK XE_REG(0x1900f4, XE_REG_OPTION_VF) #define 
CCS0_CCS1_INTR_MASK XE_REG(0x190100) #define CCS2_CCS3_INTR_MASK XE_REG(0x190104) #define XEHPC_BCS1_BCS2_INTR_MASK XE_REG(0x190110) -- cgit v1.2.3 From 54c659660d637d38ec793b31c2718d905d6e86b9 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Thu, 14 Mar 2024 18:31:30 +0100 Subject: drm/xe: Make xe_mmio_read|write() functions non-inline MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Shortly we will updating xe_mmio_read|write() functions with SR-IOV specific features making those functions less suitable for inline. Convert now those functions into regular ones, lowering driver footprint, according to scripts/bloat-o-meter, by 6% add/remove: 18/18 grow/shrink: 31/603 up/down: 2719/-79663 (-76944) Function old new delta Total: Before=1276633, After=1199689, chg -6.03% add/remove: 0/0 grow/shrink: 0/0 up/down: 0/0 (0) Data old new delta Total: Before=48990, After=48990, chg +0.00% add/remove: 0/0 grow/shrink: 0/0 up/down: 0/0 (0) RO Data old new delta Total: Before=115680, After=115680, chg +0.00% Reviewed-by: Matt Roper Signed-off-by: Michal Wajdeczko Link: https://patchwork.freedesktop.org/patch/msgid/20240314173130.1177-7-michal.wajdeczko@intel.com Signed-off-by: Michał Winiarski --- drivers/gpu/drm/xe/xe_mmio.c | 72 +++++++++++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_mmio.h | 81 ++++---------------------------------------- 2 files changed, 79 insertions(+), 74 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index 7ba2477452d7..1de9de4f94b6 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -420,6 +420,78 @@ int xe_mmio_root_tile_init(struct xe_device *xe) return 0; } +u8 xe_mmio_read8(struct xe_gt *gt, struct xe_reg reg) +{ + struct xe_tile *tile = gt_to_tile(gt); + + if (reg.addr < gt->mmio.adj_limit) + reg.addr += gt->mmio.adj_offset; + + return readb((reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + reg.addr); +} + +u16 xe_mmio_read16(struct xe_gt *gt, struct xe_reg reg) +{ + struct xe_tile *tile = gt_to_tile(gt); + + if (reg.addr < gt->mmio.adj_limit) + reg.addr += gt->mmio.adj_offset; + + return readw((reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + reg.addr); +} + +void xe_mmio_write32(struct xe_gt *gt, struct xe_reg reg, u32 val) +{ + struct xe_tile *tile = gt_to_tile(gt); + + if (reg.addr < gt->mmio.adj_limit) + reg.addr += gt->mmio.adj_offset; + + writel(val, (reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + reg.addr); +} + +u32 xe_mmio_read32(struct xe_gt *gt, struct xe_reg reg) +{ + struct xe_tile *tile = gt_to_tile(gt); + + if (reg.addr < gt->mmio.adj_limit) + reg.addr += gt->mmio.adj_offset; + + return readl((reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + reg.addr); +} + +u32 xe_mmio_rmw32(struct xe_gt *gt, struct xe_reg reg, u32 clr, u32 set) +{ + u32 old, reg_val; + + old = xe_mmio_read32(gt, reg); + reg_val = (old & ~clr) | set; + xe_mmio_write32(gt, reg, reg_val); + + return old; +} + +int xe_mmio_write32_and_verify(struct xe_gt *gt, + struct xe_reg reg, u32 val, u32 mask, u32 eval) +{ + u32 reg_val; + + xe_mmio_write32(gt, reg, val); + reg_val = xe_mmio_read32(gt, reg); + + return (reg_val & mask) != eval ? 
-EINVAL : 0; +} + +bool xe_mmio_in_range(const struct xe_gt *gt, + const struct xe_mmio_range *range, + struct xe_reg reg) +{ + if (reg.addr < gt->mmio.adj_limit) + reg.addr += gt->mmio.adj_offset; + + return range && reg.addr >= range->start && reg.addr <= range->end; +} + /** * xe_mmio_read64_2x32() - Read a 64-bit register as two 32-bit reads * @gt: MMIO target GT diff --git a/drivers/gpu/drm/xe/xe_mmio.h b/drivers/gpu/drm/xe/xe_mmio.h index 98de5c13c89b..67ead99f321b 100644 --- a/drivers/gpu/drm/xe/xe_mmio.h +++ b/drivers/gpu/drm/xe/xe_mmio.h @@ -24,80 +24,13 @@ int xe_mmio_init(struct xe_device *xe); int xe_mmio_root_tile_init(struct xe_device *xe); void xe_mmio_probe_tiles(struct xe_device *xe); -static inline u8 xe_mmio_read8(struct xe_gt *gt, struct xe_reg reg) -{ - struct xe_tile *tile = gt_to_tile(gt); - - if (reg.addr < gt->mmio.adj_limit) - reg.addr += gt->mmio.adj_offset; - - return readb((reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + reg.addr); -} - -static inline u16 xe_mmio_read16(struct xe_gt *gt, struct xe_reg reg) -{ - struct xe_tile *tile = gt_to_tile(gt); - - if (reg.addr < gt->mmio.adj_limit) - reg.addr += gt->mmio.adj_offset; - - return readw((reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + reg.addr); -} - -static inline void xe_mmio_write32(struct xe_gt *gt, - struct xe_reg reg, u32 val) -{ - struct xe_tile *tile = gt_to_tile(gt); - - if (reg.addr < gt->mmio.adj_limit) - reg.addr += gt->mmio.adj_offset; - - writel(val, (reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + reg.addr); -} - -static inline u32 xe_mmio_read32(struct xe_gt *gt, struct xe_reg reg) -{ - struct xe_tile *tile = gt_to_tile(gt); - - if (reg.addr < gt->mmio.adj_limit) - reg.addr += gt->mmio.adj_offset; - - return readl((reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + reg.addr); -} - -static inline u32 xe_mmio_rmw32(struct xe_gt *gt, struct xe_reg reg, u32 clr, - u32 set) -{ - u32 old, reg_val; - - old = xe_mmio_read32(gt, reg); - reg_val = (old & ~clr) | set; - xe_mmio_write32(gt, reg, reg_val); - - return old; -} - -static inline int xe_mmio_write32_and_verify(struct xe_gt *gt, - struct xe_reg reg, u32 val, - u32 mask, u32 eval) -{ - u32 reg_val; - - xe_mmio_write32(gt, reg, val); - reg_val = xe_mmio_read32(gt, reg); - - return (reg_val & mask) != eval ? 
-EINVAL : 0; -} - -static inline bool xe_mmio_in_range(const struct xe_gt *gt, - const struct xe_mmio_range *range, - struct xe_reg reg) -{ - if (reg.addr < gt->mmio.adj_limit) - reg.addr += gt->mmio.adj_offset; - - return range && reg.addr >= range->start && reg.addr <= range->end; -} +u8 xe_mmio_read8(struct xe_gt *gt, struct xe_reg reg); +u16 xe_mmio_read16(struct xe_gt *gt, struct xe_reg reg); +void xe_mmio_write32(struct xe_gt *gt, struct xe_reg reg, u32 val); +u32 xe_mmio_read32(struct xe_gt *gt, struct xe_reg reg); +u32 xe_mmio_rmw32(struct xe_gt *gt, struct xe_reg reg, u32 clr, u32 set); +int xe_mmio_write32_and_verify(struct xe_gt *gt, struct xe_reg reg, u32 val, u32 mask, u32 eval); +bool xe_mmio_in_range(const struct xe_gt *gt, const struct xe_mmio_range *range, struct xe_reg reg); int xe_mmio_probe_vram(struct xe_device *xe); u64 xe_mmio_read64_2x32(struct xe_gt *gt, struct xe_reg reg); -- cgit v1.2.3 From 6583b0839ad5a1d7ee69f9c5749acdbde7be9b80 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Wed, 13 Mar 2024 11:41:30 +0100 Subject: drm/xe: Allow VRAM BO allocations aligned to 64K MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While today we are getting VRAM allocations aligned to 64K as the XE_VRAM_FLAGS_NEED64K flag could be set, we shouldn't only rely on that flag and we should also allow caller to specify required 64K alignment explicitly. Define new XE_BO_NEEDS_64K flag for that. Cc: Matt Roper Reviewed-by: Rodrigo Vivi Signed-off-by: Michal Wajdeczko Link: https://patchwork.freedesktop.org/patch/msgid/20240313104132.1045-2-michal.wajdeczko@intel.com Signed-off-by: Michał Winiarski --- drivers/gpu/drm/xe/xe_bo.c | 3 ++- drivers/gpu/drm/xe/xe_bo.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index d7e0deb1b869..8383c56c9960 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -1217,7 +1217,8 @@ struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo, if (flags & (XE_BO_CREATE_VRAM_MASK | XE_BO_CREATE_STOLEN_BIT) && !(flags & XE_BO_CREATE_IGNORE_MIN_PAGE_SIZE_BIT) && - xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) { + ((xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) || + (flags & XE_BO_NEEDS_64K))) { aligned_size = ALIGN(size, SZ_64K); if (type != ttm_bo_type_device) size = ALIGN(size, SZ_64K); diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h index c59ad15961ce..87ae8ee7c316 100644 --- a/drivers/gpu/drm/xe/xe_bo.h +++ b/drivers/gpu/drm/xe/xe_bo.h @@ -45,6 +45,7 @@ #define XE_BO_PAGETABLE BIT(12) #define XE_BO_NEEDS_CPU_ACCESS BIT(13) #define XE_BO_NEEDS_UC BIT(14) +#define XE_BO_NEEDS_64K BIT(15) /* this one is trigger internally only */ #define XE_BO_INTERNAL_TEST BIT(30) #define XE_BO_INTERNAL_64K BIT(31) -- cgit v1.2.3 From 28b2a21892be14bde532da20b0449bc5dac7311b Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Wed, 13 Mar 2024 11:41:31 +0100 Subject: drm/xe/pf: Request 64K aligned allocations for LMTT PD MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The LMTT Page Directory, as well as the directory entries, must be aligned on a 64KB boundary in VRAM. Use explicit alignment flag to match hardware requirement. 
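For illustration, the hardware constraint amounts to the object's VRAM offset having its low 16 bits clear; a minimal sketch of such a check (hypothetical helper, not part of the patch):

	/* sketch: a 64K-aligned VRAM offset has its low 16 bits zero */
	static bool is_64k_aligned(u64 vram_offset)
	{
		return (vram_offset & (SZ_64K - 1)) == 0;
	}
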
Bspec: 52404, 67468 Cc: Michał Winiarski Reviewed-by: Rodrigo Vivi Signed-off-by: Michal Wajdeczko Link: https://patchwork.freedesktop.org/patch/msgid/20240313104132.1045-3-michal.wajdeczko@intel.com Signed-off-by: Michał Winiarski --- drivers/gpu/drm/xe/xe_lmtt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_lmtt.c b/drivers/gpu/drm/xe/xe_lmtt.c index 0d7c5514e092..dfb0e1887615 100644 --- a/drivers/gpu/drm/xe/xe_lmtt.c +++ b/drivers/gpu/drm/xe/xe_lmtt.c @@ -71,7 +71,7 @@ static struct xe_lmtt_pt *lmtt_pt_alloc(struct xe_lmtt *lmtt, unsigned int level lmtt->ops->lmtt_pte_num(level)), ttm_bo_type_kernel, XE_BO_CREATE_VRAM_IF_DGFX(lmtt_to_tile(lmtt)) | - XE_BO_CREATE_PINNED_BIT); + XE_BO_NEEDS_64K | XE_BO_CREATE_PINNED_BIT); if (IS_ERR(bo)) { err = PTR_ERR(bo); goto out_free_pt; -- cgit v1.2.3 From df26ac946416a3fc43347f143c71b2f4c0b7c63e Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Wed, 13 Mar 2024 11:41:32 +0100 Subject: drm/xe/pf: Always select Multi-Level LMTT for platforms 12.60+ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Multi-Level LMTT variant is not specific only to the PVC. Change logic to select it for all new platforms beyond 12.60. Bspec: 52404, 67468 Cc: Rodrigo Vivi Cc: Michał Winiarski Signed-off-by: Michal Wajdeczko Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240313104132.1045-4-michal.wajdeczko@intel.com Signed-off-by: Michał Winiarski --- drivers/gpu/drm/xe/xe_lmtt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_lmtt.c b/drivers/gpu/drm/xe/xe_lmtt.c index dfb0e1887615..7f504392a8bf 100644 --- a/drivers/gpu/drm/xe/xe_lmtt.c +++ b/drivers/gpu/drm/xe/xe_lmtt.c @@ -35,7 +35,7 @@ static bool xe_has_multi_level_lmtt(struct xe_device *xe) { - return xe->info.platform == XE_PVC; + return GRAPHICS_VERx100(xe) >= 1260; } static struct xe_tile *lmtt_to_tile(struct xe_lmtt *lmtt) -- cgit v1.2.3 From 10ed10c27967cacf90d3daa6f66ddddaa35bb796 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Mon, 18 Mar 2024 09:34:32 +0000 Subject: drm/xe/client: remove bogus rcu list usage We use plain spinlock to protect readers and writers, so there is no actual RCU here. Rather use the more appropriate non-rcu list based API. Signed-off-by: Matthew Auld Cc: Nirmoy Das Reviewed-by: Nirmoy Das Link: https://patchwork.freedesktop.org/patch/msgid/20240318093431.21075-3-matthew.auld@intel.com --- drivers/gpu/drm/xe/xe_drm_client.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_drm_client.c b/drivers/gpu/drm/xe/xe_drm_client.c index 6040e4d22b28..142a1905b624 100644 --- a/drivers/gpu/drm/xe/xe_drm_client.c +++ b/drivers/gpu/drm/xe/xe_drm_client.c @@ -78,7 +78,7 @@ void xe_drm_client_add_bo(struct xe_drm_client *client, spin_lock(&client->bos_lock); bo->client = xe_drm_client_get(client); - list_add_tail_rcu(&bo->client_link, &client->bos_list); + list_add_tail(&bo->client_link, &client->bos_list); spin_unlock(&client->bos_lock); } @@ -96,7 +96,7 @@ void xe_drm_client_remove_bo(struct xe_bo *bo) struct xe_drm_client *client = bo->client; spin_lock(&client->bos_lock); - list_del_rcu(&bo->client_link); + list_del(&bo->client_link); spin_unlock(&client->bos_lock); xe_drm_client_put(client); @@ -154,7 +154,7 @@ static void show_meminfo(struct drm_printer *p, struct drm_file *file) /* Internal objects. 
*/ spin_lock(&client->bos_lock); - list_for_each_entry_rcu(bo, &client->bos_list, client_link) { + list_for_each_entry(bo, &client->bos_list, client_link) { if (!bo || !kref_get_unless_zero(&bo->ttm.base.refcount)) continue; bo_meminfo(bo, stats); -- cgit v1.2.3 From ab0c5aba32fc6dd17ea699d23ad871223d523a17 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Mon, 18 Mar 2024 09:34:33 +0000 Subject: drm/xe/client: drop bogus bo NULL check If we fished it out of the list then it can't be null; the list entry is embedded in the bo. Signed-off-by: Matthew Auld Cc: Nirmoy Das Reviewed-by: Nirmoy Das Link: https://patchwork.freedesktop.org/patch/msgid/20240318093431.21075-4-matthew.auld@intel.com --- drivers/gpu/drm/xe/xe_drm_client.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_drm_client.c b/drivers/gpu/drm/xe/xe_drm_client.c index 142a1905b624..da154b059583 100644 --- a/drivers/gpu/drm/xe/xe_drm_client.c +++ b/drivers/gpu/drm/xe/xe_drm_client.c @@ -155,7 +155,7 @@ static void show_meminfo(struct drm_printer *p, struct drm_file *file) /* Internal objects. */ spin_lock(&client->bos_lock); list_for_each_entry(bo, &client->bos_list, client_link) { - if (!bo || !kref_get_unless_zero(&bo->ttm.base.refcount)) + if (!kref_get_unless_zero(&bo->ttm.base.refcount)) continue; bo_meminfo(bo, stats); xe_bo_put(bo); -- cgit v1.2.3 From 1d3c830574dcb2d6b37c82d70cd3027102c0c603 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Mon, 18 Mar 2024 10:36:17 +0000 Subject: drm/xe/vm: fix xe_assert() The region can be used as an index into the region_to_mem_type, so we should be asserting that it is less than the ARRAY_SIZE here. Signed-off-by: Matthew Auld Cc: Nirmoy Das Reviewed-by: Nirmoy Das Link: https://patchwork.freedesktop.org/patch/msgid/20240318103616.26240-2-matthew.auld@intel.com --- drivers/gpu/drm/xe/xe_vm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index cbb9b8935c90..51d62323d9ee 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -2033,7 +2033,7 @@ static int xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma, struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, q); int err; - xe_assert(vm->xe, region <= ARRAY_SIZE(region_to_mem_type)); + xe_assert(vm->xe, region < ARRAY_SIZE(region_to_mem_type)); if (!xe_vma_has_no_bo(vma)) { err = xe_bo_migrate(xe_vma_bo(vma), region_to_mem_type[region]); -- cgit v1.2.3 From 4f4fcafde343a54465f85a2909fc684918507a4b Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Mon, 18 Mar 2024 17:43:41 +0100 Subject: drm/xe: Fix potential integer overflow in page size calculation Explicitly cast tbo->page_alignment to u64 before bit-shifting to prevent overflow when assigning to min_page_size.
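The hazard is easy to reproduce in isolation; a standalone sketch with a hypothetical alignment value (not taken from the driver):

	#include <stdint.h>

	#define PAGE_SHIFT 12

	/* tbo->page_alignment is a 32-bit field; with a large alignment such
	 * as 0x200000 (2M pages), the 32-bit shift wraps to 0, while the u64
	 * cast yields the intended 0x200000000. */
	uint32_t page_alignment = 0x200000;
	uint64_t wrong = page_alignment << PAGE_SHIFT;			/* wraps to 0 */
	uint64_t right = (uint64_t)page_alignment << PAGE_SHIFT;	/* 0x200000000 */
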
Cc: Matthew Auld Cc: Matthew Brost Signed-off-by: Nirmoy Das Reviewed-by: Matthew Auld Signed-off-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20240318164342.3094-1-nirmoy.das@intel.com --- drivers/gpu/drm/xe/xe_ttm_vram_mgr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c index 115ec745e502..0678faf83212 100644 --- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c +++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c @@ -91,7 +91,7 @@ static int xe_ttm_vram_mgr_new(struct ttm_resource_manager *man, min_page_size = mgr->default_page_size; if (tbo->page_alignment) - min_page_size = tbo->page_alignment << PAGE_SHIFT; + min_page_size = (u64)tbo->page_alignment << PAGE_SHIFT; if (WARN_ON(min_page_size < mm->chunk_size)) { err = -EINVAL; -- cgit v1.2.3 From fe87b7dfcb204a161d1e38b0e787b2f5ab520f32 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Mon, 18 Mar 2024 18:05:33 +0000 Subject: drm/xe/queue: fix engine_class bounds check The engine_class is the index into the user_to_xe_engine_class, therefore it needs to be less than. Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Signed-off-by: Matthew Auld Cc: Nirmoy Das Reviewed-by: Nirmoy Das Link: https://patchwork.freedesktop.org/patch/msgid/20240318180532.57522-4-matthew.auld@intel.com --- drivers/gpu/drm/xe/xe_exec_queue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index 6a83bc57826a..2016c1af9633 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -440,7 +440,7 @@ find_hw_engine(struct xe_device *xe, { u32 idx; - if (eci.engine_class > ARRAY_SIZE(user_to_xe_engine_class)) + if (eci.engine_class >= ARRAY_SIZE(user_to_xe_engine_class)) return NULL; if (eci.gt_id >= xe->info.gt_count) -- cgit v1.2.3 From a5ef563b1d676548a4c5016540833ff970230964 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Mon, 18 Mar 2024 18:05:34 +0000 Subject: drm/xe/device: fix XE_MAX_GT_PER_TILE check Here XE_MAX_GT_PER_TILE is the total, therefore the gt index should always be less than. Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Signed-off-by: Matthew Auld Cc: Nirmoy Das Reviewed-by: Nirmoy Das Link: https://patchwork.freedesktop.org/patch/msgid/20240318180532.57522-5-matthew.auld@intel.com --- drivers/gpu/drm/xe/xe_device.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h index 14be34d9f543..5c254ec9c602 100644 --- a/drivers/gpu/drm/xe/xe_device.h +++ b/drivers/gpu/drm/xe/xe_device.h @@ -58,7 +58,7 @@ static inline struct xe_tile *xe_device_get_root_tile(struct xe_device *xe) static inline struct xe_gt *xe_tile_get_gt(struct xe_tile *tile, u8 gt_id) { - if (drm_WARN_ON(&tile_to_xe(tile)->drm, gt_id > XE_MAX_GT_PER_TILE)) + if (drm_WARN_ON(&tile_to_xe(tile)->drm, gt_id >= XE_MAX_GT_PER_TILE)) gt_id = 0; return gt_id ? tile->media_gt : tile->primary_gt; -- cgit v1.2.3 From a96cd71ec7be0790f9fc4039ad21be8d214b03a4 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Mon, 18 Mar 2024 18:05:35 +0000 Subject: drm/xe/device: fix XE_MAX_TILES_PER_DEVICE check Here XE_MAX_TILES_PER_DEVICE is the gt array size, therefore the gt index should always be less than. v2 (Lucas): - Add fixes tag. 
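The bounds fixes in this series are all the same index-versus-count confusion; a minimal sketch (hypothetical table, not driver code):

	#define N_ENTRIES 2
	static const int table[N_ENTRIES] = { 10, 20 };

	/* valid indices are 0..N_ENTRIES-1, so the guard must use >= */
	if (idx >= N_ENTRIES)	/* 'idx > N_ENTRIES' lets idx == N_ENTRIES through */
		idx = 0;
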
Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Signed-off-by: Matthew Auld Cc: Nirmoy Das Reviewed-by: Nirmoy Das Acked-by: Lucas De Marchi Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240318180532.57522-6-matthew.auld@intel.com --- drivers/gpu/drm/xe/xe_device.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h index 5c254ec9c602..d413bc2c6be5 100644 --- a/drivers/gpu/drm/xe/xe_device.h +++ b/drivers/gpu/drm/xe/xe_device.h @@ -79,7 +79,7 @@ static inline struct xe_gt *xe_device_get_gt(struct xe_device *xe, u8 gt_id) if (MEDIA_VER(xe) >= 13) { gt = xe_tile_get_gt(root_tile, gt_id); } else { - if (drm_WARN_ON(&xe->drm, gt_id > XE_MAX_TILES_PER_DEVICE)) + if (drm_WARN_ON(&xe->drm, gt_id >= XE_MAX_TILES_PER_DEVICE)) gt_id = 0; gt = xe->tiles[gt_id].primary_gt; -- cgit v1.2.3 From 6d74e387aa1d7f62bdd6c7d1da6c3061450f3f55 Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Mon, 18 Mar 2024 10:35:47 +0100 Subject: drm/xe: Drop bogus vma NULL check The vma pointer can't be NULL here. Cc: Matthew Auld Signed-off-by: Nirmoy Das Reviewed-by: Matthew Auld Reviewed-by: Rodrigo Vivi Signed-off-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20240318093547.16326-1-nirmoy.das@intel.com --- drivers/gpu/drm/xe/xe_pt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index 7f54bc3e389d..8d3922d2206e 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -618,7 +618,7 @@ xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma, struct xe_pt *pt = xe_vma_vm(vma)->pt_root[tile->id]; int ret; - if (vma && (vma->gpuva.flags & XE_VMA_ATOMIC_PTE_BIT) && + if ((vma->gpuva.flags & XE_VMA_ATOMIC_PTE_BIT) && (is_devmem || !IS_DGFX(xe))) xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE; -- cgit v1.2.3 From f87cf2877b16313966a98110888540cdd4c5c051 Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Mon, 18 Mar 2024 10:10:55 +0100 Subject: drm/xe: Remove usage of unsafe strcpy Remove usage of unsafe strcpy with a helper function to convert engine class to string. 
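Returning a pointer to a string literal removes the fixed-size buffer entirely, so there is nothing left for strcpy() to overflow; a generic sketch of the pattern (the driver's actual helper, xe_hw_engine_class_to_str(), is a switch over the engine-class enum and appears in the diff below):

	/* sketch: look up a constant string instead of copying into a buffer */
	static const char *class_to_str(unsigned int class)
	{
		static const char * const names[] = { "rcs", "bcs", "vcs", "vecs", "ccs" };

		return class < ARRAY_SIZE(names) ? names[class] : NULL;
	}
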
Cc: Matthew Auld Signed-off-by: Nirmoy Das Reviewed-by: Matthew Auld Signed-off-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20240318091055.638-1-nirmoy.das@intel.com --- drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c | 44 ++++++++++++++------------- 1 file changed, 23 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c index 9e23ca7f45ad..c17cce53f19d 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c +++ b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c @@ -499,8 +499,8 @@ static void kobj_xe_hw_engine_class_fini(struct drm_device *drm, void *arg) kobject_put(kobj); } - static struct kobj_eclass * -kobj_xe_hw_engine_class(struct xe_device *xe, struct kobject *parent, char *name) +static struct kobj_eclass * +kobj_xe_hw_engine_class(struct xe_device *xe, struct kobject *parent, const char *name) { struct kobj_eclass *keclass; int err = 0; @@ -635,6 +635,24 @@ static void hw_engine_class_sysfs_fini(struct drm_device *drm, void *arg) kobject_put(kobj); } +static const char *xe_hw_engine_class_to_str(enum xe_engine_class class) +{ + switch (class) { + case XE_ENGINE_CLASS_RENDER: + return "rcs"; + case XE_ENGINE_CLASS_VIDEO_DECODE: + return "vcs"; + case XE_ENGINE_CLASS_VIDEO_ENHANCE: + return "vecs"; + case XE_ENGINE_CLASS_COPY: + return "bcs"; + case XE_ENGINE_CLASS_COMPUTE: + return "ccs"; + default: + return NULL; + } +} + /** * xe_hw_engine_class_sysfs_init - Init HW engine classes on GT. * @gt: Xe GT. @@ -664,7 +682,7 @@ int xe_hw_engine_class_sysfs_init(struct xe_gt *gt) goto err_object; for_each_hw_engine(hwe, gt, id) { - char name[MAX_ENGINE_CLASS_NAME_LEN]; + const char *name; struct kobj_eclass *keclass; if (hwe->class == XE_ENGINE_CLASS_OTHER || @@ -675,24 +693,8 @@ int xe_hw_engine_class_sysfs_init(struct xe_gt *gt) continue; class_mask |= 1 << hwe->class; - - switch (hwe->class) { - case XE_ENGINE_CLASS_RENDER: - strcpy(name, "rcs"); - break; - case XE_ENGINE_CLASS_VIDEO_DECODE: - strcpy(name, "vcs"); - break; - case XE_ENGINE_CLASS_VIDEO_ENHANCE: - strcpy(name, "vecs"); - break; - case XE_ENGINE_CLASS_COPY: - strcpy(name, "bcs"); - break; - case XE_ENGINE_CLASS_COMPUTE: - strcpy(name, "ccs"); - break; - default: + name = xe_hw_engine_class_to_str(hwe->class); + if (!name) { err = -EINVAL; goto err_object; } -- cgit v1.2.3 From 2a4172be4013990a794a6ef201c0223b63295423 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Thu, 14 Mar 2024 16:49:06 +0000 Subject: drm/xe/display: mark DPT with XE_BO_PAGETABLE Otherwise in the case where we use normal system memory, the CPU access will always be cached, like when filling the DPT PTEs, which is likely not what we want since HW access could be incoherent on platforms like LNL. Marking as XE_BO_PAGETABLE will force wc/uc underneath on such platforms. 
Signed-off-by: Matthew Auld Cc: Juha-Pekka Heikkila Reviewed-by: Lucas De Marchi Reviewed-by: Juha-Pekka Heikkila Link: https://patchwork.freedesktop.org/patch/msgid/20240314164905.239449-2-matthew.auld@intel.com --- drivers/gpu/drm/xe/display/xe_fb_pin.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/xe/display/xe_fb_pin.c b/drivers/gpu/drm/xe/display/xe_fb_pin.c index 722c84a56607..b220f136be70 100644 --- a/drivers/gpu/drm/xe/display/xe_fb_pin.c +++ b/drivers/gpu/drm/xe/display/xe_fb_pin.c @@ -100,17 +100,20 @@ static int __xe_pin_fb_vma_dpt(struct intel_framebuffer *fb, dpt = xe_bo_create_pin_map(xe, tile0, NULL, dpt_size, ttm_bo_type_kernel, XE_BO_CREATE_VRAM0_BIT | - XE_BO_CREATE_GGTT_BIT); + XE_BO_CREATE_GGTT_BIT | + XE_BO_PAGETABLE); else dpt = xe_bo_create_pin_map(xe, tile0, NULL, dpt_size, ttm_bo_type_kernel, XE_BO_CREATE_STOLEN_BIT | - XE_BO_CREATE_GGTT_BIT); + XE_BO_CREATE_GGTT_BIT | + XE_BO_PAGETABLE); if (IS_ERR(dpt)) dpt = xe_bo_create_pin_map(xe, tile0, NULL, dpt_size, ttm_bo_type_kernel, XE_BO_CREATE_SYSTEM_BIT | - XE_BO_CREATE_GGTT_BIT); + XE_BO_CREATE_GGTT_BIT | + XE_BO_PAGETABLE); if (IS_ERR(dpt)) return PTR_ERR(dpt); -- cgit v1.2.3 From 1997eeeac5ba2664d06ee79979fdd2637badabe7 Mon Sep 17 00:00:00 2001 From: Juha-Pekka Heikkila Date: Mon, 18 Mar 2024 22:18:50 +0200 Subject: drm/xe/display: Mark dpt and related vma as uncached Mark dpt and related vma as uncached to avoid pipe faults on some devices. Signed-off-by: Juha-Pekka Heikkila Reviewed-by: Matthew Auld Signed-off-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20240318201850.127785-1-juhapekka.heikkila@gmail.com --- drivers/gpu/drm/xe/display/xe_fb_pin.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/xe/display/xe_fb_pin.c b/drivers/gpu/drm/xe/display/xe_fb_pin.c index b220f136be70..2eb622510186 100644 --- a/drivers/gpu/drm/xe/display/xe_fb_pin.c +++ b/drivers/gpu/drm/xe/display/xe_fb_pin.c @@ -30,7 +30,7 @@ write_dpt_rotated(struct xe_bo *bo, struct iosys_map *map, u32 *dpt_ofs, u32 bo_ for (row = 0; row < height; row++) { u64 pte = ggtt->pt_ops->pte_encode_bo(bo, src_idx * XE_PAGE_SIZE, - xe->pat.idx[XE_CACHE_WB]); + xe->pat.idx[XE_CACHE_NONE]); iosys_map_wr(map, *dpt_ofs, u64, pte); *dpt_ofs += 8; @@ -62,7 +62,7 @@ write_dpt_remapped(struct xe_bo *bo, struct iosys_map *map, u32 *dpt_ofs, for (column = 0; column < width; column++) { iosys_map_wr(map, *dpt_ofs, u64, pte_encode_bo(bo, src_idx * XE_PAGE_SIZE, - xe->pat.idx[XE_CACHE_WB])); + xe->pat.idx[XE_CACHE_NONE])); *dpt_ofs += 8; src_idx++; @@ -122,7 +122,7 @@ static int __xe_pin_fb_vma_dpt(struct intel_framebuffer *fb, for (x = 0; x < size / XE_PAGE_SIZE; x++) { u64 pte = ggtt->pt_ops->pte_encode_bo(bo, x * XE_PAGE_SIZE, - xe->pat.idx[XE_CACHE_WB]); + xe->pat.idx[XE_CACHE_NONE]); iosys_map_wr(&dpt->vmap, x * 8, u64, pte); } @@ -168,7 +168,7 @@ write_ggtt_rotated(struct xe_bo *bo, struct xe_ggtt *ggtt, u32 *ggtt_ofs, u32 bo for (row = 0; row < height; row++) { u64 pte = ggtt->pt_ops->pte_encode_bo(bo, src_idx * XE_PAGE_SIZE, - xe->pat.idx[XE_CACHE_WB]); + xe->pat.idx[XE_CACHE_NONE]); xe_ggtt_set_pte(ggtt, *ggtt_ofs, pte); *ggtt_ofs += XE_PAGE_SIZE; @@ -214,7 +214,7 @@ static int __xe_pin_fb_vma_ggtt(struct intel_framebuffer *fb, for (x = 0; x < size; x += XE_PAGE_SIZE) { u64 pte = ggtt->pt_ops->pte_encode_bo(bo, x, - xe->pat.idx[XE_CACHE_WB]); + xe->pat.idx[XE_CACHE_NONE]); xe_ggtt_set_pte(ggtt, vma->node.start + x, pte); } -- cgit v1.2.3 From 
bd415be89e8d63347142b7b5591787a63286ee7e Mon Sep 17 00:00:00 2001 From: Himal Prasad Ghimiray Date: Thu, 7 Mar 2024 12:22:13 +0530 Subject: drm/xe/vm: Remove duplicate assignment of XE_VM_FLAG_LR_MODE flag. vm->flags are already assigned with the passed-in flags. Remove the redundant assignment. Cc: Matthew Brost Signed-off-by: Himal Prasad Ghimiray Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240307065213.1968688-1-himal.prasad.ghimiray@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_vm.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 51d62323d9ee..80d43d75b1da 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -1386,9 +1386,8 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) vm->batch_invalidate_tlb = true; } - if (flags & XE_VM_FLAG_LR_MODE) { + if (vm->flags & XE_VM_FLAG_LR_MODE) { INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func); - vm->flags |= XE_VM_FLAG_LR_MODE; vm->batch_invalidate_tlb = false; } -- cgit v1.2.3 From 2920c2cbbdd0a7f7a59cf16a7f09ede2dd5a2903 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Mon, 18 Mar 2024 14:01:40 -0400 Subject: drm/xe: Convert gt suspend/resume messages to debug Let's be quieter on production configurations and let's also print the entry point of the gt suspend when debug messages are enabled. Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20240318180141.267458-1-rodrigo.vivi@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gt.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 85408e7a932b..cfa5da900461 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -722,6 +722,7 @@ int xe_gt_suspend(struct xe_gt *gt) { int err; + xe_gt_dbg(gt, "suspending\n"); xe_gt_sanitize(gt); err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); @@ -733,7 +734,7 @@ int xe_gt_suspend(struct xe_gt *gt) goto err_force_wake; XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); - xe_gt_info(gt, "suspended\n"); + xe_gt_dbg(gt, "suspended\n"); return 0; @@ -749,6 +750,7 @@ int xe_gt_resume(struct xe_gt *gt) { int err; + xe_gt_dbg(gt, "resuming\n"); err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); if (err) goto err_msg; @@ -758,7 +760,7 @@ int xe_gt_resume(struct xe_gt *gt) goto err_force_wake; XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); - xe_gt_info(gt, "resumed\n"); + xe_gt_dbg(gt, "resumed\n"); return 0; -- cgit v1.2.3 From f7f24b7950af4b1548ad5075ddb13eeb333bb782 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Mon, 18 Mar 2024 14:01:41 -0400 Subject: drm/xe: Add dbg messages on the suspend/resume functions. In case the suspend/resume flow gets locked up, we can get reports with some useful hints on where it might have gotten stuck and whether it failed.
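Reduced to its pattern, the change below brackets each flow with entry/exit debug prints and funnels every failure through one error label; a hedged sketch, with do_suspend_steps() standing in for the real sequence in the diff:

int xe_pm_suspend_sketch(struct xe_device *xe)
{
	int err;

	drm_dbg(&xe->drm, "Suspending device\n");

	err = do_suspend_steps(xe);	/* hypothetical helper */
	if (err)
		goto err;

	drm_dbg(&xe->drm, "Device suspended\n");
	return 0;
err:
	drm_dbg(&xe->drm, "Device suspend failed %d\n", err);
	return err;
}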
Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20240318180141.267458-2-rodrigo.vivi@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_pm.c | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c index 9fbb6f6c598a..cc650a92c2fc 100644 --- a/drivers/gpu/drm/xe/xe_pm.c +++ b/drivers/gpu/drm/xe/xe_pm.c @@ -80,13 +80,15 @@ int xe_pm_suspend(struct xe_device *xe) u8 id; int err; + drm_dbg(&xe->drm, "Suspending device\n"); + for_each_gt(gt, xe, id) xe_gt_suspend_prepare(gt); /* FIXME: Super racey... */ err = xe_bo_evict_all(xe); if (err) - return err; + goto err; xe_display_pm_suspend(xe); @@ -94,7 +96,7 @@ int xe_pm_suspend(struct xe_device *xe) err = xe_gt_suspend(gt); if (err) { xe_display_pm_resume(xe); - return err; + goto err; } } @@ -102,7 +104,11 @@ xe_display_pm_suspend_late(xe); + drm_dbg(&xe->drm, "Device suspended\n"); return 0; +err: + drm_dbg(&xe->drm, "Device suspend failed %d\n", err); + return err; } /** @@ -118,13 +124,15 @@ int xe_pm_resume(struct xe_device *xe) u8 id; int err; + drm_dbg(&xe->drm, "Resuming device\n"); + for_each_tile(tile, xe, id) xe_wa_apply_tile_workarounds(tile); for_each_gt(gt, xe, id) { err = xe_pcode_init(gt); if (err) - return err; + goto err; } xe_display_pm_resume_early(xe); @@ -135,7 +143,7 @@ */ err = xe_bo_restore_kernel(xe); if (err) - return err; + goto err; xe_irq_resume(xe); @@ -146,9 +154,13 @@ err = xe_bo_restore_user(xe); if (err) - return err; + goto err; + drm_dbg(&xe->drm, "Device resumed\n"); return 0; +err: + drm_dbg(&xe->drm, "Device resume failed %d\n", err); + return err; } static bool xe_pm_pci_d3cold_capable(struct xe_device *xe) -- cgit v1.2.3 From 43c4ff3ca2d1c93a9bfa5ad60e0a06832e830ec6 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Mon, 4 Mar 2024 08:26:16 -0800 Subject: drm/xe/guc: Don't support older GuC 70.x releases Supporting older GuC versions comes with baggage, both on the coding side (due to interfaces only being available from a certain version onwards) and on the testing side (due to having to make sure the driver works as expected with older GuCs). Since all of our Xe platforms are still under force probe, we haven't committed to support any specific GuC version, so we don't need to support the older ones, which means that we can force a bottom limit on what GuC we accept. This allows us to remove any conditional statements based on older GuC versions and also to approach newer additions knowing that we'll never attempt to load something older than our minimum requirement. As an initial value, the minimum expected version is set to 70.19, which is the version currently in the firmware table, but the expectation is that this will be bumped every time we update the table, until we remove the force probe.
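The cutoff itself is a two-field comparison; a minimal sketch of the check the patch adds to guc_read_css_info():

/* Hedged sketch: true if the release version meets the 70.19 floor */
static bool guc_version_supported(u32 major, u32 minor)
{
	if (major != 70)
		return major > 70;

	return minor >= 19;
}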
Signed-off-by: Daniele Ceraolo Spurio Cc: John Harrison Cc: Lucas De Marchi Cc: Matt Roper Cc: Matthew Brost Cc: Rodrigo Vivi Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240304162616.824884-1-daniele.ceraolospurio@intel.com --- drivers/gpu/drm/xe/xe_guc.c | 14 ++------------ drivers/gpu/drm/xe/xe_uc_fw.c | 36 ++++++++++++++---------------------- 2 files changed, 16 insertions(+), 34 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index caa86ccbe9e7..fa11cad8386c 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -133,15 +133,10 @@ static u32 guc_ctl_ads_flags(struct xe_guc *guc) return flags; } -#define GUC_VER(maj, min, pat) (((maj) << 16) | ((min) << 8) | (pat)) - static u32 guc_ctl_wa_flags(struct xe_guc *guc) { struct xe_device *xe = guc_to_xe(guc); struct xe_gt *gt = guc_to_gt(guc); - struct xe_uc_fw *uc_fw = &guc->fw; - struct xe_uc_fw_version *version = &uc_fw->versions.found[XE_UC_FW_VER_RELEASE]; - u32 flags = 0; if (XE_WA(gt, 22012773006)) @@ -171,13 +166,8 @@ static u32 guc_ctl_wa_flags(struct xe_guc *guc) if (XE_WA(gt, 1509372804)) flags |= GUC_WA_RENDER_RST_RC6_EXIT; - if (XE_WA(gt, 14018913170)) { - if (GUC_VER(version->major, version->minor, version->patch) >= GUC_VER(70, 7, 0)) - flags |= GUC_WA_ENABLE_TSC_CHECK_ON_RC6; - else - drm_dbg(&xe->drm, "Skip WA 14018913170: GUC version expected >= 70.7.0, found %u.%u.%u\n", - version->major, version->minor, version->patch); - } + if (XE_WA(gt, 14018913170)) + flags |= GUC_WA_ENABLE_TSC_CHECK_ON_RC6; return flags; } diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c index a9d25b3fa67c..99c5490132d5 100644 --- a/drivers/gpu/drm/xe/xe_uc_fw.c +++ b/drivers/gpu/drm/xe/xe_uc_fw.c @@ -296,36 +296,28 @@ static void uc_fw_fini(struct drm_device *drm, void *arg) xe_uc_fw_change_status(uc_fw, XE_UC_FIRMWARE_SELECTED); } -static void guc_read_css_info(struct xe_uc_fw *uc_fw, struct uc_css_header *css) +static int guc_read_css_info(struct xe_uc_fw *uc_fw, struct uc_css_header *css) { struct xe_gt *gt = uc_fw_to_gt(uc_fw); struct xe_uc_fw_version *release = &uc_fw->versions.found[XE_UC_FW_VER_RELEASE]; struct xe_uc_fw_version *compatibility = &uc_fw->versions.found[XE_UC_FW_VER_COMPATIBILITY]; xe_gt_assert(gt, uc_fw->type == XE_UC_FW_TYPE_GUC); - xe_gt_assert(gt, release->major >= 70); - - if (release->major > 70 || release->minor >= 6) { - /* v70.6.0 adds CSS header support */ - compatibility->major = FIELD_GET(CSS_SW_VERSION_UC_MAJOR, - css->submission_version); - compatibility->minor = FIELD_GET(CSS_SW_VERSION_UC_MINOR, - css->submission_version); - compatibility->patch = FIELD_GET(CSS_SW_VERSION_UC_PATCH, - css->submission_version); - } else if (release->minor >= 3) { - /* v70.3.0 introduced v1.1.0 */ - compatibility->major = 1; - compatibility->minor = 1; - compatibility->patch = 0; - } else { - /* v70.0.0 introduced v1.0.0 */ - compatibility->major = 1; - compatibility->minor = 0; - compatibility->patch = 0; + + /* We don't support GuC releases older than 70.19 */ + if (release->major < 70 || (release->major == 70 && release->minor < 19)) { + xe_gt_err(gt, "Unsupported GuC v%u.%u! 
v70.19 or newer is required\n", + release->major, release->minor); + return -EINVAL; } + compatibility->major = FIELD_GET(CSS_SW_VERSION_UC_MAJOR, css->submission_version); + compatibility->minor = FIELD_GET(CSS_SW_VERSION_UC_MINOR, css->submission_version); + compatibility->patch = FIELD_GET(CSS_SW_VERSION_UC_PATCH, css->submission_version); + uc_fw->private_data_size = css->private_data_size; + + return 0; } int xe_uc_fw_check_version_requirements(struct xe_uc_fw *uc_fw) @@ -424,7 +416,7 @@ static int parse_css_header(struct xe_uc_fw *uc_fw, const void *fw_data, size_t release->patch = FIELD_GET(CSS_SW_VERSION_UC_PATCH, css->sw_version); if (uc_fw->type == XE_UC_FW_TYPE_GUC) - guc_read_css_info(uc_fw, css); + return guc_read_css_info(uc_fw, css); return 0; } -- cgit v1.2.3 From aacf3f629ad7f82170b207158b810066fc354322 Mon Sep 17 00:00:00 2001 From: Niranjana Vishwanathapura Date: Tue, 19 Mar 2024 11:41:53 -0700 Subject: drm/xe: Separate out sched/deregister_done handling Abstract out the core part of sched_done and deregister_done handlers to separate functions to decouple them from any protocol error handling part and make them more readable. Signed-off-by: Niranjana Vishwanathapura Reviewed-by: Matthew Brost Signed-off-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240319184153.16667-1-niranjana.vishwanathapura@intel.com --- drivers/gpu/drm/xe/xe_guc_submit.c | 64 ++++++++++++++++++++++---------------- 1 file changed, 37 insertions(+), 27 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 82c955a2a15c..4a2b8e6b81b8 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -1574,28 +1574,8 @@ static void deregister_exec_queue(struct xe_guc *guc, struct xe_exec_queue *q) xe_guc_ct_send_g2h_handler(&guc->ct, action, ARRAY_SIZE(action)); } -int xe_guc_sched_done_handler(struct xe_guc *guc, u32 *msg, u32 len) +static void handle_sched_done(struct xe_guc *guc, struct xe_exec_queue *q) { - struct xe_device *xe = guc_to_xe(guc); - struct xe_exec_queue *q; - u32 guc_id = msg[0]; - - if (unlikely(len < 2)) { - drm_err(&xe->drm, "Invalid length %u", len); - return -EPROTO; - } - - q = g2h_exec_queue_lookup(guc, guc_id); - if (unlikely(!q)) - return -EPROTO; - - if (unlikely(!exec_queue_pending_enable(q) && - !exec_queue_pending_disable(q))) { - drm_err(&xe->drm, "Unexpected engine state 0x%04x", - atomic_read(&q->guc->state)); - return -EPROTO; - } - trace_xe_exec_queue_scheduling_done(q); if (exec_queue_pending_enable(q)) { @@ -1615,17 +1595,15 @@ int xe_guc_sched_done_handler(struct xe_guc *guc, u32 *msg, u32 len) deregister_exec_queue(guc, q); } } - - return 0; } -int xe_guc_deregister_done_handler(struct xe_guc *guc, u32 *msg, u32 len) +int xe_guc_sched_done_handler(struct xe_guc *guc, u32 *msg, u32 len) { struct xe_device *xe = guc_to_xe(guc); struct xe_exec_queue *q; u32 guc_id = msg[0]; - if (unlikely(len < 1)) { + if (unlikely(len < 2)) { drm_err(&xe->drm, "Invalid length %u", len); return -EPROTO; } @@ -1634,13 +1612,20 @@ int xe_guc_deregister_done_handler(struct xe_guc *guc, u32 *msg, u32 len) if (unlikely(!q)) return -EPROTO; - if (!exec_queue_destroyed(q) || exec_queue_pending_disable(q) || - exec_queue_pending_enable(q) || exec_queue_enabled(q)) { + if (unlikely(!exec_queue_pending_enable(q) && + !exec_queue_pending_disable(q))) { drm_err(&xe->drm, "Unexpected engine state 0x%04x", atomic_read(&q->guc->state)); return -EPROTO; } + handle_sched_done(guc, q); + + return 0; 
+} + +static void handle_deregister_done(struct xe_guc *guc, struct xe_exec_queue *q) +{ trace_xe_exec_queue_deregister_done(q); clear_exec_queue_registered(q); @@ -1649,6 +1634,31 @@ int xe_guc_deregister_done_handler(struct xe_guc *guc, u32 *msg, u32 len) xe_exec_queue_put(q); else __guc_exec_queue_fini(guc, q); +} + +int xe_guc_deregister_done_handler(struct xe_guc *guc, u32 *msg, u32 len) +{ + struct xe_device *xe = guc_to_xe(guc); + struct xe_exec_queue *q; + u32 guc_id = msg[0]; + + if (unlikely(len < 1)) { + drm_err(&xe->drm, "Invalid length %u", len); + return -EPROTO; + } + + q = g2h_exec_queue_lookup(guc, guc_id); + if (unlikely(!q)) + return -EPROTO; + + if (!exec_queue_destroyed(q) || exec_queue_pending_disable(q) || + exec_queue_pending_enable(q) || exec_queue_enabled(q)) { + drm_err(&xe->drm, "Unexpected engine state 0x%04x", + atomic_read(&q->guc->state)); + return -EPROTO; + } + + handle_deregister_done(guc, q); return 0; } -- cgit v1.2.3 From 260fa80d4a4163ea60661c30a03f72681ba7d450 Mon Sep 17 00:00:00 2001 From: Niranjana Vishwanathapura Date: Tue, 19 Mar 2024 10:59:46 -0700 Subject: drm/xe: Streamline exec queue freeing path Ensure exec queue freeing happens at one place, that is in __xe_exec_queue_free(). It releases q->vm reference also. Set q->vm before handling extensions as they can potentially reference it. Signed-off-by: Niranjana Vishwanathapura Reviewed-by: Matthew Brost Signed-off-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240319175947.15890-1-niranjana.vishwanathapura@intel.com --- drivers/gpu/drm/xe/xe_exec_queue.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index 2016c1af9633..a1b3c7144977 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -33,6 +33,13 @@ enum xe_exec_queue_sched_prop { static int exec_queue_user_extensions(struct xe_device *xe, struct xe_exec_queue *q, u64 extensions, int ext_number); +static void __xe_exec_queue_free(struct xe_exec_queue *q) +{ + if (q->vm) + xe_vm_put(q->vm); + kfree(q); +} + static struct xe_exec_queue *__xe_exec_queue_alloc(struct xe_device *xe, struct xe_vm *vm, u32 logical_mask, @@ -74,6 +81,9 @@ static struct xe_exec_queue *__xe_exec_queue_alloc(struct xe_device *xe, else q->sched_props.priority = XE_EXEC_QUEUE_PRIORITY_NORMAL; + if (vm) + q->vm = xe_vm_get(vm); + if (extensions) { /* * may set q->usm, must come before xe_lrc_init(), @@ -81,14 +91,11 @@ static struct xe_exec_queue *__xe_exec_queue_alloc(struct xe_device *xe, */ err = exec_queue_user_extensions(xe, q, extensions, 0); if (err) { - kfree(q); + __xe_exec_queue_free(q); return ERR_PTR(err); } } - if (vm) - q->vm = xe_vm_get(vm); - if (xe_exec_queue_is_parallel(q)) { q->parallel.composite_fence_ctx = dma_fence_context_alloc(1); q->parallel.composite_fence_seqno = XE_FENCE_INITIAL_SEQNO; @@ -97,13 +104,6 @@ static struct xe_exec_queue *__xe_exec_queue_alloc(struct xe_device *xe, return q; } -static void __xe_exec_queue_free(struct xe_exec_queue *q) -{ - if (q->vm) - xe_vm_put(q->vm); - kfree(q); -} - static int __xe_exec_queue_init(struct xe_exec_queue *q) { struct xe_device *xe = gt_to_xe(q->gt); -- cgit v1.2.3 From c9cc3d6586e6f161383ff41b9d6b8c83898d74aa Mon Sep 17 00:00:00 2001 From: Niranjana Vishwanathapura Date: Tue, 19 Mar 2024 10:49:19 -0700 Subject: drm/xe: Use correct function pointer type Use xe_exec_queue_user_extension_fn type for 
exec_queue_user_extension_funcs. Signed-off-by: Niranjana Vishwanathapura Reviewed-by: Ashutosh Dixit Signed-off-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240319174919.1847-1-niranjana.vishwanathapura@intel.com --- drivers/gpu/drm/xe/xe_exec_queue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index a1b3c7144977..730eb7d2a639 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -388,7 +388,7 @@ typedef int (*xe_exec_queue_user_extension_fn)(struct xe_device *xe, struct xe_exec_queue *q, u64 extension); -static const xe_exec_queue_set_property_fn exec_queue_user_extension_funcs[] = { +static const xe_exec_queue_user_extension_fn exec_queue_user_extension_funcs[] = { [DRM_XE_EXEC_QUEUE_EXTENSION_SET_PROPERTY] = exec_queue_user_ext_set_property, }; -- cgit v1.2.3 From a6eff8f9c7e844cb24ccb188ca24abcd59734e74 Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Tue, 19 Mar 2024 14:09:25 +0100 Subject: drm/xe: Add a NULL check in xe_ttm_stolen_mgr_init Add an explicit check to ensure that the mgr is not NULL. Cc: Matthew Auld Signed-off-by: Nirmoy Das Reviewed-by: Matthew Auld Signed-off-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20240319130925.22399-1-nirmoy.das@intel.com --- drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c index 3107d2a12426..fb35e46d68b4 100644 --- a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c +++ b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c @@ -207,6 +207,11 @@ void xe_ttm_stolen_mgr_init(struct xe_device *xe) u64 stolen_size, io_size, pgsize; int err; + if (!mgr) { + drm_dbg_kms(&xe->drm, "Stolen mgr init failed\n"); + return; + } + if (IS_SRIOV_VF(xe)) stolen_size = 0; else if (IS_DGFX(xe)) -- cgit v1.2.3 From 72bae5c28159e89101942aa2327be5344879f1b2 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Tue, 5 Mar 2024 21:20:00 -0800 Subject: drm/xe: Drop ggtt invalidate from display code Only buffers mapped in the GGTT and used by the GuC require an invalidation. Display buffers do not require an invalidation. Delete the invalidation from display code and make invalidation a static function in xe_ggtt.c.
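In other words, the invalidation follows the consumer of the mapping, not the mapping itself; a hedged sketch of the rule (guc_visible and ggtt_tlb_invalidate() are illustrative names, not driver API):

/* Only GuC-consumed GGTT updates need a TLB flush */
static void update_ggtt_mapping(struct xe_ggtt *ggtt, struct xe_bo *bo,
				bool guc_visible)
{
	xe_ggtt_map_bo(ggtt, bo);

	if (guc_visible)	/* e.g. CT buffers, ADS, GuC log */
		ggtt_tlb_invalidate(ggtt);	/* illustrative wrapper */
}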
Cc: Maarten Lankhorst Signed-off-by: Matthew Brost Signed-off-by: Maarten Lankhorst Link: https://patchwork.freedesktop.org/patch/msgid/20240306052002.311196-3-matthew.brost@intel.com --- drivers/gpu/drm/xe/display/xe_fb_pin.c | 3 +-- drivers/gpu/drm/xe/xe_ggtt.c | 4 +++- drivers/gpu/drm/xe/xe_ggtt.h | 1 - 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/xe/display/xe_fb_pin.c b/drivers/gpu/drm/xe/display/xe_fb_pin.c index 2eb622510186..311ffad6e2c0 100644 --- a/drivers/gpu/drm/xe/display/xe_fb_pin.c +++ b/drivers/gpu/drm/xe/display/xe_fb_pin.c @@ -241,7 +241,6 @@ static int __xe_pin_fb_vma_ggtt(struct intel_framebuffer *fb, rot_info->plane[i].dst_stride); } - xe_ggtt_invalidate(ggtt); out_unlock: mutex_unlock(&ggtt->lock); out: @@ -384,4 +383,4 @@ struct i915_address_space *intel_dpt_create(struct intel_framebuffer *fb) void intel_dpt_destroy(struct i915_address_space *vm) { return; -} \ No newline at end of file +} diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index 325337c38961..cc60ea08fc84 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -200,6 +200,8 @@ int xe_ggtt_init_early(struct xe_ggtt *ggtt) return drmm_add_action_or_reset(&xe->drm, ggtt_fini_early, ggtt); } +static void xe_ggtt_invalidate(struct xe_ggtt *ggtt); + static void xe_ggtt_initial_clear(struct xe_ggtt *ggtt) { struct drm_mm_node *hole; @@ -261,7 +263,7 @@ static void ggtt_invalidate_gt_tlb(struct xe_gt *gt) drm_warn(>_to_xe(gt)->drm, "xe_gt_tlb_invalidation_ggtt error=%d", err); } -void xe_ggtt_invalidate(struct xe_ggtt *ggtt) +static void xe_ggtt_invalidate(struct xe_ggtt *ggtt) { /* Each GT in a tile has its own TLB to cache GGTT lookups */ ggtt_invalidate_gt_tlb(ggtt->tile->primary_gt); diff --git a/drivers/gpu/drm/xe/xe_ggtt.h b/drivers/gpu/drm/xe/xe_ggtt.h index 42705e1338e1..dc9631d20720 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.h +++ b/drivers/gpu/drm/xe/xe_ggtt.h @@ -11,7 +11,6 @@ struct drm_printer; void xe_ggtt_set_pte(struct xe_ggtt *ggtt, u64 addr, u64 pte); -void xe_ggtt_invalidate(struct xe_ggtt *ggtt); int xe_ggtt_init_early(struct xe_ggtt *ggtt); int xe_ggtt_init(struct xe_ggtt *ggtt); void xe_ggtt_printk(struct xe_ggtt *ggtt, const char *prefix); -- cgit v1.2.3 From 231c4110873a5db4975512c30aa10edcc5be56e2 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Tue, 5 Mar 2024 21:20:01 -0800 Subject: drm/xe: Add XE_BO_GGTT_INVALIDATE flag Add XE_BO_GGTT_INVALIDATE flag which indicates the GGTT should be invalidated when a BO is added / removed from the GGTT. This is typically set when a BO is used by the GuC as the GuC has GGTT TLBs. 
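Usage-wise the change is mechanical: each GuC-consumed allocation ORs the new flag into its creation flags. Condensed from the diff below (size stands in for the per-user allocation size):

bo = xe_managed_bo_create_pin_map(xe, tile, size,
				  XE_BO_CREATE_SYSTEM_BIT |
				  XE_BO_CREATE_GGTT_BIT |
				  XE_BO_GGTT_INVALIDATE);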
Cc: Maarten Lankhorst Signed-off-by: Matthew Brost [mlankhorst: Small fix to only inherit GGTT_INVALIDATE from src bo] [mlankhorst: Remove _BIT from name] Signed-off-by: Maarten Lankhorst Link: https://patchwork.freedesktop.org/patch/msgid/20240306052002.311196-4-matthew.brost@intel.com --- drivers/gpu/drm/xe/display/xe_fb_pin.c | 2 +- drivers/gpu/drm/xe/xe_bo.c | 8 +++++--- drivers/gpu/drm/xe/xe_bo.h | 1 + drivers/gpu/drm/xe/xe_ggtt.c | 12 ++++++++---- drivers/gpu/drm/xe/xe_ggtt.h | 3 ++- drivers/gpu/drm/xe/xe_guc_ads.c | 3 ++- drivers/gpu/drm/xe/xe_guc_ct.c | 3 ++- drivers/gpu/drm/xe/xe_guc_hwconfig.c | 3 ++- drivers/gpu/drm/xe/xe_guc_log.c | 3 ++- drivers/gpu/drm/xe/xe_guc_pc.c | 3 ++- drivers/gpu/drm/xe/xe_hw_engine.c | 3 ++- drivers/gpu/drm/xe/xe_lrc.c | 3 ++- drivers/gpu/drm/xe/xe_memirq.c | 1 + drivers/gpu/drm/xe/xe_sa.c | 3 ++- drivers/gpu/drm/xe/xe_uc_fw.c | 3 ++- 15 files changed, 36 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/xe/display/xe_fb_pin.c b/drivers/gpu/drm/xe/display/xe_fb_pin.c index 311ffad6e2c0..2a50a7eaaa31 100644 --- a/drivers/gpu/drm/xe/display/xe_fb_pin.c +++ b/drivers/gpu/drm/xe/display/xe_fb_pin.c @@ -323,7 +323,7 @@ static void __xe_unpin_fb_vma(struct i915_vma *vma) xe_bo_unpin_map_no_vm(vma->dpt); else if (!drm_mm_node_allocated(&vma->bo->ggtt_node) || vma->bo->ggtt_node.start != vma->node.start) - xe_ggtt_remove_node(ggtt, &vma->node); + xe_ggtt_remove_node(ggtt, &vma->node, false); ttm_bo_reserve(&vma->bo->ttm, false, false, NULL); ttm_bo_unpin(&vma->bo->ttm); diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 8383c56c9960..9298546909b5 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -1580,13 +1580,15 @@ struct xe_bo *xe_managed_bo_create_from_data(struct xe_device *xe, struct xe_til int xe_managed_bo_reinit_in_vram(struct xe_device *xe, struct xe_tile *tile, struct xe_bo **src) { struct xe_bo *bo; + u32 dst_flags = XE_BO_CREATE_VRAM_IF_DGFX(tile) | XE_BO_CREATE_GGTT_BIT; + + dst_flags |= (*src)->flags & XE_BO_GGTT_INVALIDATE; xe_assert(xe, IS_DGFX(xe)); xe_assert(xe, !(*src)->vmap.is_iomem); - bo = xe_managed_bo_create_from_data(xe, tile, (*src)->vmap.vaddr, (*src)->size, - XE_BO_CREATE_VRAM_IF_DGFX(tile) | - XE_BO_CREATE_GGTT_BIT); + bo = xe_managed_bo_create_from_data(xe, tile, (*src)->vmap.vaddr, + (*src)->size, dst_flags); if (IS_ERR(bo)) return PTR_ERR(bo); diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h index 87ae8ee7c316..52e441f77e96 100644 --- a/drivers/gpu/drm/xe/xe_bo.h +++ b/drivers/gpu/drm/xe/xe_bo.h @@ -46,6 +46,7 @@ #define XE_BO_NEEDS_CPU_ACCESS BIT(13) #define XE_BO_NEEDS_UC BIT(14) #define XE_BO_NEEDS_64K BIT(15) +#define XE_BO_GGTT_INVALIDATE BIT(16) /* this one is trigger internally only */ #define XE_BO_INTERNAL_TEST BIT(30) #define XE_BO_INTERNAL_64K BIT(31) diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index cc60ea08fc84..9947ded57cf1 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -390,7 +390,8 @@ void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_bo *bo) xe_ggtt_set_pte(ggtt, start + offset, pte); } - xe_ggtt_invalidate(ggtt); + if (bo->flags & XE_BO_GGTT_INVALIDATE) + xe_ggtt_invalidate(ggtt); } static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, @@ -435,7 +436,8 @@ int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo) return __xe_ggtt_insert_bo_at(ggtt, bo, 0, U64_MAX); } -void xe_ggtt_remove_node(struct xe_ggtt *ggtt, struct drm_mm_node *node) +void 
xe_ggtt_remove_node(struct xe_ggtt *ggtt, struct drm_mm_node *node, + bool invalidate) { xe_device_mem_access_get(tile_to_xe(ggtt->tile)); mutex_lock(&ggtt->lock); @@ -444,7 +446,8 @@ void xe_ggtt_remove_node(struct xe_ggtt *ggtt, struct drm_mm_node *node) drm_mm_remove_node(node); node->size = 0; - xe_ggtt_invalidate(ggtt); + if (invalidate) + xe_ggtt_invalidate(ggtt); mutex_unlock(&ggtt->lock); xe_device_mem_access_put(tile_to_xe(ggtt->tile)); @@ -458,7 +461,8 @@ void xe_ggtt_remove_bo(struct xe_ggtt *ggtt, struct xe_bo *bo) /* This BO is not currently in the GGTT */ xe_tile_assert(ggtt->tile, bo->ggtt_node.size == bo->size); - xe_ggtt_remove_node(ggtt, &bo->ggtt_node); + xe_ggtt_remove_node(ggtt, &bo->ggtt_node, + bo->flags & XE_BO_GGTT_INVALIDATE); } int xe_ggtt_dump(struct xe_ggtt *ggtt, struct drm_printer *p) diff --git a/drivers/gpu/drm/xe/xe_ggtt.h b/drivers/gpu/drm/xe/xe_ggtt.h index dc9631d20720..8306ef74abc6 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.h +++ b/drivers/gpu/drm/xe/xe_ggtt.h @@ -23,7 +23,8 @@ int xe_ggtt_insert_special_node(struct xe_ggtt *ggtt, struct drm_mm_node *node, int xe_ggtt_insert_special_node_locked(struct xe_ggtt *ggtt, struct drm_mm_node *node, u32 size, u32 align, u32 mm_flags); -void xe_ggtt_remove_node(struct xe_ggtt *ggtt, struct drm_mm_node *node); +void xe_ggtt_remove_node(struct xe_ggtt *ggtt, struct drm_mm_node *node, + bool invalidate); void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_bo *bo); int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo); int xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c index 6ad4c1a90a78..df2bffb7e220 100644 --- a/drivers/gpu/drm/xe/xe_guc_ads.c +++ b/drivers/gpu/drm/xe/xe_guc_ads.c @@ -274,7 +274,8 @@ int xe_guc_ads_init(struct xe_guc_ads *ads) bo = xe_managed_bo_create_pin_map(xe, tile, guc_ads_size(ads) + MAX_GOLDEN_LRC_SIZE, XE_BO_CREATE_SYSTEM_BIT | - XE_BO_CREATE_GGTT_BIT); + XE_BO_CREATE_GGTT_BIT | + XE_BO_GGTT_INVALIDATE); if (IS_ERR(bo)) return PTR_ERR(bo); diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index 355edd4d758a..d9fa81900ff5 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -156,7 +156,8 @@ int xe_guc_ct_init(struct xe_guc_ct *ct) bo = xe_managed_bo_create_pin_map(xe, tile, guc_ct_size(), XE_BO_CREATE_SYSTEM_BIT | - XE_BO_CREATE_GGTT_BIT); + XE_BO_CREATE_GGTT_BIT | + XE_BO_GGTT_INVALIDATE); if (IS_ERR(bo)) return PTR_ERR(bo); diff --git a/drivers/gpu/drm/xe/xe_guc_hwconfig.c b/drivers/gpu/drm/xe/xe_guc_hwconfig.c index ea49f3885c10..1173453c4039 100644 --- a/drivers/gpu/drm/xe/xe_guc_hwconfig.c +++ b/drivers/gpu/drm/xe/xe_guc_hwconfig.c @@ -79,7 +79,8 @@ int xe_guc_hwconfig_init(struct xe_guc *guc) bo = xe_managed_bo_create_pin_map(xe, tile, PAGE_ALIGN(size), XE_BO_CREATE_SYSTEM_BIT | - XE_BO_CREATE_GGTT_BIT); + XE_BO_CREATE_GGTT_BIT | + XE_BO_GGTT_INVALIDATE); if (IS_ERR(bo)) return PTR_ERR(bo); guc->hwconfig.bo = bo; diff --git a/drivers/gpu/drm/xe/xe_guc_log.c b/drivers/gpu/drm/xe/xe_guc_log.c index 45135c3520e5..9302a7faaf0b 100644 --- a/drivers/gpu/drm/xe/xe_guc_log.c +++ b/drivers/gpu/drm/xe/xe_guc_log.c @@ -85,7 +85,8 @@ int xe_guc_log_init(struct xe_guc_log *log) bo = xe_managed_bo_create_pin_map(xe, tile, guc_log_size(), XE_BO_CREATE_SYSTEM_BIT | - XE_BO_CREATE_GGTT_BIT); + XE_BO_CREATE_GGTT_BIT | + XE_BO_GGTT_INVALIDATE); if (IS_ERR(bo)) return PTR_ERR(bo); diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c 
b/drivers/gpu/drm/xe/xe_guc_pc.c index f4b031b8d9de..eb174547c8f2 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.c +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -955,7 +955,8 @@ int xe_guc_pc_init(struct xe_guc_pc *pc) bo = xe_managed_bo_create_pin_map(xe, tile, size, XE_BO_CREATE_VRAM_IF_DGFX(tile) | - XE_BO_CREATE_GGTT_BIT); + XE_BO_CREATE_GGTT_BIT | + XE_BO_GGTT_INVALIDATE); if (IS_ERR(bo)) return PTR_ERR(bo); diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index 2c5615130a38..b94924a4f319 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -519,7 +519,8 @@ static int hw_engine_init(struct xe_gt *gt, struct xe_hw_engine *hwe, hwe->hwsp = xe_managed_bo_create_pin_map(xe, tile, SZ_4K, XE_BO_CREATE_VRAM_IF_DGFX(tile) | - XE_BO_CREATE_GGTT_BIT); + XE_BO_CREATE_GGTT_BIT | + XE_BO_GGTT_INVALIDATE); if (IS_ERR(hwe->hwsp)) { err = PTR_ERR(hwe->hwsp); goto err_name; diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index 3c4d31703207..95e0f7b1ec3f 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -746,7 +746,8 @@ int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, ring_size + xe_lrc_size(xe, hwe->class), ttm_bo_type_kernel, XE_BO_CREATE_VRAM_IF_DGFX(tile) | - XE_BO_CREATE_GGTT_BIT); + XE_BO_CREATE_GGTT_BIT | + XE_BO_GGTT_INVALIDATE); if (IS_ERR(lrc->bo)) return PTR_ERR(lrc->bo); diff --git a/drivers/gpu/drm/xe/xe_memirq.c b/drivers/gpu/drm/xe/xe_memirq.c index 76e95535d7f6..0eb28681bec7 100644 --- a/drivers/gpu/drm/xe/xe_memirq.c +++ b/drivers/gpu/drm/xe/xe_memirq.c @@ -129,6 +129,7 @@ static int memirq_alloc_pages(struct xe_memirq *memirq) ttm_bo_type_kernel, XE_BO_CREATE_SYSTEM_BIT | XE_BO_CREATE_GGTT_BIT | + XE_BO_GGTT_INVALIDATE | XE_BO_NEEDS_UC | XE_BO_NEEDS_CPU_ACCESS); if (IS_ERR(bo)) { diff --git a/drivers/gpu/drm/xe/xe_sa.c b/drivers/gpu/drm/xe/xe_sa.c index 2c4632259edd..164202ac6454 100644 --- a/drivers/gpu/drm/xe/xe_sa.c +++ b/drivers/gpu/drm/xe/xe_sa.c @@ -49,7 +49,8 @@ struct xe_sa_manager *xe_sa_bo_manager_init(struct xe_tile *tile, u32 size, u32 bo = xe_bo_create_pin_map(xe, tile, NULL, size, ttm_bo_type_kernel, XE_BO_CREATE_VRAM_IF_DGFX(tile) | - XE_BO_CREATE_GGTT_BIT); + XE_BO_CREATE_GGTT_BIT | + XE_BO_GGTT_INVALIDATE); if (IS_ERR(bo)) { drm_err(&xe->drm, "failed to allocate bo for sa manager: %ld\n", PTR_ERR(bo)); diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c index 99c5490132d5..400651485b85 100644 --- a/drivers/gpu/drm/xe/xe_uc_fw.c +++ b/drivers/gpu/drm/xe/xe_uc_fw.c @@ -763,7 +763,8 @@ int xe_uc_fw_init(struct xe_uc_fw *uc_fw) return 0; err = uc_fw_copy(uc_fw, fw->data, fw->size, - XE_BO_CREATE_SYSTEM_BIT | XE_BO_CREATE_GGTT_BIT); + XE_BO_CREATE_SYSTEM_BIT | XE_BO_CREATE_GGTT_BIT | + XE_BO_GGTT_INVALIDATE); uc_fw_release(fw); -- cgit v1.2.3 From a825cef2e793f085a3d0443c3843da7f81af80cb Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Tue, 5 Mar 2024 21:20:02 -0800 Subject: drm/xe: Move xe_ggtt_invalidate out from ggtt->lock Considering the caller of the GGTT functions should keep the backing storage alive before the function completes, it's not necessary to invalidate with the GGTT lock held. This just adds latency for every user of the GGTT. 
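The general shape, with hypothetical names, is to shrink the critical section to the shared-structure update and push the slow side effect past the unlock:

/* Hedged, generic sketch of the locking change, not driver code */
static void remove_and_flush(struct resource_table *t, struct entry *e)
{
	mutex_lock(&t->lock);
	table_remove(t, e);	/* must stay under the lock */
	mutex_unlock(&t->lock);

	/* slow; safe outside the lock since the caller keeps the
	 * backing storage alive until we return */
	flush_hw_tlb(t);
}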
Signed-off-by: Matthew Brost Signed-off-by: Maarten Lankhorst Reviewed-by: Matthew Brost Reviewed-by: Rodrigo Vivi Signed-off-by: Maarten Lankhorst Link: https://patchwork.freedesktop.org/patch/msgid/20240306052002.311196-5-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_ggtt.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index 9947ded57cf1..f54523d7d03c 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -389,9 +389,6 @@ void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_bo *bo) pte = ggtt->pt_ops->pte_encode_bo(bo, offset, pat_index); xe_ggtt_set_pte(ggtt, start + offset, pte); } - - if (bo->flags & XE_BO_GGTT_INVALIDATE) - xe_ggtt_invalidate(ggtt); } static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, @@ -420,6 +417,9 @@ static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, if (!err) xe_ggtt_map_bo(ggtt, bo); mutex_unlock(&ggtt->lock); + + if (!err && bo->flags & XE_BO_GGTT_INVALIDATE) + xe_ggtt_invalidate(ggtt); xe_device_mem_access_put(tile_to_xe(ggtt->tile)); return err; @@ -440,16 +440,16 @@ void xe_ggtt_remove_node(struct xe_ggtt *ggtt, struct drm_mm_node *node, bool invalidate) { xe_device_mem_access_get(tile_to_xe(ggtt->tile)); - mutex_lock(&ggtt->lock); + mutex_lock(&ggtt->lock); xe_ggtt_clear(ggtt, node->start, node->size); drm_mm_remove_node(node); node->size = 0; + mutex_unlock(&ggtt->lock); if (invalidate) xe_ggtt_invalidate(ggtt); - mutex_unlock(&ggtt->lock); xe_device_mem_access_put(tile_to_xe(ggtt->tile)); } -- cgit v1.2.3 From 3896b1695f9b4aac42daf189caa2562d44774e2f Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Thu, 14 Mar 2024 08:52:21 +0200 Subject: drm/xe/display: fix type of intel_uncore_read*() functions Some of the backported intel_uncore_read*() functions used the wrong types. Change the function declarations accordingly. Reviewed-by: Gustavo Sousa Signed-off-by: Luca Coelho Link: https://patchwork.freedesktop.org/patch/msgid/20240314065221.1181158-1-luciano.coelho@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/xe/compat-i915-headers/intel_uncore.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/xe/compat-i915-headers/intel_uncore.h b/drivers/gpu/drm/xe/compat-i915-headers/intel_uncore.h index cd26ddc0f69e..ef79793caa72 100644 --- a/drivers/gpu/drm/xe/compat-i915-headers/intel_uncore.h +++ b/drivers/gpu/drm/xe/compat-i915-headers/intel_uncore.h @@ -25,15 +25,15 @@ static inline u32 intel_uncore_read(struct intel_uncore *uncore, return xe_mmio_read32(__compat_uncore_to_gt(uncore), reg); } -static inline u32 intel_uncore_read8(struct intel_uncore *uncore, - i915_reg_t i915_reg) +static inline u8 intel_uncore_read8(struct intel_uncore *uncore, + i915_reg_t i915_reg) { struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg)); return xe_mmio_read8(__compat_uncore_to_gt(uncore), reg); } -static inline u32 intel_uncore_read16(struct intel_uncore *uncore, +static inline u16 intel_uncore_read16(struct intel_uncore *uncore, i915_reg_t i915_reg) { struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg)); -- cgit v1.2.3 From dd1c61057e9e3309aca99e6c215cf964f92c90c1 Mon Sep 17 00:00:00 2001 From: Tejas Upadhyay Date: Mon, 18 Mar 2024 17:10:57 +0530 Subject: drm/xe/gt: Remove continue statement which has no effect Remove continue statement which does not have real effect as no actions are to be taken post continue. 
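For clarity: a continue as the last statement of a loop body is a no-op, since the loop advances anyway. A hypothetical illustration of the removed pattern:

for (i = 0; i < n; i++) {
	if (cond(i))
		handle_a(i);
	else
		handle_b(i);
	continue;	/* no effect: removing it changes nothing */
}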
Signed-off-by: Tejas Upadhyay Reviewed-by: Himal Prasad Ghimiray Acked-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240318114057.3831274-1-tejas.upadhyay@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_irq.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c index 2275ca35a3c7..996806353171 100644 --- a/drivers/gpu/drm/xe/xe_irq.c +++ b/drivers/gpu/drm/xe/xe_irq.c @@ -326,7 +326,6 @@ static void gt_irq_handler(struct xe_tile *tile, xe_heci_gsc_irq_handler(xe, intr_vec); else gt_other_irq_handler(engine_gt, instance, intr_vec); - continue; } } } -- cgit v1.2.3 From 0267ee1914d21555e8e8817b32f2d07d8bf58cac Mon Sep 17 00:00:00 2001 From: Radhakrishna Sripada Date: Mon, 18 Mar 2024 14:01:20 -0700 Subject: drm/xe/xelpg: Add Wa_14020495402 Disable clockgating for TDL SVHS fub. v2: Extend the Wa to 1274(MattR) Bspec: 46045 Reviewed-by: Matt Roper Signed-off-by: Radhakrishna Sripada Reviewed-by: Matt Roper Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20240318210120.564692-1-radhakrishna.sripada@intel.com --- drivers/gpu/drm/xe/regs/xe_gt_regs.h | 1 + drivers/gpu/drm/xe/xe_wa.c | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index 95969935f58b..65af9fe95db5 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -364,6 +364,7 @@ #define DISABLE_EARLY_READ REG_BIT(14) #define ENABLE_LARGE_GRF_MODE REG_BIT(12) #define PUSH_CONST_DEREF_HOLD_DIS REG_BIT(8) +#define DISABLE_TDL_SVHS_GATING REG_BIT(1) #define DISABLE_DOP_GATING REG_BIT(0) #define RT_CTRL XE_REG_MCR(0xe530) diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index 54740d246310..74b33a3845f2 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -401,6 +401,11 @@ static const struct xe_rtp_entry_sr engine_was[] = { XE_RTP_ACTIONS(SET(XEHP_HDC_CHICKEN0, DIS_ATOMIC_CHAINING_TYPED_WRITES, XE_RTP_NOCHECK)) }, + { XE_RTP_NAME("14020495402"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1274), + FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(SET(ROW_CHICKEN2, DISABLE_TDL_SVHS_GATING)) + }, /* Xe2_LPG */ -- cgit v1.2.3 From 649a125a88da64a66b0836cb7998bb433bbf1bf5 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Mon, 18 Mar 2024 08:49:24 -0700 Subject: drm/xe: Always check force_wake_get return code A force_wake_get failure means that the HW might not be awake for the access we're doing; this can lead to an immediate error or it can be a more subtle problem (e.g. a register read might return an incorrect value that is still valid, leading the driver to make a wrong choice instead of flagging an error). We avoid an error from the force_wake function because callers might handle or tolerate the error, but this only works if all callers are checking the error code. The majority already do, but a few are not. These are mainly falling into 3 categories, which are each handled differently: 1) error capture: in this case we want to continue the capture, but we log an info message in dmesg to notify the user that the capture might have incorrect data. 2) ioctl: in this case we return a -EIO error to userspace 3) unabortable actions: these are scenarios where we can't simply abort and retry and so it's better to just try it anyway because there is a chance the HW is awake even with the failure. 
In this case we throw a warning so we know there was a forcewake problem if something fails down the line. v2: use gt_WARN_ON where appropriate Signed-off-by: Daniele Ceraolo Spurio Cc: Tejas Upadhyay Reviewed-by: Matt Roper Reviewed-by: Tejas Upadhyay Link: https://patchwork.freedesktop.org/patch/msgid/20240318154924.3453513-1-daniele.ceraolospurio@intel.com --- drivers/gpu/drm/xe/xe_devcoredump.c | 9 +++++++-- drivers/gpu/drm/xe/xe_gsc.c | 2 +- drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c | 2 +- drivers/gpu/drm/xe/xe_guc.c | 5 +++-- drivers/gpu/drm/xe/xe_guc_pc.c | 2 +- drivers/gpu/drm/xe/xe_guc_submit.c | 4 +++- drivers/gpu/drm/xe/xe_query.c | 3 ++- 7 files changed, 18 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c index 0fcd30680323..7d3aa6bd3524 100644 --- a/drivers/gpu/drm/xe/xe_devcoredump.c +++ b/drivers/gpu/drm/xe/xe_devcoredump.c @@ -13,6 +13,7 @@ #include "xe_exec_queue.h" #include "xe_force_wake.h" #include "xe_gt.h" +#include "xe_gt_printk.h" #include "xe_guc_ct.h" #include "xe_guc_submit.h" #include "xe_hw_engine.h" @@ -64,7 +65,9 @@ static void xe_devcoredump_deferred_snap_work(struct work_struct *work) { struct xe_devcoredump_snapshot *ss = container_of(work, typeof(*ss), work); - xe_force_wake_get(gt_to_fw(ss->gt), XE_FORCEWAKE_ALL); + /* keep going if fw fails as we still want to save the memory and SW data */ + if (xe_force_wake_get(gt_to_fw(ss->gt), XE_FORCEWAKE_ALL)) + xe_gt_info(ss->gt, "failed to get forcewake for coredump capture\n"); xe_vm_snapshot_capture_delayed(ss->vm); xe_guc_exec_queue_snapshot_capture_delayed(ss->ge); xe_force_wake_put(gt_to_fw(ss->gt), XE_FORCEWAKE_ALL); @@ -180,7 +183,9 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump, } } - xe_force_wake_get(gt_to_fw(q->gt), XE_FORCEWAKE_ALL); + /* keep going if fw fails as we still want to save the memory and SW data */ + if (xe_force_wake_get(gt_to_fw(q->gt), XE_FORCEWAKE_ALL)) + xe_gt_info(ss->gt, "failed to get forcewake for coredump capture\n"); coredump->snapshot.ct = xe_guc_ct_snapshot_capture(&guc->ct, true); coredump->snapshot.ge = xe_guc_exec_queue_snapshot_capture(job); diff --git a/drivers/gpu/drm/xe/xe_gsc.c b/drivers/gpu/drm/xe/xe_gsc.c index 09f6e7899921..8339b0b49dfb 100644 --- a/drivers/gpu/drm/xe/xe_gsc.c +++ b/drivers/gpu/drm/xe/xe_gsc.c @@ -326,7 +326,7 @@ static void gsc_work(struct work_struct *work) spin_unlock_irq(&gsc->lock); xe_pm_runtime_get(xe); - xe_force_wake_get(gt_to_fw(gt), XE_FW_GSC); + xe_gt_WARN_ON(gt, xe_force_wake_get(gt_to_fw(gt), XE_FW_GSC)); if (actions & GSC_ACTION_ER_COMPLETE) { ret = gsc_er_complete(gt); diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c index a3c4ffba679d..25b4111097bc 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c @@ -247,7 +247,7 @@ int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt) xe_gt_tlb_invalidation_wait(gt, seqno); } else if (xe_device_uc_enabled(xe)) { - xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + xe_gt_WARN_ON(gt, xe_force_wake_get(gt_to_fw(gt), XE_FW_GT)); if (xe->info.platform == XE_PVC || GRAPHICS_VER(xe) >= 20) { xe_mmio_write32(gt, PVC_GUC_TLB_INV_DESC1, PVC_GUC_TLB_INV_DESC1_INVALIDATE); diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index fa11cad8386c..c558d978462a 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -240,10 +240,11 @@ static void guc_write_params(struct xe_guc *guc) 
static void guc_fini(struct drm_device *drm, void *arg) { struct xe_guc *guc = arg; + struct xe_gt *gt = guc_to_gt(guc); - xe_force_wake_get(gt_to_fw(guc_to_gt(guc)), XE_FORCEWAKE_ALL); + xe_gt_WARN_ON(gt, xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL)); xe_uc_fini_hw(&guc_to_gt(guc)->uc); - xe_force_wake_put(gt_to_fw(guc_to_gt(guc)), XE_FORCEWAKE_ALL); + xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL); } /** diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c index eb174547c8f2..786acaf71931 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.c +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -927,7 +927,7 @@ static void xe_guc_pc_fini(struct drm_device *drm, void *arg) return; } - xe_force_wake_get(gt_to_fw(pc_to_gt(pc)), XE_FORCEWAKE_ALL); + XE_WARN_ON(xe_force_wake_get(gt_to_fw(pc_to_gt(pc)), XE_FORCEWAKE_ALL)); XE_WARN_ON(xe_guc_pc_gucrc_disable(pc)); XE_WARN_ON(xe_guc_pc_stop(pc)); xe_force_wake_put(gt_to_fw(pc_to_gt(pc)), XE_FORCEWAKE_ALL); diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 4a2b8e6b81b8..a71be57f0fc2 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -833,7 +833,9 @@ static void simple_error_capture(struct xe_exec_queue *q) } } - xe_force_wake_get(gt_to_fw(guc_to_gt(guc)), XE_FORCEWAKE_ALL); + if (xe_force_wake_get(gt_to_fw(guc_to_gt(guc)), XE_FORCEWAKE_ALL)) + xe_gt_info(guc_to_gt(guc), + "failed to get forcewake for error capture"); xe_guc_ct_print(&guc->ct, &p, true); guc_exec_queue_print(q, &p); for_each_hw_engine(hwe, guc_to_gt(guc), id) { diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c index e80321b34918..fcd8680d2ccc 100644 --- a/drivers/gpu/drm/xe/xe_query.c +++ b/drivers/gpu/drm/xe/xe_query.c @@ -148,7 +148,8 @@ query_engine_cycles(struct xe_device *xe, if (!hwe) return -EINVAL; - xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); + if (xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL)) + return -EIO; __read_timestamps(gt, RING_TIMESTAMP(hwe->mmio_base), -- cgit v1.2.3 From 4c15a6dcee20951ea619eca26e249f8f13275224 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Tue, 19 Mar 2024 12:51:01 -0700 Subject: drm/xe/uc: Use u64 for offsets for which we use upper_32_bits() The GGTT is currently a 32 bit address space, but the HW and GuC support 48b addresses in GGTT-related operations, both to keep the interface/HW paths common between PPGTT and GGTT and to allow for future increase of the GGTT size. This leaves us having to program a 64b field with a 32b offset, which currently we're in some cases doing this by using an upper_32_bits() call on a 32b variable, which doesn't make any sense. To do this cleanly we have 2 options: 1 - Set the upper 32 bits directly to zero. 2 - Use 64b variables for the offset and keep programming the whole thing, so we're ready if we ever have bigger offsets. This patch goes with option #2 and switches the related variables to u64. 
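With a u64 offset, the existing split into two 32-bit register fields keeps working unchanged; a simplified sketch of the pattern (the real register_engine() hunk below also adds structure offsets before splitting):

u64 ggtt_addr = xe_lrc_parallel_ggtt_addr(lrc);

info.wq_desc_lo = lower_32_bits(ggtt_addr);
info.wq_desc_hi = upper_32_bits(ggtt_addr);	/* 0 while the GGTT stays 32b */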
v2: don't change the log ctl flag variable (John) Signed-off-by: Daniele Ceraolo Spurio Cc: John Harrison Reviewed-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20240319195101.2784480-1-daniele.ceraolospurio@intel.com --- drivers/gpu/drm/xe/xe_guc_hwconfig.c | 2 +- drivers/gpu/drm/xe/xe_guc_submit.c | 2 +- drivers/gpu/drm/xe/xe_uc_fw.c | 3 ++- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc_hwconfig.c b/drivers/gpu/drm/xe/xe_guc_hwconfig.c index 1173453c4039..f035ad59f68e 100644 --- a/drivers/gpu/drm/xe/xe_guc_hwconfig.c +++ b/drivers/gpu/drm/xe/xe_guc_hwconfig.c @@ -14,7 +14,7 @@ #include "xe_guc.h" #include "xe_map.h" -static int send_get_hwconfig(struct xe_guc *guc, u32 ggtt_addr, u32 size) +static int send_get_hwconfig(struct xe_guc *guc, u64 ggtt_addr, u32 size) { u32 action[] = { XE_GUC_ACTION_GET_HWCONFIG, diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index a71be57f0fc2..ce46ce22fa5f 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -533,7 +533,7 @@ static void register_engine(struct xe_exec_queue *q) info.flags = CONTEXT_REGISTRATION_FLAG_KMD; if (xe_exec_queue_is_parallel(q)) { - u32 ggtt_addr = xe_lrc_parallel_ggtt_addr(lrc); + u64 ggtt_addr = xe_lrc_parallel_ggtt_addr(lrc); struct iosys_map map = xe_lrc_parallel_map(lrc); info.wq_desc_lo = lower_32_bits(ggtt_addr + diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c index 400651485b85..3554f66872b9 100644 --- a/drivers/gpu/drm/xe/xe_uc_fw.c +++ b/drivers/gpu/drm/xe/xe_uc_fw.c @@ -780,7 +780,8 @@ static int uc_fw_xfer(struct xe_uc_fw *uc_fw, u32 offset, u32 dma_flags) { struct xe_device *xe = uc_fw_to_xe(uc_fw); struct xe_gt *gt = uc_fw_to_gt(uc_fw); - u32 src_offset, dma_ctrl; + u64 src_offset; + u32 dma_ctrl; int ret; xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT); -- cgit v1.2.3 From 1008368e1c7e36bdec01b3cce1e76606dc3ad46f Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Wed, 20 Mar 2024 11:27:31 +0000 Subject: drm/xe/bb: assert width in xe_bb_create_job() The queue width will determine the number of batch buffer emitted into the ring. In the case of xe_bb_create_job() we pass exactly one batch address, therefore add an assert for the width to make sure we don't go out of bounds. While here also convert to the helper to determine if the queue is migration based. Signed-off-by: Matthew Auld Cc: Nirmoy Das Reviewed-by: Nirmoy Das Link: https://patchwork.freedesktop.org/patch/msgid/20240320112730.219854-3-matthew.auld@intel.com --- drivers/gpu/drm/xe/xe_bb.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_bb.c b/drivers/gpu/drm/xe/xe_bb.c index 7c124475c428..a35e0781b7b9 100644 --- a/drivers/gpu/drm/xe/xe_bb.c +++ b/drivers/gpu/drm/xe/xe_bb.c @@ -96,7 +96,8 @@ struct xe_sched_job *xe_bb_create_job(struct xe_exec_queue *q, { u64 addr = xe_sa_bo_gpu_addr(bb->bo); - xe_gt_assert(q->gt, !(q->vm && q->vm->flags & XE_VM_FLAG_MIGRATION)); + xe_gt_assert(q->gt, !xe_sched_job_is_migration(q)); + xe_gt_assert(q->gt, q->width == 1); return __xe_bb_create_job(q, bb, &addr); } -- cgit v1.2.3 From ee3b1e31d55cefe8d7995c6bbdfc028a068576d8 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Wed, 20 Mar 2024 11:27:32 +0000 Subject: drm/xe/bb: assert width in xe_bb_create_migration_job() The q->width should always be exactly one here for migration queue/vm. 
The width will anyway be overridden later since we need to emit two jumps for special migration jobs. Enforce that here to ensure caller is not doing something strange. While here also convert to the helper to determine if the queue is migration based. Signed-off-by: Matthew Auld Cc: Nirmoy Das Reviewed-by: Nirmoy Das Link: https://patchwork.freedesktop.org/patch/msgid/20240320112730.219854-4-matthew.auld@intel.com --- drivers/gpu/drm/xe/xe_bb.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_bb.c b/drivers/gpu/drm/xe/xe_bb.c index a35e0781b7b9..541361caff3b 100644 --- a/drivers/gpu/drm/xe/xe_bb.c +++ b/drivers/gpu/drm/xe/xe_bb.c @@ -86,7 +86,8 @@ struct xe_sched_job *xe_bb_create_migration_job(struct xe_exec_queue *q, }; xe_gt_assert(q->gt, second_idx <= bb->len); - xe_gt_assert(q->gt, q->vm->flags & XE_VM_FLAG_MIGRATION); + xe_gt_assert(q->gt, xe_sched_job_is_migration(q)); + xe_gt_assert(q->gt, q->width == 1); return __xe_bb_create_job(q, bb, addr); } -- cgit v1.2.3 From f20b7671dffd8515b6a2a996c6796bd990e9cf8d Mon Sep 17 00:00:00 2001 From: Himal Prasad Ghimiray Date: Wed, 20 Mar 2024 14:03:25 +0530 Subject: drm/xe: Use USEC_PER_MSEC rather than the hard coding Use USEC_PER_MSEC rather than the hard coded value of 1000. Static analyzer Reported "casting either timeout_ms or 1000U to type u64" to avoid overflow-before-widen. Using USEC_PER_MSEC seems better and will help with static analyzer report cleanup. Cc: Rodrigo Vivi Signed-off-by: Himal Prasad Ghimiray Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240320083325.3258720-1-himal.prasad.ghimiray@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_pcode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_pcode.c b/drivers/gpu/drm/xe/xe_pcode.c index b324dc2a5deb..627e094c7cbe 100644 --- a/drivers/gpu/drm/xe/xe_pcode.c +++ b/drivers/gpu/drm/xe/xe_pcode.c @@ -74,7 +74,7 @@ static int pcode_mailbox_rw(struct xe_gt *gt, u32 mbox, u32 *data0, u32 *data1, xe_mmio_write32(gt, PCODE_MAILBOX, PCODE_READY | mbox); err = xe_mmio_wait32(gt, PCODE_MAILBOX, PCODE_READY, 0, - timeout_ms * 1000, NULL, atomic); + timeout_ms * USEC_PER_MSEC, NULL, atomic); if (err) return err; -- cgit v1.2.3 From cdf287ce7bd308c51e97080ebe7e9b12c8256f26 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 20 Mar 2024 12:42:32 -0700 Subject: drm/xe: Add debug messages for MMU notifier and VMA invalidate Extra debug is useful when working on VM issues. 
Signed-off-by: Matthew Brost Reviewed-by: Nirmoy Das Link: https://patchwork.freedesktop.org/patch/msgid/20240320194232.1910688-1-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_vm.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 80d43d75b1da..e3692b7e1711 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -648,6 +648,10 @@ static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni, if (!mmu_notifier_range_blockable(range)) return false; + vm_dbg(&xe_vma_vm(vma)->xe->drm, + "NOTIFIER: addr=0x%016llx, range=0x%016llx", + xe_vma_start(vma), xe_vma_size(vma)); + down_write(&vm->userptr.notifier_lock); mmu_interval_set_seq(mni, cur_seq); @@ -3233,6 +3237,10 @@ int xe_vm_invalidate_vma(struct xe_vma *vma) xe_assert(xe, !xe_vma_is_null(vma)); trace_xe_vma_invalidate(vma); + vm_dbg(&xe_vma_vm(vma)->xe->drm, + "INVALIDATE: addr=0x%016llx, range=0x%016llx", + xe_vma_start(vma), xe_vma_size(vma)); + /* Check that we don't race with page-table updates */ if (IS_ENABLED(CONFIG_PROVE_LOCKING)) { if (xe_vma_is_userptr(vma)) { -- cgit v1.2.3 From 4b275f502a0d3668195762fb55fa00e659ad1b0b Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Thu, 21 Mar 2024 11:06:30 +0000 Subject: drm/xe/query: fix gt_id bounds check The user provided gt_id should always be less than the XE_MAX_GT_PER_TILE. Fixes: 7793d00d1bf5 ("drm/xe: Correlate engine and cpu timestamps with better accuracy") Signed-off-by: Matthew Auld Cc: Nirmoy Das Cc: # v6.8+ Reviewed-by: Nirmoy Das Acked-by: Himal Prasad Ghimiray Link: https://patchwork.freedesktop.org/patch/msgid/20240321110629.334701-2-matthew.auld@intel.com --- drivers/gpu/drm/xe/xe_query.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c index fcd8680d2ccc..df407d73e5f5 100644 --- a/drivers/gpu/drm/xe/xe_query.c +++ b/drivers/gpu/drm/xe/xe_query.c @@ -133,7 +133,7 @@ query_engine_cycles(struct xe_device *xe, return -EINVAL; eci = &resp.eci; - if (eci->gt_id > XE_MAX_GT_PER_TILE) + if (eci->gt_id >= XE_MAX_GT_PER_TILE) return -EINVAL; gt = xe_device_get_gt(xe, eci->gt_id); -- cgit v1.2.3 From 070f8fd6c654ad5c1d5c11ec4e6920fcdc979268 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 21 Mar 2024 18:15:48 +0200 Subject: drm/xe: remove unused struct xe_device members MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit modeset_restore_state has been unused since commit 6af0ffc0db93 ("drm/i915/display: move restore state and ctx under display sub-struct"). member global_obj_list has been unused since commit e2925e19c006 ("drm/i915/display: move global_obj_list under display sub-struct"). hti_state has been unused since commit 62749912540b ("drm/i915/display: move hti under display sub-struct"). snps_phy_failed_calibration has been unused since commit 3a7e2d58f800 ("drm/i915: move snps_phy_failed_calibration to display sub-struct under snps"). 
Reviewed-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20240321161548.3509672-1-jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/xe/xe_device_types.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 9785eef2e5a4..1df3dcc17d75 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -498,10 +498,7 @@ struct xe_device { struct mutex sb_lock; /* Should be in struct intel_display */ - u32 skl_preferred_vco_freq, max_dotclk_freq, hti_state; - u8 snps_phy_failed_calibration; - struct drm_atomic_state *modeset_restore_state; - struct list_head global_obj_list; + u32 skl_preferred_vco_freq, max_dotclk_freq; union { /* only to allow build, not used functionally */ -- cgit v1.2.3 From 241dea210153fe12dc7814b14d8cd9ee76c8510d Mon Sep 17 00:00:00 2001 From: José Roberto de Souza Date: Thu, 7 Mar 2024 05:52:27 -0800 Subject: drm/xe: Make devcoredump VM error state print consistent MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This makes the VM error output consistent with [x].length and [x].data. Cc: Maarten Lankhorst Reviewed-by: Maarten Lankhorst Signed-off-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20240307135229.41973-1-jose.souza@intel.com --- drivers/gpu/drm/xe/xe_vm.c | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index e3692b7e1711..900fd868a166 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -3458,12 +3458,15 @@ void xe_vm_snapshot_print(struct xe_vm_snapshot *snap, struct drm_printer *p) unsigned long i, j; for (i = 0; i < snap->num_snaps; i++) { - if (IS_ERR(snap->snap[i].data)) - goto uncaptured; - drm_printf(p, "[%llx].length: 0x%lx\n", snap->snap[i].ofs, snap->snap[i].len); - drm_printf(p, "[%llx].data: ", - snap->snap[i].ofs); + + if (IS_ERR(snap->snap[i].data)) { + drm_printf(p, "[%llx].error: %li\n", snap->snap[i].ofs, + PTR_ERR(snap->snap[i].data)); + continue; + } + + drm_printf(p, "[%llx].data: ", snap->snap[i].ofs); for (j = 0; j < snap->snap[i].len; j += sizeof(u32)) { u32 *val = snap->snap[i].data + j; @@ -3473,12 +3476,6 @@ void xe_vm_snapshot_print(struct xe_vm_snapshot *snap, struct drm_printer *p) } drm_puts(p, "\n"); - continue; - -uncaptured: - drm_printf(p, "Unable to capture range [%llx-%llx]: %li\n", - snap->snap[i].ofs, snap->snap[i].ofs + snap->snap[i].len - 1, - PTR_ERR(snap->snap[i].data)); } } -- cgit v1.2.3 From e5f661bb56d49f05d095b3d40e27c0ce8039aef0 Mon Sep 17 00:00:00 2001 From: José Roberto de Souza Date: Thu, 7 Mar 2024 05:52:28 -0800 Subject: drm/xe/devcoredump: Print errno if VM snapshot was not captured MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit My testing machine has only 8GB of RAM, and while running piglit tests I can sometimes hit the OOM case in the xe_vm_snapshot_capture() snap allocation. So, to differentiate the OOM from a race between capture and UMDs unbinding VMs, I'm adding a '[0].error: -12' line to the devcoredump.
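The fix leans on the kernel's ERR_PTR idiom, where a pointer encodes an errno instead of collapsing to NULL; a generic, hedged refresher with hypothetical names:

struct foo {
	size_t n;
	u32 elem[];
};

static struct foo *foo_create(size_t n)
{
	struct foo *f = kvzalloc(struct_size(f, elem, n), GFP_KERNEL);

	if (!f)
		return ERR_PTR(-ENOMEM);	/* callers see the reason */

	f->n = n;
	return f;
}

/* consumer side */
f = foo_create(n);
if (IS_ERR(f))
	pr_err("capture failed: %ld\n", PTR_ERR(f));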
v2: - fix returned errno values Cc: Maarten Lankhorst Reviewed-by: Maarten Lankhorst Signed-off-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20240307135229.41973-2-jose.souza@intel.com --- drivers/gpu/drm/xe/xe_devcoredump.c | 6 ++---- drivers/gpu/drm/xe/xe_vm.c | 13 ++++++++++--- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c index 7d3aa6bd3524..3a6263ecff01 100644 --- a/drivers/gpu/drm/xe/xe_devcoredump.c +++ b/drivers/gpu/drm/xe/xe_devcoredump.c @@ -120,10 +120,8 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset, if (coredump->snapshot.hwe[i]) xe_hw_engine_snapshot_print(coredump->snapshot.hwe[i], &p); - if (coredump->snapshot.vm) { - drm_printf(&p, "\n**** VM state ****\n"); - xe_vm_snapshot_print(coredump->snapshot.vm, &p); - } + drm_printf(&p, "\n**** VM state ****\n"); + xe_vm_snapshot_print(coredump->snapshot.vm, &p); return count - iter.remain; } diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 900fd868a166..d82d7cd27123 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -3359,8 +3359,10 @@ struct xe_vm_snapshot *xe_vm_snapshot_capture(struct xe_vm *vm) if (num_snaps) snap = kvzalloc(offsetof(struct xe_vm_snapshot, snap[num_snaps]), GFP_NOWAIT); - if (!snap) + if (!snap) { + snap = num_snaps ? ERR_PTR(-ENOMEM) : ERR_PTR(-ENODEV); goto out_unlock; + } snap->num_snaps = num_snaps; i = 0; @@ -3400,7 +3402,7 @@ out_unlock: void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap) { - if (!snap) + if (IS_ERR(snap)) return; for (int i = 0; i < snap->num_snaps; i++) { @@ -3457,6 +3459,11 @@ void xe_vm_snapshot_print(struct xe_vm_snapshot *snap, struct drm_printer *p) { unsigned long i, j; + if (IS_ERR(snap)) { + drm_printf(p, "[0].error: %li\n", PTR_ERR(snap)); + return; + } + for (i = 0; i < snap->num_snaps; i++) { drm_printf(p, "[%llx].length: 0x%lx\n", snap->snap[i].ofs, snap->snap[i].len); @@ -3483,7 +3490,7 @@ void xe_vm_snapshot_free(struct xe_vm_snapshot *snap) { unsigned long i; - if (!snap) + if (IS_ERR(snap)) return; for (i = 0; i < snap->num_snaps; i++) { -- cgit v1.2.3 From 8f6444e1d1ec5b226d66b680787ee71e2cfe8264 Mon Sep 17 00:00:00 2001 From: José Roberto de Souza Date: Thu, 7 Mar 2024 05:52:29 -0800 Subject: drm/xe: Nuke EXEC_QUEUE_FLAG_PERSISTENT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is a left over of commit f1a9abc0cf31 ("drm/xe/uapi: Remove support for persistent exec_queues"). Cc: Maarten Lankhorst Reviewed-by: Maarten Lankhorst Signed-off-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20240307135229.41973-3-jose.souza@intel.com --- drivers/gpu/drm/xe/xe_exec_queue_types.h | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h index 62b3d9d1d7cd..9cc689f50db0 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue_types.h +++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h @@ -76,14 +76,12 @@ struct xe_exec_queue { #define EXEC_QUEUE_FLAG_KERNEL BIT(1) /* kernel engine only destroyed at driver unload */ #define EXEC_QUEUE_FLAG_PERMANENT BIT(2) -/* queue keeps running pending jobs after destroy ioctl */ -#define EXEC_QUEUE_FLAG_PERSISTENT BIT(3) /* for VM jobs. 
Caller needs to hold rpm ref when creating queue with this flag */ -#define EXEC_QUEUE_FLAG_VM BIT(4) +#define EXEC_QUEUE_FLAG_VM BIT(3) /* child of VM queue for multi-tile VM jobs */ -#define EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD BIT(5) +#define EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD BIT(4) /* kernel exec_queue only, set priority to highest level */ -#define EXEC_QUEUE_FLAG_HIGH_PRIORITY BIT(6) +#define EXEC_QUEUE_FLAG_HIGH_PRIORITY BIT(5) /** * @flags: flags for this exec queue, should statically setup aside from ban -- cgit v1.2.3 From c04b8aaeb4b23156a1ac7c14c28fd0b64687b290 Mon Sep 17 00:00:00 2001 From: Vinay Belgaumkar Date: Thu, 21 Mar 2024 12:12:19 -0700 Subject: drm/xe/guc: Add some failure checks Return failures from pc_adjust_freq_bounds. Signed-off-by: Vinay Belgaumkar Reviewed-by: Rodrigo Vivi Signed-off-by: Daniele Ceraolo Spurio Link: https://patchwork.freedesktop.org/patch/msgid/20240321191219.243583-1-vinay.belgaumkar@intel.com --- drivers/gpu/drm/xe/xe_guc_pc.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c index 786acaf71931..b242af8ed59f 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.c +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -714,24 +714,28 @@ static int pc_adjust_freq_bounds(struct xe_guc_pc *pc) ret = pc_action_query_task_state(pc); if (ret) - return ret; + goto out; /* * GuC defaults to some RPmax that is not actually achievable without * overclocking. Let's adjust it to the Hardware RP0, which is the * regular maximum */ - if (pc_get_max_freq(pc) > pc->rp0_freq) - pc_set_max_freq(pc, pc->rp0_freq); + if (pc_get_max_freq(pc) > pc->rp0_freq) { + ret = pc_set_max_freq(pc, pc->rp0_freq); + if (ret) + goto out; + } /* * Same thing happens for Server platforms where min is listed as * RPMax */ if (pc_get_min_freq(pc) > pc->rp0_freq) - pc_set_min_freq(pc, pc->rp0_freq); + ret = pc_set_min_freq(pc, pc->rp0_freq); - return 0; +out: + return ret; } static int pc_adjust_requested_freq(struct xe_guc_pc *pc) -- cgit v1.2.3 From b4abeb5545bb3ddcdda3c19067680ad0b2259be4 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Thu, 21 Mar 2024 12:55:12 -0700 Subject: drm/xe/guc: Check error code when initializing the CT mutex The initialization via drmm_mutex_init can fail, so we need to check the return code and escalate the failure. The mutex initialization has been moved after all the other init steps that can't fail, so we're always guaranteed to have those done and don't have to check in the cleanup code. 
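The resulting init ordering can be sketched as follows (a minimal illustration assuming only that drmm_mutex_init() returns 0 or a negative errno, per the DRM managed-resource API; struct example_ct and its fields are hypothetical):

	static int example_ct_init(struct xe_device *xe, struct example_ct *ct)
	{
		int err;

		/* infallible setup first, so no unwind path is ever required */
		spin_lock_init(&ct->fast_lock);
		init_waitqueue_head(&ct->wq);

		/* the only fallible step comes last and its error is propagated */
		err = drmm_mutex_init(&xe->drm, &ct->lock);
		if (err)
			return err;

		return 0;
	}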
Signed-off-by: Daniele Ceraolo Spurio Reviewed-by: Vinay Belgaumkar Link: https://patchwork.freedesktop.org/patch/msgid/20240321195512.274210-1-daniele.ceraolospurio@intel.com --- drivers/gpu/drm/xe/xe_guc_ct.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index d9fa81900ff5..1d930a8eeeca 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -145,13 +145,16 @@ int xe_guc_ct_init(struct xe_guc_ct *ct) xe_assert(xe, !(guc_ct_size() % PAGE_SIZE)); - drmm_mutex_init(&xe->drm, &ct->lock); spin_lock_init(&ct->fast_lock); xa_init(&ct->fence_lookup); INIT_WORK(&ct->g2h_worker, g2h_worker_func); init_waitqueue_head(&ct->wq); init_waitqueue_head(&ct->g2h_fence_wq); + err = drmm_mutex_init(&xe->drm, &ct->lock); + if (err) + return err; + primelockdep(ct); bo = xe_managed_bo_create_pin_map(xe, tile, guc_ct_size(), -- cgit v1.2.3 From 35b22649eb4155ca6bcffcb2c6e2a1d311aaaf72 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Fri, 22 Mar 2024 07:48:43 -0700 Subject: drm/xe: Fix END redefinition mips declares an END macro in its headers so it can't be used without namespace in a driver like xe. Instead of coming up with a longer name, just remove the macro and replace its use with 0 since it's still clear what that means: set_offsets() was already using that implicitly when checking the data variable. Reported-by: Guenter Roeck Closes: http://kisskb.ellerman.id.au/kisskb/buildresult/15143996/ Tested-by: Guenter Roeck Reviewed-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20240322145037.196548-1-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_lrc.c | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index 95e0f7b1ec3f..db0793273de0 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -115,7 +115,6 @@ static void set_offsets(u32 *regs, #define REG16(x) \ (((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \ (((x) >> 2) & 0x7f) -#define END 0 { const u32 base = hwe->mmio_base; @@ -186,7 +185,7 @@ static const u8 gen12_xcs_offsets[] = { REG16(0x274), REG16(0x270), - END + 0 }; static const u8 dg2_xcs_offsets[] = { @@ -220,7 +219,7 @@ static const u8 dg2_xcs_offsets[] = { REG16(0x274), REG16(0x270), - END + 0 }; static const u8 gen12_rcs_offsets[] = { @@ -316,7 +315,7 @@ static const u8 gen12_rcs_offsets[] = { REG(0x084), NOP(1), - END + 0 }; static const u8 xehp_rcs_offsets[] = { @@ -357,7 +356,7 @@ static const u8 xehp_rcs_offsets[] = { LRI(1, 0), REG(0x0c8), - END + 0 }; static const u8 dg2_rcs_offsets[] = { @@ -400,7 +399,7 @@ static const u8 dg2_rcs_offsets[] = { LRI(1, 0), REG(0x0c8), - END + 0 }; static const u8 mtl_rcs_offsets[] = { @@ -443,7 +442,7 @@ static const u8 mtl_rcs_offsets[] = { LRI(1, 0), REG(0x0c8), - END + 0 }; #define XE2_CTX_COMMON \ @@ -489,7 +488,7 @@ static const u8 xe2_rcs_offsets[] = { LRI(1, 0), /* [0x47] */ REG(0x0c8), /* [0x48] R_PWR_CLK_STATE */ - END + 0 }; static const u8 xe2_bcs_offsets[] = { @@ -500,16 +499,15 @@ static const u8 xe2_bcs_offsets[] = { REG16(0x200), /* [0x42] BCS_SWCTRL */ REG16(0x204), /* [0x44] BLIT_CCTL */ - END + 0 }; static const u8 xe2_xcs_offsets[] = { XE2_CTX_COMMON, - END + 0 }; -#undef END #undef REG16 #undef REG #undef LRI -- cgit v1.2.3 From 008aa86a09ba623e563d7777353f6ca2a391424d Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Thu, 21 Mar 2024 
14:38:18 -0700 Subject: drm/xe: Remove redundant functions to get xe xe_device.h implements these helpers, just use them. Reviewed-by: Ashutosh Dixit Link: https://patchwork.freedesktop.org/patch/msgid/20240321213818.72311-1-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c index c17cce53f19d..aba01edffacd 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c +++ b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c @@ -7,6 +7,7 @@ #include #include +#include "xe_device.h" #include "xe_gt.h" #include "xe_hw_engine_class_sysfs.h" #include "xe_pm.h" @@ -569,18 +570,6 @@ static void xe_hw_engine_sysfs_kobj_release(struct kobject *kobj) kfree(kobj); } -#include "xe_pm.h" - -static inline struct xe_device *pdev_to_xe_device(struct pci_dev *pdev) -{ - return pci_get_drvdata(pdev); -} - -static inline struct xe_device *to_xe_device(const struct drm_device *dev) -{ - return container_of(dev, struct xe_device, drm); -} - static ssize_t xe_hw_engine_class_sysfs_attr_show(struct kobject *kobj, struct attribute *attr, char *buf) -- cgit v1.2.3 From cf03825bdd190b7ab323f797278a00fbf163d889 Mon Sep 17 00:00:00 2001 From: Niranjana Vishwanathapura Date: Fri, 22 Mar 2024 12:14:55 -0700 Subject: drm/xe: Use FIELD_PREP for lrc descriptor Use FIELD_PREP for setting lrc descriptor fields instead of shifting values to fields. v2: Use ULL macro variants v3: Do not use FIELD_PREP for 1-bit values Signed-off-by: Niranjana Vishwanathapura Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240322191455.7613-1-niranjana.vishwanathapura@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_lrc.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index db0793273de0..2ba111b89a47 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -26,13 +26,13 @@ #include "xe_sriov.h" #include "xe_vm.h" -#define LRC_VALID (1 << 0) -#define LRC_PRIVILEGE (1 << 8) -#define LRC_ADDRESSING_MODE_SHIFT 3 +#define LRC_VALID BIT_ULL(0) +#define LRC_PRIVILEGE BIT_ULL(8) +#define LRC_ADDRESSING_MODE GENMASK_ULL(4, 3) #define LRC_LEGACY_64B_CONTEXT 3 -#define ENGINE_CLASS_SHIFT 61 -#define ENGINE_INSTANCE_SHIFT 48 +#define LRC_ENGINE_CLASS GENMASK_ULL(63, 61) +#define LRC_ENGINE_INSTANCE GENMASK_ULL(53, 48) struct xe_lrc_snapshot { struct xe_bo *lrc_bo; @@ -796,7 +796,7 @@ int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, xe_lrc_write_ctx_reg(lrc, PVC_CTX_ASID, vm->usm.asid); lrc->desc = LRC_VALID; - lrc->desc |= LRC_LEGACY_64B_CONTEXT << LRC_ADDRESSING_MODE_SHIFT; + lrc->desc |= FIELD_PREP(LRC_ADDRESSING_MODE, LRC_LEGACY_64B_CONTEXT); /* TODO: Priority */ /* While this appears to have something about privileged batches or @@ -806,8 +806,8 @@ int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, lrc->desc |= LRC_PRIVILEGE; if (GRAPHICS_VERx100(xe) < 1250) { - lrc->desc |= (u64)hwe->instance << ENGINE_INSTANCE_SHIFT; - lrc->desc |= (u64)hwe->class << ENGINE_CLASS_SHIFT; + lrc->desc |= FIELD_PREP(LRC_ENGINE_INSTANCE, hwe->instance); + lrc->desc |= FIELD_PREP(LRC_ENGINE_CLASS, hwe->class); } arb_enable = MI_ARB_ON_OFF | MI_ARB_ENABLE; -- cgit v1.2.3 From 0bd25f78c4fc97ec61ebaf6147973ddc2e15c7b6 Mon Sep 17 00:00:00 2001 From: Ravi Kumar Vodapalli 
Date: Tue, 26 Mar 2024 16:08:25 +0530 Subject: drm/xe: Add new PCI IDs to DG2 platform New PCI IDs were added in the Bspec for the DG2 platform; add them to the driver. Bspec: 44477 Signed-off-by: Ravi Kumar Vodapalli Reviewed-by: Matt Roper Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20240326103825.3832879-1-ravi.kumar.vodapalli@intel.com --- include/drm/xe_pciids.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/include/drm/xe_pciids.h b/include/drm/xe_pciids.h index bc7cbef6e9d8..adb37bc541e4 100644 --- a/include/drm/xe_pciids.h +++ b/include/drm/xe_pciids.h @@ -134,7 +134,9 @@ MACRO__(0x5692, ## __VA_ARGS__), \ MACRO__(0x56A0, ## __VA_ARGS__), \ MACRO__(0x56A1, ## __VA_ARGS__), \ - MACRO__(0x56A2, ## __VA_ARGS__) + MACRO__(0x56A2, ## __VA_ARGS__), \ + MACRO__(0x56BE, ## __VA_ARGS__), \ + MACRO__(0x56BF, ## __VA_ARGS__) #define XE_DG2_G11_IDS(MACRO__, ...) \ MACRO__(0x5693, ## __VA_ARGS__), \ -- cgit v1.2.3 From 4b217c7fa6ba8b80fcc5cd36086d7ee51b2cc54f Mon Sep 17 00:00:00 2001 From: Vinay Belgaumkar Date: Mon, 25 Mar 2024 16:56:02 -0700 Subject: drm/xe/guc: Remove explicit shutdown of SLPC SLPC shutdown is called in reset and suspend paths. In the reset path, it is possible that the H2G call gets lost as GuC is in the process of being reset. There is no value in stopping SLPC when it will happen anyway. In the suspend path, we disable communication with GuC, so there is no need to explicitly shut down SLPC. v2: Rebase Cc: Rodrigo Vivi Reviewed-by: Rodrigo Vivi Signed-off-by: Vinay Belgaumkar Signed-off-by: Daniele Ceraolo Spurio Link: https://patchwork.freedesktop.org/patch/msgid/20240325235602.1155486-1-vinay.belgaumkar@intel.com --- drivers/gpu/drm/xe/xe_guc_pc.c | 29 ----------------------------- 1 file changed, 29 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c index b242af8ed59f..9c110537d135 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.c +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -145,25 +145,6 @@ static int pc_action_reset(struct xe_guc_pc *pc) return ret; } -static int pc_action_shutdown(struct xe_guc_pc *pc) -{ - struct xe_guc_ct *ct = &pc_to_guc(pc)->ct; - int ret; - u32 action[] = { - GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST, - SLPC_EVENT(SLPC_EVENT_SHUTDOWN, 2), - xe_bo_ggtt_addr(pc->bo), - 0, - }; - - ret = xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 0, 0); - if (ret) - drm_err(&pc_to_xe(pc)->drm, "GuC PC shutdown %pe", - ERR_PTR(ret)); - - return ret; -} - static int pc_action_query_task_state(struct xe_guc_pc *pc) { struct xe_guc_ct *ct = &pc_to_guc(pc)->ct; @@ -893,7 +874,6 @@ out: int xe_guc_pc_stop(struct xe_guc_pc *pc) { struct xe_device *xe = pc_to_xe(pc); - int ret; if (xe->info.skip_guc_pc) { xe_gt_idle_disable_c6(pc_to_gt(pc)); @@ -904,15 +884,6 @@ int xe_guc_pc_stop(struct xe_guc_pc *pc) pc->freq_ready = false; mutex_unlock(&pc->freq_lock); - ret = pc_action_shutdown(pc); - if (ret) - return ret; - - if (wait_for_pc_state(pc, SLPC_GLOBAL_STATE_NOT_RUNNING)) { - drm_err(&pc_to_xe(pc)->drm, "GuC PC Shutdown failed\n"); - return -EIO; - } - return 0; } -- cgit v1.2.3 From 5dffaa1bb94a6bc75393476fbe3c8a704ff4fcf8 Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Thu, 21 Mar 2024 17:11:42 +0100 Subject: drm/xe: Create a helper function to init job's user fence Refactor xe_sync_entry_signal so it doesn't have to modify the xe_sched_job struct; instead, create a new helper function to set the user fence values for a job.
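A user fence is just an (address, value) pair: when the job's hardware fence signals, the kernel writes the value to the address. As a hedged userspace-side sketch of what the helper feeds (polling is shown purely for illustration; real code would use xe's wait-user-fence ioctl rather than spinning):

	#include <stdatomic.h>
	#include <stdint.h>

	/* hypothetical wait on a fence submitted with DRM_XE_SYNC_TYPE_USER_FENCE:
	 * spin until the kernel stores the timeline value at the fence address */
	static void poll_user_fence(const _Atomic uint64_t *addr, uint64_t value)
	{
		while (atomic_load_explicit(addr, memory_order_acquire) != value)
			; /* busy-wait */
	}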
v2: Move the sync type check to xe_sched_job_init_user_fence(Lucas) Cc: Lucas De Marchi Cc: Matthew Auld Cc: Matthew Brost Cc: Michal Wajdeczko Signed-off-by: Nirmoy Das Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240321161142.4954-1-nirmoy.das@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_exec.c | 9 +++++---- drivers/gpu/drm/xe/xe_sched_job.c | 18 ++++++++++++++++++ drivers/gpu/drm/xe/xe_sched_job.h | 3 +++ drivers/gpu/drm/xe/xe_sync.c | 7 +------ drivers/gpu/drm/xe/xe_sync.h | 1 - drivers/gpu/drm/xe/xe_vm.c | 10 +++++----- 6 files changed, 32 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c index 7692ebfe7d47..9d53ef8c49cc 100644 --- a/drivers/gpu/drm/xe/xe_exec.c +++ b/drivers/gpu/drm/xe/xe_exec.c @@ -249,7 +249,7 @@ retry: goto err_unlock_list; } for (i = 0; i < num_syncs; i++) - xe_sync_entry_signal(&syncs[i], NULL, fence); + xe_sync_entry_signal(&syncs[i], fence); xe_exec_queue_last_fence_set(q, vm, fence); dma_fence_put(fence); } @@ -359,9 +359,10 @@ retry: drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, &job->drm.s_fence->finished, DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_WRITE); - for (i = 0; i < num_syncs; i++) - xe_sync_entry_signal(&syncs[i], job, - &job->drm.s_fence->finished); + for (i = 0; i < num_syncs; i++) { + xe_sync_entry_signal(&syncs[i], &job->drm.s_fence->finished); + xe_sched_job_init_user_fence(job, &syncs[i]); + } if (xe_exec_queue_is_lr(q)) q->ring_ops->emit_job(job); diff --git a/drivers/gpu/drm/xe/xe_sched_job.c b/drivers/gpu/drm/xe/xe_sched_job.c index 8151ddafb940..add5a8b89be8 100644 --- a/drivers/gpu/drm/xe/xe_sched_job.c +++ b/drivers/gpu/drm/xe/xe_sched_job.c @@ -5,6 +5,7 @@ #include "xe_sched_job.h" +#include #include #include @@ -15,6 +16,7 @@ #include "xe_hw_fence.h" #include "xe_lrc.h" #include "xe_macros.h" +#include "xe_sync_types.h" #include "xe_trace.h" #include "xe_vm.h" @@ -278,6 +280,22 @@ int xe_sched_job_last_fence_add_dep(struct xe_sched_job *job, struct xe_vm *vm) return drm_sched_job_add_dependency(&job->drm, fence); } +/** + * xe_sched_job_init_user_fence - Initialize user_fence for the job + * @job: job whose user_fence needs an init + * @sync: sync to be use to init user_fence + */ +void xe_sched_job_init_user_fence(struct xe_sched_job *job, + struct xe_sync_entry *sync) +{ + if (sync->type != DRM_XE_SYNC_TYPE_USER_FENCE) + return; + + job->user_fence.used = true; + job->user_fence.addr = sync->addr; + job->user_fence.value = sync->timeline_value; +} + struct xe_sched_job_snapshot * xe_sched_job_snapshot_capture(struct xe_sched_job *job) { diff --git a/drivers/gpu/drm/xe/xe_sched_job.h b/drivers/gpu/drm/xe/xe_sched_job.h index f1a660648cf0..c75018f4660d 100644 --- a/drivers/gpu/drm/xe/xe_sched_job.h +++ b/drivers/gpu/drm/xe/xe_sched_job.h @@ -10,6 +10,7 @@ struct drm_printer; struct xe_vm; +struct xe_sync_entry; #define XE_SCHED_HANG_LIMIT 1 #define XE_SCHED_JOB_TIMEOUT LONG_MAX @@ -58,6 +59,8 @@ void xe_sched_job_arm(struct xe_sched_job *job); void xe_sched_job_push(struct xe_sched_job *job); int xe_sched_job_last_fence_add_dep(struct xe_sched_job *job, struct xe_vm *vm); +void xe_sched_job_init_user_fence(struct xe_sched_job *job, + struct xe_sync_entry *sync); static inline struct xe_sched_job * to_xe_sched_job(struct drm_sched_job *drm) diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c index 02c9577fe418..65f1f1628235 100644 --- a/drivers/gpu/drm/xe/xe_sync.c +++ b/drivers/gpu/drm/xe/xe_sync.c @@ -224,8 
+224,7 @@ int xe_sync_entry_add_deps(struct xe_sync_entry *sync, struct xe_sched_job *job) return 0; } -void xe_sync_entry_signal(struct xe_sync_entry *sync, struct xe_sched_job *job, - struct dma_fence *fence) +void xe_sync_entry_signal(struct xe_sync_entry *sync, struct dma_fence *fence) { if (!(sync->flags & DRM_XE_SYNC_FLAG_SIGNAL)) return; @@ -254,10 +253,6 @@ void xe_sync_entry_signal(struct xe_sync_entry *sync, struct xe_sched_job *job, user_fence_put(sync->ufence); dma_fence_put(fence); } - } else if (sync->type == DRM_XE_SYNC_TYPE_USER_FENCE) { - job->user_fence.used = true; - job->user_fence.addr = sync->addr; - job->user_fence.value = sync->timeline_value; } } diff --git a/drivers/gpu/drm/xe/xe_sync.h b/drivers/gpu/drm/xe/xe_sync.h index 0fd0d51208e6..3e03396af2c6 100644 --- a/drivers/gpu/drm/xe/xe_sync.h +++ b/drivers/gpu/drm/xe/xe_sync.h @@ -26,7 +26,6 @@ int xe_sync_entry_wait(struct xe_sync_entry *sync); int xe_sync_entry_add_deps(struct xe_sync_entry *sync, struct xe_sched_job *job); void xe_sync_entry_signal(struct xe_sync_entry *sync, - struct xe_sched_job *job, struct dma_fence *fence); void xe_sync_entry_cleanup(struct xe_sync_entry *sync); struct dma_fence * diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index d82d7cd27123..694fbb546372 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -1700,7 +1700,7 @@ next: xe_exec_queue_last_fence_get(wait_exec_queue, vm) : fence; if (last_op) { for (i = 0; i < num_syncs; i++) - xe_sync_entry_signal(&syncs[i], NULL, fence); + xe_sync_entry_signal(&syncs[i], fence); } return fence; @@ -1774,7 +1774,7 @@ next: if (last_op) { for (i = 0; i < num_syncs; i++) - xe_sync_entry_signal(&syncs[i], NULL, + xe_sync_entry_signal(&syncs[i], cf ? &cf->base : fence); } @@ -1835,7 +1835,7 @@ static int __xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, fence = xe_exec_queue_last_fence_get(wait_exec_queue, vm); if (last_op) { for (i = 0; i < num_syncs; i++) - xe_sync_entry_signal(&syncs[i], NULL, fence); + xe_sync_entry_signal(&syncs[i], fence); } } @@ -2056,7 +2056,7 @@ static int xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma, struct dma_fence *fence = xe_exec_queue_last_fence_get(wait_exec_queue, vm); - xe_sync_entry_signal(&syncs[i], NULL, fence); + xe_sync_entry_signal(&syncs[i], fence); dma_fence_put(fence); } } @@ -2934,7 +2934,7 @@ static int vm_bind_ioctl_signal_fences(struct xe_vm *vm, return PTR_ERR(fence); for (i = 0; i < num_syncs; i++) - xe_sync_entry_signal(&syncs[i], NULL, fence); + xe_sync_entry_signal(&syncs[i], fence); xe_exec_queue_last_fence_set(to_wait_exec_queue(vm, q), vm, fence); -- cgit v1.2.3 From 7da3f561cbdf16bb853df5c779b09b4cb3d4c9e9 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Tue, 26 Mar 2024 14:10:42 +0100 Subject: drm/xe: Move HW GGTT definitions to dedicated file It's better to keep all hardware GGTT definitions separated from the driver code. It also helps to avoid duplicated definitions. 
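As an example of the shared use this enables, the PAT selection bits from the new header can be folded into a GGTT PTE like so (an illustrative sketch; the helper name is made up, while the XELPG_GGTT_PTE_PAT0/1 definitions are the real ones being moved):

	static u64 example_pte_set_pat(u64 pte, u16 pat_index)
	{
		/* bit 0 of the PAT index selects PAT0, bit 1 selects PAT1 */
		if (pat_index & BIT(0))
			pte |= XELPG_GGTT_PTE_PAT0;
		if (pat_index & BIT(1))
			pte |= XELPG_GGTT_PTE_PAT1;
		return pte;
	}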
Signed-off-by: Michal Wajdeczko Cc: Matt Roper Reviewed-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20240326131042.319-1-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/regs/xe_gtt_defs.h | 14 ++++++++++++++ drivers/gpu/drm/xe/xe_ggtt.c | 7 +------ drivers/gpu/drm/xe/xe_guc.c | 4 ++-- 3 files changed, 17 insertions(+), 8 deletions(-) create mode 100644 drivers/gpu/drm/xe/regs/xe_gtt_defs.h diff --git a/drivers/gpu/drm/xe/regs/xe_gtt_defs.h b/drivers/gpu/drm/xe/regs/xe_gtt_defs.h new file mode 100644 index 000000000000..9196d71bad37 --- /dev/null +++ b/drivers/gpu/drm/xe/regs/xe_gtt_defs.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2024 Intel Corporation + */ + +#ifndef _XE_GTT_DEFS_H_ +#define _XE_GTT_DEFS_H_ + +#define XELPG_GGTT_PTE_PAT0 BIT_ULL(52) +#define XELPG_GGTT_PTE_PAT1 BIT_ULL(53) + +#define GUC_GGTT_TOP 0xFEE00000 + +#endif diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index f54523d7d03c..f659af221bd8 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -12,6 +12,7 @@ #include #include "regs/xe_gt_regs.h" +#include "regs/xe_gtt_defs.h" #include "regs/xe_regs.h" #include "xe_assert.h" #include "xe_bo.h" @@ -23,12 +24,6 @@ #include "xe_sriov.h" #include "xe_wopcm.h" -#define XELPG_GGTT_PTE_PAT0 BIT_ULL(52) -#define XELPG_GGTT_PTE_PAT1 BIT_ULL(53) - -/* GuC addresses above GUC_GGTT_TOP also don't map through the GTT */ -#define GUC_GGTT_TOP 0xFEE00000 - static u64 xelp_ggtt_pte_encode_bo(struct xe_bo *bo, u64 bo_offset, u16 pat_index) { diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index c558d978462a..77be3bc2d7c0 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -12,6 +12,7 @@ #include "abi/guc_actions_abi.h" #include "abi/guc_errors_abi.h" #include "regs/xe_gt_regs.h" +#include "regs/xe_gtt_defs.h" #include "regs/xe_guc_regs.h" #include "xe_bo.h" #include "xe_device.h" @@ -33,14 +34,13 @@ #include "xe_wa.h" #include "xe_wopcm.h" -/* GuC addresses above GUC_GGTT_TOP also don't map through the GTT */ -#define GUC_GGTT_TOP 0xFEE00000 static u32 guc_bo_ggtt_addr(struct xe_guc *guc, struct xe_bo *bo) { struct xe_device *xe = guc_to_xe(guc); u32 addr = xe_bo_ggtt_addr(bo); + /* GuC addresses above GUC_GGTT_TOP don't map through the GTT */ xe_assert(xe, addr >= xe_wopcm_size(guc_to_xe(guc))); xe_assert(xe, addr < GUC_GGTT_TOP); xe_assert(xe, bo->size <= GUC_GGTT_TOP - addr); -- cgit v1.2.3 From 59058f2af9ca17d6c3113f6bbf93f6389fd4d0ea Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Tue, 13 Feb 2024 22:49:08 +0100 Subject: drm/xe/guc: Fix include guard for SR-IOV ABI Use include guard macro name that follows naming used by the other GuC ABI files. 
Signed-off-by: Michal Wajdeczko Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240213214908.1481-1-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h b/drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h index 5496a5890847..fe7a93ce3aa5 100644 --- a/drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h +++ b/drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h @@ -3,8 +3,8 @@ * Copyright © 2023 Intel Corporation */ -#ifndef _GUC_ACTIONS_PF_ABI_H -#define _GUC_ACTIONS_PF_ABI_H +#ifndef _ABI_GUC_ACTIONS_SRIOV_ABI_H +#define _ABI_GUC_ACTIONS_SRIOV_ABI_H #include "guc_communication_ctb_abi.h" -- cgit v1.2.3 From f88beeed82700697745aa3290f5a12c7b1b1bbe7 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Wed, 13 Mar 2024 23:11:09 +0100 Subject: drm/xe/guc: Move GUC_ID_MAX definition to GuC ABI header This macro represents a GuC firmware capability and shall be defined in the firmware ABI header. Move it to the xe_guc_fwif.h file. Cc: Matthew Brost Reviewed-by: Matthew Brost Signed-off-by: Michal Wajdeczko Link: https://patchwork.freedesktop.org/patch/msgid/20240313221112.1089-2-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_guc_fwif.h | 2 ++ drivers/gpu/drm/xe/xe_guc_submit.c | 1 - 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_guc_fwif.h b/drivers/gpu/drm/xe/xe_guc_fwif.h index c281fdbfd2d6..5474025271e3 100644 --- a/drivers/gpu/drm/xe/xe_guc_fwif.h +++ b/drivers/gpu/drm/xe/xe_guc_fwif.h @@ -14,6 +14,8 @@ #define G2H_LEN_DW_DEREGISTER_CONTEXT 3 #define G2H_LEN_DW_TLB_INVALIDATE 3 +#define GUC_ID_MAX 65535 + #define GUC_CONTEXT_DISABLE 0 #define GUC_CONTEXT_ENABLE 1 diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index ce46ce22fa5f..03c167624340 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -242,7 +242,6 @@ static void guc_submit_fini(struct drm_device *drm, void *arg) mutex_destroy(&guc->submission_state.lock); } -#define GUC_ID_MAX 65535 #define GUC_ID_NUMBER_MLRC 4096 #define GUC_ID_NUMBER_SLRC (GUC_ID_MAX - GUC_ID_NUMBER_MLRC) #define GUC_ID_START_MLRC GUC_ID_NUMBER_SLRC -- cgit v1.2.3 From 68fac8ab0f9d51abd93edee38e8dadf2705c0b4f Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Wed, 13 Mar 2024 23:11:10 +0100 Subject: drm/xe/guc: Introduce GuC context ID Manager While we are already managing GuC IDs directly in GuC submission code, using bitmap() for MLRC and ida() for SLRC, this code can't be easily extended to meet additional requirements for SR-IOV use cases, like a limited number of IDs available on VFs, or ID range reservation for provisioning VFs by the PF. Add a separate component for managing GuC IDs that will replace the existing ID management. Start with a bitmap()-based implementation that could be optimized later based on perf data.
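The intended call flow, sketched from the API added below (error handling trimmed; q_width stands in for an exec queue's width, and the caller is expected to hold guc->submission_state.lock for the _locked variants):

	int err, id;

	/* one-time setup; ~0 means "manage every ID the firmware supports" */
	err = xe_guc_id_mgr_init(idm, ~0);

	/* submission: reserve a contiguous range, one ID per LRC */
	id = xe_guc_id_mgr_reserve_locked(idm, q_width);
	if (id >= 0) {
		/* ... IDs id .. id + q_width - 1 are in use ... */
		xe_guc_id_mgr_release_locked(idm, id, q_width);
	}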
Cc: Matthew Brost Signed-off-by: Michal Wajdeczko Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240313221112.1089-3-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/Makefile | 1 + drivers/gpu/drm/xe/xe_guc_id_mgr.c | 275 +++++++++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_guc_id_mgr.h | 22 +++ drivers/gpu/drm/xe/xe_guc_types.h | 17 +++ 4 files changed, 315 insertions(+) create mode 100644 drivers/gpu/drm/xe/xe_guc_id_mgr.c create mode 100644 drivers/gpu/drm/xe/xe_guc_id_mgr.h diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index 3c3e67885559..705c0eaf6e71 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -97,6 +97,7 @@ xe-y += xe_bb.o \ xe_guc_db_mgr.o \ xe_guc_debugfs.o \ xe_guc_hwconfig.o \ + xe_guc_id_mgr.o \ xe_guc_log.o \ xe_guc_pc.o \ xe_guc_submit.o \ diff --git a/drivers/gpu/drm/xe/xe_guc_id_mgr.c b/drivers/gpu/drm/xe/xe_guc_id_mgr.c new file mode 100644 index 000000000000..bee49bc908bb --- /dev/null +++ b/drivers/gpu/drm/xe/xe_guc_id_mgr.c @@ -0,0 +1,275 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2024 Intel Corporation + */ + +#include +#include + +#include + +#include "xe_assert.h" +#include "xe_gt_printk.h" +#include "xe_guc.h" +#include "xe_guc_id_mgr.h" +#include "xe_guc_types.h" + +static struct xe_guc *idm_to_guc(struct xe_guc_id_mgr *idm) +{ + return container_of(idm, struct xe_guc, submission_state.idm); +} + +static struct xe_gt *idm_to_gt(struct xe_guc_id_mgr *idm) +{ + return guc_to_gt(idm_to_guc(idm)); +} + +static struct xe_device *idm_to_xe(struct xe_guc_id_mgr *idm) +{ + return gt_to_xe(idm_to_gt(idm)); +} + +#define idm_assert(idm, cond) xe_gt_assert(idm_to_gt(idm), cond) +#define idm_mutex(idm) (&idm_to_guc(idm)->submission_state.lock) + +static void idm_print_locked(struct xe_guc_id_mgr *idm, struct drm_printer *p, int indent); + +static void __fini_idm(struct drm_device *drm, void *arg) +{ + struct xe_guc_id_mgr *idm = arg; + + mutex_lock(idm_mutex(idm)); + + if (IS_ENABLED(CONFIG_DRM_XE_DEBUG)) { + unsigned int weight = bitmap_weight(idm->bitmap, idm->total); + + if (weight) { + struct drm_printer p = xe_gt_info_printer(idm_to_gt(idm)); + + xe_gt_err(idm_to_gt(idm), "GUC ID manager unclean (%u/%u)\n", + weight, idm->total); + idm_print_locked(idm, &p, 1); + } + } + + bitmap_free(idm->bitmap); + idm->bitmap = NULL; + idm->total = 0; + idm->used = 0; + + mutex_unlock(idm_mutex(idm)); +} + +/** + * xe_guc_id_mgr_init() - Initialize GuC context ID Manager. + * @idm: the &xe_guc_id_mgr to initialize + * @limit: number of IDs to manage + * + * The bare-metal or PF driver can pass ~0 as &limit to indicate that all + * context IDs supported by the GuC firmware are available for use. + * + * Only VF drivers will have to provide explicit number of context IDs + * that they can use. + * + * Return: 0 on success or a negative error code on failure. 
+ */ +int xe_guc_id_mgr_init(struct xe_guc_id_mgr *idm, unsigned int limit) +{ + int ret; + + idm_assert(idm, !idm->bitmap); + idm_assert(idm, !idm->total); + idm_assert(idm, !idm->used); + + if (limit == ~0) + limit = GUC_ID_MAX; + else if (limit > GUC_ID_MAX) + return -ERANGE; + else if (!limit) + return -EINVAL; + + idm->bitmap = bitmap_zalloc(limit, GFP_KERNEL); + if (!idm->bitmap) + return -ENOMEM; + idm->total = limit; + + ret = drmm_add_action_or_reset(&idm_to_xe(idm)->drm, __fini_idm, idm); + if (ret) + return ret; + + xe_gt_info(idm_to_gt(idm), "using %u GUC ID(s)\n", idm->total); + return 0; +} + +static unsigned int find_last_zero_area(unsigned long *bitmap, + unsigned int total, + unsigned int count) +{ + unsigned int found = total; + unsigned int rs, re, range; + + for_each_clear_bitrange(rs, re, bitmap, total) { + range = re - rs; + if (range < count) + continue; + found = rs + (range - count); + } + return found; +} + +static int idm_reserve_chunk_locked(struct xe_guc_id_mgr *idm, + unsigned int count, unsigned int retain) +{ + int id; + + idm_assert(idm, count); + lockdep_assert_held(idm_mutex(idm)); + + if (!idm->total) + return -ENODATA; + + if (retain) { + /* + * For IDs reservations (used on PF for VFs) we want to make + * sure there will be at least 'retain' available for the PF + */ + if (idm->used + count + retain > idm->total) + return -EDQUOT; + /* + * ... and we want to reserve highest IDs close to the end. + */ + id = find_last_zero_area(idm->bitmap, idm->total, count); + } else { + /* + * For regular IDs reservations (used by submission code) + * we start searching from the lower range of IDs. + */ + id = bitmap_find_next_zero_area(idm->bitmap, idm->total, 0, count, 0); + } + if (id >= idm->total) + return -ENOSPC; + + bitmap_set(idm->bitmap, id, count); + idm->used += count; + + return id; +} + +static void idm_release_chunk_locked(struct xe_guc_id_mgr *idm, + unsigned int start, unsigned int count) +{ + idm_assert(idm, count); + idm_assert(idm, count <= idm->used); + idm_assert(idm, start < idm->total); + idm_assert(idm, start + count - 1 < idm->total); + lockdep_assert_held(idm_mutex(idm)); + + if (IS_ENABLED(CONFIG_DRM_XE_DEBUG)) { + unsigned int n; + + for (n = 0; n < count; n++) + idm_assert(idm, test_bit(start + n, idm->bitmap)); + } + bitmap_clear(idm->bitmap, start, count); + idm->used -= count; +} + +/** + * xe_guc_id_mgr_reserve_locked() - Reserve one or more GuC context IDs. + * @idm: the &xe_guc_id_mgr + * @count: number of IDs to allocate (can't be 0) + * + * This function is dedicated for the use by the GuC submission code, + * where submission lock is already taken. + * + * Return: ID of allocated GuC context or a negative error code on failure. + */ +int xe_guc_id_mgr_reserve_locked(struct xe_guc_id_mgr *idm, unsigned int count) +{ + return idm_reserve_chunk_locked(idm, count, 0); +} + +/** + * xe_guc_id_mgr_release_locked() - Release one or more GuC context IDs. + * @idm: the &xe_guc_id_mgr + * @id: the GuC context ID to release + * @count: number of IDs to release (can't be 0) + * + * This function is dedicated for the use by the GuC submission code, + * where submission lock is already taken. + */ +void xe_guc_id_mgr_release_locked(struct xe_guc_id_mgr *idm, unsigned int id, + unsigned int count) +{ + return idm_release_chunk_locked(idm, id, count); +} + +/** + * xe_guc_id_mgr_reserve() - Reserve a range of GuC context IDs. 
+ * @idm: the &xe_guc_id_mgr + * @count: number of GuC context IDs to reserve (can't be 0) + * @retain: number of GuC context IDs to keep available (can't be 0) + * + * This function is dedicated for the use by the PF driver which expects that + * reserved range of IDs will be contiguous and that there will be at least + * &retain IDs still available for the PF after this reservation. + * + * Return: starting ID of the allocated GuC context ID range or + * a negative error code on failure. + */ +int xe_guc_id_mgr_reserve(struct xe_guc_id_mgr *idm, + unsigned int count, unsigned int retain) +{ + int ret; + + idm_assert(idm, count); + idm_assert(idm, retain); + + mutex_lock(idm_mutex(idm)); + ret = idm_reserve_chunk_locked(idm, count, retain); + mutex_unlock(idm_mutex(idm)); + + return ret; +} + +/** + * xe_guc_id_mgr_release() - Release a range of GuC context IDs. + * @idm: the &xe_guc_id_mgr + * @start: the starting ID of GuC context range to release + * @count: number of GuC context IDs to release + */ +void xe_guc_id_mgr_release(struct xe_guc_id_mgr *idm, + unsigned int start, unsigned int count) +{ + mutex_lock(idm_mutex(idm)); + idm_release_chunk_locked(idm, start, count); + mutex_unlock(idm_mutex(idm)); +} + +static void idm_print_locked(struct xe_guc_id_mgr *idm, struct drm_printer *p, int indent) +{ + unsigned int rs, re; + + lockdep_assert_held(idm_mutex(idm)); + + drm_printf_indent(p, indent, "total %u\n", idm->total); + if (!idm->bitmap) + return; + + drm_printf_indent(p, indent, "used %u\n", idm->used); + for_each_set_bitrange(rs, re, idm->bitmap, idm->total) + drm_printf_indent(p, indent, "range %u..%u (%u)\n", rs, re - 1, re - rs); +} + +/** + * xe_guc_id_mgr_print() - Print status of GuC ID Manager. + * @idm: the &xe_guc_id_mgr to print + * @p: the &drm_printer to print to + * @indent: tab indentation level + */ +void xe_guc_id_mgr_print(struct xe_guc_id_mgr *idm, struct drm_printer *p, int indent) +{ + mutex_lock(idm_mutex(idm)); + idm_print_locked(idm, p, indent); + mutex_unlock(idm_mutex(idm)); +} diff --git a/drivers/gpu/drm/xe/xe_guc_id_mgr.h b/drivers/gpu/drm/xe/xe_guc_id_mgr.h new file mode 100644 index 000000000000..368f8c80e4c7 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_guc_id_mgr.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2024 Intel Corporation + */ + +#ifndef _XE_GUC_ID_MGR_H_ +#define _XE_GUC_ID_MGR_H_ + +struct drm_printer; +struct xe_guc_id_mgr; + +int xe_guc_id_mgr_init(struct xe_guc_id_mgr *idm, unsigned int count); + +int xe_guc_id_mgr_reserve_locked(struct xe_guc_id_mgr *idm, unsigned int count); +void xe_guc_id_mgr_release_locked(struct xe_guc_id_mgr *idm, unsigned int id, unsigned int count); + +int xe_guc_id_mgr_reserve(struct xe_guc_id_mgr *idm, unsigned int count, unsigned int retain); +void xe_guc_id_mgr_release(struct xe_guc_id_mgr *idm, unsigned int start, unsigned int count); + +void xe_guc_id_mgr_print(struct xe_guc_id_mgr *idm, struct drm_printer *p, int indent); + +#endif diff --git a/drivers/gpu/drm/xe/xe_guc_types.h b/drivers/gpu/drm/xe/xe_guc_types.h index edcd1a950bd3..69be1fb83047 100644 --- a/drivers/gpu/drm/xe/xe_guc_types.h +++ b/drivers/gpu/drm/xe/xe_guc_types.h @@ -31,6 +31,21 @@ struct xe_guc_db_mgr { unsigned long *bitmap; }; +/** + * struct xe_guc_id_mgr - GuC context ID Manager. + * + * Note: GuC context ID Manager is relying on &xe_guc::submission_state.lock + * to protect its members. 
+ */ +struct xe_guc_id_mgr { + /** @bitmap: bitmap to track allocated IDs */ + unsigned long *bitmap; + /** @total: total number of IDs being managed */ + unsigned int total; + /** @used: number of IDs currently in use */ + unsigned int used; +}; + /** * struct xe_guc - Graphic micro controller */ @@ -49,6 +64,8 @@ struct xe_guc { struct xe_guc_db_mgr dbm; /** @submission_state: GuC submission state */ struct { + /** @submission_state.idm: GuC context ID Manager */ + struct xe_guc_id_mgr idm; /** @submission_state.exec_queue_lookup: Lookup an xe_engine from guc_id */ struct xarray exec_queue_lookup; /** @submission_state.guc_ids: used to allocate new guc_ids, single-lrc */ -- cgit v1.2.3 From f4fb157cd0cc4b48aefef89189daf2658cbfc347 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Wed, 13 Mar 2024 23:11:11 +0100 Subject: drm/xe/kunit: Add basic tests for GuC context ID Manager Before we switch-over submission code to use new GuC context ID Manager, lets add some kunit tests to make sure that ID manager works as expected. Signed-off-by: Michal Wajdeczko Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240313221112.1089-4-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/tests/xe_guc_id_mgr_test.c | 136 ++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_guc_id_mgr.c | 4 + 2 files changed, 140 insertions(+) create mode 100644 drivers/gpu/drm/xe/tests/xe_guc_id_mgr_test.c diff --git a/drivers/gpu/drm/xe/tests/xe_guc_id_mgr_test.c b/drivers/gpu/drm/xe/tests/xe_guc_id_mgr_test.c new file mode 100644 index 000000000000..ee30a1939eb0 --- /dev/null +++ b/drivers/gpu/drm/xe/tests/xe_guc_id_mgr_test.c @@ -0,0 +1,136 @@ +// SPDX-License-Identifier: GPL-2.0 AND MIT +/* + * Copyright © 2024 Intel Corporation + */ + +#include + +#include "xe_device.h" +#include "xe_kunit_helpers.h" + +static int guc_id_mgr_test_init(struct kunit *test) +{ + struct xe_guc_id_mgr *idm; + + xe_kunit_helper_xe_device_test_init(test); + idm = &xe_device_get_gt(test->priv, 0)->uc.guc.submission_state.idm; + + mutex_init(idm_mutex(idm)); + test->priv = idm; + return 0; +} + +static void bad_init(struct kunit *test) +{ + struct xe_guc_id_mgr *idm = test->priv; + + KUNIT_EXPECT_EQ(test, -EINVAL, xe_guc_id_mgr_init(idm, 0)); + KUNIT_EXPECT_EQ(test, -ERANGE, xe_guc_id_mgr_init(idm, GUC_ID_MAX + 1)); +} + +static void no_init(struct kunit *test) +{ + struct xe_guc_id_mgr *idm = test->priv; + + mutex_lock(idm_mutex(idm)); + KUNIT_EXPECT_EQ(test, -ENODATA, xe_guc_id_mgr_reserve_locked(idm, 0)); + mutex_unlock(idm_mutex(idm)); + + KUNIT_EXPECT_EQ(test, -ENODATA, xe_guc_id_mgr_reserve(idm, 1, 1)); +} + +static void init_fini(struct kunit *test) +{ + struct xe_guc_id_mgr *idm = test->priv; + + KUNIT_ASSERT_EQ(test, 0, xe_guc_id_mgr_init(idm, -1)); + KUNIT_EXPECT_NOT_NULL(test, idm->bitmap); + KUNIT_EXPECT_EQ(test, idm->total, GUC_ID_MAX); + __fini_idm(NULL, idm); + KUNIT_EXPECT_NULL(test, idm->bitmap); + KUNIT_EXPECT_EQ(test, idm->total, 0); +} + +static void check_used(struct kunit *test) +{ + struct xe_guc_id_mgr *idm = test->priv; + unsigned int n; + + KUNIT_ASSERT_EQ(test, 0, xe_guc_id_mgr_init(idm, 2)); + + mutex_lock(idm_mutex(idm)); + + for (n = 0; n < idm->total; n++) { + kunit_info(test, "n=%u", n); + KUNIT_EXPECT_EQ(test, idm->used, n); + KUNIT_EXPECT_GE(test, idm_reserve_chunk_locked(idm, 1, 0), 0); + KUNIT_EXPECT_EQ(test, idm->used, n + 1); + } + KUNIT_EXPECT_EQ(test, idm->used, idm->total); + idm_release_chunk_locked(idm, 0, idm->used); + KUNIT_EXPECT_EQ(test, idm->used, 0); + + 
mutex_unlock(idm_mutex(idm)); +} + +static void check_quota(struct kunit *test) +{ + struct xe_guc_id_mgr *idm = test->priv; + unsigned int n; + + KUNIT_ASSERT_EQ(test, 0, xe_guc_id_mgr_init(idm, 2)); + + mutex_lock(idm_mutex(idm)); + + for (n = 0; n < idm->total - 1; n++) { + kunit_info(test, "n=%u", n); + KUNIT_EXPECT_EQ(test, idm_reserve_chunk_locked(idm, 1, idm->total), -EDQUOT); + KUNIT_EXPECT_EQ(test, idm_reserve_chunk_locked(idm, 1, idm->total - n), -EDQUOT); + KUNIT_EXPECT_EQ(test, idm_reserve_chunk_locked(idm, idm->total - n, 1), -EDQUOT); + KUNIT_EXPECT_GE(test, idm_reserve_chunk_locked(idm, 1, 1), 0); + } + KUNIT_EXPECT_LE(test, 0, idm_reserve_chunk_locked(idm, 1, 0)); + KUNIT_EXPECT_EQ(test, idm->used, idm->total); + idm_release_chunk_locked(idm, 0, idm->total); + KUNIT_EXPECT_EQ(test, idm->used, 0); + + mutex_unlock(idm_mutex(idm)); +} + +static void check_all(struct kunit *test) +{ + struct xe_guc_id_mgr *idm = test->priv; + unsigned int n; + + KUNIT_ASSERT_EQ(test, 0, xe_guc_id_mgr_init(idm, -1)); + + mutex_lock(idm_mutex(idm)); + + for (n = 0; n < idm->total; n++) + KUNIT_EXPECT_LE(test, 0, idm_reserve_chunk_locked(idm, 1, 0)); + KUNIT_EXPECT_EQ(test, idm->used, idm->total); + for (n = 0; n < idm->total; n++) + idm_release_chunk_locked(idm, n, 1); + + mutex_unlock(idm_mutex(idm)); +} + +static struct kunit_case guc_id_mgr_test_cases[] = { + KUNIT_CASE(bad_init), + KUNIT_CASE(no_init), + KUNIT_CASE(init_fini), + KUNIT_CASE(check_used), + KUNIT_CASE(check_quota), + KUNIT_CASE_SLOW(check_all), + {} +}; + +static struct kunit_suite guc_id_mgr_suite = { + .name = "guc_idm", + .test_cases = guc_id_mgr_test_cases, + + .init = guc_id_mgr_test_init, + .exit = NULL, +}; + +kunit_test_suites(&guc_id_mgr_suite); diff --git a/drivers/gpu/drm/xe/xe_guc_id_mgr.c b/drivers/gpu/drm/xe/xe_guc_id_mgr.c index bee49bc908bb..0fb7c6b78c31 100644 --- a/drivers/gpu/drm/xe/xe_guc_id_mgr.c +++ b/drivers/gpu/drm/xe/xe_guc_id_mgr.c @@ -273,3 +273,7 @@ void xe_guc_id_mgr_print(struct xe_guc_id_mgr *idm, struct drm_printer *p, int i idm_print_locked(idm, p, indent); mutex_unlock(idm_mutex(idm)); } + +#if IS_BUILTIN(CONFIG_DRM_XE_KUNIT_TEST) +#include "tests/xe_guc_id_mgr_test.c" +#endif -- cgit v1.2.3 From e6e7eff6275cb5d1e78df12f61cd083b819de381 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Wed, 13 Mar 2024 23:11:12 +0100 Subject: drm/xe/guc: Use GuC ID Manager in submission code We are ready to replace private guc_ids management code with separate GuC ID Manager that can be shared with upcoming SR-IOV PF provisioning code. 
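Roughly, the old two-allocator scheme (an ida for single-LRC IDs below GUC_ID_NUMBER_SLRC, plus a bitmap region above GUC_ID_START_MLRC for multi-LRC) collapses into one width-sized contiguous reservation, since a single-LRC queue is simply the q->width == 1 case:

	/* sketch of the unified allocation path after this patch */
	ret = xe_guc_id_mgr_reserve_locked(&guc->submission_state.idm, q->width);
	if (ret < 0)
		return ret;
	q->guc->id = ret;	/* IDs ret .. ret + q->width - 1 now belong to q */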
Cc: Matthew Brost Signed-off-by: Michal Wajdeczko Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240313221112.1089-5-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_guc_submit.c | 42 +++++++++----------------------------- drivers/gpu/drm/xe/xe_guc_types.h | 4 ---- 2 files changed, 10 insertions(+), 36 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 03c167624340..13b7e195c7b5 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -27,6 +27,7 @@ #include "xe_guc.h" #include "xe_guc_ct.h" #include "xe_guc_exec_queue_types.h" +#include "xe_guc_id_mgr.h" #include "xe_guc_submit_types.h" #include "xe_hw_engine.h" #include "xe_hw_fence.h" @@ -236,16 +237,10 @@ static void guc_submit_fini(struct drm_device *drm, void *arg) struct xe_guc *guc = arg; xa_destroy(&guc->submission_state.exec_queue_lookup); - ida_destroy(&guc->submission_state.guc_ids); - bitmap_free(guc->submission_state.guc_ids_bitmap); free_submit_wq(guc); mutex_destroy(&guc->submission_state.lock); } -#define GUC_ID_NUMBER_MLRC 4096 -#define GUC_ID_NUMBER_SLRC (GUC_ID_MAX - GUC_ID_NUMBER_MLRC) -#define GUC_ID_START_MLRC GUC_ID_NUMBER_SLRC - static const struct xe_exec_queue_ops guc_exec_queue_ops; static void primelockdep(struct xe_guc *guc) @@ -268,22 +263,14 @@ int xe_guc_submit_init(struct xe_guc *guc) struct xe_gt *gt = guc_to_gt(guc); int err; - guc->submission_state.guc_ids_bitmap = - bitmap_zalloc(GUC_ID_NUMBER_MLRC, GFP_KERNEL); - if (!guc->submission_state.guc_ids_bitmap) - return -ENOMEM; - err = alloc_submit_wq(guc); - if (err) { - bitmap_free(guc->submission_state.guc_ids_bitmap); + if (err) return err; - } gt->exec_queue_ops = &guc_exec_queue_ops; mutex_init(&guc->submission_state.lock); xa_init(&guc->submission_state.exec_queue_lookup); - ida_init(&guc->submission_state.guc_ids); spin_lock_init(&guc->submission_state.suspend.lock); guc->submission_state.suspend.context = dma_fence_context_alloc(1); @@ -294,6 +281,10 @@ int xe_guc_submit_init(struct xe_guc *guc) if (err) return err; + err = xe_guc_id_mgr_init(&guc->submission_state.idm, ~0); + if (err) + return err; + return 0; } @@ -306,12 +297,8 @@ static void __release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q, u32 xa for (i = 0; i < xa_count; ++i) xa_erase(&guc->submission_state.exec_queue_lookup, q->guc->id + i); - if (xe_exec_queue_is_parallel(q)) - bitmap_release_region(guc->submission_state.guc_ids_bitmap, - q->guc->id - GUC_ID_START_MLRC, - order_base_2(q->width)); - else - ida_free(&guc->submission_state.guc_ids, q->guc->id); + xe_guc_id_mgr_release_locked(&guc->submission_state.idm, + q->guc->id, q->width); } static int alloc_guc_id(struct xe_guc *guc, struct xe_exec_queue *q) @@ -329,21 +316,12 @@ static int alloc_guc_id(struct xe_guc *guc, struct xe_exec_queue *q) */ lockdep_assert_held(&guc->submission_state.lock); - if (xe_exec_queue_is_parallel(q)) { - void *bitmap = guc->submission_state.guc_ids_bitmap; - - ret = bitmap_find_free_region(bitmap, GUC_ID_NUMBER_MLRC, - order_base_2(q->width)); - } else { - ret = ida_alloc_max(&guc->submission_state.guc_ids, - GUC_ID_NUMBER_SLRC - 1, GFP_NOWAIT); - } + ret = xe_guc_id_mgr_reserve_locked(&guc->submission_state.idm, + q->width); if (ret < 0) return ret; q->guc->id = ret; - if (xe_exec_queue_is_parallel(q)) - q->guc->id += GUC_ID_START_MLRC; for (i = 0; i < q->width; ++i) { ptr = xa_store(&guc->submission_state.exec_queue_lookup, diff --git 
a/drivers/gpu/drm/xe/xe_guc_types.h b/drivers/gpu/drm/xe/xe_guc_types.h index 69be1fb83047..82bd93f7867d 100644 --- a/drivers/gpu/drm/xe/xe_guc_types.h +++ b/drivers/gpu/drm/xe/xe_guc_types.h @@ -68,10 +68,6 @@ struct xe_guc { struct xe_guc_id_mgr idm; /** @submission_state.exec_queue_lookup: Lookup an xe_engine from guc_id */ struct xarray exec_queue_lookup; - /** @submission_state.guc_ids: used to allocate new guc_ids, single-lrc */ - struct ida guc_ids; - /** @submission_state.guc_ids_bitmap: used to allocate new guc_ids, multi-lrc */ - unsigned long *guc_ids_bitmap; /** @submission_state.stopped: submissions are stopped */ atomic_t stopped; /** @submission_state.lock: protects submission state */ -- cgit v1.2.3 From 4fc4899e86f7afbd09f4bcb899f0fc57e0296e62 Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Wed, 27 Mar 2024 10:11:33 +0100 Subject: drm/xe: Use ring ops TLB invalidation for rebinds MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For each rebind we insert a GuC TLB invalidation and add a corresponding unordered TLB invalidation fence. This might add a huge number of TLB invalidation fences to wait for so rather than doing that, defer the TLB invalidation to the next ring ops for each affected exec queue. Since the TLB is invalidated on exec_queue switch, we need to invalidate once for each affected exec_queue. v2: - Simplify if-statements around the tlb_flush_seqno. (Matthew Brost) - Add some comments and asserts. Fixes: 5387e865d90e ("drm/xe: Add TLB invalidation fence after rebinds issued from execs") Cc: Matthew Brost Cc: # v6.8+ Signed-off-by: Thomas Hellström Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240327091136.3271-2-thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/xe/xe_exec_queue_types.h | 5 +++++ drivers/gpu/drm/xe/xe_pt.c | 6 ++++-- drivers/gpu/drm/xe/xe_ring_ops.c | 11 ++++------- drivers/gpu/drm/xe/xe_sched_job.c | 10 ++++++++++ drivers/gpu/drm/xe/xe_sched_job_types.h | 2 ++ drivers/gpu/drm/xe/xe_vm_types.h | 5 +++++ 6 files changed, 30 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h index 9cc689f50db0..ee78d497d838 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue_types.h +++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h @@ -146,6 +146,11 @@ struct xe_exec_queue { const struct xe_ring_ops *ring_ops; /** @entity: DRM sched entity for this exec queue (1 to 1 relationship) */ struct drm_sched_entity *entity; + /** + * @tlb_flush_seqno: The seqno of the last rebind tlb flush performed + * Protected by @vm's resv. Unused if @vm == NULL. + */ + u64 tlb_flush_seqno; /** @lrc: logical ring context for this exec queue */ struct xe_lrc lrc[]; }; diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index 8d3922d2206e..37117752cfc9 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -1254,11 +1254,13 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue * non-faulting LR, in particular on user-space batch buffer chaining, * it needs to be done here. 
*/ - if ((rebind && !xe_vm_in_lr_mode(vm) && !vm->batch_invalidate_tlb) || - (!rebind && xe_vm_has_scratch(vm) && xe_vm_in_preempt_fence_mode(vm))) { + if ((!rebind && xe_vm_has_scratch(vm) && xe_vm_in_preempt_fence_mode(vm))) { ifence = kzalloc(sizeof(*ifence), GFP_KERNEL); if (!ifence) return ERR_PTR(-ENOMEM); + } else if (rebind && !xe_vm_in_lr_mode(vm)) { + /* We bump also if batch_invalidate_tlb is true */ + vm->tlb_flush_seqno++; } rfence = kzalloc(sizeof(*rfence), GFP_KERNEL); diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c index c4edffcd4a32..5b2b37b59813 100644 --- a/drivers/gpu/drm/xe/xe_ring_ops.c +++ b/drivers/gpu/drm/xe/xe_ring_ops.c @@ -219,10 +219,9 @@ static void __emit_job_gen12_simple(struct xe_sched_job *job, struct xe_lrc *lrc { u32 dw[MAX_JOB_SIZE_DW], i = 0; u32 ppgtt_flag = get_ppgtt_flag(job); - struct xe_vm *vm = job->q->vm; struct xe_gt *gt = job->q->gt; - if (vm && vm->batch_invalidate_tlb) { + if (job->ring_ops_flush_tlb) { dw[i++] = preparser_disable(true); i = emit_flush_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc), seqno, true, dw, i); @@ -270,7 +269,6 @@ static void __emit_job_gen12_video(struct xe_sched_job *job, struct xe_lrc *lrc, struct xe_gt *gt = job->q->gt; struct xe_device *xe = gt_to_xe(gt); bool decode = job->q->class == XE_ENGINE_CLASS_VIDEO_DECODE; - struct xe_vm *vm = job->q->vm; dw[i++] = preparser_disable(true); @@ -282,13 +280,13 @@ static void __emit_job_gen12_video(struct xe_sched_job *job, struct xe_lrc *lrc, i = emit_aux_table_inv(gt, VE0_AUX_INV, dw, i); } - if (vm && vm->batch_invalidate_tlb) + if (job->ring_ops_flush_tlb) i = emit_flush_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc), seqno, true, dw, i); dw[i++] = preparser_disable(false); - if (!vm || !vm->batch_invalidate_tlb) + if (!job->ring_ops_flush_tlb) i = emit_store_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc), seqno, dw, i); @@ -317,7 +315,6 @@ static void __emit_job_gen12_render_compute(struct xe_sched_job *job, struct xe_gt *gt = job->q->gt; struct xe_device *xe = gt_to_xe(gt); bool lacks_render = !(gt->info.engine_mask & XE_HW_ENGINE_RCS_MASK); - struct xe_vm *vm = job->q->vm; u32 mask_flags = 0; dw[i++] = preparser_disable(true); @@ -327,7 +324,7 @@ static void __emit_job_gen12_render_compute(struct xe_sched_job *job, mask_flags = PIPE_CONTROL_3D_ENGINE_FLAGS; /* See __xe_pt_bind_vma() for a discussion on TLB invalidations. 
*/ - i = emit_pipe_invalidate(mask_flags, vm && vm->batch_invalidate_tlb, dw, i); + i = emit_pipe_invalidate(mask_flags, job->ring_ops_flush_tlb, dw, i); /* hsdes: 1809175790 */ if (has_aux_ccs(xe)) diff --git a/drivers/gpu/drm/xe/xe_sched_job.c b/drivers/gpu/drm/xe/xe_sched_job.c index add5a8b89be8..80daee910ae9 100644 --- a/drivers/gpu/drm/xe/xe_sched_job.c +++ b/drivers/gpu/drm/xe/xe_sched_job.c @@ -252,6 +252,16 @@ bool xe_sched_job_completed(struct xe_sched_job *job) void xe_sched_job_arm(struct xe_sched_job *job) { + struct xe_exec_queue *q = job->q; + struct xe_vm *vm = q->vm; + + if (vm && !xe_sched_job_is_migration(q) && !xe_vm_in_lr_mode(vm) && + (vm->batch_invalidate_tlb || vm->tlb_flush_seqno != q->tlb_flush_seqno)) { + xe_vm_assert_held(vm); + q->tlb_flush_seqno = vm->tlb_flush_seqno; + job->ring_ops_flush_tlb = true; + } + drm_sched_job_arm(&job->drm); } diff --git a/drivers/gpu/drm/xe/xe_sched_job_types.h b/drivers/gpu/drm/xe/xe_sched_job_types.h index b1d83da50a53..5e12724219fd 100644 --- a/drivers/gpu/drm/xe/xe_sched_job_types.h +++ b/drivers/gpu/drm/xe/xe_sched_job_types.h @@ -39,6 +39,8 @@ struct xe_sched_job { } user_fence; /** @migrate_flush_flags: Additional flush flags for migration jobs */ u32 migrate_flush_flags; + /** @ring_ops_flush_tlb: The ring ops need to flush TLB before payload. */ + bool ring_ops_flush_tlb; /** @batch_addr: batch buffer address of job */ u64 batch_addr[]; }; diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index ae5fb565f6bf..5747f136d24d 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -264,6 +264,11 @@ struct xe_vm { bool capture_once; } error_capture; + /** + * @tlb_flush_seqno: Required TLB flush seqno for the next exec. + * protected by the vm resv. + */ + u64 tlb_flush_seqno; /** @batch_invalidate_tlb: Always invalidate TLB before batch start */ bool batch_invalidate_tlb; /** @xef: XE file handle for tracking this VM's drm client */ -- cgit v1.2.3 From 5a091aff50b780ae29c7faf70a7a6c21c98a54c4 Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Wed, 27 Mar 2024 10:11:34 +0100 Subject: drm/xe: Rework rebinding MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of handling the vm's rebind fence separately, which is error prone if they are not strictly ordered, attach rebind fences as kernel fences to the vm's resv. 
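The mechanics can be sketched in two calls, both existing dma_resv/drm_sched APIs that also appear in the diff below: publishing the rebind fence with kernel usage on the VM's reservation object makes every later job pick it up as an implicit dependency, so no privately ordered vm->rebind_fence is needed.

	/* publisher: attach the rebind fence to the VM's resv */
	dma_resv_add_fence(xe_vm_resv(vm), fence, DMA_RESV_USAGE_KERNEL);

	/* consumer: subsequent execs wait behind all kernel-usage fences */
	err = drm_sched_job_add_resv_dependencies(&job->drm, xe_vm_resv(vm),
						  DMA_RESV_USAGE_KERNEL);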
Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Cc: Rodrigo Vivi Cc: Matthew Brost Cc: # v6.8+ Signed-off-by: Thomas Hellström Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240327091136.3271-3-thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/xe/xe_exec.c | 31 +++---------------------------- drivers/gpu/drm/xe/xe_pt.c | 2 +- drivers/gpu/drm/xe/xe_vm.c | 27 +++++++++------------------ drivers/gpu/drm/xe/xe_vm.h | 2 +- drivers/gpu/drm/xe/xe_vm_types.h | 3 --- 5 files changed, 14 insertions(+), 51 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c index 9d53ef8c49cc..b7e26e8e6472 100644 --- a/drivers/gpu/drm/xe/xe_exec.c +++ b/drivers/gpu/drm/xe/xe_exec.c @@ -152,7 +152,6 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file) struct drm_exec *exec = &vm_exec.exec; u32 i, num_syncs = 0, num_ufence = 0; struct xe_sched_job *job; - struct dma_fence *rebind_fence; struct xe_vm *vm; bool write_locked, skip_retry = false; ktime_t end = 0; @@ -294,35 +293,11 @@ retry: * Rebind any invalidated userptr or evicted BOs in the VM, non-compute * VM mode only. */ - rebind_fence = xe_vm_rebind(vm, false); - if (IS_ERR(rebind_fence)) { - err = PTR_ERR(rebind_fence); + err = xe_vm_rebind(vm, false); + if (err) goto err_put_job; - } - - /* - * We store the rebind_fence in the VM so subsequent execs don't get - * scheduled before the rebinds of userptrs / evicted BOs is complete. - */ - if (rebind_fence) { - dma_fence_put(vm->rebind_fence); - vm->rebind_fence = rebind_fence; - } - if (vm->rebind_fence) { - if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, - &vm->rebind_fence->flags)) { - dma_fence_put(vm->rebind_fence); - vm->rebind_fence = NULL; - } else { - dma_fence_get(vm->rebind_fence); - err = drm_sched_job_add_dependency(&job->drm, - vm->rebind_fence); - if (err) - goto err_put_job; - } - } - /* Wait behind munmap style rebinds */ + /* Wait behind rebinds */ if (!xe_vm_in_lr_mode(vm)) { err = drm_sched_job_add_resv_dependencies(&job->drm, xe_vm_resv(vm), diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index 37117752cfc9..632c1919471d 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -1299,7 +1299,7 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue } /* add shared fence now for pagetable delayed destroy */ - dma_resv_add_fence(xe_vm_resv(vm), fence, !rebind && + dma_resv_add_fence(xe_vm_resv(vm), fence, rebind || last_munmap_rebind ? 
DMA_RESV_USAGE_KERNEL : DMA_RESV_USAGE_BOOKKEEP); diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 694fbb546372..e995196862db 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -522,7 +522,6 @@ static void preempt_rebind_work_func(struct work_struct *w) { struct xe_vm *vm = container_of(w, struct xe_vm, preempt.rebind_work); struct drm_exec exec; - struct dma_fence *rebind_fence; unsigned int fence_count = 0; LIST_HEAD(preempt_fences); ktime_t end = 0; @@ -568,18 +567,11 @@ retry: if (err) goto out_unlock; - rebind_fence = xe_vm_rebind(vm, true); - if (IS_ERR(rebind_fence)) { - err = PTR_ERR(rebind_fence); + err = xe_vm_rebind(vm, true); + if (err) goto out_unlock; - } - - if (rebind_fence) { - dma_fence_wait(rebind_fence, false); - dma_fence_put(rebind_fence); - } - /* Wait on munmap style VM unbinds */ + /* Wait on rebinds and munmap style VM unbinds */ wait = dma_resv_wait_timeout(xe_vm_resv(vm), DMA_RESV_USAGE_KERNEL, false, MAX_SCHEDULE_TIMEOUT); @@ -777,14 +769,14 @@ xe_vm_bind_vma(struct xe_vma *vma, struct xe_exec_queue *q, struct xe_sync_entry *syncs, u32 num_syncs, bool first_op, bool last_op); -struct dma_fence *xe_vm_rebind(struct xe_vm *vm, bool rebind_worker) +int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker) { - struct dma_fence *fence = NULL; + struct dma_fence *fence; struct xe_vma *vma, *next; lockdep_assert_held(&vm->lock); if (xe_vm_in_lr_mode(vm) && !rebind_worker) - return NULL; + return 0; xe_vm_assert_held(vm); list_for_each_entry_safe(vma, next, &vm->rebind_list, @@ -792,17 +784,17 @@ struct dma_fence *xe_vm_rebind(struct xe_vm *vm, bool rebind_worker) xe_assert(vm->xe, vma->tile_present); list_del_init(&vma->combined_links.rebind); - dma_fence_put(fence); if (rebind_worker) trace_xe_vma_rebind_worker(vma); else trace_xe_vma_rebind_exec(vma); fence = xe_vm_bind_vma(vma, NULL, NULL, 0, false, false); if (IS_ERR(fence)) - return fence; + return PTR_ERR(fence); + dma_fence_put(fence); } - return fence; + return 0; } static void xe_vma_free(struct xe_vma *vma) @@ -1592,7 +1584,6 @@ static void vm_destroy_work_func(struct work_struct *w) XE_WARN_ON(vm->pt_root[id]); trace_xe_vm_free(vm); - dma_fence_put(vm->rebind_fence); kfree(vm); } diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h index 6df1f1c7f85d..4853354336f2 100644 --- a/drivers/gpu/drm/xe/xe_vm.h +++ b/drivers/gpu/drm/xe/xe_vm.h @@ -207,7 +207,7 @@ int __xe_vm_userptr_needs_repin(struct xe_vm *vm); int xe_vm_userptr_check_repin(struct xe_vm *vm); -struct dma_fence *xe_vm_rebind(struct xe_vm *vm, bool rebind_worker); +int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker); int xe_vm_invalidate_vma(struct xe_vma *vma); diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index 5747f136d24d..badf3945083d 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -177,9 +177,6 @@ struct xe_vm { */ struct list_head rebind_list; - /** @rebind_fence: rebind fence from execbuf */ - struct dma_fence *rebind_fence; - /** * @destroy_work: worker to destroy VM, needed as a dma_fence signaling * from an irq context can be last put and the destroy needs to be able -- cgit v1.2.3 From 0453f1757501df2e82b66b3183a24bba5a6f8fa3 Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Wed, 27 Mar 2024 10:11:35 +0100 Subject: drm/xe: Make TLB invalidation fences unordered MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit They can actually complete out-of-order, so 
allocate a unique fence context for each fence. Fixes: 5387e865d90e ("drm/xe: Add TLB invalidation fence after rebinds issued from execs") Cc: Matthew Brost Cc: # v6.8+ Signed-off-by: Thomas Hellström Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240327091136.3271-4-thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c | 1 - drivers/gpu/drm/xe/xe_gt_types.h | 7 ------- drivers/gpu/drm/xe/xe_pt.c | 3 +-- 3 files changed, 1 insertion(+), 10 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c index 25b4111097bc..93df2d7969b3 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c @@ -63,7 +63,6 @@ int xe_gt_tlb_invalidation_init(struct xe_gt *gt) INIT_LIST_HEAD(&gt->tlb_invalidation.pending_fences); spin_lock_init(&gt->tlb_invalidation.pending_lock); spin_lock_init(&gt->tlb_invalidation.lock); - gt->tlb_invalidation.fence_context = dma_fence_context_alloc(1); INIT_DELAYED_WORK(&gt->tlb_invalidation.fence_tdr, xe_gt_tlb_fence_timeout); diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h index f6da2ad9719f..2143dffcaf11 100644 --- a/drivers/gpu/drm/xe/xe_gt_types.h +++ b/drivers/gpu/drm/xe/xe_gt_types.h @@ -179,13 +179,6 @@ struct xe_gt { * xe_gt_tlb_fence_timeout after the timeout interval is over. */ struct delayed_work fence_tdr; - /** @tlb_invalidation.fence_context: context for TLB invalidation fences */ - u64 fence_context; - /** - * @tlb_invalidation.fence_seqno: seqno to TLB invalidation fences, protected by - * tlb_invalidation.lock - */ - u32 fence_seqno; /** @tlb_invalidation.lock: protects TLB invalidation fences */ spinlock_t lock; } tlb_invalidation; diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index 632c1919471d..d1b999dbc906 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -1135,8 +1135,7 @@ static int invalidation_fence_init(struct xe_gt *gt, spin_lock_irq(&gt->tlb_invalidation.lock); dma_fence_init(&ifence->base.base, &invalidation_fence_ops, &gt->tlb_invalidation.lock, - gt->tlb_invalidation.fence_context, - ++gt->tlb_invalidation.fence_seqno); + dma_fence_context_alloc(1), 1); spin_unlock_irq(&gt->tlb_invalidation.lock); INIT_LIST_HEAD(&ifence->base.link); -- cgit v1.2.3 From 7ee7dd6f301341d5b1204fc19fa620d7f7f7e90d Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Wed, 27 Mar 2024 10:11:36 +0100 Subject: drm/xe: Move vma rebinding to the drm_exec locking loop MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rebinding might allocate page-table bos, causing evictions. To support blocking locking during these evictions, perform the rebinding in the drm_exec locking loop. Also reserve fence slots where actually needed rather than trying to predict how many fence slots will be needed over a complete wound-wait transaction.
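The rebind must sit inside the locking loop because rebinding can allocate page-table bos, allocation can evict, and an eviction that hits ww-mutex contention fails with -EDEADLK, which only drm_exec's retry machinery can unwind. Schematically (a minimal sketch using a plain drm_exec loop; the exec ioctl reaches the same point through drm_gpuvm_exec_lock() and the xe_exec_fn() callback below):

	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
	drm_exec_until_all_locked(&exec) {
		err = drm_gpuvm_prepare_vm(&vm->gpuvm, &exec, 0);
		if (!err)
			err = xe_vm_validate_rebind(vm, &exec, 1);
		drm_exec_retry_on_contention(&exec);
		if (err)
			break;
	}

An -EDEADLK raised while rebinding now simply restarts the ww transaction instead of failing the exec.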
v2: - Remove a leftover call to xe_vm_rebind() (Matt Brost) - Add a helper function xe_vm_validate_rebind() (Matt Brost) v3: - Add comments and squash with previous patch (Matt Brost) Fixes: 24f947d58fe5 ("drm/xe: Use DRM GPUVM helpers for external- and evicted objects") Fixes: 29f424eb8702 ("drm/xe/exec: move fence reservation") Cc: Matthew Auld Signed-off-by: Thomas Hellström Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240327091136.3271-5-thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/xe/xe_exec.c | 52 +++------------------- drivers/gpu/drm/xe/xe_gt_pagefault.c | 3 +- drivers/gpu/drm/xe/xe_pt.c | 14 ++++++ drivers/gpu/drm/xe/xe_vm.c | 83 +++++++++++++++++++++++++----------- drivers/gpu/drm/xe/xe_vm.h | 6 ++- 5 files changed, 83 insertions(+), 75 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c index b7e26e8e6472..97eeb973e897 100644 --- a/drivers/gpu/drm/xe/xe_exec.c +++ b/drivers/gpu/drm/xe/xe_exec.c @@ -94,48 +94,16 @@ * Unlock all */ +/* + * Add validation and rebinding to the drm_exec locking loop, since both can + * trigger eviction which may require sleeping dma_resv locks. + */ static int xe_exec_fn(struct drm_gpuvm_exec *vm_exec) { struct xe_vm *vm = container_of(vm_exec->vm, struct xe_vm, gpuvm); - struct drm_gem_object *obj; - unsigned long index; - int num_fences; - int ret; - - ret = drm_gpuvm_validate(vm_exec->vm, &vm_exec->exec); - if (ret) - return ret; - - /* - * 1 fence slot for the final submit, and 1 more for every per-tile for - * GPU bind and 1 extra for CPU bind. Note that there are potentially - * many vma per object/dma-resv, however the fence slot will just be - * re-used, since they are largely the same timeline and the seqno - * should be in order. In the case of CPU bind there is dummy fence used - * for all CPU binds, so no need to have a per-tile slot for that. - */ - num_fences = 1 + 1 + vm->xe->info.tile_count; - /* - * We don't know upfront exactly how many fence slots we will need at - * the start of the exec, since the TTM bo_validate above can consume - * numerous fence slots. Also due to how the dma_resv_reserve_fences() - * works it only ensures that at least that many fence slots are - * available i.e if there are already 10 slots available and we reserve - * two more, it can just noop without reserving anything. With this it - * is quite possible that TTM steals some of the fence slots and then - * when it comes time to do the vma binding and final exec stage we are - * lacking enough fence slots, leading to some nasty BUG_ON() when - * adding the fences. Hence just add our own fences here, after the - * validate stage. - */ - drm_exec_for_each_locked_object(&vm_exec->exec, index, obj) { - ret = dma_resv_reserve_fences(obj->resv, num_fences); - if (ret) - return ret; - } - - return 0; + /* The fence slot added here is intended for the exec sched job. */ + return xe_vm_validate_rebind(vm, &vm_exec->exec, 1); } int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file) @@ -289,14 +257,6 @@ retry: goto err_exec; } - /* - * Rebind any invalidated userptr or evicted BOs in the VM, non-compute - * VM mode only. 
- */ - err = xe_vm_rebind(vm, false); - if (err) - goto err_put_job; - /* Wait behind rebinds */ if (!xe_vm_in_lr_mode(vm)) { err = drm_sched_job_add_resv_dependencies(&job->drm, diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c index 241c294270d9..fa9e9853c53b 100644 --- a/drivers/gpu/drm/xe/xe_gt_pagefault.c +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c @@ -100,10 +100,9 @@ static int xe_pf_begin(struct drm_exec *exec, struct xe_vma *vma, { struct xe_bo *bo = xe_vma_bo(vma); struct xe_vm *vm = xe_vma_vm(vma); - unsigned int num_shared = 2; /* slots for bind + move */ int err; - err = xe_vm_prepare_vma(exec, vma, num_shared); + err = xe_vm_lock_vma(exec, vma); if (err) return err; diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index d1b999dbc906..580fe869b414 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -1235,6 +1235,13 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue err = xe_pt_prepare_bind(tile, vma, entries, &num_entries); if (err) goto err; + + err = dma_resv_reserve_fences(xe_vm_resv(vm), 1); + if (!err && !xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) + err = dma_resv_reserve_fences(xe_vma_bo(vma)->ttm.base.resv, 1); + if (err) + goto err; + xe_tile_assert(tile, num_entries <= ARRAY_SIZE(entries)); xe_vm_dbg_print_entries(tile_to_xe(tile), entries, num_entries); @@ -1577,6 +1584,7 @@ __xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queu struct dma_fence *fence = NULL; struct invalidation_fence *ifence; struct xe_range_fence *rfence; + int err; LLIST_HEAD(deferred); @@ -1594,6 +1602,12 @@ __xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queu xe_pt_calc_rfence_interval(vma, &unbind_pt_update, entries, num_entries); + err = dma_resv_reserve_fences(xe_vm_resv(vm), 1); + if (!err && !xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) + err = dma_resv_reserve_fences(xe_vma_bo(vma)->ttm.base.resv, 1); + if (err) + return ERR_PTR(err); + ifence = kzalloc(sizeof(*ifence), GFP_KERNEL); if (!ifence) return ERR_PTR(-ENOMEM); diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index e995196862db..8b32aa5003df 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -482,17 +482,53 @@ static int xe_gpuvm_validate(struct drm_gpuvm_bo *vm_bo, struct drm_exec *exec) return 0; } +/** + * xe_vm_validate_rebind() - Validate buffer objects and rebind vmas + * @vm: The vm for which we are rebinding. + * @exec: The struct drm_exec with the locked GEM objects. + * @num_fences: The number of fences to reserve for the operation, not + * including rebinds and validations. + * + * Validates all evicted gem objects and rebinds their vmas. Note that + * rebindings may cause evictions and hence the validation-rebind + * sequence is rerun until there are no more objects to validate. + * + * Return: 0 on success, negative error code on error. In particular, + * may return -EINTR or -ERESTARTSYS if interrupted, and -EDEADLK if + * the drm_exec transaction needs to be restarted. 
+ */ +int xe_vm_validate_rebind(struct xe_vm *vm, struct drm_exec *exec, + unsigned int num_fences) +{ + struct drm_gem_object *obj; + unsigned long index; + int ret; + + do { + ret = drm_gpuvm_validate(&vm->gpuvm, exec); + if (ret) + return ret; + + ret = xe_vm_rebind(vm, false); + if (ret) + return ret; + } while (!list_empty(&vm->gpuvm.evict.list)); + + drm_exec_for_each_locked_object(exec, index, obj) { + ret = dma_resv_reserve_fences(obj->resv, num_fences); + if (ret) + return ret; + } + + return 0; +} + static int xe_preempt_work_begin(struct drm_exec *exec, struct xe_vm *vm, bool *done) { int err; - /* - * 1 fence for each preempt fence plus a fence for each tile from a - * possible rebind - */ - err = drm_gpuvm_prepare_vm(&vm->gpuvm, exec, vm->preempt.num_exec_queues + - vm->xe->info.tile_count); + err = drm_gpuvm_prepare_vm(&vm->gpuvm, exec, 0); if (err) return err; @@ -507,7 +543,7 @@ static int xe_preempt_work_begin(struct drm_exec *exec, struct xe_vm *vm, return 0; } - err = drm_gpuvm_prepare_objects(&vm->gpuvm, exec, vm->preempt.num_exec_queues); + err = drm_gpuvm_prepare_objects(&vm->gpuvm, exec, 0); if (err) return err; @@ -515,7 +551,13 @@ static int xe_preempt_work_begin(struct drm_exec *exec, struct xe_vm *vm, if (err) return err; - return drm_gpuvm_validate(&vm->gpuvm, exec); + /* + * Add validation and rebinding to the locking loop since both can + * cause evictions which may require blocking dma_resv locks. + * The fence reservation here is intended for the new preempt fences + * we attach at the end of the rebind work. + */ + return xe_vm_validate_rebind(vm, exec, vm->preempt.num_exec_queues); } static void preempt_rebind_work_func(struct work_struct *w) @@ -1000,35 +1042,26 @@ static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence) } /** - * xe_vm_prepare_vma() - drm_exec utility to lock a vma + * xe_vm_lock_vma() - drm_exec utility to lock a vma * @exec: The drm_exec object we're currently locking for. * @vma: The vma for which we want to lock the vm resv and any attached * object's resv. - * @num_shared: The number of dma-fence slots to pre-allocate in the - * objects' reservation objects. * * Return: 0 on success, negative error code on error. In particular * may return -EDEADLK on WW transaction contention and -EINTR if * an interruptible wait is terminated by a signal.
*/ -int xe_vm_prepare_vma(struct drm_exec *exec, struct xe_vma *vma, - unsigned int num_shared) +int xe_vm_lock_vma(struct drm_exec *exec, struct xe_vma *vma) { struct xe_vm *vm = xe_vma_vm(vma); struct xe_bo *bo = xe_vma_bo(vma); int err; XE_WARN_ON(!vm); - if (num_shared) - err = drm_exec_prepare_obj(exec, xe_vm_obj(vm), num_shared); - else - err = drm_exec_lock_obj(exec, xe_vm_obj(vm)); - if (!err && bo && !bo->vm) { - if (num_shared) - err = drm_exec_prepare_obj(exec, &bo->ttm.base, num_shared); - else - err = drm_exec_lock_obj(exec, &bo->ttm.base); - } + + err = drm_exec_lock_obj(exec, xe_vm_obj(vm)); + if (!err && bo && !bo->vm) + err = drm_exec_lock_obj(exec, &bo->ttm.base); return err; } @@ -1040,7 +1073,7 @@ static void xe_vma_destroy_unlocked(struct xe_vma *vma) drm_exec_init(&exec, 0, 0); drm_exec_until_all_locked(&exec) { - err = xe_vm_prepare_vma(&exec, vma, 0); + err = xe_vm_lock_vma(&exec, vma); drm_exec_retry_on_contention(&exec); if (XE_WARN_ON(err)) break; @@ -2506,7 +2539,7 @@ static int op_execute(struct drm_exec *exec, struct xe_vm *vm, lockdep_assert_held_write(&vm->lock); - err = xe_vm_prepare_vma(exec, vma, 1); + err = xe_vm_lock_vma(exec, vma); if (err) return err; diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h index 4853354336f2..306cd0934a19 100644 --- a/drivers/gpu/drm/xe/xe_vm.h +++ b/drivers/gpu/drm/xe/xe_vm.h @@ -242,8 +242,10 @@ bool xe_vm_validate_should_retry(struct drm_exec *exec, int err, ktime_t *end); int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id); -int xe_vm_prepare_vma(struct drm_exec *exec, struct xe_vma *vma, - unsigned int num_shared); +int xe_vm_lock_vma(struct drm_exec *exec, struct xe_vma *vma); + +int xe_vm_validate_rebind(struct xe_vm *vm, struct drm_exec *exec, + unsigned int num_fences); /** * xe_vm_resv() - Return's the vm's reservation object -- cgit v1.2.3 From 451d261a6ecc5a21dd4e34f18cc4c83177d6023f Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Wed, 27 Mar 2024 19:27:38 +0100 Subject: drm/xe: Separate pure MMIO init from VRAM checkout We can setup root tile registers mapping at the same time as we do early mapping of the entire MMIO BAR and keep mandatory VRAM checkout as a separate step. This will allow us to perform SR-IOV VF mode detection between those two steps using regular MMIO regs access functions. 
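The resulting early probe order, sketched (xe_mmio_init() and xe_mmio_verify_vram() are reworked below; the xe_sriov_probe_early() call in between is wired up by the next two patches):

	err = xe_mmio_init(xe);		/* iomap the BAR, set up root tile regs */
	if (err)
		return err;

	xe_sriov_probe_early(xe);	/* MMIO is now usable for mode detection */

	err = xe_mmio_verify_vram(xe);	/* mandatory VRAM checkout, a no-op on VFs */
	if (err)
		return err;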
Signed-off-by: Michal Wajdeczko Cc: Matt Roper Reviewed-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20240327182740.407-2-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_device.c | 2 +- drivers/gpu/drm/xe/xe_mmio.c | 55 +++++++++++++++++------------------------- drivers/gpu/drm/xe/xe_mmio.h | 2 +- 3 files changed, 24 insertions(+), 35 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index b0bfe75eb59f..286ebccc3fc8 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -424,7 +424,7 @@ int xe_device_probe_early(struct xe_device *xe) if (err) return err; - err = xe_mmio_root_tile_init(xe); + err = xe_mmio_verify_vram(xe); if (err) return err; diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index 1de9de4f94b6..5d13fc7cb9d2 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -360,32 +360,9 @@ static void mmio_fini(struct drm_device *drm, void *arg) iounmap(xe->mem.vram.mapping); } -static int xe_verify_lmem_ready(struct xe_device *xe) -{ - struct xe_gt *gt = xe_root_mmio_gt(xe); - - if (!IS_DGFX(xe)) - return 0; - - if (IS_SRIOV_VF(xe)) - return 0; - - /* - * The boot firmware initializes local memory and assesses its health. - * If memory training fails, the punit will have been instructed to - * keep the GT powered down; we won't be able to communicate with it - * and we should not continue with driver initialization. - */ - if (!(xe_mmio_read32(gt, GU_CNTL) & LMEM_INIT)) { - drm_err(&xe->drm, "VRAM not initialized by firmware\n"); - return -ENODEV; - } - - return 0; -} - int xe_mmio_init(struct xe_device *xe) { + struct xe_tile *root_tile = xe_device_get_root_tile(xe); struct pci_dev *pdev = to_pci_dev(xe->drm.dev); const int mmio_bar = 0; @@ -401,21 +378,33 @@ int xe_mmio_init(struct xe_device *xe) return -EIO; } + /* Setup first tile; other tiles (if present) will be setup later. */ + root_tile->mmio.size = SZ_16M; + root_tile->mmio.regs = xe->mmio.regs; + return drmm_add_action_or_reset(&xe->drm, mmio_fini, xe); } -int xe_mmio_root_tile_init(struct xe_device *xe) +int xe_mmio_verify_vram(struct xe_device *xe) { - struct xe_tile *root_tile = xe_device_get_root_tile(xe); - int err; + struct xe_gt *gt = xe_root_mmio_gt(xe); - /* Setup first tile; other tiles (if present) will be setup later. */ - root_tile->mmio.size = SZ_16M; - root_tile->mmio.regs = xe->mmio.regs; + if (!IS_DGFX(xe)) + return 0; - err = xe_verify_lmem_ready(xe); - if (err) - return err; + if (IS_SRIOV_VF(xe)) + return 0; + + /* + * The boot firmware initializes local memory and assesses its health. + * If memory training fails, the punit will have been instructed to + * keep the GT powered down; we won't be able to communicate with it + * and we should not continue with driver initialization. 
+ */ + if (!(xe_mmio_read32(gt, GU_CNTL) & LMEM_INIT)) { + drm_err(&xe->drm, "VRAM not initialized by firmware\n"); + return -ENODEV; + } return 0; } diff --git a/drivers/gpu/drm/xe/xe_mmio.h b/drivers/gpu/drm/xe/xe_mmio.h index 67ead99f321b..b1680c4a14fb 100644 --- a/drivers/gpu/drm/xe/xe_mmio.h +++ b/drivers/gpu/drm/xe/xe_mmio.h @@ -21,7 +21,7 @@ struct xe_device; #define LMEM_BAR 2 int xe_mmio_init(struct xe_device *xe); -int xe_mmio_root_tile_init(struct xe_device *xe); +int xe_mmio_verify_vram(struct xe_device *xe); void xe_mmio_probe_tiles(struct xe_device *xe); u8 xe_mmio_read8(struct xe_gt *gt, struct xe_reg reg); -- cgit v1.2.3 From d79c88c45dae1f1c5f80bd473811c6d053f55828 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Wed, 27 Mar 2024 19:27:39 +0100 Subject: drm/xe: Move SR-IOV probe to xe_device_probe_early() SR-IOV mode detection requires access to the MMIO register and this can be done now in xe_device_probe_early(). We can also drop explicit has_sriov parameter as this flag is now already available from xe->info. Signed-off-by: Michal Wajdeczko Reviewed-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20240327182740.407-3-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_device.c | 2 ++ drivers/gpu/drm/xe/xe_pci.c | 2 -- drivers/gpu/drm/xe/xe_sriov.c | 4 ++-- drivers/gpu/drm/xe/xe_sriov.h | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 286ebccc3fc8..01bd5ccf05ca 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -424,6 +424,8 @@ int xe_device_probe_early(struct xe_device *xe) if (err) return err; + xe_sriov_probe_early(xe); + err = xe_mmio_verify_vram(xe); if (err) return err; diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index c401d4890386..7ce37b28bfa4 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -738,8 +738,6 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (err) return err; - xe_sriov_probe_early(xe, desc->has_sriov); - err = xe_device_probe_early(xe); if (err) return err; diff --git a/drivers/gpu/drm/xe/xe_sriov.c b/drivers/gpu/drm/xe/xe_sriov.c index f295d91886b1..2bcef998c8a9 100644 --- a/drivers/gpu/drm/xe/xe_sriov.c +++ b/drivers/gpu/drm/xe/xe_sriov.c @@ -31,7 +31,6 @@ const char *xe_sriov_mode_to_string(enum xe_sriov_mode mode) /** * xe_sriov_probe_early - Probe a SR-IOV mode. * @xe: the &xe_device to probe mode on - * @has_sriov: flag indicating hardware support for SR-IOV * * This function should be called only once and as soon as possible during * driver probe to detect whether we are running a SR-IOV Physical Function @@ -40,9 +39,10 @@ const char *xe_sriov_mode_to_string(enum xe_sriov_mode mode) * SR-IOV PF mode detection is based on PCI @dev_is_pf() function. * SR-IOV VF mode detection is based on dedicated MMIO register read. 
*/ -void xe_sriov_probe_early(struct xe_device *xe, bool has_sriov) +void xe_sriov_probe_early(struct xe_device *xe) { enum xe_sriov_mode mode = XE_SRIOV_MODE_NONE; + bool has_sriov = xe->info.has_sriov; /* TODO: replace with proper mode detection */ xe_assert(xe, !has_sriov); diff --git a/drivers/gpu/drm/xe/xe_sriov.h b/drivers/gpu/drm/xe/xe_sriov.h index 1545552162c9..9e3f58874e98 100644 --- a/drivers/gpu/drm/xe/xe_sriov.h +++ b/drivers/gpu/drm/xe/xe_sriov.h @@ -12,7 +12,7 @@ const char *xe_sriov_mode_to_string(enum xe_sriov_mode mode); -void xe_sriov_probe_early(struct xe_device *xe, bool has_sriov); +void xe_sriov_probe_early(struct xe_device *xe); int xe_sriov_init(struct xe_device *xe); static inline enum xe_sriov_mode xe_device_sriov_mode(struct xe_device *xe) -- cgit v1.2.3 From 0613834f3dddb4bf3ff82861dad661b1ede97f7b Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Wed, 27 Mar 2024 19:27:40 +0100 Subject: drm/xe/vf: Add proper detection of the SR-IOV VF mode SR-IOV VF mode detection is based on testing VF capability bit on the register that is accessible from both the PF and enabled VFs. Bspec: 49904, 53227 Signed-off-by: Michal Wajdeczko Reviewed-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20240327182740.407-4-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/regs/xe_sriov_regs.h | 3 +++ drivers/gpu/drm/xe/xe_sriov.c | 17 +++++++++++++++-- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/regs/xe_sriov_regs.h b/drivers/gpu/drm/xe/regs/xe_sriov_regs.h index 58a4e0fad1e1..617ddb84b7fa 100644 --- a/drivers/gpu/drm/xe/regs/xe_sriov_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_sriov_regs.h @@ -14,4 +14,7 @@ #define LMEM_EN REG_BIT(31) #define LMTT_DIR_PTR REG_GENMASK(30, 0) /* in multiples of 64KB */ +#define VF_CAP_REG XE_REG(0x1901f8, XE_REG_OPTION_VF) +#define VF_CAP REG_BIT(0) + #endif diff --git a/drivers/gpu/drm/xe/xe_sriov.c b/drivers/gpu/drm/xe/xe_sriov.c index 2bcef998c8a9..3e103edf7174 100644 --- a/drivers/gpu/drm/xe/xe_sriov.c +++ b/drivers/gpu/drm/xe/xe_sriov.c @@ -5,7 +5,11 @@ #include +#include "regs/xe_sriov_regs.h" + #include "xe_assert.h" +#include "xe_device.h" +#include "xe_mmio.h" #include "xe_sriov.h" /** @@ -28,6 +32,13 @@ const char *xe_sriov_mode_to_string(enum xe_sriov_mode mode) } } +static bool test_is_vf(struct xe_device *xe) +{ + u32 value = xe_mmio_read32(xe_root_mmio_gt(xe), VF_CAP_REG); + + return value & VF_CAP; +} + /** * xe_sriov_probe_early - Probe a SR-IOV mode. * @xe: the &xe_device to probe mode on @@ -44,8 +55,10 @@ void xe_sriov_probe_early(struct xe_device *xe) enum xe_sriov_mode mode = XE_SRIOV_MODE_NONE; bool has_sriov = xe->info.has_sriov; - /* TODO: replace with proper mode detection */ - xe_assert(xe, !has_sriov); + if (has_sriov) { + if (test_is_vf(xe)) + mode = XE_SRIOV_MODE_VF; + } xe_assert(xe, !xe->sriov.__mode); xe->sriov.__mode = mode; -- cgit v1.2.3 From 476f6c48d1fa6a2253cbb5fdf755727632c03972 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Tue, 26 Mar 2024 20:15:17 +0100 Subject: drm/xe/guc: Add VF_STATE_NOTIFY and VF_CONTROL to ABI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In upcoming patches the PF driver will add support to handle the GUC2PF_VF_STATE_NOTIFY events and to send PF2GUC_VF_CONTROL request messages. Add necessary definitions to our GuC firmware ABI header. 
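As a usage sketch, a host-originated request built from these definitions packs the action into DW0 and the VFID and command into DW1/DW2 (this mirrors the helper a later patch in this series adds; vfid is a caller-supplied u32):

	u32 request[PF2GUC_VF_CONTROL_REQUEST_MSG_LEN] = {
		FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
		FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) |
		FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION,
			   GUC_ACTION_PF2GUC_VF_CONTROL),
		FIELD_PREP(PF2GUC_VF_CONTROL_REQUEST_MSG_1_VFID, vfid),
		FIELD_PREP(PF2GUC_VF_CONTROL_REQUEST_MSG_2_COMMAND,
			   GUC_PF_TRIGGER_VF_PAUSE),
	};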
Signed-off-by: Michal Wajdeczko Reviewed-by: Piotr Piórkowski Link: https://patchwork.freedesktop.org/patch/msgid/20240326191518.363-2-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h | 94 ++++++++++++++++++++++++++ 1 file changed, 94 insertions(+) diff --git a/drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h b/drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h index fe7a93ce3aa5..12ee5e9e831f 100644 --- a/drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h +++ b/drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h @@ -171,4 +171,98 @@ #define VF2GUC_RELAY_TO_PF_REQUEST_MSG_n_RELAY_DATAx GUC_HXG_REQUEST_MSG_n_DATAn #define VF2GUC_RELAY_TO_PF_REQUEST_MSG_NUM_RELAY_DATA GUC_RELAY_MSG_MAX_LEN +/** + * DOC: GUC2PF_VF_STATE_NOTIFY + * + * The GUC2PF_VF_STATE_NOTIFY message is used by the GuC to notify PF about change + * of the VF state. + * + * This G2H message is sent as `CTB HXG Message`_. + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_GUC_ | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_EVENT_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:16 | DATA0 = MBZ | + * | +-------+--------------------------------------------------------------+ + * | | 15:0 | ACTION = _`GUC_ACTION_GUC2PF_VF_STATE_NOTIFY` = 0x5106 | + * +---+-------+--------------------------------------------------------------+ + * | 1 | 31:0 | DATA1 = **VFID** - VF identifier | + * +---+-------+--------------------------------------------------------------+ + * | 2 | 31:0 | DATA2 = **EVENT** - notification event: | + * | | | | + * | | | - _`GUC_PF_NOTIFY_VF_ENABLE` = 1 (only if VFID = 0) | + * | | | - _`GUC_PF_NOTIFY_VF_FLR` = 1 | + * | | | - _`GUC_PF_NOTIFY_VF_FLR_DONE` = 2 | + * | | | - _`GUC_PF_NOTIFY_VF_PAUSE_DONE` = 3 | + * | | | - _`GUC_PF_NOTIFY_VF_FIXUP_DONE` = 4 | + * +---+-------+--------------------------------------------------------------+ + */ +#define GUC_ACTION_GUC2PF_VF_STATE_NOTIFY 0x5106u + +#define GUC2PF_VF_STATE_NOTIFY_EVENT_MSG_LEN (GUC_HXG_EVENT_MSG_MIN_LEN + 2u) +#define GUC2PF_VF_STATE_NOTIFY_EVENT_MSG_0_MBZ GUC_HXG_EVENT_MSG_0_DATA0 +#define GUC2PF_VF_STATE_NOTIFY_EVENT_MSG_1_VFID GUC_HXG_EVENT_MSG_n_DATAn +#define GUC2PF_VF_STATE_NOTIFY_EVENT_MSG_2_EVENT GUC_HXG_EVENT_MSG_n_DATAn +#define GUC_PF_NOTIFY_VF_ENABLE 1u +#define GUC_PF_NOTIFY_VF_FLR 1u +#define GUC_PF_NOTIFY_VF_FLR_DONE 2u +#define GUC_PF_NOTIFY_VF_PAUSE_DONE 3u +#define GUC_PF_NOTIFY_VF_FIXUP_DONE 4u + +/** + * DOC: PF2GUC_VF_CONTROL + * + * The PF2GUC_VF_CONTROL message is used by the PF to trigger VF state change + * maintained by the GuC. + * + * This H2G message must be sent as `CTB HXG Message`_. 
+ * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_HOST_ | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_REQUEST_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:16 | DATA0 = MBZ | + * | +-------+--------------------------------------------------------------+ + * | | 15:0 | ACTION = _`GUC_ACTION_PF2GUC_VF_CONTROL_CMD` = 0x5506 | + * +---+-------+--------------------------------------------------------------+ + * | 1 | 31:0 | DATA1 = **VFID** - VF identifier | + * +---+-------+--------------------------------------------------------------+ + * | 2 | 31:0 | DATA2 = **COMMAND** - control command: | + * | | | | + * | | | - _`GUC_PF_TRIGGER_VF_PAUSE` = 1 | + * | | | - _`GUC_PF_TRIGGER_VF_RESUME` = 2 | + * | | | - _`GUC_PF_TRIGGER_VF_STOP` = 3 | + * | | | - _`GUC_PF_TRIGGER_VF_FLR_START` = 4 | + * | | | - _`GUC_PF_TRIGGER_VF_FLR_FINISH` = 5 | + * +---+-------+--------------------------------------------------------------+ + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_GUC_ | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_RESPONSE_SUCCESS_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:0 | DATA0 = MBZ | + * +---+-------+--------------------------------------------------------------+ + */ +#define GUC_ACTION_PF2GUC_VF_CONTROL 0x5506u + +#define PF2GUC_VF_CONTROL_REQUEST_MSG_LEN (GUC_HXG_EVENT_MSG_MIN_LEN + 2u) +#define PF2GUC_VF_CONTROL_REQUEST_MSG_0_MBZ GUC_HXG_EVENT_MSG_0_DATA0 +#define PF2GUC_VF_CONTROL_REQUEST_MSG_1_VFID GUC_HXG_EVENT_MSG_n_DATAn +#define PF2GUC_VF_CONTROL_REQUEST_MSG_2_COMMAND GUC_HXG_EVENT_MSG_n_DATAn +#define GUC_PF_TRIGGER_VF_PAUSE 1u +#define GUC_PF_TRIGGER_VF_RESUME 2u +#define GUC_PF_TRIGGER_VF_STOP 3u +#define GUC_PF_TRIGGER_VF_FLR_START 4u +#define GUC_PF_TRIGGER_VF_FLR_FINISH 5u + #endif -- cgit v1.2.3 From aed2c1d70aa008b83c806d33d55b1f782f4fff41 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Tue, 26 Mar 2024 20:15:18 +0100 Subject: drm/xe/pf: Add minimal support for VF_STATE_NOTIFY events MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit GuC will use VF_STATE_NOTIFY events to notify the PF about changes of the VF state, in particular when a VF FLR was requested. Add very minimal support for such events to avoid reporting errors due to unexpected G2H. We will improve handling of these messages later. While around also add few basic functions to control the VF state (pause, resume, stop) as we will also exercise them soon. 
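For instance, a future debugfs or sysfs hook could exercise the new entry points per GT roughly like this (the caller is hypothetical; only the xe_gt_sriov_pf_control_*() functions added below are real):

	struct xe_gt *gt;
	unsigned int id;
	int err = 0;

	for_each_gt(gt, xe, id) {
		err = xe_gt_sriov_pf_control_pause_vf(gt, vfid);
		if (err)
			break;
	}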
Signed-off-by: Michal Wajdeczko Reviewed-by: Piotr Piórkowski Link: https://patchwork.freedesktop.org/patch/msgid/20240326191518.363-3-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/Makefile | 1 + drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c | 257 ++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_gt_sriov_pf_control.h | 27 +++ drivers/gpu/drm/xe/xe_guc_ct.c | 5 + 4 files changed, 290 insertions(+) create mode 100644 drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c create mode 100644 drivers/gpu/drm/xe/xe_gt_sriov_pf_control.h diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index 705c0eaf6e71..21316ee47026 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -156,6 +156,7 @@ xe-y += \ xe_sriov.o xe-$(CONFIG_PCI_IOV) += \ + xe_gt_sriov_pf_control.o \ xe_lmtt.o \ xe_lmtt_2l.o \ xe_lmtt_ml.o diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c new file mode 100644 index 000000000000..40b8f881fe04 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c @@ -0,0 +1,257 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2023-2024 Intel Corporation + */ + +#include "abi/guc_actions_sriov_abi.h" + +#include "xe_device.h" +#include "xe_gt.h" +#include "xe_gt_sriov_pf_control.h" +#include "xe_gt_sriov_printk.h" +#include "xe_guc_ct.h" +#include "xe_sriov.h" + +static const char *control_cmd_to_string(u32 cmd) +{ + switch (cmd) { + case GUC_PF_TRIGGER_VF_PAUSE: + return "PAUSE"; + case GUC_PF_TRIGGER_VF_RESUME: + return "RESUME"; + case GUC_PF_TRIGGER_VF_STOP: + return "STOP"; + case GUC_PF_TRIGGER_VF_FLR_START: + return "FLR_START"; + case GUC_PF_TRIGGER_VF_FLR_FINISH: + return "FLR_FINISH"; + default: + return ""; + } +} + +static int guc_action_vf_control_cmd(struct xe_guc *guc, u32 vfid, u32 cmd) +{ + u32 request[PF2GUC_VF_CONTROL_REQUEST_MSG_LEN] = { + FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) | + FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) | + FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, GUC_ACTION_PF2GUC_VF_CONTROL), + FIELD_PREP(PF2GUC_VF_CONTROL_REQUEST_MSG_1_VFID, vfid), + FIELD_PREP(PF2GUC_VF_CONTROL_REQUEST_MSG_2_COMMAND, cmd), + }; + int ret; + + /* XXX those two commands are now sent from the G2H handler */ + if (cmd == GUC_PF_TRIGGER_VF_FLR_START || cmd == GUC_PF_TRIGGER_VF_FLR_FINISH) + return xe_guc_ct_send_g2h_handler(&guc->ct, request, ARRAY_SIZE(request)); + + ret = xe_guc_ct_send_block(&guc->ct, request, ARRAY_SIZE(request)); + return ret > 0 ? 
-EPROTO : ret; +} + +static int pf_send_vf_control_cmd(struct xe_gt *gt, unsigned int vfid, u32 cmd) +{ + int err; + + xe_gt_assert(gt, vfid != PFID); + + err = guc_action_vf_control_cmd(>->uc.guc, vfid, cmd); + if (unlikely(err)) + xe_gt_sriov_err(gt, "VF%u control command %s failed (%pe)\n", + vfid, control_cmd_to_string(cmd), ERR_PTR(err)); + return err; +} + +static int pf_send_vf_pause(struct xe_gt *gt, unsigned int vfid) +{ + return pf_send_vf_control_cmd(gt, vfid, GUC_PF_TRIGGER_VF_PAUSE); +} + +static int pf_send_vf_resume(struct xe_gt *gt, unsigned int vfid) +{ + return pf_send_vf_control_cmd(gt, vfid, GUC_PF_TRIGGER_VF_RESUME); +} + +static int pf_send_vf_stop(struct xe_gt *gt, unsigned int vfid) +{ + return pf_send_vf_control_cmd(gt, vfid, GUC_PF_TRIGGER_VF_STOP); +} + +static int pf_send_vf_flr_start(struct xe_gt *gt, unsigned int vfid) +{ + return pf_send_vf_control_cmd(gt, vfid, GUC_PF_TRIGGER_VF_FLR_START); +} + +static int pf_send_vf_flr_finish(struct xe_gt *gt, unsigned int vfid) +{ + return pf_send_vf_control_cmd(gt, vfid, GUC_PF_TRIGGER_VF_FLR_FINISH); +} + +/** + * xe_gt_sriov_pf_control_pause_vf - Pause a VF. + * @gt: the &xe_gt + * @vfid: the VF identifier + * + * This function is for PF only. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_gt_sriov_pf_control_pause_vf(struct xe_gt *gt, unsigned int vfid) +{ + return pf_send_vf_pause(gt, vfid); +} + +/** + * xe_gt_sriov_pf_control_resume_vf - Resume a VF. + * @gt: the &xe_gt + * @vfid: the VF identifier + * + * This function is for PF only. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_gt_sriov_pf_control_resume_vf(struct xe_gt *gt, unsigned int vfid) +{ + return pf_send_vf_resume(gt, vfid); +} + +/** + * xe_gt_sriov_pf_control_stop_vf - Stop a VF. + * @gt: the &xe_gt + * @vfid: the VF identifier + * + * This function is for PF only. + * + * Return: 0 on success or a negative error code on failure. 
+ */ +int xe_gt_sriov_pf_control_stop_vf(struct xe_gt *gt, unsigned int vfid) +{ + return pf_send_vf_stop(gt, vfid); +} + +/** + * DOC: The VF FLR Flow with GuC + * + * PF GUC PCI + * ======================================================== + * | | | + * (1) | [ ] <----- FLR --| + * | [ ] : + * (2) [ ] <-------- NOTIFY FLR --[ ] + * [ ] | + * (3) [ ] | + * [ ] | + * [ ]-- START FLR ---------> [ ] + * | [ ] + * (4) | [ ] + * | [ ] + * [ ] <--------- FLR DONE -- [ ] + * [ ] | + * (5) [ ] | + * [ ] | + * [ ]-- FINISH FLR --------> [ ] + * | | + * + * Step 1: PCI HW generates interrupt to the GuC about VF FLR + * Step 2: GuC FW sends G2H notification to the PF about VF FLR + * Step 2a: on some platforms G2H is only received from root GuC + * Step 3: PF sends H2G request to the GuC to start VF FLR sequence + * Step 3a: on some platforms PF must send H2G to all other GuCs + * Step 4: GuC FW performs VF FLR cleanups and notifies the PF when done + * Step 5: PF performs VF FLR cleanups and notifies the GuC FW when finished + */ + +static bool needs_dispatch_flr(struct xe_device *xe) +{ + return xe->info.platform == XE_PVC; +} + +static void pf_handle_vf_flr(struct xe_gt *gt, u32 vfid) +{ + struct xe_device *xe = gt_to_xe(gt); + struct xe_gt *gtit; + unsigned int gtid; + + xe_gt_sriov_info(gt, "VF%u FLR\n", vfid); + + if (needs_dispatch_flr(xe)) { + for_each_gt(gtit, xe, gtid) + pf_send_vf_flr_start(gtit, vfid); + } else { + pf_send_vf_flr_start(gt, vfid); + } +} + +static void pf_handle_vf_flr_done(struct xe_gt *gt, u32 vfid) +{ + pf_send_vf_flr_finish(gt, vfid); +} + +static int pf_handle_vf_event(struct xe_gt *gt, u32 vfid, u32 eventid) +{ + switch (eventid) { + case GUC_PF_NOTIFY_VF_FLR: + pf_handle_vf_flr(gt, vfid); + break; + case GUC_PF_NOTIFY_VF_FLR_DONE: + pf_handle_vf_flr_done(gt, vfid); + break; + case GUC_PF_NOTIFY_VF_PAUSE_DONE: + break; + case GUC_PF_NOTIFY_VF_FIXUP_DONE: + break; + default: + return -ENOPKG; + } + return 0; +} + +static int pf_handle_pf_event(struct xe_gt *gt, u32 eventid) +{ + switch (eventid) { + case GUC_PF_NOTIFY_VF_ENABLE: + xe_gt_sriov_dbg_verbose(gt, "VFs %s/%s\n", + str_enabled_disabled(true), + str_enabled_disabled(false)); + break; + default: + return -ENOPKG; + } + return 0; +} + +/** + * xe_gt_sriov_pf_control_process_guc2pf - Handle VF state notification from GuC. + * @gt: the &xe_gt + * @msg: the G2H message + * @len: the length of the G2H message + * + * This function is for PF only. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_gt_sriov_pf_control_process_guc2pf(struct xe_gt *gt, const u32 *msg, u32 len) +{ + u32 vfid; + u32 eventid; + + xe_gt_assert(gt, len); + xe_gt_assert(gt, FIELD_GET(GUC_HXG_MSG_0_ORIGIN, msg[0]) == GUC_HXG_ORIGIN_GUC); + xe_gt_assert(gt, FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[0]) == GUC_HXG_TYPE_EVENT); + xe_gt_assert(gt, FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, msg[0]) == + GUC_ACTION_GUC2PF_VF_STATE_NOTIFY); + + if (unlikely(!xe_device_is_sriov_pf(gt_to_xe(gt)))) + return -EPROTO; + + if (unlikely(FIELD_GET(GUC2PF_VF_STATE_NOTIFY_EVENT_MSG_0_MBZ, msg[0]))) + return -EPFNOSUPPORT; + + if (unlikely(len != GUC2PF_VF_STATE_NOTIFY_EVENT_MSG_LEN)) + return -EPROTO; + + vfid = FIELD_GET(GUC2PF_VF_STATE_NOTIFY_EVENT_MSG_1_VFID, msg[1]); + eventid = FIELD_GET(GUC2PF_VF_STATE_NOTIFY_EVENT_MSG_2_EVENT, msg[2]); + + return vfid ? 
pf_handle_vf_event(gt, vfid, eventid) : pf_handle_pf_event(gt, eventid); +} diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.h new file mode 100644 index 000000000000..850a3e37661f --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023-2024 Intel Corporation + */ + +#ifndef _XE_GT_SRIOV_PF_CONTROL_H_ +#define _XE_GT_SRIOV_PF_CONTROL_H_ + +#include +#include + +struct xe_gt; + +int xe_gt_sriov_pf_control_pause_vf(struct xe_gt *gt, unsigned int vfid); +int xe_gt_sriov_pf_control_resume_vf(struct xe_gt *gt, unsigned int vfid); +int xe_gt_sriov_pf_control_stop_vf(struct xe_gt *gt, unsigned int vfid); + +#ifdef CONFIG_PCI_IOV +int xe_gt_sriov_pf_control_process_guc2pf(struct xe_gt *gt, const u32 *msg, u32 len); +#else +static inline int xe_gt_sriov_pf_control_process_guc2pf(struct xe_gt *gt, const u32 *msg, u32 len) +{ + return -EPROTO; +} +#endif + +#endif diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index 1d930a8eeeca..f4890e9a1e93 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -21,6 +21,7 @@ #include "xe_gt.h" #include "xe_gt_pagefault.h" #include "xe_gt_printk.h" +#include "xe_gt_sriov_pf_control.h" #include "xe_gt_tlb_invalidation.h" #include "xe_guc.h" #include "xe_guc_relay.h" @@ -1008,6 +1009,7 @@ static int process_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len) { struct xe_device *xe = ct_to_xe(ct); struct xe_guc *guc = ct_to_guc(ct); + struct xe_gt *gt = ct_to_gt(ct); u32 hxg_len = msg_len_to_hxg_len(len); u32 *hxg = msg_to_hxg(msg); u32 action, adj_len; @@ -1063,6 +1065,9 @@ static int process_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len) case XE_GUC_ACTION_GUC2VF_RELAY_FROM_PF: ret = xe_guc_relay_process_guc2vf(&guc->relay, payload, adj_len); break; + case GUC_ACTION_GUC2PF_VF_STATE_NOTIFY: + ret = xe_gt_sriov_pf_control_process_guc2pf(gt, hxg, hxg_len); + break; default: drm_err(&xe->drm, "unexpected action 0x%04x\n", action); } -- cgit v1.2.3 From d62753a57de2547c72819cc82b76731f04563433 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Tue, 26 Mar 2024 15:44:56 -0700 Subject: drm/xe/gsc: Implement WA 14018094691 The WA states that we need to keep the primary GT powered up during GSC load to allow the GSC FW to access its registers. We also need to make sure that one of the registers is locked before starting the load. 
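Note that the load itself runs on the GSC's own GT while both the forcewake and the register write target tile->primary_gt, so the fix brackets the load roughly as follows (condensed from the gsc_upload_and_init() hunk below):

	if (XE_WA(gt, 14018094691)) {
		/* failure is non-fatal, PXP just won't work; warn and go on */
		xe_gt_WARN_ON(tile->primary_gt,
			      xe_force_wake_get(gt_to_fw(tile->primary_gt),
						XE_FORCEWAKE_ALL));
		xe_gt_mcr_multicast_write(tile->primary_gt,
					  EU_SYSTOLIC_LIC_THROTTLE_CTL_WITH_LOCK,
					  EU_SYSTOLIC_LIC_THROTTLE_CTL_LOCK_BIT);
	}
	ret = gsc_upload(gsc);
	if (XE_WA(gt, 14018094691))
		xe_force_wake_put(gt_to_fw(tile->primary_gt), XE_FORCEWAKE_ALL);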
v2: fix location of register def (Matt) Bspec: 55928 Signed-off-by: Daniele Ceraolo Spurio Reviewed-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20240326224456.518548-1-daniele.ceraolospurio@intel.com --- drivers/gpu/drm/xe/regs/xe_gt_regs.h | 3 +++ drivers/gpu/drm/xe/xe_gsc.c | 22 ++++++++++++++++++++++ drivers/gpu/drm/xe/xe_wa_oob.rules | 1 + 3 files changed, 26 insertions(+) diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index 65af9fe95db5..d5b21f03beaa 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -370,6 +370,9 @@ #define RT_CTRL XE_REG_MCR(0xe530) #define DIS_NULL_QUERY REG_BIT(10) +#define EU_SYSTOLIC_LIC_THROTTLE_CTL_WITH_LOCK XE_REG_MCR(0xe534) +#define EU_SYSTOLIC_LIC_THROTTLE_CTL_LOCK_BIT REG_BIT(31) + #define XEHP_HDC_CHICKEN0 XE_REG_MCR(0xe5f0, XE_REG_OPTION_MASKED) #define LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK REG_GENMASK(13, 11) #define DIS_ATOMIC_CHAINING_TYPED_WRITES REG_BIT(3) diff --git a/drivers/gpu/drm/xe/xe_gsc.c b/drivers/gpu/drm/xe/xe_gsc.c index 8339b0b49dfb..92dc442a5114 100644 --- a/drivers/gpu/drm/xe/xe_gsc.c +++ b/drivers/gpu/drm/xe/xe_gsc.c @@ -17,6 +17,7 @@ #include "xe_gsc_proxy.h" #include "xe_gsc_submit.h" #include "xe_gt.h" +#include "xe_gt_mcr.h" #include "xe_gt_printk.h" #include "xe_huc.h" #include "xe_map.h" @@ -252,9 +253,30 @@ static int gsc_upload(struct xe_gsc *gsc) static int gsc_upload_and_init(struct xe_gsc *gsc) { struct xe_gt *gt = gsc_to_gt(gsc); + struct xe_tile *tile = gt_to_tile(gt); int ret; + if (XE_WA(gt, 14018094691)) { + ret = xe_force_wake_get(gt_to_fw(tile->primary_gt), XE_FORCEWAKE_ALL); + + /* + * If the forcewake fails we want to keep going, because the worst + * case outcome in failing to apply the WA is that PXP won't work, + * which is not fatal. We still throw a warning so the issue is + * seen if it happens. + */ + xe_gt_WARN_ON(tile->primary_gt, ret); + + xe_gt_mcr_multicast_write(tile->primary_gt, + EU_SYSTOLIC_LIC_THROTTLE_CTL_WITH_LOCK, + EU_SYSTOLIC_LIC_THROTTLE_CTL_LOCK_BIT); + } + ret = gsc_upload(gsc); + + if (XE_WA(gt, 14018094691)) + xe_force_wake_put(gt_to_fw(tile->primary_gt), XE_FORCEWAKE_ALL); + if (ret) return ret; diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules index 48cdba1cbf95..68600cdead84 100644 --- a/drivers/gpu/drm/xe/xe_wa_oob.rules +++ b/drivers/gpu/drm/xe/xe_wa_oob.rules @@ -19,3 +19,4 @@ GRAPHICS_VERSION_RANGE(1270, 1274) MEDIA_VERSION(1300) PLATFORM(DG2) +14018094691 GRAPHICS_VERSION(2004) -- cgit v1.2.3 From ca83f9d20171ce1ba5f82fb28b77eddabd636443 Mon Sep 17 00:00:00 2001 From: Francois Dugast Date: Thu, 28 Mar 2024 14:02:43 +0000 Subject: drm/xe/uapi: Define topology types as indexes rather than masks The topology type is an index (not a mask) so define the values like other indexes instead of using powers of 2. This is also to make clear that the next type can use value 3. This commit does not change the existing values so it does not break compatibility. 
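Userspace therefore matches @type with equality rather than bit tests; a sketch of consuming one entry of the query (handle_topo() is a hypothetical helper, the uAPI names are real):

	static void handle_topo(const struct drm_xe_query_topology_mask *topo)
	{
		switch (topo->type) {
		case DRM_XE_TOPO_DSS_GEOMETRY:
			/* mask bits = DSS usable for geometry */
			break;
		case DRM_XE_TOPO_DSS_COMPUTE:
			/* mask bits = DSS usable for compute */
			break;
		case DRM_XE_TOPO_EU_PER_DSS:
			/* mask bits = EUs within one DSS */
			break;
		}
	}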
Cc: Lucas De Marchi Suggested-by: Matt Roper Signed-off-by: Francois Dugast Link: https://lore.kernel.org/intel-xe/20240327232317.GI718896@mdroper-desk1.amr.corp.intel.com/ Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240328140243.7-1-francois.dugast@intel.com Signed-off-by: Lucas De Marchi --- include/uapi/drm/xe_drm.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 808ad1c308ec..95a8ecca21f4 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -518,9 +518,9 @@ struct drm_xe_query_topology_mask { /** @gt_id: GT ID the mask is associated with */ __u16 gt_id; -#define DRM_XE_TOPO_DSS_GEOMETRY (1 << 0) -#define DRM_XE_TOPO_DSS_COMPUTE (1 << 1) -#define DRM_XE_TOPO_EU_PER_DSS (1 << 2) +#define DRM_XE_TOPO_DSS_GEOMETRY 1 +#define DRM_XE_TOPO_DSS_COMPUTE 2 +#define DRM_XE_TOPO_EU_PER_DSS 4 /** @type: type of mask */ __u16 type; -- cgit v1.2.3 From aee9781f81bb0cedd843a24f961f5067ef0cab6e Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Thu, 28 Mar 2024 17:28:06 +0100 Subject: drm/xe: Store pointer to struct xe_gt in gt/ debugfs directory Attributes added under 'gt/' directories may wish to use that pointer in case they can't obtain it from elsewhere. Reviewed-by: Rodrigo Vivi Acked-by: Lucas De Marchi Link: https://lore.kernel.org/r/20240214115756.1525-2-michal.wajdeczko@intel.com Signed-off-by: Michal Wajdeczko Link: https://patchwork.freedesktop.org/patch/msgid/20240328162808.451-2-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_gt_debugfs.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c index 6b4dc2927727..abec616927f0 100644 --- a/drivers/gpu/drm/xe/xe_gt_debugfs.c +++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c @@ -268,6 +268,13 @@ void xe_gt_debugfs_register(struct xe_gt *gt) return; } + /* + * Store the xe_gt pointer as private data of the gt/ directory node + * so other GT specific attributes under that directory may refer to + * it by looking at its parent node private data. + */ + root->d_inode->i_private = gt; + /* * Allocate local copy as we need to pass in the GT to the debugfs * entry and drm_debugfs_create_files just references the drm_info_list -- cgit v1.2.3 From 19b8f86f4aaa56ee0c8ee56eb8e2b30f6d272199 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Thu, 28 Mar 2024 17:28:07 +0100 Subject: drm/xe: Define helper for GT specific debugfs files Many of our debugfs files are GT specific and require a pointer to struct xe_gt to correctly show their content. Our initial approach to use the drm_info_list.data field to pass this pointer not only requires extra steps (like copying the template for each GT) but also abuses the rule that this data field should not be device specific. Introduce a helper function that uses the xe_gt pointer stored at the parent directory level and uses .data only to pass the actual print function that expects an xe_gt pointer as a parameter.
Reviewed-by: Rodrigo Vivi Link: https://lore.kernel.org/r/20240214115756.1525-3-michal.wajdeczko@intel.com Signed-off-by: Michal Wajdeczko Link: https://patchwork.freedesktop.org/patch/msgid/20240328162808.451-3-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_gt_debugfs.c | 52 ++++++++++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_gt_debugfs.h | 2 ++ 2 files changed, 54 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c index abec616927f0..97c3c2437b9a 100644 --- a/drivers/gpu/drm/xe/xe_gt_debugfs.c +++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c @@ -24,6 +24,58 @@ #include "xe_uc_debugfs.h" #include "xe_wa.h" +/** + * xe_gt_debugfs_simple_show - A show callback for struct drm_info_list + * @m: the &seq_file + * @data: data used by the drm debugfs helpers + * + * This callback can be used in struct drm_info_list to describe debugfs + * files that are &xe_gt specific. + * + * It is assumed that those debugfs files will be created on a directory entry + * whose struct dentry d_inode->i_private points to &xe_gt. + * + * This function assumes that &m->private will be set to the &struct + * drm_info_node corresponding to the instance of the info on a given &struct + * drm_minor (see struct drm_info_list.show for details). + * + * This function also assumes that struct drm_info_list.data will point to the + * function code that will actually print the file content:: + * + * int (*print)(struct xe_gt *, struct drm_printer *) + * + * Example:: + * + * int foo(struct xe_gt *gt, struct drm_printer *p) + * { + * drm_printf(p, "GT%u\n", gt->info.id); + * return 0; + * } + * + * static const struct drm_info_list bar[] = { + * { .name = "foo", .show = xe_gt_debugfs_simple_show, .data = foo }, + * }; + * + * dir = debugfs_create_dir("gt", parent); + * dir->d_inode->i_private = gt; + * drm_debugfs_create_files(bar, ARRAY_SIZE(bar), dir, minor); + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_gt_debugfs_simple_show(struct seq_file *m, void *data) +{ + struct drm_printer p = drm_seq_file_printer(m); + struct drm_info_node *node = m->private; + struct dentry *parent = node->dent->d_parent; + struct xe_gt *gt = parent->d_inode->i_private; + int (*print)(struct xe_gt *, struct drm_printer *) = node->info_ent->data; + + if (WARN_ON(!print)) + return -EINVAL; + + return print(gt, &p); +} + static struct xe_gt *node_to_gt(struct drm_info_node *node) { return node->info_ent->data; diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.h b/drivers/gpu/drm/xe/xe_gt_debugfs.h index 5a329f118a57..05a6cc93c78c 100644 --- a/drivers/gpu/drm/xe/xe_gt_debugfs.h +++ b/drivers/gpu/drm/xe/xe_gt_debugfs.h @@ -6,8 +6,10 @@ #ifndef _XE_GT_DEBUGFS_H_ #define _XE_GT_DEBUGFS_H_ +struct seq_file; struct xe_gt; void xe_gt_debugfs_register(struct xe_gt *gt); +int xe_gt_debugfs_simple_show(struct seq_file *m, void *data); #endif -- cgit v1.2.3 From c54eb24f713dfe19229bc0dec55f50b96058db43 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Thu, 28 Mar 2024 17:28:08 +0100 Subject: drm/xe: Refactor GT debugfs We are abusing struct drm_info_list.data by storing there a pointer to the xe_gt, while it shouldn't be used for any device specific data. Use recently introduced xe_gt_debugfs_simple_show() that hides all the details of how to obtain the xe_gt pointer. This will also remove the need for making copies of the struct drm_info_list to get GT specific definitions.
Reviewed-by: Rodrigo Vivi Link: https://lore.kernel.org/r/20240214115756.1525-4-michal.wajdeczko@intel.com Signed-off-by: Michal Wajdeczko Link: https://patchwork.freedesktop.org/patch/msgid/20240328162808.451-4-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_gt_debugfs.c | 162 ++++++++++++------------------------- 1 file changed, 51 insertions(+), 111 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c index 97c3c2437b9a..ee4285d42a18 100644 --- a/drivers/gpu/drm/xe/xe_gt_debugfs.c +++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c @@ -76,16 +76,9 @@ int xe_gt_debugfs_simple_show(struct seq_file *m, void *data) return print(gt, &p); } -static struct xe_gt *node_to_gt(struct drm_info_node *node) +static int hw_engines(struct xe_gt *gt, struct drm_printer *p) { - return node->info_ent->data; -} - -static int hw_engines(struct seq_file *m, void *data) -{ - struct xe_gt *gt = node_to_gt(m->private); struct xe_device *xe = gt_to_xe(gt); - struct drm_printer p = drm_seq_file_printer(m); struct xe_hw_engine *hwe; enum xe_hw_engine_id id; int err; @@ -98,7 +91,7 @@ static int hw_engines(struct seq_file *m, void *data) } for_each_hw_engine(hwe, gt, id) - xe_hw_engine_print(hwe, &p); + xe_hw_engine_print(hwe, p); err = xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL); xe_pm_runtime_put(xe); @@ -108,10 +101,8 @@ static int hw_engines(struct seq_file *m, void *data) return 0; } -static int force_reset(struct seq_file *m, void *data) +static int force_reset(struct xe_gt *gt, struct drm_printer *p) { - struct xe_gt *gt = node_to_gt(m->private); - xe_pm_runtime_get(gt_to_xe(gt)); xe_gt_reset_async(gt); xe_pm_runtime_put(gt_to_xe(gt)); @@ -119,187 +110,154 @@ static int force_reset(struct seq_file *m, void *data) return 0; } -static int sa_info(struct seq_file *m, void *data) +static int sa_info(struct xe_gt *gt, struct drm_printer *p) { - struct xe_gt *gt = node_to_gt(m->private); struct xe_tile *tile = gt_to_tile(gt); - struct drm_printer p = drm_seq_file_printer(m); xe_pm_runtime_get(gt_to_xe(gt)); - drm_suballoc_dump_debug_info(&tile->mem.kernel_bb_pool->base, &p, + drm_suballoc_dump_debug_info(&tile->mem.kernel_bb_pool->base, p, tile->mem.kernel_bb_pool->gpu_addr); xe_pm_runtime_put(gt_to_xe(gt)); return 0; } -static int topology(struct seq_file *m, void *data) +static int topology(struct xe_gt *gt, struct drm_printer *p) { - struct xe_gt *gt = node_to_gt(m->private); - struct drm_printer p = drm_seq_file_printer(m); - xe_pm_runtime_get(gt_to_xe(gt)); - xe_gt_topology_dump(gt, &p); + xe_gt_topology_dump(gt, p); xe_pm_runtime_put(gt_to_xe(gt)); return 0; } -static int steering(struct seq_file *m, void *data) +static int steering(struct xe_gt *gt, struct drm_printer *p) { - struct xe_gt *gt = node_to_gt(m->private); - struct drm_printer p = drm_seq_file_printer(m); - xe_pm_runtime_get(gt_to_xe(gt)); - xe_gt_mcr_steering_dump(gt, &p); + xe_gt_mcr_steering_dump(gt, p); xe_pm_runtime_put(gt_to_xe(gt)); return 0; } -static int ggtt(struct seq_file *m, void *data) +static int ggtt(struct xe_gt *gt, struct drm_printer *p) { - struct xe_gt *gt = node_to_gt(m->private); - struct drm_printer p = drm_seq_file_printer(m); int ret; xe_pm_runtime_get(gt_to_xe(gt)); - ret = xe_ggtt_dump(gt_to_tile(gt)->mem.ggtt, &p); + ret = xe_ggtt_dump(gt_to_tile(gt)->mem.ggtt, p); xe_pm_runtime_put(gt_to_xe(gt)); return ret; } -static int register_save_restore(struct seq_file *m, void *data) +static int register_save_restore(struct xe_gt *gt, struct drm_printer *p) { - struct xe_gt *gt = 
node_to_gt(m->private); - struct drm_printer p = drm_seq_file_printer(m); struct xe_hw_engine *hwe; enum xe_hw_engine_id id; xe_pm_runtime_get(gt_to_xe(gt)); - xe_reg_sr_dump(>->reg_sr, &p); - drm_printf(&p, "\n"); + xe_reg_sr_dump(>->reg_sr, p); + drm_printf(p, "\n"); - drm_printf(&p, "Engine\n"); + drm_printf(p, "Engine\n"); for_each_hw_engine(hwe, gt, id) - xe_reg_sr_dump(&hwe->reg_sr, &p); - drm_printf(&p, "\n"); + xe_reg_sr_dump(&hwe->reg_sr, p); + drm_printf(p, "\n"); - drm_printf(&p, "LRC\n"); + drm_printf(p, "LRC\n"); for_each_hw_engine(hwe, gt, id) - xe_reg_sr_dump(&hwe->reg_lrc, &p); - drm_printf(&p, "\n"); + xe_reg_sr_dump(&hwe->reg_lrc, p); + drm_printf(p, "\n"); - drm_printf(&p, "Whitelist\n"); + drm_printf(p, "Whitelist\n"); for_each_hw_engine(hwe, gt, id) - xe_reg_whitelist_dump(&hwe->reg_whitelist, &p); + xe_reg_whitelist_dump(&hwe->reg_whitelist, p); xe_pm_runtime_put(gt_to_xe(gt)); return 0; } -static int workarounds(struct seq_file *m, void *data) +static int workarounds(struct xe_gt *gt, struct drm_printer *p) { - struct xe_gt *gt = node_to_gt(m->private); - struct drm_printer p = drm_seq_file_printer(m); - xe_pm_runtime_get(gt_to_xe(gt)); - xe_wa_dump(gt, &p); + xe_wa_dump(gt, p); xe_pm_runtime_put(gt_to_xe(gt)); return 0; } -static int pat(struct seq_file *m, void *data) +static int pat(struct xe_gt *gt, struct drm_printer *p) { - struct xe_gt *gt = node_to_gt(m->private); - struct drm_printer p = drm_seq_file_printer(m); - xe_pm_runtime_get(gt_to_xe(gt)); - xe_pat_dump(gt, &p); + xe_pat_dump(gt, p); xe_pm_runtime_put(gt_to_xe(gt)); return 0; } -static int rcs_default_lrc(struct seq_file *m, void *data) +static int rcs_default_lrc(struct xe_gt *gt, struct drm_printer *p) { - struct xe_gt *gt = node_to_gt(m->private); - struct drm_printer p = drm_seq_file_printer(m); - xe_pm_runtime_get(gt_to_xe(gt)); - xe_lrc_dump_default(&p, node_to_gt(m->private), XE_ENGINE_CLASS_RENDER); + xe_lrc_dump_default(p, gt, XE_ENGINE_CLASS_RENDER); xe_pm_runtime_put(gt_to_xe(gt)); return 0; } -static int ccs_default_lrc(struct seq_file *m, void *data) +static int ccs_default_lrc(struct xe_gt *gt, struct drm_printer *p) { - struct xe_gt *gt = node_to_gt(m->private); - struct drm_printer p = drm_seq_file_printer(m); - xe_pm_runtime_get(gt_to_xe(gt)); - xe_lrc_dump_default(&p, node_to_gt(m->private), XE_ENGINE_CLASS_COMPUTE); + xe_lrc_dump_default(p, gt, XE_ENGINE_CLASS_COMPUTE); xe_pm_runtime_put(gt_to_xe(gt)); return 0; } -static int bcs_default_lrc(struct seq_file *m, void *data) +static int bcs_default_lrc(struct xe_gt *gt, struct drm_printer *p) { - struct xe_gt *gt = node_to_gt(m->private); - struct drm_printer p = drm_seq_file_printer(m); - xe_pm_runtime_get(gt_to_xe(gt)); - xe_lrc_dump_default(&p, node_to_gt(m->private), XE_ENGINE_CLASS_COPY); + xe_lrc_dump_default(p, gt, XE_ENGINE_CLASS_COPY); xe_pm_runtime_put(gt_to_xe(gt)); return 0; } -static int vcs_default_lrc(struct seq_file *m, void *data) +static int vcs_default_lrc(struct xe_gt *gt, struct drm_printer *p) { - struct xe_gt *gt = node_to_gt(m->private); - struct drm_printer p = drm_seq_file_printer(m); - xe_pm_runtime_get(gt_to_xe(gt)); - xe_lrc_dump_default(&p, node_to_gt(m->private), XE_ENGINE_CLASS_VIDEO_DECODE); + xe_lrc_dump_default(p, gt, XE_ENGINE_CLASS_VIDEO_DECODE); xe_pm_runtime_put(gt_to_xe(gt)); return 0; } -static int vecs_default_lrc(struct seq_file *m, void *data) +static int vecs_default_lrc(struct xe_gt *gt, struct drm_printer *p) { - struct xe_gt *gt = node_to_gt(m->private); - struct drm_printer p = 
drm_seq_file_printer(m); - xe_pm_runtime_get(gt_to_xe(gt)); - xe_lrc_dump_default(&p, node_to_gt(m->private), XE_ENGINE_CLASS_VIDEO_ENHANCE); + xe_lrc_dump_default(p, gt, XE_ENGINE_CLASS_VIDEO_ENHANCE); xe_pm_runtime_put(gt_to_xe(gt)); return 0; } static const struct drm_info_list debugfs_list[] = { - {"hw_engines", hw_engines, 0}, - {"force_reset", force_reset, 0}, - {"sa_info", sa_info, 0}, - {"topology", topology, 0}, - {"steering", steering, 0}, - {"ggtt", ggtt, 0}, - {"register-save-restore", register_save_restore, 0}, - {"workarounds", workarounds, 0}, - {"pat", pat, 0}, - {"default_lrc_rcs", rcs_default_lrc}, - {"default_lrc_ccs", ccs_default_lrc}, - {"default_lrc_bcs", bcs_default_lrc}, - {"default_lrc_vcs", vcs_default_lrc}, - {"default_lrc_vecs", vecs_default_lrc}, + {"hw_engines", .show = xe_gt_debugfs_simple_show, .data = hw_engines}, + {"force_reset", .show = xe_gt_debugfs_simple_show, .data = force_reset}, + {"sa_info", .show = xe_gt_debugfs_simple_show, .data = sa_info}, + {"topology", .show = xe_gt_debugfs_simple_show, .data = topology}, + {"steering", .show = xe_gt_debugfs_simple_show, .data = steering}, + {"ggtt", .show = xe_gt_debugfs_simple_show, .data = ggtt}, + {"register-save-restore", .show = xe_gt_debugfs_simple_show, .data = register_save_restore}, + {"workarounds", .show = xe_gt_debugfs_simple_show, .data = workarounds}, + {"pat", .show = xe_gt_debugfs_simple_show, .data = pat}, + {"default_lrc_rcs", .show = xe_gt_debugfs_simple_show, .data = rcs_default_lrc}, + {"default_lrc_ccs", .show = xe_gt_debugfs_simple_show, .data = ccs_default_lrc}, + {"default_lrc_bcs", .show = xe_gt_debugfs_simple_show, .data = bcs_default_lrc}, + {"default_lrc_vcs", .show = xe_gt_debugfs_simple_show, .data = vcs_default_lrc}, + {"default_lrc_vecs", .show = xe_gt_debugfs_simple_show, .data = vecs_default_lrc}, }; void xe_gt_debugfs_register(struct xe_gt *gt) @@ -307,9 +265,7 @@ void xe_gt_debugfs_register(struct xe_gt *gt) struct xe_device *xe = gt_to_xe(gt); struct drm_minor *minor = gt_to_xe(gt)->drm.primary; struct dentry *root; - struct drm_info_list *local; char name[8]; - int i; xe_gt_assert(gt, minor->debugfs_root); @@ -327,23 +283,7 @@ void xe_gt_debugfs_register(struct xe_gt *gt) */ root->d_inode->i_private = gt; - /* - * Allocate local copy as we need to pass in the GT to the debugfs - * entry and drm_debugfs_create_files just references the drm_info_list - * passed in (e.g. can't define this on the stack). - */ -#define DEBUGFS_SIZE (ARRAY_SIZE(debugfs_list) * sizeof(struct drm_info_list)) - local = drmm_kmalloc(&xe->drm, DEBUGFS_SIZE, GFP_KERNEL); - if (!local) - return; - - memcpy(local, debugfs_list, DEBUGFS_SIZE); -#undef DEBUGFS_SIZE - - for (i = 0; i < ARRAY_SIZE(debugfs_list); ++i) - local[i].data = gt; - - drm_debugfs_create_files(local, + drm_debugfs_create_files(debugfs_list, ARRAY_SIZE(debugfs_list), root, minor); -- cgit v1.2.3 From 345dadc4f68b1bd87bd9650b1397a6b75a2573d7 Mon Sep 17 00:00:00 2001 From: Karthik Poosa Date: Thu, 28 Mar 2024 23:24:35 +0530 Subject: drm/xe/hwmon: Add infra to support card power and energy attributes Add infra to support card power and energy attributes through channel 0. Package attributes will now be exposed through channel 1 rather than channel 0, as shown below. Channel 0, i.e. power1/energy1_xxx, is used for the card power and energy attributes, and channel 1, i.e. power2/energy2_xxx, is used for the package power and energy attributes. power1/curr1_crit and in0_input are moved to channel 1, i.e. power2/curr2_crit and in1_input, as these are available for the package only.
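For illustration, the resulting per-channel layout (editor's summary of the ABI entries documented below; the hwmon index, e.g. hwmon2, varies per system):

    channel 0 ("card"): power1_max, power1_rated_max, power1_max_interval,
                        energy1_input, power1_label, energy1_label
    channel 1 ("pkg"):  power2_max, power2_rated_max, power2_crit, curr2_crit,
                        power2_max_interval, in1_input, energy2_input,
                        power2_label, energy2_label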
This would be needed for future platforms where there might be separate registers for package and card power and energy. Each discrete GPU supported by the Xe driver would have a directory in /sys/class/hwmon/ with multiple channels under it. Each channel would have attributes for power, energy, etc. Ex: /sys/class/hwmon/hwmon2/power1_max /power1_label /energy1_input /energy1_label Attributes will have a label giving a fuller description of them. Labelling is as below. power1_label/energy1_label - "card", power2_label/energy2_label - "pkg". v2: Fix checkpatch errors. v3: - Update intel-xe-hwmon documentation. (Riana, Badal) - Rename hwmon card channel enum from CHANNEL_PLATFORM to CHANNEL_CARD. (Riana) v4: - Remove unrelated changes from patch. (Anshuman) - Fix typo in commit msg. v5: - Update commit message and intel-xe-hwmon documentation with "Xe" instead of xe when using it as a name. (Rodrigo) Signed-off-by: Karthik Poosa Reviewed-by: Badal Nilawar Link: https://patchwork.freedesktop.org/patch/msgid/20240328175435.3870957-1-karthik.poosa@intel.com Signed-off-by: Rodrigo Vivi --- .../ABI/testing/sysfs-driver-intel-xe-hwmon | 94 ++++++--- drivers/gpu/drm/xe/xe_hwmon.c | 212 +++++++++++++-------- 2 files changed, 197 insertions(+), 109 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon b/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon index 023fd82de3f7..d792a56f59ac 100644 --- a/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon +++ b/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon @@ -10,7 +10,7 @@ Description: RW. Card reactive sustained (PL1) power limit in microwatts. power limit is disabled, writing 0 disables the limit. Writing values > 0 and <= TDP will enable the power limit. - Only supported for particular Intel xe graphics platforms. + Only supported for particular Intel Xe graphics platforms. What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon/power1_rated_max Date: September 2023 @@ -18,53 +18,93 @@ KernelVersion: 6.5 Contact: intel-xe@lists.freedesktop.org Description: RO. Card default power limit (default TDP setting). - Only supported for particular Intel xe graphics platforms. + Only supported for particular Intel Xe graphics platforms. -What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon/power1_crit + +What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon/energy1_input Date: September 2023 KernelVersion: 6.5 Contact: intel-xe@lists.freedesktop.org -Description: RW. Card reactive critical (I1) power limit in microwatts. +Description: RO. Card energy input of device in microjoules. + + Only supported for particular Intel Xe graphics platforms. + +What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon/power1_max_interval +Date: October 2023 +KernelVersion: 6.6 +Contact: intel-xe@lists.freedesktop.org +Description: RW. Card sustained power limit interval (Tau in PL1/Tau) in + milliseconds over which sustained power is averaged. + + Only supported for particular Intel Xe graphics platforms. + +What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon/power2_max +Date: February 2024 +KernelVersion: 6.8 +Contact: intel-xe@lists.freedesktop.org +Description: RW. Package reactive sustained (PL1) power limit in microwatts. + + The power controller will throttle the operating frequency + if the power averaged over a window (typically seconds) + exceeds this limit. A read value of 0 means that the PL1 + power limit is disabled, writing 0 disables the + limit. Writing values > 0 and <= TDP will enable the power limit. + + Only supported for particular Intel Xe graphics platforms.
+ +What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon/power2_rated_max +Date: February 2024 +KernelVersion: 6.8 +Contact: intel-xe@lists.freedesktop.org +Description: RO. Package default power limit (default TDP setting). - Card reactive critical (I1) power limit in microwatts is exposed + Only supported for particular Intel Xe graphics platforms. + +What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon/power2_crit +Date: February 2024 +KernelVersion: 6.8 +Contact: intel-xe@lists.freedesktop.org +Description: RW. Package reactive critical (I1) power limit in microwatts. + + Package reactive critical (I1) power limit in microwatts is exposed for client products. The power controller will throttle the operating frequency if the power averaged over a window exceeds this limit. - Only supported for particular Intel xe graphics platforms. + Only supported for particular Intel Xe graphics platforms. -What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon/curr1_crit -Date: September 2023 -KernelVersion: 6.5 +What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon/curr2_crit +Date: February 2024 +KernelVersion: 6.8 Contact: intel-xe@lists.freedesktop.org -Description: RW. Card reactive critical (I1) power limit in milliamperes. +Description: RW. Package reactive critical (I1) power limit in milliamperes. - Card reactive critical (I1) power limit in milliamperes is + Package reactive critical (I1) power limit in milliamperes is exposed for server products. The power controller will throttle the operating frequency if the power averaged over a window exceeds this limit. -What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon/in0_input -Date: September 2023 -KernelVersion: 6.5 +What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon/energy2_input +Date: February 2024 +KernelVersion: 6.8 Contact: intel-xe@lists.freedesktop.org -Description: RO. Current Voltage in millivolt. +Description: RO. Package energy input of device in microjoules. - Only supported for particular Intel xe graphics platforms. + Only supported for particular Intel Xe graphics platforms. -What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon/energy1_input -Date: September 2023 -KernelVersion: 6.5 +What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon/power2_max_interval +Date: February 2024 +KernelVersion: 6.8 Contact: intel-xe@lists.freedesktop.org -Description: RO. Energy input of device in microjoules. +Description: RW. Package sustained power limit interval (Tau in PL1/Tau) in + milliseconds over which sustained power is averaged. - Only supported for particular Intel xe graphics platforms. + Only supported for particular Intel Xe graphics platforms. -What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon/power1_max_interval -Date: October 2023 -KernelVersion: 6.6 +What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon/in1_input +Date: February 2024 +KernelVersion: 6.8 Contact: intel-xe@lists.freedesktop.org -Description: RW. Sustained power limit interval (Tau in PL1/Tau) in - milliseconds over which sustained power is averaged. +Description: RO. Package current voltage in millivolt. - Only supported for particular Intel xe graphics platforms. + Only supported for particular Intel Xe graphics platforms. diff --git a/drivers/gpu/drm/xe/xe_hwmon.c b/drivers/gpu/drm/xe/xe_hwmon.c index a256af8c2012..7e8caac838e0 100644 --- a/drivers/gpu/drm/xe/xe_hwmon.c +++ b/drivers/gpu/drm/xe/xe_hwmon.c @@ -34,6 +34,12 @@ enum xe_hwmon_reg_operation { REG_READ64, }; +enum xe_hwmon_channel { + CHANNEL_CARD, + CHANNEL_PKG, + CHANNEL_MAX, +}; + /* * SF_* - scale factors for particular quantities according to hwmon spec. 
*/ @@ -69,26 +75,26 @@ struct xe_hwmon { int scl_shift_energy; /** @scl_shift_time: pkg time unit */ int scl_shift_time; - /** @ei: Energy info for energy1_input */ - struct xe_hwmon_energy_info ei; + /** @ei: Energy info for energyN_input */ + struct xe_hwmon_energy_info ei[CHANNEL_MAX]; }; -static u32 xe_hwmon_get_reg(struct xe_hwmon *hwmon, enum xe_hwmon_reg hwmon_reg) +static u32 xe_hwmon_get_reg(struct xe_hwmon *hwmon, enum xe_hwmon_reg hwmon_reg, int channel) { struct xe_device *xe = gt_to_xe(hwmon->gt); struct xe_reg reg = XE_REG(0); switch (hwmon_reg) { case REG_PKG_RAPL_LIMIT: - if (xe->info.platform == XE_PVC) + if (xe->info.platform == XE_PVC && channel == CHANNEL_PKG) reg = PVC_GT0_PACKAGE_RAPL_LIMIT; - else if (xe->info.platform == XE_DG2) + else if ((xe->info.platform == XE_DG2) && (channel == CHANNEL_PKG)) reg = PCU_CR_PACKAGE_RAPL_LIMIT; break; case REG_PKG_POWER_SKU: - if (xe->info.platform == XE_PVC) + if (xe->info.platform == XE_PVC && channel == CHANNEL_PKG) reg = PVC_GT0_PACKAGE_POWER_SKU; - else if (xe->info.platform == XE_DG2) + else if ((xe->info.platform == XE_DG2) && (channel == CHANNEL_PKG)) reg = PCU_CR_PACKAGE_POWER_SKU; break; case REG_PKG_POWER_SKU_UNIT: @@ -98,13 +104,13 @@ static u32 xe_hwmon_get_reg(struct xe_hwmon *hwmon, enum xe_hwmon_reg hwmon_reg) reg = PCU_CR_PACKAGE_POWER_SKU_UNIT; break; case REG_GT_PERF_STATUS: - if (xe->info.platform == XE_DG2) + if (xe->info.platform == XE_DG2 && channel == CHANNEL_PKG) reg = GT_PERF_STATUS; break; case REG_PKG_ENERGY_STATUS: - if (xe->info.platform == XE_PVC) + if (xe->info.platform == XE_PVC && channel == CHANNEL_PKG) reg = PVC_GT0_PLATFORM_ENERGY_STATUS; - else if (xe->info.platform == XE_DG2) + else if ((xe->info.platform == XE_DG2) && (channel == CHANNEL_PKG)) reg = PCU_CR_PACKAGE_ENERGY_STATUS; break; default: @@ -117,11 +123,11 @@ static u32 xe_hwmon_get_reg(struct xe_hwmon *hwmon, enum xe_hwmon_reg hwmon_reg) static void xe_hwmon_process_reg(struct xe_hwmon *hwmon, enum xe_hwmon_reg hwmon_reg, enum xe_hwmon_reg_operation operation, u64 *value, - u32 clr, u32 set) + u32 clr, u32 set, int channel) { struct xe_reg reg; - reg.raw = xe_hwmon_get_reg(hwmon, hwmon_reg); + reg.raw = xe_hwmon_get_reg(hwmon, hwmon_reg, channel); if (!reg.raw) return; @@ -151,13 +157,13 @@ static void xe_hwmon_process_reg(struct xe_hwmon *hwmon, enum xe_hwmon_reg hwmon * same pattern for sysfs, allow arbitrary PL1 limits to be set but display * clamped values when read. 
*/ -static void xe_hwmon_power_max_read(struct xe_hwmon *hwmon, long *value) +static void xe_hwmon_power_max_read(struct xe_hwmon *hwmon, int channel, long *value) { u64 reg_val, min, max; mutex_lock(&hwmon->hwmon_lock); - xe_hwmon_process_reg(hwmon, REG_PKG_RAPL_LIMIT, REG_READ32, &reg_val, 0, 0); + xe_hwmon_process_reg(hwmon, REG_PKG_RAPL_LIMIT, REG_READ32, &reg_val, 0, 0, channel); /* Check if PL1 limit is disabled */ if (!(reg_val & PKG_PWR_LIM_1_EN)) { *value = PL1_DISABLE; @@ -167,7 +173,7 @@ static void xe_hwmon_power_max_read(struct xe_hwmon *hwmon, long *value) reg_val = REG_FIELD_GET(PKG_PWR_LIM_1, reg_val); *value = mul_u64_u32_shr(reg_val, SF_POWER, hwmon->scl_shift_power); - xe_hwmon_process_reg(hwmon, REG_PKG_POWER_SKU, REG_READ64, &reg_val, 0, 0); + xe_hwmon_process_reg(hwmon, REG_PKG_POWER_SKU, REG_READ64, &reg_val, 0, 0, channel); min = REG_FIELD_GET(PKG_MIN_PWR, reg_val); min = mul_u64_u32_shr(min, SF_POWER, hwmon->scl_shift_power); max = REG_FIELD_GET(PKG_MAX_PWR, reg_val); @@ -179,7 +185,7 @@ unlock: mutex_unlock(&hwmon->hwmon_lock); } -static int xe_hwmon_power_max_write(struct xe_hwmon *hwmon, long value) +static int xe_hwmon_power_max_write(struct xe_hwmon *hwmon, int channel, long value) { int ret = 0; u64 reg_val; @@ -189,9 +195,9 @@ static int xe_hwmon_power_max_write(struct xe_hwmon *hwmon, long value) /* Disable PL1 limit and verify, as limit cannot be disabled on all platforms */ if (value == PL1_DISABLE) { xe_hwmon_process_reg(hwmon, REG_PKG_RAPL_LIMIT, REG_RMW32, &reg_val, - PKG_PWR_LIM_1_EN, 0); + PKG_PWR_LIM_1_EN, 0, channel); xe_hwmon_process_reg(hwmon, REG_PKG_RAPL_LIMIT, REG_READ32, &reg_val, - PKG_PWR_LIM_1_EN, 0); + PKG_PWR_LIM_1_EN, 0, channel); if (reg_val & PKG_PWR_LIM_1_EN) { ret = -EOPNOTSUPP; @@ -204,17 +210,17 @@ static int xe_hwmon_power_max_write(struct xe_hwmon *hwmon, long value) reg_val = PKG_PWR_LIM_1_EN | REG_FIELD_PREP(PKG_PWR_LIM_1, reg_val); xe_hwmon_process_reg(hwmon, REG_PKG_RAPL_LIMIT, REG_RMW32, &reg_val, - PKG_PWR_LIM_1_EN | PKG_PWR_LIM_1, reg_val); + PKG_PWR_LIM_1_EN | PKG_PWR_LIM_1, reg_val, channel); unlock: mutex_unlock(&hwmon->hwmon_lock); return ret; } -static void xe_hwmon_power_rated_max_read(struct xe_hwmon *hwmon, long *value) +static void xe_hwmon_power_rated_max_read(struct xe_hwmon *hwmon, int channel, long *value) { u64 reg_val; - xe_hwmon_process_reg(hwmon, REG_PKG_POWER_SKU, REG_READ32, &reg_val, 0, 0); + xe_hwmon_process_reg(hwmon, REG_PKG_POWER_SKU, REG_READ32, &reg_val, 0, 0, channel); reg_val = REG_FIELD_GET(PKG_TDP, reg_val); *value = mul_u64_u32_shr(reg_val, SF_POWER, hwmon->scl_shift_power); } @@ -237,16 +243,16 @@ static void xe_hwmon_power_rated_max_read(struct xe_hwmon *hwmon, long *value) * the hwmon API. Using x86_64 128 bit arithmetic (see mul_u64_u32_shr()), * a 'long' of 63 bits, SF_ENERGY of 1e6 (~20 bits) and * hwmon->scl_shift_energy of 14 bits we have 57 (63 - 20 + 14) bits before - energy1_input overflows. This at 1000 W is an overflow duration of 278 years. + energyN_input overflows. This at 1000 W is an overflow duration of 278 years.
*/ static void -xe_hwmon_energy_get(struct xe_hwmon *hwmon, long *energy) +xe_hwmon_energy_get(struct xe_hwmon *hwmon, int channel, long *energy) { - struct xe_hwmon_energy_info *ei = &hwmon->ei; + struct xe_hwmon_energy_info *ei = &hwmon->ei[channel]; u64 reg_val; xe_hwmon_process_reg(hwmon, REG_PKG_ENERGY_STATUS, REG_READ32, - &reg_val, 0, 0); + &reg_val, 0, 0, channel); if (reg_val >= ei->reg_val_prev) ei->accum_energy += reg_val - ei->reg_val_prev; @@ -260,19 +266,20 @@ xe_hwmon_energy_get(struct xe_hwmon *hwmon, long *energy) } static ssize_t -xe_hwmon_power1_max_interval_show(struct device *dev, struct device_attribute *attr, - char *buf) +xe_hwmon_power_max_interval_show(struct device *dev, struct device_attribute *attr, + char *buf) { struct xe_hwmon *hwmon = dev_get_drvdata(dev); u32 x, y, x_w = 2; /* 2 bits */ u64 r, tau4, out; + int sensor_index = to_sensor_dev_attr(attr)->index; xe_pm_runtime_get(gt_to_xe(hwmon->gt)); mutex_lock(&hwmon->hwmon_lock); xe_hwmon_process_reg(hwmon, REG_PKG_RAPL_LIMIT, - REG_READ32, &r, 0, 0); + REG_READ32, &r, 0, 0, sensor_index); mutex_unlock(&hwmon->hwmon_lock); @@ -300,14 +307,15 @@ xe_hwmon_power1_max_interval_show(struct device *dev, struct device_attribute *a } static ssize_t -xe_hwmon_power1_max_interval_store(struct device *dev, struct device_attribute *attr, - const char *buf, size_t count) +xe_hwmon_power_max_interval_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) { struct xe_hwmon *hwmon = dev_get_drvdata(dev); u32 x, y, rxy, x_w = 2; /* 2 bits */ u64 tau4, r, max_win; unsigned long val; int ret; + int sensor_index = to_sensor_dev_attr(attr)->index; ret = kstrtoul(buf, 0, &val); if (ret) return ret; @@ -326,7 +334,7 @@ /* * val must be < max in hwmon interface units. The steps below are - * explained in xe_hwmon_power1_max_interval_show() + * explained in xe_hwmon_power_max_interval_show() */ r = FIELD_PREP(PKG_MAX_WIN, PKG_MAX_WIN_DEFAULT); x = REG_FIELD_GET(PKG_MAX_WIN_X, r); @@ -360,7 +368,7 @@ mutex_lock(&hwmon->hwmon_lock); xe_hwmon_process_reg(hwmon, REG_PKG_RAPL_LIMIT, REG_RMW32, (u64 *)&r, - PKG_PWR_LIM_1_TIME, rxy); + PKG_PWR_LIM_1_TIME, rxy, sensor_index); mutex_unlock(&hwmon->hwmon_lock); @@ -370,11 +378,16 @@ } static SENSOR_DEVICE_ATTR(power1_max_interval, 0664, - xe_hwmon_power1_max_interval_show, - xe_hwmon_power1_max_interval_store, 0); + xe_hwmon_power_max_interval_show, + xe_hwmon_power_max_interval_store, CHANNEL_CARD); + +static SENSOR_DEVICE_ATTR(power2_max_interval, 0664, + xe_hwmon_power_max_interval_show, + xe_hwmon_power_max_interval_store, CHANNEL_PKG); static struct attribute *hwmon_attributes[] = { &sensor_dev_attr_power1_max_interval.dev_attr.attr, + &sensor_dev_attr_power2_max_interval.dev_attr.attr, NULL }; @@ -387,8 +400,7 @@ static umode_t xe_hwmon_attributes_visible(struct kobject *kobj, xe_pm_runtime_get(gt_to_xe(hwmon->gt)); - if (attr == &sensor_dev_attr_power1_max_interval.dev_attr.attr) - ret = xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT) ? attr->mode : 0; + ret = xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, index) ?
attr->mode : 0; xe_pm_runtime_put(gt_to_xe(hwmon->gt)); @@ -406,10 +418,11 @@ static const struct attribute_group *hwmon_groups[] = { }; static const struct hwmon_channel_info * const hwmon_info[] = { - HWMON_CHANNEL_INFO(power, HWMON_P_MAX | HWMON_P_RATED_MAX | HWMON_P_CRIT), - HWMON_CHANNEL_INFO(curr, HWMON_C_CRIT), - HWMON_CHANNEL_INFO(in, HWMON_I_INPUT), - HWMON_CHANNEL_INFO(energy, HWMON_E_INPUT), + HWMON_CHANNEL_INFO(power, HWMON_P_MAX | HWMON_P_RATED_MAX | HWMON_P_LABEL, + HWMON_P_MAX | HWMON_P_RATED_MAX | HWMON_P_CRIT | HWMON_P_LABEL), + HWMON_CHANNEL_INFO(curr, HWMON_C_LABEL, HWMON_C_CRIT | HWMON_C_LABEL), + HWMON_CHANNEL_INFO(in, HWMON_I_INPUT | HWMON_I_LABEL, HWMON_I_INPUT | HWMON_I_LABEL), + HWMON_CHANNEL_INFO(energy, HWMON_E_INPUT | HWMON_E_LABEL, HWMON_E_INPUT | HWMON_E_LABEL), NULL }; @@ -432,7 +445,8 @@ static int xe_hwmon_pcode_write_i1(struct xe_gt *gt, u32 uval) uval); } -static int xe_hwmon_power_curr_crit_read(struct xe_hwmon *hwmon, long *value, u32 scale_factor) +static int xe_hwmon_power_curr_crit_read(struct xe_hwmon *hwmon, int channel, + long *value, u32 scale_factor) { int ret; u32 uval; @@ -450,7 +464,8 @@ unlock: return ret; } -static int xe_hwmon_power_curr_crit_write(struct xe_hwmon *hwmon, long value, u32 scale_factor) +static int xe_hwmon_power_curr_crit_write(struct xe_hwmon *hwmon, int channel, + long value, u32 scale_factor) { int ret; u32 uval; @@ -464,117 +479,127 @@ static int xe_hwmon_power_curr_crit_write(struct xe_hwmon *hwmon, long value, u3 return ret; } -static void xe_hwmon_get_voltage(struct xe_hwmon *hwmon, long *value) +static void xe_hwmon_get_voltage(struct xe_hwmon *hwmon, int channel, long *value) { u64 reg_val; xe_hwmon_process_reg(hwmon, REG_GT_PERF_STATUS, - REG_READ32, &reg_val, 0, 0); + REG_READ32, &reg_val, 0, 0, channel); /* HW register value in units of 2.5 millivolt */ *value = DIV_ROUND_CLOSEST(REG_FIELD_GET(VOLTAGE_MASK, reg_val) * 2500, SF_VOLTAGE); } static umode_t -xe_hwmon_power_is_visible(struct xe_hwmon *hwmon, u32 attr, int chan) +xe_hwmon_power_is_visible(struct xe_hwmon *hwmon, u32 attr, int channel) { u32 uval; switch (attr) { case hwmon_power_max: - return xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT) ? 0664 : 0; + return xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, channel) ? 0664 : 0; case hwmon_power_rated_max: - return xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU) ? 0444 : 0; + return xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU, channel) ? 0444 : 0; case hwmon_power_crit: - return (xe_hwmon_pcode_read_i1(hwmon->gt, &uval) || - !(uval & POWER_SETUP_I1_WATTS)) ? 0 : 0644; + if (channel == CHANNEL_PKG) + return (xe_hwmon_pcode_read_i1(hwmon->gt, &uval) || + !(uval & POWER_SETUP_I1_WATTS)) ? 0 : 0644; + break; + case hwmon_power_label: + return xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU_UNIT, channel) ?
0444 : 0; default: return 0; } + return 0; } static int -xe_hwmon_power_read(struct xe_hwmon *hwmon, u32 attr, int chan, long *val) +xe_hwmon_power_read(struct xe_hwmon *hwmon, u32 attr, int channel, long *val) { switch (attr) { case hwmon_power_max: - xe_hwmon_power_max_read(hwmon, val); + xe_hwmon_power_max_read(hwmon, channel, val); return 0; case hwmon_power_rated_max: - xe_hwmon_power_rated_max_read(hwmon, val); + xe_hwmon_power_rated_max_read(hwmon, channel, val); return 0; case hwmon_power_crit: - return xe_hwmon_power_curr_crit_read(hwmon, val, SF_POWER); + return xe_hwmon_power_curr_crit_read(hwmon, channel, val, SF_POWER); default: return -EOPNOTSUPP; } } static int -xe_hwmon_power_write(struct xe_hwmon *hwmon, u32 attr, int chan, long val) +xe_hwmon_power_write(struct xe_hwmon *hwmon, u32 attr, int channel, long val) { switch (attr) { case hwmon_power_max: - return xe_hwmon_power_max_write(hwmon, val); + return xe_hwmon_power_max_write(hwmon, channel, val); case hwmon_power_crit: - return xe_hwmon_power_curr_crit_write(hwmon, val, SF_POWER); + return xe_hwmon_power_curr_crit_write(hwmon, channel, val, SF_POWER); default: return -EOPNOTSUPP; } } static umode_t -xe_hwmon_curr_is_visible(const struct xe_hwmon *hwmon, u32 attr) +xe_hwmon_curr_is_visible(const struct xe_hwmon *hwmon, u32 attr, int channel) { u32 uval; switch (attr) { case hwmon_curr_crit: - return (xe_hwmon_pcode_read_i1(hwmon->gt, &uval) || - (uval & POWER_SETUP_I1_WATTS)) ? 0 : 0644; + case hwmon_curr_label: + if (channel == CHANNEL_PKG) + return (xe_hwmon_pcode_read_i1(hwmon->gt, &uval) || + (uval & POWER_SETUP_I1_WATTS)) ? 0 : 0644; + break; default: return 0; } + return 0; } static int -xe_hwmon_curr_read(struct xe_hwmon *hwmon, u32 attr, long *val) +xe_hwmon_curr_read(struct xe_hwmon *hwmon, u32 attr, int channel, long *val) { switch (attr) { case hwmon_curr_crit: - return xe_hwmon_power_curr_crit_read(hwmon, val, SF_CURR); + return xe_hwmon_power_curr_crit_read(hwmon, channel, val, SF_CURR); default: return -EOPNOTSUPP; } } static int -xe_hwmon_curr_write(struct xe_hwmon *hwmon, u32 attr, long val) +xe_hwmon_curr_write(struct xe_hwmon *hwmon, u32 attr, int channel, long val) { switch (attr) { case hwmon_curr_crit: - return xe_hwmon_power_curr_crit_write(hwmon, val, SF_CURR); + return xe_hwmon_power_curr_crit_write(hwmon, channel, val, SF_CURR); default: return -EOPNOTSUPP; } } static umode_t -xe_hwmon_in_is_visible(struct xe_hwmon *hwmon, u32 attr) +xe_hwmon_in_is_visible(struct xe_hwmon *hwmon, u32 attr, int channel) { switch (attr) { case hwmon_in_input: - return xe_hwmon_get_reg(hwmon, REG_GT_PERF_STATUS) ? 0444 : 0; + case hwmon_in_label: + return xe_hwmon_get_reg(hwmon, REG_GT_PERF_STATUS, channel) ? 0444 : 0; default: return 0; } } static int -xe_hwmon_in_read(struct xe_hwmon *hwmon, u32 attr, long *val) +xe_hwmon_in_read(struct xe_hwmon *hwmon, u32 attr, int channel, long *val) { switch (attr) { case hwmon_in_input: - xe_hwmon_get_voltage(hwmon, val); + xe_hwmon_get_voltage(hwmon, channel, val); return 0; default: return -EOPNOTSUPP; @@ -582,22 +607,23 @@ xe_hwmon_in_read(struct xe_hwmon *hwmon, u32 attr, long *val) } static umode_t -xe_hwmon_energy_is_visible(struct xe_hwmon *hwmon, u32 attr) +xe_hwmon_energy_is_visible(struct xe_hwmon *hwmon, u32 attr, int channel) { switch (attr) { case hwmon_energy_input: - return xe_hwmon_get_reg(hwmon, REG_PKG_ENERGY_STATUS) ? 0444 : 0; + case hwmon_energy_label: + return xe_hwmon_get_reg(hwmon, REG_PKG_ENERGY_STATUS, channel) ? 
0444 : 0; default: return 0; } } static int -xe_hwmon_energy_read(struct xe_hwmon *hwmon, u32 attr, long *val) +xe_hwmon_energy_read(struct xe_hwmon *hwmon, u32 attr, int channel, long *val) { switch (attr) { case hwmon_energy_input: - xe_hwmon_energy_get(hwmon, val); + xe_hwmon_energy_get(hwmon, channel, val); return 0; default: return -EOPNOTSUPP; @@ -618,13 +644,13 @@ xe_hwmon_is_visible(const void *drvdata, enum hwmon_sensor_types type, ret = xe_hwmon_power_is_visible(hwmon, attr, channel); break; case hwmon_curr: - ret = xe_hwmon_curr_is_visible(hwmon, attr); + ret = xe_hwmon_curr_is_visible(hwmon, attr, channel); break; case hwmon_in: - ret = xe_hwmon_in_is_visible(hwmon, attr); + ret = xe_hwmon_in_is_visible(hwmon, attr, channel); break; case hwmon_energy: - ret = xe_hwmon_energy_is_visible(hwmon, attr); + ret = xe_hwmon_energy_is_visible(hwmon, attr, channel); break; default: ret = 0; @@ -650,13 +676,13 @@ xe_hwmon_read(struct device *dev, enum hwmon_sensor_types type, u32 attr, ret = xe_hwmon_power_read(hwmon, attr, channel, val); break; case hwmon_curr: - ret = xe_hwmon_curr_read(hwmon, attr, val); + ret = xe_hwmon_curr_read(hwmon, attr, channel, val); break; case hwmon_in: - ret = xe_hwmon_in_read(hwmon, attr, val); + ret = xe_hwmon_in_read(hwmon, attr, channel, val); break; case hwmon_energy: - ret = xe_hwmon_energy_read(hwmon, attr, val); + ret = xe_hwmon_energy_read(hwmon, attr, channel, val); break; default: ret = -EOPNOTSUPP; @@ -682,7 +708,7 @@ xe_hwmon_write(struct device *dev, enum hwmon_sensor_types type, u32 attr, ret = xe_hwmon_power_write(hwmon, attr, channel, val); break; case hwmon_curr: - ret = xe_hwmon_curr_write(hwmon, attr, val); + ret = xe_hwmon_curr_write(hwmon, attr, channel, val); break; default: ret = -EOPNOTSUPP; @@ -694,10 +720,30 @@ xe_hwmon_write(struct device *dev, enum hwmon_sensor_types type, u32 attr, return ret; } +static int xe_hwmon_read_label(struct device *dev, + enum hwmon_sensor_types type, + u32 attr, int channel, const char **str) +{ + switch (type) { + case hwmon_power: + case hwmon_energy: + case hwmon_curr: + case hwmon_in: + if (channel == CHANNEL_CARD) + *str = "card"; + else if (channel == CHANNEL_PKG) + *str = "pkg"; + return 0; + default: + return -EOPNOTSUPP; + } +} + static const struct hwmon_ops hwmon_ops = { .is_visible = xe_hwmon_is_visible, .read = xe_hwmon_read, .write = xe_hwmon_write, + .read_string = xe_hwmon_read_label, }; static const struct hwmon_chip_info hwmon_chip_info = { @@ -711,14 +757,15 @@ xe_hwmon_get_preregistration_info(struct xe_device *xe) struct xe_hwmon *hwmon = xe->hwmon; long energy; u64 val_sku_unit = 0; + int channel; /* * The contents of register PKG_POWER_SKU_UNIT do not change, * so read it once and store the shift values. */ - if (xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU_UNIT)) { + if (xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU_UNIT, 0)) { xe_hwmon_process_reg(hwmon, REG_PKG_POWER_SKU_UNIT, - REG_READ32, &val_sku_unit, 0, 0); + REG_READ32, &val_sku_unit, 0, 0, 0); hwmon->scl_shift_power = REG_FIELD_GET(PKG_PWR_UNIT, val_sku_unit); hwmon->scl_shift_energy = REG_FIELD_GET(PKG_ENERGY_UNIT, val_sku_unit); hwmon->scl_shift_time = REG_FIELD_GET(PKG_TIME_UNIT, val_sku_unit); @@ -728,8 +775,9 @@ xe_hwmon_get_preregistration_info(struct xe_device *xe) * Initialize 'struct xe_hwmon_energy_info', i.e. 
set fields to the * first value of the energy register read */ - if (xe_hwmon_is_visible(hwmon, hwmon_energy, hwmon_energy_input, 0)) - xe_hwmon_energy_get(hwmon, &energy); + for (channel = 0; channel < CHANNEL_MAX; channel++) + if (xe_hwmon_is_visible(hwmon, hwmon_energy, hwmon_energy_input, channel)) + xe_hwmon_energy_get(hwmon, channel, &energy); } static void xe_hwmon_mutex_destroy(void *arg) -- cgit v1.2.3 From b15e65349553b1689d15fbdebea874ca5ae2274a Mon Sep 17 00:00:00 2001 From: Himal Prasad Ghimiray Date: Thu, 28 Mar 2024 18:07:39 +0530 Subject: drm/xe/xe_devcoredump: Check NULL before assignments MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Assign 'xe_devcoredump_snapshot *' and 'xe_device *' only if 'coredump' is not NULL. v2 - Fix commit messages. v3 - Define variables before code. (Ashutosh/Jose) v4 - Drop return check for coredump_to_xe. (Jose/Rodrigo) v5 - Modify misleading commit message. (Matt) Cc: Matt Roper Cc: Ashutosh Dixit Cc: José Roberto de Souza Cc: Rodrigo Vivi Signed-off-by: Himal Prasad Ghimiray Reviewed-by: Rodrigo Vivi Reviewed-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20240328123739.3633428-1-himal.prasad.ghimiray@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_devcoredump.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c index 3a6263ecff01..a951043b2943 100644 --- a/drivers/gpu/drm/xe/xe_devcoredump.c +++ b/drivers/gpu/drm/xe/xe_devcoredump.c @@ -77,17 +77,19 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset, size_t count, void *data, size_t datalen) { struct xe_devcoredump *coredump = data; - struct xe_device *xe = coredump_to_xe(coredump); - struct xe_devcoredump_snapshot *ss = &coredump->snapshot; + struct xe_device *xe; + struct xe_devcoredump_snapshot *ss; struct drm_printer p; struct drm_print_iterator iter; struct timespec64 ts; int i; - /* Our device is gone already... */ - if (!data || !coredump_to_xe(coredump)) + if (!coredump) return -ENODEV; + xe = coredump_to_xe(coredump); + ss = &coredump->snapshot; + /* Ensure delayed work is captured before continuing */ flush_work(&ss->work); -- cgit v1.2.3 From e27f8a45c81facda4731826e94ed18e66394db2d Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Fri, 22 Mar 2024 07:27:01 -0700 Subject: drm/xe: Stop passing user flag to xe_bo_create_user() It's quite redundant to pass XE_BO_CREATE_USER_BIT to xe_bo_create_user() since the only difference of that function is to force that flag. Stop passing the flag in the few cases that were explicitly doing so.
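To illustrate the redundancy (editor's sketch, not the driver's exact prototypes: parameter lists are elided with /* ... */ and 'other_flags' is a hypothetical placeholder):

    /* xe_bo_create_user() already ORs the flag in unconditionally ... */
    struct xe_bo *xe_bo_create_user(struct xe_device *xe, /* ... */ u32 flags)
    {
            return __xe_bo_create_locked(xe, /* ... */
                                         flags | XE_BO_CREATE_USER_BIT);
    }

    /* ... so also passing it at the call site is a no-op: */
    bo = xe_bo_create_user(xe, /* ... */ XE_BO_CREATE_USER_BIT | other_flags); /* before */
    bo = xe_bo_create_user(xe, /* ... */ other_flags);                         /* after */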
Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20240322142702.186529-2-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/tests/xe_dma_buf.c | 2 +- drivers/gpu/drm/xe/xe_bo.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/tests/xe_dma_buf.c b/drivers/gpu/drm/xe/tests/xe_dma_buf.c index 9f6d571d7fa9..2a86dc4eb8af 100644 --- a/drivers/gpu/drm/xe/tests/xe_dma_buf.c +++ b/drivers/gpu/drm/xe/tests/xe_dma_buf.c @@ -125,7 +125,7 @@ static void xe_test_dmabuf_import_same_driver(struct xe_device *xe) kunit_info(test, "running %s\n", __func__); bo = xe_bo_create_user(xe, NULL, NULL, size, DRM_XE_GEM_CPU_CACHING_WC, - ttm_bo_type_device, XE_BO_CREATE_USER_BIT | params->mem_mask); + ttm_bo_type_device, params->mem_mask); if (IS_ERR(bo)) { KUNIT_FAIL(test, "xe_bo_create() failed with err=%ld\n", PTR_ERR(bo)); diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 9298546909b5..883f68a527d8 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -2279,7 +2279,7 @@ int xe_bo_dumb_create(struct drm_file *file_priv, DRM_XE_GEM_CPU_CACHING_WC, ttm_bo_type_device, XE_BO_CREATE_VRAM_IF_DGFX(xe_device_get_root_tile(xe)) | - XE_BO_CREATE_USER_BIT | XE_BO_SCANOUT_BIT | + XE_BO_SCANOUT_BIT | XE_BO_NEEDS_CPU_ACCESS); if (IS_ERR(bo)) return PTR_ERR(bo); -- cgit v1.2.3 From 62742d12663145160d3b6f0c4209709c1fd343fe Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Fri, 22 Mar 2024 07:27:02 -0700 Subject: drm/xe: Normalize bo flags macros The flags stored in the BO grew over time without following much of a naming pattern. First of all, get rid of the _BIT suffix that was banned from everywhere else due to the guideline in drivers/gpu/drm/i915/i915_reg.h that xe kind of follows: Define bits using ``REG_BIT(N)``. Do **not** add ``_BIT`` suffix to the name. Here the flags aren't for a register, but it's good practice to keep it consistent. The second divergence in names is the use, or not, of "CREATE". This is because most of the flags are passed to the xe_bo_create*() family of functions, changing their behavior. However, since the flags are also stored in the bo itself and checked elsewhere in the code, it seems better to just omit the CREATE part. With those 2 guidelines, all the flags are given the form XE_BO_FLAG_ with the following commands: git grep -le "XE_BO_" -- drivers/gpu/drm/xe | xargs sed -i \ -e "s/XE_BO_\([_A-Z0-9]*\)_BIT/XE_BO_\1/g" \ -e 's/XE_BO_CREATE_/XE_BO_FLAG_/g' git grep -le "XE_BO_" -- drivers/gpu/drm/xe | xargs sed -i -r \ -e 's/XE_BO_(DEFER_BACKING|SCANOUT|FIXED_PLACEMENT|PAGETABLE|NEEDS_CPU_ACCESS|NEEDS_UC|INTERNAL_TEST|INTERNAL_64K|GGTT_INVALIDATE)/XE_BO_FLAG_\1/g' And then the defines in drivers/gpu/drm/xe/xe_bo.h are adjusted to follow the coding style.
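Concretely, the renames produced by these commands include, for example (all visible in the diff below):

    XE_BO_CREATE_USER_BIT    ->  XE_BO_FLAG_USER
    XE_BO_CREATE_SYSTEM_BIT  ->  XE_BO_FLAG_SYSTEM
    XE_BO_CREATE_VRAM0_BIT   ->  XE_BO_FLAG_VRAM0
    XE_BO_CREATE_STOLEN_BIT  ->  XE_BO_FLAG_STOLEN
    XE_BO_SCANOUT_BIT        ->  XE_BO_FLAG_SCANOUT
    XE_BO_PAGETABLE          ->  XE_BO_FLAG_PAGETABLE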
Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20240322142702.186529-3-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi --- .../drm/xe/compat-i915-headers/i915_gem_stolen.h | 2 +- drivers/gpu/drm/xe/display/intel_fb_bo.c | 8 +- drivers/gpu/drm/xe/display/intel_fbdev_fb.c | 16 ++-- drivers/gpu/drm/xe/display/xe_dsb_buffer.c | 4 +- drivers/gpu/drm/xe/display/xe_fb_pin.c | 22 ++--- drivers/gpu/drm/xe/display/xe_hdcp_gsc.c | 4 +- drivers/gpu/drm/xe/display/xe_plane_initial.c | 6 +- drivers/gpu/drm/xe/tests/xe_bo.c | 4 +- drivers/gpu/drm/xe/tests/xe_dma_buf.c | 52 +++++----- drivers/gpu/drm/xe/tests/xe_migrate.c | 20 ++-- drivers/gpu/drm/xe/xe_bo.c | 106 ++++++++++----------- drivers/gpu/drm/xe/xe_bo.h | 45 +++++---- drivers/gpu/drm/xe/xe_bo_evict.c | 2 +- drivers/gpu/drm/xe/xe_dma_buf.c | 2 +- drivers/gpu/drm/xe/xe_ggtt.c | 12 +-- drivers/gpu/drm/xe/xe_gsc.c | 8 +- drivers/gpu/drm/xe/xe_gsc_proxy.c | 4 +- drivers/gpu/drm/xe/xe_guc_ads.c | 6 +- drivers/gpu/drm/xe/xe_guc_ct.c | 6 +- drivers/gpu/drm/xe/xe_guc_hwconfig.c | 6 +- drivers/gpu/drm/xe/xe_guc_log.c | 6 +- drivers/gpu/drm/xe/xe_guc_pc.c | 6 +- drivers/gpu/drm/xe/xe_huc.c | 4 +- drivers/gpu/drm/xe/xe_hw_engine.c | 6 +- drivers/gpu/drm/xe/xe_lmtt.c | 4 +- drivers/gpu/drm/xe/xe_lrc.c | 6 +- drivers/gpu/drm/xe/xe_memirq.c | 10 +- drivers/gpu/drm/xe/xe_migrate.c | 4 +- drivers/gpu/drm/xe/xe_pt.c | 10 +- drivers/gpu/drm/xe/xe_sa.c | 6 +- drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c | 2 +- drivers/gpu/drm/xe/xe_uc_fw.c | 4 +- drivers/gpu/drm/xe/xe_vm.c | 2 +- 33 files changed, 202 insertions(+), 203 deletions(-) diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_gem_stolen.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_gem_stolen.h index bd233007c1b7..b4ccc4231e7d 100644 --- a/drivers/gpu/drm/xe/compat-i915-headers/i915_gem_stolen.h +++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_gem_stolen.h @@ -17,7 +17,7 @@ static inline int i915_gem_stolen_insert_node_in_range(struct xe_device *xe, { struct xe_bo *bo; int err; - u32 flags = XE_BO_CREATE_PINNED_BIT | XE_BO_CREATE_STOLEN_BIT; + u32 flags = XE_BO_FLAG_PINNED | XE_BO_FLAG_STOLEN; if (align) size = ALIGN(size, align); diff --git a/drivers/gpu/drm/xe/display/intel_fb_bo.c b/drivers/gpu/drm/xe/display/intel_fb_bo.c index b21da7b745a5..dba327f53ac5 100644 --- a/drivers/gpu/drm/xe/display/intel_fb_bo.c +++ b/drivers/gpu/drm/xe/display/intel_fb_bo.c @@ -11,7 +11,7 @@ void intel_fb_bo_framebuffer_fini(struct xe_bo *bo) { - if (bo->flags & XE_BO_CREATE_PINNED_BIT) { + if (bo->flags & XE_BO_FLAG_PINNED) { /* Unpin our kernel fb first */ xe_bo_lock(bo, false); xe_bo_unpin(bo); @@ -33,9 +33,9 @@ int intel_fb_bo_framebuffer_init(struct intel_framebuffer *intel_fb, if (ret) return ret; - if (!(bo->flags & XE_BO_SCANOUT_BIT)) { + if (!(bo->flags & XE_BO_FLAG_SCANOUT)) { /* - * XE_BO_SCANOUT_BIT should ideally be set at creation, or is + * XE_BO_FLAG_SCANOUT should ideally be set at creation, or is * automatically set when creating FB. We cannot change caching * mode when the boect is VM_BINDed, so we can only set * coherency with display when unbound. 
@@ -44,7 +44,7 @@ int intel_fb_bo_framebuffer_init(struct intel_framebuffer *intel_fb, ttm_bo_unreserve(&bo->ttm); return -EINVAL; } - bo->flags |= XE_BO_SCANOUT_BIT; + bo->flags |= XE_BO_FLAG_SCANOUT; } ttm_bo_unreserve(&bo->ttm); diff --git a/drivers/gpu/drm/xe/display/intel_fbdev_fb.c b/drivers/gpu/drm/xe/display/intel_fbdev_fb.c index 51ae3561fd0d..9e4bcfdbc7e5 100644 --- a/drivers/gpu/drm/xe/display/intel_fbdev_fb.c +++ b/drivers/gpu/drm/xe/display/intel_fbdev_fb.c @@ -42,9 +42,9 @@ struct drm_framebuffer *intel_fbdev_fb_alloc(struct drm_fb_helper *helper, if (!IS_DGFX(dev_priv)) { obj = xe_bo_create_pin_map(dev_priv, xe_device_get_root_tile(dev_priv), NULL, size, - ttm_bo_type_kernel, XE_BO_SCANOUT_BIT | - XE_BO_CREATE_STOLEN_BIT | - XE_BO_CREATE_PINNED_BIT); + ttm_bo_type_kernel, XE_BO_FLAG_SCANOUT | + XE_BO_FLAG_STOLEN | + XE_BO_FLAG_PINNED); if (!IS_ERR(obj)) drm_info(&dev_priv->drm, "Allocated fbdev into stolen\n"); else @@ -52,9 +52,9 @@ struct drm_framebuffer *intel_fbdev_fb_alloc(struct drm_fb_helper *helper, } if (IS_ERR(obj)) { obj = xe_bo_create_pin_map(dev_priv, xe_device_get_root_tile(dev_priv), NULL, size, - ttm_bo_type_kernel, XE_BO_SCANOUT_BIT | - XE_BO_CREATE_VRAM_IF_DGFX(xe_device_get_root_tile(dev_priv)) | - XE_BO_CREATE_PINNED_BIT); + ttm_bo_type_kernel, XE_BO_FLAG_SCANOUT | + XE_BO_FLAG_VRAM_IF_DGFX(xe_device_get_root_tile(dev_priv)) | + XE_BO_FLAG_PINNED); } if (IS_ERR(obj)) { @@ -81,8 +81,8 @@ int intel_fbdev_fb_fill_info(struct drm_i915_private *i915, struct fb_info *info { struct pci_dev *pdev = to_pci_dev(i915->drm.dev); - if (!(obj->flags & XE_BO_CREATE_SYSTEM_BIT)) { - if (obj->flags & XE_BO_CREATE_STOLEN_BIT) + if (!(obj->flags & XE_BO_FLAG_SYSTEM)) { + if (obj->flags & XE_BO_FLAG_STOLEN) info->fix.smem_start = xe_ttm_stolen_io_offset(obj, 0); else info->fix.smem_start = diff --git a/drivers/gpu/drm/xe/display/xe_dsb_buffer.c b/drivers/gpu/drm/xe/display/xe_dsb_buffer.c index 27c2fb1c002a..44c9fd2143cc 100644 --- a/drivers/gpu/drm/xe/display/xe_dsb_buffer.c +++ b/drivers/gpu/drm/xe/display/xe_dsb_buffer.c @@ -45,8 +45,8 @@ bool intel_dsb_buffer_create(struct intel_crtc *crtc, struct intel_dsb_buffer *d obj = xe_bo_create_pin_map(i915, xe_device_get_root_tile(i915), NULL, PAGE_ALIGN(size), ttm_bo_type_kernel, - XE_BO_CREATE_VRAM_IF_DGFX(xe_device_get_root_tile(i915)) | - XE_BO_CREATE_GGTT_BIT); + XE_BO_FLAG_VRAM_IF_DGFX(xe_device_get_root_tile(i915)) | + XE_BO_FLAG_GGTT); if (IS_ERR(obj)) { kfree(vma); return false; diff --git a/drivers/gpu/drm/xe/display/xe_fb_pin.c b/drivers/gpu/drm/xe/display/xe_fb_pin.c index 2a50a7eaaa31..3a584bc3a0a3 100644 --- a/drivers/gpu/drm/xe/display/xe_fb_pin.c +++ b/drivers/gpu/drm/xe/display/xe_fb_pin.c @@ -99,21 +99,21 @@ static int __xe_pin_fb_vma_dpt(struct intel_framebuffer *fb, if (IS_DGFX(xe)) dpt = xe_bo_create_pin_map(xe, tile0, NULL, dpt_size, ttm_bo_type_kernel, - XE_BO_CREATE_VRAM0_BIT | - XE_BO_CREATE_GGTT_BIT | - XE_BO_PAGETABLE); + XE_BO_FLAG_VRAM0 | + XE_BO_FLAG_GGTT | + XE_BO_FLAG_PAGETABLE); else dpt = xe_bo_create_pin_map(xe, tile0, NULL, dpt_size, ttm_bo_type_kernel, - XE_BO_CREATE_STOLEN_BIT | - XE_BO_CREATE_GGTT_BIT | - XE_BO_PAGETABLE); + XE_BO_FLAG_STOLEN | + XE_BO_FLAG_GGTT | + XE_BO_FLAG_PAGETABLE); if (IS_ERR(dpt)) dpt = xe_bo_create_pin_map(xe, tile0, NULL, dpt_size, ttm_bo_type_kernel, - XE_BO_CREATE_SYSTEM_BIT | - XE_BO_CREATE_GGTT_BIT | - XE_BO_PAGETABLE); + XE_BO_FLAG_SYSTEM | + XE_BO_FLAG_GGTT | + XE_BO_FLAG_PAGETABLE); if (IS_ERR(dpt)) return PTR_ERR(dpt); @@ -262,7 +262,7 @@ static struct 
i915_vma *__xe_pin_fb_vma(struct intel_framebuffer *fb, if (IS_DGFX(to_xe_device(bo->ttm.base.dev)) && intel_fb_rc_ccs_cc_plane(&fb->base) >= 0 && - !(bo->flags & XE_BO_NEEDS_CPU_ACCESS)) { + !(bo->flags & XE_BO_FLAG_NEEDS_CPU_ACCESS)) { struct xe_tile *tile = xe_device_get_root_tile(xe); /* @@ -355,7 +355,7 @@ int intel_plane_pin_fb(struct intel_plane_state *plane_state) struct i915_vma *vma; /* We reject creating !SCANOUT fb's, so this is weird.. */ - drm_WARN_ON(bo->ttm.base.dev, !(bo->flags & XE_BO_SCANOUT_BIT)); + drm_WARN_ON(bo->ttm.base.dev, !(bo->flags & XE_BO_FLAG_SCANOUT)); vma = __xe_pin_fb_vma(to_intel_framebuffer(fb), &plane_state->view.gtt); if (IS_ERR(vma)) diff --git a/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c b/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c index 25c73602ef55..ac4b870f73fa 100644 --- a/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c +++ b/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c @@ -73,8 +73,8 @@ static int intel_hdcp_gsc_initialize_message(struct xe_device *xe, xe_device_mem_access_get(xe); bo = xe_bo_create_pin_map(xe, xe_device_get_root_tile(xe), NULL, PAGE_SIZE * 2, ttm_bo_type_kernel, - XE_BO_CREATE_SYSTEM_BIT | - XE_BO_CREATE_GGTT_BIT); + XE_BO_FLAG_SYSTEM | + XE_BO_FLAG_GGTT); if (IS_ERR(bo)) { drm_err(&xe->drm, "Failed to allocate bo for HDCP streaming command!\n"); diff --git a/drivers/gpu/drm/xe/display/xe_plane_initial.c b/drivers/gpu/drm/xe/display/xe_plane_initial.c index 866d1dd6eeb4..7132cd5d9545 100644 --- a/drivers/gpu/drm/xe/display/xe_plane_initial.c +++ b/drivers/gpu/drm/xe/display/xe_plane_initial.c @@ -62,7 +62,7 @@ initial_plane_bo(struct xe_device *xe, if (plane_config->size == 0) return NULL; - flags = XE_BO_CREATE_PINNED_BIT | XE_BO_SCANOUT_BIT | XE_BO_CREATE_GGTT_BIT; + flags = XE_BO_FLAG_PINNED | XE_BO_FLAG_SCANOUT | XE_BO_FLAG_GGTT; base = round_down(plane_config->base, page_size); if (IS_DGFX(xe)) { @@ -79,7 +79,7 @@ initial_plane_bo(struct xe_device *xe, } phys_base = pte & ~(page_size - 1); - flags |= XE_BO_CREATE_VRAM0_BIT; + flags |= XE_BO_FLAG_VRAM0; /* * We don't currently expect this to ever be placed in the @@ -101,7 +101,7 @@ initial_plane_bo(struct xe_device *xe, if (!stolen) return NULL; phys_base = base; - flags |= XE_BO_CREATE_STOLEN_BIT; + flags |= XE_BO_FLAG_STOLEN; /* * If the FB is too big, just don't use it since fbdev is not very diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c index 0926a1c2eb86..9f3c02826464 100644 --- a/drivers/gpu/drm/xe/tests/xe_bo.c +++ b/drivers/gpu/drm/xe/tests/xe_bo.c @@ -116,7 +116,7 @@ static void ccs_test_run_tile(struct xe_device *xe, struct xe_tile *tile, int ret; /* TODO: Sanity check */ - unsigned int bo_flags = XE_BO_CREATE_VRAM_IF_DGFX(tile); + unsigned int bo_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile); if (IS_DGFX(xe)) kunit_info(test, "Testing vram id %u\n", tile->id); @@ -186,7 +186,7 @@ EXPORT_SYMBOL_IF_KUNIT(xe_ccs_migrate_kunit); static int evict_test_run_tile(struct xe_device *xe, struct xe_tile *tile, struct kunit *test) { struct xe_bo *bo, *external; - unsigned int bo_flags = XE_BO_CREATE_VRAM_IF_DGFX(tile); + unsigned int bo_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile); struct xe_vm *vm = xe_migrate_get_vm(xe_device_get_root_tile(xe)->migrate); struct xe_gt *__gt; int err, i, id; diff --git a/drivers/gpu/drm/xe/tests/xe_dma_buf.c b/drivers/gpu/drm/xe/tests/xe_dma_buf.c index 2a86dc4eb8af..d54dd5b43007 100644 --- a/drivers/gpu/drm/xe/tests/xe_dma_buf.c +++ b/drivers/gpu/drm/xe/tests/xe_dma_buf.c @@ -36,14 +36,14 @@ static void check_residency(struct kunit 
*test, struct xe_bo *exported, xe_bo_assert_held(imported); mem_type = XE_PL_VRAM0; - if (!(params->mem_mask & XE_BO_CREATE_VRAM0_BIT)) + if (!(params->mem_mask & XE_BO_FLAG_VRAM0)) /* No VRAM allowed */ mem_type = XE_PL_TT; else if (params->force_different_devices && !p2p_enabled(params)) /* No P2P */ mem_type = XE_PL_TT; else if (params->force_different_devices && !is_dynamic(params) && - (params->mem_mask & XE_BO_CREATE_SYSTEM_BIT)) + (params->mem_mask & XE_BO_FLAG_SYSTEM)) /* Pin migrated to TT */ mem_type = XE_PL_TT; @@ -93,7 +93,7 @@ static void check_residency(struct kunit *test, struct xe_bo *exported, * possible, saving a migration step as the transfer is just * likely as fast from system memory. */ - if (params->mem_mask & XE_BO_CREATE_SYSTEM_BIT) + if (params->mem_mask & XE_BO_FLAG_SYSTEM) KUNIT_EXPECT_TRUE(test, xe_bo_is_mem_type(exported, XE_PL_TT)); else KUNIT_EXPECT_TRUE(test, xe_bo_is_mem_type(exported, mem_type)); @@ -115,11 +115,11 @@ static void xe_test_dmabuf_import_same_driver(struct xe_device *xe) /* No VRAM on this device? */ if (!ttm_manager_type(&xe->ttm, XE_PL_VRAM0) && - (params->mem_mask & XE_BO_CREATE_VRAM0_BIT)) + (params->mem_mask & XE_BO_FLAG_VRAM0)) return; size = PAGE_SIZE; - if ((params->mem_mask & XE_BO_CREATE_VRAM0_BIT) && + if ((params->mem_mask & XE_BO_FLAG_VRAM0) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) size = SZ_64K; @@ -148,7 +148,7 @@ static void xe_test_dmabuf_import_same_driver(struct xe_device *xe) */ if (params->force_different_devices && !p2p_enabled(params) && - !(params->mem_mask & XE_BO_CREATE_SYSTEM_BIT)) { + !(params->mem_mask & XE_BO_FLAG_SYSTEM)) { KUNIT_FAIL(test, "xe_gem_prime_import() succeeded when it shouldn't have\n"); } else { @@ -161,7 +161,7 @@ static void xe_test_dmabuf_import_same_driver(struct xe_device *xe) /* Pinning in VRAM is not allowed. */ if (!is_dynamic(params) && params->force_different_devices && - !(params->mem_mask & XE_BO_CREATE_SYSTEM_BIT)) + !(params->mem_mask & XE_BO_FLAG_SYSTEM)) KUNIT_EXPECT_EQ(test, err, -EINVAL); /* Otherwise only expect interrupts or success. */ else if (err && err != -EINTR && err != -ERESTARTSYS) @@ -180,7 +180,7 @@ static void xe_test_dmabuf_import_same_driver(struct xe_device *xe) PTR_ERR(import)); } else if (!params->force_different_devices || p2p_enabled(params) || - (params->mem_mask & XE_BO_CREATE_SYSTEM_BIT)) { + (params->mem_mask & XE_BO_FLAG_SYSTEM)) { /* Shouldn't fail if we can reuse same bo, use p2p or use system */ KUNIT_FAIL(test, "dynamic p2p attachment failed with err=%ld\n", PTR_ERR(import)); @@ -203,52 +203,52 @@ static const struct dma_buf_attach_ops nop2p_attach_ops = { * gem object. 
*/ static const struct dma_buf_test_params test_params[] = { - {.mem_mask = XE_BO_CREATE_VRAM0_BIT, + {.mem_mask = XE_BO_FLAG_VRAM0, .attach_ops = &xe_dma_buf_attach_ops}, - {.mem_mask = XE_BO_CREATE_VRAM0_BIT, + {.mem_mask = XE_BO_FLAG_VRAM0, .attach_ops = &xe_dma_buf_attach_ops, .force_different_devices = true}, - {.mem_mask = XE_BO_CREATE_VRAM0_BIT, + {.mem_mask = XE_BO_FLAG_VRAM0, .attach_ops = &nop2p_attach_ops}, - {.mem_mask = XE_BO_CREATE_VRAM0_BIT, + {.mem_mask = XE_BO_FLAG_VRAM0, .attach_ops = &nop2p_attach_ops, .force_different_devices = true}, - {.mem_mask = XE_BO_CREATE_VRAM0_BIT}, - {.mem_mask = XE_BO_CREATE_VRAM0_BIT, + {.mem_mask = XE_BO_FLAG_VRAM0}, + {.mem_mask = XE_BO_FLAG_VRAM0, .force_different_devices = true}, - {.mem_mask = XE_BO_CREATE_SYSTEM_BIT, + {.mem_mask = XE_BO_FLAG_SYSTEM, .attach_ops = &xe_dma_buf_attach_ops}, - {.mem_mask = XE_BO_CREATE_SYSTEM_BIT, + {.mem_mask = XE_BO_FLAG_SYSTEM, .attach_ops = &xe_dma_buf_attach_ops, .force_different_devices = true}, - {.mem_mask = XE_BO_CREATE_SYSTEM_BIT, + {.mem_mask = XE_BO_FLAG_SYSTEM, .attach_ops = &nop2p_attach_ops}, - {.mem_mask = XE_BO_CREATE_SYSTEM_BIT, + {.mem_mask = XE_BO_FLAG_SYSTEM, .attach_ops = &nop2p_attach_ops, .force_different_devices = true}, - {.mem_mask = XE_BO_CREATE_SYSTEM_BIT}, - {.mem_mask = XE_BO_CREATE_SYSTEM_BIT, + {.mem_mask = XE_BO_FLAG_SYSTEM}, + {.mem_mask = XE_BO_FLAG_SYSTEM, .force_different_devices = true}, - {.mem_mask = XE_BO_CREATE_SYSTEM_BIT | XE_BO_CREATE_VRAM0_BIT, + {.mem_mask = XE_BO_FLAG_SYSTEM | XE_BO_FLAG_VRAM0, .attach_ops = &xe_dma_buf_attach_ops}, - {.mem_mask = XE_BO_CREATE_SYSTEM_BIT | XE_BO_CREATE_VRAM0_BIT, + {.mem_mask = XE_BO_FLAG_SYSTEM | XE_BO_FLAG_VRAM0, .attach_ops = &xe_dma_buf_attach_ops, .force_different_devices = true}, - {.mem_mask = XE_BO_CREATE_SYSTEM_BIT | XE_BO_CREATE_VRAM0_BIT, + {.mem_mask = XE_BO_FLAG_SYSTEM | XE_BO_FLAG_VRAM0, .attach_ops = &nop2p_attach_ops}, - {.mem_mask = XE_BO_CREATE_SYSTEM_BIT | XE_BO_CREATE_VRAM0_BIT, + {.mem_mask = XE_BO_FLAG_SYSTEM | XE_BO_FLAG_VRAM0, .attach_ops = &nop2p_attach_ops, .force_different_devices = true}, - {.mem_mask = XE_BO_CREATE_SYSTEM_BIT | XE_BO_CREATE_VRAM0_BIT}, - {.mem_mask = XE_BO_CREATE_SYSTEM_BIT | XE_BO_CREATE_VRAM0_BIT, + {.mem_mask = XE_BO_FLAG_SYSTEM | XE_BO_FLAG_VRAM0}, + {.mem_mask = XE_BO_FLAG_SYSTEM | XE_BO_FLAG_VRAM0, .force_different_devices = true}, {} diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c index ce531498f57f..1332832e2f97 100644 --- a/drivers/gpu/drm/xe/tests/xe_migrate.c +++ b/drivers/gpu/drm/xe/tests/xe_migrate.c @@ -113,7 +113,7 @@ static void test_copy(struct xe_migrate *m, struct xe_bo *bo, bo->size, ttm_bo_type_kernel, region | - XE_BO_NEEDS_CPU_ACCESS); + XE_BO_FLAG_NEEDS_CPU_ACCESS); if (IS_ERR(remote)) { KUNIT_FAIL(test, "Failed to allocate remote bo for %s: %li\n", str, PTR_ERR(remote)); @@ -191,7 +191,7 @@ out_unlock: static void test_copy_sysmem(struct xe_migrate *m, struct xe_bo *bo, struct kunit *test) { - test_copy(m, bo, test, XE_BO_CREATE_SYSTEM_BIT); + test_copy(m, bo, test, XE_BO_FLAG_SYSTEM); } static void test_copy_vram(struct xe_migrate *m, struct xe_bo *bo, @@ -203,9 +203,9 @@ static void test_copy_vram(struct xe_migrate *m, struct xe_bo *bo, return; if (bo->ttm.resource->mem_type == XE_PL_VRAM0) - region = XE_BO_CREATE_VRAM1_BIT; + region = XE_BO_FLAG_VRAM1; else - region = XE_BO_CREATE_VRAM0_BIT; + region = XE_BO_FLAG_VRAM0; test_copy(m, bo, test, region); } @@ -281,8 +281,8 @@ static void 
xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test) big = xe_bo_create_pin_map(xe, tile, m->q->vm, SZ_4M, ttm_bo_type_kernel, - XE_BO_CREATE_VRAM_IF_DGFX(tile) | - XE_BO_CREATE_PINNED_BIT); + XE_BO_FLAG_VRAM_IF_DGFX(tile) | + XE_BO_FLAG_PINNED); if (IS_ERR(big)) { KUNIT_FAIL(test, "Failed to allocate bo: %li\n", PTR_ERR(big)); goto vunmap; @@ -290,8 +290,8 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test) pt = xe_bo_create_pin_map(xe, tile, m->q->vm, XE_PAGE_SIZE, ttm_bo_type_kernel, - XE_BO_CREATE_VRAM_IF_DGFX(tile) | - XE_BO_CREATE_PINNED_BIT); + XE_BO_FLAG_VRAM_IF_DGFX(tile) | + XE_BO_FLAG_PINNED); if (IS_ERR(pt)) { KUNIT_FAIL(test, "Failed to allocate fake pt: %li\n", PTR_ERR(pt)); @@ -301,8 +301,8 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test) tiny = xe_bo_create_pin_map(xe, tile, m->q->vm, 2 * SZ_4K, ttm_bo_type_kernel, - XE_BO_CREATE_VRAM_IF_DGFX(tile) | - XE_BO_CREATE_PINNED_BIT); + XE_BO_FLAG_VRAM_IF_DGFX(tile) | + XE_BO_FLAG_PINNED); if (IS_ERR(tiny)) { KUNIT_FAIL(test, "Failed to allocate fake pt: %li\n", PTR_ERR(pt)); diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 883f68a527d8..6166bc715656 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -111,7 +111,7 @@ bool xe_bo_is_stolen_devmem(struct xe_bo *bo) static bool xe_bo_is_user(struct xe_bo *bo) { - return bo->flags & XE_BO_CREATE_USER_BIT; + return bo->flags & XE_BO_FLAG_USER; } static struct xe_migrate * @@ -137,7 +137,7 @@ static struct xe_mem_region *res_to_mem_region(struct ttm_resource *res) static void try_add_system(struct xe_device *xe, struct xe_bo *bo, u32 bo_flags, u32 *c) { - if (bo_flags & XE_BO_CREATE_SYSTEM_BIT) { + if (bo_flags & XE_BO_FLAG_SYSTEM) { xe_assert(xe, *c < ARRAY_SIZE(bo->placements)); bo->placements[*c] = (struct ttm_place) { @@ -164,12 +164,12 @@ static void add_vram(struct xe_device *xe, struct xe_bo *bo, * For eviction / restore on suspend / resume objects * pinned in VRAM must be contiguous */ - if (bo_flags & (XE_BO_CREATE_PINNED_BIT | - XE_BO_CREATE_GGTT_BIT)) + if (bo_flags & (XE_BO_FLAG_PINNED | + XE_BO_FLAG_GGTT)) place.flags |= TTM_PL_FLAG_CONTIGUOUS; if (io_size < vram->usable_size) { - if (bo_flags & XE_BO_NEEDS_CPU_ACCESS) { + if (bo_flags & XE_BO_FLAG_NEEDS_CPU_ACCESS) { place.fpfn = 0; place.lpfn = io_size >> PAGE_SHIFT; } else { @@ -183,22 +183,22 @@ static void add_vram(struct xe_device *xe, struct xe_bo *bo, static void try_add_vram(struct xe_device *xe, struct xe_bo *bo, u32 bo_flags, u32 *c) { - if (bo_flags & XE_BO_CREATE_VRAM0_BIT) + if (bo_flags & XE_BO_FLAG_VRAM0) add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM0, c); - if (bo_flags & XE_BO_CREATE_VRAM1_BIT) + if (bo_flags & XE_BO_FLAG_VRAM1) add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM1, c); } static void try_add_stolen(struct xe_device *xe, struct xe_bo *bo, u32 bo_flags, u32 *c) { - if (bo_flags & XE_BO_CREATE_STOLEN_BIT) { + if (bo_flags & XE_BO_FLAG_STOLEN) { xe_assert(xe, *c < ARRAY_SIZE(bo->placements)); bo->placements[*c] = (struct ttm_place) { .mem_type = XE_PL_STOLEN, - .flags = bo_flags & (XE_BO_CREATE_PINNED_BIT | - XE_BO_CREATE_GGTT_BIT) ? + .flags = bo_flags & (XE_BO_FLAG_PINNED | + XE_BO_FLAG_GGTT) ? 
TTM_PL_FLAG_CONTIGUOUS : 0, }; *c += 1; @@ -339,7 +339,7 @@ static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object *ttm_bo, break; } - WARN_ON((bo->flags & XE_BO_CREATE_USER_BIT) && !bo->cpu_caching); + WARN_ON((bo->flags & XE_BO_FLAG_USER) && !bo->cpu_caching); /* * Display scanout is always non-coherent with the CPU cache. @@ -347,8 +347,8 @@ static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object *ttm_bo, * For Xe_LPG and beyond, PPGTT PTE lookups are also non-coherent and * require a CPU:WC mapping. */ - if ((!bo->cpu_caching && bo->flags & XE_BO_SCANOUT_BIT) || - (xe->info.graphics_verx100 >= 1270 && bo->flags & XE_BO_PAGETABLE)) + if ((!bo->cpu_caching && bo->flags & XE_BO_FLAG_SCANOUT) || + (xe->info.graphics_verx100 >= 1270 && bo->flags & XE_BO_FLAG_PAGETABLE)) caching = ttm_write_combined; err = ttm_tt_init(&tt->ttm, &bo->ttm, page_flags, caching, extra_pages); @@ -1102,7 +1102,7 @@ static vm_fault_t xe_gem_fault(struct vm_fault *vmf) struct drm_device *ddev = tbo->base.dev; struct xe_device *xe = to_xe_device(ddev); struct xe_bo *bo = ttm_to_xe_bo(tbo); - bool needs_rpm = bo->flags & XE_BO_CREATE_VRAM_MASK; + bool needs_rpm = bo->flags & XE_BO_FLAG_VRAM_MASK; vm_fault_t ret; int idx; @@ -1215,19 +1215,19 @@ struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo, return ERR_PTR(-EINVAL); } - if (flags & (XE_BO_CREATE_VRAM_MASK | XE_BO_CREATE_STOLEN_BIT) && - !(flags & XE_BO_CREATE_IGNORE_MIN_PAGE_SIZE_BIT) && + if (flags & (XE_BO_FLAG_VRAM_MASK | XE_BO_FLAG_STOLEN) && + !(flags & XE_BO_FLAG_IGNORE_MIN_PAGE_SIZE) && ((xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) || (flags & XE_BO_NEEDS_64K))) { aligned_size = ALIGN(size, SZ_64K); if (type != ttm_bo_type_device) size = ALIGN(size, SZ_64K); - flags |= XE_BO_INTERNAL_64K; + flags |= XE_BO_FLAG_INTERNAL_64K; alignment = SZ_64K >> PAGE_SHIFT; } else { aligned_size = ALIGN(size, SZ_4K); - flags &= ~XE_BO_INTERNAL_64K; + flags &= ~XE_BO_FLAG_INTERNAL_64K; alignment = SZ_4K >> PAGE_SHIFT; } @@ -1256,11 +1256,11 @@ struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo, drm_gem_private_object_init(&xe->drm, &bo->ttm.base, size); if (resv) { - ctx.allow_res_evict = !(flags & XE_BO_CREATE_NO_RESV_EVICT); + ctx.allow_res_evict = !(flags & XE_BO_FLAG_NO_RESV_EVICT); ctx.resv = resv; } - if (!(flags & XE_BO_FIXED_PLACEMENT_BIT)) { + if (!(flags & XE_BO_FLAG_FIXED_PLACEMENT)) { err = __xe_bo_placement_for_flags(xe, bo, bo->flags); if (WARN_ON(err)) { xe_ttm_bo_destroy(&bo->ttm); @@ -1270,7 +1270,7 @@ struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo, /* Defer populating type_sg bos */ placement = (type == ttm_bo_type_sg || - bo->flags & XE_BO_DEFER_BACKING) ? &sys_placement : + bo->flags & XE_BO_FLAG_DEFER_BACKING) ? 
&sys_placement : &bo->placement; err = ttm_bo_init_reserved(&xe->ttm, &bo->ttm, type, placement, alignment, @@ -1325,21 +1325,21 @@ static int __xe_bo_fixed_placement(struct xe_device *xe, { struct ttm_place *place = bo->placements; - if (flags & (XE_BO_CREATE_USER_BIT|XE_BO_CREATE_SYSTEM_BIT)) + if (flags & (XE_BO_FLAG_USER | XE_BO_FLAG_SYSTEM)) return -EINVAL; place->flags = TTM_PL_FLAG_CONTIGUOUS; place->fpfn = start >> PAGE_SHIFT; place->lpfn = end >> PAGE_SHIFT; - switch (flags & (XE_BO_CREATE_STOLEN_BIT | XE_BO_CREATE_VRAM_MASK)) { - case XE_BO_CREATE_VRAM0_BIT: + switch (flags & (XE_BO_FLAG_STOLEN | XE_BO_FLAG_VRAM_MASK)) { + case XE_BO_FLAG_VRAM0: place->mem_type = XE_PL_VRAM0; break; - case XE_BO_CREATE_VRAM1_BIT: + case XE_BO_FLAG_VRAM1: place->mem_type = XE_PL_VRAM1; break; - case XE_BO_CREATE_STOLEN_BIT: + case XE_BO_FLAG_STOLEN: place->mem_type = XE_PL_STOLEN; break; @@ -1373,7 +1373,7 @@ __xe_bo_create_locked(struct xe_device *xe, if (IS_ERR(bo)) return bo; - flags |= XE_BO_FIXED_PLACEMENT_BIT; + flags |= XE_BO_FLAG_FIXED_PLACEMENT; err = __xe_bo_fixed_placement(xe, bo, flags, start, end, size); if (err) { xe_bo_free(bo); @@ -1383,7 +1383,7 @@ __xe_bo_create_locked(struct xe_device *xe, bo = ___xe_bo_create_locked(xe, bo, tile, vm ? xe_vm_resv(vm) : NULL, vm && !xe_vm_in_fault_mode(vm) && - flags & XE_BO_CREATE_USER_BIT ? + flags & XE_BO_FLAG_USER ? &vm->lru_bulk_move : NULL, size, cpu_caching, type, flags); if (IS_ERR(bo)) @@ -1400,13 +1400,13 @@ __xe_bo_create_locked(struct xe_device *xe, xe_vm_get(vm); bo->vm = vm; - if (bo->flags & XE_BO_CREATE_GGTT_BIT) { - if (!tile && flags & XE_BO_CREATE_STOLEN_BIT) + if (bo->flags & XE_BO_FLAG_GGTT) { + if (!tile && flags & XE_BO_FLAG_STOLEN) tile = xe_device_get_root_tile(xe); xe_assert(xe, tile); - if (flags & XE_BO_FIXED_PLACEMENT_BIT) { + if (flags & XE_BO_FLAG_FIXED_PLACEMENT) { err = xe_ggtt_insert_bo_at(tile->mem.ggtt, bo, start + bo->size, U64_MAX); } else { @@ -1449,7 +1449,7 @@ struct xe_bo *xe_bo_create_user(struct xe_device *xe, struct xe_tile *tile, { struct xe_bo *bo = __xe_bo_create_locked(xe, tile, vm, size, 0, ~0ULL, cpu_caching, type, - flags | XE_BO_CREATE_USER_BIT); + flags | XE_BO_FLAG_USER); if (!IS_ERR(bo)) xe_bo_unlock_vm_held(bo); @@ -1478,12 +1478,12 @@ struct xe_bo *xe_bo_create_pin_map_at(struct xe_device *xe, struct xe_tile *tile u64 start = offset == ~0ull ? 0 : offset; u64 end = offset == ~0ull ? 
offset : start + size; - if (flags & XE_BO_CREATE_STOLEN_BIT && + if (flags & XE_BO_FLAG_STOLEN && xe_ttm_stolen_cpu_access_needs_ggtt(xe)) - flags |= XE_BO_CREATE_GGTT_BIT; + flags |= XE_BO_FLAG_GGTT; bo = xe_bo_create_locked_range(xe, tile, vm, size, start, end, type, - flags | XE_BO_NEEDS_CPU_ACCESS); + flags | XE_BO_FLAG_NEEDS_CPU_ACCESS); if (IS_ERR(bo)) return bo; @@ -1580,9 +1580,9 @@ struct xe_bo *xe_managed_bo_create_from_data(struct xe_device *xe, struct xe_til int xe_managed_bo_reinit_in_vram(struct xe_device *xe, struct xe_tile *tile, struct xe_bo **src) { struct xe_bo *bo; - u32 dst_flags = XE_BO_CREATE_VRAM_IF_DGFX(tile) | XE_BO_CREATE_GGTT_BIT; + u32 dst_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile) | XE_BO_FLAG_GGTT; - dst_flags |= (*src)->flags & XE_BO_GGTT_INVALIDATE; + dst_flags |= (*src)->flags & XE_BO_FLAG_GGTT_INVALIDATE; xe_assert(xe, IS_DGFX(xe)); xe_assert(xe, !(*src)->vmap.is_iomem); @@ -1663,8 +1663,8 @@ int xe_bo_pin(struct xe_bo *bo) xe_assert(xe, !xe_bo_is_user(bo)); /* Pinned object must be in GGTT or have pinned flag */ - xe_assert(xe, bo->flags & (XE_BO_CREATE_PINNED_BIT | - XE_BO_CREATE_GGTT_BIT)); + xe_assert(xe, bo->flags & (XE_BO_FLAG_PINNED | + XE_BO_FLAG_GGTT)); /* * No reason we can't support pinning imported dma-bufs we just don't @@ -1685,7 +1685,7 @@ int xe_bo_pin(struct xe_bo *bo) * during suspend / resume (force restore to same physical address). */ if (IS_DGFX(xe) && !(IS_ENABLED(CONFIG_DRM_XE_DEBUG) && - bo->flags & XE_BO_INTERNAL_TEST)) { + bo->flags & XE_BO_FLAG_INTERNAL_TEST)) { struct ttm_place *place = &(bo->placements[0]); if (mem_type_is_vram(place->mem_type)) { @@ -1753,7 +1753,7 @@ void xe_bo_unpin(struct xe_bo *bo) xe_assert(xe, xe_bo_is_pinned(bo)); if (IS_DGFX(xe) && !(IS_ENABLED(CONFIG_DRM_XE_DEBUG) && - bo->flags & XE_BO_INTERNAL_TEST)) { + bo->flags & XE_BO_FLAG_INTERNAL_TEST)) { struct ttm_place *place = &(bo->placements[0]); if (mem_type_is_vram(place->mem_type)) { @@ -1856,7 +1856,7 @@ int xe_bo_vmap(struct xe_bo *bo) xe_bo_assert_held(bo); - if (!(bo->flags & XE_BO_NEEDS_CPU_ACCESS)) + if (!(bo->flags & XE_BO_FLAG_NEEDS_CPU_ACCESS)) return -EINVAL; if (!iosys_map_is_null(&bo->vmap)) @@ -1938,29 +1938,29 @@ int xe_gem_create_ioctl(struct drm_device *dev, void *data, bo_flags = 0; if (args->flags & DRM_XE_GEM_CREATE_FLAG_DEFER_BACKING) - bo_flags |= XE_BO_DEFER_BACKING; + bo_flags |= XE_BO_FLAG_DEFER_BACKING; if (args->flags & DRM_XE_GEM_CREATE_FLAG_SCANOUT) - bo_flags |= XE_BO_SCANOUT_BIT; + bo_flags |= XE_BO_FLAG_SCANOUT; - bo_flags |= args->placement << (ffs(XE_BO_CREATE_SYSTEM_BIT) - 1); + bo_flags |= args->placement << (ffs(XE_BO_FLAG_SYSTEM) - 1); if (args->flags & DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM) { - if (XE_IOCTL_DBG(xe, !(bo_flags & XE_BO_CREATE_VRAM_MASK))) + if (XE_IOCTL_DBG(xe, !(bo_flags & XE_BO_FLAG_VRAM_MASK))) return -EINVAL; - bo_flags |= XE_BO_NEEDS_CPU_ACCESS; + bo_flags |= XE_BO_FLAG_NEEDS_CPU_ACCESS; } if (XE_IOCTL_DBG(xe, !args->cpu_caching || args->cpu_caching > DRM_XE_GEM_CPU_CACHING_WC)) return -EINVAL; - if (XE_IOCTL_DBG(xe, bo_flags & XE_BO_CREATE_VRAM_MASK && + if (XE_IOCTL_DBG(xe, bo_flags & XE_BO_FLAG_VRAM_MASK && args->cpu_caching != DRM_XE_GEM_CPU_CACHING_WC)) return -EINVAL; - if (XE_IOCTL_DBG(xe, bo_flags & XE_BO_SCANOUT_BIT && + if (XE_IOCTL_DBG(xe, bo_flags & XE_BO_FLAG_SCANOUT && args->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB)) return -EINVAL; @@ -2209,7 +2209,7 @@ bool xe_bo_needs_ccs_pages(struct xe_bo *bo) * can't be used since there's no CCS storage associated with * non-VRAM 
addresses. */ - if (IS_DGFX(xe) && (bo->flags & XE_BO_CREATE_SYSTEM_BIT)) + if (IS_DGFX(xe) && (bo->flags & XE_BO_FLAG_SYSTEM)) return false; return true; @@ -2278,9 +2278,9 @@ int xe_bo_dumb_create(struct drm_file *file_priv, bo = xe_bo_create_user(xe, NULL, NULL, args->size, DRM_XE_GEM_CPU_CACHING_WC, ttm_bo_type_device, - XE_BO_CREATE_VRAM_IF_DGFX(xe_device_get_root_tile(xe)) | - XE_BO_SCANOUT_BIT | - XE_BO_NEEDS_CPU_ACCESS); + XE_BO_FLAG_VRAM_IF_DGFX(xe_device_get_root_tile(xe)) | + XE_BO_FLAG_SCANOUT | + XE_BO_FLAG_NEEDS_CPU_ACCESS); if (IS_ERR(bo)) return PTR_ERR(bo); diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h index 52e441f77e96..bae042b35fa8 100644 --- a/drivers/gpu/drm/xe/xe_bo.h +++ b/drivers/gpu/drm/xe/xe_bo.h @@ -23,33 +23,32 @@ #define XE_DEFAULT_GTT_SIZE_MB 3072ULL /* 3GB by default */ -#define XE_BO_CREATE_USER_BIT BIT(0) +#define XE_BO_FLAG_USER BIT(0) /* The bits below need to be contiguous, or things break */ -#define XE_BO_CREATE_SYSTEM_BIT BIT(1) -#define XE_BO_CREATE_VRAM0_BIT BIT(2) -#define XE_BO_CREATE_VRAM1_BIT BIT(3) -#define XE_BO_CREATE_VRAM_MASK (XE_BO_CREATE_VRAM0_BIT | \ - XE_BO_CREATE_VRAM1_BIT) +#define XE_BO_FLAG_SYSTEM BIT(1) +#define XE_BO_FLAG_VRAM0 BIT(2) +#define XE_BO_FLAG_VRAM1 BIT(3) +#define XE_BO_FLAG_VRAM_MASK (XE_BO_FLAG_VRAM0 | XE_BO_FLAG_VRAM1) /* -- */ -#define XE_BO_CREATE_STOLEN_BIT BIT(4) -#define XE_BO_CREATE_VRAM_IF_DGFX(tile) \ - (IS_DGFX(tile_to_xe(tile)) ? XE_BO_CREATE_VRAM0_BIT << (tile)->id : \ - XE_BO_CREATE_SYSTEM_BIT) -#define XE_BO_CREATE_GGTT_BIT BIT(5) -#define XE_BO_CREATE_IGNORE_MIN_PAGE_SIZE_BIT BIT(6) -#define XE_BO_CREATE_PINNED_BIT BIT(7) -#define XE_BO_CREATE_NO_RESV_EVICT BIT(8) -#define XE_BO_DEFER_BACKING BIT(9) -#define XE_BO_SCANOUT_BIT BIT(10) -#define XE_BO_FIXED_PLACEMENT_BIT BIT(11) -#define XE_BO_PAGETABLE BIT(12) -#define XE_BO_NEEDS_CPU_ACCESS BIT(13) -#define XE_BO_NEEDS_UC BIT(14) +#define XE_BO_FLAG_STOLEN BIT(4) +#define XE_BO_FLAG_VRAM_IF_DGFX(tile) (IS_DGFX(tile_to_xe(tile)) ? 
\ + XE_BO_FLAG_VRAM0 << (tile)->id : \ + XE_BO_FLAG_SYSTEM) +#define XE_BO_FLAG_GGTT BIT(5) +#define XE_BO_FLAG_IGNORE_MIN_PAGE_SIZE BIT(6) +#define XE_BO_FLAG_PINNED BIT(7) +#define XE_BO_FLAG_NO_RESV_EVICT BIT(8) +#define XE_BO_FLAG_DEFER_BACKING BIT(9) +#define XE_BO_FLAG_SCANOUT BIT(10) +#define XE_BO_FLAG_FIXED_PLACEMENT BIT(11) +#define XE_BO_FLAG_PAGETABLE BIT(12) +#define XE_BO_FLAG_NEEDS_CPU_ACCESS BIT(13) +#define XE_BO_FLAG_NEEDS_UC BIT(14) #define XE_BO_NEEDS_64K BIT(15) -#define XE_BO_GGTT_INVALIDATE BIT(16) +#define XE_BO_FLAG_GGTT_INVALIDATE BIT(16) /* this one is trigger internally only */ -#define XE_BO_INTERNAL_TEST BIT(30) -#define XE_BO_INTERNAL_64K BIT(31) +#define XE_BO_FLAG_INTERNAL_TEST BIT(30) +#define XE_BO_FLAG_INTERNAL_64K BIT(31) #define XELPG_PPGTT_PTE_PAT3 BIT_ULL(62) #define XE2_PPGTT_PTE_PAT4 BIT_ULL(61) diff --git a/drivers/gpu/drm/xe/xe_bo_evict.c b/drivers/gpu/drm/xe/xe_bo_evict.c index 630695088b96..541b49007d73 100644 --- a/drivers/gpu/drm/xe/xe_bo_evict.c +++ b/drivers/gpu/drm/xe/xe_bo_evict.c @@ -146,7 +146,7 @@ int xe_bo_restore_kernel(struct xe_device *xe) return ret; } - if (bo->flags & XE_BO_CREATE_GGTT_BIT) { + if (bo->flags & XE_BO_FLAG_GGTT) { struct xe_tile *tile = bo->tile; mutex_lock(&tile->mem.ggtt->lock); diff --git a/drivers/gpu/drm/xe/xe_dma_buf.c b/drivers/gpu/drm/xe/xe_dma_buf.c index 5b26af21e029..68f309f5e981 100644 --- a/drivers/gpu/drm/xe/xe_dma_buf.c +++ b/drivers/gpu/drm/xe/xe_dma_buf.c @@ -217,7 +217,7 @@ xe_dma_buf_init_obj(struct drm_device *dev, struct xe_bo *storage, dma_resv_lock(resv, NULL); bo = ___xe_bo_create_locked(xe, storage, NULL, resv, NULL, dma_buf->size, 0, /* Will require 1way or 2way for vm_bind */ - ttm_bo_type_sg, XE_BO_CREATE_SYSTEM_BIT); + ttm_bo_type_sg, XE_BO_FLAG_SYSTEM); if (IS_ERR(bo)) { ret = PTR_ERR(bo); goto error; diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index f659af221bd8..ff2239c0eda5 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -224,11 +224,11 @@ int xe_ggtt_init(struct xe_ggtt *ggtt) * scratch entires, rather keep the scratch page in system memory on * platforms where 64K pages are needed for VRAM. */ - flags = XE_BO_CREATE_PINNED_BIT; + flags = XE_BO_FLAG_PINNED; if (ggtt->flags & XE_GGTT_FLAGS_64K) - flags |= XE_BO_CREATE_SYSTEM_BIT; + flags |= XE_BO_FLAG_SYSTEM; else - flags |= XE_BO_CREATE_VRAM_IF_DGFX(ggtt->tile); + flags |= XE_BO_FLAG_VRAM_IF_DGFX(ggtt->tile); ggtt->scratch = xe_managed_bo_create_pin_map(xe, ggtt->tile, XE_PAGE_SIZE, flags); if (IS_ERR(ggtt->scratch)) { @@ -375,7 +375,7 @@ int xe_ggtt_insert_special_node(struct xe_ggtt *ggtt, struct drm_mm_node *node, void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_bo *bo) { - u16 cache_mode = bo->flags & XE_BO_NEEDS_UC ? XE_CACHE_NONE : XE_CACHE_WB; + u16 cache_mode = bo->flags & XE_BO_FLAG_NEEDS_UC ? 
XE_CACHE_NONE : XE_CACHE_WB; u16 pat_index = tile_to_xe(ggtt->tile)->pat.idx[cache_mode]; u64 start = bo->ggtt_node.start; u64 offset, pte; @@ -413,7 +413,7 @@ static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, xe_ggtt_map_bo(ggtt, bo); mutex_unlock(&ggtt->lock); - if (!err && bo->flags & XE_BO_GGTT_INVALIDATE) + if (!err && bo->flags & XE_BO_FLAG_GGTT_INVALIDATE) xe_ggtt_invalidate(ggtt); xe_device_mem_access_put(tile_to_xe(ggtt->tile)); @@ -457,7 +457,7 @@ void xe_ggtt_remove_bo(struct xe_ggtt *ggtt, struct xe_bo *bo) xe_tile_assert(ggtt->tile, bo->ggtt_node.size == bo->size); xe_ggtt_remove_node(ggtt, &bo->ggtt_node, - bo->flags & XE_BO_GGTT_INVALIDATE); + bo->flags & XE_BO_FLAG_GGTT_INVALIDATE); } int xe_ggtt_dump(struct xe_ggtt *ggtt, struct drm_printer *p) diff --git a/drivers/gpu/drm/xe/xe_gsc.c b/drivers/gpu/drm/xe/xe_gsc.c index 92dc442a5114..60202b903687 100644 --- a/drivers/gpu/drm/xe/xe_gsc.c +++ b/drivers/gpu/drm/xe/xe_gsc.c @@ -130,8 +130,8 @@ static int query_compatibility_version(struct xe_gsc *gsc) bo = xe_bo_create_pin_map(xe, tile, NULL, GSC_VER_PKT_SZ * 2, ttm_bo_type_kernel, - XE_BO_CREATE_SYSTEM_BIT | - XE_BO_CREATE_GGTT_BIT); + XE_BO_FLAG_SYSTEM | + XE_BO_FLAG_GGTT); if (IS_ERR(bo)) { xe_gt_err(gt, "failed to allocate bo for GSC version query\n"); return PTR_ERR(bo); @@ -468,8 +468,8 @@ int xe_gsc_init_post_hwconfig(struct xe_gsc *gsc) bo = xe_bo_create_pin_map(xe, tile, NULL, SZ_4M, ttm_bo_type_kernel, - XE_BO_CREATE_STOLEN_BIT | - XE_BO_CREATE_GGTT_BIT); + XE_BO_FLAG_STOLEN | + XE_BO_FLAG_GGTT); if (IS_ERR(bo)) return PTR_ERR(bo); diff --git a/drivers/gpu/drm/xe/xe_gsc_proxy.c b/drivers/gpu/drm/xe/xe_gsc_proxy.c index 1ced6b4d4946..35e397b68dfc 100644 --- a/drivers/gpu/drm/xe/xe_gsc_proxy.c +++ b/drivers/gpu/drm/xe/xe_gsc_proxy.c @@ -411,8 +411,8 @@ static int proxy_channel_alloc(struct xe_gsc *gsc) bo = xe_bo_create_pin_map(xe, tile, NULL, GSC_PROXY_CHANNEL_SIZE, ttm_bo_type_kernel, - XE_BO_CREATE_SYSTEM_BIT | - XE_BO_CREATE_GGTT_BIT); + XE_BO_FLAG_SYSTEM | + XE_BO_FLAG_GGTT); if (IS_ERR(bo)) { kfree(csme); return PTR_ERR(bo); diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c index df2bffb7e220..e025f3e10c9b 100644 --- a/drivers/gpu/drm/xe/xe_guc_ads.c +++ b/drivers/gpu/drm/xe/xe_guc_ads.c @@ -273,9 +273,9 @@ int xe_guc_ads_init(struct xe_guc_ads *ads) ads->regset_size = calculate_regset_size(gt); bo = xe_managed_bo_create_pin_map(xe, tile, guc_ads_size(ads) + MAX_GOLDEN_LRC_SIZE, - XE_BO_CREATE_SYSTEM_BIT | - XE_BO_CREATE_GGTT_BIT | - XE_BO_GGTT_INVALIDATE); + XE_BO_FLAG_SYSTEM | + XE_BO_FLAG_GGTT | + XE_BO_FLAG_GGTT_INVALIDATE); if (IS_ERR(bo)) return PTR_ERR(bo); diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index f4890e9a1e93..6c37f4f9bddd 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -159,9 +159,9 @@ int xe_guc_ct_init(struct xe_guc_ct *ct) primelockdep(ct); bo = xe_managed_bo_create_pin_map(xe, tile, guc_ct_size(), - XE_BO_CREATE_SYSTEM_BIT | - XE_BO_CREATE_GGTT_BIT | - XE_BO_GGTT_INVALIDATE); + XE_BO_FLAG_SYSTEM | + XE_BO_FLAG_GGTT | + XE_BO_FLAG_GGTT_INVALIDATE); if (IS_ERR(bo)) return PTR_ERR(bo); diff --git a/drivers/gpu/drm/xe/xe_guc_hwconfig.c b/drivers/gpu/drm/xe/xe_guc_hwconfig.c index f035ad59f68e..d9b570a154a2 100644 --- a/drivers/gpu/drm/xe/xe_guc_hwconfig.c +++ b/drivers/gpu/drm/xe/xe_guc_hwconfig.c @@ -78,9 +78,9 @@ int xe_guc_hwconfig_init(struct xe_guc *guc) return -EINVAL; bo = xe_managed_bo_create_pin_map(xe, tile, 
PAGE_ALIGN(size), - XE_BO_CREATE_SYSTEM_BIT | - XE_BO_CREATE_GGTT_BIT | - XE_BO_GGTT_INVALIDATE); + XE_BO_FLAG_SYSTEM | + XE_BO_FLAG_GGTT | + XE_BO_FLAG_GGTT_INVALIDATE); if (IS_ERR(bo)) return PTR_ERR(bo); guc->hwconfig.bo = bo; diff --git a/drivers/gpu/drm/xe/xe_guc_log.c b/drivers/gpu/drm/xe/xe_guc_log.c index 9302a7faaf0b..a37ee3419428 100644 --- a/drivers/gpu/drm/xe/xe_guc_log.c +++ b/drivers/gpu/drm/xe/xe_guc_log.c @@ -84,9 +84,9 @@ int xe_guc_log_init(struct xe_guc_log *log) struct xe_bo *bo; bo = xe_managed_bo_create_pin_map(xe, tile, guc_log_size(), - XE_BO_CREATE_SYSTEM_BIT | - XE_BO_CREATE_GGTT_BIT | - XE_BO_GGTT_INVALIDATE); + XE_BO_FLAG_SYSTEM | + XE_BO_FLAG_GGTT | + XE_BO_FLAG_GGTT_INVALIDATE); if (IS_ERR(bo)) return PTR_ERR(bo); diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c index 9c110537d135..521ae24f2314 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.c +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -929,9 +929,9 @@ int xe_guc_pc_init(struct xe_guc_pc *pc) return err; bo = xe_managed_bo_create_pin_map(xe, tile, size, - XE_BO_CREATE_VRAM_IF_DGFX(tile) | - XE_BO_CREATE_GGTT_BIT | - XE_BO_GGTT_INVALIDATE); + XE_BO_FLAG_VRAM_IF_DGFX(tile) | + XE_BO_FLAG_GGTT | + XE_BO_FLAG_GGTT_INVALIDATE); if (IS_ERR(bo)) return PTR_ERR(bo); diff --git a/drivers/gpu/drm/xe/xe_huc.c b/drivers/gpu/drm/xe/xe_huc.c index b545f850087c..78318d73e4cf 100644 --- a/drivers/gpu/drm/xe/xe_huc.c +++ b/drivers/gpu/drm/xe/xe_huc.c @@ -59,8 +59,8 @@ static int huc_alloc_gsc_pkt(struct xe_huc *huc) bo = xe_bo_create_pin_map(xe, gt_to_tile(gt), NULL, PXP43_HUC_AUTH_INOUT_SIZE * 2, ttm_bo_type_kernel, - XE_BO_CREATE_SYSTEM_BIT | - XE_BO_CREATE_GGTT_BIT); + XE_BO_FLAG_SYSTEM | + XE_BO_FLAG_GGTT); if (IS_ERR(bo)) return PTR_ERR(bo); diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index b94924a4f319..a688bb2d96ce 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -518,9 +518,9 @@ static int hw_engine_init(struct xe_gt *gt, struct xe_hw_engine *hwe, xe_reg_sr_apply_whitelist(hwe); hwe->hwsp = xe_managed_bo_create_pin_map(xe, tile, SZ_4K, - XE_BO_CREATE_VRAM_IF_DGFX(tile) | - XE_BO_CREATE_GGTT_BIT | - XE_BO_GGTT_INVALIDATE); + XE_BO_FLAG_VRAM_IF_DGFX(tile) | + XE_BO_FLAG_GGTT | + XE_BO_FLAG_GGTT_INVALIDATE); if (IS_ERR(hwe->hwsp)) { err = PTR_ERR(hwe->hwsp); goto err_name; diff --git a/drivers/gpu/drm/xe/xe_lmtt.c b/drivers/gpu/drm/xe/xe_lmtt.c index 7f504392a8bf..418661a88918 100644 --- a/drivers/gpu/drm/xe/xe_lmtt.c +++ b/drivers/gpu/drm/xe/xe_lmtt.c @@ -70,8 +70,8 @@ static struct xe_lmtt_pt *lmtt_pt_alloc(struct xe_lmtt *lmtt, unsigned int level PAGE_ALIGN(lmtt->ops->lmtt_pte_size(level) * lmtt->ops->lmtt_pte_num(level)), ttm_bo_type_kernel, - XE_BO_CREATE_VRAM_IF_DGFX(lmtt_to_tile(lmtt)) | - XE_BO_NEEDS_64K | XE_BO_CREATE_PINNED_BIT); + XE_BO_FLAG_VRAM_IF_DGFX(lmtt_to_tile(lmtt)) | + XE_BO_NEEDS_64K | XE_BO_FLAG_PINNED); if (IS_ERR(bo)) { err = PTR_ERR(bo); goto out_free_pt; diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index 2ba111b89a47..552ebf6eeee7 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -743,9 +743,9 @@ int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, lrc->bo = xe_bo_create_pin_map(xe, tile, vm, ring_size + xe_lrc_size(xe, hwe->class), ttm_bo_type_kernel, - XE_BO_CREATE_VRAM_IF_DGFX(tile) | - XE_BO_CREATE_GGTT_BIT | - XE_BO_GGTT_INVALIDATE); + XE_BO_FLAG_VRAM_IF_DGFX(tile) | + XE_BO_FLAG_GGTT | + XE_BO_FLAG_GGTT_INVALIDATE); if 
(IS_ERR(lrc->bo)) return PTR_ERR(lrc->bo); diff --git a/drivers/gpu/drm/xe/xe_memirq.c b/drivers/gpu/drm/xe/xe_memirq.c index 0eb28681bec7..95b6e9d7b7db 100644 --- a/drivers/gpu/drm/xe/xe_memirq.c +++ b/drivers/gpu/drm/xe/xe_memirq.c @@ -127,11 +127,11 @@ static int memirq_alloc_pages(struct xe_memirq *memirq) /* XXX: convert to managed bo */ bo = xe_bo_create_pin_map(xe, tile, NULL, SZ_4K, ttm_bo_type_kernel, - XE_BO_CREATE_SYSTEM_BIT | - XE_BO_CREATE_GGTT_BIT | - XE_BO_GGTT_INVALIDATE | - XE_BO_NEEDS_UC | - XE_BO_NEEDS_CPU_ACCESS); + XE_BO_FLAG_SYSTEM | + XE_BO_FLAG_GGTT | + XE_BO_FLAG_GGTT_INVALIDATE | + XE_BO_FLAG_NEEDS_UC | + XE_BO_FLAG_NEEDS_CPU_ACCESS); if (IS_ERR(bo)) { err = PTR_ERR(bo); goto out; diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index ee1bb938c493..5e0f48c51b72 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -155,8 +155,8 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, bo = xe_bo_create_pin_map(vm->xe, tile, vm, num_entries * XE_PAGE_SIZE, ttm_bo_type_kernel, - XE_BO_CREATE_VRAM_IF_DGFX(tile) | - XE_BO_CREATE_PINNED_BIT); + XE_BO_FLAG_VRAM_IF_DGFX(tile) | + XE_BO_FLAG_PINNED); if (IS_ERR(bo)) return PTR_ERR(bo); diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index 580fe869b414..271f13eeb852 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -108,11 +108,11 @@ struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_tile *tile, pt->level = level; bo = xe_bo_create_pin_map(vm->xe, tile, vm, SZ_4K, ttm_bo_type_kernel, - XE_BO_CREATE_VRAM_IF_DGFX(tile) | - XE_BO_CREATE_IGNORE_MIN_PAGE_SIZE_BIT | - XE_BO_CREATE_PINNED_BIT | - XE_BO_CREATE_NO_RESV_EVICT | - XE_BO_PAGETABLE); + XE_BO_FLAG_VRAM_IF_DGFX(tile) | + XE_BO_FLAG_IGNORE_MIN_PAGE_SIZE | + XE_BO_FLAG_PINNED | + XE_BO_FLAG_NO_RESV_EVICT | + XE_BO_FLAG_PAGETABLE); if (IS_ERR(bo)) { err = PTR_ERR(bo); goto err_kfree; diff --git a/drivers/gpu/drm/xe/xe_sa.c b/drivers/gpu/drm/xe/xe_sa.c index 164202ac6454..8941522b7705 100644 --- a/drivers/gpu/drm/xe/xe_sa.c +++ b/drivers/gpu/drm/xe/xe_sa.c @@ -48,9 +48,9 @@ struct xe_sa_manager *xe_sa_bo_manager_init(struct xe_tile *tile, u32 size, u32 sa_manager->bo = NULL; bo = xe_bo_create_pin_map(xe, tile, NULL, size, ttm_bo_type_kernel, - XE_BO_CREATE_VRAM_IF_DGFX(tile) | - XE_BO_CREATE_GGTT_BIT | - XE_BO_GGTT_INVALIDATE); + XE_BO_FLAG_VRAM_IF_DGFX(tile) | + XE_BO_FLAG_GGTT | + XE_BO_FLAG_GGTT_INVALIDATE); if (IS_ERR(bo)) { drm_err(&xe->drm, "failed to allocate bo for sa manager: %ld\n", PTR_ERR(bo)); diff --git a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c index fb35e46d68b4..6ffecf9f23d1 100644 --- a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c +++ b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c @@ -303,7 +303,7 @@ static int __xe_ttm_stolen_io_mem_reserve_stolen(struct xe_device *xe, XE_WARN_ON(IS_DGFX(xe)); /* XXX: Require BO to be mapped to GGTT? 
*/ - if (drm_WARN_ON(&xe->drm, !(bo->flags & XE_BO_CREATE_GGTT_BIT))) + if (drm_WARN_ON(&xe->drm, !(bo->flags & XE_BO_FLAG_GGTT))) return -EIO; /* GGTT is always contiguously mapped */ diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c index 3554f66872b9..ec62296aec33 100644 --- a/drivers/gpu/drm/xe/xe_uc_fw.c +++ b/drivers/gpu/drm/xe/xe_uc_fw.c @@ -763,8 +763,8 @@ int xe_uc_fw_init(struct xe_uc_fw *uc_fw) return 0; err = uc_fw_copy(uc_fw, fw->data, fw->size, - XE_BO_CREATE_SYSTEM_BIT | XE_BO_CREATE_GGTT_BIT | - XE_BO_GGTT_INVALIDATE); + XE_BO_FLAG_SYSTEM | XE_BO_FLAG_GGTT | + XE_BO_FLAG_GGTT_INVALIDATE); uc_fw_release(fw); diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 8b32aa5003df..f4bfb2705956 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -3069,7 +3069,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) goto put_obj; } - if (bos[i]->flags & XE_BO_INTERNAL_64K) { + if (bos[i]->flags & XE_BO_FLAG_INTERNAL_64K) { if (XE_IOCTL_DBG(xe, obj_offset & XE_64K_PAGE_MASK) || XE_IOCTL_DBG(xe, addr & XE_64K_PAGE_MASK) || -- cgit v1.2.3 From 9f18b55b6d3f77b9e778257efdec385d2d5dfa8e Mon Sep 17 00:00:00 2001 From: Himal Prasad Ghimiray Date: Mon, 1 Apr 2024 22:08:06 +0530 Subject: drm/xe/xe2: Add workaround 18033852989 This workaround applies to RCS engine's context, hence added as LRC workaround. v2 - Fix commit description as lrc workaround instead of engine.(Lucas) v3 - COMMON_SLICE_CHICKEN1 is a masked register, add XE_REG_OPTION_MASKED flag. (Matt) BSPEC: 55899 Cc: Matt Roper Reviewed-by: Lucas De Marchi Signed-off-by: Himal Prasad Ghimiray Reviewed-by: Matt Roper Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20240401163806.3821128-1-himal.prasad.ghimiray@intel.com --- drivers/gpu/drm/xe/regs/xe_gt_regs.h | 3 ++- drivers/gpu/drm/xe/xe_wa.c | 4 ++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index d5b21f03beaa..6617c86a096b 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -97,7 +97,8 @@ #define CACHE_MODE_1 XE_REG(0x7004, XE_REG_OPTION_MASKED) #define MSAA_OPTIMIZATION_REDUC_DISABLE REG_BIT(11) -#define COMMON_SLICE_CHICKEN1 XE_REG(0x7010) +#define COMMON_SLICE_CHICKEN1 XE_REG(0x7010, XE_REG_OPTION_MASKED) +#define DISABLE_BOTTOM_CLIP_RECTANGLE_TEST REG_BIT(14) #define HIZ_CHICKEN XE_REG(0x7018, XE_REG_OPTION_MASKED) #define DG1_HZ_READ_SUPPRESSION_OPTIMIZATION_DISABLE REG_BIT(14) diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index 74b33a3845f2..c904e55ced9c 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -579,6 +579,10 @@ static const struct xe_rtp_entry_sr lrc_was[] = { ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(INSTPM(RENDER_RING_BASE), ENABLE_SEMAPHORE_POLL_BIT)) }, + { XE_RTP_NAME("18033852989"), + XE_RTP_RULES(GRAPHICS_VERSION(2004), ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(SET(COMMON_SLICE_CHICKEN1, DISABLE_BOTTOM_CLIP_RECTANGLE_TEST)) + }, {} }; -- cgit v1.2.3 From 37c15c4aae1fe3f67efd2641db8d8c25c2d524ab Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Mon, 1 Apr 2024 15:19:11 -0700 Subject: drm/xe: Use ordered wq for preempt fence waiting Preempt fences can sleep waiting for an exec queue suspend operation to complete. 
If the system_unbound_wq is used for waiting and the number of waiters exceeds max_active this will result in other users of the system_unbound_wq getting starved. Use a device private work queue for preempt fences to avoid starvation of the system_unbound_wq. Even though suspend operations can complete out-of-order, all suspend operations within a VM need to complete before the preempt rebind worker can start. With that, use a device private ordered wq for preempt fence waiting. v2: - Add comment about cleanup on failure (Matt R) - Update commit message (Lucas) Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Signed-off-by: Matthew Brost Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240401221913.139672-2-matthew.brost@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_device.c | 11 ++++++++++- drivers/gpu/drm/xe/xe_device_types.h | 3 +++ drivers/gpu/drm/xe/xe_preempt_fence.c | 2 +- 3 files changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 01bd5ccf05ca..9083f5e02dd9 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -226,6 +226,9 @@ static void xe_device_destroy(struct drm_device *dev, void *dummy) { struct xe_device *xe = to_xe_device(dev); + if (xe->preempt_fence_wq) + destroy_workqueue(xe->preempt_fence_wq); + if (xe->ordered_wq) destroy_workqueue(xe->ordered_wq); @@ -291,9 +294,15 @@ struct xe_device *xe_device_create(struct pci_dev *pdev, INIT_LIST_HEAD(&xe->pinned.external_vram); INIT_LIST_HEAD(&xe->pinned.evicted); + xe->preempt_fence_wq = alloc_ordered_workqueue("xe-preempt-fence-wq", 0); xe->ordered_wq = alloc_ordered_workqueue("xe-ordered-wq", 0); xe->unordered_wq = alloc_workqueue("xe-unordered-wq", 0, 0); - if (!xe->ordered_wq || !xe->unordered_wq) { + if (!xe->ordered_wq || !xe->unordered_wq || + !xe->preempt_fence_wq) { + /* + * Cleanup done in xe_device_destroy via + * drmm_add_action_or_reset register above + */ drm_err(&xe->drm, "Failed to allocate xe workqueues\n"); err = -ENOMEM; goto err; diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 1df3dcc17d75..c710cec835a7 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -363,6 +363,9 @@ struct xe_device { /** @ufence_wq: user fence wait queue */ wait_queue_head_t ufence_wq; + /** @preempt_fence_wq: used to serialize preempt fences */ + struct workqueue_struct *preempt_fence_wq; + /** @ordered_wq: used to serialize compute mode resume */ struct workqueue_struct *ordered_wq; diff --git a/drivers/gpu/drm/xe/xe_preempt_fence.c b/drivers/gpu/drm/xe/xe_preempt_fence.c index 7bce2a332603..7d50c6e89d8e 100644 --- a/drivers/gpu/drm/xe/xe_preempt_fence.c +++ b/drivers/gpu/drm/xe/xe_preempt_fence.c @@ -49,7 +49,7 @@ static bool preempt_fence_enable_signaling(struct dma_fence *fence) struct xe_exec_queue *q = pfence->q; pfence->error = q->ops->suspend(q); - queue_work(system_unbound_wq, &pfence->preempt_work); + queue_work(q->vm->xe->preempt_fence_wq, &pfence->preempt_work); return true; } -- cgit v1.2.3 From 34820967ae7b45411f8f4f737c2d63b0c608e0d7 Mon Sep 17 00:00:00 2001 From: Himal Prasad Ghimiray Date: Mon, 1 Apr 2024 23:23:00 +0530 Subject: drm/xe/xe_migrate: Cast to output precision before multiplying operands Addressing potential overflow in result of multiplication of two lower precision (u32) operands before widening it to higher precision (u64). 
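A minimal standalone sketch of this overflow class, with made-up values (u32/u64 are typedef'd from stdint here purely for illustration; this is not code from the patch):

#include <assert.h>
#include <stdint.h>

typedef uint32_t u32;
typedef uint64_t u64;

int main(void)
{
	u32 count = 0x200000;	/* number of 4K pages */
	u32 page_size = 0x1000;

	/* Both operands are 32-bit, so the multiply is done in 32-bit
	 * arithmetic and wraps to 0 before the assignment widens it.
	 */
	u64 wrapped = count * page_size;

	/* Casting one operand first forces a 64-bit multiply. */
	u64 widened = (u64)count * page_size;

	assert(wrapped == 0 && widened == 0x200000000ULL);
	return 0;
}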
-v2 Fix commit message and description. (Rodrigo) Cc: Rodrigo Vivi Signed-off-by: Himal Prasad Ghimiray Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240401175300.3823653-1-himal.prasad.ghimiray@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_migrate.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 5e0f48c51b72..524b0198fcae 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -227,7 +227,7 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, if (vm->flags & XE_VM_FLAG_64K && level == 1) flags = XE_PDE_64K; - entry = vm->pt_ops->pde_encode_bo(bo, map_ofs + (level - 1) * + entry = vm->pt_ops->pde_encode_bo(bo, map_ofs + (u64)(level - 1) * XE_PAGE_SIZE, pat_index); xe_map_wr(xe, &bo->vmap, map_ofs + XE_PAGE_SIZE * level, u64, entry | flags); @@ -235,7 +235,7 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, /* Write PDE's that point to our BO. */ for (i = 0; i < num_entries - num_level; i++) { - entry = vm->pt_ops->pde_encode_bo(bo, i * XE_PAGE_SIZE, + entry = vm->pt_ops->pde_encode_bo(bo, (u64)i * XE_PAGE_SIZE, pat_index); xe_map_wr(xe, &bo->vmap, map_ofs + XE_PAGE_SIZE + @@ -291,7 +291,7 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, #define VM_SA_UPDATE_UNIT_SIZE (XE_PAGE_SIZE / NUM_VMUSA_UNIT_PER_PAGE) #define NUM_VMUSA_WRITES_PER_UNIT (VM_SA_UPDATE_UNIT_SIZE / sizeof(u64)) drm_suballoc_manager_init(&m->vm_update_sa, - (map_ofs / XE_PAGE_SIZE - NUM_KERNEL_PDE) * + (size_t)(map_ofs / XE_PAGE_SIZE - NUM_KERNEL_PDE) * NUM_VMUSA_UNIT_PER_PAGE, 0); m->pt_bo = bo; @@ -490,7 +490,7 @@ static void emit_pte(struct xe_migrate *m, struct xe_vm *vm = m->q->vm; u16 pat_index; u32 ptes; - u64 ofs = at_pt * XE_PAGE_SIZE; + u64 ofs = (u64)at_pt * XE_PAGE_SIZE; u64 cur_ofs; /* Indirect access needs compression enabled uncached PAT index */ -- cgit v1.2.3 From 972d01d0e357f3799203fa64ab696ac035e16803 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Wed, 3 Apr 2024 15:50:44 -0400 Subject: drm/xe: Protect devcoredump access after unbind MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While we don't have the full flow protection when devcoredump is accessed after device unbind, let's at least for now protect against null dereference: [ 422.766508] KASAN: null-ptr-deref in range [0x0000000000000000-0x0000000000000007] [ 423.119584] RIP: 0010:xe_vm_snapshot_free+0x30/0x180 [xe] While at it, I also fixed a non-standard code-declaration block on the similar function of xe_guc_submit. 
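A short hypothetical sketch of the failure mode being patched (not the driver code itself): IS_ERR() only matches pointers in the reserved error-code range, so a NULL pointer passes the check and gets dereferenced later; IS_ERR_OR_NULL() rejects both cases.

#include <linux/err.h>

struct snap_sketch {
	int num_snaps;
};

static void snap_free_sketch(struct snap_sketch *snap)
{
	/* IS_ERR(NULL) is false, so checking only IS_ERR() would let a
	 * NULL snapshot fall through to the dereference below.
	 */
	if (IS_ERR_OR_NULL(snap))
		return;

	/* snap is now known to be a real object */
	snap->num_snaps = 0;
}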
v2: - Use IS_ERR_OR_NULL (Nirmoy) - Expand to other functions Cc: José Roberto de Souza Cc: Nirmoy Das Reviewed-by: Nirmoy Das Reviewed-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20240403195044.239766-1-rodrigo.vivi@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_guc_submit.c | 1 + drivers/gpu/drm/xe/xe_vm.c | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 13b7e195c7b5..9c30bd9ac8c0 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -1928,6 +1928,7 @@ xe_guc_exec_queue_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snaps void xe_guc_exec_queue_snapshot_free(struct xe_guc_submit_exec_queue_snapshot *snapshot) { int i; + if (!snapshot) return; diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index f4bfb2705956..2a431690d245 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -3426,7 +3426,7 @@ out_unlock: void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap) { - if (IS_ERR(snap)) + if (IS_ERR_OR_NULL(snap)) return; for (int i = 0; i < snap->num_snaps; i++) { @@ -3483,7 +3483,7 @@ void xe_vm_snapshot_print(struct xe_vm_snapshot *snap, struct drm_printer *p) { unsigned long i, j; - if (IS_ERR(snap)) { + if (IS_ERR_OR_NULL(snap)) { drm_printf(p, "[0].error: %li\n", PTR_ERR(snap)); return; } @@ -3514,7 +3514,7 @@ void xe_vm_snapshot_free(struct xe_vm_snapshot *snap) { unsigned long i; - if (IS_ERR(snap)) + if (IS_ERR_OR_NULL(snap)) return; for (i = 0; i < snap->num_snaps; i++) { -- cgit v1.2.3 From a3c86b6d7b1c8ffb46ffd34c3dbe9252da87956b Mon Sep 17 00:00:00 2001 From: Bommu Krishnaiah Date: Sun, 10 Dec 2023 05:29:48 +0530 Subject: drm/xe: prefer snprintf over sprintf Since the sprintf() function lacks built-in protection against buffer overflows, use the snprintf() function instead. 
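The difference in one standalone sketch (buffer size and value chosen only for illustration, not taken from the diffs):

#include <stdio.h>

int main(void)
{
	char name[8];

	/* sprintf(name, "vram%d_mm", 123456); would write 14 bytes
	 * into an 8-byte buffer and corrupt adjacent memory.
	 */
	snprintf(name, sizeof(name), "vram%d_mm", 123456);

	/* Output is truncated to "vram123" but always NUL-terminated. */
	puts(name);
	return 0;
}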
v2: Removed hard coded values and used sizeof() Signed-off-by: Bommu Krishnaiah Cc: Himal Prasad Ghimiray Cc: Tejas Upadhyay Reviewed-by: Himal Prasad Ghimiray Link: https://patchwork.freedesktop.org/patch/msgid/20231209235949.54524-2-krishnaiah.bommu@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_debugfs.c | 2 +- drivers/gpu/drm/xe/xe_exec_queue.c | 12 ++++++------ drivers/gpu/drm/xe/xe_gt_debugfs.c | 2 +- drivers/gpu/drm/xe/xe_gt_idle.c | 4 ++-- drivers/gpu/drm/xe/xe_hw_fence.c | 2 +- 5 files changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_debugfs.c b/drivers/gpu/drm/xe/xe_debugfs.c index 8abdf3c17e1d..86150cafe0ff 100644 --- a/drivers/gpu/drm/xe/xe_debugfs.c +++ b/drivers/gpu/drm/xe/xe_debugfs.c @@ -129,7 +129,7 @@ void xe_debugfs_register(struct xe_device *xe) if (man) { char name[16]; - sprintf(name, "vram%d_mm", mem_type - XE_PL_VRAM0); + snprintf(name, sizeof(name), "vram%d_mm", mem_type - XE_PL_VRAM0); ttm_resource_manager_create_debugfs(man, root, name); } } diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index 730eb7d2a639..71bd52dfebcf 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -225,22 +225,22 @@ void xe_exec_queue_assign_name(struct xe_exec_queue *q, u32 instance) { switch (q->class) { case XE_ENGINE_CLASS_RENDER: - sprintf(q->name, "rcs%d", instance); + snprintf(q->name, sizeof(q->name), "rcs%d", instance); break; case XE_ENGINE_CLASS_VIDEO_DECODE: - sprintf(q->name, "vcs%d", instance); + snprintf(q->name, sizeof(q->name), "vcs%d", instance); break; case XE_ENGINE_CLASS_VIDEO_ENHANCE: - sprintf(q->name, "vecs%d", instance); + snprintf(q->name, sizeof(q->name), "vecs%d", instance); break; case XE_ENGINE_CLASS_COPY: - sprintf(q->name, "bcs%d", instance); + snprintf(q->name, sizeof(q->name), "bcs%d", instance); break; case XE_ENGINE_CLASS_COMPUTE: - sprintf(q->name, "ccs%d", instance); + snprintf(q->name, sizeof(q->name), "ccs%d", instance); break; case XE_ENGINE_CLASS_OTHER: - sprintf(q->name, "gsccs%d", instance); + snprintf(q->name, sizeof(q->name), "gsccs%d", instance); break; default: XE_WARN_ON(q->class); diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c index ee4285d42a18..ff7f4cf52fa9 100644 --- a/drivers/gpu/drm/xe/xe_gt_debugfs.c +++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c @@ -269,7 +269,7 @@ void xe_gt_debugfs_register(struct xe_gt *gt) xe_gt_assert(gt, minor->debugfs_root); - sprintf(name, "gt%d", gt->info.id); + snprintf(name, sizeof(name), "gt%d", gt->info.id); root = debugfs_create_dir(name, minor->debugfs_root); if (IS_ERR(root)) { drm_warn(&xe->drm, "Create GT directory failed"); diff --git a/drivers/gpu/drm/xe/xe_gt_idle.c b/drivers/gpu/drm/xe/xe_gt_idle.c index 2984680de3f9..bc1426f8d731 100644 --- a/drivers/gpu/drm/xe/xe_gt_idle.c +++ b/drivers/gpu/drm/xe/xe_gt_idle.c @@ -166,10 +166,10 @@ void xe_gt_idle_sysfs_init(struct xe_gt_idle *gtidle) } if (xe_gt_is_media_type(gt)) { - sprintf(gtidle->name, "gt%d-mc", gt->info.id); + snprintf(gtidle->name, sizeof(gtidle->name), "gt%d-mc", gt->info.id); gtidle->idle_residency = xe_guc_pc_mc6_residency; } else { - sprintf(gtidle->name, "gt%d-rc", gt->info.id); + snprintf(gtidle->name, sizeof(gtidle->name), "gt%d-rc", gt->info.id); gtidle->idle_residency = xe_guc_pc_rc6_residency; } diff --git a/drivers/gpu/drm/xe/xe_hw_fence.c b/drivers/gpu/drm/xe/xe_hw_fence.c index a5de3e7b0bd6..f872ef103127 100644 --- a/drivers/gpu/drm/xe/xe_hw_fence.c +++ 
b/drivers/gpu/drm/xe/xe_hw_fence.c @@ -130,7 +130,7 @@ void xe_hw_fence_ctx_init(struct xe_hw_fence_ctx *ctx, struct xe_gt *gt, ctx->irq = irq; ctx->dma_fence_ctx = dma_fence_context_alloc(1); ctx->next_seqno = XE_FENCE_INITIAL_SEQNO; - sprintf(ctx->name, "%s", name); + snprintf(ctx->name, sizeof(ctx->name), "%s", name); } void xe_hw_fence_ctx_finish(struct xe_hw_fence_ctx *ctx) -- cgit v1.2.3 From 91b93fae179fca7366acbca662f7582dab062863 Mon Sep 17 00:00:00 2001 From: Bommu Krishnaiah Date: Sun, 10 Dec 2023 05:29:49 +0530 Subject: drm/xe/xe_hw_engine_class_sysfs: use sysfs_emit() for attr's _show() sprintf() is deprecated for sysfs, use the preferred sysfs_emit() instead. v2: used sysfs_emit instead of sprintf Signed-off-by: Bommu Krishnaiah Cc: Himal Prasad Ghimiray Cc: Tejas Upadhyay Reviewed-by: Himal Prasad Ghimiray Link: https://patchwork.freedesktop.org/patch/msgid/20231209235949.54524-3-krishnaiah.bommu@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c | 36 +++++++++++++-------------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c index aba01edffacd..c5084d94c442 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c +++ b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c @@ -72,7 +72,7 @@ static ssize_t job_timeout_max_show(struct kobject *kobj, { struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj); - return sprintf(buf, "%u\n", eclass->sched_props.job_timeout_max); + return sysfs_emit(buf, "%u\n", eclass->sched_props.job_timeout_max); } static const struct kobj_attribute job_timeout_max_attr = @@ -108,7 +108,7 @@ static ssize_t job_timeout_min_show(struct kobject *kobj, { struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj); - return sprintf(buf, "%u\n", eclass->sched_props.job_timeout_min); + return sysfs_emit(buf, "%u\n", eclass->sched_props.job_timeout_min); } static const struct kobj_attribute job_timeout_min_attr = @@ -141,7 +141,7 @@ static ssize_t job_timeout_show(struct kobject *kobj, { struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj); - return sprintf(buf, "%u\n", eclass->sched_props.job_timeout_ms); + return sysfs_emit(buf, "%u\n", eclass->sched_props.job_timeout_ms); } static const struct kobj_attribute job_timeout_attr = @@ -152,7 +152,7 @@ static ssize_t job_timeout_default(struct kobject *kobj, { struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj->parent); - return sprintf(buf, "%u\n", eclass->defaults.job_timeout_ms); + return sysfs_emit(buf, "%u\n", eclass->defaults.job_timeout_ms); } static const struct kobj_attribute job_timeout_def = @@ -163,7 +163,7 @@ static ssize_t job_timeout_min_default(struct kobject *kobj, { struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj->parent); - return sprintf(buf, "%u\n", eclass->defaults.job_timeout_min); + return sysfs_emit(buf, "%u\n", eclass->defaults.job_timeout_min); } static const struct kobj_attribute job_timeout_min_def = @@ -174,7 +174,7 @@ static ssize_t job_timeout_max_default(struct kobject *kobj, { struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj->parent); - return sprintf(buf, "%u\n", eclass->defaults.job_timeout_max); + return sysfs_emit(buf, "%u\n", eclass->defaults.job_timeout_max); } static const struct kobj_attribute job_timeout_max_def = @@ -233,7 +233,7 @@ static ssize_t timeslice_duration_max_show(struct kobject *kobj, { struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj); - return 
sprintf(buf, "%u\n", eclass->sched_props.timeslice_max); + return sysfs_emit(buf, "%u\n", eclass->sched_props.timeslice_max); } static const struct kobj_attribute timeslice_duration_max_attr = @@ -271,7 +271,7 @@ static ssize_t timeslice_duration_min_show(struct kobject *kobj, { struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj); - return sprintf(buf, "%u\n", eclass->sched_props.timeslice_min); + return sysfs_emit(buf, "%u\n", eclass->sched_props.timeslice_min); } static const struct kobj_attribute timeslice_duration_min_attr = @@ -283,7 +283,7 @@ static ssize_t timeslice_duration_show(struct kobject *kobj, { struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj); - return sprintf(buf, "%u\n", eclass->sched_props.timeslice_us); + return sysfs_emit(buf, "%u\n", eclass->sched_props.timeslice_us); } static const struct kobj_attribute timeslice_duration_attr = @@ -295,7 +295,7 @@ static ssize_t timeslice_default(struct kobject *kobj, { struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj->parent); - return sprintf(buf, "%u\n", eclass->defaults.timeslice_us); + return sysfs_emit(buf, "%u\n", eclass->defaults.timeslice_us); } static const struct kobj_attribute timeslice_duration_def = @@ -306,7 +306,7 @@ static ssize_t timeslice_min_default(struct kobject *kobj, { struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj->parent); - return sprintf(buf, "%u\n", eclass->defaults.timeslice_min); + return sysfs_emit(buf, "%u\n", eclass->defaults.timeslice_min); } static const struct kobj_attribute timeslice_duration_min_def = @@ -317,7 +317,7 @@ static ssize_t timeslice_max_default(struct kobject *kobj, { struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj->parent); - return sprintf(buf, "%u\n", eclass->defaults.timeslice_max); + return sysfs_emit(buf, "%u\n", eclass->defaults.timeslice_max); } static const struct kobj_attribute timeslice_duration_max_def = @@ -350,7 +350,7 @@ static ssize_t preempt_timeout_show(struct kobject *kobj, { struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj); - return sprintf(buf, "%u\n", eclass->sched_props.preempt_timeout_us); + return sysfs_emit(buf, "%u\n", eclass->sched_props.preempt_timeout_us); } static const struct kobj_attribute preempt_timeout_attr = @@ -362,7 +362,7 @@ static ssize_t preempt_timeout_default(struct kobject *kobj, { struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj->parent); - return sprintf(buf, "%u\n", eclass->defaults.preempt_timeout_us); + return sysfs_emit(buf, "%u\n", eclass->defaults.preempt_timeout_us); } static const struct kobj_attribute preempt_timeout_def = @@ -374,7 +374,7 @@ static ssize_t preempt_timeout_min_default(struct kobject *kobj, { struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj->parent); - return sprintf(buf, "%u\n", eclass->defaults.preempt_timeout_min); + return sysfs_emit(buf, "%u\n", eclass->defaults.preempt_timeout_min); } static const struct kobj_attribute preempt_timeout_min_def = @@ -386,7 +386,7 @@ static ssize_t preempt_timeout_max_default(struct kobject *kobj, { struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj->parent); - return sprintf(buf, "%u\n", eclass->defaults.preempt_timeout_max); + return sysfs_emit(buf, "%u\n", eclass->defaults.preempt_timeout_max); } static const struct kobj_attribute preempt_timeout_max_def = @@ -422,7 +422,7 @@ static ssize_t preempt_timeout_max_show(struct kobject *kobj, { struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj); - return sprintf(buf, "%u\n", 
eclass->sched_props.preempt_timeout_max); + return sysfs_emit(buf, "%u\n", eclass->sched_props.preempt_timeout_max); } static const struct kobj_attribute preempt_timeout_max_attr = @@ -459,7 +459,7 @@ static ssize_t preempt_timeout_min_show(struct kobject *kobj, { struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj); - return sprintf(buf, "%u\n", eclass->sched_props.preempt_timeout_min); + return sysfs_emit(buf, "%u\n", eclass->sched_props.preempt_timeout_min); } static const struct kobj_attribute preempt_timeout_min_attr = -- cgit v1.2.3 From 12f95f9900c07e198c6e3ee91d157dda447c4a9c Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Thu, 4 Apr 2024 17:50:45 +0200 Subject: drm/xe/guc: Prefer GT oriented logs for GuC messages A platform can have more than one GuC, so we should use GT-oriented logs to correctly identify the source of the message. Reviewed-by: Lucas De Marchi Signed-off-by: Michal Wajdeczko Link: https://patchwork.freedesktop.org/patch/msgid/20240404155046.627-1-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_guc.c | 79 +++++++++++++++++++++------------------------ 1 file changed, 36 insertions(+), 43 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index 77be3bc2d7c0..afb083c7cbfa 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -18,6 +18,7 @@ #include "xe_device.h" #include "xe_force_wake.h" #include "xe_gt.h" +#include "xe_gt_printk.h" #include "xe_guc_ads.h" #include "xe_guc_ct.h" #include "xe_guc_hwconfig.h" @@ -181,7 +182,7 @@ static u32 guc_ctl_devid(struct xe_guc *guc) static void guc_init_params(struct xe_guc *guc) { - struct xe_device *xe = guc_to_xe(guc); + struct xe_gt *gt = guc_to_gt(guc); u32 *params = guc->params; int i; @@ -196,12 +197,12 @@ static void guc_init_params(struct xe_guc *guc) params[GUC_CTL_DEVID] = guc_ctl_devid(guc); for (i = 0; i < GUC_CTL_MAX_DWORDS; i++) - drm_dbg(&xe->drm, "GuC param[%2d] = 0x%08x\n", i, params[i]); + xe_gt_dbg(gt, "GuC param[%2d] = 0x%08x\n", i, params[i]); } static void guc_init_params_post_hwconfig(struct xe_guc *guc) { - struct xe_device *xe = guc_to_xe(guc); + struct xe_gt *gt = guc_to_gt(guc); u32 *params = guc->params; int i; @@ -216,7 +217,7 @@ static void guc_init_params_post_hwconfig(struct xe_guc *guc) params[GUC_CTL_DEVID] = guc_ctl_devid(guc); for (i = 0; i < GUC_CTL_MAX_DWORDS; i++) - drm_dbg(&xe->drm, "GuC param[%2d] = 0x%08x\n", i, params[i]); + xe_gt_dbg(gt, "GuC param[%2d] = 0x%08x\n", i, params[i]); } /* @@ -321,7 +322,7 @@ int xe_guc_init(struct xe_guc *guc) if (ret) goto out; - ret = drmm_add_action_or_reset(>_to_xe(gt)->drm, guc_fini, guc); + ret = drmm_add_action_or_reset(&xe->drm, guc_fini, guc); if (ret) goto out; @@ -334,7 +335,7 @@ int xe_guc_init(struct xe_guc *guc) return 0; out: - drm_err(&xe->drm, "GuC init failed with %d", ret); + xe_gt_err(gt, "GuC init failed with %pe\n", ERR_PTR(ret)); return ret; } @@ -371,7 +372,6 @@ int xe_guc_post_load_init(struct xe_guc *guc) int xe_guc_reset(struct xe_guc *guc) { - struct xe_device *xe = guc_to_xe(guc); struct xe_gt *gt = guc_to_gt(guc); u32 guc_status, gdrst; int ret; @@ -382,16 +382,14 @@ int xe_guc_reset(struct xe_guc *guc) ret = xe_mmio_wait32(gt, GDRST, GRDOM_GUC, 0, 5000, &gdrst, false); if (ret) { - drm_err(&xe->drm, "GuC reset timed out, GDRST=0x%8x\n", - gdrst); + xe_gt_err(gt, "GuC reset timed out, GDRST=%#x\n", gdrst); goto err_out; } guc_status = xe_mmio_read32(gt, GUC_STATUS); if (!(guc_status & GS_MIA_IN_RESET)) { - drm_err(&xe->drm, - "GuC status: 0x%x, MIA 
core expected to be in reset\n", - guc_status); + xe_gt_err(gt, "GuC status: %#x, MIA core expected to be in reset\n", + guc_status); ret = -EIO; goto err_out; } @@ -454,7 +452,7 @@ static int guc_xfer_rsa(struct xe_guc *guc) static int guc_wait_ucode(struct xe_guc *guc) { - struct xe_device *xe = guc_to_xe(guc); + struct xe_gt *gt = guc_to_gt(guc); u32 status; int ret; @@ -475,35 +473,32 @@ static int guc_wait_ucode(struct xe_guc *guc) * 200ms. Even at slowest clock, this should be sufficient. And * in the working case, a larger timeout makes no difference. */ - ret = xe_mmio_wait32(guc_to_gt(guc), GUC_STATUS, GS_UKERNEL_MASK, + ret = xe_mmio_wait32(gt, GUC_STATUS, GS_UKERNEL_MASK, FIELD_PREP(GS_UKERNEL_MASK, XE_GUC_LOAD_STATUS_READY), 200000, &status, false); if (ret) { - struct drm_device *drm = &xe->drm; - - drm_info(drm, "GuC load failed: status = 0x%08X\n", status); - drm_info(drm, "GuC load failed: status: Reset = %d, BootROM = 0x%02X, UKernel = 0x%02X, MIA = 0x%02X, Auth = 0x%02X\n", - REG_FIELD_GET(GS_MIA_IN_RESET, status), - REG_FIELD_GET(GS_BOOTROM_MASK, status), - REG_FIELD_GET(GS_UKERNEL_MASK, status), - REG_FIELD_GET(GS_MIA_MASK, status), - REG_FIELD_GET(GS_AUTH_STATUS_MASK, status)); + xe_gt_info(gt, "GuC load failed: status = 0x%08X\n", status); + xe_gt_info(gt, "GuC status: Reset = %u, BootROM = %#X, UKernel = %#X, MIA = %#X, Auth = %#X\n", + REG_FIELD_GET(GS_MIA_IN_RESET, status), + REG_FIELD_GET(GS_BOOTROM_MASK, status), + REG_FIELD_GET(GS_UKERNEL_MASK, status), + REG_FIELD_GET(GS_MIA_MASK, status), + REG_FIELD_GET(GS_AUTH_STATUS_MASK, status)); if ((status & GS_BOOTROM_MASK) == GS_BOOTROM_RSA_FAILED) { - drm_info(drm, "GuC firmware signature verification failed\n"); + xe_gt_info(gt, "GuC firmware signature verification failed\n"); ret = -ENOEXEC; } if (REG_FIELD_GET(GS_UKERNEL_MASK, status) == XE_GUC_LOAD_STATUS_EXCEPTION) { - drm_info(drm, "GuC firmware exception. EIP: %#x\n", - xe_mmio_read32(guc_to_gt(guc), - SOFT_SCRATCH(13))); + xe_gt_info(gt, "GuC firmware exception. 
EIP: %#x\n", + xe_mmio_read32(gt, SOFT_SCRATCH(13))); ret = -ENXIO; } } else { - drm_dbg(&xe->drm, "GuC successfully loaded"); + xe_gt_dbg(gt, "GuC successfully loaded\n"); } return ret; @@ -603,12 +598,10 @@ static void guc_handle_mmio_msg(struct xe_guc *guc) xe_mmio_write32(gt, SOFT_SCRATCH(15), 0); if (msg & XE_GUC_RECV_MSG_CRASH_DUMP_POSTED) - drm_err(&guc_to_xe(guc)->drm, - "Received early GuC crash dump notification!\n"); + xe_gt_err(gt, "Received early GuC crash dump notification!\n"); if (msg & XE_GUC_RECV_MSG_EXCEPTION) - drm_err(&guc_to_xe(guc)->drm, - "Received early GuC exception notification!\n"); + xe_gt_err(gt, "Received early GuC exception notification!\n"); } static void guc_enable_irq(struct xe_guc *guc) @@ -659,15 +652,15 @@ int xe_guc_enable_communication(struct xe_guc *guc) int xe_guc_suspend(struct xe_guc *guc) { - int ret; + struct xe_gt *gt = guc_to_gt(guc); u32 action[] = { XE_GUC_ACTION_CLIENT_SOFT_RESET, }; + int ret; ret = xe_guc_mmio_send(guc, action, ARRAY_SIZE(action)); if (ret) { - drm_err(&guc_to_xe(guc)->drm, - "GuC suspend: CLIENT_SOFT_RESET fail: %d!\n", ret); + xe_gt_err(gt, "GuC suspend failed: %pe\n", ERR_PTR(ret)); return ret; } @@ -742,8 +735,8 @@ retry: 50000, &reply, false); if (ret) { timeout: - drm_err(&xe->drm, "mmio request %#x: no reply %#x\n", - request[0], reply); + xe_gt_err(gt, "GuC mmio request %#x: no reply %#x\n", + request[0], reply); return ret; } @@ -781,8 +774,8 @@ timeout: GUC_HXG_TYPE_NO_RESPONSE_RETRY) { u32 reason = FIELD_GET(GUC_HXG_RETRY_MSG_0_REASON, header); - drm_dbg(&xe->drm, "mmio request %#x: retrying, reason %#x\n", - request[0], reason); + xe_gt_dbg(gt, "GuC mmio request %#x: retrying, reason %#x\n", + request[0], reason); goto retry; } @@ -791,16 +784,16 @@ timeout: u32 hint = FIELD_GET(GUC_HXG_FAILURE_MSG_0_HINT, header); u32 error = FIELD_GET(GUC_HXG_FAILURE_MSG_0_ERROR, header); - drm_err(&xe->drm, "mmio request %#x: failure %#x/%#x\n", - request[0], error, hint); + xe_gt_err(gt, "GuC mmio request %#x: failure %#x hint %#x\n", + request[0], error, hint); return -ENXIO; } if (FIELD_GET(GUC_HXG_MSG_0_TYPE, header) != GUC_HXG_TYPE_RESPONSE_SUCCESS) { proto: - drm_err(&xe->drm, "mmio request %#x: unexpected reply %#x\n", - request[0], header); + xe_gt_err(gt, "GuC mmio request %#x: unexpected reply %#x\n", + request[0], header); return -EPROTO; } -- cgit v1.2.3 From f73155654de519e2bc003b1f4b06f4f0e74b83be Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Thu, 4 Apr 2024 17:50:46 +0200 Subject: drm/xe/guc: Reuse code while debugging GuC params There is no need to duplicate code to print GuC parameters. 
Reviewed-by: Lucas De Marchi Signed-off-by: Michal Wajdeczko Link: https://patchwork.freedesktop.org/patch/msgid/20240404155046.627-2-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_guc.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index afb083c7cbfa..59fe73770711 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -180,7 +180,7 @@ static u32 guc_ctl_devid(struct xe_guc *guc) return (((u32)xe->info.devid) << 16) | xe->info.revid; } -static void guc_init_params(struct xe_guc *guc) +static void guc_print_params(struct xe_guc *guc) { struct xe_gt *gt = guc_to_gt(guc); u32 *params = guc->params; @@ -189,6 +189,14 @@ static void guc_init_params(struct xe_guc *guc) BUILD_BUG_ON(sizeof(guc->params) != GUC_CTL_MAX_DWORDS * sizeof(u32)); BUILD_BUG_ON(GUC_CTL_MAX_DWORDS + 2 != SOFT_SCRATCH_COUNT); + for (i = 0; i < GUC_CTL_MAX_DWORDS; i++) + xe_gt_dbg(gt, "GuC param[%2d] = 0x%08x\n", i, params[i]); +} + +static void guc_init_params(struct xe_guc *guc) +{ + u32 *params = guc->params; + params[GUC_CTL_LOG_PARAMS] = guc_ctl_log_params_flags(guc); params[GUC_CTL_FEATURE] = 0; params[GUC_CTL_DEBUG] = guc_ctl_debug_flags(guc); @@ -196,18 +204,12 @@ static void guc_init_params(struct xe_guc *guc) params[GUC_CTL_WA] = 0; params[GUC_CTL_DEVID] = guc_ctl_devid(guc); - for (i = 0; i < GUC_CTL_MAX_DWORDS; i++) - xe_gt_dbg(gt, "GuC param[%2d] = 0x%08x\n", i, params[i]); + guc_print_params(guc); } static void guc_init_params_post_hwconfig(struct xe_guc *guc) { - struct xe_gt *gt = guc_to_gt(guc); u32 *params = guc->params; - int i; - - BUILD_BUG_ON(sizeof(guc->params) != GUC_CTL_MAX_DWORDS * sizeof(u32)); - BUILD_BUG_ON(GUC_CTL_MAX_DWORDS + 2 != SOFT_SCRATCH_COUNT); params[GUC_CTL_LOG_PARAMS] = guc_ctl_log_params_flags(guc); params[GUC_CTL_FEATURE] = guc_ctl_feature_flags(guc); @@ -216,8 +218,7 @@ static void guc_init_params_post_hwconfig(struct xe_guc *guc) params[GUC_CTL_WA] = guc_ctl_wa_flags(guc); params[GUC_CTL_DEVID] = guc_ctl_devid(guc); - for (i = 0; i < GUC_CTL_MAX_DWORDS; i++) - xe_gt_dbg(gt, "GuC param[%2d] = 0x%08x\n", i, params[i]); + guc_print_params(guc); } /* -- cgit v1.2.3 From 788d2ad60d961f1f2713fa17856bce509a966d40 Mon Sep 17 00:00:00 2001 From: Andrzej Hajda Date: Fri, 5 Apr 2024 08:23:36 +0200 Subject: drm/xe: fix multicast support for Xe_LP platforms Xe_LP has six subslices per slice. v2: fixed commit message and subject (Matt) Bspec: 66696 Fixes: bde5d76785bc ("drm/xe: Add helper macro to loop each DSS") Signed-off-by: Andrzej Hajda Reviewed-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20240405-mcr_adlp-v2-1-2fd1e4325ef2@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_gt_mcr.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c index 866bbd26ba3f..577bd7043740 100644 --- a/drivers/gpu/drm/xe/xe_gt_mcr.c +++ b/drivers/gpu/drm/xe/xe_gt_mcr.c @@ -297,7 +297,12 @@ static void init_steering_mslice(struct xe_gt *gt) static unsigned int dss_per_group(struct xe_gt *gt) { - return gt_to_xe(gt)->info.platform == XE_PVC ? 
8 : 4; + if (gt_to_xe(gt)->info.platform == XE_PVC) + return 8; + else if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1250) + return 4; + else + return 6; } /** -- cgit v1.2.3 From 48651e18bbe033be5444a01d37565e75ffef20fd Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Fri, 5 Apr 2024 14:35:20 +0200 Subject: drm/xe: Move PTE/PDE bit definitions to proper header We already have dedicated header for GGTT/PPGTT definitions. It's also cleaner to separate them from implementation macros. Signed-off-by: Michal Wajdeczko Cc: Lucas De Marchi Cc: Matt Roper Acked-by: Lucas De Marchi Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240405123520.847-1-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/display/xe_plane_initial.c | 1 + drivers/gpu/drm/xe/regs/xe_gtt_defs.h | 21 +++++++++++++++++++++ drivers/gpu/drm/xe/xe_bo.h | 21 --------------------- drivers/gpu/drm/xe/xe_migrate.c | 1 + drivers/gpu/drm/xe/xe_pt.c | 1 + drivers/gpu/drm/xe/xe_vm.c | 1 + 6 files changed, 25 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/xe/display/xe_plane_initial.c b/drivers/gpu/drm/xe/display/xe_plane_initial.c index 7132cd5d9545..9693c56d386b 100644 --- a/drivers/gpu/drm/xe/display/xe_plane_initial.c +++ b/drivers/gpu/drm/xe/display/xe_plane_initial.c @@ -6,6 +6,7 @@ /* for ioread64 */ #include +#include "regs/xe_gtt_defs.h" #include "xe_ggtt.h" #include "i915_drv.h" diff --git a/drivers/gpu/drm/xe/regs/xe_gtt_defs.h b/drivers/gpu/drm/xe/regs/xe_gtt_defs.h index 9196d71bad37..558519ce48c7 100644 --- a/drivers/gpu/drm/xe/regs/xe_gtt_defs.h +++ b/drivers/gpu/drm/xe/regs/xe_gtt_defs.h @@ -11,4 +11,25 @@ #define GUC_GGTT_TOP 0xFEE00000 +#define XELPG_PPGTT_PTE_PAT3 BIT_ULL(62) +#define XE2_PPGTT_PTE_PAT4 BIT_ULL(61) +#define XE_PPGTT_PDE_PDPE_PAT2 BIT_ULL(12) +#define XE_PPGTT_PTE_PAT2 BIT_ULL(7) +#define XE_PPGTT_PTE_PAT1 BIT_ULL(4) +#define XE_PPGTT_PTE_PAT0 BIT_ULL(3) + +#define XE_PDE_PS_2M BIT_ULL(7) +#define XE_PDPE_PS_1G BIT_ULL(7) +#define XE_PDE_IPS_64K BIT_ULL(11) + +#define XE_GGTT_PTE_DM BIT_ULL(1) +#define XE_USM_PPGTT_PTE_AE BIT_ULL(10) +#define XE_PPGTT_PTE_DM BIT_ULL(11) +#define XE_PDE_64K BIT_ULL(6) +#define XE_PTE_PS64 BIT_ULL(8) +#define XE_PTE_NULL BIT_ULL(9) + +#define XE_PAGE_PRESENT BIT_ULL(0) +#define XE_PAGE_RW BIT_ULL(1) + #endif diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h index bae042b35fa8..4824ab145bc4 100644 --- a/drivers/gpu/drm/xe/xe_bo.h +++ b/drivers/gpu/drm/xe/xe_bo.h @@ -50,13 +50,6 @@ #define XE_BO_FLAG_INTERNAL_TEST BIT(30) #define XE_BO_FLAG_INTERNAL_64K BIT(31) -#define XELPG_PPGTT_PTE_PAT3 BIT_ULL(62) -#define XE2_PPGTT_PTE_PAT4 BIT_ULL(61) -#define XE_PPGTT_PDE_PDPE_PAT2 BIT_ULL(12) -#define XE_PPGTT_PTE_PAT2 BIT_ULL(7) -#define XE_PPGTT_PTE_PAT1 BIT_ULL(4) -#define XE_PPGTT_PTE_PAT0 BIT_ULL(3) - #define XE_PTE_SHIFT 12 #define XE_PAGE_SIZE (1 << XE_PTE_SHIFT) #define XE_PTE_MASK (XE_PAGE_SIZE - 1) @@ -69,20 +62,6 @@ #define XE_64K_PTE_MASK (XE_64K_PAGE_SIZE - 1) #define XE_64K_PDE_MASK (XE_PDE_MASK >> 4) -#define XE_PDE_PS_2M BIT_ULL(7) -#define XE_PDPE_PS_1G BIT_ULL(7) -#define XE_PDE_IPS_64K BIT_ULL(11) - -#define XE_GGTT_PTE_DM BIT_ULL(1) -#define XE_USM_PPGTT_PTE_AE BIT_ULL(10) -#define XE_PPGTT_PTE_DM BIT_ULL(11) -#define XE_PDE_64K BIT_ULL(6) -#define XE_PTE_PS64 BIT_ULL(8) -#define XE_PTE_NULL BIT_ULL(9) - -#define XE_PAGE_PRESENT BIT_ULL(0) -#define XE_PAGE_RW BIT_ULL(1) - #define XE_PL_SYSTEM TTM_PL_SYSTEM #define XE_PL_TT TTM_PL_TT #define XE_PL_VRAM0 TTM_PL_VRAM diff --git 
a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 524b0198fcae..fe713d57cbf6 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -16,6 +16,7 @@ #include "instructions/xe_mi_commands.h" #include "regs/xe_gpu_commands.h" +#include "regs/xe_gtt_defs.h" #include "tests/xe_test.h" #include "xe_assert.h" #include "xe_bb.h" diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index 271f13eeb852..5b7930f46cf3 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -5,6 +5,7 @@ #include "xe_pt.h" +#include "regs/xe_gtt_defs.h" #include "xe_bo.h" #include "xe_device.h" #include "xe_drm_client.h" diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 2a431690d245..fff10e1717ff 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -21,6 +21,7 @@ #include +#include "regs/xe_gtt_defs.h" #include "xe_assert.h" #include "xe_bo.h" #include "xe_device.h" -- cgit v1.2.3 From 1d7d997cd764ad1d462d857e3036d61bbcf534d1 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Fri, 5 Apr 2024 13:38:44 +0200 Subject: drm/xe: Drop xe_vm_assert_held() macro definition from xe_bo.h It is already defined in xe_vm.h and shouldn't be duplicated. Signed-off-by: Michal Wajdeczko Reviewed-by: Himal Prasad Ghimiray Link: https://patchwork.freedesktop.org/patch/msgid/20240405113844.803-1-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_bo.h | 8 -------- 1 file changed, 8 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h index 4824ab145bc4..a885b14bf595 100644 --- a/drivers/gpu/drm/xe/xe_bo.h +++ b/drivers/gpu/drm/xe/xe_bo.h @@ -13,14 +13,6 @@ #include "xe_vm_types.h" #include "xe_vm.h" -/** - * xe_vm_assert_held(vm) - Assert that the vm's reservation object is held. - * @vm: The vm - */ -#define xe_vm_assert_held(vm) dma_resv_assert_held(xe_vm_resv(vm)) - - - #define XE_DEFAULT_GTT_SIZE_MB 3072ULL /* 3GB by default */ #define XE_BO_FLAG_USER BIT(0) -- cgit v1.2.3 From 104f7519db0a6ba4d1df1d77e4244e6f96d809ee Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Sat, 6 Apr 2024 16:39:45 +0200 Subject: drm/xe/guc: Use drm_device-managed version of mutex_init() This is a safer approach and will help resolve a cleanup ordering conflict related to the GuC ID manager.
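For illustration only, a minimal sketch of the pattern this patch moves to (mirroring the hunks below, not new driver code): with drm_device-managed init, the matching destroy step is registered with the drm_device, so no explicit mutex_destroy() is needed in the cleanup action.

    /* before: manual init plus mutex_destroy() in guc_submit_fini() */
    mutex_init(&guc->submission_state.lock);

    /* after: drmm-managed init; cleanup runs automatically on
     * drm_device teardown, in reverse registration order */
    err = drmm_mutex_init(&xe->drm, &guc->submission_state.lock);
    if (err)
        return err;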
Signed-off-by: Michal Wajdeczko Cc: Matthew Brost Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240406143946.979-1-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_guc_submit.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 9c30bd9ac8c0..4c444fddfba6 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -238,7 +238,6 @@ static void guc_submit_fini(struct drm_device *drm, void *arg) xa_destroy(&guc->submission_state.exec_queue_lookup); free_submit_wq(guc); - mutex_destroy(&guc->submission_state.lock); } static const struct xe_exec_queue_ops guc_exec_queue_ops; @@ -263,13 +262,16 @@ int xe_guc_submit_init(struct xe_guc *guc) struct xe_gt *gt = guc_to_gt(guc); int err; + err = drmm_mutex_init(&xe->drm, &guc->submission_state.lock); + if (err) + return err; + err = alloc_submit_wq(guc); if (err) return err; gt->exec_queue_ops = &guc_exec_queue_ops; - mutex_init(&guc->submission_state.lock); xa_init(&guc->submission_state.exec_queue_lookup); spin_lock_init(&guc->submission_state.suspend.lock); -- cgit v1.2.3 From 83787afe069ee2bc5ed4aaf6e29bdaabd06fb4db Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Sat, 6 Apr 2024 16:39:46 +0200 Subject: drm/xe/guc: Initialize GuC ID manager sooner The GuC submission cleanup code may depend on the GuC ID manager, thus we can't initialize it after registering a submission cleanup action, as the reverse cleanup sequence will destroy the GuC ID manager prior to a call to guc_submit_fini(). Move GuC ID manager initialization up, right after managed mutex initialization, to have it available during guc_submit_fini(). Signed-off-by: Michal Wajdeczko Cc: Matthew Brost Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240406143946.979-2-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_guc_submit.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 4c444fddfba6..61e7a8fbd18c 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -266,6 +266,10 @@ int xe_guc_submit_init(struct xe_guc *guc) if (err) return err; + err = xe_guc_id_mgr_init(&guc->submission_state.idm, ~0); + if (err) + return err; + err = alloc_submit_wq(guc); if (err) return err; @@ -279,15 +283,7 @@ int xe_guc_submit_init(struct xe_guc *guc) primelockdep(guc); - err = drmm_add_action_or_reset(&xe->drm, guc_submit_fini, guc); - if (err) - return err; - - err = xe_guc_id_mgr_init(&guc->submission_state.idm, ~0); - if (err) - return err; - - return 0; + return drmm_add_action_or_reset(&xe->drm, guc_submit_fini, guc); } static void __release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q, u32 xa_count) -- cgit v1.2.3 From 97515d0b3ed9243e613dcec36e03528b303314d0 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Fri, 5 Apr 2024 15:39:34 +0200 Subject: drm/xe/vf: Don't emit access to Global HWSP if VF MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit VFs can't access the Global HWSP, so don't emit a questionable MI_FLUSH_DW while processing a migration job.
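A condensed sketch of the guard added below in emit_migration_job_gen12() (taken from the hunk, trimmed for illustration):

    if (!IS_SRIOV_VF(gt_to_xe(job->q->gt))) {
        /* the flush/invalidate writes the Global HWSP, which is
         * PF-only, so VFs skip this entirely */
        dw[i++] = preparser_disable(true);
        i = emit_flush_invalidate(0, dw, i);
        dw[i++] = preparser_disable(false);
    }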
Bspec: 52398 Signed-off-by: Michal Wajdeczko Reviewed-by: Piotr Piórkowski Link: https://patchwork.freedesktop.org/patch/msgid/20240405133936.891-2-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_ring_ops.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c index 5b2b37b59813..d42b3f33bd7a 100644 --- a/drivers/gpu/drm/xe/xe_ring_ops.c +++ b/drivers/gpu/drm/xe/xe_ring_ops.c @@ -17,6 +17,7 @@ #include "xe_lrc.h" #include "xe_macros.h" #include "xe_sched_job.h" +#include "xe_sriov.h" #include "xe_vm_types.h" #include "xe_vm.h" #include "xe_wa.h" @@ -367,10 +368,12 @@ static void emit_migration_job_gen12(struct xe_sched_job *job, i = emit_bb_start(job->batch_addr[0], BIT(8), dw, i); - /* XXX: Do we need this? Leaving for now. */ - dw[i++] = preparser_disable(true); - i = emit_flush_invalidate(0, dw, i); - dw[i++] = preparser_disable(false); + if (!IS_SRIOV_VF(gt_to_xe(job->q->gt))) { + /* XXX: Do we need this? Leaving for now. */ + dw[i++] = preparser_disable(true); + i = emit_flush_invalidate(0, dw, i); + dw[i++] = preparser_disable(false); + } i = emit_bb_start(job->batch_addr[1], BIT(8), dw, i); -- cgit v1.2.3 From fe4b17c4f775c5eb5861d944e1a98880da4bef71 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Fri, 5 Apr 2024 15:39:35 +0200 Subject: drm/xe/vf: Don't try to program MOCS if VF VF drivers don't have access to MOCS registers. It is the PF driver's responsibility to program MOCS according to the HW team guidelines. Signed-off-by: Michal Wajdeczko Reviewed-by: Himal Prasad Ghimiray Link: https://patchwork.freedesktop.org/patch/msgid/20240405133936.891-3-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_mocs.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_mocs.c b/drivers/gpu/drm/xe/xe_mocs.c index bff659d20062..d16fa64da881 100644 --- a/drivers/gpu/drm/xe/xe_mocs.c +++ b/drivers/gpu/drm/xe/xe_mocs.c @@ -568,6 +568,9 @@ void xe_mocs_init(struct xe_gt *gt) flags = get_mocs_settings(gt_to_xe(gt), &table); mocs_dbg(gt, "flag:0x%x\n", flags); + if (IS_SRIOV_VF(gt_to_xe(gt))) + return; + if (flags & HAS_GLOBAL_MOCS) __init_mocs_table(gt, &table); if (flags & HAS_LNCF_MOCS) -- cgit v1.2.3 From f2b81483d39d14a3028a4592ee1f89578832d0fa Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Fri, 5 Apr 2024 15:39:36 +0200 Subject: drm/xe/vf: Don't try to read legacy GuC MMIO notification if VF Legacy SOFT_SCRATCH registers are not accessible from the VF. Any G2H notification posted there will be handled by the PF driver.
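Both of these VF restrictions reduce to the same early-return shape in the hunks below; roughly (the GuC path uses guc_to_xe(guc) instead of gt_to_xe(gt)):

    /* VF: the PF owns these registers, nothing to do here */
    if (IS_SRIOV_VF(gt_to_xe(gt)))
        return;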
Signed-off-by: Michal Wajdeczko Reviewed-by: Himal Prasad Ghimiray Link: https://patchwork.freedesktop.org/patch/msgid/20240405133936.891-4-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_guc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index 59fe73770711..240e7a4bbff1 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -591,6 +591,9 @@ static void guc_handle_mmio_msg(struct xe_guc *guc) struct xe_gt *gt = guc_to_gt(guc); u32 msg; + if (IS_SRIOV_VF(guc_to_xe(guc))) + return; + xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT); msg = xe_mmio_read32(gt, SOFT_SCRATCH(15)); -- cgit v1.2.3 From 66cb3ca9138611e5188af093a7e26a20fafa0aad Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Thu, 4 Apr 2024 19:38:14 +0200 Subject: drm/xe/vf: Mark supported firmwares as preloaded MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On current platforms, supported firmwares like GuC and HuC must be loaded by the PF driver. Mark those firmwares as 'preloaded' so we will skip fetching and loading them on the VF drivers but still correctly report them as 'running'. Signed-off-by: Michal Wajdeczko Reviewed-by: Piotr Piórkowski Link: https://patchwork.freedesktop.org/patch/msgid/20240404173814.715-1-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_uc_fw.c | 11 +++++++++++ drivers/gpu/drm/xe/xe_uc_fw.h | 8 ++++++-- drivers/gpu/drm/xe/xe_uc_fw_types.h | 3 ++- 3 files changed, 19 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c index ec62296aec33..186f81640cef 100644 --- a/drivers/gpu/drm/xe/xe_uc_fw.c +++ b/drivers/gpu/drm/xe/xe_uc_fw.c @@ -17,6 +17,7 @@ #include "xe_map.h" #include "xe_mmio.h" #include "xe_module.h" +#include "xe_sriov.h" #include "xe_uc_fw.h" /* @@ -650,7 +651,17 @@ static int uc_fw_request(struct xe_uc_fw *uc_fw, const struct firmware **firmwar xe_assert(xe, !uc_fw->path); uc_fw_auto_select(xe, uc_fw); + + if (IS_SRIOV_VF(xe)) { + /* VF will support only firmwares that driver can autoselect */ + xe_uc_fw_change_status(uc_fw, uc_fw->path ? + XE_UC_FIRMWARE_PRELOADED : + XE_UC_FIRMWARE_NOT_SUPPORTED); + return 0; + } + uc_fw_override(uc_fw); + xe_uc_fw_change_status(uc_fw, uc_fw->path ? 
XE_UC_FIRMWARE_SELECTED : XE_UC_FIRMWARE_NOT_SUPPORTED); diff --git a/drivers/gpu/drm/xe/xe_uc_fw.h b/drivers/gpu/drm/xe/xe_uc_fw.h index 85c20795d1f8..35078038797e 100644 --- a/drivers/gpu/drm/xe/xe_uc_fw.h +++ b/drivers/gpu/drm/xe/xe_uc_fw.h @@ -59,6 +59,8 @@ const char *xe_uc_fw_status_repr(enum xe_uc_fw_status status) return "TRANSFERRED"; case XE_UC_FIRMWARE_RUNNING: return "RUNNING"; + case XE_UC_FIRMWARE_PRELOADED: + return "PRELOADED"; } return ""; } @@ -85,6 +87,7 @@ static inline int xe_uc_fw_status_to_error(enum xe_uc_fw_status status) case XE_UC_FIRMWARE_LOADABLE: case XE_UC_FIRMWARE_TRANSFERRED: case XE_UC_FIRMWARE_RUNNING: + case XE_UC_FIRMWARE_PRELOADED: return 0; } return -EINVAL; @@ -134,7 +137,8 @@ static inline bool xe_uc_fw_is_available(struct xe_uc_fw *uc_fw) static inline bool xe_uc_fw_is_loadable(struct xe_uc_fw *uc_fw) { - return __xe_uc_fw_status(uc_fw) >= XE_UC_FIRMWARE_LOADABLE; + return __xe_uc_fw_status(uc_fw) >= XE_UC_FIRMWARE_LOADABLE && + __xe_uc_fw_status(uc_fw) != XE_UC_FIRMWARE_PRELOADED; } static inline bool xe_uc_fw_is_loaded(struct xe_uc_fw *uc_fw) @@ -144,7 +148,7 @@ static inline bool xe_uc_fw_is_loaded(struct xe_uc_fw *uc_fw) static inline bool xe_uc_fw_is_running(struct xe_uc_fw *uc_fw) { - return __xe_uc_fw_status(uc_fw) == XE_UC_FIRMWARE_RUNNING; + return __xe_uc_fw_status(uc_fw) >= XE_UC_FIRMWARE_RUNNING; } static inline bool xe_uc_fw_is_overridden(const struct xe_uc_fw *uc_fw) diff --git a/drivers/gpu/drm/xe/xe_uc_fw_types.h b/drivers/gpu/drm/xe/xe_uc_fw_types.h index bc800b696866..0d8caa0e7354 100644 --- a/drivers/gpu/drm/xe/xe_uc_fw_types.h +++ b/drivers/gpu/drm/xe/xe_uc_fw_types.h @@ -50,7 +50,8 @@ enum xe_uc_fw_status { XE_UC_FIRMWARE_LOADABLE, /* all fw-required objects are ready */ XE_UC_FIRMWARE_LOAD_FAIL, /* failed to xfer or init/auth the fw */ XE_UC_FIRMWARE_TRANSFERRED, /* dma xfer done */ - XE_UC_FIRMWARE_RUNNING /* init/auth done */ + XE_UC_FIRMWARE_RUNNING, /* init/auth done */ + XE_UC_FIRMWARE_PRELOADED, /* preloaded by the PF driver */ }; enum xe_uc_fw_type { -- cgit v1.2.3 From 117de185edf2c5767f03575219bf7a43b161ff0d Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Fri, 5 Apr 2024 13:07:11 -0700 Subject: drm/xe/display: Fix double mutex initialization MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit All of these mutexes are already initialized by the display side since commit 3fef3e6ff86a ("drm/i915: move display mutex inits to display code"), so xe shouldn't initialize them.
Fixes: 44e694958b95 ("drm/xe/display: Implement display support") Cc: Jani Nikula Cc: Arun R Murthy Reviewed-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20240405200711.2041428-1-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/display/xe_display.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/gpu/drm/xe/display/xe_display.c b/drivers/gpu/drm/xe/display/xe_display.c index e4db069f0db3..6ec375c1c4b6 100644 --- a/drivers/gpu/drm/xe/display/xe_display.c +++ b/drivers/gpu/drm/xe/display/xe_display.c @@ -108,11 +108,6 @@ int xe_display_create(struct xe_device *xe) xe->display.hotplug.dp_wq = alloc_ordered_workqueue("xe-dp", 0); drmm_mutex_init(&xe->drm, &xe->sb_lock); - drmm_mutex_init(&xe->drm, &xe->display.backlight.lock); - drmm_mutex_init(&xe->drm, &xe->display.audio.mutex); - drmm_mutex_init(&xe->drm, &xe->display.wm.wm_mutex); - drmm_mutex_init(&xe->drm, &xe->display.pps.mutex); - drmm_mutex_init(&xe->drm, &xe->display.hdcp.hdcp_mutex); xe->enabled_irq_mask = ~0; err = drmm_add_action_or_reset(&xe->drm, display_destroy, NULL); -- cgit v1.2.3 From 335ad807d59526c47ca7e535b571fffc9d38a600 Mon Sep 17 00:00:00 2001 From: José Roberto de Souza Date: Fri, 5 Apr 2024 08:38:49 -0700 Subject: drm/xe: Remove debug message from migrate_clear() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This message is printed a lot and, from my understanding, it does not bring any value, so drop it. Signed-off-by: José Roberto de Souza Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240405153849.44906-1-jose.souza@intel.com --- drivers/gpu/drm/xe/xe_migrate.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index fe713d57cbf6..9f6e9b7f11c8 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -985,7 +985,6 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m, struct xe_res_cursor src_it; struct ttm_resource *src = dst; int err; - int pass = 0; if (!clear_vram) xe_res_first_sg(xe_bo_sg(bo), 0, bo->size, &src_it); @@ -1006,8 +1005,6 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m, clear_L0 = xe_migrate_res_sizes(m, &src_it); - drm_dbg(&xe->drm, "Pass %u, size: %llu\n", pass++, clear_L0); - /* Calculate final sizes and batch size.. */ batch_size = 2 + pte_update_size(m, clear_vram, src, &src_it, -- cgit v1.2.3 From dc30c6e7149baaae4288c742de95212b31f07438 Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Thu, 4 Apr 2024 09:12:56 -0700 Subject: drm/xe: Label RING_CONTEXT_CONTROL as masked RING_CONTEXT_CONTROL is a masked register.
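For context, a sketch of the usual convention (not part of this patch): a masked register keeps a write-enable mask in its upper 16 bits, so a write only updates the bits whose mask bit is set. The familiar helpers encode that as roughly:

    #define _MASKED_BIT_ENABLE(b)  (((b) << 16) | (b))  /* set bit b */
    #define _MASKED_BIT_DISABLE(b) ((b) << 16)          /* clear bit b */

Tagging the register with XE_REG_OPTION_MASKED lets common code apply this convention instead of open-coding it at every write site.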
v2: Also clean up setting register value (Lucas) Reviewed-by: Matt Roper Reviewed-by: Lucas De Marchi Signed-off-by: Ashutosh Dixit Link: https://patchwork.freedesktop.org/patch/msgid/20240404161256.3852502-1-ashutosh.dixit@intel.com --- drivers/gpu/drm/xe/regs/xe_engine_regs.h | 2 +- drivers/gpu/drm/xe/xe_lrc.c | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h index a08528d9c76b..af71b87d8030 100644 --- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h @@ -122,7 +122,7 @@ #define RING_EXECLIST_STATUS_LO(base) XE_REG((base) + 0x234) #define RING_EXECLIST_STATUS_HI(base) XE_REG((base) + 0x234 + 4) -#define RING_CONTEXT_CONTROL(base) XE_REG((base) + 0x244) +#define RING_CONTEXT_CONTROL(base) XE_REG((base) + 0x244, XE_REG_OPTION_MASKED) #define CTX_CTRL_INHIBIT_SYN_CTX_SWITCH REG_BIT(3) #define CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT REG_BIT(0) diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index 552ebf6eeee7..615bbc372ac6 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -543,9 +543,8 @@ static const u8 *reg_offsets(struct xe_device *xe, enum xe_engine_class class) static void set_context_control(u32 *regs, struct xe_hw_engine *hwe) { - regs[CTX_CONTEXT_CONTROL] = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH) | - _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT) | - CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT; + regs[CTX_CONTEXT_CONTROL] = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH | + CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT); /* TODO: Timestamp */ } -- cgit v1.2.3 From b611dad092b6bf80f96641126a321d1658c93213 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Mon, 8 Apr 2024 08:13:12 -0700 Subject: drm/xe: Remove dead clock code xe_gt_clock_cycles_to_ns() is not called from anywhere after PMU handling was removed in commit 90a8b23f9b85 ("drm/xe/pmu: Remove PMU from Xe till uapi is finalized"). Drop it. Reviewed-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20240408151312.2100304-1-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_gt_clock.c | 5 ----- drivers/gpu/drm/xe/xe_gt_clock.h | 2 +- 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt_clock.c b/drivers/gpu/drm/xe/xe_gt_clock.c index 937054e31d72..c7bca20f6b65 100644 --- a/drivers/gpu/drm/xe/xe_gt_clock.c +++ b/drivers/gpu/drm/xe/xe_gt_clock.c @@ -78,8 +78,3 @@ int xe_gt_clock_init(struct xe_gt *gt) gt->info.reference_clock = freq; return 0; } - -u64 xe_gt_clock_cycles_to_ns(const struct xe_gt *gt, u64 count) -{ - return DIV_ROUND_CLOSEST_ULL(count * NSEC_PER_SEC, gt->info.reference_clock); -} diff --git a/drivers/gpu/drm/xe/xe_gt_clock.h b/drivers/gpu/drm/xe/xe_gt_clock.h index aa162722f859..44fa0371b973 100644 --- a/drivers/gpu/drm/xe/xe_gt_clock.h +++ b/drivers/gpu/drm/xe/xe_gt_clock.h @@ -11,5 +11,5 @@ struct xe_gt; int xe_gt_clock_init(struct xe_gt *gt); -u64 xe_gt_clock_cycles_to_ns(const struct xe_gt *gt, u64 count); + #endif -- cgit v1.2.3 From 31ced035ecde7b24aef57c3c4b85bbc3283c81f2 Mon Sep 17 00:00:00 2001 From: Francois Dugast Date: Fri, 29 Mar 2024 12:44:03 +0000 Subject: drm/xe/uapi: Restore flags VM_BIND_FLAG_READONLY and VM_BIND_FLAG_IMMEDIATE The commit 84a1ed5e6756 ("drm/xe/uapi: Remove unused flags") is partially reverted. At the time, flags not used by user space were removed during cleanup. 
Some flags now needed by the compute runtime are brought back in this commit: - DRM_XE_VM_BIND_FLAG_READONLY is used to write protect kernel ISA thus preventing accidental overwrites. - DRM_XE_VM_BIND_FLAG_IMMEDIATE is used to trigger mapping at the time of binding in order to prevent faulting at execution time. The changes in the compute runtime are ready and approved, see link below. v2: Include a link to the PR in the commit message (Matthew Brost) v3: Update kernel doc and improve commit message (Lucas De Marchi) Cc: Mateusz Jablonski Cc: Michal Mrozek Cc: Matthew Brost Cc: Lucas De Marchi Link: https://github.com/intel/compute-runtime/pull/717 Signed-off-by: Francois Dugast Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240329124403.7-1-francois.dugast@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_vm.c | 13 +++++++++++-- drivers/gpu/drm/xe/xe_vm_types.h | 4 ++++ include/uapi/drm/xe_drm.h | 8 ++++++++ 3 files changed, 23 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index fff10e1717ff..66b70fd3d105 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -2213,6 +2213,10 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo, struct xe_vma_op *op = gpuva_op_to_vma_op(__op); if (__op->op == DRM_GPUVA_OP_MAP) { + op->map.immediate = + flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE; + op->map.read_only = + flags & DRM_XE_VM_BIND_FLAG_READONLY; op->map.is_null = flags & DRM_XE_VM_BIND_FLAG_NULL; op->map.dumpable = flags & DRM_XE_VM_BIND_FLAG_DUMPABLE; op->map.pat_index = pat_index; @@ -2407,6 +2411,8 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q, switch (op->base.op) { case DRM_GPUVA_OP_MAP: { + flags |= op->map.read_only ? + VMA_CREATE_FLAG_READ_ONLY : 0; flags |= op->map.is_null ? VMA_CREATE_FLAG_IS_NULL : 0; flags |= op->map.dumpable ? 
@@ -2551,7 +2557,7 @@ static int op_execute(struct drm_exec *exec, struct xe_vm *vm, case DRM_GPUVA_OP_MAP: err = xe_vm_bind(vm, vma, op->q, xe_vma_bo(vma), op->syncs, op->num_syncs, - !xe_vm_in_fault_mode(vm), + op->map.immediate || !xe_vm_in_fault_mode(vm), op->flags & XE_VMA_OP_FIRST, op->flags & XE_VMA_OP_LAST); break; @@ -2826,7 +2832,10 @@ static int vm_bind_ioctl_ops_execute(struct xe_vm *vm, return 0; } -#define SUPPORTED_FLAGS (DRM_XE_VM_BIND_FLAG_NULL | \ +#define SUPPORTED_FLAGS \ + (DRM_XE_VM_BIND_FLAG_READONLY | \ + DRM_XE_VM_BIND_FLAG_IMMEDIATE | \ + DRM_XE_VM_BIND_FLAG_NULL | \ DRM_XE_VM_BIND_FLAG_DUMPABLE) #define XE_64K_PAGE_MASK 0xffffull #define ALL_DRM_XE_SYNCS_FLAGS (DRM_XE_SYNCS_FLAG_WAIT_FOR_OP) diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index badf3945083d..0447c79c40a2 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -276,6 +276,10 @@ struct xe_vm { struct xe_vma_op_map { /** @vma: VMA to map */ struct xe_vma *vma; + /** @immediate: Immediate bind */ + bool immediate; + /** @read_only: Read only */ + bool read_only; /** @is_null: is NULL binding */ bool is_null; /** @dumpable: whether BO is dumped on GPU hang */ diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 95a8ecca21f4..1446c3bae515 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -871,6 +871,12 @@ struct drm_xe_vm_destroy { * - %DRM_XE_VM_BIND_OP_PREFETCH * * and the @flags can be: + * - %DRM_XE_VM_BIND_FLAG_READONLY - Setup the page tables as read-only + * to ensure write protection + * - %DRM_XE_VM_BIND_FLAG_IMMEDIATE - On a faulting VM, do the + * MAP operation immediately rather than deferring the MAP to the page + * fault handler. This is implied on a non-faulting VM as there is no + * fault handler to defer to. * - %DRM_XE_VM_BIND_FLAG_NULL - When the NULL flag is set, the page * tables are setup with a special bit which indicates writes are * dropped and all reads return zero. In the future, the NULL flags * @@ -963,6 +969,8 @@ struct drm_xe_vm_bind_op { /** @op: Bind operation to perform */ __u32 op; +#define DRM_XE_VM_BIND_FLAG_READONLY (1 << 0) +#define DRM_XE_VM_BIND_FLAG_IMMEDIATE (1 << 1) #define DRM_XE_VM_BIND_FLAG_NULL (1 << 2) #define DRM_XE_VM_BIND_FLAG_DUMPABLE (1 << 3) /** @flags: Bind flags */ -- cgit v1.2.3 From 0417a5f84810eaef43c1e64a11df4e0238b4bf09 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Fri, 5 Apr 2024 14:16:31 -0700 Subject: drm/xe: Always capture exec queues on snapshot Always capture exec queues on snapshot regardless of whether the exec queue has pending jobs or not. Having jobs or not does not necessarily indicate whether the exec queue capture is useful. Example bugs that would not be easily detected by skipping capture when the pending job list is empty: - Jobs pending on the exec queue have dependencies - Leaking exec queue refs - GuC protocol issues (i.e. losing G2H) In addition to the above bugs, in general it is just useful to see every exec queue registered with the GuC and its state.
Cc: Rodrigo Vivi Signed-off-by: Matthew Brost Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240405211632.223568-2-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_devcoredump.c | 2 +- drivers/gpu/drm/xe/xe_guc_submit.c | 25 +++---------------------- drivers/gpu/drm/xe/xe_guc_submit.h | 4 ++-- 3 files changed, 6 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c index a951043b2943..283ca7518aff 100644 --- a/drivers/gpu/drm/xe/xe_devcoredump.c +++ b/drivers/gpu/drm/xe/xe_devcoredump.c @@ -188,7 +188,7 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump, xe_gt_info(ss->gt, "failed to get forcewake for coredump capture\n"); coredump->snapshot.ct = xe_guc_ct_snapshot_capture(&guc->ct, true); - coredump->snapshot.ge = xe_guc_exec_queue_snapshot_capture(job); + coredump->snapshot.ge = xe_guc_exec_queue_snapshot_capture(q); coredump->snapshot.job = xe_sched_job_snapshot_capture(job); coredump->snapshot.vm = xe_vm_snapshot_capture(q->vm); diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 61e7a8fbd18c..c7d38469fb46 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -1775,7 +1775,7 @@ guc_exec_queue_wq_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snaps /** * xe_guc_exec_queue_snapshot_capture - Take a quick snapshot of the GuC Engine. - * @job: faulty Xe scheduled job. + * @q: faulty exec queue * * This can be printed out in a later stage like during dev_coredump * analysis. @@ -1784,9 +1784,8 @@ guc_exec_queue_wq_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snaps * caller, using `xe_guc_exec_queue_snapshot_free`. */ struct xe_guc_submit_exec_queue_snapshot * -xe_guc_exec_queue_snapshot_capture(struct xe_sched_job *job) +xe_guc_exec_queue_snapshot_capture(struct xe_exec_queue *q) { - struct xe_exec_queue *q = job->q; struct xe_gpu_scheduler *sched = &q->guc->sched; struct xe_guc_submit_exec_queue_snapshot *snapshot; int i; @@ -1942,28 +1941,10 @@ void xe_guc_exec_queue_snapshot_free(struct xe_guc_submit_exec_queue_snapshot *s static void guc_exec_queue_print(struct xe_exec_queue *q, struct drm_printer *p) { struct xe_guc_submit_exec_queue_snapshot *snapshot; - struct xe_gpu_scheduler *sched = &q->guc->sched; - struct xe_sched_job *job; - bool found = false; - spin_lock(&sched->base.job_list_lock); - list_for_each_entry(job, &sched->base.pending_list, drm.list) { - if (job->q == q) { - xe_sched_job_get(job); - found = true; - break; - } - } - spin_unlock(&sched->base.job_list_lock); - - if (!found) - return; - - snapshot = xe_guc_exec_queue_snapshot_capture(job); + snapshot = xe_guc_exec_queue_snapshot_capture(q); xe_guc_exec_queue_snapshot_print(snapshot, p); xe_guc_exec_queue_snapshot_free(snapshot); - - xe_sched_job_put(job); } /** diff --git a/drivers/gpu/drm/xe/xe_guc_submit.h b/drivers/gpu/drm/xe/xe_guc_submit.h index 2f14dfd04722..fad0421ead36 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.h +++ b/drivers/gpu/drm/xe/xe_guc_submit.h @@ -9,8 +9,8 @@ #include struct drm_printer; +struct xe_exec_queue; struct xe_guc; -struct xe_sched_job; int xe_guc_submit_init(struct xe_guc *guc); @@ -27,7 +27,7 @@ int xe_guc_exec_queue_memory_cat_error_handler(struct xe_guc *guc, u32 *msg, int xe_guc_exec_queue_reset_failure_handler(struct xe_guc *guc, u32 *msg, u32 len); struct xe_guc_submit_exec_queue_snapshot * -xe_guc_exec_queue_snapshot_capture(struct xe_sched_job *job); 
+xe_guc_exec_queue_snapshot_capture(struct xe_exec_queue *q); void xe_guc_exec_queue_snapshot_capture_delayed(struct xe_guc_submit_exec_queue_snapshot *snapshot); void -- cgit v1.2.3 From 1db3594c595f4eb24e5a34be2912a2dc0586d4b1 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Fri, 5 Apr 2024 14:16:32 -0700 Subject: drm/xe: Capture GuC CT snapshot when stopped It is useful to capture the GuC CT snapshot if the GuC CT has been forcefully put into the stopped state. Enable snapshot capture when in this state. Cc: Rodrigo Vivi Signed-off-by: Matthew Brost Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240405211632.223568-3-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_guc_ct.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index 6c37f4f9bddd..0aa3abaca66d 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -1403,7 +1403,7 @@ struct xe_guc_ct_snapshot *xe_guc_ct_snapshot_capture(struct xe_guc_ct *ct, return NULL; } - if (xe_guc_ct_enabled(ct)) { + if (xe_guc_ct_enabled(ct) || ct->state == XE_GUC_CT_STATE_STOPPED) { snapshot->ct_enabled = true; snapshot->g2h_outstanding = READ_ONCE(ct->g2h_outstanding); guc_ctb_snapshot_capture(xe, &ct->ctbs.h2g, -- cgit v1.2.3 From d6da81a4785ca6e2f0ea9082424e725e8aad69b4 Mon Sep 17 00:00:00 2001 From: Badal Nilawar Date: Fri, 5 Apr 2024 14:12:30 +0530 Subject: drm/xe/guc: Add support for workaround KLVs To prevent running out of bits, new workaround (w/a) enable flags are being added via a KLV system instead of a 32 bit flags word. v2: GuC version check > 70.10 is not needed as baseline xe does not support anything below 70.19 v3: Use 64 bit ggtt address for future compatibility (John Harrison/Daniele) v4: %s/PAGE_SIZE/SZ_4K/ (Michal) Cc: John Harrison Signed-off-by: Badal Nilawar Reviewed-by: John Harrison Signed-off-by: Michal Wajdeczko Link: https://patchwork.freedesktop.org/patch/msgid/20240405084231.3620848-2-badal.nilawar@intel.com --- drivers/gpu/drm/xe/xe_guc_ads.c | 62 +++++++++++++++++++++++++++++++++-- drivers/gpu/drm/xe/xe_guc_ads_types.h | 2 ++ drivers/gpu/drm/xe/xe_guc_fwif.h | 5 ++- 3 files changed, 66 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c index e025f3e10c9b..0a8f27243c84 100644 --- a/drivers/gpu/drm/xe/xe_guc_ads.c +++ b/drivers/gpu/drm/xe/xe_guc_ads.c @@ -80,6 +80,10 @@ ads_to_map(struct xe_guc_ads *ads) * +---------------------------------------+ * | padding | * +---------------------------------------+ <== 4K aligned + * | w/a KLVs | + * +---------------------------------------+ + * | padding | + * +---------------------------------------+ <== 4K aligned * | capture lists | * +---------------------------------------+ * | padding | @@ -131,6 +135,11 @@ static size_t guc_ads_golden_lrc_size(struct xe_guc_ads *ads) return PAGE_ALIGN(ads->golden_lrc_size); } +static u32 guc_ads_waklv_size(struct xe_guc_ads *ads) +{ + return PAGE_ALIGN(ads->ads_waklv_size); +} + static size_t guc_ads_capture_size(struct xe_guc_ads *ads) { /* FIXME: Allocate a proper capture list */ @@ -167,12 +176,22 @@ static size_t guc_ads_golden_lrc_offset(struct xe_guc_ads *ads) return PAGE_ALIGN(offset); } +static size_t guc_ads_waklv_offset(struct xe_guc_ads *ads) +{ + u32 offset; + + offset = guc_ads_golden_lrc_offset(ads) + + guc_ads_golden_lrc_size(ads); + + return PAGE_ALIGN(offset); +} + static size_t guc_ads_capture_offset(struct xe_guc_ads *ads) 
{ size_t offset; - offset = guc_ads_golden_lrc_offset(ads) + - guc_ads_golden_lrc_size(ads); + offset = guc_ads_waklv_offset(ads) + + guc_ads_waklv_size(ads); return PAGE_ALIGN(offset); } @@ -260,6 +279,43 @@ static size_t calculate_golden_lrc_size(struct xe_guc_ads *ads) return total_size; } +static void guc_waklv_init(struct xe_guc_ads *ads) +{ + u64 addr_ggtt; + u32 offset, remain, size; + + offset = guc_ads_waklv_offset(ads); + remain = guc_ads_waklv_size(ads); + + /* Add workarounds here + * + * if (XE_WA(gt, wa_id)) + * guc_waklv_enable_simple(ads, + * wa_klv_id, + * &offset, &remain); + */ + + size = guc_ads_waklv_size(ads) - remain; + if (!size) + return; + + offset = guc_ads_waklv_offset(ads); + addr_ggtt = xe_bo_ggtt_addr(ads->bo) + offset; + + ads_blob_write(ads, ads.wa_klv_addr_lo, lower_32_bits(addr_ggtt)); + ads_blob_write(ads, ads.wa_klv_addr_hi, upper_32_bits(addr_ggtt)); + ads_blob_write(ads, ads.wa_klv_size, size); +} + +static int calculate_waklv_size(struct xe_guc_ads *ads) +{ + /* + * A single page is both the minimum size possible and + * is sufficiently large enough for all current platforms. + */ + return SZ_4K; +} + #define MAX_GOLDEN_LRC_SIZE (SZ_4K * 64) int xe_guc_ads_init(struct xe_guc_ads *ads) @@ -271,6 +327,7 @@ int xe_guc_ads_init(struct xe_guc_ads *ads) ads->golden_lrc_size = calculate_golden_lrc_size(ads); ads->regset_size = calculate_regset_size(gt); + ads->ads_waklv_size = calculate_waklv_size(ads); bo = xe_managed_bo_create_pin_map(xe, tile, guc_ads_size(ads) + MAX_GOLDEN_LRC_SIZE, XE_BO_FLAG_SYSTEM | @@ -598,6 +655,7 @@ void xe_guc_ads_populate(struct xe_guc_ads *ads) guc_mapping_table_init(gt, &info_map); guc_capture_list_init(ads); guc_doorbell_init(ads); + guc_waklv_init(ads); if (xe->info.has_usm) { guc_um_init_params(ads); diff --git a/drivers/gpu/drm/xe/xe_guc_ads_types.h b/drivers/gpu/drm/xe/xe_guc_ads_types.h index 4afe44bece4b..2de5decfe0fd 100644 --- a/drivers/gpu/drm/xe/xe_guc_ads_types.h +++ b/drivers/gpu/drm/xe/xe_guc_ads_types.h @@ -20,6 +20,8 @@ struct xe_guc_ads { size_t golden_lrc_size; /** @regset_size: size of register set passed to GuC for save/restore */ u32 regset_size; + /** @ads_waklv_size: total waklv size supported by platform */ + u32 ads_waklv_size; }; #endif diff --git a/drivers/gpu/drm/xe/xe_guc_fwif.h b/drivers/gpu/drm/xe/xe_guc_fwif.h index 5474025271e3..19ee71aeaf17 100644 --- a/drivers/gpu/drm/xe/xe_guc_fwif.h +++ b/drivers/gpu/drm/xe/xe_guc_fwif.h @@ -209,7 +209,10 @@ struct guc_ads { u32 capture_instance[GUC_CAPTURE_LIST_INDEX_MAX][GUC_MAX_ENGINE_CLASSES]; u32 capture_class[GUC_CAPTURE_LIST_INDEX_MAX][GUC_MAX_ENGINE_CLASSES]; u32 capture_global[GUC_CAPTURE_LIST_INDEX_MAX]; - u32 reserved[14]; + u32 wa_klv_addr_lo; + u32 wa_klv_addr_hi; + u32 wa_klv_size; + u32 reserved[11]; } __packed; /* Engine usage stats */ -- cgit v1.2.3 From c151ff5c9053338ca9c7fc6fa6435e210cfd5ca7 Mon Sep 17 00:00:00 2001 From: Badal Nilawar Date: Fri, 5 Apr 2024 14:12:31 +0530 Subject: drm/xe/lnl: Enable GuC Wa_14019882105 Enable GuC Wa_14019882105 to block interrupts during C6 flow when the memory path has been blocked v2: Make helper function generic and name it as guc_waklv_enable_simple (John Harrison) v3: Make warning descriptive (John Harrison) v4: s/drm_WARN/xe_gt_WARN/ (Michal) Cc: John Harrison Signed-off-by: Badal Nilawar Reviewed-by: John Harrison Signed-off-by: Michal Wajdeczko Link: https://patchwork.freedesktop.org/patch/msgid/20240405084231.3620848-3-badal.nilawar@intel.com --- drivers/gpu/drm/xe/abi/guc_klvs_abi.h | 7 +++++++ 
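Since this w/a carries no payload, enabling it is a single-dword, zero-length KLV; this is exactly what guc_waklv_enable_simple() in the hunks below writes into the ADS buffer:

    u32 klv_entry[] = {
        /* 16:16 key/length; length counts payload dwords, here zero */
        FIELD_PREP(GUC_KLV_0_KEY,
                   GUC_WORKAROUND_KLV_BLOCK_INTERRUPTS_WHEN_MGSR_BLOCKED) |
        FIELD_PREP(GUC_KLV_0_LEN, 0),
    };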
drivers/gpu/drm/xe/xe_guc_ads.c | 38 ++++++++++++++++++++++++++++------- drivers/gpu/drm/xe/xe_wa_oob.rules | 1 + 3 files changed, 39 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h index 0400bc0fccdc..5dd45e06f0b6 100644 --- a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h +++ b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h @@ -319,4 +319,11 @@ enum { #define GUC_KLV_VF_CFG_BEGIN_CONTEXT_ID_KEY 0x8a0b #define GUC_KLV_VF_CFG_BEGIN_CONTEXT_ID_LEN 1u +/* + * Workaround keys: + */ +enum xe_guc_klv_ids { + GUC_WORKAROUND_KLV_BLOCK_INTERRUPTS_WHEN_MGSR_BLOCKED = 0x9002, +}; + #endif diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c index 0a8f27243c84..757cbbb87869 100644 --- a/drivers/gpu/drm/xe/xe_guc_ads.c +++ b/drivers/gpu/drm/xe/xe_guc_ads.c @@ -7,6 +7,8 @@ #include +#include + #include "regs/xe_engine_regs.h" #include "regs/xe_gt_regs.h" #include "regs/xe_guc_regs.h" @@ -19,6 +21,7 @@ #include "xe_map.h" #include "xe_mmio.h" #include "xe_platform_types.h" +#include "xe_wa.h" /* Slack of a few additional entries per engine */ #define ADS_REGSET_EXTRA_MAX 8 @@ -279,21 +282,42 @@ static size_t calculate_golden_lrc_size(struct xe_guc_ads *ads) return total_size; } +static void guc_waklv_enable_simple(struct xe_guc_ads *ads, + enum xe_guc_klv_ids klv_id, u32 *offset, u32 *remain) +{ + u32 klv_entry[] = { + /* 16:16 key/length */ + FIELD_PREP(GUC_KLV_0_KEY, klv_id) | + FIELD_PREP(GUC_KLV_0_LEN, 0), + /* 0 dwords data */ + }; + u32 size; + + size = sizeof(klv_entry); + + if (xe_gt_WARN(ads_to_gt(ads), *remain < size, + "w/a klv buffer too small to add klv id %d\n", klv_id)) + return; + + xe_map_memcpy_to(ads_to_xe(ads), ads_to_map(ads), *offset, + klv_entry, size); + *offset += size; + *remain -= size; +} + static void guc_waklv_init(struct xe_guc_ads *ads) { + struct xe_gt *gt = ads_to_gt(ads); u64 addr_ggtt; u32 offset, remain, size; offset = guc_ads_waklv_offset(ads); remain = guc_ads_waklv_size(ads); - /* Add workarounds here - * - * if (XE_WA(gt, wa_id)) - * guc_waklv_enable_simple(ads, - * wa_klv_id, - * &offset, &remain); - */ + if (XE_WA(gt, 14019882105)) + guc_waklv_enable_simple(ads, + GUC_WORKAROUND_KLV_BLOCK_INTERRUPTS_WHEN_MGSR_BLOCKED, + &offset, &remain); size = guc_ads_waklv_size(ads) - remain; if (!size) diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules index 68600cdead84..98a81468bc8e 100644 --- a/drivers/gpu/drm/xe/xe_wa_oob.rules +++ b/drivers/gpu/drm/xe/xe_wa_oob.rules @@ -20,3 +20,4 @@ MEDIA_VERSION(1300) PLATFORM(DG2) 14018094691 GRAPHICS_VERSION(2004) +14019882105 GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0) -- cgit v1.2.3 From b39c7056d01be638523ef9b4bfb5b60337ba08fb Mon Sep 17 00:00:00 2001 From: Karthik Poosa Date: Fri, 5 Apr 2024 18:31:24 +0530 Subject: drm/xe: Define xe_reg_is_valid Add a function to check if struct xe_reg has a valid address. v2: - Rebase. - Make xe_reg_is_valid an inline function instead of a macro. (Badal). - Update commit msg. 
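Typical use, as in the hwmon conversion that follows: a zero address means "no such register on this platform":

    struct xe_reg reg = xe_hwmon_get_reg(hwmon, hwmon_reg, channel);

    if (!xe_reg_is_valid(reg))
        return;     /* register not present, skip the access */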
Signed-off-by: Karthik Poosa Suggested-by: Lucas De Marchi Cc: Badal Nilawar Reviewed-by: Badal Nilawar Link: https://patchwork.freedesktop.org/patch/msgid/20240405130127.1392426-2-karthik.poosa@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/regs/xe_reg_defs.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/xe/regs/xe_reg_defs.h b/drivers/gpu/drm/xe/regs/xe_reg_defs.h index c89ef2b79a3f..23f7dc5bbe99 100644 --- a/drivers/gpu/drm/xe/regs/xe_reg_defs.h +++ b/drivers/gpu/drm/xe/regs/xe_reg_defs.h @@ -131,4 +131,9 @@ struct xe_reg_mcr { .__reg = XE_REG_INITIALIZER(r_, ##__VA_ARGS__, .mcr = 1) \ }) +static inline bool xe_reg_is_valid(struct xe_reg r) +{ + return r.addr; +} + #endif -- cgit v1.2.3 From a50b794c924352603eb37ec5a279a19907fe2587 Mon Sep 17 00:00:00 2001 From: Karthik Poosa Date: Fri, 5 Apr 2024 18:31:25 +0530 Subject: drm/xe/hwmon: Update xe_hwmon_get_reg to return struct xe_reg Return struct xe_reg instead of reg.raw from xe_hwmon_get_reg to abstract the usage of struct xe_reg. v2: - Use xe_reg_is_valid function instead of XE_REG_IS_VALID macro as it is removed. Signed-off-by: Karthik Poosa Suggested-by: Lucas De Marchi Cc: Badal Nilawar Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240405130127.1392426-3-karthik.poosa@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_hwmon.c | 47 ++++++++++++++++++++++++------------------- 1 file changed, 26 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_hwmon.c b/drivers/gpu/drm/xe/xe_hwmon.c index 7e8caac838e0..a3b2ec0d24d8 100644 --- a/drivers/gpu/drm/xe/xe_hwmon.c +++ b/drivers/gpu/drm/xe/xe_hwmon.c @@ -79,46 +79,46 @@ struct xe_hwmon { struct xe_hwmon_energy_info ei[CHANNEL_MAX]; }; -static u32 xe_hwmon_get_reg(struct xe_hwmon *hwmon, enum xe_hwmon_reg hwmon_reg, int channel) +static struct xe_reg xe_hwmon_get_reg(struct xe_hwmon *hwmon, enum xe_hwmon_reg hwmon_reg, + int channel) { struct xe_device *xe = gt_to_xe(hwmon->gt); - struct xe_reg reg = XE_REG(0); switch (hwmon_reg) { case REG_PKG_RAPL_LIMIT: if (xe->info.platform == XE_PVC && channel == CHANNEL_PKG) - reg = PVC_GT0_PACKAGE_RAPL_LIMIT; + return PVC_GT0_PACKAGE_RAPL_LIMIT; else if ((xe->info.platform == XE_DG2) && (channel == CHANNEL_PKG)) - reg = PCU_CR_PACKAGE_RAPL_LIMIT; + return PCU_CR_PACKAGE_RAPL_LIMIT; break; case REG_PKG_POWER_SKU: if (xe->info.platform == XE_PVC && channel == CHANNEL_PKG) - reg = PVC_GT0_PACKAGE_POWER_SKU; + return PVC_GT0_PACKAGE_POWER_SKU; else if ((xe->info.platform == XE_DG2) && (channel == CHANNEL_PKG)) - reg = PCU_CR_PACKAGE_POWER_SKU; + return PCU_CR_PACKAGE_POWER_SKU; break; case REG_PKG_POWER_SKU_UNIT: if (xe->info.platform == XE_PVC) - reg = PVC_GT0_PACKAGE_POWER_SKU_UNIT; + return PVC_GT0_PACKAGE_POWER_SKU_UNIT; else if (xe->info.platform == XE_DG2) - reg = PCU_CR_PACKAGE_POWER_SKU_UNIT; + return PCU_CR_PACKAGE_POWER_SKU_UNIT; break; case REG_GT_PERF_STATUS: if (xe->info.platform == XE_DG2 && channel == CHANNEL_PKG) - reg = GT_PERF_STATUS; + return GT_PERF_STATUS; break; case REG_PKG_ENERGY_STATUS: if (xe->info.platform == XE_PVC && channel == CHANNEL_PKG) - reg = PVC_GT0_PLATFORM_ENERGY_STATUS; + return PVC_GT0_PLATFORM_ENERGY_STATUS; else if ((xe->info.platform == XE_DG2) && (channel == CHANNEL_PKG)) - reg = PCU_CR_PACKAGE_ENERGY_STATUS; + return PCU_CR_PACKAGE_ENERGY_STATUS; break; default: drm_warn(&xe->drm, "Unknown xe hwmon reg id: %d\n", hwmon_reg); break; } - return reg.raw; + return XE_REG(0); } static void 
xe_hwmon_process_reg(struct xe_hwmon *hwmon, enum xe_hwmon_reg hwmon_reg, @@ -127,9 +127,9 @@ static void xe_hwmon_process_reg(struct xe_hwmon *hwmon, enum xe_hwmon_reg hwmon { struct xe_reg reg; - reg.raw = xe_hwmon_get_reg(hwmon, hwmon_reg, channel); + reg = xe_hwmon_get_reg(hwmon, hwmon_reg, channel); - if (!reg.raw) + if (!xe_reg_is_valid(reg)) return; switch (operation) { @@ -400,7 +400,7 @@ static umode_t xe_hwmon_attributes_visible(struct kobject *kobj, xe_pm_runtime_get(gt_to_xe(hwmon->gt)); - ret = xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, index) ? attr->mode : 0; + ret = xe_reg_is_valid(xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, index)) ? attr->mode : 0; xe_pm_runtime_put(gt_to_xe(hwmon->gt)); @@ -496,16 +496,19 @@ xe_hwmon_power_is_visible(struct xe_hwmon *hwmon, u32 attr, int channel) switch (attr) { case hwmon_power_max: - return xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, channel) ? 0664 : 0; + return xe_reg_is_valid(xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, + channel)) ? 0664 : 0; case hwmon_power_rated_max: - return xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU, channel) ? 0444 : 0; + return xe_reg_is_valid(xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU, + channel)) ? 0444 : 0; case hwmon_power_crit: if (channel == CHANNEL_PKG) return (xe_hwmon_pcode_read_i1(hwmon->gt, &uval) || !(uval & POWER_SETUP_I1_WATTS)) ? 0 : 0644; break; case hwmon_power_label: - return xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU_UNIT, channel) ? 0444 : 0; + return xe_reg_is_valid(xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU_UNIT, + channel)) ? 0444 : 0; default: return 0; } @@ -588,7 +591,8 @@ xe_hwmon_in_is_visible(struct xe_hwmon *hwmon, u32 attr, int channel) switch (attr) { case hwmon_in_input: case hwmon_in_label: - return xe_hwmon_get_reg(hwmon, REG_GT_PERF_STATUS, channel) ? 0444 : 0; + return xe_reg_is_valid(xe_hwmon_get_reg(hwmon, REG_GT_PERF_STATUS, + channel)) ? 0444 : 0; default: return 0; } @@ -612,7 +616,8 @@ xe_hwmon_energy_is_visible(struct xe_hwmon *hwmon, u32 attr, int channel) switch (attr) { case hwmon_energy_input: case hwmon_energy_label: - return xe_hwmon_get_reg(hwmon, REG_PKG_ENERGY_STATUS, channel) ? 0444 : 0; + return xe_reg_is_valid(xe_hwmon_get_reg(hwmon, REG_PKG_ENERGY_STATUS, + channel)) ? 0444 : 0; default: return 0; } @@ -763,7 +768,7 @@ xe_hwmon_get_preregistration_info(struct xe_device *xe) * The contents of register PKG_POWER_SKU_UNIT do not change, * so read it once and store the shift values. */ - if (xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU_UNIT, 0)) { + if (xe_reg_is_valid(xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU_UNIT, 0))) { xe_hwmon_process_reg(hwmon, REG_PKG_POWER_SKU_UNIT, REG_READ32, &val_sku_unit, 0, 0, 0); hwmon->scl_shift_power = REG_FIELD_GET(PKG_PWR_UNIT, val_sku_unit); -- cgit v1.2.3 From 883232b47b81108b0252197c747f396ecd51455a Mon Sep 17 00:00:00 2001 From: Karthik Poosa Date: Fri, 5 Apr 2024 18:31:27 +0530 Subject: drm/xe/hwmon: Cast result to output precision on left shift of operand Address potential overflow in result of left shift of a lower precision (u32) operand before assignment to higher precision (u64) variable. v2: - Update commit message. 
(Himal) Fixes: 4446fcf220ce ("drm/xe/hwmon: Expose power1_max_interval") Signed-off-by: Karthik Poosa Reviewed-by: Anshuman Gupta Cc: Badal Nilawar Link: https://patchwork.freedesktop.org/patch/msgid/20240405130127.1392426-5-karthik.poosa@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_hwmon.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_hwmon.c b/drivers/gpu/drm/xe/xe_hwmon.c index a3b2ec0d24d8..453e601ddd5e 100644 --- a/drivers/gpu/drm/xe/xe_hwmon.c +++ b/drivers/gpu/drm/xe/xe_hwmon.c @@ -298,7 +298,7 @@ xe_hwmon_power_max_interval_show(struct device *dev, struct device_attribute *at * As y can be < 2, we compute tau4 = (4 | x) << y * and then add 2 when doing the final right shift to account for units */ - tau4 = ((1 << x_w) | x) << y; + tau4 = (u64)((1 << x_w) | x) << y; /* val in hwmon interface units (millisec) */ out = mul_u64_u32_shr(tau4, SF_TIME, hwmon->scl_shift_time + x_w); @@ -339,7 +339,7 @@ xe_hwmon_power_max_interval_store(struct device *dev, struct device_attribute *a r = FIELD_PREP(PKG_MAX_WIN, PKG_MAX_WIN_DEFAULT); x = REG_FIELD_GET(PKG_MAX_WIN_X, r); y = REG_FIELD_GET(PKG_MAX_WIN_Y, r); - tau4 = ((1 << x_w) | x) << y; + tau4 = (u64)((1 << x_w) | x) << y; max_win = mul_u64_u32_shr(tau4, SF_TIME, hwmon->scl_shift_time + x_w); if (val > max_win) -- cgit v1.2.3 From 8d315b803b7c4bf363625cc1d11b4a4402bdc93d Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Mon, 8 Apr 2024 22:35:35 +0530 Subject: drm/xe/xe2: Recognize Xe2_HPG IP Xe2_HPG uses the same general feature flags as Xe2_LPG. Xe2_HPG is identified as version 20.01 in the GMD_ID register. Bspec: 68090 Signed-off-by: Matt Roper Signed-off-by: Balasubramani Vivekanandan Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240408170545.3769566-2-balasubramani.vivekanandan@intel.com --- drivers/gpu/drm/xe/xe_pci.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 7ce37b28bfa4..f86f1d0d893d 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -174,7 +174,7 @@ static const struct xe_graphics_desc graphics_xelpg = { GENMASK(XE_HW_ENGINE_CCS3, XE_HW_ENGINE_CCS0) static const struct xe_graphics_desc graphics_xe2 = { - .name = "Xe2_LPG", + .name = "Xe2_LPG / Xe2_HPG", XE2_GFX_FEATURES, }; @@ -344,6 +344,7 @@ static const struct gmdid_map graphics_ip_map[] = { { 1270, &graphics_xelpg }, { 1271, &graphics_xelpg }, { 1274, &graphics_xelpg }, /* Xe_LPG+ */ + { 2001, &graphics_xe2 }, { 2004, &graphics_xe2 }, }; -- cgit v1.2.3 From 90d308655e414669833c01721e203fa4876ba6d9 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Mon, 8 Apr 2024 22:35:36 +0530 Subject: drm/xe/xe2: Recognize Xe2_HPM IP Xe2_HPM uses the same general feature flags as Xe2_LPM. Xe2_HPM is identified as version 13.01 in the GMD_ID register. 
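For reference, a sketch of how a GMD_ID readout maps to the version numbers used in the tables below (field names as in the driver's GMD_ID handling; treat this as illustrative rather than an exact quote of the code):

    /* e.g. arch 13, release 01 -> 1301, matching { 1301, &media_xe2 } */
    ver = REG_FIELD_GET(GMD_ID_ARCH_MASK, val) * 100 +
          REG_FIELD_GET(GMD_ID_RELEASE_MASK, val);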
Bspec: 68090, 67163 Signed-off-by: Matt Roper Signed-off-by: Balasubramani Vivekanandan Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240408170545.3769566-3-balasubramani.vivekanandan@intel.com --- drivers/gpu/drm/xe/xe_pci.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index f86f1d0d893d..15f626dbf10b 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -207,7 +207,7 @@ static const struct xe_media_desc media_xelpmp = { }; static const struct xe_media_desc media_xe2 = { - .name = "Xe2_LPM", + .name = "Xe2_LPM / Xe2_HPM", .hw_engine_mask = BIT(XE_HW_ENGINE_VCS0) | BIT(XE_HW_ENGINE_VECS0), /* TODO: GSC0 */ }; @@ -351,6 +351,7 @@ static const struct gmdid_map graphics_ip_map[] = { /* Map of GMD_ID values to media IP */ static const struct gmdid_map media_ip_map[] = { { 1300, &media_xelpmp }, + { 1301, &media_xe2 }, { 2000, &media_xe2 }, }; -- cgit v1.2.3 From 27cc23111a664bbf086e74fd4e14d38634f4b834 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Mon, 8 Apr 2024 22:35:37 +0530 Subject: drm/xe/bmg: Add BMG platform definition BMG is a discrete GPU based on the Xe2 architecture. No device ids are bound to the BMG platform descriptor yet. BMG device ids will be added once all the basic required platform enabling patches have landed. v2: Removed device ids, deferring it to a later patch v3: Squash in compat header IS_BATTLEMAGE() patch. (Lucas) Bspec: 68090 Signed-off-by: Matt Roper Signed-off-by: Balasubramani Vivekanandan Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240408170545.3769566-4-balasubramani.vivekanandan@intel.com --- drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h | 1 + drivers/gpu/drm/xe/xe_pci.c | 6 ++++++ drivers/gpu/drm/xe/xe_platform_types.h | 1 + 3 files changed, 8 insertions(+) diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h index fef969112b1d..90a279800612 100644 --- a/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h +++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h @@ -90,6 +90,7 @@ static inline struct drm_i915_private *kdev_to_i915(struct device *kdev) #define IS_PONTEVECCHIO(dev_priv) IS_PLATFORM(dev_priv, XE_PVC) #define IS_METEORLAKE(dev_priv) IS_PLATFORM(dev_priv, XE_METEORLAKE) #define IS_LUNARLAKE(dev_priv) IS_PLATFORM(dev_priv, XE_LUNARLAKE) +#define IS_BATTLEMAGE(dev_priv) IS_PLATFORM(dev_priv, XE_BATTLEMAGE) #define IS_HASWELL_ULT(dev_priv) (dev_priv && 0) #define IS_BROADWELL_ULT(dev_priv) (dev_priv && 0) diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 15f626dbf10b..fa2cc80a08a3 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -336,6 +336,12 @@ static const struct xe_device_desc lnl_desc = { .require_force_probe = true, }; +static const struct xe_device_desc bmg_desc __maybe_unused = { + DGFX_FEATURES, + PLATFORM(XE_BATTLEMAGE), + .require_force_probe = true, +}; + #undef PLATFORM __diag_pop(); diff --git a/drivers/gpu/drm/xe/xe_platform_types.h b/drivers/gpu/drm/xe/xe_platform_types.h index 553f53dbd093..79b7042c4534 100644 --- a/drivers/gpu/drm/xe/xe_platform_types.h +++ b/drivers/gpu/drm/xe/xe_platform_types.h @@ -22,6 +22,7 @@ enum xe_platform { XE_PVC, XE_METEORLAKE, XE_LUNARLAKE, + XE_BATTLEMAGE, }; enum xe_subplatform { -- cgit v1.2.3 From bdf59bbd9aea39ce3eca088e84a01717867bd5d4 Mon Sep 17 00:00:00 2001 From: Balasubramani Vivekanandan Date: 
Mon, 8 Apr 2024 22:35:38 +0530 Subject: drm/xe/bmg: Add BMG mocs table BMG uses the same MOCS table as LNL. Bspec: 71582 CC: Matt Roper Signed-off-by: Balasubramani Vivekanandan Reviewed-by: Matt Roper Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20240408170545.3769566-5-balasubramani.vivekanandan@intel.com --- drivers/gpu/drm/xe/xe_mocs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/xe/xe_mocs.c b/drivers/gpu/drm/xe/xe_mocs.c index d16fa64da881..1e92f8ee07ba 100644 --- a/drivers/gpu/drm/xe/xe_mocs.c +++ b/drivers/gpu/drm/xe/xe_mocs.c @@ -375,6 +375,7 @@ static unsigned int get_mocs_settings(struct xe_device *xe, switch (xe->info.platform) { case XE_LUNARLAKE: + case XE_BATTLEMAGE: info->size = ARRAY_SIZE(xe2_mocs_table); info->table = xe2_mocs_table; info->n_entries = XE2_NUM_MOCS_ENTRIES; -- cgit v1.2.3 From 183620f9ae3c8ebd9e126e07020b235d34547eb4 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Mon, 8 Apr 2024 22:35:39 +0530 Subject: drm/xe/bmg: Program an additional discrete-specific PAT setting Discrete Xe2 platforms require programming of one additional row of PAT settings which controls the access characteristics for PPGTT and LMTT page tables. Integrated GPUs do not need this programming and will leave the register at its hardware default value. Bspec: 71582 Signed-off-by: Matt Roper Signed-off-by: Balasubramani Vivekanandan Reviewed-by: Himal Prasad Ghimiray Link: https://patchwork.freedesktop.org/patch/msgid/20240408170545.3769566-6-balasubramani.vivekanandan@intel.com --- drivers/gpu/drm/xe/xe_pat.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_pat.c b/drivers/gpu/drm/xe/xe_pat.c index 66d8e3dd8237..13812042177d 100644 --- a/drivers/gpu/drm/xe/xe_pat.c +++ b/drivers/gpu/drm/xe/xe_pat.c @@ -142,6 +142,7 @@ static const struct xe_pat_table_entry xe2_pat_table[] = { /* Special PAT values programmed outside the main table */ static const struct xe_pat_table_entry xe2_pat_ats = XE2_PAT( 0, 0, 0, 0, 3, 3 ); +static const struct xe_pat_table_entry xe2_pat_pta = XE2_PAT( 0, 0, 0, 0, 3, 0 ); u16 xe_pat_index_get_coh_mode(struct xe_device *xe, u16 pat_index) { @@ -302,6 +303,9 @@ static void xe2lpg_program_pat(struct xe_gt *gt, const struct xe_pat_table_entry { program_pat_mcr(gt, table, n_entries); xe_gt_mcr_multicast_write(gt, XE_REG_MCR(_PAT_ATS), xe2_pat_ats.value); + + if (IS_DGFX(gt_to_xe(gt))) + xe_gt_mcr_multicast_write(gt, XE_REG_MCR(_PAT_PTA), xe2_pat_pta.value); } static void xe2lpm_program_pat(struct xe_gt *gt, const struct xe_pat_table_entry table[], @@ -309,6 +313,9 @@ static void xe2lpm_program_pat(struct xe_gt *gt, const struct xe_pat_table_entry { program_pat(gt, table, n_entries); xe_mmio_write32(gt, XE_REG(_PAT_ATS), xe2_pat_ats.value); + + if (IS_DGFX(gt_to_xe(gt))) + xe_mmio_write32(gt, XE_REG(_PAT_PTA), xe2_pat_pta.value); } static void xe2_dump(struct xe_gt *gt, struct drm_printer *p) -- cgit v1.2.3 From b5c2ca0372dcf5bb7403032ca609c9df1fb23bfc Mon Sep 17 00:00:00 2001 From: Himal Prasad Ghimiray Date: Mon, 8 Apr 2024 22:35:40 +0530 Subject: drm/xe/xe2hpg: Determine flat ccs offset for vram on Xe2 dgfx platform Determine the offset using the Flat CCS size bitfield of the XE2_FLAT_CCS_BASE_RANGE_[UPPER/LOWER] MCR registers.
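The two register halves compose into a single address, scaled to the software view, as in get_flat_ccs_offset() in the hunk below:

    offset = offset_hi << 32;   /* HW view bits 39:32 */
    offset |= offset_lo << 6;   /* HW view bits 31:6 */
    offset *= num_enabled;      /* scale by enabled L3 nodes for SW view */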
v2: function argument tile_size changed from pass by reference to pass by value Bspec: 68023 Signed-off-by: Himal Prasad Ghimiray Signed-off-by: Akshata Jahagirdar Signed-off-by: Matthew Auld Signed-off-by: Balasubramani Vivekanandan Reviewed-by: Matt Roper Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20240408170545.3769566-7-balasubramani.vivekanandan@intel.com --- drivers/gpu/drm/xe/regs/xe_gt_regs.h | 5 +++++ drivers/gpu/drm/xe/xe_mmio.c | 39 ++++++++++++++++++++++++++++++++++-- 2 files changed, 42 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index 6617c86a096b..d404f211bc36 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -69,6 +69,7 @@ #define XEHP_TILE_ADDR_RANGE(_idx) XE_REG_MCR(0x4900 + (_idx) * 4) #define XEHP_FLAT_CCS_BASE_ADDR XE_REG_MCR(0x4910) +#define XEHP_FLAT_CCS_PTR REG_GENMASK(31, 8) #define WM_CHICKEN3 XE_REG_MCR(0x5588, XE_REG_OPTION_MASKED) #define HIZ_PLANE_COMPRESSION_DIS REG_BIT(10) @@ -142,6 +143,10 @@ #define XE2_FLAT_CCS_BASE_RANGE_LOWER XE_REG_MCR(0x8800) #define XE2_FLAT_CCS_ENABLE REG_BIT(0) +#define XE2_FLAT_CCS_BASE_LOWER_ADDR_MASK REG_GENMASK(31, 6) + +#define XE2_FLAT_CCS_BASE_RANGE_UPPER XE_REG_MCR(0x8804) +#define XE2_FLAT_CCS_BASE_UPPER_ADDR_MASK REG_GENMASK(7, 0) #define GSCPSMI_BASE XE_REG(0x880c) diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index 5d13fc7cb9d2..d66da1a9f165 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -163,6 +163,42 @@ static int xe_determine_lmem_bar_size(struct xe_device *xe) return 0; } +static inline u64 get_flat_ccs_offset(struct xe_gt *gt, u64 tile_size) +{ + struct xe_device *xe = gt_to_xe(gt); + u64 offset; + u32 reg; + + if (GRAPHICS_VER(xe) >= 20) { + u64 ccs_size = tile_size / 512; + u64 offset_hi, offset_lo; + u32 nodes, num_enabled; + + reg = xe_mmio_read32(gt, MIRROR_FUSE3); + nodes = REG_FIELD_GET(XE2_NODE_ENABLE_MASK, reg); + num_enabled = hweight32(nodes); /* Number of enabled l3 nodes */ + + reg = xe_gt_mcr_unicast_read_any(gt, XE2_FLAT_CCS_BASE_RANGE_LOWER); + offset_lo = REG_FIELD_GET(XE2_FLAT_CCS_BASE_LOWER_ADDR_MASK, reg); + + reg = xe_gt_mcr_unicast_read_any(gt, XE2_FLAT_CCS_BASE_RANGE_UPPER); + offset_hi = REG_FIELD_GET(XE2_FLAT_CCS_BASE_UPPER_ADDR_MASK, reg); + + offset = offset_hi << 32; /* HW view bits 39:32 */ + offset |= offset_lo << 6; /* HW view bits 31:6 */ + offset *= num_enabled; /* convert to SW view */ + + /* We don't expect any holes */ + xe_assert_msg(xe, offset == (xe_mmio_read64_2x32(gt, GSMBASE) - ccs_size), + "Hole between CCS and GSM.\n"); + } else { + reg = xe_gt_mcr_unicast_read_any(gt, XEHP_FLAT_CCS_BASE_ADDR); + offset = (u64)REG_FIELD_GET(XEHP_FLAT_CCS_PTR, reg) * SZ_64K; + } + + return offset; +} + /** * xe_mmio_tile_vram_size() - Collect vram size and offset information * @tile: tile to get info for @@ -207,8 +243,7 @@ static int xe_mmio_tile_vram_size(struct xe_tile *tile, u64 *vram_size, /* minus device usage */ if (xe->info.has_flat_ccs) { - reg = xe_gt_mcr_unicast_read_any(gt, XEHP_FLAT_CCS_BASE_ADDR); - offset = (u64)REG_FIELD_GET(GENMASK(31, 8), reg) * SZ_64K; + offset = get_flat_ccs_offset(gt, *tile_size); } else { offset = xe_mmio_read64_2x32(gt, GSMBASE); } -- cgit v1.2.3 From e9c22984e9d81cbaebb3e1085d2f510258ef63c4 Mon Sep 17 00:00:00 2001 From: Akshata Jahagirdar Date: Mon, 8 Apr 2024 22:35:41 +0530 Subject: drm/xe/xe2hpg: Remove extra allocation of CCS pages 
for dgfx On Xe2 dGPU, compression is only supported with VRAM. When copying from VRAM -> system memory the KMD uses mapping with uncompressed PAT so the copy in system memory is guaranteed to be uncompressed. When restoring such buffers from system memory -> VRAM the KMD can't easily know which pages were originally compressed, so we always use uncompressed -> uncompressed here. So this means that there's no need for extra CCS storage on such platforms. v2: More description added to commit message Signed-off-by: Akshata Jahagirdar Signed-off-by: Balasubramani Vivekanandan Reviewed-by: Himal Prasad Ghimiray Reviewed-by: Matt Roper Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20240408170545.3769566-8-balasubramani.vivekanandan@intel.com --- drivers/gpu/drm/xe/xe_bo.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 6166bc715656..fdeb3691d3f6 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -2201,6 +2201,9 @@ bool xe_bo_needs_ccs_pages(struct xe_bo *bo) { struct xe_device *xe = xe_bo_device(bo); + if (GRAPHICS_VER(xe) >= 20 && IS_DGFX(xe)) + return false; + if (!xe_device_has_flat_ccs(xe) || bo->ttm.type != ttm_bo_type_device) return false; -- cgit v1.2.3 From 74671d23ca1803123de2d2eaf73f6b91b6b51f55 Mon Sep 17 00:00:00 2001 From: Bommu Krishnaiah Date: Mon, 8 Apr 2024 22:35:42 +0530 Subject: drm/xe/xe2: Add workaround 18034896535 Add 18034896535 as a driver permanent workaround. v2: 18034896535 and 16021540221 are two independent workarounds that just happen to have the same implementation, hence the seemingly duplicate entry is kept. Signed-off-by: Bommu Krishnaiah Reviewed-by: Tejas Upadhyay Cc: Tejas Upadhyay Cc: Matt Roper Reviewed-by: Matt Roper Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20240408170545.3769566-9-balasubramani.vivekanandan@intel.com --- drivers/gpu/drm/xe/xe_wa.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index c904e55ced9c..43fac92e5d20 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -428,6 +428,11 @@ static const struct xe_rtp_entry_sr engine_was[] = { FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(ROW_CHICKEN4, DISABLE_TDL_PUSH)) }, + { XE_RTP_NAME("18034896535"), + XE_RTP_RULES(GRAPHICS_VERSION(2004), + FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(SET(ROW_CHICKEN4, DISABLE_TDL_PUSH)) + }, { XE_RTP_NAME("14019322943"), XE_RTP_RULES(GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0), FUNC(xe_rtp_match_first_render_or_compute)), -- cgit v1.2.3 From 7f3ee7d880588f1b67d47593f4960edae3a776ad Mon Sep 17 00:00:00 2001 From: Haridhar Kalvala Date: Mon, 8 Apr 2024 22:35:43 +0530 Subject: drm/xe/xe2hpg: Add initial GT workarounds Add the initial set of Xe2_HPG gt/engine/lrc workarounds.
v2: Removed WA_16020183090 which is no longer applicable Extended WA_18033852989 and WA_18034896535 also to xe2hpg Signed-off-by: Haridhar Kalvala Signed-off-by: Clint Taylor Signed-off-by: Gustavo Sousa Signed-off-by: Dnyaneshar Bhadane Signed-off-by: Shekhar Chauhan Signed-off-by: Balasubramani Vivekanandan Reviewed-by: Matt Roper Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20240408170545.3769566-10-balasubramani.vivekanandan@intel.com --- drivers/gpu/drm/xe/regs/xe_gt_regs.h | 5 +++ drivers/gpu/drm/xe/xe_wa.c | 67 ++++++++++++++++++++++++++++++++++-- 2 files changed, 70 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index d404f211bc36..0ce79ba19bda 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -74,6 +74,9 @@ #define WM_CHICKEN3 XE_REG_MCR(0x5588, XE_REG_OPTION_MASKED) #define HIZ_PLANE_COMPRESSION_DIS REG_BIT(10) +#define CHICKEN_RASTER_1 XE_REG_MCR(0x6204, XE_REG_OPTION_MASKED) +#define DIS_SF_ROUND_NEAREST_EVEN REG_BIT(8) + #define CHICKEN_RASTER_2 XE_REG_MCR(0x6208, XE_REG_OPTION_MASKED) #define TBIMR_FAST_CLIP REG_BIT(5) @@ -355,6 +358,7 @@ #define THREAD_EX_ARB_MODE_RR_AFTER_DEP REG_FIELD_PREP(THREAD_EX_ARB_MODE, 0x2) #define ROW_CHICKEN3 XE_REG_MCR(0xe49c, XE_REG_OPTION_MASKED) +#define XE2_EUPEND_CHK_FLUSH_DIS REG_BIT(14) #define DIS_FIX_EOT1_FLUSH REG_BIT(9) #define TDL_TSL_CHICKEN XE_REG_MCR(0xe4c4, XE_REG_OPTION_MASKED) @@ -385,6 +389,7 @@ #define LSC_CHICKEN_BIT_0 XE_REG_MCR(0xe7c8) #define DISABLE_D8_D16_COASLESCE REG_BIT(30) +#define WR_REQ_CHAINING_DIS REG_BIT(26) #define TGM_WRITE_EOM_FORCE REG_BIT(17) #define FORCE_1_SUB_MESSAGE_PER_FRAGMENT REG_BIT(15) #define SEQUENTIAL_ACCESS_UPGRADE_DISABLE REG_BIT(13) diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index 43fac92e5d20..014d27c126ae 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -429,7 +429,7 @@ static const struct xe_rtp_entry_sr engine_was[] = { XE_RTP_ACTIONS(SET(ROW_CHICKEN4, DISABLE_TDL_PUSH)) }, { XE_RTP_NAME("18034896535"), - XE_RTP_RULES(GRAPHICS_VERSION(2004), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2004), FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(ROW_CHICKEN4, DISABLE_TDL_PUSH)) }, @@ -464,6 +464,55 @@ static const struct xe_rtp_entry_sr engine_was[] = { XE_RTP_RULES(GRAPHICS_VERSION(2004), FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(TDL_TSL_CHICKEN, SLM_WMTP_RESTORE)) }, + + /* Xe2_HPG */ + + { XE_RTP_NAME("16018712365"), + XE_RTP_RULES(GRAPHICS_VERSION(2001), FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0_UDW, XE2_ALLOC_DPA_STARVE_FIX_DIS)) + }, + { XE_RTP_NAME("16018737384"), + XE_RTP_RULES(GRAPHICS_VERSION(2001), FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(SET(ROW_CHICKEN, EARLY_EOT_DIS)) + }, + { XE_RTP_NAME("14019988906"), + XE_RTP_RULES(GRAPHICS_VERSION(2001), FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(SET(XEHP_PSS_CHICKEN, FLSH_IGNORES_PSD)) + }, + { XE_RTP_NAME("14019877138"), + XE_RTP_RULES(GRAPHICS_VERSION(2001), FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(SET(XEHP_PSS_CHICKEN, FD_END_COLLECT)) + }, + { XE_RTP_NAME("14020338487"), + XE_RTP_RULES(GRAPHICS_VERSION(2001), FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(SET(ROW_CHICKEN3, XE2_EUPEND_CHK_FLUSH_DIS)) + }, + { XE_RTP_NAME("18032247524"), + XE_RTP_RULES(GRAPHICS_VERSION(2001),
FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0, SEQUENTIAL_ACCESS_UPGRADE_DISABLE)) + }, + { XE_RTP_NAME("14018471104"), + XE_RTP_RULES(GRAPHICS_VERSION(2001), FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0_UDW, ENABLE_SMP_LD_RENDER_SURFACE_CONTROL)) + }, + /* + * Although this workaround isn't required for the RCS, disabling these + * reports has no impact for our driver or the GuC, so we go ahead and + * apply this to all engines for simplicity. + */ + { XE_RTP_NAME("16021639441"), + XE_RTP_RULES(GRAPHICS_VERSION(2001)), + XE_RTP_ACTIONS(SET(CSFE_CHICKEN1(0), + GHWSP_CSB_REPORT_DIS | + PPHWSP_CSB_AND_TIMESTAMP_REPORT_DIS, + XE_RTP_ACTION_FLAG(ENGINE_BASE))) + }, + { XE_RTP_NAME("14019811474"), + XE_RTP_RULES(GRAPHICS_VERSION(2001), + FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0, WR_REQ_CHAINING_DIS)) + }, + {} }; @@ -585,10 +634,24 @@ static const struct xe_rtp_entry_sr lrc_was[] = { XE_RTP_ACTIONS(SET(INSTPM(RENDER_RING_BASE), ENABLE_SEMAPHORE_POLL_BIT)) }, { XE_RTP_NAME("18033852989"), - XE_RTP_RULES(GRAPHICS_VERSION(2004), ENGINE_CLASS(RENDER)), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2004), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(COMMON_SLICE_CHICKEN1, DISABLE_BOTTOM_CLIP_RECTANGLE_TEST)) }, + /* Xe2_HPG */ + { XE_RTP_NAME("15010599737"), + XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(SET(CHICKEN_RASTER_1, DIS_SF_ROUND_NEAREST_EVEN)) + }, + { XE_RTP_NAME("14019386621"), + XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(SET(VF_SCRATCHPAD, XE2_VFG_TED_CREDIT_INTERFACE_DISABLE)) + }, + { XE_RTP_NAME("14020756599"), + XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(SET(WM_CHICKEN3, HIZ_PLANE_COMPRESSION_DIS)) + }, + {} }; -- cgit v1.2.3 From e391ab659b547676b15ba058786e91d1d1d3ad4d Mon Sep 17 00:00:00 2001 From: Shekhar Chauhan Date: Mon, 8 Apr 2024 22:35:44 +0530 Subject: drm/xe/xe2hpg: Introduce performance tuning changes for Xe2_HPG. Introduces performance tuning guide changes for Xe2_HPG. v2: Switched to open upper bound for "Tuning: L3 Cache" setting.
BSpec: 72161 Signed-off-by: Shekhar Chauhan Signed-off-by: Balasubramani Vivekanandan Reviewed-by: Matt Roper Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20240408170545.3769566-11-balasubramani.vivekanandan@intel.com --- drivers/gpu/drm/xe/xe_tuning.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_tuning.c b/drivers/gpu/drm/xe/xe_tuning.c index bb6db2817ada..d4e6fa918942 100644 --- a/drivers/gpu/drm/xe/xe_tuning.c +++ b/drivers/gpu/drm/xe/xe_tuning.c @@ -28,7 +28,7 @@ static const struct xe_rtp_entry_sr gt_tunings[] = { /* Xe2 */ { XE_RTP_NAME("Tuning: L3 cache"), - XE_RTP_RULES(GRAPHICS_VERSION(2004)), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED)), XE_RTP_ACTIONS(FIELD_SET(XEHP_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK, REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f))) }, @@ -38,11 +38,11 @@ static const struct xe_rtp_entry_sr gt_tunings[] = { REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f))) }, { XE_RTP_NAME("Tuning: Compression Overfetch"), - XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2004, XE_RTP_END_VERSION_UNDEFINED)), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED)), XE_RTP_ACTIONS(CLR(CCCHKNREG1, ENCOMPPERFFIX)), }, { XE_RTP_NAME("Tuning: Enable compressible partial write overfetch in L3"), - XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2004, XE_RTP_END_VERSION_UNDEFINED)), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED)), XE_RTP_ACTIONS(SET(L3SQCREG3, COMPPWOVERFETCHEN)) }, {} -- cgit v1.2.3 From 7cd05ef89c9d1c63cbf2bb904c831073eb919228 Mon Sep 17 00:00:00 2001 From: Gustavo Sousa Date: Mon, 8 Apr 2024 22:35:45 +0530 Subject: drm/xe/xe2hpm: Add initial set of workarounds Define the initial set of workarounds for Xe2_HPM. 
Signed-off-by: Gustavo Sousa Signed-off-by: Balasubramani Vivekanandan Reviewed-by: Matt Roper Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20240408170545.3769566-12-balasubramani.vivekanandan@intel.com --- drivers/gpu/drm/xe/regs/xe_gt_regs.h | 4 ++++ drivers/gpu/drm/xe/xe_wa.c | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+) diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index 0ce79ba19bda..8fe811ea404a 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -280,6 +280,10 @@ #define FORCEWAKE_GT XE_REG(0xa188) #define PG_ENABLE XE_REG(0xa210) +#define VD2_MFXVDENC_POWERGATE_ENABLE REG_BIT(8) +#define VD2_HCP_POWERGATE_ENABLE REG_BIT(7) +#define VD0_MFXVDENC_POWERGATE_ENABLE REG_BIT(4) +#define VD0_HCP_POWERGATE_ENABLE REG_BIT(3) #define CTC_MODE XE_REG(0xa26c) #define CTC_SHIFT_PARAMETER_MASK REG_GENMASK(2, 1) diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index 014d27c126ae..632bd9066f8d 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -228,6 +228,28 @@ static const struct xe_rtp_entry_sr gt_was[] = { XE_RTP_ENTRY_FLAG(FOREACH_ENGINE), }, + /* Xe2_HPM */ + + { XE_RTP_NAME("16021867713"), + XE_RTP_RULES(MEDIA_VERSION(1301), + ENGINE_CLASS(VIDEO_DECODE)), + XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F1C(0), MFXPIPE_CLKGATE_DIS)), + XE_RTP_ENTRY_FLAG(FOREACH_ENGINE), + }, + { XE_RTP_NAME("14020316580"), + XE_RTP_RULES(MEDIA_VERSION(1301)), + XE_RTP_ACTIONS(CLR(PG_ENABLE, + VD0_HCP_POWERGATE_ENABLE | + VD0_MFXVDENC_POWERGATE_ENABLE | + VD2_HCP_POWERGATE_ENABLE | + VD2_MFXVDENC_POWERGATE_ENABLE)), + }, + { XE_RTP_NAME("14019449301"), + XE_RTP_RULES(MEDIA_VERSION(1301), ENGINE_CLASS(VIDEO_DECODE)), + XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F08(0), CG3DDISHRS_CLKGATE_DIS)), + XE_RTP_ENTRY_FLAG(FOREACH_ENGINE), + }, + {} }; @@ -513,6 +535,16 @@ static const struct xe_rtp_entry_sr engine_was[] = { XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0, WR_REQ_CHAINING_DIS)) }, + /* Xe2_HPM */ + + { XE_RTP_NAME("16021639441"), + XE_RTP_RULES(MEDIA_VERSION(1301)), + XE_RTP_ACTIONS(SET(CSFE_CHICKEN1(0), + GHWSP_CSB_REPORT_DIS | + PPHWSP_CSB_AND_TIMESTAMP_REPORT_DIS, + XE_RTP_ACTION_FLAG(ENGINE_BASE))) + }, + {} }; -- cgit v1.2.3 From 5d6678882da71491768c6d272199360b16d2e1f6 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Tue, 9 Apr 2024 12:51:05 +0200 Subject: drm/xe: Assert pat.ops function pointers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make sure that pat.ops (if selected) has all required function pointers set up. Only .program_media may be omitted if we have an older media version. This should help avoid late runtime checks against individual function pointers.
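For illustration only (a sketch of the intended benefit, not code from this patch): once vtable completeness is asserted at init time, callers can guard on the single ops pointer instead of on individual hooks, which is exactly the simplification the next patch applies to xe_pat_dump():

	/* safe: xe_pat_init_early() asserted any non-NULL pat.ops has .dump */
	if (xe->pat.ops)
		xe->pat.ops->dump(gt, p);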
Signed-off-by: Michal Wajdeczko Cc: Piotr Piórkowski Cc: Matt Roper Reviewed-by: Piotr Piórkowski Link: https://patchwork.freedesktop.org/patch/msgid/20240409105106.1067-1-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_pat.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_pat.c b/drivers/gpu/drm/xe/xe_pat.c index 13812042177d..d76831a4e5d1 100644 --- a/drivers/gpu/drm/xe/xe_pat.c +++ b/drivers/gpu/drm/xe/xe_pat.c @@ -435,6 +435,10 @@ void xe_pat_init_early(struct xe_device *xe) /* VFs can't program nor dump PAT settings */ if (IS_SRIOV_VF(xe)) xe->pat.ops = NULL; + + xe_assert(xe, !xe->pat.ops || xe->pat.ops->dump); + xe_assert(xe, !xe->pat.ops || xe->pat.ops->program_graphics); + xe_assert(xe, !xe->pat.ops || MEDIA_VER(xe) < 13 || xe->pat.ops->program_media); } void xe_pat_init(struct xe_gt *gt) -- cgit v1.2.3 From a918e771e6fbe1fa68932af5b0cdf473e23090cc Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Tue, 9 Apr 2024 12:51:06 +0200 Subject: drm/xe: Check pat.ops before dumping PAT settings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We may leave pat.ops unset when running on a brand new platform or when running as a VF. While the former is unlikely, the latter is a valid (future) use case and will cause an NPD when someone tries to dump PAT settings via debugfs. It's better to check the pointer to pat.ops instead of the specific .dump hook, as we have this hook always defined for every .ops variant. Signed-off-by: Michal Wajdeczko Reviewed-by: Piotr Piórkowski Link: https://patchwork.freedesktop.org/patch/msgid/20240409105106.1067-2-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_pat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_pat.c b/drivers/gpu/drm/xe/xe_pat.c index d76831a4e5d1..d5b516f115ad 100644 --- a/drivers/gpu/drm/xe/xe_pat.c +++ b/drivers/gpu/drm/xe/xe_pat.c @@ -458,7 +458,7 @@ void xe_pat_dump(struct xe_gt *gt, struct drm_printer *p) { struct xe_device *xe = gt_to_xe(gt); - if (!xe->pat.ops->dump) + if (!xe->pat.ops) return; xe->pat.ops->dump(gt, p); -- cgit v1.2.3 From e806fac0bd35512ac4590d89dabe0eb15a1721c4 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Thu, 4 Apr 2024 17:44:29 +0200 Subject: drm/xe: Add max_vfs module parameter We want to have an option to limit the number of VFs that the PF driver will be able to manage. With this limit set to zero we will also have a way to completely disable the PF functionality. Since we currently don't support SR-IOV on any platform, we start with this limit set to zero by default. Signed-off-by: Michal Wajdeczko Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240404154431.583-2-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_module.c | 7 +++++++ drivers/gpu/drm/xe/xe_module.h | 3 +++ 2 files changed, 10 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_module.c b/drivers/gpu/drm/xe/xe_module.c index 110b69864656..ceb8345cbca6 100644 --- a/drivers/gpu/drm/xe/xe_module.c +++ b/drivers/gpu/drm/xe/xe_module.c @@ -48,6 +48,13 @@ module_param_named_unsafe(force_probe, xe_modparam.force_probe, charp, 0400); MODULE_PARM_DESC(force_probe, "Force probe options for specified devices. See CONFIG_DRM_XE_FORCE_PROBE for details."); +#ifdef CONFIG_PCI_IOV +module_param_named(max_vfs, xe_modparam.max_vfs, uint, 0400); +MODULE_PARM_DESC(max_vfs, + "Limit number of Virtual Functions (VFs) that could be managed. 
" + "(0 = no VFs [default]; N = allow up to N VFs)"); +#endif + struct init_funcs { int (*init)(void); void (*exit)(void); diff --git a/drivers/gpu/drm/xe/xe_module.h b/drivers/gpu/drm/xe/xe_module.h index 88ef0e8b2bfd..b369984f08ec 100644 --- a/drivers/gpu/drm/xe/xe_module.h +++ b/drivers/gpu/drm/xe/xe_module.h @@ -18,6 +18,9 @@ struct xe_modparam { char *huc_firmware_path; char *gsc_firmware_path; char *force_probe; +#ifdef CONFIG_PCI_IOV + unsigned int max_vfs; +#endif }; extern struct xe_modparam xe_modparam; -- cgit v1.2.3 From 146e4384956926d95c7636020adb5c4949c690da Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Thu, 4 Apr 2024 17:44:30 +0200 Subject: drm/xe: Add proper detection of the SR-IOV PF mode SR-IOV PF mode detection is based on PCI capability as reported by the PCI dev_is_pf() function and additionally on 'max_vfs' module parameter which could be also used to disable PF capability even if SR-IOV PF capability is reported by the hardware. Signed-off-by: Michal Wajdeczko Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240404154431.583-3-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/Makefile | 3 +- drivers/gpu/drm/xe/xe_device_types.h | 4 ++ drivers/gpu/drm/xe/xe_sriov.c | 3 ++ drivers/gpu/drm/xe/xe_sriov_pf.c | 89 ++++++++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_sriov_pf.h | 24 ++++++++++ drivers/gpu/drm/xe/xe_sriov_types.h | 15 ++++++ 6 files changed, 137 insertions(+), 1 deletion(-) create mode 100644 drivers/gpu/drm/xe/xe_sriov_pf.c create mode 100644 drivers/gpu/drm/xe/xe_sriov_pf.h diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index 21316ee47026..e106767c9a6e 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -159,7 +159,8 @@ xe-$(CONFIG_PCI_IOV) += \ xe_gt_sriov_pf_control.o \ xe_lmtt.o \ xe_lmtt_2l.o \ - xe_lmtt_ml.o + xe_lmtt_ml.o \ + xe_sriov_pf.o # include helpers for tests even when XE is built-in ifdef CONFIG_DRM_XE_KUNIT_TEST diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index c710cec835a7..8244b177a6a3 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -321,6 +321,10 @@ struct xe_device { struct { /** @sriov.__mode: SR-IOV mode (Don't access directly!) */ enum xe_sriov_mode __mode; + + /** @sriov.pf: PF specific data */ + struct xe_device_pf pf; + /** @sriov.wq: workqueue used by the virtualization workers */ struct workqueue_struct *wq; } sriov; diff --git a/drivers/gpu/drm/xe/xe_sriov.c b/drivers/gpu/drm/xe/xe_sriov.c index 3e103edf7174..94fa98d8206e 100644 --- a/drivers/gpu/drm/xe/xe_sriov.c +++ b/drivers/gpu/drm/xe/xe_sriov.c @@ -11,6 +11,7 @@ #include "xe_device.h" #include "xe_mmio.h" #include "xe_sriov.h" +#include "xe_sriov_pf.h" /** * xe_sriov_mode_to_string - Convert enum value to string. 
@@ -58,6 +59,8 @@ void xe_sriov_probe_early(struct xe_device *xe) if (has_sriov) { if (test_is_vf(xe)) mode = XE_SRIOV_MODE_VF; + else if (xe_sriov_pf_readiness(xe)) + mode = XE_SRIOV_MODE_PF; } xe_assert(xe, !xe->sriov.__mode); diff --git a/drivers/gpu/drm/xe/xe_sriov_pf.c b/drivers/gpu/drm/xe/xe_sriov_pf.c new file mode 100644 index 000000000000..030c2b69ecc4 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sriov_pf.c @@ -0,0 +1,89 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2023-2024 Intel Corporation + */ + +#include "xe_assert.h" +#include "xe_device.h" +#include "xe_module.h" +#include "xe_sriov.h" +#include "xe_sriov_pf.h" +#include "xe_sriov_printk.h" + +static unsigned int wanted_max_vfs(struct xe_device *xe) +{ + return xe_modparam.max_vfs; +} + +static int pf_reduce_totalvfs(struct xe_device *xe, int limit) +{ + struct device *dev = xe->drm.dev; + struct pci_dev *pdev = to_pci_dev(dev); + int err; + + err = pci_sriov_set_totalvfs(pdev, limit); + if (err) + xe_sriov_notice(xe, "Failed to set number of VFs to %d (%pe)\n", + limit, ERR_PTR(err)); + return err; +} + +static bool pf_continue_as_native(struct xe_device *xe, const char *why) +{ + xe_sriov_dbg(xe, "%s, continuing as native\n", why); + pf_reduce_totalvfs(xe, 0); + return false; +} + +/** + * xe_sriov_pf_readiness - Check if PF functionality can be enabled. + * @xe: the &xe_device to check + * + * This function is called as part of the SR-IOV probe to validate if all + * PF prerequisites are satisfied and we can continue with enabling PF mode. + * + * Return: true if the PF mode can be turned on. + */ +bool xe_sriov_pf_readiness(struct xe_device *xe) +{ + struct device *dev = xe->drm.dev; + struct pci_dev *pdev = to_pci_dev(dev); + int totalvfs = pci_sriov_get_totalvfs(pdev); + int newlimit = min_t(u16, wanted_max_vfs(xe), totalvfs); + + xe_assert(xe, totalvfs <= U16_MAX); + + if (!dev_is_pf(dev)) + return false; + + if (!xe_device_uc_enabled(xe)) + return pf_continue_as_native(xe, "Guc submission disabled"); + + if (!newlimit) + return pf_continue_as_native(xe, "all VFs disabled"); + + pf_reduce_totalvfs(xe, newlimit); + + xe->sriov.pf.device_total_vfs = totalvfs; + xe->sriov.pf.driver_max_vfs = newlimit; + + return true; +} + +/** + * xe_sriov_pf_print_vfs_summary - Print SR-IOV PF information. + * @xe: the &xe_device to print info from + * @p: the &drm_printer + * + * Print SR-IOV PF related information into provided DRM printer. 
+ */ +void xe_sriov_pf_print_vfs_summary(struct xe_device *xe, struct drm_printer *p) +{ + struct pci_dev *pdev = to_pci_dev(xe->drm.dev); + + xe_assert(xe, IS_SRIOV_PF(xe)); + + drm_printf(p, "total: %u\n", xe->sriov.pf.device_total_vfs); + drm_printf(p, "supported: %u\n", xe->sriov.pf.driver_max_vfs); + drm_printf(p, "enabled: %u\n", pci_num_vf(pdev)); +} diff --git a/drivers/gpu/drm/xe/xe_sriov_pf.h b/drivers/gpu/drm/xe/xe_sriov_pf.h new file mode 100644 index 000000000000..ebef2e01838a --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sriov_pf.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023-2024 Intel Corporation + */ + +#ifndef _XE_SRIOV_PF_H_ +#define _XE_SRIOV_PF_H_ + +#include <linux/types.h> + +struct drm_printer; +struct xe_device; + +#ifdef CONFIG_PCI_IOV +bool xe_sriov_pf_readiness(struct xe_device *xe); +void xe_sriov_pf_print_vfs_summary(struct xe_device *xe, struct drm_printer *p); +#else +static inline bool xe_sriov_pf_readiness(struct xe_device *xe) +{ + return false; +} +#endif + +#endif diff --git a/drivers/gpu/drm/xe/xe_sriov_types.h b/drivers/gpu/drm/xe/xe_sriov_types.h index 1a138108d139..fa583e8fa0c2 100644 --- a/drivers/gpu/drm/xe/xe_sriov_types.h +++ b/drivers/gpu/drm/xe/xe_sriov_types.h @@ -7,6 +7,7 @@ #define _XE_SRIOV_TYPES_H_ #include <linux/build_bug.h> +#include <linux/types.h> /** * VFID - Virtual Function Identifier @@ -37,4 +38,18 @@ enum xe_sriov_mode { }; static_assert(XE_SRIOV_MODE_NONE); +/** + * struct xe_device_pf - Xe PF related data + * + * The data in this structure is valid only if the driver is running in the + * @XE_SRIOV_MODE_PF mode. + */ +struct xe_device_pf { + /** @device_total_vfs: Maximum number of VFs supported by the device. */ + u16 device_total_vfs; + + /** @driver_max_vfs: Maximum number of VFs supported by the driver. */ + u16 driver_max_vfs; +}; + #endif -- cgit v1.2.3 From 3df49b2e71ba28bfb468df9752f23c270bf1f273 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Thu, 4 Apr 2024 17:44:31 +0200 Subject: drm/xe: Add SR-IOV info attribute to debugfs As SR-IOV support varies between platforms and the driver can run in different SR-IOV modes, add a debugfs file with these details.
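For illustration, reading the new node would produce output shaped by the drm_printf() calls in the diff below; the path and values here are hypothetical examples, and the exact mode string comes from xe_sriov_mode_to_string():

	$ cat /sys/kernel/debug/dri/0/sriov_info
	supported: yes
	enabled: no
	mode: none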
Signed-off-by: Michal Wajdeczko Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240404154431.583-4-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_debugfs.c | 11 +++++++++++ drivers/gpu/drm/xe/xe_sriov.c | 14 ++++++++++++++ drivers/gpu/drm/xe/xe_sriov.h | 3 +++ 3 files changed, 28 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_debugfs.c b/drivers/gpu/drm/xe/xe_debugfs.c index 86150cafe0ff..c9b30dbdc14d 100644 --- a/drivers/gpu/drm/xe/xe_debugfs.c +++ b/drivers/gpu/drm/xe/xe_debugfs.c @@ -13,6 +13,7 @@ #include "xe_device.h" #include "xe_gt_debugfs.h" #include "xe_pm.h" +#include "xe_sriov.h" #include "xe_step.h" #ifdef CONFIG_DRM_XE_DEBUG @@ -70,8 +71,18 @@ static int info(struct seq_file *m, void *data) return 0; } +static int sriov_info(struct seq_file *m, void *data) +{ + struct xe_device *xe = node_to_xe(m->private); + struct drm_printer p = drm_seq_file_printer(m); + + xe_sriov_print_info(xe, &p); + return 0; +} + static const struct drm_info_list debugfs_list[] = { {"info", info, 0}, + { .name = "sriov_info", .show = sriov_info, }, }; static int forcewake_open(struct inode *inode, struct file *file) diff --git a/drivers/gpu/drm/xe/xe_sriov.c b/drivers/gpu/drm/xe/xe_sriov.c index 94fa98d8206e..d324f131e3da 100644 --- a/drivers/gpu/drm/xe/xe_sriov.c +++ b/drivers/gpu/drm/xe/xe_sriov.c @@ -101,3 +101,17 @@ int xe_sriov_init(struct xe_device *xe) return drmm_add_action_or_reset(&xe->drm, fini_sriov, xe); } + +/** + * xe_sriov_print_info - Print basic SR-IOV information. + * @xe: the &xe_device to print info from + * @p: the &drm_printer + * + * Print SR-IOV related information into provided DRM printer. + */ +void xe_sriov_print_info(struct xe_device *xe, struct drm_printer *p) +{ + drm_printf(p, "supported: %s\n", str_yes_no(xe_device_has_sriov(xe))); + drm_printf(p, "enabled: %s\n", str_yes_no(IS_SRIOV(xe))); + drm_printf(p, "mode: %s\n", xe_sriov_mode_to_string(xe_device_sriov_mode(xe))); +} diff --git a/drivers/gpu/drm/xe/xe_sriov.h b/drivers/gpu/drm/xe/xe_sriov.h index 9e3f58874e98..f9dec84d77e3 100644 --- a/drivers/gpu/drm/xe/xe_sriov.h +++ b/drivers/gpu/drm/xe/xe_sriov.h @@ -10,9 +10,12 @@ #include "xe_device_types.h" #include "xe_sriov_types.h" +struct drm_printer; + const char *xe_sriov_mode_to_string(enum xe_sriov_mode mode); void xe_sriov_probe_early(struct xe_device *xe); +void xe_sriov_print_info(struct xe_device *xe, struct drm_printer *p); int xe_sriov_init(struct xe_device *xe); static inline enum xe_sriov_mode xe_device_sriov_mode(struct xe_device *xe) -- cgit v1.2.3 From 933fd5ffaf87a60a019992d48e3a96b5c3403d9f Mon Sep 17 00:00:00 2001 From: Riana Tauro Date: Wed, 10 Apr 2024 14:20:04 +0530 Subject: drm/xe: check pcode init status only on root gt of root tile The root tile indicates the pcode initialization is complete when all tiles have completed their initialization. So the mailbox can be polled only on the root tile. Check pcode init status only on root tile and move it to device probe early as root tile is initialized there. Also make similar changes in resume paths. 
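As a sketch of the resulting call pattern (distilled from the diff below, not a new API): the early-probe path polls the init mailbox before gt->pcode.lock is initialized, so it runs unlocked, while resume paths poll under the lock:

	/* device early probe: pcode.lock not initialized yet, poll unlocked */
	err = xe_pcode_probe_early(xe);	/* wraps xe_pcode_ready(xe, false) */
	...
	/* resume paths: other pcode users may be active, poll under the lock */
	err = xe_pcode_ready(xe, true);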
v2: add lock/unlocked version of pcode_mailbox_rw to allow pcode init to be called in device early probe (Rodrigo) v3: add code description about using root tile change function names to xe_pcode_probe_early and xe_pcode_init (Rodrigo) Signed-off-by: Riana Tauro Reviewed-by: Rodrigo Vivi Reviewed-by: Himal Prasad Ghimiray Link: https://patchwork.freedesktop.org/patch/msgid/20240410085005.1126343-2-riana.tauro@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_device.c | 21 +++++--- drivers/gpu/drm/xe/xe_pcode.c | 115 +++++++++++++++++++++++++---------------- drivers/gpu/drm/xe/xe_pcode.h | 6 ++- drivers/gpu/drm/xe/xe_pm.c | 16 +++--- 4 files changed, 94 insertions(+), 64 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 9083f5e02dd9..a05623a8c163 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -422,8 +422,14 @@ mask_err: return err; } -/* - * Initialize MMIO resources that don't require any knowledge about tile count. +/** + * xe_device_probe_early: Device early probe + * @xe: xe device instance + * + * Initialize MMIO resources that don't require any + * knowledge about tile count. Also initialize pcode + * + * Return: 0 on success, error code on failure */ int xe_device_probe_early(struct xe_device *xe) { @@ -439,6 +445,10 @@ int xe_device_probe_early(struct xe_device *xe) if (err) return err; + err = xe_pcode_probe_early(xe); + if (err) + return err; + return 0; } @@ -517,11 +527,8 @@ int xe_device_probe(struct xe_device *xe) if (err) return err; - for_each_gt(gt, xe, id) { - err = xe_pcode_probe(gt); - if (err) - return err; - } + for_each_gt(gt, xe, id) + xe_pcode_init(gt); err = xe_display_init_noirq(xe); if (err) diff --git a/drivers/gpu/drm/xe/xe_pcode.c b/drivers/gpu/drm/xe/xe_pcode.c index 627e094c7cbe..c010ef16fbf5 100644 --- a/drivers/gpu/drm/xe/xe_pcode.c +++ b/drivers/gpu/drm/xe/xe_pcode.c @@ -10,6 +10,7 @@ #include +#include "xe_device.h" #include "xe_gt.h" #include "xe_mmio.h" #include "xe_pcode_api.h" @@ -43,8 +44,6 @@ static int pcode_mailbox_status(struct xe_gt *gt) [PCODE_ERROR_MASK] = {-EPROTO, "Unknown"}, }; - lockdep_assert_held(>->pcode.lock); - err = xe_mmio_read32(gt, PCODE_MAILBOX) & PCODE_ERROR_MASK; if (err) { drm_err(>_to_xe(gt)->drm, "PCODE Mailbox failed: %d %s", err, @@ -55,17 +54,15 @@ static int pcode_mailbox_status(struct xe_gt *gt) return 0; } -static int pcode_mailbox_rw(struct xe_gt *gt, u32 mbox, u32 *data0, u32 *data1, - unsigned int timeout_ms, bool return_data, - bool atomic) +static int __pcode_mailbox_rw(struct xe_gt *gt, u32 mbox, u32 *data0, u32 *data1, + unsigned int timeout_ms, bool return_data, + bool atomic) { int err; if (gt_to_xe(gt)->info.skip_pcode) return 0; - lockdep_assert_held(>->pcode.lock); - if ((xe_mmio_read32(gt, PCODE_MAILBOX) & PCODE_READY) != 0) return -EAGAIN; @@ -87,6 +84,18 @@ static int pcode_mailbox_rw(struct xe_gt *gt, u32 mbox, u32 *data0, u32 *data1, return pcode_mailbox_status(gt); } +static int pcode_mailbox_rw(struct xe_gt *gt, u32 mbox, u32 *data0, u32 *data1, + unsigned int timeout_ms, bool return_data, + bool atomic) +{ + if (gt_to_xe(gt)->info.skip_pcode) + return 0; + + lockdep_assert_held(>->pcode.lock); + + return __pcode_mailbox_rw(gt, mbox, data0, data1, timeout_ms, return_data, atomic); +} + int xe_pcode_write_timeout(struct xe_gt *gt, u32 mbox, u32 data, int timeout) { int err; @@ -109,15 +118,19 @@ int xe_pcode_read(struct xe_gt *gt, u32 mbox, u32 *val, u32 *val1) return err; } -static int 
xe_pcode_try_request(struct xe_gt *gt, u32 mbox, - u32 request, u32 reply_mask, u32 reply, - u32 *status, bool atomic, int timeout_us) +static int pcode_try_request(struct xe_gt *gt, u32 mbox, + u32 request, u32 reply_mask, u32 reply, + u32 *status, bool atomic, int timeout_us, bool locked) { int slept, wait = 10; for (slept = 0; slept < timeout_us; slept += wait) { - *status = pcode_mailbox_rw(gt, mbox, &request, NULL, 1, true, - atomic); + if (locked) + *status = pcode_mailbox_rw(gt, mbox, &request, NULL, 1, true, + atomic); + else + *status = __pcode_mailbox_rw(gt, mbox, &request, NULL, 1, true, + atomic); if ((*status == 0) && ((request & reply_mask) == reply)) return 0; @@ -158,8 +171,8 @@ int xe_pcode_request(struct xe_gt *gt, u32 mbox, u32 request, mutex_lock(>->pcode.lock); - ret = xe_pcode_try_request(gt, mbox, request, reply_mask, reply, &status, - false, timeout_base_ms * 1000); + ret = pcode_try_request(gt, mbox, request, reply_mask, reply, &status, + false, timeout_base_ms * 1000, true); if (!ret) goto out; @@ -177,8 +190,8 @@ int xe_pcode_request(struct xe_gt *gt, u32 mbox, u32 request, "PCODE timeout, retrying with preemption disabled\n"); drm_WARN_ON_ONCE(>_to_xe(gt)->drm, timeout_base_ms > 1); preempt_disable(); - ret = xe_pcode_try_request(gt, mbox, request, reply_mask, reply, &status, - true, timeout_base_ms * 1000); + ret = pcode_try_request(gt, mbox, request, reply_mask, reply, &status, + true, timeout_base_ms * 1000, true); preempt_enable(); out: @@ -238,59 +251,71 @@ unlock: } /** - * xe_pcode_init - Ensure PCODE is initialized - * @gt: gt instance + * xe_pcode_ready - Ensure PCODE is initialized + * @xe: xe instance + * @locked: true if lock held, false otherwise * - * This function ensures that PCODE is properly initialized. To be called during - * probe and resume paths. + * PCODE init mailbox is polled only on root gt of root tile + * as the root tile provides the initialization is complete only + * after all the tiles have completed the initialization. + * Called only on early probe without locks and with locks in + * resume path. * - * It returns 0 on success, and -error number on failure. + * Returns 0 on success, and -error number on failure. */ -int xe_pcode_init(struct xe_gt *gt) +int xe_pcode_ready(struct xe_device *xe, bool locked) { u32 status, request = DGFX_GET_INIT_STATUS; + struct xe_gt *gt = xe_root_mmio_gt(xe); int timeout_us = 180000000; /* 3 min */ int ret; - if (gt_to_xe(gt)->info.skip_pcode) + if (xe->info.skip_pcode) return 0; - if (!IS_DGFX(gt_to_xe(gt))) + if (!IS_DGFX(xe)) return 0; - mutex_lock(>->pcode.lock); - ret = xe_pcode_try_request(gt, DGFX_PCODE_STATUS, request, - DGFX_INIT_STATUS_COMPLETE, - DGFX_INIT_STATUS_COMPLETE, - &status, false, timeout_us); - mutex_unlock(>->pcode.lock); + if (locked) + mutex_lock(>->pcode.lock); + + ret = pcode_try_request(gt, DGFX_PCODE_STATUS, request, + DGFX_INIT_STATUS_COMPLETE, + DGFX_INIT_STATUS_COMPLETE, + &status, false, timeout_us, locked); + + if (locked) + mutex_unlock(>->pcode.lock); if (ret) - drm_err(>_to_xe(gt)->drm, + drm_err(&xe->drm, "PCODE initialization timedout after: 3 min\n"); return ret; } /** - * xe_pcode_probe - Prepare xe_pcode and also ensure PCODE is initialized. + * xe_pcode_init: initialize components of PCODE * @gt: gt instance * - * This function initializes the xe_pcode component, and when needed, it ensures - * that PCODE has properly performed its initialization and it is really ready - * to go. To be called once only during probe. 
- * - * It returns 0 on success, and -error number on failure. + * This function initializes the xe_pcode component. + * To be called once only during probe. */ -int xe_pcode_probe(struct xe_gt *gt) +void xe_pcode_init(struct xe_gt *gt) { drmm_mutex_init(&gt_to_xe(gt)->drm, &gt->pcode.lock); +} - if (gt_to_xe(gt)->info.skip_pcode) - return 0; - - if (!IS_DGFX(gt_to_xe(gt))) - return 0; - - return xe_pcode_init(gt); +/** + * xe_pcode_probe_early: initializes PCODE + * @xe: xe instance + * + * This function checks the initialization status of PCODE. + * To be called once only during early probe without locks. + * + * Returns 0 on success, error code otherwise + */ +int xe_pcode_probe_early(struct xe_device *xe) +{ + return xe_pcode_ready(xe, false); } diff --git a/drivers/gpu/drm/xe/xe_pcode.h b/drivers/gpu/drm/xe/xe_pcode.h index 08cb1d047cba..3f54c6d2a57d 100644 --- a/drivers/gpu/drm/xe/xe_pcode.h +++ b/drivers/gpu/drm/xe/xe_pcode.h @@ -8,9 +8,11 @@ #include struct xe_gt; +struct xe_device; -int xe_pcode_probe(struct xe_gt *gt); -int xe_pcode_init(struct xe_gt *gt); +void xe_pcode_init(struct xe_gt *gt); +int xe_pcode_probe_early(struct xe_device *xe); +int xe_pcode_ready(struct xe_device *xe, bool locked); int xe_pcode_init_min_freq_table(struct xe_gt *gt, u32 min_gt_freq, u32 max_gt_freq); int xe_pcode_read(struct xe_gt *gt, u32 mbox, u32 *val, u32 *val1); diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c index cc650a92c2fc..f1fc83845c01 100644 --- a/drivers/gpu/drm/xe/xe_pm.c +++ b/drivers/gpu/drm/xe/xe_pm.c @@ -129,11 +129,9 @@ int xe_pm_resume(struct xe_device *xe) for_each_tile(tile, xe, id) xe_wa_apply_tile_workarounds(tile); - for_each_gt(gt, xe, id) { - err = xe_pcode_init(gt); - if (err) - goto err; - } + err = xe_pcode_ready(xe, true); + if (err) + return err; xe_display_pm_resume_early(xe); @@ -386,11 +384,9 @@ int xe_pm_runtime_resume(struct xe_device *xe) xe->d3cold.power_lost = xe_guc_in_reset(&gt->uc.guc); if (xe->d3cold.allowed && xe->d3cold.power_lost) { - for_each_gt(gt, xe, id) { - err = xe_pcode_init(gt); - if (err) - goto out; - } + err = xe_pcode_ready(xe, true); + if (err) + goto out; /* * This only restores pinned memory which is the memory -- cgit v1.2.3 From 797b0e9be054b9fd6e6085ddf3d75523f3ad5e2c Mon Sep 17 00:00:00 2001 From: Riana Tauro Date: Wed, 10 Apr 2024 14:20:05 +0530 Subject: drm/xe: re-order lmem init check and wait for initialization to complete The lmem init check should be done only after the pcode initialization is complete. Move the lmem init check after the pcode status check. Also wait for a short while after the pcode status check to allow the task to complete. Failing to do so can lead to aborting the module load, leaving the system unusable. Wait until the lmem initialization is complete within a timeout (60s) or until the user aborts.
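In sketch form, the wait added below is the usual interruptible poll-with-timeout idiom; the names match the patch that follows, but this distillation is illustrative rather than the literal code:

	timeout = jiffies + msecs_to_jiffies(60 * 1000);
	while (!verify_lmem_ready(gt)) {
		if (signal_pending(current))
			return -EINTR;		/* user aborted the wait */
		if (time_after(jiffies, timeout))
			return -EPROBE_DEFER;	/* let the driver core retry */
		msleep(20);
	}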
v2: use bool as return type; re-order the code comment (Rodrigo); add comment for deferring probe (Himal) v3: rebase Signed-off-by: Riana Tauro Acked-by: Rodrigo Vivi Reviewed-by: Himal Prasad Ghimiray Link: https://patchwork.freedesktop.org/patch/msgid/20240410085005.1126343-3-riana.tauro@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_device.c | 62 ++++++++++++++++++++++++++++++++++++++++-- drivers/gpu/drm/xe/xe_mmio.c | 24 ---------------- drivers/gpu/drm/xe/xe_mmio.h | 1 - 3 files changed, 59 insertions(+), 28 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index a05623a8c163..927e1370e2a0 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -422,12 +422,68 @@ mask_err: return err; } +static bool verify_lmem_ready(struct xe_gt *gt) +{ + u32 val = xe_mmio_read32(gt, GU_CNTL) & LMEM_INIT; + + return !!val; +} + +static int wait_for_lmem_ready(struct xe_device *xe) +{ + struct xe_gt *gt = xe_root_mmio_gt(xe); + unsigned long timeout, start; + + if (!IS_DGFX(xe)) + return 0; + + if (IS_SRIOV_VF(xe)) + return 0; + + if (verify_lmem_ready(gt)) + return 0; + + drm_dbg(&xe->drm, "Waiting for lmem initialization\n"); + + start = jiffies; + timeout = start + msecs_to_jiffies(60 * 1000); /* 60 sec! */ + + do { + if (signal_pending(current)) + return -EINTR; + + /* + * The boot firmware initializes local memory and + * assesses its health. If memory training fails, + * the punit will have been instructed to keep the GT powered + * down; we won't be able to communicate with it. + * + * If the status check is done before punit updates the register, + * it can lead to the system being unusable. + * Use a timeout and defer the probe to prevent this. + */ + if (time_after(jiffies, timeout)) { + drm_dbg(&xe->drm, "lmem not initialized by firmware\n"); + return -EPROBE_DEFER; + } + + msleep(20); + + } while (!verify_lmem_ready(gt)); + + drm_dbg(&xe->drm, "lmem ready after %ums", + jiffies_to_msecs(jiffies - start)); + + return 0; +} + /** * xe_device_probe_early: Device early probe * @xe: xe device instance * * Initialize MMIO resources that don't require any - * knowledge about tile count. Also initialize pcode + * knowledge about tile count. Also initialize pcode and + * check vram initialization on root tile. * * Return: 0 on success, error code on failure */ @@ -441,11 +497,11 @@ int xe_device_probe_early(struct xe_device *xe) xe_sriov_probe_early(xe); - err = xe_mmio_verify_vram(xe); + err = xe_pcode_probe_early(xe); if (err) return err; - err = xe_pcode_probe_early(xe); + err = wait_for_lmem_ready(xe); if (err) return err; diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index d66da1a9f165..334637511e75 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -420,30 +420,6 @@ int xe_mmio_init(struct xe_device *xe) return drmm_add_action_or_reset(&xe->drm, mmio_fini, xe); } -int xe_mmio_verify_vram(struct xe_device *xe) -{ - struct xe_gt *gt = xe_root_mmio_gt(xe); - - if (!IS_DGFX(xe)) - return 0; - - if (IS_SRIOV_VF(xe)) - return 0; - - /* - * The boot firmware initializes local memory and assesses its health. - * If memory training fails, the punit will have been instructed to - * keep the GT powered down; we won't be able to communicate with it - * and we should not continue with driver initialization. 
- */ - if (!(xe_mmio_read32(gt, GU_CNTL) & LMEM_INIT)) { - drm_err(&xe->drm, "VRAM not initialized by firmware\n"); - return -ENODEV; - } - - return 0; -} - u8 xe_mmio_read8(struct xe_gt *gt, struct xe_reg reg) { struct xe_tile *tile = gt_to_tile(gt); diff --git a/drivers/gpu/drm/xe/xe_mmio.h b/drivers/gpu/drm/xe/xe_mmio.h index b1680c4a14fb..a3cd7b3036c7 100644 --- a/drivers/gpu/drm/xe/xe_mmio.h +++ b/drivers/gpu/drm/xe/xe_mmio.h @@ -21,7 +21,6 @@ struct xe_device; #define LMEM_BAR 2 int xe_mmio_init(struct xe_device *xe); -int xe_mmio_verify_vram(struct xe_device *xe); void xe_mmio_probe_tiles(struct xe_device *xe); u8 xe_mmio_read8(struct xe_gt *gt, struct xe_reg reg); -- cgit v1.2.3 From 13c52251523bfe25db26d0205a09f5d2181e4bc0 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Thu, 4 Apr 2024 21:36:45 +0200 Subject: drm/xe/guc: Prefer GT oriented logs in GuC CTB code A platform can have more than one GuC, so we should use GT-oriented logs to refer to specific GuC. Signed-off-by: Michal Wajdeczko Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240404193647.759-1-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_guc_ct.c | 62 ++++++++++++++++++++---------------------- 1 file changed, 30 insertions(+), 32 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index 0aa3abaca66d..e4676bdfbeb2 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -318,6 +318,7 @@ static void xe_guc_ct_set_state(struct xe_guc_ct *ct, int xe_guc_ct_enable(struct xe_guc_ct *ct) { struct xe_device *xe = ct_to_xe(ct); + struct xe_gt *gt = ct_to_gt(ct); int err; xe_assert(xe, !xe_guc_ct_enabled(ct)); @@ -341,12 +342,12 @@ int xe_guc_ct_enable(struct xe_guc_ct *ct) smp_mb(); wake_up_all(&ct->wq); - drm_dbg(&xe->drm, "GuC CT communication channel enabled\n"); + xe_gt_dbg(gt, "GuC CT communication channel enabled\n"); return 0; err_out: - drm_err(&xe->drm, "Failed to enable CT (%d)\n", err); + xe_gt_err(gt, "Failed to enable GuC CT (%pe)\n", ERR_PTR(err)); return err; } @@ -633,8 +634,8 @@ static int guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action, u32 len, u32 g2h_len, u32 num_g2h, struct g2h_fence *g2h_fence) { - struct drm_device *drm = &ct_to_xe(ct)->drm; - struct drm_printer p = drm_info_printer(drm->dev); + struct xe_gt *gt = ct_to_gt(ct); + struct drm_printer p = xe_gt_info_printer(gt); unsigned int sleep_period_ms = 1; int ret; @@ -696,7 +697,7 @@ try_again: return ret; broken: - drm_err(drm, "No forward process on H2G, reset required"); + xe_gt_err(gt, "No forward process on H2G, reset required\n"); xe_guc_ct_print(ct, &p, true); ct->ctbs.h2g.info.broken = true; @@ -776,7 +777,7 @@ static bool retry_failure(struct xe_guc_ct *ct, int ret) static int guc_ct_send_recv(struct xe_guc_ct *ct, const u32 *action, u32 len, u32 *response_buffer, bool no_fail) { - struct xe_device *xe = ct_to_xe(ct); + struct xe_gt *gt = ct_to_gt(ct); struct g2h_fence g2h_fence; int ret = 0; @@ -818,20 +819,20 @@ retry_same_fence: ret = wait_event_timeout(ct->g2h_fence_wq, g2h_fence.done, HZ); if (!ret) { - drm_err(&xe->drm, "Timed out wait for G2H, fence %u, action %04x", - g2h_fence.seqno, action[0]); + xe_gt_err(gt, "Timed out wait for G2H, fence %u, action %04x", + g2h_fence.seqno, action[0]); xa_erase_irq(&ct->fence_lookup, g2h_fence.seqno); return -ETIME; } if (g2h_fence.retry) { - drm_warn(&xe->drm, "Send retry, action 0x%04x, reason %d", - action[0], g2h_fence.reason); + xe_gt_warn(gt, "H2G retry, action 0x%04x, 
reason %u", + action[0], g2h_fence.reason); goto retry; } if (g2h_fence.fail) { - drm_err(&xe->drm, "Send failed, action 0x%04x, error %d, hint %d", - action[0], g2h_fence.error, g2h_fence.hint); + xe_gt_err(gt, "H2G send failed, action 0x%04x, error %d, hint %u", + action[0], g2h_fence.error, g2h_fence.hint); ret = -EIO; } @@ -966,7 +967,7 @@ static int parse_g2h_response(struct xe_guc_ct *ct, u32 *msg, u32 len) static int parse_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len) { - struct xe_device *xe = ct_to_xe(ct); + struct xe_gt *gt = ct_to_gt(ct); u32 *hxg = msg_to_hxg(msg); u32 origin, type; int ret; @@ -975,9 +976,8 @@ static int parse_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len) origin = FIELD_GET(GUC_HXG_MSG_0_ORIGIN, hxg[0]); if (unlikely(origin != GUC_HXG_ORIGIN_GUC)) { - drm_err(&xe->drm, - "G2H channel broken on read, origin=%d, reset required\n", - origin); + xe_gt_err(gt, "G2H channel broken on read, origin=%u, reset required\n", + origin); ct->ctbs.g2h.info.broken = true; return -EPROTO; @@ -994,9 +994,8 @@ static int parse_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len) ret = parse_g2h_response(ct, msg, len); break; default: - drm_err(&xe->drm, - "G2H channel broken on read, type=%d, reset required\n", - type); + xe_gt_err(gt, "G2H channel broken on read, type=%u, reset required\n", + type); ct->ctbs.g2h.info.broken = true; ret = -EOPNOTSUPP; @@ -1007,7 +1006,6 @@ static int parse_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len) static int process_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len) { - struct xe_device *xe = ct_to_xe(ct); struct xe_guc *guc = ct_to_guc(ct); struct xe_gt *gt = ct_to_gt(ct); u32 hxg_len = msg_len_to_hxg_len(len); @@ -1069,12 +1067,12 @@ static int process_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len) ret = xe_gt_sriov_pf_control_process_guc2pf(gt, hxg, hxg_len); break; default: - drm_err(&xe->drm, "unexpected action 0x%04x\n", action); + xe_gt_err(gt, "unexpected G2H action 0x%04x\n", action); } if (ret) - drm_err(&xe->drm, "action 0x%04x failed processing, ret=%d\n", - action, ret); + xe_gt_err(gt, "G2H action 0x%04x failed (%pe)\n", + action, ERR_PTR(ret)); return 0; } @@ -1082,6 +1080,7 @@ static int process_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len) static int g2h_read(struct xe_guc_ct *ct, u32 *msg, bool fast_path) { struct xe_device *xe = ct_to_xe(ct); + struct xe_gt *gt = ct_to_gt(ct); struct guc_ctb *g2h = &ct->ctbs.g2h; u32 tail, head, len; s32 avail; @@ -1116,9 +1115,8 @@ static int g2h_read(struct xe_guc_ct *ct, u32 *msg, bool fast_path) sizeof(u32)); len = FIELD_GET(GUC_CTB_MSG_0_NUM_DWORDS, msg[0]) + GUC_CTB_MSG_MIN_LEN; if (len > avail) { - drm_err(&xe->drm, - "G2H channel broken on read, avail=%d, len=%d, reset required\n", - avail, len); + xe_gt_err(gt, "G2H channel broken on read, avail=%d, len=%d, reset required\n", + avail, len); g2h->info.broken = true; return -EPROTO; @@ -1171,7 +1169,7 @@ static int g2h_read(struct xe_guc_ct *ct, u32 *msg, bool fast_path) static void g2h_fast_path(struct xe_guc_ct *ct, u32 *msg, u32 len) { - struct xe_device *xe = ct_to_xe(ct); + struct xe_gt *gt = ct_to_gt(ct); struct xe_guc *guc = ct_to_guc(ct); u32 hxg_len = msg_len_to_hxg_len(len); u32 *hxg = msg_to_hxg(msg); @@ -1190,12 +1188,12 @@ static void g2h_fast_path(struct xe_guc_ct *ct, u32 *msg, u32 len) adj_len); break; default: - drm_warn(&xe->drm, "NOT_POSSIBLE"); + xe_gt_warn(gt, "NOT_POSSIBLE"); } if (ret) - drm_err(&xe->drm, "action 0x%04x failed processing, ret=%d\n", - action, ret); + xe_gt_err(gt, "G2H action 0x%04x 
failed (%pe)\n", + action, ERR_PTR(ret)); } /** @@ -1256,6 +1254,7 @@ static int dequeue_one_g2h(struct xe_guc_ct *ct) static void g2h_worker_func(struct work_struct *w) { struct xe_guc_ct *ct = container_of(w, struct xe_guc_ct, g2h_worker); + struct xe_gt *gt = ct_to_gt(ct); bool ongoing; int ret; @@ -1292,8 +1291,7 @@ static void g2h_worker_func(struct work_struct *w) mutex_unlock(&ct->lock); if (unlikely(ret == -EPROTO || ret == -EOPNOTSUPP)) { - struct drm_device *drm = &ct_to_xe(ct)->drm; - struct drm_printer p = drm_info_printer(drm->dev); + struct drm_printer p = xe_gt_info_printer(gt); xe_guc_ct_print(ct, &p, false); kick_reset(ct); -- cgit v1.2.3 From 9c1857d587e91dfc10875a8c1083360db047404f Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Thu, 4 Apr 2024 21:36:46 +0200 Subject: drm/xe/guc: Prefer GT oriented asserts in CTB code GuC CTB is related to the GT, so best to use xe_gt_assert(). Signed-off-by: Michal Wajdeczko Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240404193647.759-2-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_guc_ct.c | 44 +++++++++++++++++++++--------------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index e4676bdfbeb2..b1412d432ec2 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -144,7 +144,7 @@ int xe_guc_ct_init(struct xe_guc_ct *ct) struct xe_bo *bo; int err; - xe_assert(xe, !(guc_ct_size() % PAGE_SIZE)); + xe_gt_assert(gt, !(guc_ct_size() % PAGE_SIZE)); spin_lock_init(&ct->fast_lock); xa_init(&ct->fence_lookup); @@ -171,7 +171,7 @@ int xe_guc_ct_init(struct xe_guc_ct *ct) if (err) return err; - xe_assert(xe, ct->state == XE_GUC_CT_STATE_NOT_INITIALIZED); + xe_gt_assert(gt, ct->state == XE_GUC_CT_STATE_NOT_INITIALIZED); ct->state = XE_GUC_CT_STATE_DISABLED; return 0; } @@ -321,7 +321,7 @@ int xe_guc_ct_enable(struct xe_guc_ct *ct) struct xe_gt *gt = ct_to_gt(ct); int err; - xe_assert(xe, !xe_guc_ct_enabled(ct)); + xe_gt_assert(gt, !xe_guc_ct_enabled(ct)); guc_ct_ctb_h2g_init(xe, &ct->ctbs.h2g, &ct->bo->vmap); guc_ct_ctb_g2h_init(xe, &ct->ctbs.g2h, &ct->bo->vmap); @@ -428,7 +428,7 @@ static void h2g_reserve_space(struct xe_guc_ct *ct, u32 cmd_len) static void __g2h_reserve_space(struct xe_guc_ct *ct, u32 g2h_len, u32 num_g2h) { - xe_assert(ct_to_xe(ct), g2h_len <= ct->ctbs.g2h.info.space); + xe_gt_assert(ct_to_gt(ct), g2h_len <= ct->ctbs.g2h.info.space); if (g2h_len) { lockdep_assert_held(&ct->fast_lock); @@ -441,8 +441,8 @@ static void __g2h_reserve_space(struct xe_guc_ct *ct, u32 g2h_len, u32 num_g2h) static void __g2h_release_space(struct xe_guc_ct *ct, u32 g2h_len) { lockdep_assert_held(&ct->fast_lock); - xe_assert(ct_to_xe(ct), ct->ctbs.g2h.info.space + g2h_len <= - ct->ctbs.g2h.info.size - ct->ctbs.g2h.info.resv_space); + xe_gt_assert(ct_to_gt(ct), ct->ctbs.g2h.info.space + g2h_len <= + ct->ctbs.g2h.info.size - ct->ctbs.g2h.info.resv_space); ct->ctbs.g2h.info.space += g2h_len; --ct->g2h_outstanding; @@ -461,6 +461,7 @@ static int h2g_write(struct xe_guc_ct *ct, const u32 *action, u32 len, u32 ct_fence_value, bool want_response) { struct xe_device *xe = ct_to_xe(ct); + struct xe_gt *gt = ct_to_gt(ct); struct guc_ctb *h2g = &ct->ctbs.h2g; u32 cmd[H2G_CT_HEADERS]; u32 tail = h2g->info.tail; @@ -471,8 +472,8 @@ static int h2g_write(struct xe_guc_ct *ct, const u32 *action, u32 len, full_len = len + GUC_CTB_HDR_LEN; lockdep_assert_held(&ct->lock); - xe_assert(xe, full_len <= 
GUC_CTB_MSG_MAX_LEN); - xe_assert(xe, tail <= h2g->info.size); + xe_gt_assert(gt, full_len <= GUC_CTB_MSG_MAX_LEN); + xe_gt_assert(gt, tail <= h2g->info.size); /* Command will wrap, zero fill (NOPs), return and check credits again */ if (tail + full_len > h2g->info.size) { @@ -521,7 +522,7 @@ static int h2g_write(struct xe_guc_ct *ct, const u32 *action, u32 len, /* Update descriptor */ desc_write(xe, h2g, tail, h2g->info.tail); - trace_xe_guc_ctb_h2g(ct_to_gt(ct)->info.id, *(action - 1), full_len, + trace_xe_guc_ctb_h2g(gt->info.id, *(action - 1), full_len, desc_read(xe, h2g, head), h2g->info.tail); return 0; @@ -550,15 +551,15 @@ static int __guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action, u32 len, u32 g2h_len, u32 num_g2h, struct g2h_fence *g2h_fence) { - struct xe_device *xe = ct_to_xe(ct); + struct xe_gt *gt __maybe_unused = ct_to_gt(ct); u16 seqno; int ret; - xe_assert(xe, ct->state != XE_GUC_CT_STATE_NOT_INITIALIZED); - xe_assert(xe, !g2h_len || !g2h_fence); - xe_assert(xe, !num_g2h || !g2h_fence); - xe_assert(xe, !g2h_len || num_g2h); - xe_assert(xe, g2h_len || !num_g2h); + xe_gt_assert(gt, ct->state != XE_GUC_CT_STATE_NOT_INITIALIZED); + xe_gt_assert(gt, !g2h_len || !g2h_fence); + xe_gt_assert(gt, !num_g2h || !g2h_fence); + xe_gt_assert(gt, !g2h_len || num_g2h); + xe_gt_assert(gt, g2h_len || !num_g2h); lockdep_assert_held(&ct->lock); if (unlikely(ct->ctbs.h2g.info.broken)) { @@ -576,7 +577,7 @@ static int __guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action, goto out; } - xe_assert(xe, xe_guc_ct_enabled(ct)); + xe_gt_assert(gt, xe_guc_ct_enabled(ct)); if (g2h_fence) { g2h_len = GUC_CTB_HXG_MSG_MAX_LEN; @@ -639,7 +640,7 @@ static int guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action, u32 len, unsigned int sleep_period_ms = 1; int ret; - xe_assert(ct_to_xe(ct), !g2h_len || !g2h_fence); + xe_gt_assert(gt, !g2h_len || !g2h_fence); lockdep_assert_held(&ct->lock); xe_device_assert_mem_access(ct_to_xe(ct)); @@ -709,7 +710,7 @@ static int guc_ct_send(struct xe_guc_ct *ct, const u32 *action, u32 len, { int ret; - xe_assert(ct_to_xe(ct), !g2h_len || !g2h_fence); + xe_gt_assert(ct_to_gt(ct), !g2h_len || !g2h_fence); mutex_lock(&ct->lock); ret = guc_ct_send_locked(ct, action, len, g2h_len, num_g2h, g2h_fence); @@ -901,7 +902,6 @@ static int parse_g2h_event(struct xe_guc_ct *ct, u32 *msg, u32 len) static int parse_g2h_response(struct xe_guc_ct *ct, u32 *msg, u32 len) { struct xe_gt *gt = ct_to_gt(ct); - struct xe_device *xe = gt_to_xe(gt); u32 *hxg = msg_to_hxg(msg); u32 hxg_len = msg_len_to_hxg_len(len); u32 fence = FIELD_GET(GUC_CTB_MSG_0_FENCE, msg[0]); @@ -939,7 +939,7 @@ static int parse_g2h_response(struct xe_guc_ct *ct, u32 *msg, u32 len) return 0; } - xe_assert(xe, fence == g2h_fence->seqno); + xe_gt_assert(gt, fence == g2h_fence->seqno); if (type == GUC_HXG_TYPE_RESPONSE_FAILURE) { g2h_fence->fail = true; @@ -1087,7 +1087,7 @@ static int g2h_read(struct xe_guc_ct *ct, u32 *msg, bool fast_path) u32 action; u32 *hxg; - xe_assert(xe, ct->state != XE_GUC_CT_STATE_NOT_INITIALIZED); + xe_gt_assert(gt, ct->state != XE_GUC_CT_STATE_NOT_INITIALIZED); lockdep_assert_held(&ct->fast_lock); if (ct->state == XE_GUC_CT_STATE_DISABLED) @@ -1099,7 +1099,7 @@ static int g2h_read(struct xe_guc_ct *ct, u32 *msg, bool fast_path) if (g2h->info.broken) return -EPIPE; - xe_assert(xe, xe_guc_ct_enabled(ct)); + xe_gt_assert(gt, xe_guc_ct_enabled(ct)); /* Calculate DW available to read */ tail = desc_read(xe, g2h, tail); -- cgit v1.2.3 From ac321eb46e850db5942e4d395b6a50385170d9c0 
Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Wed, 10 Apr 2024 11:16:11 -0700 Subject: drm/xe: Add xe_guc_ads.c to uses_generated_oob A recent change added a use of xe_wa_oob.h without adding the file that uses it to uses_generated_oob, which means xe_wa_oob.h does not get properly generated before attempting to build the object file: LINK resolve_btfids CC [M] drivers/gpu/drm/xe/xe_guc_ads.o drivers/gpu/drm/xe/xe_guc_ads.c:10:10: fatal error: generated/xe_wa_oob.h: No such file or directory 10 | #include <generated/xe_wa_oob.h> | ^~~~~~~~~~~~~~~~~~~~~~~ After adding '$(obj)/xe_guc_ads.o' to uses_generated_oob, xe_wa_oob.h is always generated before building the file, resulting in no errors: LINK resolve_btfids HOSTCC drivers/gpu/drm/xe/xe_gen_wa_oob GEN xe_wa_oob.c xe_wa_oob.h CC [M] drivers/gpu/drm/xe/xe_guc_ads.o Fixes: c151ff5c9053 ("drm/xe/lnl: Enable GuC Wa_14019882105") Signed-off-by: Nathan Chancellor Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240410-drm-xe-fix-xe_guc_ads-using-xe_wa_oob-v1-1-441f2d8e5d83@kernel.org Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index e106767c9a6e..60c90dc918b2 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -49,6 +49,7 @@ $(obj)/generated/%_wa_oob.c $(obj)/generated/%_wa_oob.h: $(obj)/xe_gen_wa_oob \ uses_generated_oob := \ $(obj)/xe_gsc.o \ $(obj)/xe_guc.o \ + $(obj)/xe_guc_ads.o \ $(obj)/xe_migrate.o \ $(obj)/xe_ring_ops.o \ $(obj)/xe_vm.o \ -- cgit v1.2.3 From a28380f119a918135c6b7155fb4eb95eaabb62dc Mon Sep 17 00:00:00 2001 From: José Roberto de Souza Date: Tue, 9 Apr 2024 13:02:05 -0700 Subject: devcoredump: Add dev_coredump_put() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It is useful for modules that do not want to keep the coredump available after they are unloaded. Otherwise, the coredump would only be removed after DEVCD_TIMEOUT seconds. v2: - dev_coredump_put() documentation updated (Mukesh) Cc: Rodrigo Vivi Cc: Mukesh Ojha Cc: Johannes Berg Cc: Jonathan Cavitt Reviewed-by: Johannes Berg Acked-by: Jonathan Cavitt Signed-off-by: José Roberto de Souza Acked-by: Greg Kroah-Hartman Link: https://patchwork.freedesktop.org/patch/msgid/20240409200206.108452-1-jose.souza@intel.com Signed-off-by: Rodrigo Vivi --- drivers/base/devcoredump.c | 23 +++++++++++++++++++++ include/linux/devcoredump.h | 5 +++++ 2 files changed, 28 insertions(+) diff --git a/drivers/base/devcoredump.c b/drivers/base/devcoredump.c index 7e2d1f0d903a..82aeb09b3d1b 100644 --- a/drivers/base/devcoredump.c +++ b/drivers/base/devcoredump.c @@ -304,6 +304,29 @@ static ssize_t devcd_read_from_sgtable(char *buffer, loff_t offset, offset); } +/** + * dev_coredump_put - remove device coredump + * @dev: the struct device for the crashed device + * + * dev_coredump_put() removes the coredump, if it exists, for a given device + * from the file system and frees its associated data; otherwise, it does + * nothing. + * + * It is useful for modules that do not want to keep the coredump + * available after they are unloaded. 
+ */ +void dev_coredump_put(struct device *dev) +{ + struct device *existing; + + existing = class_find_device(&devcd_class, NULL, dev, + devcd_match_failing); + if (existing) { + devcd_free(existing, NULL); + put_device(existing); + } +} +EXPORT_SYMBOL_GPL(dev_coredump_put); + /** * dev_coredumpm - create device coredump with read/free methods * @dev: the struct device for the crashed device diff --git a/include/linux/devcoredump.h b/include/linux/devcoredump.h index c008169ed2c6..c8f7eb6cc191 100644 --- a/include/linux/devcoredump.h +++ b/include/linux/devcoredump.h @@ -63,6 +63,8 @@ void dev_coredumpm(struct device *dev, struct module *owner, void dev_coredumpsg(struct device *dev, struct scatterlist *table, size_t datalen, gfp_t gfp); + +void dev_coredump_put(struct device *dev); #else static inline void dev_coredumpv(struct device *dev, void *data, size_t datalen, gfp_t gfp) @@ -85,6 +87,9 @@ static inline void dev_coredumpsg(struct device *dev, struct scatterlist *table, { _devcd_free_sgtable(table); } +static inline void dev_coredump_put(struct device *dev) +{ +} #endif /* CONFIG_DEV_COREDUMP */ #endif /* __DEVCOREDUMP_H */ -- cgit v1.2.3 From 4209d635a823619038db0bf72e86a223f4186634 Mon Sep 17 00:00:00 2001 From: José Roberto de Souza Date: Tue, 9 Apr 2024 13:02:06 -0700 Subject: drm/xe: Remove devcoredump during driver release MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This removes the devcoredump from the file system and frees its resources during driver unload. This fixes driver unload after a GPU hang has happened; otherwise it would be reported that Xe KMD is still in use, and the kernel would be left in a state where Xe KMD cannot be unloaded without a reboot. Cc: Rodrigo Vivi Cc: Jonathan Cavitt Acked-by: Jonathan Cavitt Signed-off-by: José Roberto de Souza Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240409200206.108452-2-jose.souza@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_devcoredump.c | 13 ++++++++++++- drivers/gpu/drm/xe/xe_devcoredump.h | 6 ++++++ drivers/gpu/drm/xe/xe_device.c | 4 ++++ 3 files changed, 22 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c index 283ca7518aff..3d7980232be1 100644 --- a/drivers/gpu/drm/xe/xe_devcoredump.c +++ b/drivers/gpu/drm/xe/xe_devcoredump.c @@ -9,6 +9,8 @@ #include #include +#include + #include "xe_device.h" #include "xe_exec_queue.h" #include "xe_force_wake.h" @@ -235,5 +237,14 @@ void xe_devcoredump(struct xe_sched_job *job) dev_coredumpm(xe->drm.dev, THIS_MODULE, coredump, 0, GFP_KERNEL, xe_devcoredump_read, xe_devcoredump_free); } -#endif +static void xe_driver_devcoredump_fini(struct drm_device *drm, void *arg) +{ + dev_coredump_put(drm->dev); +} + +int xe_devcoredump_init(struct xe_device *xe) +{ + return drmm_add_action_or_reset(&xe->drm, xe_driver_devcoredump_fini, xe); +} +#endif diff --git a/drivers/gpu/drm/xe/xe_devcoredump.h b/drivers/gpu/drm/xe/xe_devcoredump.h index df8671f0b5eb..e2fa65ce0932 100644 --- a/drivers/gpu/drm/xe/xe_devcoredump.h +++ b/drivers/gpu/drm/xe/xe_devcoredump.h @@ -11,10 +11,16 @@ struct xe_sched_job; #ifdef CONFIG_DEV_COREDUMP void xe_devcoredump(struct xe_sched_job *job); +int xe_devcoredump_init(struct xe_device *xe); #else static inline void xe_devcoredump(struct xe_sched_job *job) { } + +static inline int xe_devcoredump_init(struct xe_device *xe) +{ + return 0; +} #endif #endif diff --git a/drivers/gpu/drm/xe/xe_device.c
b/drivers/gpu/drm/xe/xe_device.c index 927e1370e2a0..d85a2ba0a057 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -20,6 +20,7 @@ #include "regs/xe_regs.h" #include "xe_bo.h" #include "xe_debugfs.h" +#include "xe_devcoredump.h" #include "xe_dma_buf.h" #include "xe_drm_client.h" #include "xe_drv.h" @@ -579,6 +580,9 @@ int xe_device_probe(struct xe_device *xe) return err; } + err = xe_devcoredump_init(xe); + if (err) + return err; err = drmm_add_action_or_reset(&xe->drm, xe_driver_flr_fini, xe); if (err) return err; -- cgit v1.2.3 From a2f3d731be3893e730417ae3190760fcaffdf549 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Thu, 4 Apr 2024 11:03:02 +0200 Subject: drm/xe: Fix bo leak in intel_fb_bo_framebuffer_init Add an unreference of the bo in the error path to prevent leaking a bo ref. Return 0 on success to clarify the success path. Signed-off-by: Maarten Lankhorst Fixes: 44e694958b95 ("drm/xe/display: Implement display support") Cc: # v6.8+ Reviewed-by: Nirmoy Das Link: https://patchwork.freedesktop.org/patch/msgid/20240404090302.68422-1-maarten.lankhorst@linux.intel.com --- drivers/gpu/drm/xe/display/intel_fb_bo.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/display/intel_fb_bo.c b/drivers/gpu/drm/xe/display/intel_fb_bo.c index dba327f53ac5..e18521acc516 100644 --- a/drivers/gpu/drm/xe/display/intel_fb_bo.c +++ b/drivers/gpu/drm/xe/display/intel_fb_bo.c @@ -31,7 +31,7 @@ int intel_fb_bo_framebuffer_init(struct intel_framebuffer *intel_fb, ret = ttm_bo_reserve(&bo->ttm, true, false, NULL); if (ret) - return ret; + goto err; if (!(bo->flags & XE_BO_FLAG_SCANOUT)) { /* @@ -42,12 +42,16 @@ int intel_fb_bo_framebuffer_init(struct intel_framebuffer *intel_fb, */ if (XE_IOCTL_DBG(i915, !list_empty(&bo->ttm.base.gpuva.list))) { ttm_bo_unreserve(&bo->ttm); - return -EINVAL; + ret = -EINVAL; + goto err; } bo->flags |= XE_BO_FLAG_SCANOUT; } ttm_bo_unreserve(&bo->ttm); + return 0; +err: + xe_bo_put(bo); return ret; } -- cgit v1.2.3 From 81e058a3e7fd8593d076b4f26f7b8bb49f1d61e3 Mon Sep 17 00:00:00 2001 From: Oak Zeng Date: Fri, 12 Apr 2024 15:22:36 +0530 Subject: drm/xe: Introduce helper to populate userptr MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce a helper function xe_userptr_populate_range to populate a userptr range. This function calls hmm_range_fault to read CPU page tables and populate all pfns/pages of this virtual address range. For system memory pages, dma-mapping is performed to get a dma-address which the GPU can later use to access the pages. v1: Address review comments: separate out an npage_in_range function (Matt) reparameterize the xe_userptr_populate_range function (Matt) move mmu_interval_read_begin() call into while loop (Thomas) s/mark_range_accessed/xe_mark_range_accessed (Thomas) use set_page_dirty_lock (vs set_page_dirty) (Thomas) move a few checks in xe_vma_userptr_pin_pages to hmm.c (Matt) v2: Remove device private page support. Only support system pages for now. Use dma-map-sg rather than dma-map-page (Matt/Thomas) v3: Address review comments: Squash patch "drm/xe: Introduce a helper to free sg table" into the current patch (Matt) start and end addresses are already page aligned (Matt) Do mmap_read_lock and mmap_read_unlock for hmm_range_fault in case of non-system-allocator calls. (Matt) Drop kthread_use_mm and kthread_unuse_mm. (Matt) No need for kernel-doc for static functions. (Matt) Modify function names.
(Matt) Free the sgtable in case of dma_map_sgtable failure. (Matt) Modify the loop for hmm_range_fault. (Matt) v4: Remove the dummy function for xe_hmm_userptr_populate_range since CONFIG_HMM_MIRROR is needed. (Matt) Change variable names start/end to userptr_start/userptr_end. (Matt) v5: Remove device private page support info from the commit message, since the patch doesn't support device page handling. (Thomas) Signed-off-by: Oak Zeng Co-developed-by: Niranjana Vishwanathapura Signed-off-by: Niranjana Vishwanathapura Reviewed-by: Matthew Brost Cc: Matthew Brost Cc: Thomas Hellström Cc: Brian Welty Signed-off-by: Himal Prasad Ghimiray Signed-off-by: Thomas Hellström Link: https://patchwork.freedesktop.org/patch/msgid/20240412095237.1048599-2-himal.prasad.ghimiray@intel.com --- drivers/gpu/drm/xe/Kconfig | 1 + drivers/gpu/drm/xe/Makefile | 2 + drivers/gpu/drm/xe/xe_hmm.c | 253 ++++++++++++++++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_hmm.h | 11 ++ 4 files changed, 267 insertions(+) create mode 100644 drivers/gpu/drm/xe/xe_hmm.c create mode 100644 drivers/gpu/drm/xe/xe_hmm.h diff --git a/drivers/gpu/drm/xe/Kconfig b/drivers/gpu/drm/xe/Kconfig index 1a556d087e63..449a1ecbc92a 100644 --- a/drivers/gpu/drm/xe/Kconfig +++ b/drivers/gpu/drm/xe/Kconfig @@ -41,6 +41,7 @@ config DRM_XE select MMU_NOTIFIER select WANT_DEV_COREDUMP select AUXILIARY_BUS + select HMM_MIRROR help Experimental driver for Intel Xe series GPUs diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index 60c90dc918b2..027ff5f93fde 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -147,6 +147,8 @@ xe-y += xe_bb.o \ xe_wa.o \ xe_wopcm.o +xe-$(CONFIG_HMM_MIRROR) += xe_hmm.o + # graphics hardware monitoring (HWMON) support xe-$(CONFIG_HWMON) += xe_hwmon.o diff --git a/drivers/gpu/drm/xe/xe_hmm.c b/drivers/gpu/drm/xe/xe_hmm.c new file mode 100644 index 000000000000..2c32dc46f7d4 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_hmm.c @@ -0,0 +1,253 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2024 Intel Corporation + */ + +#include +#include +#include +#include +#include +#include +#include +#include "xe_hmm.h" +#include "xe_vm.h" +#include "xe_bo.h" + +static u64 xe_npages_in_range(unsigned long start, unsigned long end) +{ + return (end - start) >> PAGE_SHIFT; +} + +/* + * xe_mark_range_accessed() - mark a range as accessed, so the core mm + * has such information for memory eviction or writeback to + * hard disk + * + * @range: the range to mark + * @write: if we write to this range, mark pages in this range + * as dirty + */ +static void xe_mark_range_accessed(struct hmm_range *range, bool write) +{ + struct page *page; + u64 i, npages; + + npages = xe_npages_in_range(range->start, range->end); + for (i = 0; i < npages; i++) { + page = hmm_pfn_to_page(range->hmm_pfns[i]); + if (write) + set_page_dirty_lock(page); + + mark_page_accessed(page); + } +} + +/* + * xe_build_sg() - build a scatter gather table for all the physical pages/pfn + * in a hmm_range. dma-map pages if necessary. The dma-address is saved in the + * sg table and will be used to program the GPU page table later. + * + * @xe: the xe device that will access the dma-address in the sg table + * @range: the hmm range that we build the sg table from. range->hmm_pfns[] + * has the pfn numbers of pages that back up this hmm address range. + * @st: pointer to the sg table. + * @write: whether we write to this range. This decides the dma map direction + * for system pages.
If we write to it, we map it bi-directional; otherwise + * DMA_TO_DEVICE + * + * All the contiguous pfns will be collapsed into one entry in + * the scatter gather table. This is for the purpose of efficiently + * programming the GPU page table. + * + * The dma_address in the sg table will later be used by the GPU to + * access memory. So if the memory is system memory, we need to + * do a dma-mapping so it can be accessed by GPU/DMA. + * + * FIXME: This function currently only supports pages in system + * memory. If the memory is GPU local memory (of the GPU who + * is going to access memory), we need gpu dpa (device physical + * address), and there is no need for dma-mapping. This is TBD. + * + * FIXME: dma-mapping for a peer gpu device to access a remote gpu's + * memory. Add this when p2p is supported. + * + * This function allocates the storage of the sg table. It is the + * caller's responsibility to free it by calling sg_free_table. + * + * Returns 0 if successful; -ENOMEM if it fails to allocate memory + */ +static int xe_build_sg(struct xe_device *xe, struct hmm_range *range, + struct sg_table *st, bool write) +{ + struct device *dev = xe->drm.dev; + struct page **pages; + u64 i, npages; + int ret; + + npages = xe_npages_in_range(range->start, range->end); + pages = kvmalloc_array(npages, sizeof(*pages), GFP_KERNEL); + if (!pages) + return -ENOMEM; + + for (i = 0; i < npages; i++) { + pages[i] = hmm_pfn_to_page(range->hmm_pfns[i]); + xe_assert(xe, !is_device_private_page(pages[i])); + } + + ret = sg_alloc_table_from_pages_segment(st, pages, npages, 0, npages << PAGE_SHIFT, + xe_sg_segment_size(dev), GFP_KERNEL); + if (ret) + goto free_pages; + + ret = dma_map_sgtable(dev, st, write ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE, + DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_NO_KERNEL_MAPPING); + if (ret) { + sg_free_table(st); + st = NULL; + } + +free_pages: + kvfree(pages); + return ret; +} + +/* + * xe_hmm_userptr_free_sg() - Free the scatter gather table of userptr + * + * @uvma: the userptr vma which holds the scatter gather table + * + * With the function xe_userptr_populate_range, we allocate the storage of + * the userptr sg table. This is a helper function to free this + * sg table and dma-unmap the addresses in the table. + */ +void xe_hmm_userptr_free_sg(struct xe_userptr_vma *uvma) +{ + struct xe_userptr *userptr = &uvma->userptr; + struct xe_vma *vma = &uvma->vma; + bool write = !xe_vma_read_only(vma); + struct xe_vm *vm = xe_vma_vm(vma); + struct xe_device *xe = vm->xe; + struct device *dev = xe->drm.dev; + + xe_assert(xe, userptr->sg); + dma_unmap_sgtable(dev, userptr->sg, + write ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE, 0); + + sg_free_table(userptr->sg); + userptr->sg = NULL; +} + +/** + * xe_hmm_userptr_populate_range() - Populate physical pages of a virtual + * address range + * + * @uvma: userptr vma which has information of the range to populate. + * @is_mm_mmap_locked: True if mmap_read_lock is already acquired by caller. + * + * This function populates the physical pages of a virtual + * address range. The populated physical pages are saved in + * the userptr's sg table. It is similar to get_user_pages but calls + * hmm_range_fault. + * + * This function also reads the mmu notifier sequence number ( + * mmu_interval_read_begin), for the purpose of later + * comparison (through mmu_interval_read_retry). + * + * This must be called with mmap read or write lock held. + * + * This function allocates the storage of the userptr sg table. + * It is the caller's responsibility to free it by calling sg_free_table.
+ * + * Returns: 0 for success; a negative errno on failure + */ +int xe_hmm_userptr_populate_range(struct xe_userptr_vma *uvma, + bool is_mm_mmap_locked) +{ + unsigned long timeout = + jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT); + unsigned long *pfns, flags = HMM_PFN_REQ_FAULT; + struct xe_userptr *userptr; + struct xe_vma *vma = &uvma->vma; + u64 userptr_start = xe_vma_userptr(vma); + u64 userptr_end = userptr_start + xe_vma_size(vma); + struct xe_vm *vm = xe_vma_vm(vma); + struct hmm_range hmm_range; + bool write = !xe_vma_read_only(vma); + unsigned long notifier_seq; + u64 npages; + int ret; + + userptr = &uvma->userptr; + + if (is_mm_mmap_locked) + mmap_assert_locked(userptr->notifier.mm); + + if (vma->gpuva.flags & XE_VMA_DESTROYED) + return 0; + + notifier_seq = mmu_interval_read_begin(&userptr->notifier); + if (notifier_seq == userptr->notifier_seq) + return 0; + + if (userptr->sg) + xe_hmm_userptr_free_sg(uvma); + + npages = xe_npages_in_range(userptr_start, userptr_end); + pfns = kvmalloc_array(npages, sizeof(*pfns), GFP_KERNEL); + if (unlikely(!pfns)) + return -ENOMEM; + + if (write) + flags |= HMM_PFN_REQ_WRITE; + + if (!mmget_not_zero(userptr->notifier.mm)) { + ret = -EFAULT; + goto free_pfns; + } + + hmm_range.default_flags = flags; + hmm_range.hmm_pfns = pfns; + hmm_range.notifier = &userptr->notifier; + hmm_range.start = userptr_start; + hmm_range.end = userptr_end; + hmm_range.dev_private_owner = vm->xe; + + while (true) { + hmm_range.notifier_seq = mmu_interval_read_begin(&userptr->notifier); + + if (!is_mm_mmap_locked) + mmap_read_lock(userptr->notifier.mm); + + ret = hmm_range_fault(&hmm_range); + + if (!is_mm_mmap_locked) + mmap_read_unlock(userptr->notifier.mm); + + if (ret == -EBUSY) { + if (time_after(jiffies, timeout)) + break; + + continue; + } + break; + } + + mmput(userptr->notifier.mm); + + if (ret) + goto free_pfns; + + ret = xe_build_sg(vm->xe, &hmm_range, &userptr->sgt, write); + if (ret) + goto free_pfns; + + xe_mark_range_accessed(&hmm_range, write); + userptr->sg = &userptr->sgt; + userptr->notifier_seq = hmm_range.notifier_seq; + +free_pfns: + kvfree(pfns); + return ret; +} + diff --git a/drivers/gpu/drm/xe/xe_hmm.h b/drivers/gpu/drm/xe/xe_hmm.h new file mode 100644 index 000000000000..909dc2bdcd97 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_hmm.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: MIT + * + * Copyright © 2024 Intel Corporation + */ + +#include + +struct xe_userptr_vma; + +int xe_hmm_userptr_populate_range(struct xe_userptr_vma *uvma, bool is_mm_mmap_locked); +void xe_hmm_userptr_free_sg(struct xe_userptr_vma *uvma); -- cgit v1.2.3 From 12f4b58a37f48a049893f1ae04b3fbb9b5088e8c Mon Sep 17 00:00:00 2001 From: Oak Zeng Date: Fri, 12 Apr 2024 15:22:37 +0530 Subject: drm/xe: Use hmm_range_fault to populate user pages MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is an effort to unify hmmptr (aka system allocator) and userptr code. hmm_range_fault is used to populate a virtual address range for both hmmptr and userptr, instead of hmmptr using hmm_range_fault and userptr using get_user_pages_fast. This also aligns with the AMD gpu driver's behavior. In the long term, we plan to move some common helpers in this area into the drm layer so they can be re-used by different vendors. v1: Use a function parameter to indicate whether the lock is acquired by the caller or needs to be acquired in hmm_range_fault.
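For reference, the locking and retry pattern that both paths now share is roughly the following sketch. This is illustrative only, not the driver code itself: populate_range_sketch and its parameters are placeholder names, and the timeout the driver wraps around the -EBUSY retry (as in xe_hmm_userptr_populate_range above) is omitted for brevity.

#include <linux/hmm.h>
#include <linux/mm.h>
#include <linux/mmu_notifier.h>

/* Illustrative sketch of the hmm_range_fault retry pattern (not Xe code) */
static int populate_range_sketch(struct mmu_interval_notifier *notifier,
				 unsigned long *pfns, unsigned long start,
				 unsigned long end, bool write)
{
	struct hmm_range range = {
		.notifier = notifier,
		.start = start,
		.end = end,
		.hmm_pfns = pfns,
		.default_flags = HMM_PFN_REQ_FAULT |
				 (write ? HMM_PFN_REQ_WRITE : 0),
	};
	int ret;

	do {
		/* sample the notifier sequence before faulting the range */
		range.notifier_seq = mmu_interval_read_begin(notifier);

		mmap_read_lock(notifier->mm);
		ret = hmm_range_fault(&range);
		mmap_read_unlock(notifier->mm);

		/* -EBUSY means a concurrent invalidation hit; resample and retry */
	} while (ret == -EBUSY);

	/* on success, callers later use mmu_interval_read_retry() to validate */
	return ret;
}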
Reviewed-by: Matthew Brost Cc: Matthew Brost Signed-off-by: Oak Zeng Signed-off-by: Himal Prasad Ghimiray Signed-off-by: Thomas Hellström Link: https://patchwork.freedesktop.org/patch/msgid/20240412095237.1048599-3-himal.prasad.ghimiray@intel.com --- drivers/gpu/drm/xe/xe_vm.c | 117 +++------------------------------------------ 1 file changed, 6 insertions(+), 111 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 66b70fd3d105..9c6f653992dd 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -39,6 +39,7 @@ #include "xe_sync.h" #include "xe_trace.h" #include "xe_wa.h" +#include "xe_hmm.h" static struct drm_gem_object *xe_vm_obj(struct xe_vm *vm) { @@ -66,113 +67,14 @@ int xe_vma_userptr_check_repin(struct xe_userptr_vma *uvma) int xe_vma_userptr_pin_pages(struct xe_userptr_vma *uvma) { - struct xe_userptr *userptr = &uvma->userptr; struct xe_vma *vma = &uvma->vma; struct xe_vm *vm = xe_vma_vm(vma); struct xe_device *xe = vm->xe; - const unsigned long num_pages = xe_vma_size(vma) >> PAGE_SHIFT; - struct page **pages; - bool in_kthread = !current->mm; - unsigned long notifier_seq; - int pinned, ret, i; - bool read_only = xe_vma_read_only(vma); lockdep_assert_held(&vm->lock); xe_assert(xe, xe_vma_is_userptr(vma)); -retry: - if (vma->gpuva.flags & XE_VMA_DESTROYED) - return 0; - - notifier_seq = mmu_interval_read_begin(&userptr->notifier); - if (notifier_seq == userptr->notifier_seq) - return 0; - - pages = kvmalloc_array(num_pages, sizeof(*pages), GFP_KERNEL); - if (!pages) - return -ENOMEM; - - if (userptr->sg) { - dma_unmap_sgtable(xe->drm.dev, - userptr->sg, - read_only ? DMA_TO_DEVICE : - DMA_BIDIRECTIONAL, 0); - sg_free_table(userptr->sg); - userptr->sg = NULL; - } - - pinned = ret = 0; - if (in_kthread) { - if (!mmget_not_zero(userptr->notifier.mm)) { - ret = -EFAULT; - goto mm_closed; - } - kthread_use_mm(userptr->notifier.mm); - } - - while (pinned < num_pages) { - ret = get_user_pages_fast(xe_vma_userptr(vma) + - pinned * PAGE_SIZE, - num_pages - pinned, - read_only ? 0 : FOLL_WRITE, - &pages[pinned]); - if (ret < 0) - break; - - pinned += ret; - ret = 0; - } - - if (in_kthread) { - kthread_unuse_mm(userptr->notifier.mm); - mmput(userptr->notifier.mm); - } -mm_closed: - if (ret) - goto out; - - ret = sg_alloc_table_from_pages_segment(&userptr->sgt, pages, - pinned, 0, - (u64)pinned << PAGE_SHIFT, - xe_sg_segment_size(xe->drm.dev), - GFP_KERNEL); - if (ret) { - userptr->sg = NULL; - goto out; - } - userptr->sg = &userptr->sgt; - - ret = dma_map_sgtable(xe->drm.dev, userptr->sg, - read_only ? DMA_TO_DEVICE : - DMA_BIDIRECTIONAL, - DMA_ATTR_SKIP_CPU_SYNC | - DMA_ATTR_NO_KERNEL_MAPPING); - if (ret) { - sg_free_table(userptr->sg); - userptr->sg = NULL; - goto out; - } - - for (i = 0; i < pinned; ++i) { - if (!read_only) { - lock_page(pages[i]); - set_page_dirty(pages[i]); - unlock_page(pages[i]); - } - mark_page_accessed(pages[i]); - } - -out: - release_pages(pages, pinned); - kvfree(pages); - - if (!(ret < 0)) { - userptr->notifier_seq = notifier_seq; - if (xe_vma_userptr_check_repin(uvma) == -EAGAIN) - goto retry; - } - - return ret < 0 ? 
ret : 0; + return xe_hmm_userptr_populate_range(uvma, false); } static bool preempt_fences_waiting(struct xe_vm *vm) @@ -956,8 +858,6 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm, static void xe_vma_destroy_late(struct xe_vma *vma) { struct xe_vm *vm = xe_vma_vm(vma); - struct xe_device *xe = vm->xe; - bool read_only = xe_vma_read_only(vma); if (vma->ufence) { xe_sync_ufence_put(vma->ufence); @@ -965,16 +865,11 @@ static void xe_vma_destroy_late(struct xe_vma *vma) } if (xe_vma_is_userptr(vma)) { - struct xe_userptr *userptr = &to_userptr_vma(vma)->userptr; + struct xe_userptr_vma *uvma = to_userptr_vma(vma); + struct xe_userptr *userptr = &uvma->userptr; - if (userptr->sg) { - dma_unmap_sgtable(xe->drm.dev, - userptr->sg, - read_only ? DMA_TO_DEVICE : - DMA_BIDIRECTIONAL, 0); - sg_free_table(userptr->sg); - userptr->sg = NULL; - } + if (userptr->sg) + xe_hmm_userptr_free_sg(uvma); /* * Since userptr pages are not pinned, we can't remove -- cgit v1.2.3 From 0bdd5b16ba0444f41d538f5927cb9b995d684594 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Wed, 10 Apr 2024 19:03:34 +0200 Subject: drm/xe/pf: Introduce mutex to protect VFs configurations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PF driver will maintain configurations and resources for every VF and this data could span multiple tiles and/or GTs. Prepare mutex to protect data that we will add in upcoming patches. Reviewed-by: Piotr Piórkowski Signed-off-by: Michal Wajdeczko Link: https://patchwork.freedesktop.org/patch/msgid/20240410170338.1199-2-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_sriov.c | 7 +++++++ drivers/gpu/drm/xe/xe_sriov_pf.c | 15 +++++++++++++++ drivers/gpu/drm/xe/xe_sriov_pf.h | 6 ++++++ drivers/gpu/drm/xe/xe_sriov_types.h | 4 ++++ 4 files changed, 32 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_sriov.c b/drivers/gpu/drm/xe/xe_sriov.c index d324f131e3da..1b40f5de9ef5 100644 --- a/drivers/gpu/drm/xe/xe_sriov.c +++ b/drivers/gpu/drm/xe/xe_sriov.c @@ -94,6 +94,13 @@ int xe_sriov_init(struct xe_device *xe) if (!IS_SRIOV(xe)) return 0; + if (IS_SRIOV_PF(xe)) { + int err = xe_sriov_pf_init_early(xe); + + if (err) + return err; + } + xe_assert(xe, !xe->sriov.wq); xe->sriov.wq = alloc_workqueue("xe-sriov-wq", 0, 0); if (!xe->sriov.wq) diff --git a/drivers/gpu/drm/xe/xe_sriov_pf.c b/drivers/gpu/drm/xe/xe_sriov_pf.c index 030c2b69ecc4..0f721ae17b26 100644 --- a/drivers/gpu/drm/xe/xe_sriov_pf.c +++ b/drivers/gpu/drm/xe/xe_sriov_pf.c @@ -3,6 +3,8 @@ * Copyright © 2023-2024 Intel Corporation */ +#include + #include "xe_assert.h" #include "xe_device.h" #include "xe_module.h" @@ -70,6 +72,19 @@ bool xe_sriov_pf_readiness(struct xe_device *xe) return true; } +/** + * xe_sriov_pf_init_early - Initialize SR-IOV PF specific data. + * @xe: the &xe_device to initialize + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_sriov_pf_init_early(struct xe_device *xe) +{ + xe_assert(xe, IS_SRIOV_PF(xe)); + + return drmm_mutex_init(&xe->drm, &xe->sriov.pf.master_lock); +} + /** * xe_sriov_pf_print_vfs_summary - Print SR-IOV PF information. 
* @xe: the &xe_device to print info from diff --git a/drivers/gpu/drm/xe/xe_sriov_pf.h b/drivers/gpu/drm/xe/xe_sriov_pf.h index ebef2e01838a..d1220e70e1c0 100644 --- a/drivers/gpu/drm/xe/xe_sriov_pf.h +++ b/drivers/gpu/drm/xe/xe_sriov_pf.h @@ -13,12 +13,18 @@ struct xe_device; #ifdef CONFIG_PCI_IOV bool xe_sriov_pf_readiness(struct xe_device *xe); +int xe_sriov_pf_init_early(struct xe_device *xe); void xe_sriov_pf_print_vfs_summary(struct xe_device *xe, struct drm_printer *p); #else static inline bool xe_sriov_pf_readiness(struct xe_device *xe) { return false; } + +static inline int xe_sriov_pf_init_early(struct xe_device *xe) +{ + return 0; +} #endif #endif diff --git a/drivers/gpu/drm/xe/xe_sriov_types.h b/drivers/gpu/drm/xe/xe_sriov_types.h index fa583e8fa0c2..c7b7ad4af5c8 100644 --- a/drivers/gpu/drm/xe/xe_sriov_types.h +++ b/drivers/gpu/drm/xe/xe_sriov_types.h @@ -7,6 +7,7 @@ #define _XE_SRIOV_TYPES_H_ #include +#include #include /** @@ -50,6 +51,9 @@ struct xe_device_pf { /** @driver_max_vfs: Maximum number of VFs supported by the driver. */ u16 driver_max_vfs; + + /** @master_lock: protects all VFs configurations across GTs */ + struct mutex master_lock; }; #endif -- cgit v1.2.3 From 25f2e04b91d4fefbf9ee4890d774f82c8570ccc5 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Wed, 10 Apr 2024 19:03:35 +0200 Subject: drm/xe/pf: Introduce helper functions for use by PF MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The PF driver will maintain VFs' configuration data mostly on the GT level, but some internal data is located at the device level. To allow easy access to that data from the GT level functions, and to minimize code duplication, introduce a set of helper functions and macros for explicit use by the PF driver. We will use these helpers in upcoming patches. Reviewed-by: Piotr Piórkowski Signed-off-by: Michal Wajdeczko Link: https://patchwork.freedesktop.org/patch/msgid/20240410170338.1199-3-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_gt_sriov_pf_helpers.h | 35 ++++++++++++++++++++++ drivers/gpu/drm/xe/xe_sriov_pf_helpers.h | 46 +++++++++++++++++++++++++++++ 2 files changed, 81 insertions(+) create mode 100644 drivers/gpu/drm/xe/xe_gt_sriov_pf_helpers.h create mode 100644 drivers/gpu/drm/xe/xe_sriov_pf_helpers.h diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_helpers.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_helpers.h new file mode 100644 index 000000000000..0bf12d89ceb2 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_helpers.h @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023-2024 Intel Corporation + */ + +#ifndef _XE_GT_SRIOV_PF_HELPERS_H_ +#define _XE_GT_SRIOV_PF_HELPERS_H_ + +#include "xe_gt_types.h" +#include "xe_sriov_pf_helpers.h" + +/** + * xe_gt_sriov_pf_assert_vfid() - warn if &id is not a supported VF number when debugging. + * @gt: the PF &xe_gt to assert on + * @vfid: the VF number to assert + * + * Assert that &gt belongs to the Physical Function (PF) device and provided &vfid + * is within a range of supported VF numbers (up to maximum number of VFs that + * driver can support, including VF0 that represents the PF itself). + * + * Note: Effective only on debug builds. See `Xe ASSERTs`_ for more information.
+ */ +#define xe_gt_sriov_pf_assert_vfid(gt, vfid) xe_sriov_pf_assert_vfid(gt_to_xe(gt), (vfid)) + +static inline int xe_gt_sriov_pf_get_totalvfs(struct xe_gt *gt) +{ + return xe_sriov_pf_get_totalvfs(gt_to_xe(gt)); +} + +static inline struct mutex *xe_gt_sriov_pf_master_mutex(struct xe_gt *gt) +{ + return xe_sriov_pf_master_mutex(gt_to_xe(gt)); +} + +#endif diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_helpers.h b/drivers/gpu/drm/xe/xe_sriov_pf_helpers.h new file mode 100644 index 000000000000..7d156ba82479 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sriov_pf_helpers.h @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023-2024 Intel Corporation + */ + +#ifndef _XE_SRIOV_PF_HELPERS_H_ +#define _XE_SRIOV_PF_HELPERS_H_ + +#include "xe_assert.h" +#include "xe_device_types.h" +#include "xe_sriov.h" +#include "xe_sriov_types.h" + +/** + * xe_sriov_pf_assert_vfid() - warn if &id is not a supported VF number when debugging. + * @xe: the PF &xe_device to assert on + * @vfid: the VF number to assert + * + * Assert that &xe represents the Physical Function (PF) device and provided &vfid + * is within a range of supported VF numbers (up to maximum number of VFs that + * driver can support, including VF0 that represents the PF itself). + * + * Note: Effective only on debug builds. See `Xe ASSERTs`_ for more information. + */ +#define xe_sriov_pf_assert_vfid(xe, vfid) \ + xe_assert((xe), (vfid) <= xe_sriov_pf_get_totalvfs(xe)) + +/** + * xe_sriov_pf_get_totalvfs() - Get maximum number of VFs that driver can support. + * @xe: the &xe_device to query (shall be PF) + * + * Return: Maximum number of VFs that this PF driver supports. + */ +static inline int xe_sriov_pf_get_totalvfs(struct xe_device *xe) +{ + xe_assert(xe, IS_SRIOV_PF(xe)); + return xe->sriov.pf.driver_max_vfs; +} + +static inline struct mutex *xe_sriov_pf_master_mutex(struct xe_device *xe) +{ + xe_assert(xe, IS_SRIOV_PF(xe)); + return &xe->sriov.pf.master_lock; +} + +#endif -- cgit v1.2.3 From bbc8a6fb83afc41ba4e8d2564314d7a4d01db0cb Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Wed, 10 Apr 2024 19:03:36 +0200 Subject: drm/xe/guc: Add PF2GUC_UPDATE_VGT_POLICY to ABI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In upcoming patches the PF driver will add support to change GuC policies and will need to use PF2GUC_UPDATE_VGT_POLICY messages. Add necessary definitions to our GuC firmware ABI header. Reviewed-by: Piotr Piórkowski Signed-off-by: Michal Wajdeczko Link: https://patchwork.freedesktop.org/patch/msgid/20240410170338.1199-4-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h | 47 ++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h b/drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h index 12ee5e9e831f..2d829e812c61 100644 --- a/drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h +++ b/drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h @@ -213,6 +213,53 @@ #define GUC_PF_NOTIFY_VF_PAUSE_DONE 3u #define GUC_PF_NOTIFY_VF_FIXUP_DONE 4u +/** + * DOC: PF2GUC_UPDATE_VGT_POLICY + * + * This message is used by the PF to set `GuC VGT Policy KLVs`_. + * + * This message must be sent as `CTB HXG Message`_. 
+ * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_HOST_ | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_REQUEST_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:16 | MBZ | + * | +-------+--------------------------------------------------------------+ + * | | 15:0 | ACTION = _`GUC_ACTION_PF2GUC_UPDATE_VGT_POLICY` = 0x5502 | + * +---+-------+--------------------------------------------------------------+ + * | 1 | 31:0 | **CFG_ADDR_LO** - dword aligned GGTT offset that | + * | | | represents the start of `GuC VGT Policy KLVs`_ list. | + * +---+-------+--------------------------------------------------------------+ + * | 2 | 31:0 | **CFG_ADDR_HI** - upper 32 bits of above offset. | + * +---+-------+--------------------------------------------------------------+ + * | 3 | 31:0 | **CFG_SIZE** - size (in dwords) of the config buffer | + * +---+-------+--------------------------------------------------------------+ + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_GUC_ | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_RESPONSE_SUCCESS_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:0 | **COUNT** - number of KLVs successfully applied | + * +---+-------+--------------------------------------------------------------+ + */ +#define GUC_ACTION_PF2GUC_UPDATE_VGT_POLICY 0x5502u + +#define PF2GUC_UPDATE_VGT_POLICY_REQUEST_MSG_LEN (GUC_HXG_REQUEST_MSG_MIN_LEN + 3u) +#define PF2GUC_UPDATE_VGT_POLICY_REQUEST_MSG_0_MBZ GUC_HXG_REQUEST_MSG_0_DATA0 +#define PF2GUC_UPDATE_VGT_POLICY_REQUEST_MSG_1_CFG_ADDR_LO GUC_HXG_REQUEST_MSG_n_DATAn +#define PF2GUC_UPDATE_VGT_POLICY_REQUEST_MSG_2_CFG_ADDR_HI GUC_HXG_REQUEST_MSG_n_DATAn +#define PF2GUC_UPDATE_VGT_POLICY_REQUEST_MSG_3_CFG_SIZE GUC_HXG_REQUEST_MSG_n_DATAn + +#define PF2GUC_UPDATE_VGT_POLICY_RESPONSE_MSG_LEN GUC_HXG_RESPONSE_MSG_MIN_LEN +#define PF2GUC_UPDATE_VGT_POLICY_RESPONSE_MSG_0_COUNT GUC_HXG_RESPONSE_MSG_0_DATA0 + /** * DOC: PF2GUC_VF_CONTROL * -- cgit v1.2.3 From 0ddc1e0721d410ae09a8ea4cbfebfb20bc1e2e03 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Wed, 10 Apr 2024 19:03:37 +0200 Subject: drm/xe/guc: Add helpers for GuC KLVs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Many of the GuC actions use KLVs to pass additional parameters or configuration data. Add few helper functions for better reporting any information related to KLVs. 
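As background for these helpers, recall the `GuC KLV`_ layout: a 32-bit header dword, with the key and the value length (in dwords) packed into the GUC_KLV_0_KEY and GUC_KLV_0_LEN bit fields, immediately followed by that many value dwords. A buffer holding a single 32-bit KLV could be assembled as in the sketch below; this is illustrative only, the chosen key is just an example, and abi/guc_klvs_abi.h is assumed to provide the definitions.

#include <linux/bitfield.h>

#include "abi/guc_klvs_abi.h"
#include "xe_guc_klv_helpers.h"

/* Illustrative sketch only: build and count a buffer with one 32-bit KLV */
static int example_single_klv(void)
{
	u32 klvs[] = {
		/* header dword: key plus value length in dwords */
		FIELD_PREP(GUC_KLV_0_KEY, GUC_KLV_VF_CFG_EXEC_QUANTUM_KEY) |
		FIELD_PREP(GUC_KLV_0_LEN, 1),
		/* one value dword, e.g. a 10 ms execution quantum */
		10,
	};

	/* xe_guc_klv_count() from this patch would report one KLV here */
	return xe_guc_klv_count(klvs, ARRAY_SIZE(klvs));
}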
Cc: Himal Prasad Ghimiray Reviewed-by: Piotr Piórkowski Acked-by: Himal Prasad Ghimiray Signed-off-by: Michal Wajdeczko Link: https://patchwork.freedesktop.org/patch/msgid/20240410170338.1199-5-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/Makefile | 1 + drivers/gpu/drm/xe/xe_guc_klv_helpers.c | 134 ++++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_guc_klv_helpers.h | 51 ++++++++++++ 3 files changed, 186 insertions(+) create mode 100644 drivers/gpu/drm/xe/xe_guc_klv_helpers.c create mode 100644 drivers/gpu/drm/xe/xe_guc_klv_helpers.h diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index 28ea5fd0ef33..e8d2bd31e537 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -99,6 +99,7 @@ xe-y += xe_bb.o \ xe_guc_debugfs.o \ xe_guc_hwconfig.o \ xe_guc_id_mgr.o \ + xe_guc_klv_helpers.o \ xe_guc_log.o \ xe_guc_pc.o \ xe_guc_submit.o \ diff --git a/drivers/gpu/drm/xe/xe_guc_klv_helpers.c b/drivers/gpu/drm/xe/xe_guc_klv_helpers.c new file mode 100644 index 000000000000..ceca949932a0 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_guc_klv_helpers.c @@ -0,0 +1,134 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2024 Intel Corporation + */ + +#include +#include + +#include "abi/guc_klvs_abi.h" +#include "xe_guc_klv_helpers.h" + +#define make_u64(hi, lo) ((u64)((u64)(u32)(hi) << 32 | (u32)(lo))) + +/** + * xe_guc_klv_key_to_string - Convert KLV key into friendly name. + * @key: the `GuC KLV`_ key + * + * Return: name of the KLV key. + */ +const char *xe_guc_klv_key_to_string(u16 key) +{ + switch (key) { + /* VGT POLICY keys */ + case GUC_KLV_VGT_POLICY_SCHED_IF_IDLE_KEY: + return "sched_if_idle"; + case GUC_KLV_VGT_POLICY_ADVERSE_SAMPLE_PERIOD_KEY: + return "sample_period"; + case GUC_KLV_VGT_POLICY_RESET_AFTER_VF_SWITCH_KEY: + return "reset_engine"; + /* VF CFG keys */ + case GUC_KLV_VF_CFG_GGTT_START_KEY: + return "ggtt_start"; + case GUC_KLV_VF_CFG_GGTT_SIZE_KEY: + return "ggtt_size"; + case GUC_KLV_VF_CFG_LMEM_SIZE_KEY: + return "lmem_size"; + case GUC_KLV_VF_CFG_NUM_CONTEXTS_KEY: + return "num_contexts"; + case GUC_KLV_VF_CFG_TILE_MASK_KEY: + return "tile_mask"; + case GUC_KLV_VF_CFG_NUM_DOORBELLS_KEY: + return "num_doorbells"; + case GUC_KLV_VF_CFG_EXEC_QUANTUM_KEY: + return "exec_quantum"; + case GUC_KLV_VF_CFG_PREEMPT_TIMEOUT_KEY: + return "preempt_timeout"; + case GUC_KLV_VF_CFG_BEGIN_DOORBELL_ID_KEY: + return "begin_db_id"; + case GUC_KLV_VF_CFG_BEGIN_CONTEXT_ID_KEY: + return "begin_ctx_id"; + default: + return "(unknown)"; + } +} + +/** + * xe_guc_klv_print - Print content of the buffer with `GuC KLV`_. + * @klvs: the buffer with KLVs + * @num_dwords: number of dwords (u32) available in the buffer + * @p: the &drm_printer + * + * The buffer may contain more than one KLV. 
+ */ +void xe_guc_klv_print(const u32 *klvs, u32 num_dwords, struct drm_printer *p) +{ + while (num_dwords >= GUC_KLV_LEN_MIN) { + u32 key = FIELD_GET(GUC_KLV_0_KEY, klvs[0]); + u32 len = FIELD_GET(GUC_KLV_0_LEN, klvs[0]); + + klvs += GUC_KLV_LEN_MIN; + num_dwords -= GUC_KLV_LEN_MIN; + + if (num_dwords < len) { + drm_printf(p, "{ key %#06x : truncated %zu of %zu bytes %*ph } # %s\n", + key, num_dwords * sizeof(u32), len * sizeof(u32), + (int)(num_dwords * sizeof(u32)), klvs, + xe_guc_klv_key_to_string(key)); + return; + } + + switch (len) { + case 0: + drm_printf(p, "{ key %#06x : no value } # %s\n", + key, xe_guc_klv_key_to_string(key)); + break; + case 1: + drm_printf(p, "{ key %#06x : 32b value %u } # %s\n", + key, klvs[0], xe_guc_klv_key_to_string(key)); + break; + case 2: + drm_printf(p, "{ key %#06x : 64b value %#llx } # %s\n", + key, make_u64(klvs[1], klvs[0]), + xe_guc_klv_key_to_string(key)); + break; + default: + drm_printf(p, "{ key %#06x : %zu bytes %*ph } # %s\n", + key, len * sizeof(u32), (int)(len * sizeof(u32)), + klvs, xe_guc_klv_key_to_string(key)); + break; + } + + klvs += len; + num_dwords -= len; + } + + /* we don't expect any leftovers, fix if KLV header is ever changed */ + BUILD_BUG_ON(GUC_KLV_LEN_MIN > 1); +} + +/** + * xe_guc_klv_count - Count KLVs present in the buffer. + * @klvs: the buffer with KLVs + * @num_dwords: number of dwords (u32) in the buffer + * + * Return: number of recognized KLVs or + * a negative error code if KLV buffer is truncated. + */ +int xe_guc_klv_count(const u32 *klvs, u32 num_dwords) +{ + int num_klvs = 0; + + while (num_dwords >= GUC_KLV_LEN_MIN) { + u32 len = FIELD_GET(GUC_KLV_0_LEN, klvs[0]); + + if (num_dwords < len + GUC_KLV_LEN_MIN) + break; + + klvs += GUC_KLV_LEN_MIN + len; + num_dwords -= GUC_KLV_LEN_MIN + len; + num_klvs++; + } + + return num_dwords ? -ENODATA : num_klvs; +} diff --git a/drivers/gpu/drm/xe/xe_guc_klv_helpers.h b/drivers/gpu/drm/xe/xe_guc_klv_helpers.h new file mode 100644 index 000000000000..b835e0ebe6db --- /dev/null +++ b/drivers/gpu/drm/xe/xe_guc_klv_helpers.h @@ -0,0 +1,51 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2024 Intel Corporation + */ + +#ifndef _XE_GUC_KLV_HELPERS_H_ +#define _XE_GUC_KLV_HELPERS_H_ + +#include + +struct drm_printer; + +const char *xe_guc_klv_key_to_string(u16 key); + +void xe_guc_klv_print(const u32 *klvs, u32 num_dwords, struct drm_printer *p); +int xe_guc_klv_count(const u32 *klvs, u32 num_dwords); + +/** + * PREP_GUC_KLV - Prepare KLV header value based on provided key and len. + * @key: KLV key + * @len: KLV length + * + * Return: value of the KLV header (u32). + */ +#define PREP_GUC_KLV(key, len) \ + (FIELD_PREP(GUC_KLV_0_KEY, (key)) | \ + FIELD_PREP(GUC_KLV_0_LEN, (len))) + +/** + * PREP_GUC_KLV_CONST - Prepare KLV header value based on const key and len. + * @key: const KLV key + * @len: const KLV length + * + * Return: value of the KLV header (u32). + */ +#define PREP_GUC_KLV_CONST(key, len) \ + (FIELD_PREP_CONST(GUC_KLV_0_KEY, (key)) | \ + FIELD_PREP_CONST(GUC_KLV_0_LEN, (len))) + +/** + * PREP_GUC_KLV_TAG - Prepare KLV header value based on unique KLV definition tag. + * @TAG: unique tag of the KLV definition + * + * Combine separate KEY and LEN definitions of the KLV identified by the TAG. + * + * Return: value of the KLV header (u32). 
+ */ +#define PREP_GUC_KLV_TAG(TAG) \ + PREP_GUC_KLV_CONST(GUC_KLV_##TAG##_KEY, GUC_KLV_##TAG##_LEN) + +#endif -- cgit v1.2.3 From 48b05e3c3dbbac4275c3e94bed68a36bec6bddfe Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Wed, 10 Apr 2024 19:03:38 +0200 Subject: drm/xe/pf: Add support to configure GuC SR-IOV policies MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are few knobs inside GuC firmware to control VFs scheduling. Add basic functions to support their reconfigurations. We will start using them shortly once we prepare debugfs. Signed-off-by: Michal Wajdeczko Cc: Piotr Piórkowski Reviewed-by: Piotr Piórkowski Link: https://patchwork.freedesktop.org/patch/msgid/20240410170338.1199-6-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/Makefile | 1 + drivers/gpu/drm/xe/xe_gt_sriov_pf_policy.c | 417 +++++++++++++++++++++++ drivers/gpu/drm/xe/xe_gt_sriov_pf_policy.h | 25 ++ drivers/gpu/drm/xe/xe_gt_sriov_pf_policy_types.h | 31 ++ drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h | 21 ++ drivers/gpu/drm/xe/xe_gt_types.h | 7 + 6 files changed, 502 insertions(+) create mode 100644 drivers/gpu/drm/xe/xe_gt_sriov_pf_policy.c create mode 100644 drivers/gpu/drm/xe/xe_gt_sriov_pf_policy.h create mode 100644 drivers/gpu/drm/xe/xe_gt_sriov_pf_policy_types.h create mode 100644 drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index e8d2bd31e537..522bffa5c4e1 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -161,6 +161,7 @@ xe-y += \ xe-$(CONFIG_PCI_IOV) += \ xe_gt_sriov_pf_control.o \ + xe_gt_sriov_pf_policy.o \ xe_lmtt.o \ xe_lmtt_2l.o \ xe_lmtt_ml.o \ diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_policy.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_policy.c new file mode 100644 index 000000000000..3eaa17ca54fc --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_policy.c @@ -0,0 +1,417 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2023-2024 Intel Corporation + */ + +#include "abi/guc_actions_sriov_abi.h" + +#include "xe_bo.h" +#include "xe_gt.h" +#include "xe_gt_sriov_pf_helpers.h" +#include "xe_gt_sriov_pf_policy.h" +#include "xe_gt_sriov_printk.h" +#include "xe_guc_ct.h" +#include "xe_guc_klv_helpers.h" + +/* + * Return: number of KLVs that were successfully parsed and saved, + * negative error code on failure. + */ +static int guc_action_update_vgt_policy(struct xe_guc *guc, u64 addr, u32 size) +{ + u32 request[] = { + GUC_ACTION_PF2GUC_UPDATE_VGT_POLICY, + lower_32_bits(addr), + upper_32_bits(addr), + size, + }; + + return xe_guc_ct_send_block(&guc->ct, request, ARRAY_SIZE(request)); +} + +/* + * Return: number of KLVs that were successfully parsed and saved, + * negative error code on failure. 
+ */ +static int pf_send_policy_klvs(struct xe_gt *gt, const u32 *klvs, u32 num_dwords) +{ + const u32 bytes = num_dwords * sizeof(u32); + struct xe_tile *tile = gt_to_tile(gt); + struct xe_device *xe = tile_to_xe(tile); + struct xe_guc *guc = &gt->uc.guc; + struct xe_bo *bo; + int ret; + + bo = xe_bo_create_pin_map(xe, tile, NULL, + ALIGN(bytes, PAGE_SIZE), + ttm_bo_type_kernel, + XE_BO_FLAG_VRAM_IF_DGFX(tile) | + XE_BO_FLAG_GGTT); + if (IS_ERR(bo)) + return PTR_ERR(bo); + + xe_map_memcpy_to(xe, &bo->vmap, 0, klvs, bytes); + + ret = guc_action_update_vgt_policy(guc, xe_bo_ggtt_addr(bo), num_dwords); + + xe_bo_unpin_map_no_vm(bo); + + return ret; +} + /* + * Return: 0 on success, -ENOKEY if some KLVs were not updated, -EPROTO if reply was malformed, + * negative error code on failure. + */ +static int pf_push_policy_klvs(struct xe_gt *gt, u32 num_klvs, + const u32 *klvs, u32 num_dwords) +{ + int ret; + + xe_gt_assert(gt, num_klvs == xe_guc_klv_count(klvs, num_dwords)); + + ret = pf_send_policy_klvs(gt, klvs, num_dwords); + + if (ret != num_klvs) { + int err = ret < 0 ? ret : ret < num_klvs ? -ENOKEY : -EPROTO; + struct drm_printer p = xe_gt_info_printer(gt); + + xe_gt_sriov_notice(gt, "Failed to push %u policy KLV%s (%pe)\n", + num_klvs, str_plural(num_klvs), ERR_PTR(err)); + xe_guc_klv_print(klvs, num_dwords, &p); + return err; + } + + return 0; +} + +static int pf_push_policy_u32(struct xe_gt *gt, u16 key, u32 value) +{ + u32 klv[] = { + PREP_GUC_KLV(key, 1), + value, + }; + + return pf_push_policy_klvs(gt, 1, klv, ARRAY_SIZE(klv)); +} + +static int pf_update_policy_bool(struct xe_gt *gt, u16 key, bool *policy, bool value) +{ + int err; + + err = pf_push_policy_u32(gt, key, value); + if (unlikely(err)) { + xe_gt_sriov_notice(gt, "Failed to update policy %#x '%s' to '%s' (%pe)\n", + key, xe_guc_klv_key_to_string(key), + str_enabled_disabled(value), ERR_PTR(err)); + return err; + } + + xe_gt_sriov_dbg(gt, "policy key %#x '%s' updated to '%s'\n", + key, xe_guc_klv_key_to_string(key), + str_enabled_disabled(value)); + + *policy = value; + return 0; +} + +static int pf_update_policy_u32(struct xe_gt *gt, u16 key, u32 *policy, u32 value) +{ + int err; + + err = pf_push_policy_u32(gt, key, value); + if (unlikely(err)) { + xe_gt_sriov_notice(gt, "Failed to update policy %#x '%s' to '%s' (%pe)\n", + key, xe_guc_klv_key_to_string(key), + str_enabled_disabled(value), ERR_PTR(err)); + return err; + } + + xe_gt_sriov_dbg(gt, "policy key %#x '%s' updated to %u\n", + key, xe_guc_klv_key_to_string(key), value); + + *policy = value; + return 0; +} + +static int pf_provision_sched_if_idle(struct xe_gt *gt, bool enable) +{ + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt)); + + return pf_update_policy_bool(gt, GUC_KLV_VGT_POLICY_SCHED_IF_IDLE_KEY, + &gt->sriov.pf.policy.guc.sched_if_idle, + enable); +} + +static int pf_reprovision_sched_if_idle(struct xe_gt *gt) +{ + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt)); + + return pf_provision_sched_if_idle(gt, gt->sriov.pf.policy.guc.sched_if_idle); +} + +static void pf_sanitize_sched_if_idle(struct xe_gt *gt) +{ + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt)); + + gt->sriov.pf.policy.guc.sched_if_idle = false; +} + +/** + * xe_gt_sriov_pf_policy_set_sched_if_idle - Control the 'sched_if_idle' policy.
+ * @gt: the &xe_gt where to apply the policy + * @enable: the value of the 'sched_if_idle' policy + * + * This function can only be called on PF. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_gt_sriov_pf_policy_set_sched_if_idle(struct xe_gt *gt, bool enable) +{ + int err; + + mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); + err = pf_provision_sched_if_idle(gt, enable); + mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); + + return err; +} + +/** + * xe_gt_sriov_pf_policy_get_sched_if_idle - Retrieve value of 'sched_if_idle' policy. + * @gt: the &xe_gt where to read the policy from + * + * This function can only be called on PF. + * + * Return: value of 'sched_if_idle' policy. + */ +bool xe_gt_sriov_pf_policy_get_sched_if_idle(struct xe_gt *gt) +{ + bool enable; + + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + + mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); + enable = gt->sriov.pf.policy.guc.sched_if_idle; + mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); + + return enable; +} + +static int pf_provision_reset_engine(struct xe_gt *gt, bool enable) +{ + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt)); + + return pf_update_policy_bool(gt, GUC_KLV_VGT_POLICY_RESET_AFTER_VF_SWITCH_KEY, + &gt->sriov.pf.policy.guc.reset_engine, enable); +} + +static int pf_reprovision_reset_engine(struct xe_gt *gt) +{ + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt)); + + return pf_provision_reset_engine(gt, gt->sriov.pf.policy.guc.reset_engine); +} + +static void pf_sanitize_reset_engine(struct xe_gt *gt) +{ + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt)); + + gt->sriov.pf.policy.guc.reset_engine = false; +} + +/** + * xe_gt_sriov_pf_policy_set_reset_engine - Control the 'reset_engine' policy. + * @gt: the &xe_gt where to apply the policy + * @enable: the value of the 'reset_engine' policy + * + * This function can only be called on PF. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_gt_sriov_pf_policy_set_reset_engine(struct xe_gt *gt, bool enable) +{ + int err; + + mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); + err = pf_provision_reset_engine(gt, enable); + mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); + + return err; +} + +/** + * xe_gt_sriov_pf_policy_get_reset_engine - Retrieve value of 'reset_engine' policy. + * @gt: the &xe_gt where to read the policy from + * + * This function can only be called on PF. + * + * Return: value of 'reset_engine' policy.
+ */ +bool xe_gt_sriov_pf_policy_get_reset_engine(struct xe_gt *gt) +{ + bool enable; + + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + + mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); + enable = gt->sriov.pf.policy.guc.reset_engine; + mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); + + return enable; +} + +static int pf_provision_sample_period(struct xe_gt *gt, u32 value) +{ + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt)); + + return pf_update_policy_u32(gt, GUC_KLV_VGT_POLICY_ADVERSE_SAMPLE_PERIOD_KEY, + &gt->sriov.pf.policy.guc.sample_period, value); +} + +static int pf_reprovision_sample_period(struct xe_gt *gt) +{ + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt)); + + return pf_provision_sample_period(gt, gt->sriov.pf.policy.guc.sample_period); +} + +static void pf_sanitize_sample_period(struct xe_gt *gt) +{ + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt)); + + gt->sriov.pf.policy.guc.sample_period = 0; +} + +/** + * xe_gt_sriov_pf_policy_set_sample_period - Control the 'sample_period' policy. + * @gt: the &xe_gt where to apply the policy + * @value: the value of the 'sample_period' policy + * + * This function can only be called on PF. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_gt_sriov_pf_policy_set_sample_period(struct xe_gt *gt, u32 value) +{ + int err; + + mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); + err = pf_provision_sample_period(gt, value); + mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); + + return err; +} + +/** + * xe_gt_sriov_pf_policy_get_sample_period - Retrieve value of 'sample_period' policy. + * @gt: the &xe_gt where to read the policy from + * + * This function can only be called on PF. + * + * Return: value of 'sample_period' policy. + */ +u32 xe_gt_sriov_pf_policy_get_sample_period(struct xe_gt *gt) +{ + u32 value; + + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + + mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); + value = gt->sriov.pf.policy.guc.sample_period; + mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); + + return value; +} + +static void pf_sanitize_guc_policies(struct xe_gt *gt) +{ + pf_sanitize_sched_if_idle(gt); + pf_sanitize_reset_engine(gt); + pf_sanitize_sample_period(gt); +} + +/** + * xe_gt_sriov_pf_policy_sanitize - Reset policy settings. + * @gt: the &xe_gt + * + * This function can only be called on PF. + */ +void xe_gt_sriov_pf_policy_sanitize(struct xe_gt *gt) +{ + mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); + pf_sanitize_guc_policies(gt); + mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); +} + +/** + * xe_gt_sriov_pf_policy_reprovision - Reprovision (and optionally reset) policy settings. + * @gt: the &xe_gt + * @reset: if true will reprovision using default values instead of latest + * + * This function can only be called on PF. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_gt_sriov_pf_policy_reprovision(struct xe_gt *gt, bool reset) +{ + int err = 0; + + xe_device_mem_access_get(gt_to_xe(gt)); + + mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); + if (reset) + pf_sanitize_guc_policies(gt); + err |= pf_reprovision_sched_if_idle(gt); + err |= pf_reprovision_reset_engine(gt); + err |= pf_reprovision_sample_period(gt); + mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); + + xe_device_mem_access_put(gt_to_xe(gt)); + + return err ?
-ENXIO : 0; +} + +static void print_guc_policies(struct drm_printer *p, struct xe_gt_sriov_guc_policies *policy) +{ + drm_printf(p, "%s:\t%s\n", + xe_guc_klv_key_to_string(GUC_KLV_VGT_POLICY_SCHED_IF_IDLE_KEY), + str_enabled_disabled(policy->sched_if_idle)); + drm_printf(p, "%s:\t%s\n", + xe_guc_klv_key_to_string(GUC_KLV_VGT_POLICY_RESET_AFTER_VF_SWITCH_KEY), + str_enabled_disabled(policy->reset_engine)); + drm_printf(p, "%s:\t%u %s\n", + xe_guc_klv_key_to_string(GUC_KLV_VGT_POLICY_ADVERSE_SAMPLE_PERIOD_KEY), + policy->sample_period, policy->sample_period ? "ms" : "(disabled)"); +} + +/** + * xe_gt_sriov_pf_policy_print - Dump actual policy values. + * @gt: the &xe_gt where to read the policy from + * @p: the &drm_printer + * + * This function can only be called on PF. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_gt_sriov_pf_policy_print(struct xe_gt *gt, struct drm_printer *p) +{ + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + + mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); + print_guc_policies(p, &gt->sriov.pf.policy.guc); + mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); + + return 0; +} diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_policy.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_policy.h new file mode 100644 index 000000000000..2a5dc33dc6d7 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_policy.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023-2024 Intel Corporation + */ + +#ifndef _XE_GT_SRIOV_PF_POLICY_H_ +#define _XE_GT_SRIOV_PF_POLICY_H_ + +#include + +struct drm_printer; +struct xe_gt; + +int xe_gt_sriov_pf_policy_set_sched_if_idle(struct xe_gt *gt, bool enable); +bool xe_gt_sriov_pf_policy_get_sched_if_idle(struct xe_gt *gt); +int xe_gt_sriov_pf_policy_set_reset_engine(struct xe_gt *gt, bool enable); +bool xe_gt_sriov_pf_policy_get_reset_engine(struct xe_gt *gt); +int xe_gt_sriov_pf_policy_set_sample_period(struct xe_gt *gt, u32 value); +u32 xe_gt_sriov_pf_policy_get_sample_period(struct xe_gt *gt); + +void xe_gt_sriov_pf_policy_sanitize(struct xe_gt *gt); +int xe_gt_sriov_pf_policy_reprovision(struct xe_gt *gt, bool reset); +int xe_gt_sriov_pf_policy_print(struct xe_gt *gt, struct drm_printer *p); + +#endif diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_policy_types.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_policy_types.h new file mode 100644 index 000000000000..4de532af135e --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_policy_types.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023-2024 Intel Corporation + */ + +#ifndef _XE_GT_SRIOV_PF_POLICY_TYPES_H_ +#define _XE_GT_SRIOV_PF_POLICY_TYPES_H_ + +#include + +/** + * struct xe_gt_sriov_guc_policies - GuC SR-IOV policies. + * @sched_if_idle: controls strict scheduling policy. + * @reset_engine: controls engines reset on VF switch policy. + * @sample_period: adverse events sampling period (in milliseconds). + */ +struct xe_gt_sriov_guc_policies { + bool sched_if_idle; + bool reset_engine; + u32 sample_period; +}; + +/** + * struct xe_gt_sriov_pf_policy - PF policy data. + * @guc: GuC scheduling policies.
+ */ +struct xe_gt_sriov_pf_policy { + struct xe_gt_sriov_guc_policies guc; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h new file mode 100644 index 000000000000..768277b8bc95 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023-2024 Intel Corporation + */ + +#ifndef _XE_GT_SRIOV_PF_TYPES_H_ +#define _XE_GT_SRIOV_PF_TYPES_H_ + +#include + +#include "xe_gt_sriov_pf_policy_types.h" + +/** + * struct xe_gt_sriov_pf - GT level PF virtualization data. + * @policy: policy data. + */ +struct xe_gt_sriov_pf { + struct xe_gt_sriov_pf_policy policy; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h index 2143dffcaf11..882953d9b87d 100644 --- a/drivers/gpu/drm/xe/xe_gt_types.h +++ b/drivers/gpu/drm/xe/xe_gt_types.h @@ -8,6 +8,7 @@ #include "xe_force_wake_types.h" #include "xe_gt_idle_types.h" +#include "xe_gt_sriov_pf_types.h" #include "xe_hw_engine_types.h" #include "xe_hw_fence_types.h" #include "xe_reg_sr_types.h" @@ -140,6 +141,12 @@ struct xe_gt { u32 adj_offset; } mmio; + /** @sriov: virtualization data related to GT */ + union { + /** @sriov.pf: PF data. Valid only if driver is running as PF */ + struct xe_gt_sriov_pf pf; + } sriov; + /** * @reg_sr: table with registers to be restored on GT init/resume/reset */ -- cgit v1.2.3 From 5f36d1ce4df25eebc663c1996d7c73aedfb309e6 Mon Sep 17 00:00:00 2001 From: Francois Dugast Date: Wed, 10 Apr 2024 12:37:22 +0000 Subject: drm/xe/gt: Add L3 bank mask to GT topology Generate the mask of enabled L3 banks for the GT. It is stored with the rest of the GT topology in a consistent representation across platforms. For now the L3 bank mask is just printed in the log for developers to easily figure out the fusing characteristics of machines that they are trying to debug issues on. Later it can be used to replace existing code in the driver that requires the L3 bank count (not mask). Also the mask can easily be exposed to user space in a new query if needed. 
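The generic representation in this patch rests on one operation: replicating a per-node bank pattern once for every node enabled in a fuse mask. The sketch below illustrates that idea with a plain u64; it is illustrative only, with replicate_l3_pattern as a made-up name, and the driver code further down uses bitmaps instead, since the combined masks can exceed 64 bits.

#include <linux/bitops.h>

/* Illustrative sketch: replicate pattern once per set bit in node_mask */
static u64 replicate_l3_pattern(u64 pattern, unsigned int pattern_bits,
				unsigned long node_mask)
{
	unsigned long bit;
	u64 dst = 0;

	for_each_set_bit(bit, &node_mask, BITS_PER_LONG)
		dst |= pattern << (bit * pattern_bits);

	return dst;
}

/* e.g. replicate_l3_pattern(0xf, 4, 0x5) == 0x0f0f (node_mask 0b0101) */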
v2: Better naming of variable and function (Matt Roper) Bspec: 52545, 52546, 62482 Cc: Matt Roper Signed-off-by: Francois Dugast Reviewed-by: Matt Roper Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20240410123723.7-2-francois.dugast@intel.com --- drivers/gpu/drm/xe/regs/xe_gt_regs.h | 3 + drivers/gpu/drm/xe/xe_gt_topology.c | 112 +++++++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_gt_types.h | 13 ++-- 3 files changed, 124 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index 8fe811ea404a..94445810ccc9 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -165,7 +165,10 @@ #define MIRROR_FUSE3 XE_REG(0x9118) #define XE2_NODE_ENABLE_MASK REG_GENMASK(31, 16) #define L3BANK_PAIR_COUNT 4 +#define XEHPC_GT_L3_MODE_MASK REG_GENMASK(7, 4) +#define XE2_GT_L3_MODE_MASK REG_GENMASK(7, 4) #define L3BANK_MASK REG_GENMASK(3, 0) +#define XELP_GT_L3_MODE_MASK REG_GENMASK(7, 0) /* on Xe_HP the same fuses indicates mslices instead of L3 banks */ #define MAX_MSLICES 4 #define MEML3_EN_MASK REG_GENMASK(3, 0) diff --git a/drivers/gpu/drm/xe/xe_gt_topology.c b/drivers/gpu/drm/xe/xe_gt_topology.c index f5773a14f3c8..3733e7a6860d 100644 --- a/drivers/gpu/drm/xe/xe_gt_topology.c +++ b/drivers/gpu/drm/xe/xe_gt_topology.c @@ -8,6 +8,7 @@ #include #include "regs/xe_gt_regs.h" +#include "xe_assert.h" #include "xe_gt.h" #include "xe_mmio.h" @@ -59,6 +60,114 @@ load_eu_mask(struct xe_gt *gt, xe_eu_mask_t mask) bitmap_from_arr32(mask, &val, XE_MAX_EU_FUSE_BITS); } +/** + * gen_l3_mask_from_pattern - Replicate a bit pattern according to a mask + * + * It is used to compute the L3 bank masks in a generic format on + * various platforms where the internal representation of L3 node + * and masks from registers are different. 
+ * + * @xe: device + * @dst: destination + * @pattern: pattern to replicate + * @patternbits: size of the pattern, in bits + * @mask: mask describing where to replicate the pattern + * + * Example 1: + * ---------- + * @pattern = 0b1111 + * └┬─┘ + * @patternbits = 4 (bits) + * @mask = 0b0101 + * ││││ + * │││└────────────────── 0b1111 (=1×0b1111) + * ││└──────────── 0b0000 │ (=0×0b1111) + * │└────── 0b1111 │ │ (=1×0b1111) + * └ 0b0000 │ │ │ (=0×0b1111) + * │ │ │ │ + * @dst = 0b0000 0b1111 0b0000 0b1111 + * + * Example 2: + * ---------- + * @pattern = 0b11111111 + * └┬─────┘ + * @patternbits = 8 (bits) + * @mask = 0b10 + * ││ + * ││ + * ││ + * │└────────── 0b00000000 (=0×0b11111111) + * └ 0b11111111 │ (=1×0b11111111) + * │ │ + * @dst = 0b11111111 0b00000000 + */ +static void +gen_l3_mask_from_pattern(struct xe_device *xe, xe_l3_bank_mask_t dst, + xe_l3_bank_mask_t pattern, int patternbits, + unsigned long mask) +{ + unsigned long bit; + + xe_assert(xe, fls(mask) <= patternbits); + for_each_set_bit(bit, &mask, 32) { + xe_l3_bank_mask_t shifted_pattern = {}; + + bitmap_shift_left(shifted_pattern, pattern, bit * patternbits, + XE_MAX_L3_BANK_MASK_BITS); + bitmap_or(dst, dst, shifted_pattern, XE_MAX_L3_BANK_MASK_BITS); + } +} + +static void +load_l3_bank_mask(struct xe_gt *gt, xe_l3_bank_mask_t l3_bank_mask) +{ + struct xe_device *xe = gt_to_xe(gt); + u32 fuse3 = xe_mmio_read32(gt, MIRROR_FUSE3); + + if (GRAPHICS_VER(xe) >= 20) { + xe_l3_bank_mask_t per_node = {}; + u32 meml3_en = REG_FIELD_GET(XE2_NODE_ENABLE_MASK, fuse3); + u32 bank_val = REG_FIELD_GET(XE2_GT_L3_MODE_MASK, fuse3); + + bitmap_from_arr32(per_node, &bank_val, 32); + gen_l3_mask_from_pattern(xe, l3_bank_mask, per_node, 4, + meml3_en); + } else if (GRAPHICS_VERx100(xe) >= 1270) { + xe_l3_bank_mask_t per_node = {}; + xe_l3_bank_mask_t per_mask_bit = {}; + u32 meml3_en = REG_FIELD_GET(MEML3_EN_MASK, fuse3); + u32 fuse4 = xe_mmio_read32(gt, XEHP_FUSE4); + u32 bank_val = REG_FIELD_GET(GT_L3_EXC_MASK, fuse4); + + bitmap_set_value8(per_mask_bit, 0x3, 0); + gen_l3_mask_from_pattern(xe, per_node, per_mask_bit, 2, bank_val); + gen_l3_mask_from_pattern(xe, l3_bank_mask, per_node, 4, + meml3_en); + } else if (xe->info.platform == XE_PVC) { + xe_l3_bank_mask_t per_node = {}; + xe_l3_bank_mask_t per_mask_bit = {}; + u32 meml3_en = REG_FIELD_GET(MEML3_EN_MASK, fuse3); + u32 bank_val = REG_FIELD_GET(XEHPC_GT_L3_MODE_MASK, fuse3); + + bitmap_set_value8(per_mask_bit, 0xf, 0); + gen_l3_mask_from_pattern(xe, per_node, per_mask_bit, 4, + bank_val); + gen_l3_mask_from_pattern(xe, l3_bank_mask, per_node, 16, + meml3_en); + } else if (xe->info.platform == XE_DG2) { + xe_l3_bank_mask_t per_node = {}; + u32 mask = REG_FIELD_GET(MEML3_EN_MASK, fuse3); + + bitmap_set_value8(per_node, 0xff, 0); + gen_l3_mask_from_pattern(xe, l3_bank_mask, per_node, 8, mask); + } else { + /* 1:1 register bit to mask bit (inverted register bits) */ + u32 mask = REG_FIELD_GET(XELP_GT_L3_MODE_MASK, ~fuse3); + + bitmap_from_arr32(l3_bank_mask, &mask, 32); + } +} + static void get_num_dss_regs(struct xe_device *xe, int *geometry_regs, int *compute_regs) { @@ -103,6 +212,7 @@ xe_gt_topology_init(struct xe_gt *gt) XEHPC_GT_COMPUTE_DSS_ENABLE_EXT, XE2_GT_COMPUTE_DSS_2); load_eu_mask(gt, gt->fuse_topo.eu_mask_per_dss); + load_l3_bank_mask(gt, gt->fuse_topo.l3_bank_mask); p = drm_dbg_printer(>_to_xe(gt)->drm, DRM_UT_DRIVER, "GT topology"); @@ -120,6 +230,8 @@ xe_gt_topology_dump(struct xe_gt *gt, struct drm_printer *p) drm_printf(p, "EU mask per DSS: %*pb\n", XE_MAX_EU_FUSE_BITS, 
gt->fuse_topo.eu_mask_per_dss); + drm_printf(p, "L3 bank mask: %*pb\n", XE_MAX_L3_BANK_MASK_BITS, + gt->fuse_topo.l3_bank_mask); } /* diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h index 882953d9b87d..cfdc761ff7f4 100644 --- a/drivers/gpu/drm/xe/xe_gt_types.h +++ b/drivers/gpu/drm/xe/xe_gt_types.h @@ -25,13 +25,15 @@ enum xe_gt_type { XE_GT_TYPE_MEDIA, }; -#define XE_MAX_DSS_FUSE_REGS 3 -#define XE_MAX_DSS_FUSE_BITS (32 * XE_MAX_DSS_FUSE_REGS) -#define XE_MAX_EU_FUSE_REGS 1 -#define XE_MAX_EU_FUSE_BITS (32 * XE_MAX_EU_FUSE_REGS) +#define XE_MAX_DSS_FUSE_REGS 3 +#define XE_MAX_DSS_FUSE_BITS (32 * XE_MAX_DSS_FUSE_REGS) +#define XE_MAX_EU_FUSE_REGS 1 +#define XE_MAX_EU_FUSE_BITS (32 * XE_MAX_EU_FUSE_REGS) +#define XE_MAX_L3_BANK_MASK_BITS 64 typedef unsigned long xe_dss_mask_t[BITS_TO_LONGS(XE_MAX_DSS_FUSE_BITS)]; typedef unsigned long xe_eu_mask_t[BITS_TO_LONGS(XE_MAX_EU_FUSE_BITS)]; +typedef unsigned long xe_l3_bank_mask_t[BITS_TO_LONGS(XE_MAX_L3_BANK_MASK_BITS)]; struct xe_mmio_range { u32 start; @@ -334,6 +336,9 @@ struct xe_gt { /** @fuse_topo.eu_mask_per_dss: EU mask per DSS*/ xe_eu_mask_t eu_mask_per_dss; + + /** @fuse_topo.l3_bank_mask: L3 bank mask */ + xe_l3_bank_mask_t l3_bank_mask; } fuse_topo; /** @steering: register steering for individual HW units */ -- cgit v1.2.3 From c7201728f9832b0822dcc4f2843ffb050059ee2b Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Mon, 15 Apr 2024 19:39:32 +0200 Subject: drm/xe: Add helper to format SR-IOV function name MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While the GuC firmware and the Xe driver are using VF identifier VFID(0) to represent the Physical Function, we should avoid using the "VF0" name and use the proper "PF" name in all user-facing messages related to the Physical Function, using the "VFn" name only when referring to a true Virtual Function. Add a simple helper to get a properly formatted function name based on the function number. Reviewed-by: Piotr Piórkowski Reviewed-by: Himal Prasad Ghimiray Signed-off-by: Michal Wajdeczko Link: https://patchwork.freedesktop.org/patch/msgid/20240415173937.1287-2-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_sriov.c | 17 +++++++++++++++++ drivers/gpu/drm/xe/xe_sriov.h | 1 + 2 files changed, 18 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_sriov.c b/drivers/gpu/drm/xe/xe_sriov.c index 1b40f5de9ef5..1c3fa84b6adb 100644 --- a/drivers/gpu/drm/xe/xe_sriov.c +++ b/drivers/gpu/drm/xe/xe_sriov.c @@ -122,3 +122,20 @@ void xe_sriov_print_info(struct xe_device *xe, struct drm_printer *p) drm_printf(p, "enabled: %s\n", str_yes_no(IS_SRIOV(xe))); drm_printf(p, "mode: %s\n", xe_sriov_mode_to_string(xe_device_sriov_mode(xe))); } + +/** + * xe_sriov_function_name() - Get SR-IOV Function name. + * @n: the Function number (identifier) to get name of + * @buf: the buffer to format to + * @size: size of the buffer (shall be at least 5 bytes) + * + * Return: formatted function name ("PF" or "VF%u").
 + */ +const char *xe_sriov_function_name(unsigned int n, char *buf, size_t size) +{ + if (n) + snprintf(buf, size, "VF%u", n); + else + strscpy(buf, "PF", size); + return buf; +} diff --git a/drivers/gpu/drm/xe/xe_sriov.h b/drivers/gpu/drm/xe/xe_sriov.h index f9dec84d77e3..486bb21c3256 100644 --- a/drivers/gpu/drm/xe/xe_sriov.h +++ b/drivers/gpu/drm/xe/xe_sriov.h @@ -13,6 +13,7 @@ struct drm_printer; const char *xe_sriov_mode_to_string(enum xe_sriov_mode mode); +const char *xe_sriov_function_name(unsigned int n, char *buf, size_t len); void xe_sriov_probe_early(struct xe_device *xe); void xe_sriov_print_info(struct xe_device *xe, struct drm_printer *p); -- cgit v1.2.3 From 9e56d026c6be4ad124b47dc5ea000bbb888841e0 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Mon, 15 Apr 2024 19:39:33 +0200 Subject: drm/xe: Allow to assign GGTT region to the VF MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit VF drivers can't modify GGTT PTEs except those in the range explicitly assigned by the PF driver. To allow hardware enforcement of this requirement, each GGTT PTE has a field with the VF number that identifies which VF can modify that particular GGTT PTE entry. Only the PF driver can modify this field, and it shall do so before the VF drivers are loaded. Add a function to prepare the PTEs. Since it will be used only by the PF driver, make it available only for CONFIG_PCI_IOV=y. Bspec: 45015, 52395 Reviewed-by: Piotr Piórkowski Reviewed-by: Himal Prasad Ghimiray Signed-off-by: Michal Wajdeczko Link: https://patchwork.freedesktop.org/patch/msgid/20240415173937.1287-3-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/regs/xe_gtt_defs.h | 2 ++ drivers/gpu/drm/xe/xe_ggtt.c | 44 +++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_ggtt.h | 4 ++++ 3 files changed, 50 insertions(+) diff --git a/drivers/gpu/drm/xe/regs/xe_gtt_defs.h b/drivers/gpu/drm/xe/regs/xe_gtt_defs.h index 558519ce48c7..4389e5a76f89 100644 --- a/drivers/gpu/drm/xe/regs/xe_gtt_defs.h +++ b/drivers/gpu/drm/xe/regs/xe_gtt_defs.h @@ -9,6 +9,8 @@ #define XELPG_GGTT_PTE_PAT0 BIT_ULL(52) #define XELPG_GGTT_PTE_PAT1 BIT_ULL(53) +#define GGTT_PTE_VFID GENMASK_ULL(11, 2) + #define GUC_GGTT_TOP 0xFEE00000 #define XELPG_PPGTT_PTE_PAT3 BIT_ULL(62) diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index ff2239c0eda5..f090cab065b8 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -460,6 +460,50 @@ void xe_ggtt_remove_bo(struct xe_ggtt *ggtt, struct xe_bo *bo) bo->flags & XE_BO_FLAG_GGTT_INVALIDATE); } +#ifdef CONFIG_PCI_IOV +static u64 xe_encode_vfid_pte(u16 vfid) +{ + return FIELD_PREP(GGTT_PTE_VFID, vfid) | XE_PAGE_PRESENT; +} + +static void xe_ggtt_assign_locked(struct xe_ggtt *ggtt, const struct drm_mm_node *node, u16 vfid) +{ + u64 start = node->start; + u64 size = node->size; + u64 end = start + size - 1; + u64 pte = xe_encode_vfid_pte(vfid); + + lockdep_assert_held(&ggtt->lock); + + if (!drm_mm_node_allocated(node)) + return; + + while (start < end) { + xe_ggtt_set_pte(ggtt, start, pte); + start += XE_PAGE_SIZE; + } + + xe_ggtt_invalidate(ggtt); +} + +/** + * xe_ggtt_assign - assign a GGTT region to the VF + * @ggtt: the &xe_ggtt where the node belongs + * @node: the &drm_mm_node to update + * @vfid: the VF identifier + * + * This function is used by the PF driver to assign a GGTT region to the VF. + * In addition to PTE's VFID bits 11:2 also PRESENT bit 0 is set as on some + * platforms VFs can't modify that either.
+ */ +void xe_ggtt_assign(struct xe_ggtt *ggtt, const struct drm_mm_node *node, u16 vfid) +{ + mutex_lock(&ggtt->lock); + xe_ggtt_assign_locked(ggtt, node, vfid); + mutex_unlock(&ggtt->lock); +} +#endif + int xe_ggtt_dump(struct xe_ggtt *ggtt, struct drm_printer *p) { int err; diff --git a/drivers/gpu/drm/xe/xe_ggtt.h b/drivers/gpu/drm/xe/xe_ggtt.h index 8306ef74abc6..4a41a1762358 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.h +++ b/drivers/gpu/drm/xe/xe_ggtt.h @@ -33,4 +33,8 @@ void xe_ggtt_remove_bo(struct xe_ggtt *ggtt, struct xe_bo *bo); int xe_ggtt_dump(struct xe_ggtt *ggtt, struct drm_printer *p); +#ifdef CONFIG_PCI_IOV +void xe_ggtt_assign(struct xe_ggtt *ggtt, const struct drm_mm_node *node, u16 vfid); +#endif + #endif -- cgit v1.2.3 From bda438b8badc34d4752c46adf4ce0e5524e230ff Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Mon, 15 Apr 2024 19:39:34 +0200 Subject: drm/xe: Add xe_ttm_vram_get_avail The PF driver will need to know size of the remaining available VRAM to estimate fair VRAM allocations that could be used across all VFs in automatic VFs provisioning mode. Add helper function for that. We will use it in upcoming patch. Reviewed-by: Himal Prasad Ghimiray Signed-off-by: Michal Wajdeczko Link: https://patchwork.freedesktop.org/patch/msgid/20240415173937.1287-4-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_ttm_vram_mgr.c | 12 ++++++++++++ drivers/gpu/drm/xe/xe_ttm_vram_mgr.h | 1 + 2 files changed, 13 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c index 0678faf83212..8a1f460ff20b 100644 --- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c +++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c @@ -478,3 +478,15 @@ void xe_ttm_vram_get_used(struct ttm_resource_manager *man, *used_visible = mgr->visible_size - mgr->visible_avail; mutex_unlock(&mgr->lock); } + +u64 xe_ttm_vram_get_avail(struct ttm_resource_manager *man) +{ + struct xe_ttm_vram_mgr *mgr = to_xe_ttm_vram_mgr(man); + u64 avail; + + mutex_lock(&mgr->lock); + avail = mgr->mm.avail; + mutex_unlock(&mgr->lock); + + return avail; +} diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.h b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.h index d184e19a9230..cc76050e376d 100644 --- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.h +++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.h @@ -25,6 +25,7 @@ int xe_ttm_vram_mgr_alloc_sgt(struct xe_device *xe, void xe_ttm_vram_mgr_free_sgt(struct device *dev, enum dma_data_direction dir, struct sg_table *sgt); +u64 xe_ttm_vram_get_avail(struct ttm_resource_manager *man); u64 xe_ttm_vram_get_cpu_visible_size(struct ttm_resource_manager *man); void xe_ttm_vram_get_used(struct ttm_resource_manager *man, u64 *used, u64 *used_visible); -- cgit v1.2.3 From 3f11bcc6564f4e890d023437f63adaa102d3d78e Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Mon, 15 Apr 2024 19:39:35 +0200 Subject: drm/xe/guc: Add PF2GUC_UPDATE_VF_CFG to ABI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In upcoming patches the PF driver will add support to change VFs configuration and will need to use PF2GUC_UPDATE_VF_CFG messages. Add necessary definitions to our GuC firmware ABI header. 
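With these definitions, the request is a plain five-dword HXG message. For reference, the send sequence added by a later patch in this series (guc_action_update_vf_cfg()) boils down to:

        u32 request[] = {
                GUC_ACTION_PF2GUC_UPDATE_VF_CFG,        /* ACTION */
                vfid,                                   /* VFID */
                lower_32_bits(addr),                    /* CFG_ADDR_LO (dword aligned GGTT offset) */
                upper_32_bits(addr),                    /* CFG_ADDR_HI */
                size,                                   /* CFG_SIZE (in dwords) */
        };

        ret = xe_guc_ct_send_block(&guc->ct, request, ARRAY_SIZE(request));

where a positive return value is the COUNT of successfully applied KLVs.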
Definitions of the GuC VF Configuration KLVs used by this action are already present in abi/guc_klvs_abi.h Reviewed-by: Piotr Piórkowski Signed-off-by: Michal Wajdeczko Link: https://patchwork.freedesktop.org/patch/msgid/20240415173937.1287-5-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h | 55 ++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h b/drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h index 2d829e812c61..c1ad09b36453 100644 --- a/drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h +++ b/drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h @@ -260,6 +260,61 @@ #define PF2GUC_UPDATE_VGT_POLICY_RESPONSE_MSG_LEN GUC_HXG_RESPONSE_MSG_MIN_LEN #define PF2GUC_UPDATE_VGT_POLICY_RESPONSE_MSG_0_COUNT GUC_HXG_RESPONSE_MSG_0_DATA0 +/** + * DOC: PF2GUC_UPDATE_VF_CFG + * + * The `PF2GUC_UPDATE_VF_CFG`_ message is used by PF to provision single VF in GuC. + * + * This message must be sent as `CTB HXG Message`_. + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_HOST_ | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_REQUEST_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:16 | MBZ | + * | +-------+--------------------------------------------------------------+ + * | | 15:0 | ACTION = _`GUC_ACTION_PF2GUC_UPDATE_VF_CFG` = 0x5503 | + * +---+-------+--------------------------------------------------------------+ + * | 1 | 31:0 | **VFID** - identifier of the VF that the KLV | + * | | | configurations are being applied to | + * +---+-------+--------------------------------------------------------------+ + * | 2 | 31:0 | **CFG_ADDR_LO** - dword aligned GGTT offset that represents | + * | | | the start of a list of virtualization related KLV configs | + * | | | that are to be applied to the VF. | + * | | | If this parameter is zero, the list is not parsed. | + * | | | If full configs address parameter is zero and configs_size is| + * | | | zero associated VF config shall be reset to its default state| + * +---+-------+--------------------------------------------------------------+ + * | 3 | 31:0 | **CFG_ADDR_HI** - upper 32 bits of configs address. 
| + * +---+-------+--------------------------------------------------------------+ + * | 4 | 31:0 | **CFG_SIZE** - size (in dwords) of the config buffer | + * +---+-------+--------------------------------------------------------------+ + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_GUC_ | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_RESPONSE_SUCCESS_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:0 | **COUNT** - number of KLVs successfully applied | + * +---+-------+--------------------------------------------------------------+ + */ +#define GUC_ACTION_PF2GUC_UPDATE_VF_CFG 0x5503u + +#define PF2GUC_UPDATE_VF_CFG_REQUEST_MSG_LEN (GUC_HXG_REQUEST_MSG_MIN_LEN + 4u) +#define PF2GUC_UPDATE_VF_CFG_REQUEST_MSG_0_MBZ GUC_HXG_REQUEST_MSG_0_DATA0 +#define PF2GUC_UPDATE_VF_CFG_REQUEST_MSG_1_VFID GUC_HXG_REQUEST_MSG_n_DATAn +#define PF2GUC_UPDATE_VF_CFG_REQUEST_MSG_2_CFG_ADDR_LO GUC_HXG_REQUEST_MSG_n_DATAn +#define PF2GUC_UPDATE_VF_CFG_REQUEST_MSG_3_CFG_ADDR_HI GUC_HXG_REQUEST_MSG_n_DATAn +#define PF2GUC_UPDATE_VF_CFG_REQUEST_MSG_4_CFG_SIZE GUC_HXG_REQUEST_MSG_n_DATAn + +#define PF2GUC_UPDATE_VF_CFG_RESPONSE_MSG_LEN GUC_HXG_RESPONSE_MSG_MIN_LEN +#define PF2GUC_UPDATE_VF_CFG_RESPONSE_MSG_0_COUNT GUC_HXG_RESPONSE_MSG_0_DATA0 + /** * DOC: PF2GUC_VF_CONTROL * -- cgit v1.2.3 From 1f2880bab254918dd596153de77fcbee6947c6bc Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Mon, 15 Apr 2024 19:39:36 +0200 Subject: drm/xe/pf: Add SR-IOV PF specific early GT initialization The PF driver must maintain additional GT-level data for each VF. This additional per-VF data will be added in upcoming patches and will include: provisioning configuration (like GGTT space or LMEM allocation sizes or scheduling parameters), monitoring thresholds and counters, and more. As the number of supported VFs varies across platforms, use a flexible array where the first entry contains metadata for the PF itself (if a given configuration parameter is applicable to the PF) and all remaining entries contain data for the potential VFs.
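With this layout, per-function lookups stay trivial; e.g. a hypothetical accessor (not part of this patch) could be:

        /* entry [0] is the PF itself, entries [1..totalvfs] are VF1..VFn */
        static struct xe_gt_sriov_metadata *pf_pick_metadata(struct xe_gt *gt,
                                                             unsigned int vfid)
        {
                xe_gt_assert(gt, vfid <= xe_gt_sriov_pf_get_totalvfs(gt));

                return &gt->sriov.pf.vfs[vfid];
        }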
Reviewed-by: Himal Prasad Ghimiray Signed-off-by: Michal Wajdeczko Link: https://patchwork.freedesktop.org/patch/msgid/20240415173937.1287-6-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/Makefile | 1 + drivers/gpu/drm/xe/xe_gt.c | 7 +++++ drivers/gpu/drm/xe/xe_gt_sriov_pf.c | 52 +++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_gt_sriov_pf.h | 20 ++++++++++++ drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h | 9 ++++++ 5 files changed, 89 insertions(+) create mode 100644 drivers/gpu/drm/xe/xe_gt_sriov_pf.c create mode 100644 drivers/gpu/drm/xe/xe_gt_sriov_pf.h diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index 522bffa5c4e1..39f47909b466 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -160,6 +160,7 @@ xe-y += \ xe_sriov.o xe-$(CONFIG_PCI_IOV) += \ + xe_gt_sriov_pf.o \ xe_gt_sriov_pf_control.o \ xe_gt_sriov_pf_policy.o \ xe_lmtt.o \ diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index cfa5da900461..38956b60e084 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -29,6 +29,7 @@ #include "xe_gt_mcr.h" #include "xe_gt_pagefault.h" #include "xe_gt_printk.h" +#include "xe_gt_sriov_pf.h" #include "xe_gt_sysfs.h" #include "xe_gt_tlb_invalidation.h" #include "xe_gt_topology.h" @@ -311,6 +312,12 @@ int xe_gt_init_early(struct xe_gt *gt) { int err; + if (IS_SRIOV_PF(gt_to_xe(gt))) { + err = xe_gt_sriov_pf_init_early(gt); + if (err) + return err; + } + err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); if (err) return err; diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf.c new file mode 100644 index 000000000000..791dcdd767e2 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf.c @@ -0,0 +1,52 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2023-2024 Intel Corporation + */ + +#include <drm/drm_managed.h> + +#include "xe_gt_sriov_pf.h" +#include "xe_gt_sriov_pf_helpers.h" + +/* + * VF's metadata is maintained in the flexible array where: + * - entry [0] contains metadata for the PF (only if applicable), + * - entries [1..n] contain metadata for VF1..VFn:: + * + * <--------------------------- 1 + total_vfs -----------> + * +-------+-------+-------+-----------------------+-------+ + * | 0 | 1 | 2 | | n | + * +-------+-------+-------+-----------------------+-------+ + * | PF | VF1 | VF2 | ... ... | VFn | + * +-------+-------+-------+-----------------------+-------+ + */ +static int pf_alloc_metadata(struct xe_gt *gt) +{ + unsigned int num_vfs = xe_gt_sriov_pf_get_totalvfs(gt); + + gt->sriov.pf.vfs = drmm_kcalloc(&gt_to_xe(gt)->drm, 1 + num_vfs, + sizeof(*gt->sriov.pf.vfs), GFP_KERNEL); + if (!gt->sriov.pf.vfs) + return -ENOMEM; + + return 0; +} + +/** + * xe_gt_sriov_pf_init_early - Prepare SR-IOV PF data structures on PF. + * @gt: the &xe_gt to initialize + * + * Early initialization of the PF data. + * + * Return: 0 on success or a negative error code on failure.
+ */ +int xe_gt_sriov_pf_init_early(struct xe_gt *gt) +{ + int err; + + err = pf_alloc_metadata(gt); + if (err) + return err; + + return 0; +} diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf.h new file mode 100644 index 000000000000..05142ffc4319 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023-2024 Intel Corporation + */ + +#ifndef _XE_GT_SRIOV_PF_H_ +#define _XE_GT_SRIOV_PF_H_ + +struct xe_gt; + +#ifdef CONFIG_PCI_IOV +int xe_gt_sriov_pf_init_early(struct xe_gt *gt); +#else +static inline int xe_gt_sriov_pf_init_early(struct xe_gt *gt) +{ + return 0; +} +#endif + +#endif diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h index 768277b8bc95..223f280ef748 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h @@ -10,12 +10,21 @@ #include "xe_gt_sriov_pf_policy_types.h" +/** + * struct xe_gt_sriov_metadata - GT level per-VF metadata. + */ +struct xe_gt_sriov_metadata { + /* XXX: VF metadata will go here */ +}; + /** * struct xe_gt_sriov_pf - GT level PF virtualization data. * @policy: policy data. + * @vfs: metadata for all VFs. */ struct xe_gt_sriov_pf { struct xe_gt_sriov_pf_policy policy; + struct xe_gt_sriov_metadata *vfs; }; #endif -- cgit v1.2.3 From ac6598aed1b36d0301fa43732ad40d440dc86620 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Mon, 15 Apr 2024 19:39:37 +0200 Subject: drm/xe/pf: Add support to configure SR-IOV VFs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To run correctly, each Virtual Function must be provisioned with some chunk of shared hardware or firmware resources (like GGTT, device memory, GuC doorbell IDs, GuC context IDs) and scheduling parameters (execution quantum or preemption timeout). All resources assigned to VFs must be excluded from the PF driver use and may require some additional preparation steps (like setup of the LMTT or update of the GGTT PTE). Those provisioning details must be then sent to the GuC firmware as most of those details will be shared later with the VF drivers during their boot. Add basic functions to provision VFs with all hardware resources or scheduling parameters. We will use them shortly in upcoming patches either in manual provisioning over debugfs, exposed to the advanced users, or automatic provisioning done by PF driver during VFs enabling. 
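For instance, a minimal automatic fair-provisioning pass over a primary GT could be sketched as below (hypothetical caller, not part of this patch; VF identifiers start at 1 since VFID(0) represents the PF, and the ordering of the three steps is illustrative only):

        static int pf_provision_fair(struct xe_gt *gt, unsigned int num_vfs)
        {
                int err;

                /* split the largest free GGTT region equally across VFs */
                err = xe_gt_sriov_pf_config_set_fair_ggtt(gt, 1, num_vfs);
                if (err)
                        return err;

                /* then the GuC context IDs ... */
                err = xe_gt_sriov_pf_config_set_fair_ctxs(gt, 1, num_vfs);
                if (err)
                        return err;

                /* ... and the GuC doorbell IDs */
                return xe_gt_sriov_pf_config_set_fair_dbs(gt, 1, num_vfs);
        }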
Reviewed-by: Piotr Piórkowski Signed-off-by: Michal Wajdeczko Link: https://patchwork.freedesktop.org/patch/msgid/20240415173937.1287-7-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/Makefile | 1 + drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c | 1973 ++++++++++++++++++++++ drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h | 56 + drivers/gpu/drm/xe/xe_gt_sriov_pf_config_types.h | 54 + drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h | 6 +- 5 files changed, 2089 insertions(+), 1 deletion(-) create mode 100644 drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c create mode 100644 drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h create mode 100644 drivers/gpu/drm/xe/xe_gt_sriov_pf_config_types.h diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index 39f47909b466..8bc62bfbc679 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -161,6 +161,7 @@ xe-y += \ xe-$(CONFIG_PCI_IOV) += \ xe_gt_sriov_pf.o \ + xe_gt_sriov_pf_config.o \ xe_gt_sriov_pf_control.o \ xe_gt_sriov_pf_policy.o \ xe_lmtt.o \ diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c new file mode 100644 index 000000000000..0f5614877770 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c @@ -0,0 +1,1973 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2023-2024 Intel Corporation + */ + +#include +#include + +#include "abi/guc_actions_sriov_abi.h" +#include "abi/guc_klvs_abi.h" + +#include "regs/xe_guc_regs.h" + +#include "xe_bo.h" +#include "xe_device.h" +#include "xe_ggtt.h" +#include "xe_gt.h" +#include "xe_gt_sriov_pf_config.h" +#include "xe_gt_sriov_pf_helpers.h" +#include "xe_gt_sriov_pf_policy.h" +#include "xe_gt_sriov_printk.h" +#include "xe_guc.h" +#include "xe_guc_ct.h" +#include "xe_guc_db_mgr.h" +#include "xe_guc_fwif.h" +#include "xe_guc_id_mgr.h" +#include "xe_guc_klv_helpers.h" +#include "xe_guc_submit.h" +#include "xe_lmtt.h" +#include "xe_map.h" +#include "xe_sriov.h" +#include "xe_ttm_vram_mgr.h" +#include "xe_wopcm.h" + +/* + * Return: number of KLVs that were successfully parsed and saved, + * negative error code on failure. + */ +static int guc_action_update_vf_cfg(struct xe_guc *guc, u32 vfid, + u64 addr, u32 size) +{ + u32 request[] = { + GUC_ACTION_PF2GUC_UPDATE_VF_CFG, + vfid, + lower_32_bits(addr), + upper_32_bits(addr), + size, + }; + + return xe_guc_ct_send_block(&guc->ct, request, ARRAY_SIZE(request)); +} + +/* + * Return: 0 on success, negative error code on failure. + */ +static int pf_send_vf_cfg_reset(struct xe_gt *gt, u32 vfid) +{ + struct xe_guc *guc = >->uc.guc; + int ret; + + ret = guc_action_update_vf_cfg(guc, vfid, 0, 0); + + return ret <= 0 ? ret : -EPROTO; +} + +/* + * Return: number of KLVs that were successfully parsed and saved, + * negative error code on failure. 
+ */ +static int pf_send_vf_cfg_klvs(struct xe_gt *gt, u32 vfid, const u32 *klvs, u32 num_dwords) +{ + const u32 bytes = num_dwords * sizeof(u32); + struct xe_tile *tile = gt_to_tile(gt); + struct xe_device *xe = tile_to_xe(tile); + struct xe_guc *guc = >->uc.guc; + struct xe_bo *bo; + int ret; + + bo = xe_bo_create_pin_map(xe, tile, NULL, + ALIGN(bytes, PAGE_SIZE), + ttm_bo_type_kernel, + XE_BO_FLAG_VRAM_IF_DGFX(tile) | + XE_BO_FLAG_GGTT | + XE_BO_FLAG_GGTT_INVALIDATE); + if (IS_ERR(bo)) + return PTR_ERR(bo); + + xe_map_memcpy_to(xe, &bo->vmap, 0, klvs, bytes); + + ret = guc_action_update_vf_cfg(guc, vfid, xe_bo_ggtt_addr(bo), num_dwords); + + xe_bo_unpin_map_no_vm(bo); + + return ret; +} + +/* + * Return: 0 on success, -ENOKEY if some KLVs were not updated, -EPROTO if reply was malformed, + * negative error code on failure. + */ +static int pf_push_vf_cfg_klvs(struct xe_gt *gt, unsigned int vfid, u32 num_klvs, + const u32 *klvs, u32 num_dwords) +{ + int ret; + + xe_gt_assert(gt, num_klvs == xe_guc_klv_count(klvs, num_dwords)); + + ret = pf_send_vf_cfg_klvs(gt, vfid, klvs, num_dwords); + + if (ret != num_klvs) { + int err = ret < 0 ? ret : ret < num_klvs ? -ENOKEY : -EPROTO; + struct drm_printer p = xe_gt_info_printer(gt); + char name[8]; + + xe_gt_sriov_notice(gt, "Failed to push %s %u config KLV%s (%pe)\n", + xe_sriov_function_name(vfid, name, sizeof(name)), + num_klvs, str_plural(num_klvs), ERR_PTR(err)); + xe_guc_klv_print(klvs, num_dwords, &p); + return err; + } + + if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_SRIOV)) { + struct drm_printer p = xe_gt_info_printer(gt); + + xe_guc_klv_print(klvs, num_dwords, &p); + } + + return 0; +} + +static int pf_push_vf_cfg_u32(struct xe_gt *gt, unsigned int vfid, u16 key, u32 value) +{ + u32 klv[] = { + FIELD_PREP(GUC_KLV_0_KEY, key) | FIELD_PREP(GUC_KLV_0_LEN, 1), + value, + }; + + return pf_push_vf_cfg_klvs(gt, vfid, 1, klv, ARRAY_SIZE(klv)); +} + +static int pf_push_vf_cfg_u64(struct xe_gt *gt, unsigned int vfid, u16 key, u64 value) +{ + u32 klv[] = { + FIELD_PREP(GUC_KLV_0_KEY, key) | FIELD_PREP(GUC_KLV_0_LEN, 2), + lower_32_bits(value), + upper_32_bits(value), + }; + + return pf_push_vf_cfg_klvs(gt, vfid, 1, klv, ARRAY_SIZE(klv)); +} + +static int pf_push_vf_cfg_ggtt(struct xe_gt *gt, unsigned int vfid, u64 start, u64 size) +{ + u32 klvs[] = { + PREP_GUC_KLV_TAG(VF_CFG_GGTT_START), + lower_32_bits(start), + upper_32_bits(start), + PREP_GUC_KLV_TAG(VF_CFG_GGTT_SIZE), + lower_32_bits(size), + upper_32_bits(size), + }; + + return pf_push_vf_cfg_klvs(gt, vfid, 2, klvs, ARRAY_SIZE(klvs)); +} + +static int pf_push_vf_cfg_ctxs(struct xe_gt *gt, unsigned int vfid, u32 begin, u32 num) +{ + u32 klvs[] = { + PREP_GUC_KLV_TAG(VF_CFG_BEGIN_CONTEXT_ID), + begin, + PREP_GUC_KLV_TAG(VF_CFG_NUM_CONTEXTS), + num, + }; + + return pf_push_vf_cfg_klvs(gt, vfid, 2, klvs, ARRAY_SIZE(klvs)); +} + +static int pf_push_vf_cfg_dbs(struct xe_gt *gt, unsigned int vfid, u32 begin, u32 num) +{ + u32 klvs[] = { + PREP_GUC_KLV_TAG(VF_CFG_BEGIN_DOORBELL_ID), + begin, + PREP_GUC_KLV_TAG(VF_CFG_NUM_DOORBELLS), + num, + }; + + return pf_push_vf_cfg_klvs(gt, vfid, 2, klvs, ARRAY_SIZE(klvs)); +} + +static int pf_push_vf_cfg_exec_quantum(struct xe_gt *gt, unsigned int vfid, u32 exec_quantum) +{ + return pf_push_vf_cfg_u32(gt, vfid, GUC_KLV_VF_CFG_EXEC_QUANTUM_KEY, exec_quantum); +} + +static int pf_push_vf_cfg_preempt_timeout(struct xe_gt *gt, unsigned int vfid, u32 preempt_timeout) +{ + return pf_push_vf_cfg_u32(gt, vfid, GUC_KLV_VF_CFG_PREEMPT_TIMEOUT_KEY, preempt_timeout); +} + +static 
int pf_push_vf_cfg_lmem(struct xe_gt *gt, unsigned int vfid, u64 size) +{ + return pf_push_vf_cfg_u64(gt, vfid, GUC_KLV_VF_CFG_LMEM_SIZE_KEY, size); +} + +static struct xe_gt_sriov_config *pf_pick_vf_config(struct xe_gt *gt, unsigned int vfid) +{ + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + xe_gt_assert(gt, vfid <= xe_sriov_pf_get_totalvfs(gt_to_xe(gt))); + lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt)); + + return >->sriov.pf.vfs[vfid].config; +} + +/* Return: number of configuration dwords written */ +static u32 encode_config_ggtt(u32 *cfg, const struct xe_gt_sriov_config *config) +{ + u32 n = 0; + + if (drm_mm_node_allocated(&config->ggtt_region)) { + cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_GGTT_START); + cfg[n++] = lower_32_bits(config->ggtt_region.start); + cfg[n++] = upper_32_bits(config->ggtt_region.start); + + cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_GGTT_SIZE); + cfg[n++] = lower_32_bits(config->ggtt_region.size); + cfg[n++] = upper_32_bits(config->ggtt_region.size); + } + + return n; +} + +/* Return: number of configuration dwords written */ +static u32 encode_config(u32 *cfg, const struct xe_gt_sriov_config *config) +{ + u32 n = 0; + + n += encode_config_ggtt(cfg, config); + + cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_BEGIN_CONTEXT_ID); + cfg[n++] = config->begin_ctx; + + cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_NUM_CONTEXTS); + cfg[n++] = config->num_ctxs; + + cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_BEGIN_DOORBELL_ID); + cfg[n++] = config->begin_db; + + cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_NUM_DOORBELLS); + cfg[n++] = config->num_dbs; + + if (config->lmem_obj) { + cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_LMEM_SIZE); + cfg[n++] = lower_32_bits(config->lmem_obj->size); + cfg[n++] = upper_32_bits(config->lmem_obj->size); + } + + cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_EXEC_QUANTUM); + cfg[n++] = config->exec_quantum; + + cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_PREEMPT_TIMEOUT); + cfg[n++] = config->preempt_timeout; + + return n; +} + +static int pf_push_full_vf_config(struct xe_gt *gt, unsigned int vfid) +{ + struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid); + u32 max_cfg_dwords = SZ_4K / sizeof(u32); + u32 num_dwords; + int num_klvs; + u32 *cfg; + int err; + + cfg = kcalloc(max_cfg_dwords, sizeof(u32), GFP_KERNEL); + if (!cfg) + return -ENOMEM; + + num_dwords = encode_config(cfg, config); + xe_gt_assert(gt, num_dwords <= max_cfg_dwords); + + if (xe_gt_is_media_type(gt)) { + struct xe_gt *primary = gt->tile->primary_gt; + struct xe_gt_sriov_config *other = pf_pick_vf_config(primary, vfid); + + /* media-GT will never include a GGTT config */ + xe_gt_assert(gt, !encode_config_ggtt(cfg + num_dwords, config)); + + /* the GGTT config must be taken from the primary-GT instead */ + num_dwords += encode_config_ggtt(cfg + num_dwords, other); + } + xe_gt_assert(gt, num_dwords <= max_cfg_dwords); + + num_klvs = xe_guc_klv_count(cfg, num_dwords); + err = pf_push_vf_cfg_klvs(gt, vfid, num_klvs, cfg, num_dwords); + + kfree(cfg); + return err; +} + +static u64 pf_get_ggtt_alignment(struct xe_gt *gt) +{ + struct xe_device *xe = gt_to_xe(gt); + + return IS_DGFX(xe) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K ? SZ_64K : SZ_4K; +} + +static u64 pf_get_min_spare_ggtt(struct xe_gt *gt) +{ + /* XXX: preliminary */ + return IS_ENABLED(CONFIG_DRM_XE_DEBUG_SRIOV) ? 
+ pf_get_ggtt_alignment(gt) : SZ_64M; +} + +static u64 pf_get_spare_ggtt(struct xe_gt *gt) +{ + u64 spare; + + xe_gt_assert(gt, !xe_gt_is_media_type(gt)); + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt)); + + spare = gt->sriov.pf.spare.ggtt_size; + spare = max_t(u64, spare, pf_get_min_spare_ggtt(gt)); + + return spare; +} + +static int pf_set_spare_ggtt(struct xe_gt *gt, u64 size) +{ + xe_gt_assert(gt, !xe_gt_is_media_type(gt)); + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt)); + + if (size && size < pf_get_min_spare_ggtt(gt)) + return -EINVAL; + + size = round_up(size, pf_get_ggtt_alignment(gt)); + gt->sriov.pf.spare.ggtt_size = size; + + return 0; +} + +static int pf_distribute_config_ggtt(struct xe_tile *tile, unsigned int vfid, u64 start, u64 size) +{ + int err, err2 = 0; + + err = pf_push_vf_cfg_ggtt(tile->primary_gt, vfid, start, size); + + if (tile->media_gt && !err) + err2 = pf_push_vf_cfg_ggtt(tile->media_gt, vfid, start, size); + + return err ?: err2; +} + +static void pf_release_ggtt(struct xe_tile *tile, struct drm_mm_node *node) +{ + struct xe_ggtt *ggtt = tile->mem.ggtt; + + if (drm_mm_node_allocated(node)) { + /* + * explicit GGTT PTE assignment to the PF using xe_ggtt_assign() + * is redundant, as PTE will be implicitly re-assigned to PF by + * the xe_ggtt_clear() called by below xe_ggtt_remove_node(). + */ + xe_ggtt_remove_node(ggtt, node, false); + } +} + +static void pf_release_vf_config_ggtt(struct xe_gt *gt, struct xe_gt_sriov_config *config) +{ + pf_release_ggtt(gt_to_tile(gt), &config->ggtt_region); +} + +static int pf_provision_vf_ggtt(struct xe_gt *gt, unsigned int vfid, u64 size) +{ + struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid); + struct drm_mm_node *node = &config->ggtt_region; + struct xe_tile *tile = gt_to_tile(gt); + struct xe_ggtt *ggtt = tile->mem.ggtt; + u64 alignment = pf_get_ggtt_alignment(gt); + int err; + + xe_gt_assert(gt, vfid); + xe_gt_assert(gt, !xe_gt_is_media_type(gt)); + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + + size = round_up(size, alignment); + + if (drm_mm_node_allocated(node)) { + err = pf_distribute_config_ggtt(tile, vfid, 0, 0); + if (unlikely(err)) + return err; + + pf_release_ggtt(tile, node); + } + xe_gt_assert(gt, !drm_mm_node_allocated(node)); + + if (!size) + return 0; + + err = xe_ggtt_insert_special_node(ggtt, node, size, alignment); + if (unlikely(err)) + return err; + + xe_ggtt_assign(ggtt, node, vfid); + xe_gt_sriov_dbg_verbose(gt, "VF%u assigned GGTT %llx-%llx\n", + vfid, node->start, node->start + node->size - 1); + + err = pf_distribute_config_ggtt(gt->tile, vfid, node->start, node->size); + if (unlikely(err)) + return err; + + return 0; +} + +static u64 pf_get_vf_config_ggtt(struct xe_gt *gt, unsigned int vfid) +{ + struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid); + struct drm_mm_node *node = &config->ggtt_region; + + xe_gt_assert(gt, !xe_gt_is_media_type(gt)); + return drm_mm_node_allocated(node) ? node->size : 0; +} + +/** + * xe_gt_sriov_pf_config_get_ggtt - Query size of GGTT address space of the VF. + * @gt: the &xe_gt + * @vfid: the VF identifier + * + * This function can only be called on PF. + * + * Return: size of the VF's assigned (or PF's spare) GGTT address space. 
+ */ +u64 xe_gt_sriov_pf_config_get_ggtt(struct xe_gt *gt, unsigned int vfid) +{ + u64 size; + + mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); + if (vfid) + size = pf_get_vf_config_ggtt(gt_to_tile(gt)->primary_gt, vfid); + else + size = pf_get_spare_ggtt(gt_to_tile(gt)->primary_gt); + mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); + + return size; +} + +static int pf_config_set_u64_done(struct xe_gt *gt, unsigned int vfid, u64 value, + u64 actual, const char *what, int err) +{ + char size[10]; + char name[8]; + + xe_sriov_function_name(vfid, name, sizeof(name)); + + if (unlikely(err)) { + string_get_size(value, 1, STRING_UNITS_2, size, sizeof(size)); + xe_gt_sriov_notice(gt, "Failed to provision %s with %llu (%s) %s (%pe)\n", + name, value, size, what, ERR_PTR(err)); + string_get_size(actual, 1, STRING_UNITS_2, size, sizeof(size)); + xe_gt_sriov_info(gt, "%s provisioning remains at %llu (%s) %s\n", + name, actual, size, what); + return err; + } + + /* the actual value may have changed during provisioning */ + string_get_size(actual, 1, STRING_UNITS_2, size, sizeof(size)); + xe_gt_sriov_info(gt, "%s provisioned with %llu (%s) %s\n", + name, actual, size, what); + return 0; +} + +/** + * xe_gt_sriov_pf_config_set_ggtt - Provision VF with GGTT space. + * @gt: the &xe_gt (can't be media) + * @vfid: the VF identifier + * @size: requested GGTT size + * + * If &vfid represents PF, then function will change PF's spare GGTT config. + * + * This function can only be called on PF. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_gt_sriov_pf_config_set_ggtt(struct xe_gt *gt, unsigned int vfid, u64 size) +{ + int err; + + xe_gt_assert(gt, !xe_gt_is_media_type(gt)); + + mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); + if (vfid) + err = pf_provision_vf_ggtt(gt, vfid, size); + else + err = pf_set_spare_ggtt(gt, size); + mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); + + return pf_config_set_u64_done(gt, vfid, size, + xe_gt_sriov_pf_config_get_ggtt(gt, vfid), + vfid ? "GGTT" : "spare GGTT", err); +} + +static int pf_config_bulk_set_u64_done(struct xe_gt *gt, unsigned int first, unsigned int num_vfs, + u64 value, u64 (*get)(struct xe_gt*, unsigned int), + const char *what, unsigned int last, int err) +{ + char size[10]; + + xe_gt_assert(gt, first); + xe_gt_assert(gt, num_vfs); + xe_gt_assert(gt, first <= last); + + if (num_vfs == 1) + return pf_config_set_u64_done(gt, first, value, get(gt, first), what, err); + + if (unlikely(err)) { + xe_gt_sriov_notice(gt, "Failed to bulk provision VF%u..VF%u with %s\n", + first, first + num_vfs - 1, what); + if (last > first) + pf_config_bulk_set_u64_done(gt, first, last - first, value, + get, what, last, 0); + return pf_config_set_u64_done(gt, last, value, get(gt, last), what, err); + } + + /* pick actual value from first VF - bulk provisioning shall be equal across all VFs */ + value = get(gt, first); + string_get_size(value, 1, STRING_UNITS_2, size, sizeof(size)); + xe_gt_sriov_info(gt, "VF%u..VF%u provisioned with %llu (%s) %s\n", + first, first + num_vfs - 1, value, size, what); + return 0; +} + +/** + * xe_gt_sriov_pf_config_bulk_set_ggtt - Provision many VFs with GGTT. + * @gt: the &xe_gt (can't be media) + * @vfid: starting VF identifier (can't be 0) + * @num_vfs: number of VFs to provision + * @size: requested GGTT size + * + * This function can only be called on PF. + * + * Return: 0 on success or a negative error code on failure. 
+ */ +int xe_gt_sriov_pf_config_bulk_set_ggtt(struct xe_gt *gt, unsigned int vfid, + unsigned int num_vfs, u64 size) +{ + unsigned int n; + int err = 0; + + xe_gt_assert(gt, vfid); + xe_gt_assert(gt, !xe_gt_is_media_type(gt)); + + if (!num_vfs) + return 0; + + mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); + for (n = vfid; n < vfid + num_vfs; n++) { + err = pf_provision_vf_ggtt(gt, n, size); + if (err) + break; + } + mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); + + return pf_config_bulk_set_u64_done(gt, vfid, num_vfs, size, + xe_gt_sriov_pf_config_get_ggtt, + "GGTT", n, err); +} + +/* Return: size of the largest continuous GGTT region */ +static u64 pf_get_max_ggtt(struct xe_gt *gt) +{ + struct xe_ggtt *ggtt = gt_to_tile(gt)->mem.ggtt; + const struct drm_mm *mm = &ggtt->mm; + const struct drm_mm_node *entry; + u64 alignment = pf_get_ggtt_alignment(gt); + u64 spare = pf_get_spare_ggtt(gt); + u64 hole_min_start = xe_wopcm_size(gt_to_xe(gt)); + u64 hole_start, hole_end, hole_size; + u64 max_hole = 0; + + mutex_lock(&ggtt->lock); + + drm_mm_for_each_hole(entry, mm, hole_start, hole_end) { + hole_start = max(hole_start, hole_min_start); + hole_start = ALIGN(hole_start, alignment); + hole_end = ALIGN_DOWN(hole_end, alignment); + if (hole_start >= hole_end) + continue; + hole_size = hole_end - hole_start; + xe_gt_sriov_dbg_verbose(gt, "HOLE start %llx size %lluK\n", + hole_start, hole_size / SZ_1K); + spare -= min3(spare, hole_size, max_hole); + max_hole = max(max_hole, hole_size); + } + + mutex_unlock(&ggtt->lock); + + xe_gt_sriov_dbg_verbose(gt, "HOLE max %lluK reserved %lluK\n", + max_hole / SZ_1K, spare / SZ_1K); + return max_hole > spare ? max_hole - spare : 0; +} + +static u64 pf_estimate_fair_ggtt(struct xe_gt *gt, unsigned int num_vfs) +{ + u64 available = pf_get_max_ggtt(gt); + u64 alignment = pf_get_ggtt_alignment(gt); + u64 fair; + + /* + * To simplify the logic we only look at single largest GGTT region + * as that will be always the best fit for 1 VF case, and most likely + * will also nicely cover other cases where VFs are provisioned on the + * fresh and idle PF driver, without any stale GGTT allocations spread + * in the middle of the full GGTT range. + */ + + fair = div_u64(available, num_vfs); + fair = ALIGN_DOWN(fair, alignment); + xe_gt_sriov_dbg_verbose(gt, "GGTT available(%lluK) fair(%u x %lluK)\n", + available / SZ_1K, num_vfs, fair / SZ_1K); + return fair; +} + +/** + * xe_gt_sriov_pf_config_set_fair_ggtt - Provision many VFs with fair GGTT. + * @gt: the &xe_gt (can't be media) + * @vfid: starting VF identifier (can't be 0) + * @num_vfs: number of VFs to provision + * + * This function can only be called on PF. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_gt_sriov_pf_config_set_fair_ggtt(struct xe_gt *gt, unsigned int vfid, + unsigned int num_vfs) +{ + u64 fair; + + xe_gt_assert(gt, vfid); + xe_gt_assert(gt, num_vfs); + xe_gt_assert(gt, !xe_gt_is_media_type(gt)); + + mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); + fair = pf_estimate_fair_ggtt(gt, num_vfs); + mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); + + if (!fair) + return -ENOSPC; + + return xe_gt_sriov_pf_config_bulk_set_ggtt(gt, vfid, num_vfs, fair); +} + +static u32 pf_get_min_spare_ctxs(struct xe_gt *gt) +{ + /* XXX: preliminary */ + return IS_ENABLED(CONFIG_DRM_XE_DEBUG_SRIOV) ? 
+ hweight64(gt->info.engine_mask) : SZ_256; +} + +static u32 pf_get_spare_ctxs(struct xe_gt *gt) +{ + u32 spare; + + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt)); + + spare = gt->sriov.pf.spare.num_ctxs; + spare = max_t(u32, spare, pf_get_min_spare_ctxs(gt)); + + return spare; +} + +static int pf_set_spare_ctxs(struct xe_gt *gt, u32 spare) +{ + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt)); + + if (spare > GUC_ID_MAX) + return -EINVAL; + + if (spare && spare < pf_get_min_spare_ctxs(gt)) + return -EINVAL; + + gt->sriov.pf.spare.num_ctxs = spare; + + return 0; +} + +/* Return: start ID or negative error code on failure */ +static int pf_reserve_ctxs(struct xe_gt *gt, u32 num) +{ + struct xe_guc_id_mgr *idm = >->uc.guc.submission_state.idm; + unsigned int spare = pf_get_spare_ctxs(gt); + + return xe_guc_id_mgr_reserve(idm, num, spare); +} + +static void pf_release_ctxs(struct xe_gt *gt, u32 start, u32 num) +{ + struct xe_guc_id_mgr *idm = >->uc.guc.submission_state.idm; + + if (num) + xe_guc_id_mgr_release(idm, start, num); +} + +static void pf_release_config_ctxs(struct xe_gt *gt, struct xe_gt_sriov_config *config) +{ + lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt)); + + pf_release_ctxs(gt, config->begin_ctx, config->num_ctxs); + config->begin_ctx = 0; + config->num_ctxs = 0; +} + +static int pf_provision_vf_ctxs(struct xe_gt *gt, unsigned int vfid, u32 num_ctxs) +{ + struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid); + int ret; + + xe_gt_assert(gt, vfid); + + if (num_ctxs > GUC_ID_MAX) + return -EINVAL; + + if (config->num_ctxs) { + ret = pf_push_vf_cfg_ctxs(gt, vfid, 0, 0); + if (unlikely(ret)) + return ret; + + pf_release_config_ctxs(gt, config); + } + + if (!num_ctxs) + return 0; + + ret = pf_reserve_ctxs(gt, num_ctxs); + if (unlikely(ret < 0)) + return ret; + + config->begin_ctx = ret; + config->num_ctxs = num_ctxs; + + ret = pf_push_vf_cfg_ctxs(gt, vfid, config->begin_ctx, config->num_ctxs); + if (unlikely(ret)) { + pf_release_config_ctxs(gt, config); + return ret; + } + + xe_gt_sriov_dbg_verbose(gt, "VF%u contexts %u-%u\n", + vfid, config->begin_ctx, config->begin_ctx + config->num_ctxs - 1); + return 0; +} + +static u32 pf_get_vf_config_ctxs(struct xe_gt *gt, unsigned int vfid) +{ + struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid); + + return config->num_ctxs; +} + +/** + * xe_gt_sriov_pf_config_get_ctxs - Get VF's GuC contexts IDs quota. + * @gt: the &xe_gt + * @vfid: the VF identifier + * + * This function can only be called on PF. + * If &vfid represents a PF then number of PF's spare GuC context IDs is returned. + * + * Return: VF's quota (or PF's spare). 
+ */ +u32 xe_gt_sriov_pf_config_get_ctxs(struct xe_gt *gt, unsigned int vfid) +{ + u32 num_ctxs; + + mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); + if (vfid) + num_ctxs = pf_get_vf_config_ctxs(gt, vfid); + else + num_ctxs = pf_get_spare_ctxs(gt); + mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); + + return num_ctxs; +} + +static const char *no_unit(u32 unused) +{ + return ""; +} + +static const char *spare_unit(u32 unused) +{ + return " spare"; +} + +static int pf_config_set_u32_done(struct xe_gt *gt, unsigned int vfid, u32 value, u32 actual, + const char *what, const char *(*unit)(u32), int err) +{ + char name[8]; + + xe_sriov_function_name(vfid, name, sizeof(name)); + + if (unlikely(err)) { + xe_gt_sriov_notice(gt, "Failed to provision %s with %u%s %s (%pe)\n", + name, value, unit(value), what, ERR_PTR(err)); + xe_gt_sriov_info(gt, "%s provisioning remains at %u%s %s\n", + name, actual, unit(actual), what); + return err; + } + + /* the actual value may have changed during provisioning */ + xe_gt_sriov_info(gt, "%s provisioned with %u%s %s\n", + name, actual, unit(actual), what); + return 0; +} + +/** + * xe_gt_sriov_pf_config_set_ctxs - Configure GuC contexts IDs quota for the VF. + * @gt: the &xe_gt + * @vfid: the VF identifier + * @num_ctxs: requested number of GuC contexts IDs (0 to release) + * + * This function can only be called on PF. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_gt_sriov_pf_config_set_ctxs(struct xe_gt *gt, unsigned int vfid, u32 num_ctxs) +{ + int err; + + mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); + if (vfid) + err = pf_provision_vf_ctxs(gt, vfid, num_ctxs); + else + err = pf_set_spare_ctxs(gt, num_ctxs); + mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); + + return pf_config_set_u32_done(gt, vfid, num_ctxs, + xe_gt_sriov_pf_config_get_ctxs(gt, vfid), + "GuC context IDs", vfid ? no_unit : spare_unit, err); +} + +static int pf_config_bulk_set_u32_done(struct xe_gt *gt, unsigned int first, unsigned int num_vfs, + u32 value, u32 (*get)(struct xe_gt*, unsigned int), + const char *what, const char *(*unit)(u32), + unsigned int last, int err) +{ + xe_gt_assert(gt, first); + xe_gt_assert(gt, num_vfs); + xe_gt_assert(gt, first <= last); + + if (num_vfs == 1) + return pf_config_set_u32_done(gt, first, value, get(gt, first), what, unit, err); + + if (unlikely(err)) { + xe_gt_sriov_notice(gt, "Failed to bulk provision VF%u..VF%u with %s\n", + first, first + num_vfs - 1, what); + if (last > first) + pf_config_bulk_set_u32_done(gt, first, last - first, value, + get, what, unit, last, 0); + return pf_config_set_u32_done(gt, last, value, get(gt, last), what, unit, err); + } + + /* pick actual value from first VF - bulk provisioning shall be equal across all VFs */ + value = get(gt, first); + xe_gt_sriov_info(gt, "VF%u..VF%u provisioned with %u%s %s\n", + first, first + num_vfs - 1, value, unit(value), what); + return 0; +} + +/** + * xe_gt_sriov_pf_config_bulk_set_ctxs - Provision many VFs with GuC context IDs. + * @gt: the &xe_gt + * @vfid: starting VF identifier + * @num_vfs: number of VFs to provision + * @num_ctxs: requested number of GuC contexts IDs (0 to release) + * + * This function can only be called on PF. + * + * Return: 0 on success or a negative error code on failure. 
+ */ +int xe_gt_sriov_pf_config_bulk_set_ctxs(struct xe_gt *gt, unsigned int vfid, + unsigned int num_vfs, u32 num_ctxs) +{ + unsigned int n; + int err = 0; + + xe_gt_assert(gt, vfid); + + if (!num_vfs) + return 0; + + mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); + for (n = vfid; n < vfid + num_vfs; n++) { + err = pf_provision_vf_ctxs(gt, n, num_ctxs); + if (err) + break; + } + mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); + + return pf_config_bulk_set_u32_done(gt, vfid, num_vfs, num_ctxs, + xe_gt_sriov_pf_config_get_ctxs, + "GuC context IDs", no_unit, n, err); +} + +static u32 pf_estimate_fair_ctxs(struct xe_gt *gt, unsigned int num_vfs) +{ + struct xe_guc_id_mgr *idm = >->uc.guc.submission_state.idm; + u32 spare = pf_get_spare_ctxs(gt); + u32 fair = (idm->total - spare) / num_vfs; + int ret; + + for (; fair; --fair) { + ret = xe_guc_id_mgr_reserve(idm, fair * num_vfs, spare); + if (ret < 0) + continue; + xe_guc_id_mgr_release(idm, ret, fair * num_vfs); + break; + } + + xe_gt_sriov_dbg_verbose(gt, "contexts fair(%u x %u)\n", num_vfs, fair); + return fair; +} + +/** + * xe_gt_sriov_pf_config_set_fair_ctxs - Provision many VFs with fair GuC context IDs. + * @gt: the &xe_gt + * @vfid: starting VF identifier (can't be 0) + * @num_vfs: number of VFs to provision (can't be 0) + * + * This function can only be called on PF. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_gt_sriov_pf_config_set_fair_ctxs(struct xe_gt *gt, unsigned int vfid, + unsigned int num_vfs) +{ + u32 fair; + + xe_gt_assert(gt, vfid); + xe_gt_assert(gt, num_vfs); + + mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); + fair = pf_estimate_fair_ctxs(gt, num_vfs); + mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); + + if (!fair) + return -ENOSPC; + + return xe_gt_sriov_pf_config_bulk_set_ctxs(gt, vfid, num_vfs, fair); +} + +static u32 pf_get_min_spare_dbs(struct xe_gt *gt) +{ + /* XXX: preliminary, we don't use doorbells yet! */ + return IS_ENABLED(CONFIG_DRM_XE_DEBUG_SRIOV) ? 
1 : 0; +} + +static u32 pf_get_spare_dbs(struct xe_gt *gt) +{ + u32 spare; + + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt)); + + spare = gt->sriov.pf.spare.num_dbs; + spare = max_t(u32, spare, pf_get_min_spare_dbs(gt)); + + return spare; +} + +static int pf_set_spare_dbs(struct xe_gt *gt, u32 spare) +{ + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt)); + + if (spare > GUC_NUM_DOORBELLS) + return -EINVAL; + + if (spare && spare < pf_get_min_spare_dbs(gt)) + return -EINVAL; + + gt->sriov.pf.spare.num_dbs = spare; + return 0; +} + +/* Return: start ID or negative error code on failure */ +static int pf_reserve_dbs(struct xe_gt *gt, u32 num) +{ + struct xe_guc_db_mgr *dbm = >->uc.guc.dbm; + unsigned int spare = pf_get_spare_dbs(gt); + + return xe_guc_db_mgr_reserve_range(dbm, num, spare); +} + +static void pf_release_dbs(struct xe_gt *gt, u32 start, u32 num) +{ + struct xe_guc_db_mgr *dbm = >->uc.guc.dbm; + + if (num) + xe_guc_db_mgr_release_range(dbm, start, num); +} + +static void pf_release_config_dbs(struct xe_gt *gt, struct xe_gt_sriov_config *config) +{ + lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt)); + + pf_release_dbs(gt, config->begin_db, config->num_dbs); + config->begin_db = 0; + config->num_dbs = 0; +} + +static int pf_provision_vf_dbs(struct xe_gt *gt, unsigned int vfid, u32 num_dbs) +{ + struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid); + int ret; + + xe_gt_assert(gt, vfid); + + if (num_dbs > GUC_NUM_DOORBELLS) + return -EINVAL; + + if (config->num_dbs) { + ret = pf_push_vf_cfg_dbs(gt, vfid, 0, 0); + if (unlikely(ret)) + return ret; + + pf_release_config_dbs(gt, config); + } + + if (!num_dbs) + return 0; + + ret = pf_reserve_dbs(gt, num_dbs); + if (unlikely(ret < 0)) + return ret; + + config->begin_db = ret; + config->num_dbs = num_dbs; + + ret = pf_push_vf_cfg_dbs(gt, vfid, config->begin_db, config->num_dbs); + if (unlikely(ret)) { + pf_release_config_dbs(gt, config); + return ret; + } + + xe_gt_sriov_dbg_verbose(gt, "VF%u doorbells %u-%u\n", + vfid, config->begin_db, config->begin_db + config->num_dbs - 1); + return 0; +} + +static u32 pf_get_vf_config_dbs(struct xe_gt *gt, unsigned int vfid) +{ + struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid); + + return config->num_dbs; +} + +/** + * xe_gt_sriov_pf_config_get_dbs - Get VF's GuC doorbells IDs quota. + * @gt: the &xe_gt + * @vfid: the VF identifier + * + * This function can only be called on PF. + * If &vfid represents a PF then number of PF's spare GuC doorbells IDs is returned. + * + * Return: VF's quota (or PF's spare). + */ +u32 xe_gt_sriov_pf_config_get_dbs(struct xe_gt *gt, unsigned int vfid) +{ + u32 num_dbs; + + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + xe_gt_assert(gt, vfid <= xe_sriov_pf_get_totalvfs(gt_to_xe(gt))); + + mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); + if (vfid) + num_dbs = pf_get_vf_config_dbs(gt, vfid); + else + num_dbs = pf_get_spare_dbs(gt); + mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); + + return num_dbs; +} + +/** + * xe_gt_sriov_pf_config_set_dbs - Configure GuC doorbells IDs quota for the VF. + * @gt: the &xe_gt + * @vfid: the VF identifier + * @num_dbs: requested number of GuC doorbells IDs (0 to release) + * + * This function can only be called on PF. + * + * Return: 0 on success or a negative error code on failure. 
+ */ +int xe_gt_sriov_pf_config_set_dbs(struct xe_gt *gt, unsigned int vfid, u32 num_dbs) +{ + int err; + + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + xe_gt_assert(gt, vfid <= xe_sriov_pf_get_totalvfs(gt_to_xe(gt))); + + mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); + if (vfid) + err = pf_provision_vf_dbs(gt, vfid, num_dbs); + else + err = pf_set_spare_dbs(gt, num_dbs); + mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); + + return pf_config_set_u32_done(gt, vfid, num_dbs, + xe_gt_sriov_pf_config_get_dbs(gt, vfid), + "GuC doorbell IDs", vfid ? no_unit : spare_unit, err); +} + +/** + * xe_gt_sriov_pf_config_bulk_set_dbs - Provision many VFs with GuC context IDs. + * @gt: the &xe_gt + * @vfid: starting VF identifier (can't be 0) + * @num_vfs: number of VFs to provision + * @num_dbs: requested number of GuC doorbell IDs (0 to release) + * + * This function can only be called on PF. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_gt_sriov_pf_config_bulk_set_dbs(struct xe_gt *gt, unsigned int vfid, + unsigned int num_vfs, u32 num_dbs) +{ + unsigned int n; + int err = 0; + + xe_gt_assert(gt, vfid); + + if (!num_vfs) + return 0; + + mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); + for (n = vfid; n < vfid + num_vfs; n++) { + err = pf_provision_vf_dbs(gt, n, num_dbs); + if (err) + break; + } + mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); + + return pf_config_bulk_set_u32_done(gt, vfid, num_vfs, num_dbs, + xe_gt_sriov_pf_config_get_dbs, + "GuC doorbell IDs", no_unit, n, err); +} + +static u32 pf_estimate_fair_dbs(struct xe_gt *gt, unsigned int num_vfs) +{ + struct xe_guc_db_mgr *dbm = >->uc.guc.dbm; + u32 spare = pf_get_spare_dbs(gt); + u32 fair = (GUC_NUM_DOORBELLS - spare) / num_vfs; + int ret; + + for (; fair; --fair) { + ret = xe_guc_db_mgr_reserve_range(dbm, fair * num_vfs, spare); + if (ret < 0) + continue; + xe_guc_db_mgr_release_range(dbm, ret, fair * num_vfs); + break; + } + + xe_gt_sriov_dbg_verbose(gt, "doorbells fair(%u x %u)\n", num_vfs, fair); + return fair; +} + +/** + * xe_gt_sriov_pf_config_set_fair_dbs - Provision many VFs with fair GuC doorbell IDs. + * @gt: the &xe_gt + * @vfid: starting VF identifier (can't be 0) + * @num_vfs: number of VFs to provision (can't be 0) + * + * This function can only be called on PF. + * + * Return: 0 on success or a negative error code on failure. 
+ */ +int xe_gt_sriov_pf_config_set_fair_dbs(struct xe_gt *gt, unsigned int vfid, + unsigned int num_vfs) +{ + u32 fair; + + xe_gt_assert(gt, vfid); + xe_gt_assert(gt, num_vfs); + + mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); + fair = pf_estimate_fair_dbs(gt, num_vfs); + mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); + + if (!fair) + return -ENOSPC; + + return xe_gt_sriov_pf_config_bulk_set_dbs(gt, vfid, num_vfs, fair); +} + +static u64 pf_get_lmem_alignment(struct xe_gt *gt) +{ + /* this might be platform dependent */ + return SZ_2M; +} + +static u64 pf_get_min_spare_lmem(struct xe_gt *gt) +{ + /* this might be platform dependent */ + return SZ_128M; /* XXX: preliminary */ +} + +static u64 pf_get_spare_lmem(struct xe_gt *gt) +{ + u64 spare; + + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt)); + + spare = gt->sriov.pf.spare.lmem_size; + spare = max_t(u64, spare, pf_get_min_spare_lmem(gt)); + + return spare; +} + +static int pf_set_spare_lmem(struct xe_gt *gt, u64 size) +{ + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt)); + + if (size && size < pf_get_min_spare_lmem(gt)) + return -EINVAL; + + gt->sriov.pf.spare.lmem_size = size; + return 0; +} + +static u64 pf_get_vf_config_lmem(struct xe_gt *gt, unsigned int vfid) +{ + struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid); + struct xe_bo *bo; + + bo = config->lmem_obj; + return bo ? bo->size : 0; +} + +static int pf_distribute_config_lmem(struct xe_gt *gt, unsigned int vfid, u64 size) +{ + struct xe_device *xe = gt_to_xe(gt); + struct xe_tile *tile; + unsigned int tid; + int err; + + for_each_tile(tile, xe, tid) { + if (tile->primary_gt == gt) { + err = pf_push_vf_cfg_lmem(gt, vfid, size); + } else { + u64 lmem = pf_get_vf_config_lmem(tile->primary_gt, vfid); + + if (!lmem) + continue; + err = pf_push_vf_cfg_lmem(gt, vfid, lmem); + } + if (unlikely(err)) + return err; + } + return 0; +} + +static void pf_force_lmtt_invalidate(struct xe_device *xe) +{ + /* TODO */ +} + +static void pf_reset_vf_lmtt(struct xe_device *xe, unsigned int vfid) +{ + struct xe_lmtt *lmtt; + struct xe_tile *tile; + unsigned int tid; + + for_each_tile(tile, xe, tid) { + lmtt = &tile->sriov.pf.lmtt; + xe_lmtt_drop_pages(lmtt, vfid); + } +} + +static int pf_update_vf_lmtt(struct xe_device *xe, unsigned int vfid) +{ + struct xe_gt_sriov_config *config; + struct xe_tile *tile; + struct xe_lmtt *lmtt; + struct xe_bo *bo; + struct xe_gt *gt; + u64 total, offset; + unsigned int gtid; + unsigned int tid; + int err; + + total = 0; + for_each_tile(tile, xe, tid) + total += pf_get_vf_config_lmem(tile->primary_gt, vfid); + + for_each_tile(tile, xe, tid) { + lmtt = &tile->sriov.pf.lmtt; + + xe_lmtt_drop_pages(lmtt, vfid); + if (!total) + continue; + + err = xe_lmtt_prepare_pages(lmtt, vfid, total); + if (err) + goto fail; + + offset = 0; + for_each_gt(gt, xe, gtid) { + if (xe_gt_is_media_type(gt)) + continue; + + config = pf_pick_vf_config(gt, vfid); + bo = config->lmem_obj; + if (!bo) + continue; + + err = xe_lmtt_populate_pages(lmtt, vfid, bo, offset); + if (err) + goto fail; + offset += bo->size; + } + } + + pf_force_lmtt_invalidate(xe); + return 0; + +fail: + for_each_tile(tile, xe, tid) { + lmtt = &tile->sriov.pf.lmtt; + xe_lmtt_drop_pages(lmtt, vfid); + } + return err; +} + +static void pf_release_vf_config_lmem(struct xe_gt *gt, struct xe_gt_sriov_config *config) +{ + xe_gt_assert(gt, !xe_gt_is_media_type(gt)); + 
lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt)); + + if (config->lmem_obj) { + xe_bo_unpin_map_no_vm(config->lmem_obj); + config->lmem_obj = NULL; + } +} + +static int pf_provision_vf_lmem(struct xe_gt *gt, unsigned int vfid, u64 size) +{ + struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid); + struct xe_device *xe = gt_to_xe(gt); + struct xe_tile *tile = gt_to_tile(gt); + struct xe_bo *bo; + int err; + + xe_gt_assert(gt, vfid); + xe_gt_assert(gt, !xe_gt_is_media_type(gt)); + + size = round_up(size, pf_get_lmem_alignment(gt)); + + if (config->lmem_obj) { + err = pf_distribute_config_lmem(gt, vfid, 0); + if (unlikely(err)) + return err; + + pf_reset_vf_lmtt(xe, vfid); + pf_release_vf_config_lmem(gt, config); + } + xe_gt_assert(gt, !config->lmem_obj); + + if (!size) + return 0; + + xe_gt_assert(gt, pf_get_lmem_alignment(gt) == SZ_2M); + bo = xe_bo_create_pin_map(xe, tile, NULL, + ALIGN(size, PAGE_SIZE), + ttm_bo_type_kernel, + XE_BO_FLAG_VRAM_IF_DGFX(tile) | + XE_BO_FLAG_PINNED); + if (IS_ERR(bo)) + return PTR_ERR(bo); + + config->lmem_obj = bo; + + err = pf_update_vf_lmtt(xe, vfid); + if (unlikely(err)) + goto release; + + err = pf_push_vf_cfg_lmem(gt, vfid, bo->size); + if (unlikely(err)) + goto reset_lmtt; + + xe_gt_sriov_dbg_verbose(gt, "VF%u LMEM %zu (%zuM)\n", + vfid, bo->size, bo->size / SZ_1M); + return 0; + +reset_lmtt: + pf_reset_vf_lmtt(xe, vfid); +release: + pf_release_vf_config_lmem(gt, config); + return err; +} + +/** + * xe_gt_sriov_pf_config_get_lmem - Get VF's LMEM quota. + * @gt: the &xe_gt + * @vfid: the VF identifier + * + * This function can only be called on PF. + * + * Return: VF's (or PF's spare) LMEM quota. + */ +u64 xe_gt_sriov_pf_config_get_lmem(struct xe_gt *gt, unsigned int vfid) +{ + u64 size; + + mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); + if (vfid) + size = pf_get_vf_config_lmem(gt, vfid); + else + size = pf_get_spare_lmem(gt); + mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); + + return size; +} + +/** + * xe_gt_sriov_pf_config_set_lmem - Provision VF with LMEM. + * @gt: the &xe_gt (can't be media) + * @vfid: the VF identifier + * @size: requested LMEM size + * + * This function can only be called on PF. + */ +int xe_gt_sriov_pf_config_set_lmem(struct xe_gt *gt, unsigned int vfid, u64 size) +{ + int err; + + mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); + if (vfid) + err = pf_provision_vf_lmem(gt, vfid, size); + else + err = pf_set_spare_lmem(gt, size); + mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); + + return pf_config_set_u64_done(gt, vfid, size, + xe_gt_sriov_pf_config_get_lmem(gt, vfid), + vfid ? "LMEM" : "spare LMEM", err); +} + +/** + * xe_gt_sriov_pf_config_bulk_set_lmem - Provision many VFs with LMEM. + * @gt: the &xe_gt (can't be media) + * @vfid: starting VF identifier (can't be 0) + * @num_vfs: number of VFs to provision + * @size: requested LMEM size + * + * This function can only be called on PF. + * + * Return: 0 on success or a negative error code on failure. 
+ */ +int xe_gt_sriov_pf_config_bulk_set_lmem(struct xe_gt *gt, unsigned int vfid, + unsigned int num_vfs, u64 size) +{ + unsigned int n; + int err = 0; + + xe_gt_assert(gt, vfid); + xe_gt_assert(gt, !xe_gt_is_media_type(gt)); + + if (!num_vfs) + return 0; + + mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); + for (n = vfid; n < vfid + num_vfs; n++) { + err = pf_provision_vf_lmem(gt, n, size); + if (err) + break; + } + mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); + + return pf_config_bulk_set_u64_done(gt, vfid, num_vfs, size, + xe_gt_sriov_pf_config_get_lmem, + "LMEM", n, err); +} + +static u64 pf_query_free_lmem(struct xe_gt *gt) +{ + struct xe_tile *tile = gt->tile; + + return xe_ttm_vram_get_avail(&tile->mem.vram_mgr->manager); +} + +static u64 pf_query_max_lmem(struct xe_gt *gt) +{ + u64 alignment = pf_get_lmem_alignment(gt); + u64 spare = pf_get_spare_lmem(gt); + u64 free = pf_query_free_lmem(gt); + u64 avail; + + /* XXX: need to account for 2MB blocks only */ + avail = free > spare ? free - spare : 0; + avail = round_down(avail, alignment); + + return avail; +} + +#ifdef CONFIG_DRM_XE_DEBUG_SRIOV +#define MAX_FAIR_LMEM SZ_128M /* XXX: make it small for the driver bringup */ +#else +#define MAX_FAIR_LMEM SZ_2G /* XXX: known issue with allocating BO over 2GiB */ +#endif + +static u64 pf_estimate_fair_lmem(struct xe_gt *gt, unsigned int num_vfs) +{ + u64 available = pf_query_max_lmem(gt); + u64 alignment = pf_get_lmem_alignment(gt); + u64 fair; + + fair = div_u64(available, num_vfs); + fair = ALIGN_DOWN(fair, alignment); +#ifdef MAX_FAIR_LMEM + fair = min_t(u64, MAX_FAIR_LMEM, fair); +#endif + xe_gt_sriov_dbg_verbose(gt, "LMEM available(%lluM) fair(%u x %lluM)\n", + available / SZ_1M, num_vfs, fair / SZ_1M); + return fair; +} + +/** + * xe_gt_sriov_pf_config_set_fair_lmem - Provision many VFs with fair LMEM. + * @gt: the &xe_gt (can't be media) + * @vfid: starting VF identifier (can't be 0) + * @num_vfs: number of VFs to provision (can't be 0) + * + * This function can only be called on PF. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_gt_sriov_pf_config_set_fair_lmem(struct xe_gt *gt, unsigned int vfid, + unsigned int num_vfs) +{ + u64 fair; + + xe_gt_assert(gt, vfid); + xe_gt_assert(gt, num_vfs); + xe_gt_assert(gt, !xe_gt_is_media_type(gt)); + + if (!IS_DGFX(gt_to_xe(gt))) + return 0; + + mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); + fair = pf_estimate_fair_lmem(gt, num_vfs); + mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); + + if (!fair) + return -ENOSPC; + + return xe_gt_sriov_pf_config_bulk_set_lmem(gt, vfid, num_vfs, fair); +} + +/** + * xe_gt_sriov_pf_config_set_fair - Provision many VFs with fair resources. + * @gt: the &xe_gt + * @vfid: starting VF identifier (can't be 0) + * @num_vfs: number of VFs to provision (can't be 0) + * + * This function can only be called on PF. + * + * Return: 0 on success or a negative error code on failure. 
+ */ +int xe_gt_sriov_pf_config_set_fair(struct xe_gt *gt, unsigned int vfid, + unsigned int num_vfs) +{ + int result = 0; + int err; + + xe_gt_assert(gt, vfid); + xe_gt_assert(gt, num_vfs); + + if (!xe_gt_is_media_type(gt)) { + err = xe_gt_sriov_pf_config_set_fair_ggtt(gt, vfid, num_vfs); + result = result ?: err; + err = xe_gt_sriov_pf_config_set_fair_lmem(gt, vfid, num_vfs); + result = result ?: err; + } + err = xe_gt_sriov_pf_config_set_fair_ctxs(gt, vfid, num_vfs); + result = result ?: err; + err = xe_gt_sriov_pf_config_set_fair_dbs(gt, vfid, num_vfs); + result = result ?: err; + + return result; +} + +static const char *exec_quantum_unit(u32 exec_quantum) +{ + return exec_quantum ? "ms" : "(infinity)"; +} + +static int pf_provision_exec_quantum(struct xe_gt *gt, unsigned int vfid, + u32 exec_quantum) +{ + struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid); + int err; + + err = pf_push_vf_cfg_exec_quantum(gt, vfid, exec_quantum); + if (unlikely(err)) + return err; + + config->exec_quantum = exec_quantum; + return 0; +} + +static int pf_get_exec_quantum(struct xe_gt *gt, unsigned int vfid) +{ + struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid); + + return config->exec_quantum; +} + +/** + * xe_gt_sriov_pf_config_set_exec_quantum - Configure execution quantum for the VF. + * @gt: the &xe_gt + * @vfid: the VF identifier + * @exec_quantum: requested execution quantum in milliseconds (0 is infinity) + * + * This function can only be called on PF. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_gt_sriov_pf_config_set_exec_quantum(struct xe_gt *gt, unsigned int vfid, + u32 exec_quantum) +{ + int err; + + mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); + err = pf_provision_exec_quantum(gt, vfid, exec_quantum); + mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); + + return pf_config_set_u32_done(gt, vfid, exec_quantum, + xe_gt_sriov_pf_config_get_exec_quantum(gt, vfid), + "execution quantum", exec_quantum_unit, err); +} + +/** + * xe_gt_sriov_pf_config_get_exec_quantum - Get VF's execution quantum. + * @gt: the &xe_gt + * @vfid: the VF identifier + * + * This function can only be called on PF. + * + * Return: VF's (or PF's) execution quantum in milliseconds. + */ +u32 xe_gt_sriov_pf_config_get_exec_quantum(struct xe_gt *gt, unsigned int vfid) +{ + u32 exec_quantum; + + mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); + exec_quantum = pf_get_exec_quantum(gt, vfid); + mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); + + return exec_quantum; +} + +static const char *preempt_timeout_unit(u32 preempt_timeout) +{ + return preempt_timeout ? "us" : "(infinity)"; +} + +static int pf_provision_preempt_timeout(struct xe_gt *gt, unsigned int vfid, + u32 preempt_timeout) +{ + struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid); + int err; + + err = pf_push_vf_cfg_preempt_timeout(gt, vfid, preempt_timeout); + if (unlikely(err)) + return err; + + config->preempt_timeout = preempt_timeout; + + return 0; +} + +static int pf_get_preempt_timeout(struct xe_gt *gt, unsigned int vfid) +{ + struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid); + + return config->preempt_timeout; +} + +/** + * xe_gt_sriov_pf_config_set_preempt_timeout - Configure preemption timeout for the VF. + * @gt: the &xe_gt + * @vfid: the VF identifier + * @preempt_timeout: requested preemption timeout in microseconds (0 is infinity) + * + * This function can only be called on PF. + * + * Return: 0 on success or a negative error code on failure. 
+ */ +int xe_gt_sriov_pf_config_set_preempt_timeout(struct xe_gt *gt, unsigned int vfid, + u32 preempt_timeout) +{ + int err; + + mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); + err = pf_provision_preempt_timeout(gt, vfid, preempt_timeout); + mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); + + return pf_config_set_u32_done(gt, vfid, preempt_timeout, + xe_gt_sriov_pf_config_get_preempt_timeout(gt, vfid), + "preemption timeout", preempt_timeout_unit, err); +} + +/** + * xe_gt_sriov_pf_config_get_preempt_timeout - Get VF's preemption timeout. + * @gt: the &xe_gt + * @vfid: the VF identifier + * + * This function can only be called on PF. + * + * Return: VF's (or PF's) preemption timeout in microseconds. + */ +u32 xe_gt_sriov_pf_config_get_preempt_timeout(struct xe_gt *gt, unsigned int vfid) +{ + u32 preempt_timeout; + + mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); + preempt_timeout = pf_get_preempt_timeout(gt, vfid); + mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); + + return preempt_timeout; +} + +static void pf_reset_config_sched(struct xe_gt *gt, struct xe_gt_sriov_config *config) +{ + lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt)); + + config->exec_quantum = 0; + config->preempt_timeout = 0; +} + +static void pf_release_vf_config(struct xe_gt *gt, unsigned int vfid) +{ + struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid); + + if (!xe_gt_is_media_type(gt)) { + pf_release_vf_config_ggtt(gt, config); + pf_release_vf_config_lmem(gt, config); + } + pf_release_config_ctxs(gt, config); + pf_release_config_dbs(gt, config); + pf_reset_config_sched(gt, config); +} + +/** + * xe_gt_sriov_pf_config_release - Release and reset VF configuration. + * @gt: the &xe_gt + * @vfid: the VF identifier (can't be PF) + * @force: force configuration release + * + * This function can only be called on PF. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_gt_sriov_pf_config_release(struct xe_gt *gt, unsigned int vfid, bool force) +{ + int err; + + xe_gt_assert(gt, vfid); + + mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); + err = pf_send_vf_cfg_reset(gt, vfid); + if (!err || force) + pf_release_vf_config(gt, vfid); + mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); + + if (unlikely(err)) { + xe_gt_sriov_notice(gt, "VF%u unprovisioning failed with error (%pe)%s\n", + vfid, ERR_PTR(err), + force ? " but all resources were released anyway!" : ""); + } + + return force ? 0 : err; +} + +/** + * xe_gt_sriov_pf_config_push - Reprovision VF's configuration. + * @gt: the &xe_gt + * @vfid: the VF identifier (can't be PF) + * @refresh: explicit refresh + * + * This function can only be called on PF. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_gt_sriov_pf_config_push(struct xe_gt *gt, unsigned int vfid, bool refresh) +{ + int err = 0; + + xe_gt_assert(gt, vfid); + + mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); + if (refresh) + err = pf_send_vf_cfg_reset(gt, vfid); + if (!err) + err = pf_push_full_vf_config(gt, vfid); + mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); + + if (unlikely(err)) { + xe_gt_sriov_notice(gt, "Failed to %s VF%u configuration (%pe)\n", + refresh ? "refresh" : "push", vfid, ERR_PTR(err)); + } + + return err; +} + +/** + * xe_gt_sriov_pf_config_print_ggtt - Print GGTT configurations. + * @gt: the &xe_gt + * @p: the &drm_printer + * + * Print GGTT configuration data for all VFs. + * VFs without provisioned GGTT are ignored. + * + * This function can only be called on PF. 
+ */ +int xe_gt_sriov_pf_config_print_ggtt(struct xe_gt *gt, struct drm_printer *p) +{ + unsigned int n, total_vfs = xe_sriov_pf_get_totalvfs(gt_to_xe(gt)); + const struct xe_gt_sriov_config *config; + char buf[10]; + + for (n = 1; n <= total_vfs; n++) { + config = &gt->sriov.pf.vfs[n].config; + if (!drm_mm_node_allocated(&config->ggtt_region)) + continue; + + string_get_size(config->ggtt_region.size, 1, STRING_UNITS_2, buf, sizeof(buf)); + drm_printf(p, "VF%u:\t%#0llx-%#llx\t(%s)\n", + n, config->ggtt_region.start, + config->ggtt_region.start + config->ggtt_region.size - 1, buf); + } + + return 0; +} + +/** + * xe_gt_sriov_pf_config_print_ctxs - Print GuC context ID configurations. + * @gt: the &xe_gt + * @p: the &drm_printer + * + * Print GuC context ID allocations across all VFs. + * VFs without GuC context IDs are skipped. + * + * This function can only be called on PF. + * Return: 0 on success or a negative error code on failure. + */ +int xe_gt_sriov_pf_config_print_ctxs(struct xe_gt *gt, struct drm_printer *p) +{ + unsigned int n, total_vfs = xe_sriov_pf_get_totalvfs(gt_to_xe(gt)); + const struct xe_gt_sriov_config *config; + + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); + + for (n = 1; n <= total_vfs; n++) { + config = &gt->sriov.pf.vfs[n].config; + if (!config->num_ctxs) + continue; + + drm_printf(p, "VF%u:\t%u-%u\t(%u)\n", + n, + config->begin_ctx, + config->begin_ctx + config->num_ctxs - 1, + config->num_ctxs); + } + + mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); + return 0; +} + +/** + * xe_gt_sriov_pf_config_print_dbs - Print GuC doorbell ID configurations. + * @gt: the &xe_gt + * @p: the &drm_printer + * + * Print GuC doorbell ID allocations across all VFs. + * VFs without GuC doorbell IDs are skipped. + * + * This function can only be called on PF. + * Return: 0 on success or a negative error code on failure. + */ +int xe_gt_sriov_pf_config_print_dbs(struct xe_gt *gt, struct drm_printer *p) +{ + unsigned int n, total_vfs = xe_sriov_pf_get_totalvfs(gt_to_xe(gt)); + const struct xe_gt_sriov_config *config; + + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); + + for (n = 1; n <= total_vfs; n++) { + config = &gt->sriov.pf.vfs[n].config; + if (!config->num_dbs) + continue; + + drm_printf(p, "VF%u:\t%u-%u\t(%u)\n", + n, + config->begin_db, + config->begin_db + config->num_dbs - 1, + config->num_dbs); + } + + mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); + return 0; +} + +/** + * xe_gt_sriov_pf_config_print_available_ggtt - Print available GGTT ranges. + * @gt: the &xe_gt + * @p: the &drm_printer + * + * Print GGTT ranges that are available for the provisioning. + * + * This function can only be called on PF.
+ */ +int xe_gt_sriov_pf_config_print_available_ggtt(struct xe_gt *gt, struct drm_printer *p) +{ + struct xe_ggtt *ggtt = gt_to_tile(gt)->mem.ggtt; + const struct drm_mm *mm = &ggtt->mm; + const struct drm_mm_node *entry; + u64 alignment = pf_get_ggtt_alignment(gt); + u64 spare = pf_get_spare_ggtt(gt); + u64 hole_min_start = xe_wopcm_size(gt_to_xe(gt)); + u64 hole_start, hole_end, hole_size; + u64 avail, total = 0; + char buf[10]; + + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + + mutex_lock(&ggtt->lock); + + drm_mm_for_each_hole(entry, mm, hole_start, hole_end) { + hole_start = max(hole_start, hole_min_start); + hole_start = ALIGN(hole_start, alignment); + hole_end = ALIGN_DOWN(hole_end, alignment); + if (hole_start >= hole_end) + continue; + hole_size = hole_end - hole_start; + total += hole_size; + + string_get_size(hole_size, 1, STRING_UNITS_2, buf, sizeof(buf)); + drm_printf(p, "range:\t%#llx-%#llx\t(%s)\n", + hole_start, hole_end - 1, buf); + } + + mutex_unlock(&ggtt->lock); + + string_get_size(total, 1, STRING_UNITS_2, buf, sizeof(buf)); + drm_printf(p, "total:\t%llu\t(%s)\n", total, buf); + + string_get_size(spare, 1, STRING_UNITS_2, buf, sizeof(buf)); + drm_printf(p, "spare:\t%llu\t(%s)\n", spare, buf); + + avail = total > spare ? total - spare : 0; + + string_get_size(avail, 1, STRING_UNITS_2, buf, sizeof(buf)); + drm_printf(p, "avail:\t%llu\t(%s)\n", avail, buf); + + return 0; +} diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h new file mode 100644 index 000000000000..5e6b36f00b5b --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h @@ -0,0 +1,56 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023-2024 Intel Corporation + */ + +#ifndef _XE_GT_SRIOV_PF_CONFIG_H_ +#define _XE_GT_SRIOV_PF_CONFIG_H_ + +#include <linux/types.h> + +struct drm_printer; struct xe_gt; +u64 xe_gt_sriov_pf_config_get_ggtt(struct xe_gt *gt, unsigned int vfid); +int xe_gt_sriov_pf_config_set_ggtt(struct xe_gt *gt, unsigned int vfid, u64 size); +int xe_gt_sriov_pf_config_set_fair_ggtt(struct xe_gt *gt, + unsigned int vfid, unsigned int num_vfs); +int xe_gt_sriov_pf_config_bulk_set_ggtt(struct xe_gt *gt, + unsigned int vfid, unsigned int num_vfs, u64 size); + +u32 xe_gt_sriov_pf_config_get_ctxs(struct xe_gt *gt, unsigned int vfid); +int xe_gt_sriov_pf_config_set_ctxs(struct xe_gt *gt, unsigned int vfid, u32 num_ctxs); +int xe_gt_sriov_pf_config_set_fair_ctxs(struct xe_gt *gt, unsigned int vfid, unsigned int num_vfs); +int xe_gt_sriov_pf_config_bulk_set_ctxs(struct xe_gt *gt, unsigned int vfid, unsigned int num_vfs, + u32 num_ctxs); + +u32 xe_gt_sriov_pf_config_get_dbs(struct xe_gt *gt, unsigned int vfid); +int xe_gt_sriov_pf_config_set_dbs(struct xe_gt *gt, unsigned int vfid, u32 num_dbs); +int xe_gt_sriov_pf_config_set_fair_dbs(struct xe_gt *gt, unsigned int vfid, unsigned int num_vfs); +int xe_gt_sriov_pf_config_bulk_set_dbs(struct xe_gt *gt, unsigned int vfid, unsigned int num_vfs, + u32 num_dbs); + +u64 xe_gt_sriov_pf_config_get_lmem(struct xe_gt *gt, unsigned int vfid); +int xe_gt_sriov_pf_config_set_lmem(struct xe_gt *gt, unsigned int vfid, u64 size); +int xe_gt_sriov_pf_config_set_fair_lmem(struct xe_gt *gt, unsigned int vfid, unsigned int num_vfs); +int xe_gt_sriov_pf_config_bulk_set_lmem(struct xe_gt *gt, unsigned int vfid, unsigned int num_vfs, + u64 size); + +u32 xe_gt_sriov_pf_config_get_exec_quantum(struct xe_gt *gt, unsigned int vfid); +int xe_gt_sriov_pf_config_set_exec_quantum(struct xe_gt *gt, unsigned int vfid, u32 exec_quantum);
+ +u32 xe_gt_sriov_pf_config_get_preempt_timeout(struct xe_gt *gt, unsigned int vfid); +int xe_gt_sriov_pf_config_set_preempt_timeout(struct xe_gt *gt, unsigned int vfid, + u32 preempt_timeout); + +int xe_gt_sriov_pf_config_set_fair(struct xe_gt *gt, unsigned int vfid, unsigned int num_vfs); +int xe_gt_sriov_pf_config_release(struct xe_gt *gt, unsigned int vfid, bool force); +int xe_gt_sriov_pf_config_push(struct xe_gt *gt, unsigned int vfid, bool refresh); + +int xe_gt_sriov_pf_config_print_ggtt(struct xe_gt *gt, struct drm_printer *p); +int xe_gt_sriov_pf_config_print_ctxs(struct xe_gt *gt, struct drm_printer *p); +int xe_gt_sriov_pf_config_print_dbs(struct xe_gt *gt, struct drm_printer *p); + +int xe_gt_sriov_pf_config_print_available_ggtt(struct xe_gt *gt, struct drm_printer *p); + +#endif diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config_types.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config_types.h new file mode 100644 index 000000000000..d3745c355957 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config_types.h @@ -0,0 +1,54 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023-2024 Intel Corporation + */ + +#ifndef _XE_GT_SRIOV_PF_CONFIG_TYPES_H_ +#define _XE_GT_SRIOV_PF_CONFIG_TYPES_H_ + +#include <drm/drm_mm.h> + +struct xe_bo; + +/** + * struct xe_gt_sriov_config - GT level per-VF configuration data. + * + * Used by the PF driver to maintain per-VF provisioning data. + */ +struct xe_gt_sriov_config { + /** @ggtt_region: GGTT region assigned to the VF. */ + struct drm_mm_node ggtt_region; + /** @lmem_obj: LMEM allocation for use by the VF. */ + struct xe_bo *lmem_obj; + /** @num_ctxs: number of GuC context IDs. */ + u16 num_ctxs; + /** @begin_ctx: start index of GuC context ID range. */ + u16 begin_ctx; + /** @num_dbs: number of GuC doorbell IDs. */ + u16 num_dbs; + /** @begin_db: start index of GuC doorbell ID range. */ + u16 begin_db; + /** @exec_quantum: execution-quantum in milliseconds. */ + u32 exec_quantum; + /** @preempt_timeout: preemption timeout in microseconds. */ + u32 preempt_timeout; +}; + +/** + * struct xe_gt_sriov_spare_config - GT-level PF spare configuration data. + * + * Used by the PF driver to maintain its own reserved (spare) provisioning + * data that is not applicable to be tracked in struct xe_gt_sriov_config. + */ +struct xe_gt_sriov_spare_config { + /** @ggtt_size: GGTT size. */ + u64 ggtt_size; + /** @lmem_size: LMEM size. */ + u64 lmem_size; + /** @num_ctxs: number of GuC submission contexts. */ + u16 num_ctxs; + /** @num_dbs: number of GuC doorbells. */ + u16 num_dbs; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h index 223f280ef748..faf9ee8266ce 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h @@ -8,22 +8,26 @@ #include <linux/types.h> +#include "xe_gt_sriov_pf_config_types.h" #include "xe_gt_sriov_pf_policy_types.h" /** * struct xe_gt_sriov_metadata - GT level per-VF metadata. */ struct xe_gt_sriov_metadata { - /* XXX: VF metadata will go here */ + /** @config: per-VF provisioning data. */ + struct xe_gt_sriov_config config; }; /** * struct xe_gt_sriov_pf - GT level PF virtualization data. * @policy: policy data. + * @spare: PF-only provisioning configuration. * @vfs: metadata for all VFs.
*/ struct xe_gt_sriov_pf { struct xe_gt_sriov_pf_policy policy; + struct xe_gt_sriov_spare_config spare; struct xe_gt_sriov_metadata *vfs; }; -- cgit v1.2.3 From b7f888ee9c41e66af847d2a2266b80266e49e49a Mon Sep 17 00:00:00 2001 From: John Harrison Date: Tue, 9 Apr 2024 17:26:45 -0700 Subject: drm/xe/lnl: Enable more GuC based workarounds There are a couple of new workarounds for LNL that are implemented in the GuC firmware. The KMD needs to enable them explicitly. Signed-off-by: John Harrison Reviewed-by: Vinay Belgaumkar Link: https://patchwork.freedesktop.org/patch/msgid/20240410002646.3002394-2-John.C.Harrison@Intel.com --- drivers/gpu/drm/xe/abi/guc_klvs_abi.h | 2 ++ drivers/gpu/drm/xe/xe_guc_ads.c | 8 ++++++++ drivers/gpu/drm/xe/xe_wa_oob.rules | 3 +++ 3 files changed, 13 insertions(+) diff --git a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h index 5dd45e06f0b6..0972113f6b81 100644 --- a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h +++ b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h @@ -324,6 +324,8 @@ enum { */ enum xe_guc_klv_ids { GUC_WORKAROUND_KLV_BLOCK_INTERRUPTS_WHEN_MGSR_BLOCKED = 0x9002, + GUC_WORKAROUND_KLV_ID_GAM_PFQ_SHADOW_TAIL_POLLING = 0x9005, + GUC_WORKAROUND_KLV_ID_DISABLE_MTP_DURING_ASYNC_COMPUTE = 0x9007, }; #endif diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c index 757cbbb87869..678ece366056 100644 --- a/drivers/gpu/drm/xe/xe_guc_ads.c +++ b/drivers/gpu/drm/xe/xe_guc_ads.c @@ -318,6 +318,14 @@ static void guc_waklv_init(struct xe_guc_ads *ads) guc_waklv_enable_simple(ads, GUC_WORKAROUND_KLV_BLOCK_INTERRUPTS_WHEN_MGSR_BLOCKED, &offset, &remain); + if (XE_WA(gt, 18024947630)) + guc_waklv_enable_simple(ads, + GUC_WORKAROUND_KLV_ID_GAM_PFQ_SHADOW_TAIL_POLLING, + &offset, &remain); + if (XE_WA(gt, 16022287689)) + guc_waklv_enable_simple(ads, + GUC_WORKAROUND_KLV_ID_DISABLE_MTP_DURING_ASYNC_COMPUTE, + &offset, &remain); size = guc_ads_waklv_size(ads) - remain; if (!size) diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules index 98a81468bc8e..5759b7bc1b70 100644 --- a/drivers/gpu/drm/xe/xe_wa_oob.rules +++ b/drivers/gpu/drm/xe/xe_wa_oob.rules @@ -21,3 +21,6 @@ PLATFORM(DG2) 14018094691 GRAPHICS_VERSION(2004) 14019882105 GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0) +18024947630 GRAPHICS_VERSION(2004) + MEDIA_VERSION(2000) +16022287689 GRAPHICS_VERSION(2004) -- cgit v1.2.3 From 09700beebacb712d04c5ecf64178ed3f56cdf1d9 Mon Sep 17 00:00:00 2001 From: John Harrison Date: Tue, 9 Apr 2024 17:26:46 -0700 Subject: drm/xe/bmg: Some LNL workarounds also apply to BMG Enable a couple of existing workarounds for a new platform. 
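For reference, each entry in xe_wa_oob.rules names the workaround number once, followed by one platform matching rule per line, so extending an existing workaround to another platform is just a matter of appending rules under the same number. A hypothetical entry (the workaround number below is made up for illustration) covering two graphics IPs and one media IP would look like:

18099999999	GRAPHICS_VERSION(2001)
		GRAPHICS_VERSION(2004)
		MEDIA_VERSION(2000)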
Signed-off-by: John Harrison Reviewed-by: Vinay Belgaumkar Link: https://patchwork.freedesktop.org/patch/msgid/20240410002646.3002394-3-John.C.Harrison@Intel.com --- drivers/gpu/drm/xe/xe_wa_oob.rules | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules index 5759b7bc1b70..eb647d5a1e16 100644 --- a/drivers/gpu/drm/xe/xe_wa_oob.rules +++ b/drivers/gpu/drm/xe/xe_wa_oob.rules @@ -21,6 +21,8 @@ PLATFORM(DG2) 14018094691 GRAPHICS_VERSION(2004) 14019882105 GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0) -18024947630 GRAPHICS_VERSION(2004) +18024947630 GRAPHICS_VERSION(2001) + GRAPHICS_VERSION(2004) MEDIA_VERSION(2000) -16022287689 GRAPHICS_VERSION(2004) +16022287689 GRAPHICS_VERSION(2001) + GRAPHICS_VERSION(2004) -- cgit v1.2.3 From 67a9e86dc1305107df0bad57f7788229c040f280 Mon Sep 17 00:00:00 2001 From: Lu Yao Date: Mon, 15 Apr 2024 10:52:15 +0800 Subject: drm/xe: select X86_PLATFORM_DEVICES when ACPI_WMI is selected ACPI_WMI is a subitem of X86_PLATFORM_DEVICES. And X86_PLATFORM_DEVICES is not selected in the current Kconfig, and may cause Kconfig warnings: WARNING: unmet direct dependencies detected for ACPI_WMI Depends on [n]: X86_PLATFORM_DEVICES [=n] && ACPI [=y] Selected by [m]: - DRM_XE [=m] && HAS_IOMEM [=y] && DRM [=m] && PCI [=y] && MMU [=y] && (m && MODULES [=y] || y && KUNIT [=y]=y) && X86 [=y] && ACPI [=y] Signed-off-by: Lu Yao Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240415025215.15811-1-yaolu@kylinos.cn Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/xe/Kconfig b/drivers/gpu/drm/xe/Kconfig index 0576b1acba9e..782934be0a77 100644 --- a/drivers/gpu/drm/xe/Kconfig +++ b/drivers/gpu/drm/xe/Kconfig @@ -29,6 +29,7 @@ config DRM_XE select INPUT if ACPI select ACPI_VIDEO if X86 && ACPI select ACPI_BUTTON if ACPI + select X86_PLATFORM_DEVICES if X86 && ACPI select ACPI_WMI if X86 && ACPI select SYNC_FILE select IOSF_MBI -- cgit v1.2.3 From 9890821f3ec160c8255b8807d15025e792c39d52 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Fri, 12 Apr 2024 16:03:02 +0100 Subject: drm/xe/stolen: lower the default alignment No need to be so aggressive here. The upper layers will already apply the needed alignment, plus some allocations might wish to skip it. Main issue is that we might want to have start/end bias range which doesn't match the default alignment which is rejected by the allocator. Signed-off-by: Matthew Auld Cc: Matt Roper Reviewed-by: Maarten Lankhorst Link: https://patchwork.freedesktop.org/patch/msgid/20240412150301.273344-3-matthew.auld@intel.com --- drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c index 6ffecf9f23d1..f77367329760 100644 --- a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c +++ b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c @@ -204,7 +204,7 @@ void xe_ttm_stolen_mgr_init(struct xe_device *xe) { struct xe_ttm_stolen_mgr *mgr = drmm_kzalloc(&xe->drm, sizeof(*mgr), GFP_KERNEL); struct pci_dev *pdev = to_pci_dev(xe->drm.dev); - u64 stolen_size, io_size, pgsize; + u64 stolen_size, io_size; int err; if (!mgr) { @@ -226,10 +226,6 @@ void xe_ttm_stolen_mgr_init(struct xe_device *xe) return; } - pgsize = xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K ? 
SZ_64K : SZ_4K; - if (pgsize < PAGE_SIZE) - pgsize = PAGE_SIZE; - /* * We don't try to attempt partial visible support for stolen vram, * since stolen is always at the end of vram, and the BAR size is pretty @@ -240,7 +236,7 @@ void xe_ttm_stolen_mgr_init(struct xe_device *xe) io_size = stolen_size; err = __xe_ttm_vram_mgr_init(xe, &mgr->base, XE_PL_STOLEN, stolen_size, - io_size, pgsize); + io_size, PAGE_SIZE); if (err) { drm_dbg_kms(&xe->drm, "Stolen mgr init failed: %i\n", err); return; -- cgit v1.2.3 From 48b1f11c95e8c9ded6516b9e0fd3abddcfc89163 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Fri, 12 Apr 2024 16:03:03 +0100 Subject: drm/xe/stolen: ignore first page for FBC We have observed underruns on some platforms if the CFB offset is within the first page of stolen. Just like i915 skip the first page. v2 (Maarten) - Also align the start. BSpec: 50214 Reported-by: Matt Roper Signed-off-by: Matthew Auld Reviewed-by: Maarten Lankhorst Link: https://patchwork.freedesktop.org/patch/msgid/20240412150301.273344-4-matthew.auld@intel.com --- drivers/gpu/drm/xe/compat-i915-headers/i915_gem_stolen.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_gem_stolen.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_gem_stolen.h index b4ccc4231e7d..cb6c7598824b 100644 --- a/drivers/gpu/drm/xe/compat-i915-headers/i915_gem_stolen.h +++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_gem_stolen.h @@ -19,8 +19,13 @@ static inline int i915_gem_stolen_insert_node_in_range(struct xe_device *xe, int err; u32 flags = XE_BO_FLAG_PINNED | XE_BO_FLAG_STOLEN; - if (align) + if (start < SZ_4K) + start = SZ_4K; + + if (align) { size = ALIGN(size, align); + start = ALIGN(start, align); + } bo = xe_bo_create_locked_range(xe, xe_device_get_root_tile(xe), NULL, size, start, end, -- cgit v1.2.3 From 83967c57320d0d01ae512f10e79213f81e4bf594 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Fri, 12 Apr 2024 12:31:45 +0100 Subject: drm/xe/vm: prevent UAF with asid based lookup The asid is only erased from the xarray when the vm refcount reaches zero, however this leads to potential UAF since the xe_vm_get() only works on a vm with refcount != 0. Since the asid is allocated in the vm create ioctl, rather erase it when closing the vm, prior to dropping the potential last ref. This should also work when user closes driver fd without explicit vm destroy. 
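The race being fixed is the classic lookup-versus-last-put pattern. A minimal sketch with hypothetical types (not the actual driver code) of why the ID must be erased while the object is still guaranteed alive:

#include <linux/kref.h>
#include <linux/xarray.h>

struct obj {
	struct kref ref;
	u32 asid;
};

static DEFINE_XARRAY(asid_to_obj);

/* Fault path: look up by asid, then try to take a reference. */
static struct obj *obj_lookup(u32 asid)
{
	struct obj *o = xa_load(&asid_to_obj, asid);

	/*
	 * If the asid were only erased after the final kref_put(), xa_load()
	 * could still return a pointer whose refcount already hit zero, and
	 * kref_get_unless_zero() would then read freed memory. Erasing the
	 * asid in the close path, before the potential last put (and under
	 * the same lock as this lookup), closes that window.
	 */
	if (o && !kref_get_unless_zero(&o->ref))
		o = NULL;
	return o;
}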
Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/1594 Signed-off-by: Matthew Auld Cc: Matthew Brost Cc: # v6.8+ Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240412113144.259426-4-matthew.auld@intel.com --- drivers/gpu/drm/xe/xe_vm.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 2dbba55e7785..b31e263ca754 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -1481,6 +1481,16 @@ void xe_vm_close_and_put(struct xe_vm *vm) xe->usm.num_vm_in_fault_mode--; else if (!(vm->flags & XE_VM_FLAG_MIGRATION)) xe->usm.num_vm_in_non_fault_mode--; + + if (vm->usm.asid) { + void *lookup; + + xe_assert(xe, xe->info.has_asid); + xe_assert(xe, !(vm->flags & XE_VM_FLAG_MIGRATION)); + + lookup = xa_erase(&xe->usm.asid_to_vm, vm->usm.asid); + xe_assert(xe, lookup == vm); + } mutex_unlock(&xe->usm.lock); for_each_tile(tile, xe, id) @@ -1496,24 +1506,15 @@ static void vm_destroy_work_func(struct work_struct *w) struct xe_device *xe = vm->xe; struct xe_tile *tile; u8 id; - void *lookup; /* xe_vm_close_and_put was not called? */ xe_assert(xe, !vm->size); mutex_destroy(&vm->snap_mutex); - if (!(vm->flags & XE_VM_FLAG_MIGRATION)) { + if (!(vm->flags & XE_VM_FLAG_MIGRATION)) xe_device_mem_access_put(xe); - if (xe->info.has_asid && vm->usm.asid) { - mutex_lock(&xe->usm.lock); - lookup = xa_erase(&xe->usm.asid_to_vm, vm->usm.asid); - xe_assert(xe, lookup == vm); - mutex_unlock(&xe->usm.lock); - } - } - for_each_tile(tile, xe, id) XE_WARN_ON(vm->pt_root[id]); -- cgit v1.2.3 From 5b259c0d1d3caa6efc66c2b856840e68993f814e Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Fri, 12 Apr 2024 12:31:46 +0100 Subject: drm/xe/vm: drop vm->destroy_work Now that we no longer grab the usm.lock mutex (which might sleep) it looks like it should be safe to directly perform xe_vm_free when vm refcount reaches zero, instead of punting that off to some worker. Signed-off-by: Matthew Auld Cc: Matthew Brost Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240412113144.259426-5-matthew.auld@intel.com --- drivers/gpu/drm/xe/xe_vm.c | 17 ++--------------- drivers/gpu/drm/xe/xe_vm_types.h | 7 ------- 2 files changed, 2 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index b31e263ca754..5f24f7802c26 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -1179,8 +1179,6 @@ static const struct xe_pt_ops xelp_pt_ops = { .pde_encode_bo = xelp_pde_encode_bo, }; -static void vm_destroy_work_func(struct work_struct *w); - /** * xe_vm_create_scratch() - Setup a scratch memory pagetable tree for the * given tile and vm. 
@@ -1260,8 +1258,6 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) init_rwsem(&vm->userptr.notifier_lock); spin_lock_init(&vm->userptr.invalidated_lock); - INIT_WORK(&vm->destroy_work, vm_destroy_work_func); - INIT_LIST_HEAD(&vm->preempt.exec_queues); vm->preempt.min_run_period_ms = 10; /* FIXME: Wire up to uAPI */ @@ -1499,10 +1495,9 @@ void xe_vm_close_and_put(struct xe_vm *vm) xe_vm_put(vm); } -static void vm_destroy_work_func(struct work_struct *w) +static void xe_vm_free(struct drm_gpuvm *gpuvm) { - struct xe_vm *vm = - container_of(w, struct xe_vm, destroy_work); + struct xe_vm *vm = container_of(gpuvm, struct xe_vm, gpuvm); struct xe_device *xe = vm->xe; struct xe_tile *tile; u8 id; @@ -1522,14 +1517,6 @@ static void vm_destroy_work_func(struct work_struct *w) kfree(vm); } -static void xe_vm_free(struct drm_gpuvm *gpuvm) -{ - struct xe_vm *vm = container_of(gpuvm, struct xe_vm, gpuvm); - - /* To destroy the VM we need to be able to sleep */ - queue_work(system_unbound_wq, &vm->destroy_work); -} - struct xe_vm *xe_vm_lookup(struct xe_file *xef, u32 id) { struct xe_vm *vm; diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index badf3945083d..7570c2c6c463 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -177,13 +177,6 @@ struct xe_vm { */ struct list_head rebind_list; - /** - * @destroy_work: worker to destroy VM, needed as a dma_fence signaling - * from an irq context can be last put and the destroy needs to be able - * to sleep. - */ - struct work_struct destroy_work; - /** * @rftree: range fence tree to track updates to page table structure. * Used to implement conflict tracking between independent bind engines. -- cgit v1.2.3 From 8eae42f1759034f7bc717699fb105d6b5900986f Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Fri, 12 Apr 2024 12:31:47 +0100 Subject: drm/xe/vm: don't include xe_gt.h clangd complains here, since nothing in xe_gt.h seems to be needed. Signed-off-by: Matthew Auld Cc: Matthew Brost Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240412113144.259426-6-matthew.auld@intel.com --- drivers/gpu/drm/xe/xe_vm.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 5f24f7802c26..8a858b8588bd 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -27,7 +27,6 @@ #include "xe_device.h" #include "xe_drm_client.h" #include "xe_exec_queue.h" -#include "xe_gt.h" #include "xe_gt_pagefault.h" #include "xe_gt_tlb_invalidation.h" #include "xe_migrate.h" -- cgit v1.2.3 From 2817a1f1bfb1a2e8a4fb16dd307980216f831c46 Mon Sep 17 00:00:00 2001 From: Vinay Belgaumkar Date: Tue, 16 Apr 2024 22:48:02 -0700 Subject: drm/xe/lnl: Apply GuC Wa_13011645652 Enable WA for a bug that could cause the C6 state machine to hang during RC6 exit. 
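For reference, the KLV added below carries key 0x9008 with a one-dword payload of 0xC40. Assuming the usual GuC KLV layout, with the key in the upper 16 bits of the header dword and the length (in dwords) in the lower 16 bits, the entry is transmitted as two dwords:

	(0x9008 << 16) | 1  =  0x90080001	/* key/length header */
	0x00000C40				/* payload: value for register 0xB04 */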
v2: Add comment clarifying the WA (John H) v3: Add more details to the comment (John H) Signed-off-by: Vinay Belgaumkar Reviewed-by: John Harrison Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20240417054802.1766359-1-vinay.belgaumkar@intel.com --- drivers/gpu/drm/xe/abi/guc_klvs_abi.h | 1 + drivers/gpu/drm/xe/xe_guc_ads.c | 38 +++++++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_wa_oob.rules | 1 + 3 files changed, 40 insertions(+) diff --git a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h index 0972113f6b81..511cf974d585 100644 --- a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h +++ b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h @@ -326,6 +326,7 @@ enum xe_guc_klv_ids { GUC_WORKAROUND_KLV_BLOCK_INTERRUPTS_WHEN_MGSR_BLOCKED = 0x9002, GUC_WORKAROUND_KLV_ID_GAM_PFQ_SHADOW_TAIL_POLLING = 0x9005, GUC_WORKAROUND_KLV_ID_DISABLE_MTP_DURING_ASYNC_COMPUTE = 0x9007, + GUC_WA_KLV_NP_RD_WRITE_TO_CLEAR_RCSM_AT_CGP_LATE_RESTORE = 0x9008, }; #endif diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c index 678ece366056..1aafa486edec 100644 --- a/drivers/gpu/drm/xe/xe_guc_ads.c +++ b/drivers/gpu/drm/xe/xe_guc_ads.c @@ -282,6 +282,33 @@ static size_t calculate_golden_lrc_size(struct xe_guc_ads *ads) return total_size; } +static void guc_waklv_enable_one_word(struct xe_guc_ads *ads, + enum xe_guc_klv_ids klv_id, + u32 value, + u32 *offset, u32 *remain) +{ + u32 size; + u32 klv_entry[] = { + /* 16:16 key/length */ + FIELD_PREP(GUC_KLV_0_KEY, klv_id) | + FIELD_PREP(GUC_KLV_0_LEN, 1), + value, + /* 1 dword data */ + }; + + size = sizeof(klv_entry); + + if (*remain < size) { + drm_warn(&ads_to_xe(ads)->drm, + "w/a klv buffer too small to add klv id %d\n", klv_id); + } else { + xe_map_memcpy_to(ads_to_xe(ads), ads_to_map(ads), *offset, + klv_entry, size); + *offset += size; + *remain -= size; + } +} + static void guc_waklv_enable_simple(struct xe_guc_ads *ads, enum xe_guc_klv_ids klv_id, u32 *offset, u32 *remain) { @@ -327,6 +354,17 @@ static void guc_waklv_init(struct xe_guc_ads *ads) GUC_WORKAROUND_KLV_ID_DISABLE_MTP_DURING_ASYNC_COMPUTE, &offset, &remain); + /* + * On RC6 exit, GuC will write register 0xB04 with the default value provided. As of now, + * the default value for this register is determined to be 0xC40. This could change in the + * future, so GuC depends on KMD to send it the correct value. + */ + if (XE_WA(gt, 13011645652)) + guc_waklv_enable_one_word(ads, + GUC_WA_KLV_NP_RD_WRITE_TO_CLEAR_RCSM_AT_CGP_LATE_RESTORE, + 0xC40, + &offset, &remain); + size = guc_ads_waklv_size(ads) - remain; if (!size) return; diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules index eb647d5a1e16..12fe88796a49 100644 --- a/drivers/gpu/drm/xe/xe_wa_oob.rules +++ b/drivers/gpu/drm/xe/xe_wa_oob.rules @@ -26,3 +26,4 @@ MEDIA_VERSION(2000) 16022287689 GRAPHICS_VERSION(2001) GRAPHICS_VERSION(2004) +13011645652 GRAPHICS_VERSION(2004) -- cgit v1.2.3 From cbb6a7413b174637f35354675ecd7e1183091bfa Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Wed, 17 Apr 2024 16:39:43 -0400 Subject: drm/xe: Introduce xe_pm_runtime_get_noresume for inner callers Let's ensure that we have an option for inner callers that will raise WARN if device is not active and not protected by outer callers. Make this also a void function forcing every caller to unconditionally put the reference back afterwards. This will be very important for cases where we want to hold the reference before scheduling a work in a queue. 
Then the work job will be responsible for putting it back. While at this, already convert a case from mem_access_get_ongoing where it is not checking for the reference and put it back, what would cause the underflow. v2: Fix identation. v3: Convert equivalent missing put from mem_access towards pm_runtime. Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20240417203952.25503-1-rodrigo.vivi@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_exec_queue.c | 4 ++-- drivers/gpu/drm/xe/xe_pm.c | 20 ++++++++++++++++++++ drivers/gpu/drm/xe/xe_pm.h | 1 + 3 files changed, 23 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index 71bd52dfebcf..50ec661116a2 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -128,7 +128,7 @@ static int __xe_exec_queue_init(struct xe_exec_queue *q) * already grabbed the rpm ref outside any sensitive locks. */ if (!(q->flags & EXEC_QUEUE_FLAG_PERMANENT) && (q->flags & EXEC_QUEUE_FLAG_VM || !q->vm)) - drm_WARN_ON(&xe->drm, !xe_device_mem_access_get_if_ongoing(xe)); + xe_pm_runtime_get_noresume(xe); return 0; @@ -217,7 +217,7 @@ void xe_exec_queue_fini(struct xe_exec_queue *q) for (i = 0; i < q->width; ++i) xe_lrc_finish(q->lrc + i); if (!(q->flags & EXEC_QUEUE_FLAG_PERMANENT) && (q->flags & EXEC_QUEUE_FLAG_VM || !q->vm)) - xe_device_mem_access_put(gt_to_xe(q->gt)); + xe_pm_runtime_put(gt_to_xe(q->gt)); __xe_exec_queue_free(q); } diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c index f3fd003b6944..37339bb57229 100644 --- a/drivers/gpu/drm/xe/xe_pm.c +++ b/drivers/gpu/drm/xe/xe_pm.c @@ -485,6 +485,26 @@ bool xe_pm_runtime_get_if_in_use(struct xe_device *xe) return pm_runtime_get_if_in_use(xe->drm.dev) > 0; } +/** + * xe_pm_runtime_get_noresume - Bump runtime PM usage counter without resuming + * @xe: xe device instance + * + * This function should be used in inner places where it is surely already + * protected by outer-bound callers of `xe_pm_runtime_get`. + * It will warn if not protected. + * The reference should be put back after this function regardless, since it + * will always bump the usage counter, regardless. + */ +void xe_pm_runtime_get_noresume(struct xe_device *xe) +{ + bool ref; + + ref = xe_pm_runtime_get_if_in_use(xe); + + if (drm_WARN(&xe->drm, !ref, "Missing outer runtime PM protection\n")) + pm_runtime_get_noresume(xe->drm.dev); +} + /** * xe_pm_runtime_resume_and_get - Resume, then get a runtime_pm ref if awake. * @xe: xe device instance diff --git a/drivers/gpu/drm/xe/xe_pm.h b/drivers/gpu/drm/xe/xe_pm.h index 0cb38ca244fe..119b630ad1d1 100644 --- a/drivers/gpu/drm/xe/xe_pm.h +++ b/drivers/gpu/drm/xe/xe_pm.h @@ -31,6 +31,7 @@ int xe_pm_runtime_get_ioctl(struct xe_device *xe); void xe_pm_runtime_put(struct xe_device *xe); int xe_pm_runtime_get_if_active(struct xe_device *xe); bool xe_pm_runtime_get_if_in_use(struct xe_device *xe); +void xe_pm_runtime_get_noresume(struct xe_device *xe); bool xe_pm_runtime_resume_and_get(struct xe_device *xe); void xe_pm_assert_unbounded_bridge(struct xe_device *xe); int xe_pm_set_vram_threshold(struct xe_device *xe, u32 threshold); -- cgit v1.2.3 From 82e279a49a519295a47d1e39f8bb75d9a6ea8ad8 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Wed, 17 Apr 2024 16:39:44 -0400 Subject: drm/xe: Introduce intel_runtime_pm_get_noresume at compat-i915-headers for display The i915-display will start using the intel_runtime_pm_noresume. 
So we need to add the compat header before it. Reviewed-by: Francois Dugast Link: https://patchwork.freedesktop.org/patch/msgid/20240417203952.25503-2-rodrigo.vivi@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h index 90a279800612..6a502e9f97d0 100644 --- a/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h +++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h @@ -177,6 +177,14 @@ static inline intel_wakeref_t intel_runtime_pm_get_if_in_use(struct xe_runtime_p return xe_pm_runtime_get_if_in_use(xe); } +static inline intel_wakeref_t intel_runtime_pm_get_noresume(struct xe_runtime_pm *pm) +{ + struct xe_device *xe = container_of(pm, struct xe_device, runtime_pm); + + xe_pm_runtime_get_noresume(xe); + return true; +} + static inline void intel_runtime_pm_put_unchecked(struct xe_runtime_pm *pm) { struct xe_device *xe = container_of(pm, struct xe_device, runtime_pm); -- cgit v1.2.3 From 77e619a82fc384ae3d1d96e1f2ea98ad14a4fdce Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Wed, 17 Apr 2024 16:39:45 -0400 Subject: drm/i915/display: convert inner wakeref get towards get_if_in_use MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch brings no functional change. Since at this point of the code we are already asserting a wakeref was held, it means that we are with runtime_pm 'in_use' and in practical terms we are only bumping the pm_runtime usage counter and moving on. However, xe driver has a lockdep annotation that warned us that if a sync resume was actually called at this point, we could have a deadlock because we are inside the power_domains->lock locked area and the resume would call the irq_reset, which would also try to get the power_domains->lock. For this reason, let's convert this call to a safer option and calm lockdep down. v2: use _noresume variant instead of get_in_use (Ville, Imre) Cc: Ville Syrjälä Acked-by: Imre Deak Cc: Matthew Auld Reviewed-by: Francois Dugast Link: https://patchwork.freedesktop.org/patch/msgid/20240417203952.25503-3-rodrigo.vivi@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/display/intel_display_power.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_display_power.c b/drivers/gpu/drm/i915/display/intel_display_power.c index 6fd4fa52253a..048943d0a881 100644 --- a/drivers/gpu/drm/i915/display/intel_display_power.c +++ b/drivers/gpu/drm/i915/display/intel_display_power.c @@ -646,7 +646,7 @@ release_async_put_domains(struct i915_power_domains *power_domains, * power well disabling. */ assert_rpm_raw_wakeref_held(rpm); - wakeref = intel_runtime_pm_get(rpm); + wakeref = intel_runtime_pm_get_noresume(rpm); for_each_power_domain(domain, mask) { /* Clear before put, so put's sanity check is happy. */ -- cgit v1.2.3 From 8ae84a27441f0267138b8a7f37eca6af481e8bc2 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Wed, 17 Apr 2024 16:39:46 -0400 Subject: drm/xe: Move lockdep protection from mem_access to xe_pm_runtime The mem_access itself is not holding any lock, but attempting to train lockdep with possible scary locks happening during runtime pm. We are going soon to kill the mem_access get and put helpers in favor of direct xe_pm_runtime calls, so let's just move this lock around to where it now belongs.
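In isolation, the priming technique being moved is just an acquire/release pair on a dummy map around the annotated path. A minimal sketch with hypothetical demo_* names:

#include <linux/lockdep.h>

#ifdef CONFIG_LOCKDEP
static struct lockdep_map demo_lockdep_map = {
	.name = "demo_lockdep_map"
};
#endif

static void demo_lockdep_prime(void)
{
	/*
	 * Acquiring the dummy map here records the ordering
	 * (caller's held locks) -> map; the suspend/resume callbacks
	 * wrap their own locking in the same map, recording
	 * map -> (callback locks). Lockdep can then flag inversions
	 * that would otherwise need an unlucky 0 -> 1 transition to
	 * reproduce.
	 */
	lock_map_acquire(&demo_lockdep_map);
	lock_map_release(&demo_lockdep_map);
}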
v2: s/lockdep_training/lockdep_prime (Matt Auld) Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20240417203952.25503-4-rodrigo.vivi@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_device.c | 23 --------------------- drivers/gpu/drm/xe/xe_device.h | 4 ---- drivers/gpu/drm/xe/xe_pm.c | 45 ++++++++++++++++++++++++++++++++++-------- 3 files changed, 37 insertions(+), 35 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index d85a2ba0a057..4b38fb78adca 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -46,12 +46,6 @@ #include "xe_vm.h" #include "xe_wait_user_fence.h" -#ifdef CONFIG_LOCKDEP -struct lockdep_map xe_device_mem_access_lockdep_map = { - .name = "xe_device_mem_access_lockdep_map" -}; -#endif - static int xe_file_open(struct drm_device *dev, struct drm_file *file) { struct xe_device *xe = to_xe_device(dev); @@ -780,23 +774,6 @@ void xe_device_mem_access_get(struct xe_device *xe) if (xe_pm_read_callback_task(xe) == current) return; - /* - * Since the resume here is synchronous it can be quite easy to deadlock - * if we are not careful. Also in practice it might be quite timing - * sensitive to ever see the 0 -> 1 transition with the callers locks - * held, so deadlocks might exist but are hard for lockdep to ever see. - * With this in mind, help lockdep learn about the potentially scary - * stuff that can happen inside the runtime_resume callback by acquiring - * a dummy lock (it doesn't protect anything and gets compiled out on - * non-debug builds). Lockdep then only needs to see the - * mem_access_lockdep_map -> runtime_resume callback once, and then can - * hopefully validate all the (callers_locks) -> mem_access_lockdep_map. - * For example if the (callers_locks) are ever grabbed in the - * runtime_resume callback, lockdep should give us a nice splat. - */ - lock_map_acquire(&xe_device_mem_access_lockdep_map); - lock_map_release(&xe_device_mem_access_lockdep_map); - xe_pm_runtime_get(xe); ref = atomic_inc_return(&xe->mem_access.ref); diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h index d413bc2c6be5..02eda6610972 100644 --- a/drivers/gpu/drm/xe/xe_device.h +++ b/drivers/gpu/drm/xe/xe_device.h @@ -16,10 +16,6 @@ struct xe_file; #include "xe_force_wake.h" #include "xe_macros.h" -#ifdef CONFIG_LOCKDEP -extern struct lockdep_map xe_device_mem_access_lockdep_map; -#endif - static inline struct xe_device *to_xe_device(const struct drm_device *dev) { return container_of(dev, struct xe_device, drm); } diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c index 37339bb57229..a1a13d17aa78 100644 --- a/drivers/gpu/drm/xe/xe_pm.c +++ b/drivers/gpu/drm/xe/xe_pm.c @@ -68,6 +68,12 @@ * management (RPS). */ +#ifdef CONFIG_LOCKDEP +struct lockdep_map xe_pm_runtime_lockdep_map = { + .name = "xe_pm_runtime_lockdep_map" +}; +#endif + /** * xe_pm_suspend - Helper for System suspend, i.e. S0->S3 / S0->S2idle * @xe: xe device instance @@ -307,11 +313,11 @@ int xe_pm_runtime_suspend(struct xe_device *xe) xe_pm_write_callback_task(xe, current); /* - * The actual xe_device_mem_access_put() is always async underneath, so + * The actual xe_pm_runtime_put() is always async underneath, so * exactly where that is called should make no difference to us. However * we still need to be very careful with the locks that this callback * acquires and the locks that are acquired and held by any callers of - * xe_device_mem_access_get().
We already have the matching annotation + * xe_runtime_pm_get(). We already have the matching annotation * on that side, but we also need it here. For example lockdep should be * able to tell us if the following scenario is in theory possible: * @@ -319,15 +325,15 @@ int xe_pm_runtime_suspend(struct xe_device *xe) * lock(A) | * | xe_pm_runtime_suspend() * | lock(A) - * xe_device_mem_access_get() | + * xe_pm_runtime_get() | * * This will clearly deadlock since rpm core needs to wait for * xe_pm_runtime_suspend() to complete, but here we are holding lock(A) * on CPU0 which prevents CPU1 making forward progress. With the - * annotation here and in xe_device_mem_access_get() lockdep will see + * annotation here and in xe_pm_runtime_get() lockdep will see * the potential lock inversion and give us a nice splat. */ - lock_map_acquire(&xe_device_mem_access_lockdep_map); + lock_map_acquire(&xe_pm_runtime_lockdep_map); /* * Applying lock for entire list op as xe_ttm_bo_destroy and xe_bo_move_notify @@ -353,7 +359,7 @@ int xe_pm_runtime_suspend(struct xe_device *xe) xe_irq_suspend(xe); out: - lock_map_release(&xe_device_mem_access_lockdep_map); + lock_map_release(&xe_pm_runtime_lockdep_map); xe_pm_write_callback_task(xe, NULL); return err; } @@ -373,7 +379,7 @@ int xe_pm_runtime_resume(struct xe_device *xe) /* Disable access_ongoing asserts and prevent recursive pm calls */ xe_pm_write_callback_task(xe, current); - lock_map_acquire(&xe_device_mem_access_lockdep_map); + lock_map_acquire(&xe_pm_runtime_lockdep_map); /* * It can be possible that xe has allowed d3cold but other pcie devices @@ -408,11 +414,31 @@ int xe_pm_runtime_resume(struct xe_device *xe) goto out; } out: - lock_map_release(&xe_device_mem_access_lockdep_map); + lock_map_release(&xe_pm_runtime_lockdep_map); xe_pm_write_callback_task(xe, NULL); return err; } +/* + * For places where resume is synchronous it can be quite easy to deadlock + * if we are not careful. Also in practice it might be quite timing + * sensitive to ever see the 0 -> 1 transition with the callers locks + * held, so deadlocks might exist but are hard for lockdep to ever see. + * With this in mind, help lockdep learn about the potentially scary + * stuff that can happen inside the runtime_resume callback by acquiring + * a dummy lock (it doesn't protect anything and gets compiled out on + * non-debug builds). Lockdep then only needs to see the + * xe_pm_runtime_lockdep_map -> runtime_resume callback once, and then can + * hopefully validate all the (callers_locks) -> xe_pm_runtime_lockdep_map. + * For example if the (callers_locks) are ever grabbed in the + * runtime_resume callback, lockdep should give us a nice splat. 
+ */ +static void pm_runtime_lockdep_prime(void) +{ + lock_map_acquire(&xe_pm_runtime_lockdep_map); + lock_map_release(&xe_pm_runtime_lockdep_map); +} + /** * xe_pm_runtime_get - Get a runtime_pm reference and resume synchronously * @xe: xe device instance @@ -424,6 +450,7 @@ void xe_pm_runtime_get(struct xe_device *xe) if (xe_pm_read_callback_task(xe) == current) return; + pm_runtime_lockdep_prime(); pm_runtime_resume(xe->drm.dev); } @@ -453,6 +480,7 @@ int xe_pm_runtime_get_ioctl(struct xe_device *xe) if (WARN_ON(xe_pm_read_callback_task(xe) == current)) return -ELOOP; + pm_runtime_lockdep_prime(); return pm_runtime_get_sync(xe->drm.dev); } @@ -519,6 +547,7 @@ bool xe_pm_runtime_resume_and_get(struct xe_device *xe) return true; } + pm_runtime_lockdep_prime(); return pm_runtime_resume_and_get(xe->drm.dev) >= 0; } -- cgit v1.2.3 From 152c37bf40e626f5ebe3a57f75de3ae280014d3f Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Wed, 17 Apr 2024 16:39:47 -0400 Subject: drm/xe: Remove useless mem_access during probe xe_pm_init is the very last thing during the xe_pci_probe(), hence these protections are useless from the point of view of ensuring that the device is awake. Let's remove it so we continue towards the goal of killing xe_device_mem_access. v2: Adding more cases v3: Provide a separate fix for xe_tile_init_noalloc return (Matt) Adding a new case where display HDCP init calls which are also called at display probe time. Cc: Matthew Auld Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20240417203952.25503-5-rodrigo.vivi@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/display/xe_hdcp_gsc.c | 2 -- drivers/gpu/drm/xe/xe_ggtt.c | 2 -- drivers/gpu/drm/xe/xe_gt.c | 9 --------- drivers/gpu/drm/xe/xe_tile.c | 15 +++++---------- drivers/gpu/drm/xe/xe_uc.c | 11 ----------- 5 files changed, 5 insertions(+), 34 deletions(-) diff --git a/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c b/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c index ac4b870f73fa..264b957f3639 100644 --- a/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c +++ b/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c @@ -70,7 +70,6 @@ static int intel_hdcp_gsc_initialize_message(struct xe_device *xe, int ret = 0; /* allocate object of two page for HDCP command memory and store it */ - xe_device_mem_access_get(xe); bo = xe_bo_create_pin_map(xe, xe_device_get_root_tile(xe), NULL, PAGE_SIZE * 2, ttm_bo_type_kernel, XE_BO_FLAG_SYSTEM | @@ -90,7 +89,6 @@ static int intel_hdcp_gsc_initialize_message(struct xe_device *xe, hdcp_message->hdcp_cmd_in = cmd_in; hdcp_message->hdcp_cmd_out = cmd_out; out: - xe_device_mem_access_put(xe); return ret; } diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index f090cab065b8..38f6c94c722d 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -203,14 +203,12 @@ static void xe_ggtt_initial_clear(struct xe_ggtt *ggtt) u64 start, end; /* Display may have allocated inside ggtt, so be careful with clearing here */ - xe_device_mem_access_get(tile_to_xe(ggtt->tile)); mutex_lock(&ggtt->lock); drm_mm_for_each_hole(hole, &ggtt->mm, start, end) xe_ggtt_clear(ggtt, start, end - start); xe_ggtt_invalidate(ggtt); mutex_unlock(&ggtt->lock); - xe_device_mem_access_put(tile_to_xe(ggtt->tile)); } int xe_ggtt_init(struct xe_ggtt *ggtt) diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 38956b60e084..091c0eb6ce2b 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -354,7 +354,6 @@ static int gt_fw_domain_init(struct 
xe_gt *gt) { int err, i; - xe_device_mem_access_get(gt_to_xe(gt)); err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); if (err) goto err_hw_fence_irq; @@ -396,7 +395,6 @@ static int gt_fw_domain_init(struct xe_gt *gt) err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); XE_WARN_ON(err); - xe_device_mem_access_put(gt_to_xe(gt)); return 0; @@ -406,7 +404,6 @@ err_force_wake: err_hw_fence_irq: for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i) xe_hw_fence_irq_finish(>->fence_irq[i]); - xe_device_mem_access_put(gt_to_xe(gt)); return err; } @@ -415,7 +412,6 @@ static int all_fw_domain_init(struct xe_gt *gt) { int err, i; - xe_device_mem_access_get(gt_to_xe(gt)); err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); if (err) goto err_hw_fence_irq; @@ -481,7 +477,6 @@ static int all_fw_domain_init(struct xe_gt *gt) err = xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL); XE_WARN_ON(err); - xe_device_mem_access_put(gt_to_xe(gt)); return 0; @@ -490,7 +485,6 @@ err_force_wake: err_hw_fence_irq: for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i) xe_hw_fence_irq_finish(>->fence_irq[i]); - xe_device_mem_access_put(gt_to_xe(gt)); return err; } @@ -503,7 +497,6 @@ int xe_gt_init_hwconfig(struct xe_gt *gt) { int err; - xe_device_mem_access_get(gt_to_xe(gt)); err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); if (err) goto out; @@ -526,8 +519,6 @@ int xe_gt_init_hwconfig(struct xe_gt *gt) out_fw: xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); out: - xe_device_mem_access_put(gt_to_xe(gt)); - return err; } diff --git a/drivers/gpu/drm/xe/xe_tile.c b/drivers/gpu/drm/xe/xe_tile.c index 0650b2fa75ef..74ecb5f39438 100644 --- a/drivers/gpu/drm/xe/xe_tile.c +++ b/drivers/gpu/drm/xe/xe_tile.c @@ -160,24 +160,19 @@ int xe_tile_init_noalloc(struct xe_tile *tile) { int err; - xe_device_mem_access_get(tile_to_xe(tile)); - err = tile_ttm_mgr_init(tile); if (err) - goto err_mem_access; + return err; tile->mem.kernel_bb_pool = xe_sa_bo_manager_init(tile, SZ_1M, 16); - if (IS_ERR(tile->mem.kernel_bb_pool)) { - err = PTR_ERR(tile->mem.kernel_bb_pool); - goto err_mem_access; - } + if (IS_ERR(tile->mem.kernel_bb_pool)) + return PTR_ERR(tile->mem.kernel_bb_pool); + xe_wa_apply_tile_workarounds(tile); xe_tile_sysfs_init(tile); -err_mem_access: - xe_device_mem_access_put(tile_to_xe(tile)); - return err; + return 0; } void xe_tile_migrate_wait(struct xe_tile *tile) diff --git a/drivers/gpu/drm/xe/xe_uc.c b/drivers/gpu/drm/xe/xe_uc.c index 7033f8c1b431..4feb35c95a1c 100644 --- a/drivers/gpu/drm/xe/xe_uc.c +++ b/drivers/gpu/drm/xe/xe_uc.c @@ -32,11 +32,8 @@ uc_to_xe(struct xe_uc *uc) /* Should be called once at driver load only */ int xe_uc_init(struct xe_uc *uc) { - struct xe_device *xe = uc_to_xe(uc); int ret; - xe_device_mem_access_get(xe); - /* * We call the GuC/HuC/GSC init functions even if GuC submission is off * to correctly move our tracking of the FW state to "disabled". @@ -65,16 +62,8 @@ int xe_uc_init(struct xe_uc *uc) goto err; ret = xe_guc_db_mgr_init(&uc->guc.dbm, ~0); - if (ret) - goto err; - - xe_device_mem_access_put(xe); - - return 0; err: - xe_device_mem_access_put(xe); - return ret; } -- cgit v1.2.3 From fdea94a4c25a9923f7418325f45951431945d14c Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Wed, 17 Apr 2024 16:39:48 -0400 Subject: drm/xe: Convert xe_gem_fault to use direct xe_pm_runtime calls The gem page fault is one of the outer bound protections where we want to ensure that the hardware is in D0 before proceeding with memory access. 
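As an illustration, the outer-bound pattern looks roughly like this (a minimal sketch with hypothetical helpers standing in for the real fault path):

static vm_fault_t example_gem_fault(struct vm_fault *vmf)
{
	struct xe_device *xe = example_vmf_to_xe(vmf); /* hypothetical helper */
	vm_fault_t ret;

	xe_pm_runtime_get(xe);           /* synchronous resume: hardware is in D0 from here on */
	ret = example_handle_fault(vmf); /* hypothetical stand-in for the actual fault handling */
	xe_pm_runtime_put(xe);           /* drop the wakeref once the memory access is done */

	return ret;
}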
Let's convert it towards the xe_pm_runtime functions directly so we can then convert the mem_access to be inner protection only and then Kill it for good. Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20240417203952.25503-6-rodrigo.vivi@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_bo.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index fdeb3691d3f6..9889adcc458b 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -22,6 +22,7 @@ #include "xe_gt.h" #include "xe_map.h" #include "xe_migrate.h" +#include "xe_pm.h" #include "xe_preempt_fence.h" #include "xe_res_cursor.h" #include "xe_trace.h" @@ -1107,7 +1108,7 @@ static vm_fault_t xe_gem_fault(struct vm_fault *vmf) int idx; if (needs_rpm) - xe_device_mem_access_get(xe); + xe_pm_runtime_get(xe); ret = ttm_bo_vm_reserve(tbo, vmf); if (ret) @@ -1138,7 +1139,7 @@ static vm_fault_t xe_gem_fault(struct vm_fault *vmf) dma_resv_unlock(tbo->base.resv); out: if (needs_rpm) - xe_device_mem_access_put(xe); + xe_pm_runtime_put(xe); return ret; } -- cgit v1.2.3 From a382291017f94b2dde4dcbc69675043761943d0a Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Wed, 17 Apr 2024 16:39:49 -0400 Subject: drm/xe: Removing extra mem_access protection from runtime pm This is not needed any longer, now that we have all the protection in place with the runtime pm itself. Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20240417203952.25503-7-rodrigo.vivi@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_device.c | 8 -------- drivers/gpu/drm/xe/xe_device.h | 1 - drivers/gpu/drm/xe/xe_pm.c | 3 --- 3 files changed, 12 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 4b38fb78adca..1f8f9018b27c 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -717,14 +717,6 @@ u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size) DIV_ROUND_UP_ULL(size, NUM_BYTES_PER_CCS_BYTE(xe)) : 0; } -bool xe_device_mem_access_ongoing(struct xe_device *xe) -{ - if (xe_pm_read_callback_task(xe) != NULL) - return true; - - return atomic_read(&xe->mem_access.ref); -} - /** * xe_device_assert_mem_access - Inspect the current runtime_pm state. 
* @xe: xe device instance diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h index 02eda6610972..39921666e1f1 100644 --- a/drivers/gpu/drm/xe/xe_device.h +++ b/drivers/gpu/drm/xe/xe_device.h @@ -138,7 +138,6 @@ bool xe_device_mem_access_get_if_ongoing(struct xe_device *xe); void xe_device_mem_access_put(struct xe_device *xe); void xe_device_assert_mem_access(struct xe_device *xe); -bool xe_device_mem_access_ongoing(struct xe_device *xe); static inline bool xe_device_in_fault_mode(struct xe_device *xe) { diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c index a1a13d17aa78..0d5fbd715a25 100644 --- a/drivers/gpu/drm/xe/xe_pm.c +++ b/drivers/gpu/drm/xe/xe_pm.c @@ -306,9 +306,6 @@ int xe_pm_runtime_suspend(struct xe_device *xe) u8 id; int err = 0; - if (xe->d3cold.allowed && xe_device_mem_access_ongoing(xe)) - return -EBUSY; - /* Disable access_ongoing asserts and prevent recursive pm calls */ xe_pm_write_callback_task(xe, current); -- cgit v1.2.3 From 16b57c90bb81d7a6a83bfb0152a6425570644e07 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Wed, 17 Apr 2024 16:39:50 -0400 Subject: drm/xe: Convert mem_access_if_ongoing to direct xe_pm_runtime_get_if_active Now that assert_mem_access is relying directly on the pm_runtime state instead of the counters, there's no reason why we cannot use the pm_runtime functions directly. Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20240417203952.25503-8-rodrigo.vivi@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_device.c | 17 ----------------- drivers/gpu/drm/xe/xe_device.h | 1 - drivers/gpu/drm/xe/xe_guc_ct.c | 8 ++++---- 3 files changed, 4 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 1f8f9018b27c..47a4bb0b0a7b 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -733,23 +733,6 @@ void xe_device_assert_mem_access(struct xe_device *xe) xe_assert(xe, !xe_pm_runtime_suspended(xe)); } -bool xe_device_mem_access_get_if_ongoing(struct xe_device *xe) -{ - bool active; - - if (xe_pm_read_callback_task(xe) == current) - return true; - - active = xe_pm_runtime_get_if_active(xe); - if (active) { - int ref = atomic_inc_return(&xe->mem_access.ref); - - xe_assert(xe, ref != S32_MAX); - } - - return active; -} - void xe_device_mem_access_get(struct xe_device *xe) { int ref; diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h index 39921666e1f1..54490802e97b 100644 --- a/drivers/gpu/drm/xe/xe_device.h +++ b/drivers/gpu/drm/xe/xe_device.h @@ -134,7 +134,6 @@ static inline struct xe_force_wake *gt_to_fw(struct xe_gt *gt) } void xe_device_mem_access_get(struct xe_device *xe); -bool xe_device_mem_access_get_if_ongoing(struct xe_device *xe); void xe_device_mem_access_put(struct xe_device *xe); void xe_device_assert_mem_access(struct xe_device *xe); diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index b1412d432ec2..ac9324338ccf 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -1210,7 +1210,7 @@ void xe_guc_ct_fast_path(struct xe_guc_ct *ct) bool ongoing; int len; - ongoing = xe_device_mem_access_get_if_ongoing(ct_to_xe(ct)); + ongoing = xe_pm_runtime_get_if_active(ct_to_xe(ct)); if (!ongoing && xe_pm_read_callback_task(ct_to_xe(ct)) == NULL) return; @@ -1223,7 +1223,7 @@ void xe_guc_ct_fast_path(struct xe_guc_ct *ct) spin_unlock(&ct->fast_lock); if (ongoing) - xe_device_mem_access_put(xe); + 
xe_pm_runtime_put(xe); } /* Returns less than zero on error, 0 on done, 1 on more available */ @@ -1281,7 +1281,7 @@ static void g2h_worker_func(struct work_struct *w) * responses, if the worker here is blocked on those callbacks * completing, creating a deadlock. */ - ongoing = xe_device_mem_access_get_if_ongoing(ct_to_xe(ct)); + ongoing = xe_pm_runtime_get_if_active(ct_to_xe(ct)); if (!ongoing && xe_pm_read_callback_task(ct_to_xe(ct)) == NULL) return; @@ -1299,7 +1299,7 @@ } while (ret == 1); if (ongoing) - xe_device_mem_access_put(ct_to_xe(ct)); + xe_pm_runtime_put(ct_to_xe(ct)); } static void guc_ctb_snapshot_capture(struct xe_device *xe, struct guc_ctb *ctb, -- cgit v1.2.3 From e1feade0776ee6bee1fc2d987a4b40bc0e47cf66 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Wed, 17 Apr 2024 16:39:51 -0400 Subject: drm/xe: Ensure all the inner access are using the _noresume variant At this point, mem_access references should only be used as inner points of the execution, with a get with synchronous resume previously called at an outer point. So, before killing mem_access in favor of direct access, let's ensure that we first convert them towards the new _noresume variant that will WARN us if no inner caller happened. Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20240417203952.25503-9-rodrigo.vivi@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 47a4bb0b0a7b..a7c4e4f73200 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -749,7 +749,7 @@ void xe_device_mem_access_get(struct xe_device *xe) if (xe_pm_read_callback_task(xe) == current) return; - xe_pm_runtime_get(xe); + xe_pm_runtime_get_noresume(xe); ref = atomic_inc_return(&xe->mem_access.ref); xe_assert(xe, ref != S32_MAX); -- cgit v1.2.3 From f9116f658a6217b101e3b4e89f845775b6fb05d9 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Wed, 17 Apr 2024 16:39:52 -0400 Subject: drm/xe: Add outer runtime_pm protection to xe_live_ktest@xe_dma_buf Any kunit doing any memory access should get its own runtime_pm outer reference, since it doesn't use the standard driver API entries. In particular, this dma_buf test within the same driver.
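Sketched, with a hypothetical test body (the actual conversion is in the diff below):

static int example_test_run_device(struct xe_device *xe)
{
	xe_pm_runtime_get(xe);  /* outer reference: tests bypass the usual ioctl entry points */
	example_test_body(xe);  /* hypothetical stand-in for the kunit test body */
	xe_pm_runtime_put(xe);

	return 0;
}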
Found by pre-merge CI on adding WARN calls for unprotected inner callers: <6> [318.639739] # xe_dma_buf_kunit: running xe_test_dmabuf_import_same_driver <4> [318.639957] ------------[ cut here ]------------ <4> [318.639967] xe 0000:4d:00.0: Missing outer runtime PM protection <4> [318.640049] WARNING: CPU: 117 PID: 3832 at drivers/gpu/drm/xe/xe_pm.c:533 xe_pm_runtime_get_noresume+0x48/0x60 [xe] Cc: Matthew Auld Cc: Francois Dugast Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240417203952.25503-10-rodrigo.vivi@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/tests/xe_dma_buf.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/xe/tests/xe_dma_buf.c b/drivers/gpu/drm/xe/tests/xe_dma_buf.c index d54dd5b43007..e7f9b531c465 100644 --- a/drivers/gpu/drm/xe/tests/xe_dma_buf.c +++ b/drivers/gpu/drm/xe/tests/xe_dma_buf.c @@ -12,6 +12,7 @@ #include "tests/xe_pci_test.h" #include "xe_pci.h" +#include "xe_pm.h" static bool p2p_enabled(struct dma_buf_test_params *params) { @@ -259,6 +260,7 @@ static int dma_buf_run_device(struct xe_device *xe) const struct dma_buf_test_params *params; struct kunit *test = xe_cur_kunit(); + xe_pm_runtime_get(xe); for (params = test_params; params->mem_mask; ++params) { struct dma_buf_test_params p = *params; @@ -266,6 +268,7 @@ static int dma_buf_run_device(struct xe_device *xe) test->priv = &p; xe_test_dmabuf_import_same_driver(xe); } + xe_pm_runtime_put(xe); /* A non-zero return would halt iteration over driver devices */ return 0; -- cgit v1.2.3 From cba22c911c7009aec4de2c890f3440cbb3fe67e4 Mon Sep 17 00:00:00 2001 From: Gustavo Sousa Date: Wed, 17 Apr 2024 18:25:01 -0300 Subject: drm/xe/xe2lpg: Extend Wa_14020338487 Wa_14020338487 also applies to Xe2_LPG. Replicate the existing entry to one specific for Xe2_LPG. Signed-off-by: Gustavo Sousa Reviewed-by: Matt Roper Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20240417212501.312346-1-gustavo.sousa@intel.com --- drivers/gpu/drm/xe/xe_wa.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index 632bd9066f8d..dcf7ed51757c 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -445,6 +445,10 @@ static const struct xe_rtp_entry_sr engine_was[] = { FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(HALF_SLICE_CHICKEN5, DISABLE_SAMPLE_G_PERFORMANCE)) }, + { XE_RTP_NAME("14020338487"), + XE_RTP_RULES(GRAPHICS_VERSION(2004), FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(SET(ROW_CHICKEN3, XE2_EUPEND_CHK_FLUSH_DIS)) + }, { XE_RTP_NAME("16021540221"), XE_RTP_RULES(GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0), FUNC(xe_rtp_match_first_render_or_compute)), -- cgit v1.2.3 From 5a73dd61a0288490b0cfba44dd1cb8c9a0fc65f7 Mon Sep 17 00:00:00 2001 From: Himal Prasad Ghimiray Date: Fri, 12 Apr 2024 23:42:05 +0530 Subject: drm/xe: Simplify function return using drmm_add_action_or_reset() Instead of assigning the value of drmm_add_action_or_reset() to err and returning err in case of failure and 0 in case of success, simply return the result of drmm_add_action_or_reset(). -v2: cleanup in xe_display too. 
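In the abstract, each conversion has this shape (a sketch; example_fini is a hypothetical cleanup action):

/* Before: */
err = drmm_add_action_or_reset(&xe->drm, example_fini, NULL);
if (err)
	return err;

return 0;

/* After: */
return drmm_add_action_or_reset(&xe->drm, example_fini, NULL);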
Cc: Rodrigo Vivi Cc: Lucas De Marchi Reviewed-by: Lucas De Marchi Signed-off-by: Himal Prasad Ghimiray Link: https://patchwork.freedesktop.org/patch/msgid/20240412181211.1155732-2-himal.prasad.ghimiray@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/display/xe_display.c | 8 +------- drivers/gpu/drm/xe/xe_device.c | 6 +----- drivers/gpu/drm/xe/xe_gsc_proxy.c | 7 +------ drivers/gpu/drm/xe/xe_gt.c | 6 +----- drivers/gpu/drm/xe/xe_guc_pc.c | 6 +----- drivers/gpu/drm/xe/xe_hw_engine.c | 6 +----- 6 files changed, 6 insertions(+), 33 deletions(-) diff --git a/drivers/gpu/drm/xe/display/xe_display.c b/drivers/gpu/drm/xe/display/xe_display.c index 6ec375c1c4b6..63b27fbcdaca 100644 --- a/drivers/gpu/drm/xe/display/xe_display.c +++ b/drivers/gpu/drm/xe/display/xe_display.c @@ -101,8 +101,6 @@ static void display_destroy(struct drm_device *dev, void *dummy) */ int xe_display_create(struct xe_device *xe) { - int err; - spin_lock_init(&xe->display.fb_tracking.lock); xe->display.hotplug.dp_wq = alloc_ordered_workqueue("xe-dp", 0); @@ -110,11 +108,7 @@ int xe_display_create(struct xe_device *xe) drmm_mutex_init(&xe->drm, &xe->sb_lock); xe->enabled_irq_mask = ~0; - err = drmm_add_action_or_reset(&xe->drm, display_destroy, NULL); - if (err) - return err; - - return 0; + return drmm_add_action_or_reset(&xe->drm, display_destroy, NULL); } static void xe_display_fini_nommio(struct drm_device *dev, void *dummy) diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index a7c4e4f73200..3b0820594836 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -649,11 +649,7 @@ int xe_device_probe(struct xe_device *xe) xe_hwmon_register(xe); - err = drmm_add_action_or_reset(&xe->drm, xe_device_sanitize, xe); - if (err) - return err; - - return 0; + return drmm_add_action_or_reset(&xe->drm, xe_device_sanitize, xe); err_fini_display: xe_display_driver_remove(xe); diff --git a/drivers/gpu/drm/xe/xe_gsc_proxy.c b/drivers/gpu/drm/xe/xe_gsc_proxy.c index 35e397b68dfc..1b908d238bd1 100644 --- a/drivers/gpu/drm/xe/xe_gsc_proxy.c +++ b/drivers/gpu/drm/xe/xe_gsc_proxy.c @@ -403,7 +403,6 @@ static int proxy_channel_alloc(struct xe_gsc *gsc) struct xe_device *xe = gt_to_xe(gt); struct xe_bo *bo; void *csme; - int err; csme = kzalloc(GSC_PROXY_CHANNEL_SIZE, GFP_KERNEL); if (!csme) @@ -424,11 +423,7 @@ static int proxy_channel_alloc(struct xe_gsc *gsc) gsc->proxy.to_csme = csme; gsc->proxy.from_csme = csme + GSC_PROXY_BUFFER_SIZE; - err = drmm_add_action_or_reset(&xe->drm, proxy_channel_free, gsc); - if (err) - return err; - - return 0; + return drmm_add_action_or_reset(&xe->drm, proxy_channel_free, gsc); } /** diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 091c0eb6ce2b..10ba91fadb9a 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -558,11 +558,7 @@ int xe_gt_init(struct xe_gt *gt) if (err) return err; - err = drmm_add_action_or_reset(>_to_xe(gt)->drm, gt_fini, gt); - if (err) - return err; - - return 0; + return drmm_add_action_or_reset(>_to_xe(gt)->drm, gt_fini, gt); } static int do_gt_reset(struct xe_gt *gt) diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c index 521ae24f2314..509649d0e65e 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.c +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -937,9 +937,5 @@ int xe_guc_pc_init(struct xe_guc_pc *pc) pc->bo = bo; - err = drmm_add_action_or_reset(&xe->drm, xe_guc_pc_fini, pc); - if (err) - return err; - - return 0; + return 
drmm_add_action_or_reset(&xe->drm, xe_guc_pc_fini, pc); } diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index a688bb2d96ce..455f375c1cbd 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -550,11 +550,7 @@ static int hw_engine_init(struct xe_gt *gt, struct xe_hw_engine *hwe, if (xe->info.has_usm && hwe->class == XE_ENGINE_CLASS_COPY) gt->usm.reserved_bcs_instance = hwe->instance; - err = drmm_add_action_or_reset(&xe->drm, hw_engine_fini, hwe); - if (err) - return err; - - return 0; + return drmm_add_action_or_reset(&xe->drm, hw_engine_fini, hwe); err_kernel_lrc: xe_lrc_finish(&hwe->kernel_lrc); -- cgit v1.2.3 From a99641e38704202ae2a97202b3d249208c9cda7f Mon Sep 17 00:00:00 2001 From: Himal Prasad Ghimiray Date: Fri, 12 Apr 2024 23:42:06 +0530 Subject: drm/xe: Remove sysfs only once on action add failure The drmm_add_action_or_reset function automatically invokes the action (sysfs removal) in the event of a failure; therefore, there's no necessity to call it within the return check. Modify the return type of xe_gt_ccs_mode_sysfs_init to int, allowing the caller to pass errors up the call chain. Should sysfs creation or drmm_add_action_or_reset fail, error propagation will prompt a driver load abort. -v2 Edit commit message (Nikula/Lucas) use err_force_wake label instead of new. (Lucas) Avoid unnecessary warn/error messages. (Lucas) Fixes: f3bc5bb4d53d ("drm/xe: Allow userspace to configure CCS mode") Cc: Lucas De Marchi Cc: Jani Nikula Cc: Rodrigo Vivi Cc: Niranjana Vishwanathapura Reviewed-by: Lucas De Marchi Signed-off-by: Himal Prasad Ghimiray Link: https://patchwork.freedesktop.org/patch/msgid/20240412181211.1155732-3-himal.prasad.ghimiray@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_gt.c | 4 +++- drivers/gpu/drm/xe/xe_gt_ccs_mode.c | 19 +++++++------------ drivers/gpu/drm/xe/xe_gt_ccs_mode.h | 2 +- 3 files changed, 11 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 10ba91fadb9a..fe70d6e9dfa9 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -385,7 +385,9 @@ static int gt_fw_domain_init(struct xe_gt *gt) err); /* Initialize CCS mode sysfs after early initialization of HW engines */ - xe_gt_ccs_mode_sysfs_init(gt); + err = xe_gt_ccs_mode_sysfs_init(gt); + if (err) + goto err_force_wake; /* * Stash hardware-reported version. Since this register does not exist diff --git a/drivers/gpu/drm/xe/xe_gt_ccs_mode.c b/drivers/gpu/drm/xe/xe_gt_ccs_mode.c index 529fc286cd06..396aeb5b9924 100644 --- a/drivers/gpu/drm/xe/xe_gt_ccs_mode.c +++ b/drivers/gpu/drm/xe/xe_gt_ccs_mode.c @@ -167,25 +167,20 @@ static void xe_gt_ccs_mode_sysfs_fini(struct drm_device *drm, void *arg) * and it is expected that there are no open drm clients while doing so. * The number of available compute slices is exposed to user through a per-gt * 'num_cslices' sysfs interface. + * + * Returns: Returns error value for failure and 0 for success. 
*/ -void xe_gt_ccs_mode_sysfs_init(struct xe_gt *gt) +int xe_gt_ccs_mode_sysfs_init(struct xe_gt *gt) { struct xe_device *xe = gt_to_xe(gt); int err; if (!xe_gt_ccs_mode_enabled(gt)) - return; + return 0; err = sysfs_create_files(gt->sysfs, gt_ccs_mode_attrs); - if (err) { - drm_warn(&xe->drm, "Sysfs creation for ccs_mode failed err: %d\n", err); - return; - } + if (err) + return err; - err = drmm_add_action_or_reset(&xe->drm, xe_gt_ccs_mode_sysfs_fini, gt); - if (err) { - sysfs_remove_files(gt->sysfs, gt_ccs_mode_attrs); - drm_warn(&xe->drm, "%s: drmm_add_action_or_reset failed, err: %d\n", - __func__, err); - } + return drmm_add_action_or_reset(&xe->drm, xe_gt_ccs_mode_sysfs_fini, gt); } diff --git a/drivers/gpu/drm/xe/xe_gt_ccs_mode.h b/drivers/gpu/drm/xe/xe_gt_ccs_mode.h index f39975aaaab0..f8779852cf0d 100644 --- a/drivers/gpu/drm/xe/xe_gt_ccs_mode.h +++ b/drivers/gpu/drm/xe/xe_gt_ccs_mode.h @@ -12,7 +12,7 @@ #include "xe_platform_types.h" void xe_gt_apply_ccs_mode(struct xe_gt *gt); -void xe_gt_ccs_mode_sysfs_init(struct xe_gt *gt); +int xe_gt_ccs_mode_sysfs_init(struct xe_gt *gt); static inline bool xe_gt_ccs_mode_enabled(const struct xe_gt *gt) { -- cgit v1.2.3 From 22bf0bc04d273ca002a47de55693797b13076602 Mon Sep 17 00:00:00 2001 From: Himal Prasad Ghimiray Date: Fri, 12 Apr 2024 23:42:07 +0530 Subject: drm/xe: call free_gsc_pkt only once on action add failure The drmm_add_action_or_reset function automatically invokes the action (free_gsc_pkt) in the event of a failure; therefore, there's no necessity to call it within the return check. -v2 Fix commit message. (Lucas) Fixes: d8b1571312b7 ("drm/xe/huc: HuC authentication via GSC") Cc: Rodrigo Vivi Cc: Daniele Ceraolo Spurio Reviewed-by: Lucas De Marchi Reviewed-by: Daniele Ceraolo Spurio Signed-off-by: Himal Prasad Ghimiray Link: https://patchwork.freedesktop.org/patch/msgid/20240412181211.1155732-4-himal.prasad.ghimiray@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_huc.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_huc.c b/drivers/gpu/drm/xe/xe_huc.c index 78318d73e4cf..39a484a57585 100644 --- a/drivers/gpu/drm/xe/xe_huc.c +++ b/drivers/gpu/drm/xe/xe_huc.c @@ -53,7 +53,6 @@ static int huc_alloc_gsc_pkt(struct xe_huc *huc) struct xe_gt *gt = huc_to_gt(huc); struct xe_device *xe = gt_to_xe(gt); struct xe_bo *bo; - int err; /* we use a single object for both input and output */ bo = xe_bo_create_pin_map(xe, gt_to_tile(gt), NULL, @@ -66,13 +65,7 @@ static int huc_alloc_gsc_pkt(struct xe_huc *huc) huc->gsc_pkt = bo; - err = drmm_add_action_or_reset(&xe->drm, free_gsc_pkt, huc); - if (err) { - free_gsc_pkt(&xe->drm, huc); - return err; - } - - return 0; + return drmm_add_action_or_reset(&xe->drm, free_gsc_pkt, huc); } int xe_huc_init(struct xe_huc *huc) -- cgit v1.2.3 From 6e40f142c57999ba8d274902a4eb2369b538f767 Mon Sep 17 00:00:00 2001 From: Himal Prasad Ghimiray Date: Fri, 12 Apr 2024 23:42:08 +0530 Subject: drm/xe: Return NULL in case of drmm_add_action_or_reset failure In case of drmm_add_action_or_reset failure, return NULL; there is no need to print warning messages, as they will be printed implicitly.
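For reference, the behavior relied upon by these patches is roughly the following (a simplified sketch of the drm_managed helper, not its exact source):

static int example_add_action_or_reset(struct drm_device *dev,
				       void (*action)(struct drm_device *, void *),
				       void *data)
{
	int ret = drmm_add_action(dev, action, data);

	if (ret)
		action(dev, data); /* the cleanup action runs immediately on failure */

	return ret;
}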
Cc: Tejas Upadhyay Cc: Rodrigo Vivi Reviewed-by: Lucas De Marchi Signed-off-by: Himal Prasad Ghimiray Link: https://patchwork.freedesktop.org/patch/msgid/20240412181211.1155732-5-himal.prasad.ghimiray@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c | 22 +++++----------------- 1 file changed, 5 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c index c5084d94c442..daab970f8be8 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c +++ b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c @@ -520,9 +520,8 @@ kobj_xe_hw_engine_class(struct xe_device *xe, struct kobject *parent, const char err = drmm_add_action_or_reset(&xe->drm, kobj_xe_hw_engine_class_fini, &keclass->base); if (err) - drm_warn(&xe->drm, - "%s: drmm_add_action_or_reset failed, err: %d\n", - __func__, err); + return NULL; + return keclass; } @@ -553,13 +552,8 @@ static int xe_add_hw_engine_class_defaults(struct xe_device *xe, if (err) goto err_object; - err = drmm_add_action_or_reset(&xe->drm, hw_engine_class_defaults_fini, - kobj); - if (err) - drm_warn(&xe->drm, - "%s: drmm_add_action_or_reset failed, err: %d\n", - __func__, err); - return err; + return drmm_add_action_or_reset(&xe->drm, hw_engine_class_defaults_fini, kobj); + err_object: kobject_put(kobj); return err; @@ -708,14 +702,8 @@ int xe_hw_engine_class_sysfs_init(struct xe_gt *gt) goto err_object; } - err = drmm_add_action_or_reset(&xe->drm, hw_engine_class_sysfs_fini, - kobj); - if (err) - drm_warn(&xe->drm, - "%s: drmm_add_action_or_reset failed, err: %d\n", - __func__, err); + return drmm_add_action_or_reset(&xe->drm, hw_engine_class_sysfs_fini, kobj); - return err; err_object: kobject_put(kobj); return err; -- cgit v1.2.3 From 9c3f72a342c9558929ad63839e758d35ac28ae93 Mon Sep 17 00:00:00 2001 From: Himal Prasad Ghimiray Date: Fri, 12 Apr 2024 23:42:09 +0530 Subject: drm/xe/gt: Abort driver load for sysfs creation failure Instead of allowing the driver to load with incomplete sysfs entries in case of sysfs creation failure, we should terminate the driver loading. This change ensures that the status of all gt associated sysfs entries creation is relayed to xe_gt_init, leading to a driver load abort if any sysfs creation failures occur. -v2 use err_force_wake label instead of new. (Lucas) Avoid unnecessary warn/error messages. 
(Lucas) Cc: Rodrigo Vivi Cc: Lucas De Marchi Signed-off-by: Himal Prasad Ghimiray Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240412181211.1155732-6-himal.prasad.ghimiray@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_gt.c | 16 ++++++++++------ drivers/gpu/drm/xe/xe_gt_freq.c | 25 ++++++++++--------------- drivers/gpu/drm/xe/xe_gt_freq.h | 2 +- drivers/gpu/drm/xe/xe_gt_idle.c | 16 +++++----------- drivers/gpu/drm/xe/xe_gt_idle.h | 2 +- drivers/gpu/drm/xe/xe_gt_sysfs.c | 14 ++++---------- drivers/gpu/drm/xe/xe_gt_sysfs.h | 2 +- drivers/gpu/drm/xe/xe_gt_throttle_sysfs.c | 13 ++++--------- drivers/gpu/drm/xe/xe_gt_throttle_sysfs.h | 2 +- drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c | 6 +----- 10 files changed, 38 insertions(+), 60 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index fe70d6e9dfa9..491d0413de15 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -366,7 +366,9 @@ static int gt_fw_domain_init(struct xe_gt *gt) xe_lmtt_init(>_to_tile(gt)->sriov.pf.lmtt); } - xe_gt_idle_sysfs_init(>->gtidle); + err = xe_gt_idle_sysfs_init(>->gtidle); + if (err) + goto err_force_wake; /* Enable per hw engine IRQs */ xe_irq_enable_hwe(gt); @@ -380,9 +382,7 @@ static int gt_fw_domain_init(struct xe_gt *gt) err = xe_hw_engine_class_sysfs_init(gt); if (err) - drm_warn(>_to_xe(gt)->drm, - "failed to register engines sysfs directory, err: %d\n", - err); + goto err_force_wake; /* Initialize CCS mode sysfs after early initialization of HW engines */ err = xe_gt_ccs_mode_sysfs_init(gt); @@ -546,13 +546,17 @@ int xe_gt_init(struct xe_gt *gt) xe_mocs_init_early(gt); - xe_gt_sysfs_init(gt); + err = xe_gt_sysfs_init(gt); + if (err) + return err; err = gt_fw_domain_init(gt); if (err) return err; - xe_gt_freq_init(gt); + err = xe_gt_freq_init(gt); + if (err) + return err; xe_force_wake_init_engines(gt, gt_to_fw(gt)); diff --git a/drivers/gpu/drm/xe/xe_gt_freq.c b/drivers/gpu/drm/xe/xe_gt_freq.c index 32b9a743629c..855de40e40ea 100644 --- a/drivers/gpu/drm/xe/xe_gt_freq.c +++ b/drivers/gpu/drm/xe/xe_gt_freq.c @@ -222,33 +222,28 @@ static void freq_fini(struct drm_device *drm, void *arg) * @gt: Xe GT object * * It needs to be initialized after GT Sysfs and GuC PC components are ready. + * + * Returns: Returns error value for failure and 0 for success. 
*/ -void xe_gt_freq_init(struct xe_gt *gt) +int xe_gt_freq_init(struct xe_gt *gt) { struct xe_device *xe = gt_to_xe(gt); int err; if (xe->info.skip_guc_pc) - return; + return 0; gt->freq = kobject_create_and_add("freq0", gt->sysfs); - if (!gt->freq) { - drm_warn(&xe->drm, "failed to add freq0 directory to %s\n", - kobject_name(gt->sysfs)); - return; - } + if (!gt->freq) + return -ENOMEM; err = drmm_add_action_or_reset(&xe->drm, freq_fini, gt->freq); - if (err) { - drm_warn(&xe->drm, "%s: drmm_add_action_or_reset failed, err: %d\n", - __func__, err); - return; - } + if (err) + return err; err = sysfs_create_files(gt->freq, freq_attrs); if (err) - drm_warn(&xe->drm, "failed to add freq attrs to %s, err: %d\n", - kobject_name(gt->freq), err); + return err; - xe_gt_throttle_sysfs_init(gt); + return xe_gt_throttle_sysfs_init(gt); } diff --git a/drivers/gpu/drm/xe/xe_gt_freq.h b/drivers/gpu/drm/xe/xe_gt_freq.h index f3fe3c90491a..b7fddbe7b9b6 100644 --- a/drivers/gpu/drm/xe/xe_gt_freq.h +++ b/drivers/gpu/drm/xe/xe_gt_freq.h @@ -8,6 +8,6 @@ struct xe_gt; -void xe_gt_freq_init(struct xe_gt *gt); +int xe_gt_freq_init(struct xe_gt *gt); #endif diff --git a/drivers/gpu/drm/xe/xe_gt_idle.c b/drivers/gpu/drm/xe/xe_gt_idle.c index bc1426f8d731..8fc0f3f6ecc5 100644 --- a/drivers/gpu/drm/xe/xe_gt_idle.c +++ b/drivers/gpu/drm/xe/xe_gt_idle.c @@ -152,7 +152,7 @@ static void gt_idle_sysfs_fini(struct drm_device *drm, void *arg) kobject_put(kobj); } -void xe_gt_idle_sysfs_init(struct xe_gt_idle *gtidle) +int xe_gt_idle_sysfs_init(struct xe_gt_idle *gtidle) { struct xe_gt *gt = gtidle_to_gt(gtidle); struct xe_device *xe = gt_to_xe(gt); @@ -160,10 +160,8 @@ void xe_gt_idle_sysfs_init(struct xe_gt_idle *gtidle) int err; kobj = kobject_create_and_add("gtidle", gt->sysfs); - if (!kobj) { - drm_warn(&xe->drm, "%s failed, err: %d\n", __func__, -ENOMEM); - return; - } + if (!kobj) + return -ENOMEM; if (xe_gt_is_media_type(gt)) { snprintf(gtidle->name, sizeof(gtidle->name), "gt%d-mc", gt->info.id); @@ -180,14 +178,10 @@ void xe_gt_idle_sysfs_init(struct xe_gt_idle *gtidle) err = sysfs_create_files(kobj, gt_idle_attrs); if (err) { kobject_put(kobj); - drm_warn(&xe->drm, "failed to register gtidle sysfs, err: %d\n", err); - return; + return err; } - err = drmm_add_action_or_reset(&xe->drm, gt_idle_sysfs_fini, kobj); - if (err) - drm_warn(&xe->drm, "%s: drmm_add_action_or_reset failed, err: %d\n", - __func__, err); + return drmm_add_action_or_reset(&xe->drm, gt_idle_sysfs_fini, kobj); } void xe_gt_idle_enable_c6(struct xe_gt *gt) diff --git a/drivers/gpu/drm/xe/xe_gt_idle.h b/drivers/gpu/drm/xe/xe_gt_idle.h index 69280fd16b03..75bd99659b1b 100644 --- a/drivers/gpu/drm/xe/xe_gt_idle.h +++ b/drivers/gpu/drm/xe/xe_gt_idle.h @@ -10,7 +10,7 @@ struct xe_gt; -void xe_gt_idle_sysfs_init(struct xe_gt_idle *gtidle); +int xe_gt_idle_sysfs_init(struct xe_gt_idle *gtidle); void xe_gt_idle_enable_c6(struct xe_gt *gt); void xe_gt_idle_disable_c6(struct xe_gt *gt); diff --git a/drivers/gpu/drm/xe/xe_gt_sysfs.c b/drivers/gpu/drm/xe/xe_gt_sysfs.c index c69d2e8a0fe1..1e5971072bc8 100644 --- a/drivers/gpu/drm/xe/xe_gt_sysfs.c +++ b/drivers/gpu/drm/xe/xe_gt_sysfs.c @@ -29,7 +29,7 @@ static void gt_sysfs_fini(struct drm_device *drm, void *arg) kobject_put(gt->sysfs); } -void xe_gt_sysfs_init(struct xe_gt *gt) +int xe_gt_sysfs_init(struct xe_gt *gt) { struct xe_tile *tile = gt_to_tile(gt); struct xe_device *xe = gt_to_xe(gt); @@ -38,24 +38,18 @@ void xe_gt_sysfs_init(struct xe_gt *gt) kg = kzalloc(sizeof(*kg), GFP_KERNEL); if (!kg) - return; 
+ return -ENOMEM; kobject_init(&kg->base, &xe_gt_sysfs_kobj_type); kg->gt = gt; err = kobject_add(&kg->base, tile->sysfs, "gt%d", gt->info.id); if (err) { - drm_warn(&xe->drm, "failed to add GT sysfs directory, err: %d\n", err); kobject_put(&kg->base); - return; + return err; } gt->sysfs = &kg->base; - err = drmm_add_action_or_reset(&xe->drm, gt_sysfs_fini, gt); - if (err) { - drm_warn(&xe->drm, "%s: drmm_add_action_or_reset failed, err: %d\n", - __func__, err); - return; - } + return drmm_add_action_or_reset(&xe->drm, gt_sysfs_fini, gt); } diff --git a/drivers/gpu/drm/xe/xe_gt_sysfs.h b/drivers/gpu/drm/xe/xe_gt_sysfs.h index e3ec278ca0be..ecbfcc5c7d42 100644 --- a/drivers/gpu/drm/xe/xe_gt_sysfs.h +++ b/drivers/gpu/drm/xe/xe_gt_sysfs.h @@ -8,7 +8,7 @@ #include "xe_gt_sysfs_types.h" -void xe_gt_sysfs_init(struct xe_gt *gt); +int xe_gt_sysfs_init(struct xe_gt *gt); static inline struct xe_gt * kobj_to_gt(struct kobject *kobj) diff --git a/drivers/gpu/drm/xe/xe_gt_throttle_sysfs.c b/drivers/gpu/drm/xe/xe_gt_throttle_sysfs.c index 9c33045ff1ef..fbe21a8599ca 100644 --- a/drivers/gpu/drm/xe/xe_gt_throttle_sysfs.c +++ b/drivers/gpu/drm/xe/xe_gt_throttle_sysfs.c @@ -236,19 +236,14 @@ static void gt_throttle_sysfs_fini(struct drm_device *drm, void *arg) sysfs_remove_group(gt->freq, &throttle_group_attrs); } -void xe_gt_throttle_sysfs_init(struct xe_gt *gt) +int xe_gt_throttle_sysfs_init(struct xe_gt *gt) { struct xe_device *xe = gt_to_xe(gt); int err; err = sysfs_create_group(gt->freq, &throttle_group_attrs); - if (err) { - drm_warn(&xe->drm, "failed to register throttle sysfs, err: %d\n", err); - return; - } - - err = drmm_add_action_or_reset(&xe->drm, gt_throttle_sysfs_fini, gt); if (err) - drm_warn(&xe->drm, "%s: drmm_add_action_or_reset failed, err: %d\n", - __func__, err); + return err; + + return drmm_add_action_or_reset(&xe->drm, gt_throttle_sysfs_fini, gt); } diff --git a/drivers/gpu/drm/xe/xe_gt_throttle_sysfs.h b/drivers/gpu/drm/xe/xe_gt_throttle_sysfs.h index 3ecfd4beffe1..6c61e6f228a8 100644 --- a/drivers/gpu/drm/xe/xe_gt_throttle_sysfs.h +++ b/drivers/gpu/drm/xe/xe_gt_throttle_sysfs.h @@ -10,7 +10,7 @@ struct xe_gt; -void xe_gt_throttle_sysfs_init(struct xe_gt *gt); +int xe_gt_throttle_sysfs_init(struct xe_gt *gt); #endif /* _XE_GT_THROTTLE_SYSFS_H_ */ diff --git a/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c index daab970f8be8..844ec68cbbb8 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c +++ b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c @@ -690,12 +690,8 @@ int xe_hw_engine_class_sysfs_init(struct xe_gt *gt) keclass->eclass = hwe->eclass; err = xe_add_hw_engine_class_defaults(xe, &keclass->base); - if (err) { - drm_warn(&xe->drm, - "Add .defaults to engines failed!, err: %d\n", - err); + if (err) goto err_object; - } err = sysfs_create_files(&keclass->base, files); if (err) -- cgit v1.2.3 From e3d0839aa50175d9af99f84f8c03523a4e42d8a7 Mon Sep 17 00:00:00 2001 From: Himal Prasad Ghimiray Date: Fri, 12 Apr 2024 23:42:10 +0530 Subject: drm/xe/tile: Abort driver load for sysfs creation failure Ensure that the status of all tile associated sysfs entries creation is relayed to xe_tile_init_noalloc, leading to a driver load abort if any sysfs creation failures occur. -v2 Avoid unnecessary warn/error messages. 
(Lucas) Cc: Rodrigo Vivi Cc: Lucas De Marchi Signed-off-by: Himal Prasad Ghimiray Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240412181211.1155732-7-himal.prasad.ghimiray@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_tile.c | 2 +- drivers/gpu/drm/xe/xe_tile_sysfs.c | 16 +++++++--------- drivers/gpu/drm/xe/xe_tile_sysfs.h | 2 +- drivers/gpu/drm/xe/xe_vram_freq.c | 20 ++++++++------------ drivers/gpu/drm/xe/xe_vram_freq.h | 2 +- 5 files changed, 18 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_tile.c b/drivers/gpu/drm/xe/xe_tile.c index 74ecb5f39438..15ea0a942f67 100644 --- a/drivers/gpu/drm/xe/xe_tile.c +++ b/drivers/gpu/drm/xe/xe_tile.c @@ -170,7 +170,7 @@ int xe_tile_init_noalloc(struct xe_tile *tile) xe_wa_apply_tile_workarounds(tile); - xe_tile_sysfs_init(tile); + err = xe_tile_sysfs_init(tile); return 0; } diff --git a/drivers/gpu/drm/xe/xe_tile_sysfs.c b/drivers/gpu/drm/xe/xe_tile_sysfs.c index 237a0761d3ad..64661403afcd 100644 --- a/drivers/gpu/drm/xe/xe_tile_sysfs.c +++ b/drivers/gpu/drm/xe/xe_tile_sysfs.c @@ -29,7 +29,7 @@ static void tile_sysfs_fini(struct drm_device *drm, void *arg) kobject_put(tile->sysfs); } -void xe_tile_sysfs_init(struct xe_tile *tile) +int xe_tile_sysfs_init(struct xe_tile *tile) { struct xe_device *xe = tile_to_xe(tile); struct device *dev = xe->drm.dev; @@ -38,7 +38,7 @@ void xe_tile_sysfs_init(struct xe_tile *tile) kt = kzalloc(sizeof(*kt), GFP_KERNEL); if (!kt) - return; + return -ENOMEM; kobject_init(&kt->base, &xe_tile_sysfs_kobj_type); kt->tile = tile; @@ -46,16 +46,14 @@ void xe_tile_sysfs_init(struct xe_tile *tile) err = kobject_add(&kt->base, &dev->kobj, "tile%d", tile->id); if (err) { kobject_put(&kt->base); - drm_warn(&xe->drm, "failed to register TILE sysfs directory, err: %d\n", err); - return; + return err; } tile->sysfs = &kt->base; - xe_vram_freq_sysfs_init(tile); - - err = drmm_add_action_or_reset(&xe->drm, tile_sysfs_fini, tile); + err = xe_vram_freq_sysfs_init(tile); if (err) - drm_warn(&xe->drm, "%s: drmm_add_action_or_reset failed, err: %d\n", - __func__, err); + return err; + + return drmm_add_action_or_reset(&xe->drm, tile_sysfs_fini, tile); } diff --git a/drivers/gpu/drm/xe/xe_tile_sysfs.h b/drivers/gpu/drm/xe/xe_tile_sysfs.h index e4f065039eba..54a2ba8ba533 100644 --- a/drivers/gpu/drm/xe/xe_tile_sysfs.h +++ b/drivers/gpu/drm/xe/xe_tile_sysfs.h @@ -8,7 +8,7 @@ #include "xe_tile_sysfs_types.h" -void xe_tile_sysfs_init(struct xe_tile *tile); +int xe_tile_sysfs_init(struct xe_tile *tile); static inline struct xe_tile * kobj_to_tile(struct kobject *kobj) diff --git a/drivers/gpu/drm/xe/xe_vram_freq.c b/drivers/gpu/drm/xe/xe_vram_freq.c index c5f6b5a5d117..3e21ddc6e60c 100644 --- a/drivers/gpu/drm/xe/xe_vram_freq.c +++ b/drivers/gpu/drm/xe/xe_vram_freq.c @@ -100,31 +100,27 @@ static void vram_freq_sysfs_fini(struct drm_device *drm, void *arg) * @tile: Xe Tile object * * It needs to be initialized after the main tile component is ready + * + * Returns: 0 on success, negative error code on error. 
*/ -void xe_vram_freq_sysfs_init(struct xe_tile *tile) +int xe_vram_freq_sysfs_init(struct xe_tile *tile) { struct xe_device *xe = tile_to_xe(tile); struct kobject *kobj; int err; if (xe->info.platform != XE_PVC) - return; + return 0; kobj = kobject_create_and_add("memory", tile->sysfs); - if (!kobj) { - drm_warn(&xe->drm, "failed to add memory directory, err: %d\n", -ENOMEM); - return; - } + if (!kobj) + return -ENOMEM; err = sysfs_create_group(kobj, &freq_group_attrs); if (err) { kobject_put(kobj); - drm_warn(&xe->drm, "failed to register vram freq sysfs, err: %d\n", err); - return; + return err; } - err = drmm_add_action_or_reset(&xe->drm, vram_freq_sysfs_fini, kobj); - if (err) - drm_warn(&xe->drm, "%s: drmm_add_action_or_reset failed, err: %d\n", - __func__, err); + return drmm_add_action_or_reset(&xe->drm, vram_freq_sysfs_fini, kobj); } diff --git a/drivers/gpu/drm/xe/xe_vram_freq.h b/drivers/gpu/drm/xe/xe_vram_freq.h index cbe8c12fbd64..bf726bc5881f 100644 --- a/drivers/gpu/drm/xe/xe_vram_freq.h +++ b/drivers/gpu/drm/xe/xe_vram_freq.h @@ -8,6 +8,6 @@ struct xe_tile; -void xe_vram_freq_sysfs_init(struct xe_tile *tile); +int xe_vram_freq_sysfs_init(struct xe_tile *tile); #endif /* _XE_VRAM_FREQ_H_ */ -- cgit v1.2.3 From c086bfc6ff4db73a39e7c9cc106f1ba7f0051be6 Mon Sep 17 00:00:00 2001 From: Himal Prasad Ghimiray Date: Fri, 12 Apr 2024 23:42:11 +0530 Subject: drm/xe/pm: Capture errors and handle them xe_pm_init may encounter failures for various reasons, such as a failure in initializing drmm_mutex, or when dealing with a d3cold-capable device for vram_threshold sysfs creation and setting default threshold. Presently, all these potential failures are disregarded. Move d3cold.lock initialization to xe_pm_init_early and cause driver abort if mutex initialization has failed. 
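The early-init error handling then takes the usual propagation shape (a sketch; the actual hunks follow):

err = drmm_mutex_init(&xe->drm, &xe->d3cold.lock);
if (err)
	return err; /* propagated up through xe_pm_init_early(), aborting the probe */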
For xe_pm_init failures cleanup the driver and return error code -v2 Make mutex init cleaner (Lucas) Cc: Lucas De Marchi Cc: Rodrigo Vivi Signed-off-by: Himal Prasad Ghimiray Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240412181211.1155732-8-himal.prasad.ghimiray@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_device_sysfs.c | 12 ++++-------- drivers/gpu/drm/xe/xe_device_sysfs.h | 2 +- drivers/gpu/drm/xe/xe_pci.c | 12 ++++++++++-- drivers/gpu/drm/xe/xe_pm.c | 36 ++++++++++++++++++++++++++++-------- drivers/gpu/drm/xe/xe_pm.h | 4 ++-- 5 files changed, 45 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_device_sysfs.c b/drivers/gpu/drm/xe/xe_device_sysfs.c index e47c8ad1bb17..21677b8cd977 100644 --- a/drivers/gpu/drm/xe/xe_device_sysfs.c +++ b/drivers/gpu/drm/xe/xe_device_sysfs.c @@ -76,18 +76,14 @@ static void xe_device_sysfs_fini(struct drm_device *drm, void *arg) sysfs_remove_file(&xe->drm.dev->kobj, &dev_attr_vram_d3cold_threshold.attr); } -void xe_device_sysfs_init(struct xe_device *xe) +int xe_device_sysfs_init(struct xe_device *xe) { struct device *dev = xe->drm.dev; int ret; ret = sysfs_create_file(&dev->kobj, &dev_attr_vram_d3cold_threshold.attr); - if (ret) { - drm_warn(&xe->drm, "Failed to create sysfs file\n"); - return; - } - - ret = drmm_add_action_or_reset(&xe->drm, xe_device_sysfs_fini, xe); if (ret) - drm_warn(&xe->drm, "Failed to add sysfs fini drm action\n"); + return ret; + + return drmm_add_action_or_reset(&xe->drm, xe_device_sysfs_fini, xe); } diff --git a/drivers/gpu/drm/xe/xe_device_sysfs.h b/drivers/gpu/drm/xe/xe_device_sysfs.h index 38b240684bee..f9e83d8bd2c7 100644 --- a/drivers/gpu/drm/xe/xe_device_sysfs.h +++ b/drivers/gpu/drm/xe/xe_device_sysfs.h @@ -8,6 +8,6 @@ struct xe_device; -void xe_device_sysfs_init(struct xe_device *xe); +int xe_device_sysfs_init(struct xe_device *xe); #endif diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index fa2cc80a08a3..65ea44672cab 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -781,18 +781,26 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) str_yes_no(xe_device_has_sriov(xe)), xe_sriov_mode_to_string(xe_device_sriov_mode(xe))); - xe_pm_init_early(xe); + err = xe_pm_init_early(xe); + if (err) + return err; err = xe_device_probe(xe); if (err) return err; - xe_pm_init(xe); + err = xe_pm_init(xe); + if (err) + goto err_driver_cleanup; drm_dbg(&xe->drm, "d3cold: capable=%s\n", str_yes_no(xe->d3cold.capable)); return 0; + +err_driver_cleanup: + xe_pci_remove(pdev); + return err; } static void xe_pci_shutdown(struct pci_dev *pdev) diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c index 0d5fbd715a25..37fbeda12d3b 100644 --- a/drivers/gpu/drm/xe/xe_pm.c +++ b/drivers/gpu/drm/xe/xe_pm.c @@ -214,10 +214,21 @@ static void xe_pm_runtime_init(struct xe_device *xe) pm_runtime_put(dev); } -void xe_pm_init_early(struct xe_device *xe) +int xe_pm_init_early(struct xe_device *xe) { + int err; + INIT_LIST_HEAD(&xe->mem_access.vram_userfault.list); - drmm_mutex_init(&xe->drm, &xe->mem_access.vram_userfault.lock); + + err = drmm_mutex_init(&xe->drm, &xe->mem_access.vram_userfault.lock); + if (err) + return err; + + err = drmm_mutex_init(&xe->drm, &xe->d3cold.lock); + if (err) + return err; + + return 0; } /** @@ -225,23 +236,32 @@ void xe_pm_init_early(struct xe_device *xe) * @xe: xe device instance * * This component is responsible for System and Device sleep states. 
+ * + * Returns 0 for success, negative error code otherwise. */ -void xe_pm_init(struct xe_device *xe) +int xe_pm_init(struct xe_device *xe) { + int err; + /* For now suspend/resume is only allowed with GuC */ if (!xe_device_uc_enabled(xe)) - return; - - drmm_mutex_init(&xe->drm, &xe->d3cold.lock); + return 0; xe->d3cold.capable = xe_pm_pci_d3cold_capable(xe); if (xe->d3cold.capable) { - xe_device_sysfs_init(xe); - xe_pm_set_vram_threshold(xe, DEFAULT_VRAM_THRESHOLD); + err = xe_device_sysfs_init(xe); + if (err) + return err; + + err = xe_pm_set_vram_threshold(xe, DEFAULT_VRAM_THRESHOLD); + if (err) + return err; } xe_pm_runtime_init(xe); + + return 0; } /** diff --git a/drivers/gpu/drm/xe/xe_pm.h b/drivers/gpu/drm/xe/xe_pm.h index 119b630ad1d1..18b0613fe57b 100644 --- a/drivers/gpu/drm/xe/xe_pm.h +++ b/drivers/gpu/drm/xe/xe_pm.h @@ -20,8 +20,8 @@ struct xe_device; int xe_pm_suspend(struct xe_device *xe); int xe_pm_resume(struct xe_device *xe); -void xe_pm_init_early(struct xe_device *xe); -void xe_pm_init(struct xe_device *xe); +int xe_pm_init_early(struct xe_device *xe); +int xe_pm_init(struct xe_device *xe); void xe_pm_runtime_fini(struct xe_device *xe); bool xe_pm_runtime_suspended(struct xe_device *xe); int xe_pm_runtime_suspend(struct xe_device *xe); -- cgit v1.2.3 From 5bc9de065b8bb9b8dd8799ecb4592d0403b54281 Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Wed, 17 Apr 2024 07:56:46 -0700 Subject: drm/i915/hwmon: Get rid of devm When both hwmon and hwmon drvdata (on which hwmon depends) are device managed resources, the expectation, on device unbind, is that hwmon will be released before drvdata. However, in i915 there are two separate code paths, which both release either drvdata or hwmon and either can be released before the other. These code paths (for device unbind) are as follows (see also the bug referenced below): Call Trace: release_nodes+0x11/0x70 devres_release_group+0xb2/0x110 component_unbind_all+0x8d/0xa0 component_del+0xa5/0x140 intel_pxp_tee_component_fini+0x29/0x40 [i915] intel_pxp_fini+0x33/0x80 [i915] i915_driver_remove+0x4c/0x120 [i915] i915_pci_remove+0x19/0x30 [i915] pci_device_remove+0x32/0xa0 device_release_driver_internal+0x19c/0x200 unbind_store+0x9c/0xb0 and Call Trace: release_nodes+0x11/0x70 devres_release_all+0x8a/0xc0 device_unbind_cleanup+0x9/0x70 device_release_driver_internal+0x1c1/0x200 unbind_store+0x9c/0xb0 This means that in i915, if we use devm, we cannot guarantee that hwmon will always be released before drvdata. This means we have a use-after-free (UAF) if hwmon sysfs is accessed when drvdata has been released but hwmon hasn't. The only way out of this seems to be to get rid of devm_ and release/free everything explicitly during device unbind.
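In outline, the devm_ calls are replaced by an explicitly ordered register/unregister pair (a minimal sketch with placeholder names, not the full patch; in a real driver the two halves run at bind and unbind respectively):

static void example_hwmon_lifecycle(struct device *dev, void *drvdata,
				    const struct hwmon_chip_info *info)
{
	struct device *hwmon_dev;

	/* register without devm so the release order stays under driver control */
	hwmon_dev = hwmon_device_register_with_info(dev, "example", drvdata, info, NULL);
	if (IS_ERR(hwmon_dev))
		return;

	/* on unbind, release hwmon explicitly, before drvdata is freed */
	hwmon_device_unregister(hwmon_dev);
}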
v2: Change commit message and other minor code changes v3: Cleanup from i915_hwmon_register on error (Armin Wolf) v4: Eliminate potential static analyzer warning (Rodrigo) Eliminate fetch_and_zero (Jani) v5: Restore previous logic for ddat_gt->hwmon_dev error return (Andi) Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/10366 Reviewed-by: Rodrigo Vivi Signed-off-by: Ashutosh Dixit Reviewed-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20240417145646.793223-1-ashutosh.dixit@intel.com --- drivers/gpu/drm/i915/i915_hwmon.c | 46 +++++++++++++++++++++++++++------------ 1 file changed, 32 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_hwmon.c b/drivers/gpu/drm/i915/i915_hwmon.c index b758fd110c20..c0662a022f59 100644 --- a/drivers/gpu/drm/i915/i915_hwmon.c +++ b/drivers/gpu/drm/i915/i915_hwmon.c @@ -793,7 +793,7 @@ void i915_hwmon_register(struct drm_i915_private *i915) if (!IS_DGFX(i915)) return; - hwmon = devm_kzalloc(dev, sizeof(*hwmon), GFP_KERNEL); + hwmon = kzalloc(sizeof(*hwmon), GFP_KERNEL); if (!hwmon) return; @@ -819,14 +819,12 @@ void i915_hwmon_register(struct drm_i915_private *i915) hwm_get_preregistration_info(i915); /* hwmon_dev points to device hwmon */ - hwmon_dev = devm_hwmon_device_register_with_info(dev, ddat->name, - ddat, - &hwm_chip_info, - hwm_groups); - if (IS_ERR(hwmon_dev)) { - i915->hwmon = NULL; - return; - } + hwmon_dev = hwmon_device_register_with_info(dev, ddat->name, + ddat, + &hwm_chip_info, + hwm_groups); + if (IS_ERR(hwmon_dev)) + goto err; ddat->hwmon_dev = hwmon_dev; @@ -839,16 +837,36 @@ void i915_hwmon_register(struct drm_i915_private *i915) if (!hwm_gt_is_visible(ddat_gt, hwmon_energy, hwmon_energy_input, 0)) continue; - hwmon_dev = devm_hwmon_device_register_with_info(dev, ddat_gt->name, - ddat_gt, - &hwm_gt_chip_info, - NULL); + hwmon_dev = hwmon_device_register_with_info(dev, ddat_gt->name, + ddat_gt, + &hwm_gt_chip_info, + NULL); if (!IS_ERR(hwmon_dev)) ddat_gt->hwmon_dev = hwmon_dev; } + return; +err: + i915_hwmon_unregister(i915); } void i915_hwmon_unregister(struct drm_i915_private *i915) { - fetch_and_zero(&i915->hwmon); + struct i915_hwmon *hwmon = i915->hwmon; + struct intel_gt *gt; + int i; + + if (!hwmon) + return; + + for_each_gt(gt, i915, i) + if (hwmon->ddat_gt[i].hwmon_dev) + hwmon_device_unregister(hwmon->ddat_gt[i].hwmon_dev); + + if (hwmon->ddat.hwmon_dev) + hwmon_device_unregister(hwmon->ddat.hwmon_dev); + + mutex_destroy(&hwmon->hwmon_lock); + + kfree(i915->hwmon); + i915->hwmon = NULL; } -- cgit v1.2.3 From 7af6b116261c12b37674ac4639e23e9df9b09fb3 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Thu, 18 Apr 2024 18:37:56 -0400 Subject: drm/i915: Convert intel_runtime_pm_get_noresume towards raw wakeref MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the past, the noresume function was used by the GEM code to ensure wakelocks were held and bump its usage. This is no longer the case and this function was totally unused until it started to be used again by display with commit 77e619a82fc3 ("drm/i915/display: convert inner wakeref get towards get_if_in_use") However, on the display code, most of the callers are using the raw wakeref, rather than the wakelock version. This caused a major regression caught by CI. Another option to this patch is to go with the original plan and use the get_if_in_use variant in the display code, which is enough to fulfil our needs. Then, an extra patch to delete the unused _noresume variant.
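The caller contract being asserted, sketched:

static void example_inner_access(struct intel_runtime_pm *rpm)
{
	intel_wakeref_t outer, inner;

	outer = intel_runtime_pm_get(rpm);          /* outer reference resumes the device */
	inner = intel_runtime_pm_get_noresume(rpm); /* inner: only bumps usage; a wakeref must already be held */

	/* ... hardware access ... */

	intel_runtime_pm_put(rpm, inner);
	intel_runtime_pm_put(rpm, outer);
}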
v2: Keep grabbing wakelock but only assert for wakeref. (Imre) Cc: Imre Deak Cc: Francois Dugast Cc: Ville Syrjälä Fixes: 77e619a82fc3 ("drm/i915/display: convert inner wakeref get towards get_if_in_use") Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/10875 Signed-off-by: Rodrigo Vivi Reviewed-by: Imre Deak Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20240418223756.68427-1-rodrigo.vivi@intel.com --- drivers/gpu/drm/i915/display/intel_display_power.c | 6 ------ drivers/gpu/drm/i915/intel_runtime_pm.c | 14 +++++--------- 2 files changed, 5 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display_power.c b/drivers/gpu/drm/i915/display/intel_display_power.c index 048943d0a881..03dc7edcc443 100644 --- a/drivers/gpu/drm/i915/display/intel_display_power.c +++ b/drivers/gpu/drm/i915/display/intel_display_power.c @@ -640,12 +640,6 @@ release_async_put_domains(struct i915_power_domains *power_domains, enum intel_display_power_domain domain; intel_wakeref_t wakeref; - /* - * The caller must hold already raw wakeref, upgrade that to a proper - * wakeref to make the state checker happy about the HW access during - * power well disabling. - */ - assert_rpm_raw_wakeref_held(rpm); wakeref = intel_runtime_pm_get_noresume(rpm); for_each_power_domain(domain, mask) { diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index d4e844128826..2d0647aca964 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -272,15 +272,11 @@ intel_wakeref_t intel_runtime_pm_get_if_active(struct intel_runtime_pm *rpm) * intel_runtime_pm_get_noresume - grab a runtime pm reference * @rpm: the intel_runtime_pm structure * - * This function grabs a device-level runtime pm reference (mostly used for GEM - * code to ensure the GTT or GT is on). + * This function grabs a device-level runtime pm reference. * - * It will _not_ power up the device but instead only check that it's powered - * on. Therefore it is only valid to call this functions from contexts where - * the device is known to be powered up and where trying to power it up would - * result in hilarity and deadlocks. That pretty much means only the system - * suspend/resume code where this is used to grab runtime pm references for - * delayed setup down in work items. + * It will _not_ resume the device but instead only get an extra wakeref. + * Therefore it is only valid to call this functions from contexts where + * the device is known to be active and with another wakeref previously hold. * * Any runtime pm reference obtained by this function must have a symmetric * call to intel_runtime_pm_put() to release the reference again. @@ -289,7 +285,7 @@ intel_wakeref_t intel_runtime_pm_get_if_active(struct intel_runtime_pm *rpm) */ intel_wakeref_t intel_runtime_pm_get_noresume(struct intel_runtime_pm *rpm) { - assert_rpm_wakelock_held(rpm); + assert_rpm_raw_wakeref_held(rpm); pm_runtime_get_noresume(rpm->kdev); intel_runtime_pm_acquire(rpm, true); -- cgit v1.2.3 From 62422b7be49ea6b82c2b02325966b51bbf855b0d Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Wed, 17 Apr 2024 08:26:22 -0700 Subject: drm/xe: Define all possible engines in media IP descriptors Rather than trying to identify exactly which engines are available on each platform in the IP descriptor, just include the list of all media engines that the IP could theoretically support (i.e., 8 VCS + 4 VECS). 
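For illustration, assuming the XE_HW_ENGINE_* values within each engine class are consecutive, the full theoretical mask can be built with GENMASK() instead of OR-ing individual BIT()s:

/* GENMASK(high, low) sets every bit from low to high inclusive, so: */
.hw_engine_mask =
	GENMASK(XE_HW_ENGINE_VCS7, XE_HW_ENGINE_VCS0) |  /* all 8 VCS engines */
	GENMASK(XE_HW_ENGINE_VECS3, XE_HW_ENGINE_VECS0), /* all 4 VECS engines */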
We still rely on the media fuse registers to tell us which specific engine instances are actually present on a given platform, so there shouldn't be any functional change. This will help prevent mistakes with engine numbering (for example, ambiguity about whether the 2nd VCS engine on a platform with exactly two engines is numbered "VCS1" or "VCS2") and will also future-proof the code a bit more in case new SKUs or platform refreshes extend the engine list. Note that the media fuse register technically has an 8-bit field for VECS engine presence starting on Xe2. However, there's still no MMIO register range reserved for VE engines above VECS3, so VE0-VE3 is still considered the "maximum" VE engine mask that the driver can support for now. Bspec: 52614, 52615, 62567 Signed-off-by: Matt Roper Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240417152621.3357990-2-matthew.d.roper@intel.com --- drivers/gpu/drm/xe/xe_pci.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 65ea44672cab..fb20c9828563 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -185,8 +185,8 @@ static const struct xe_media_desc media_xem = { .rel = 0, .hw_engine_mask = - BIT(XE_HW_ENGINE_VCS0) | BIT(XE_HW_ENGINE_VCS2) | - BIT(XE_HW_ENGINE_VECS0), + GENMASK(XE_HW_ENGINE_VCS7, XE_HW_ENGINE_VCS0) | + GENMASK(XE_HW_ENGINE_VECS3, XE_HW_ENGINE_VECS0), }; static const struct xe_media_desc media_xehpm = { @@ -195,21 +195,23 @@ static const struct xe_media_desc media_xehpm = { .rel = 55, .hw_engine_mask = - BIT(XE_HW_ENGINE_VCS0) | BIT(XE_HW_ENGINE_VCS2) | - BIT(XE_HW_ENGINE_VECS0) | BIT(XE_HW_ENGINE_VECS1), + GENMASK(XE_HW_ENGINE_VCS7, XE_HW_ENGINE_VCS0) | + GENMASK(XE_HW_ENGINE_VECS3, XE_HW_ENGINE_VECS0), }; static const struct xe_media_desc media_xelpmp = { .name = "Xe_LPM+", .hw_engine_mask = - BIT(XE_HW_ENGINE_VCS0) | BIT(XE_HW_ENGINE_VCS2) | - BIT(XE_HW_ENGINE_VECS0) | BIT(XE_HW_ENGINE_GSCCS0) + GENMASK(XE_HW_ENGINE_VCS7, XE_HW_ENGINE_VCS0) | + GENMASK(XE_HW_ENGINE_VECS3, XE_HW_ENGINE_VECS0) | + BIT(XE_HW_ENGINE_GSCCS0) }; static const struct xe_media_desc media_xe2 = { .name = "Xe2_LPM / Xe2_HPM", .hw_engine_mask = - BIT(XE_HW_ENGINE_VCS0) | BIT(XE_HW_ENGINE_VECS0), /* TODO: GSC0 */ + GENMASK(XE_HW_ENGINE_VCS7, XE_HW_ENGINE_VCS0) | + GENMASK(XE_HW_ENGINE_VECS3, XE_HW_ENGINE_VECS0), /* TODO: GSC0 */ }; static const struct xe_device_desc tgl_desc = { -- cgit v1.2.3 From 783d6cdc8231f625c42a367396ae534b15e67ebc Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Thu, 18 Apr 2024 10:30:49 -0400 Subject: drm/xe: Kill xe_device_mem_access_{get*,put} Let's simply convert all the current callers towards direct xe_pm_runtime access and remove this extra layer of indirection. No functional change is expected with this patch since xe_device_mem_access_get was already using xe_pm_runtime_get_noresume at this point. v2: Convert all the current callers instead of a big refactor at once. v3: - Rebased - Squashed the GSC/HDCP - Added a new case: sriov_pf_policy - Improved commit message to highlight that there's no functional change in this patch.
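The per-caller conversion has the same mechanical shape everywhere, sketched here for reference (both APIs appear in the diff below; the surrounding code is elided):

/* Before: through the mem_access indirection layer */
xe_device_mem_access_get(xe);
/* ... hardware access ... */
xe_device_mem_access_put(xe);

/* After: direct runtime PM calls with the same no-resume semantics */
xe_pm_runtime_get_noresume(xe);
/* ... hardware access ... */
xe_pm_runtime_put(xe);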
Reviewed-by: Matthew Auld #v2 Cc: Suraj Kandpal Cc: Michal Wajdeczko Signed-off-by: Rodrigo Vivi Reviewed-by: Suraj Kandpal Link: https://patchwork.freedesktop.org/patch/msgid/20240418143049.43231-1-rodrigo.vivi@intel.com --- drivers/gpu/drm/xe/display/xe_fb_pin.c | 5 +++-- drivers/gpu/drm/xe/display/xe_hdcp_gsc.c | 4 ++-- drivers/gpu/drm/xe/xe_bo.c | 8 +++---- drivers/gpu/drm/xe/xe_device.c | 36 ------------------------------ drivers/gpu/drm/xe/xe_device.h | 3 --- drivers/gpu/drm/xe/xe_device_types.h | 3 --- drivers/gpu/drm/xe/xe_exec_queue.c | 4 ++-- drivers/gpu/drm/xe/xe_ggtt.c | 9 ++++---- drivers/gpu/drm/xe/xe_gt_sriov_pf_policy.c | 5 +++-- drivers/gpu/drm/xe/xe_sched_job.c | 5 +++-- drivers/gpu/drm/xe/xe_vm.c | 6 ++--- 11 files changed, 25 insertions(+), 63 deletions(-) diff --git a/drivers/gpu/drm/xe/display/xe_fb_pin.c b/drivers/gpu/drm/xe/display/xe_fb_pin.c index 3a584bc3a0a3..3e1ae37c4c8b 100644 --- a/drivers/gpu/drm/xe/display/xe_fb_pin.c +++ b/drivers/gpu/drm/xe/display/xe_fb_pin.c @@ -10,6 +10,7 @@ #include "intel_fb_pin.h" #include "xe_ggtt.h" #include "xe_gt.h" +#include "xe_pm.h" #include @@ -193,7 +194,7 @@ static int __xe_pin_fb_vma_ggtt(struct intel_framebuffer *fb, /* TODO: Consider sharing framebuffer mapping? * embed i915_vma inside intel_framebuffer */ - xe_device_mem_access_get(tile_to_xe(ggtt->tile)); + xe_pm_runtime_get_noresume(tile_to_xe(ggtt->tile)); ret = mutex_lock_interruptible(&ggtt->lock); if (ret) goto out; @@ -244,7 +245,7 @@ static int __xe_pin_fb_vma_ggtt(struct intel_framebuffer *fb, out_unlock: mutex_unlock(&ggtt->lock); out: - xe_device_mem_access_put(tile_to_xe(ggtt->tile)); + xe_pm_runtime_put(tile_to_xe(ggtt->tile)); return ret; } diff --git a/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c b/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c index 264b957f3639..d46f87a039f2 100644 --- a/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c +++ b/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c @@ -215,7 +215,7 @@ ssize_t intel_hdcp_gsc_msg_send(struct xe_device *xe, u8 *msg_in, addr_out_off = PAGE_SIZE; host_session_id = xe_gsc_create_host_session_id(); - xe_device_mem_access_get(xe); + xe_pm_runtime_get_noresume(xe); addr_in_wr_off = xe_gsc_emit_header(xe, &hdcp_message->hdcp_bo->vmap, addr_in_wr_off, HECI_MEADDRESS_HDCP, host_session_id, msg_in_len); @@ -247,6 +247,6 @@ ssize_t intel_hdcp_gsc_msg_send(struct xe_device *xe, u8 *msg_in, msg_out_len); out: - xe_device_mem_access_put(xe); + xe_pm_runtime_put(xe); return ret; } diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 9889adcc458b..bc1f794e3e61 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -716,7 +716,7 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, xe_assert(xe, migrate); trace_xe_bo_move(bo, new_mem->mem_type, old_mem_type, move_lacks_source); - xe_device_mem_access_get(xe); + xe_pm_runtime_get_noresume(xe); if (xe_bo_is_pinned(bo) && !xe_bo_is_user(bo)) { /* @@ -740,7 +740,7 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, if (XE_WARN_ON(new_mem->start == XE_BO_INVALID_OFFSET)) { ret = -EINVAL; - xe_device_mem_access_put(xe); + xe_pm_runtime_put(xe); goto out; } @@ -758,7 +758,7 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, new_mem, handle_system_ccs); if (IS_ERR(fence)) { ret = PTR_ERR(fence); - xe_device_mem_access_put(xe); + xe_pm_runtime_put(xe); goto out; } if (!move_lacks_source) { @@ -783,7 +783,7 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, dma_fence_put(fence); } - 
xe_device_mem_access_put(xe); + xe_pm_runtime_put(xe); out: return ret; diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 3b0820594836..55bbc8b8df15 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -729,42 +729,6 @@ void xe_device_assert_mem_access(struct xe_device *xe) xe_assert(xe, !xe_pm_runtime_suspended(xe)); } -void xe_device_mem_access_get(struct xe_device *xe) -{ - int ref; - - /* - * This looks racy, but should be fine since the pm_callback_task only - * transitions from NULL -> current (and back to NULL again), during the - * runtime_resume() or runtime_suspend() callbacks, for which there can - * only be a single one running for our device. We only need to prevent - * recursively calling the runtime_get or runtime_put from those - * callbacks, as well as preventing triggering any access_ongoing - * asserts. - */ - if (xe_pm_read_callback_task(xe) == current) - return; - - xe_pm_runtime_get_noresume(xe); - ref = atomic_inc_return(&xe->mem_access.ref); - - xe_assert(xe, ref != S32_MAX); - -} - -void xe_device_mem_access_put(struct xe_device *xe) -{ - int ref; - - if (xe_pm_read_callback_task(xe) == current) - return; - - ref = atomic_dec_return(&xe->mem_access.ref); - xe_pm_runtime_put(xe); - - xe_assert(xe, ref >= 0); -} - void xe_device_snapshot_print(struct xe_device *xe, struct drm_printer *p) { struct xe_gt *gt; diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h index 54490802e97b..36d4434ebccc 100644 --- a/drivers/gpu/drm/xe/xe_device.h +++ b/drivers/gpu/drm/xe/xe_device.h @@ -133,9 +133,6 @@ static inline struct xe_force_wake *gt_to_fw(struct xe_gt *gt) return >->mmio.fw; } -void xe_device_mem_access_get(struct xe_device *xe); -void xe_device_mem_access_put(struct xe_device *xe); - void xe_device_assert_mem_access(struct xe_device *xe); static inline bool xe_device_in_fault_mode(struct xe_device *xe) diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 8244b177a6a3..8a9f12a8d7c1 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -384,9 +384,6 @@ struct xe_device { * triggering additional actions when they occur. */ struct { - /** @mem_access.ref: ref count of memory accesses */ - atomic_t ref; - /** * @mem_access.vram_userfault: Encapsulate vram_userfault * related stuff diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index 50ec661116a2..395de93579fa 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -589,7 +589,7 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, return -EINVAL; /* The migration vm doesn't hold rpm ref */ - xe_device_mem_access_get(xe); + xe_pm_runtime_get_noresume(xe); flags = EXEC_QUEUE_FLAG_VM | (id ? 
EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD : 0); @@ -598,7 +598,7 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, args->width, hwe, flags, args->extensions); - xe_device_mem_access_put(xe); /* now held by engine */ + xe_pm_runtime_put(xe); /* now held by engine */ xe_vm_put(migrate_vm); if (IS_ERR(new)) { diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index 38f6c94c722d..0d541f55b4fc 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -21,6 +21,7 @@ #include "xe_gt_printk.h" #include "xe_gt_tlb_invalidation.h" #include "xe_map.h" +#include "xe_pm.h" #include "xe_sriov.h" #include "xe_wopcm.h" @@ -403,7 +404,7 @@ static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, if (err) return err; - xe_device_mem_access_get(tile_to_xe(ggtt->tile)); + xe_pm_runtime_get_noresume(tile_to_xe(ggtt->tile)); mutex_lock(&ggtt->lock); err = drm_mm_insert_node_in_range(&ggtt->mm, &bo->ggtt_node, bo->size, alignment, 0, start, end, 0); @@ -413,7 +414,7 @@ static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, if (!err && bo->flags & XE_BO_FLAG_GGTT_INVALIDATE) xe_ggtt_invalidate(ggtt); - xe_device_mem_access_put(tile_to_xe(ggtt->tile)); + xe_pm_runtime_put(tile_to_xe(ggtt->tile)); return err; } @@ -432,7 +433,7 @@ int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo) void xe_ggtt_remove_node(struct xe_ggtt *ggtt, struct drm_mm_node *node, bool invalidate) { - xe_device_mem_access_get(tile_to_xe(ggtt->tile)); + xe_pm_runtime_get_noresume(tile_to_xe(ggtt->tile)); mutex_lock(&ggtt->lock); xe_ggtt_clear(ggtt, node->start, node->size); @@ -443,7 +444,7 @@ void xe_ggtt_remove_node(struct xe_ggtt *ggtt, struct drm_mm_node *node, if (invalidate) xe_ggtt_invalidate(ggtt); - xe_device_mem_access_put(tile_to_xe(ggtt->tile)); + xe_pm_runtime_put(tile_to_xe(ggtt->tile)); } void xe_ggtt_remove_bo(struct xe_ggtt *ggtt, struct xe_bo *bo) diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_policy.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_policy.c index 3eaa17ca54fc..fae5be5a2a11 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_policy.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_policy.c @@ -12,6 +12,7 @@ #include "xe_gt_sriov_printk.h" #include "xe_guc_ct.h" #include "xe_guc_klv_helpers.h" +#include "xe_pm.h" /* * Return: number of KLVs that were successfully parsed and saved, @@ -368,7 +369,7 @@ int xe_gt_sriov_pf_policy_reprovision(struct xe_gt *gt, bool reset) { int err = 0; - xe_device_mem_access_get(gt_to_xe(gt)); + xe_pm_runtime_get_noresume(gt_to_xe(gt)); mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); if (reset) @@ -378,7 +379,7 @@ int xe_gt_sriov_pf_policy_reprovision(struct xe_gt *gt, bool reset) err |= pf_reprovision_sample_period(gt); mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); - xe_device_mem_access_put(gt_to_xe(gt)); + xe_pm_runtime_put(gt_to_xe(gt)); return err ? 
-ENXIO : 0; } diff --git a/drivers/gpu/drm/xe/xe_sched_job.c b/drivers/gpu/drm/xe/xe_sched_job.c index 80daee910ae9..cd8a2fba5438 100644 --- a/drivers/gpu/drm/xe/xe_sched_job.c +++ b/drivers/gpu/drm/xe/xe_sched_job.c @@ -16,6 +16,7 @@ #include "xe_hw_fence.h" #include "xe_lrc.h" #include "xe_macros.h" +#include "xe_pm.h" #include "xe_sync_types.h" #include "xe_trace.h" #include "xe_vm.h" @@ -159,7 +160,7 @@ struct xe_sched_job *xe_sched_job_create(struct xe_exec_queue *q, /* All other jobs require a VM to be open which has a ref */ if (unlikely(q->flags & EXEC_QUEUE_FLAG_KERNEL)) - xe_device_mem_access_get(job_to_xe(job)); + xe_pm_runtime_get_noresume(job_to_xe(job)); xe_device_assert_mem_access(job_to_xe(job)); trace_xe_sched_job_create(job); @@ -192,7 +193,7 @@ void xe_sched_job_destroy(struct kref *ref) container_of(ref, struct xe_sched_job, refcount); if (unlikely(job->q->flags & EXEC_QUEUE_FLAG_KERNEL)) - xe_device_mem_access_put(job_to_xe(job)); + xe_pm_runtime_put(job_to_xe(job)); xe_exec_queue_put(job->q); dma_fence_put(job->fence); drm_sched_job_cleanup(&job->drm); diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 8a858b8588bd..85d6f359142d 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -1266,7 +1266,7 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) vm->pt_ops = &xelp_pt_ops; if (!(flags & XE_VM_FLAG_MIGRATION)) - xe_device_mem_access_get(xe); + xe_pm_runtime_get_noresume(xe); vm_resv_obj = drm_gpuvm_resv_object_alloc(&xe->drm); if (!vm_resv_obj) { @@ -1376,7 +1376,7 @@ err_no_resv: xe_range_fence_tree_fini(&vm->rftree[id]); kfree(vm); if (!(flags & XE_VM_FLAG_MIGRATION)) - xe_device_mem_access_put(xe); + xe_pm_runtime_put(xe); return ERR_PTR(err); } @@ -1507,7 +1507,7 @@ static void xe_vm_free(struct drm_gpuvm *gpuvm) mutex_destroy(&vm->snap_mutex); if (!(vm->flags & XE_VM_FLAG_MIGRATION)) - xe_device_mem_access_put(xe); + xe_pm_runtime_put(xe); for_each_tile(tile, xe, id) XE_WARN_ON(vm->pt_root[id]); -- cgit v1.2.3 From d3b80dc7aa393b559332a82963de954f225083ff Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Fri, 19 Apr 2024 16:10:00 +0200 Subject: drm/xe/pf: Fix xe_gt_sriov_pf_config_print_available_ggtt() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This function uses the internal helper pf_get_spare_ggtt(), which expects the PF's master mutex to be held. Fix that.
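In other words, the intended lock nesting is (an illustrative sketch; the real change is in the diff below): take the PF master mutex first, read the spare size under it, then walk the GGTT under ggtt->lock.

mutex_lock(xe_gt_sriov_pf_master_mutex(gt));
spare = pf_get_spare_ggtt(gt);	/* helper expects the master mutex held */

mutex_lock(&ggtt->lock);
/* ... walk GGTT holes, accumulating avail/total ... */
mutex_unlock(&ggtt->lock);

mutex_unlock(xe_gt_sriov_pf_master_mutex(gt));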
Fixes: ac6598aed1b3 ("drm/xe/pf: Add support to configure SR-IOV VFs") Signed-off-by: Michal Wajdeczko Cc: Piotr Piórkowski Reviewed-by: Piotr Piórkowski Link: https://patchwork.freedesktop.org/patch/msgid/20240419141000.314-1-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c index 0f5614877770..79116ad58620 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c @@ -1932,14 +1932,17 @@ int xe_gt_sriov_pf_config_print_available_ggtt(struct xe_gt *gt, struct drm_prin const struct drm_mm *mm = &ggtt->mm; const struct drm_mm_node *entry; u64 alignment = pf_get_ggtt_alignment(gt); - u64 spare = pf_get_spare_ggtt(gt); u64 hole_min_start = xe_wopcm_size(gt_to_xe(gt)); u64 hole_start, hole_end, hole_size; - u64 avail, total = 0; + u64 spare, avail, total = 0; char buf[10]; xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); + + spare = pf_get_spare_ggtt(gt); + mutex_lock(&ggtt->lock); drm_mm_for_each_hole(entry, mm, hole_start, hole_end) { @@ -1957,6 +1960,7 @@ int xe_gt_sriov_pf_config_print_available_ggtt(struct xe_gt *gt, struct drm_prin } mutex_unlock(&ggtt->lock); + mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); string_get_size(total, 1, STRING_UNITS_2, buf, sizeof(buf)); drm_printf(p, "total:\t%llu\t(%s)\n", total, buf); -- cgit v1.2.3 From 48c64d495fbef343c59598a793d583dfd199d389 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Fri, 19 Apr 2024 17:03:51 +0200 Subject: drm/xe/guc: Fix arguments passed to relay G2H handlers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit By default, the CT code was passing just the payload of the G2H event message, while the Relay code expects the full G2H message, including the HXG header which contains the DATA0 field. Fix that. Fixes: 26d4481ac23f ("drm/xe/guc: Start handling GuC Relay event messages") Signed-off-by: Michal Wajdeczko Reviewed-by: Piotr Piórkowski Link: https://patchwork.freedesktop.org/patch/msgid/20240419150351.358-1-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_guc_ct.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index ac9324338ccf..8ac819a7061e 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -1058,10 +1058,10 @@ static int process_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len) adj_len); break; case XE_GUC_ACTION_GUC2PF_RELAY_FROM_VF: - ret = xe_guc_relay_process_guc2pf(&guc->relay, payload, adj_len); + ret = xe_guc_relay_process_guc2pf(&guc->relay, hxg, hxg_len); break; case XE_GUC_ACTION_GUC2VF_RELAY_FROM_PF: - ret = xe_guc_relay_process_guc2vf(&guc->relay, payload, adj_len); + ret = xe_guc_relay_process_guc2vf(&guc->relay, hxg, hxg_len); break; case GUC_ACTION_GUC2PF_VF_STATE_NOTIFY: ret = xe_gt_sriov_pf_control_process_guc2pf(gt, hxg, hxg_len); -- cgit v1.2.3
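To make the last fix concrete, a schematic of the G2H message layout as described in the commit message above (illustrative; exact field offsets are not spelled out here):

/*
 * G2H event message (schematic):
 *
 *   msg: [ CT header | HXG header (incl. DATA0) | payload ... ]
 *                      ^-- hxg, with hxg_len     ^-- payload, with adj_len
 *
 * The relay handlers decode DATA0 from the HXG header, so they must be
 * handed the full HXG message, not just the payload:
 */
ret = xe_guc_relay_process_guc2pf(&guc->relay, hxg, hxg_len);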