summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Linus Walleij <linus.walleij@linaro.org> 2024-12-21 23:22:38 +0100
committer Linus Walleij <linus.walleij@linaro.org> 2024-12-21 23:22:38 +0100
commit 78132a6772762b54e018d09b12809ef17f1cf6ad (patch)
tree 514772142b3444e79c81c8982be52a4e77b9d1ef
parent c6cf34293ed3fa6f9c5ad0d430ebab3e72c7bec4 (diff)
parent 2054d38ccf708e1add482c6345f7f349059b56e0 (diff)
Merge remote-tracking branch 'drm-xe/drm-xe-next' into drm-tip
-rw-r--r--drivers/gpu/drm/i915/display/intel_cx0_phy.c39
-rw-r--r--drivers/gpu/drm/i915/display/intel_cx0_phy_regs.h4
-rw-r--r--drivers/gpu/drm/xe/instructions/xe_mi_commands.h13
-rw-r--r--drivers/gpu/drm/xe/regs/xe_engine_regs.h3
-rw-r--r--drivers/gpu/drm/xe/regs/xe_lrc_layout.h3
-rw-r--r--drivers/gpu/drm/xe/tests/xe_bo.c16
-rw-r--r--drivers/gpu/drm/xe/tests/xe_live_test_mod.c2
-rw-r--r--drivers/gpu/drm/xe/xe_bo.c12
-rw-r--r--drivers/gpu/drm/xe/xe_devcoredump.c15
-rw-r--r--drivers/gpu/drm/xe/xe_device.c4
-rw-r--r--drivers/gpu/drm/xe/xe_device.h3
-rw-r--r--drivers/gpu/drm/xe/xe_device_types.h8
-rw-r--r--drivers/gpu/drm/xe/xe_exec_queue.c23
-rw-r--r--drivers/gpu/drm/xe/xe_exec_queue_types.h2
-rw-r--r--drivers/gpu/drm/xe/xe_execlist.c10
-rw-r--r--drivers/gpu/drm/xe/xe_gt_idle.c10
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c2
-rw-r--r--drivers/gpu/drm/xe/xe_guc.c30
-rw-r--r--drivers/gpu/drm/xe/xe_hw_engine.c9
-rw-r--r--drivers/gpu/drm/xe/xe_irq.c313
-rw-r--r--drivers/gpu/drm/xe/xe_irq.h8
-rw-r--r--drivers/gpu/drm/xe/xe_lrc.c24
-rw-r--r--drivers/gpu/drm/xe/xe_lrc.h2
-rw-r--r--drivers/gpu/drm/xe/xe_migrate.c11
-rw-r--r--drivers/gpu/drm/xe/xe_oa.c34
-rw-r--r--drivers/gpu/drm/xe/xe_oa_types.h3
-rw-r--r--drivers/gpu/drm/xe/xe_pm.c4
-rw-r--r--drivers/gpu/drm/xe/xe_query.c3
-rw-r--r--drivers/gpu/drm/xe/xe_ring_ops.c6
-rw-r--r--include/drm/intel/pciids.h3
-rw-r--r--include/uapi/drm/xe_drm.h7
31 files changed, 537 insertions, 89 deletions
diff --git a/drivers/gpu/drm/i915/display/intel_cx0_phy.c b/drivers/gpu/drm/i915/display/intel_cx0_phy.c
index 5ebc90d210d4..e768dc6a15b3 100644
--- a/drivers/gpu/drm/i915/display/intel_cx0_phy.c
+++ b/drivers/gpu/drm/i915/display/intel_cx0_phy.c
@@ -3068,7 +3068,10 @@ int intel_mtl_tbt_calc_port_clock(struct intel_encoder *encoder)
val = intel_de_read(display, XELPDP_PORT_CLOCK_CTL(display, encoder->port));
- clock = REG_FIELD_GET(XELPDP_DDI_CLOCK_SELECT_MASK, val);
+ if (DISPLAY_VER(display) >= 30)
+ clock = REG_FIELD_GET(XE3_DDI_CLOCK_SELECT_MASK, val);
+ else
+ clock = REG_FIELD_GET(XELPDP_DDI_CLOCK_SELECT_MASK, val);
drm_WARN_ON(display->drm, !(val & XELPDP_FORWARD_CLOCK_UNGATE));
drm_WARN_ON(display->drm, !(val & XELPDP_TBT_CLOCK_REQUEST));
@@ -3083,13 +3086,18 @@ int intel_mtl_tbt_calc_port_clock(struct intel_encoder *encoder)
return 540000;
case XELPDP_DDI_CLOCK_SELECT_TBT_810:
return 810000;
+ case XELPDP_DDI_CLOCK_SELECT_TBT_312_5:
+ return 1000000;
+ case XELPDP_DDI_CLOCK_SELECT_TBT_625:
+ return 2000000;
default:
MISSING_CASE(clock);
return 162000;
}
}
-static int intel_mtl_tbt_clock_select(int clock)
+static int intel_mtl_tbt_clock_select(struct intel_display *display,
+ int clock)
{
switch (clock) {
case 162000:
@@ -3100,6 +3108,18 @@ static int intel_mtl_tbt_clock_select(int clock)
return XELPDP_DDI_CLOCK_SELECT_TBT_540;
case 810000:
return XELPDP_DDI_CLOCK_SELECT_TBT_810;
+ case 1000000:
+ if (DISPLAY_VER(display) < 30) {
+ /*
+ * drm_WARN() takes (drm, condition, fmt); passing the message
+ * to drm_WARN_ON() would make the string itself the condition.
+ */
+ drm_WARN(display->drm, 1, "UHBR10 not supported for the platform\n");
+ return XELPDP_DDI_CLOCK_SELECT_TBT_162;
+ }
+ return XELPDP_DDI_CLOCK_SELECT_TBT_312_5;
+ case 2000000:
+ if (DISPLAY_VER(display) < 30) {
+ /* Same as above: warn unconditionally with a real format string */
+ drm_WARN(display->drm, 1, "UHBR20 not supported for the platform\n");
+ return XELPDP_DDI_CLOCK_SELECT_TBT_162;
+ }
+ return XELPDP_DDI_CLOCK_SELECT_TBT_625;
default:
MISSING_CASE(clock);
return XELPDP_DDI_CLOCK_SELECT_TBT_162;
@@ -3112,15 +3132,26 @@ static void intel_mtl_tbt_pll_enable(struct intel_encoder *encoder,
struct intel_display *display = to_intel_display(encoder);
enum phy phy = intel_encoder_to_phy(encoder);
u32 val = 0;
+ u32 mask;
/*
* 1. Program PORT_CLOCK_CTL REGISTER to configure
* clock muxes, gating and SSC
*/
- val |= XELPDP_DDI_CLOCK_SELECT(intel_mtl_tbt_clock_select(crtc_state->port_clock));
+
+ if (DISPLAY_VER(display) >= 30) {
+ mask = XE3_DDI_CLOCK_SELECT_MASK;
+ val |= XE3_DDI_CLOCK_SELECT(intel_mtl_tbt_clock_select(display, crtc_state->port_clock));
+ } else {
+ mask = XELPDP_DDI_CLOCK_SELECT_MASK;
+ val |= XELPDP_DDI_CLOCK_SELECT(intel_mtl_tbt_clock_select(display, crtc_state->port_clock));
+ }
+
+ mask |= XELPDP_FORWARD_CLOCK_UNGATE;
val |= XELPDP_FORWARD_CLOCK_UNGATE;
+
intel_de_rmw(display, XELPDP_PORT_CLOCK_CTL(display, encoder->port),
- XELPDP_DDI_CLOCK_SELECT_MASK | XELPDP_FORWARD_CLOCK_UNGATE, val);
+ mask, val);
/* 2. Read back PORT_CLOCK_CTL REGISTER */
val = intel_de_read(display, XELPDP_PORT_CLOCK_CTL(display, encoder->port));
diff --git a/drivers/gpu/drm/i915/display/intel_cx0_phy_regs.h b/drivers/gpu/drm/i915/display/intel_cx0_phy_regs.h
index 4dc6e179a774..da154ff26b96 100644
--- a/drivers/gpu/drm/i915/display/intel_cx0_phy_regs.h
+++ b/drivers/gpu/drm/i915/display/intel_cx0_phy_regs.h
@@ -192,7 +192,9 @@
#define XELPDP_TBT_CLOCK_REQUEST REG_BIT(19)
#define XELPDP_TBT_CLOCK_ACK REG_BIT(18)
#define XELPDP_DDI_CLOCK_SELECT_MASK REG_GENMASK(15, 12)
+#define XE3_DDI_CLOCK_SELECT_MASK REG_GENMASK(16, 12)
#define XELPDP_DDI_CLOCK_SELECT(val) REG_FIELD_PREP(XELPDP_DDI_CLOCK_SELECT_MASK, val)
+#define XE3_DDI_CLOCK_SELECT(val) REG_FIELD_PREP(XE3_DDI_CLOCK_SELECT_MASK, val)
#define XELPDP_DDI_CLOCK_SELECT_NONE 0x0
#define XELPDP_DDI_CLOCK_SELECT_MAXPCLK 0x8
#define XELPDP_DDI_CLOCK_SELECT_DIV18CLK 0x9
@@ -200,6 +202,8 @@
#define XELPDP_DDI_CLOCK_SELECT_TBT_270 0xd
#define XELPDP_DDI_CLOCK_SELECT_TBT_540 0xe
#define XELPDP_DDI_CLOCK_SELECT_TBT_810 0xf
+#define XELPDP_DDI_CLOCK_SELECT_TBT_312_5 0x18
+#define XELPDP_DDI_CLOCK_SELECT_TBT_625 0x19
#define XELPDP_FORWARD_CLOCK_UNGATE REG_BIT(10)
#define XELPDP_LANE1_PHY_CLOCK_SELECT REG_BIT(8)
#define XELPDP_SSC_ENABLE_PLLA REG_BIT(1)
diff --git a/drivers/gpu/drm/xe/instructions/xe_mi_commands.h b/drivers/gpu/drm/xe/instructions/xe_mi_commands.h
index 10ec2920d31b..f4ee910f0943 100644
--- a/drivers/gpu/drm/xe/instructions/xe_mi_commands.h
+++ b/drivers/gpu/drm/xe/instructions/xe_mi_commands.h
@@ -33,12 +33,13 @@
#define MI_TOPOLOGY_FILTER __MI_INSTR(0xD)
#define MI_FORCE_WAKEUP __MI_INSTR(0x1D)
-#define MI_STORE_DATA_IMM __MI_INSTR(0x20)
-#define MI_SDI_GGTT REG_BIT(22)
-#define MI_SDI_LEN_DW GENMASK(9, 0)
-#define MI_SDI_NUM_DW(x) REG_FIELD_PREP(MI_SDI_LEN_DW, (x) + 3 - 2)
-#define MI_SDI_NUM_QW(x) (REG_FIELD_PREP(MI_SDI_LEN_DW, 2 * (x) + 3 - 2) | \
- REG_BIT(21))
+#define MI_STORE_DATA_IMM __MI_INSTR(0x20)
+#define MI_SDI_GGTT REG_BIT(22)
+#define MI_FORCE_WRITE_COMPLETION_CHECK REG_BIT(10)
+#define MI_SDI_LEN_DW GENMASK(9, 0)
+#define MI_SDI_NUM_DW(x) REG_FIELD_PREP(MI_SDI_LEN_DW, (x) + 3 - 2)
+#define MI_SDI_NUM_QW(x) (REG_FIELD_PREP(MI_SDI_LEN_DW, 2 * (x) + 3 - 2) | \
+ REG_BIT(21))
#define MI_LOAD_REGISTER_IMM __MI_INSTR(0x22)
#define MI_LRI_LRM_CS_MMIO REG_BIT(19)
diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
index 7c78496e6213..d86219dedde2 100644
--- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
@@ -83,6 +83,8 @@
#define RING_IMR(base) XE_REG((base) + 0xa8)
#define RING_INT_STATUS_RPT_PTR(base) XE_REG((base) + 0xac)
+#define CS_INT_VEC(base) XE_REG((base) + 0x1b8)
+
#define RING_EIR(base) XE_REG((base) + 0xb0)
#define RING_EMR(base) XE_REG((base) + 0xb4)
#define RING_ESR(base) XE_REG((base) + 0xb8)
@@ -138,6 +140,7 @@
#define RING_MODE(base) XE_REG((base) + 0x29c)
#define GFX_DISABLE_LEGACY_MODE REG_BIT(3)
+#define GFX_MSIX_INTERRUPT_ENABLE REG_BIT(13)
#define RING_TIMESTAMP(base) XE_REG((base) + 0x358)
diff --git a/drivers/gpu/drm/xe/regs/xe_lrc_layout.h b/drivers/gpu/drm/xe/regs/xe_lrc_layout.h
index 045dfd09db99..57944f90bbf6 100644
--- a/drivers/gpu/drm/xe/regs/xe_lrc_layout.h
+++ b/drivers/gpu/drm/xe/regs/xe_lrc_layout.h
@@ -25,6 +25,9 @@
#define CTX_INT_SRC_REPORT_REG (CTX_LRI_INT_REPORT_PTR + 3)
#define CTX_INT_SRC_REPORT_PTR (CTX_LRI_INT_REPORT_PTR + 4)
+#define CTX_CS_INT_VEC_REG 0x5a
+#define CTX_CS_INT_VEC_DATA (CTX_CS_INT_VEC_REG + 1)
+
#define INDIRECT_CTX_RING_HEAD (0x02 + 1)
#define INDIRECT_CTX_RING_TAIL (0x04 + 1)
#define INDIRECT_CTX_RING_START (0x06 + 1)
diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c
index c9ec7a313c6b..405ff904153e 100644
--- a/drivers/gpu/drm/xe/tests/xe_bo.c
+++ b/drivers/gpu/drm/xe/tests/xe_bo.c
@@ -606,8 +606,6 @@ static void xe_bo_shrink_kunit(struct kunit *test)
static struct kunit_case xe_bo_tests[] = {
KUNIT_CASE_PARAM(xe_ccs_migrate_kunit, xe_pci_live_device_gen_param),
KUNIT_CASE_PARAM(xe_bo_evict_kunit, xe_pci_live_device_gen_param),
- KUNIT_CASE_PARAM_ATTR(xe_bo_shrink_kunit, xe_pci_live_device_gen_param,
- {.speed = KUNIT_SPEED_SLOW}),
{}
};
@@ -618,3 +616,17 @@ struct kunit_suite xe_bo_test_suite = {
.init = xe_kunit_helper_xe_device_live_test_init,
};
EXPORT_SYMBOL_IF_KUNIT(xe_bo_test_suite);
+
+/*
+ * The shrink test is tagged KUNIT_SPEED_SLOW and lives in its own case
+ * table, separate from xe_bo_tests.
+ */
+static struct kunit_case xe_bo_shrink_test[] = {
+ KUNIT_CASE_PARAM_ATTR(xe_bo_shrink_kunit, xe_pci_live_device_gen_param,
+ {.speed = KUNIT_SPEED_SLOW}),
+ {}
+};
+
+/* Suite wrapping only the shrink test; uses the same live-device init helper. */
+VISIBLE_IF_KUNIT
+struct kunit_suite xe_bo_shrink_test_suite = {
+ .name = "xe_bo_shrink",
+ .test_cases = xe_bo_shrink_test,
+ .init = xe_kunit_helper_xe_device_live_test_init,
+};
+EXPORT_SYMBOL_IF_KUNIT(xe_bo_shrink_test_suite);
diff --git a/drivers/gpu/drm/xe/tests/xe_live_test_mod.c b/drivers/gpu/drm/xe/tests/xe_live_test_mod.c
index 0d36ab864ec0..81277c77016d 100644
--- a/drivers/gpu/drm/xe/tests/xe_live_test_mod.c
+++ b/drivers/gpu/drm/xe/tests/xe_live_test_mod.c
@@ -6,11 +6,13 @@
#include <kunit/test.h>
extern struct kunit_suite xe_bo_test_suite;
+extern struct kunit_suite xe_bo_shrink_test_suite;
extern struct kunit_suite xe_dma_buf_test_suite;
extern struct kunit_suite xe_migrate_test_suite;
extern struct kunit_suite xe_mocs_test_suite;
kunit_test_suite(xe_bo_test_suite);
+kunit_test_suite(xe_bo_shrink_test_suite);
kunit_test_suite(xe_dma_buf_test_suite);
kunit_test_suite(xe_migrate_test_suite);
kunit_test_suite(xe_mocs_test_suite);
diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index 283cd0294570..e6c896ad5602 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -733,7 +733,7 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
new_mem->mem_type == XE_PL_SYSTEM) {
long timeout = dma_resv_wait_timeout(ttm_bo->base.resv,
DMA_RESV_USAGE_BOOKKEEP,
- true,
+ false,
MAX_SCHEDULE_TIMEOUT);
if (timeout < 0) {
ret = timeout;
@@ -857,8 +857,16 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
out:
if ((!ttm_bo->resource || ttm_bo->resource->mem_type == XE_PL_SYSTEM) &&
- ttm_bo->ttm)
+ ttm_bo->ttm) {
+ long timeout = dma_resv_wait_timeout(ttm_bo->base.resv,
+ DMA_RESV_USAGE_KERNEL,
+ false,
+ MAX_SCHEDULE_TIMEOUT);
+ if (timeout < 0)
+ ret = timeout;
+
xe_tt_unmap_sg(ttm_bo->ttm);
+ }
return ret;
}
diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c
index 71636e80b71d..6980304c8903 100644
--- a/drivers/gpu/drm/xe/xe_devcoredump.c
+++ b/drivers/gpu/drm/xe/xe_devcoredump.c
@@ -119,7 +119,11 @@ static ssize_t __xe_devcoredump_read(char *buffer, size_t count,
drm_puts(&p, "\n**** GuC CT ****\n");
xe_guc_ct_snapshot_print(ss->guc.ct, &p);
- drm_puts(&p, "\n**** Contexts ****\n");
+ /*
+ * Don't add a new section header here because the mesa debug decoder
+ * tool expects the context information to be in the 'GuC CT' section.
+ */
+ /* drm_puts(&p, "\n**** Contexts ****\n"); */
xe_guc_exec_queue_snapshot_print(ss->ge, &p);
drm_puts(&p, "\n**** Job ****\n");
@@ -416,6 +420,15 @@ void xe_print_blob_ascii85(struct drm_printer *p, const char *prefix,
char buff[ASCII85_BUFSZ], *line_buff;
size_t line_pos = 0;
+ /*
+ * Splitting blobs across multiple lines is not compatible with the mesa
+ * debug decoder tool. Note that even dropping the explicit '\n' below
+ * doesn't help because the GuC log is so big some underlying implementation
+ * still splits the lines at 512K characters. So just bail completely for
+ * the moment.
+ */
+ return;
+
#define DMESG_MAX_LINE_LEN 800
#define MIN_SPACE (ASCII85_BUFSZ + 2) /* 85 + "\n\0" */
diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index 56d4ffb650da..bf36e4fb4679 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -325,7 +325,9 @@ struct xe_device *xe_device_create(struct pci_dev *pdev,
xe->info.revid = pdev->revision;
xe->info.force_execlist = xe_modparam.force_execlist;
- spin_lock_init(&xe->irq.lock);
+ err = xe_irq_init(xe);
+ if (err)
+ goto err;
init_waitqueue_head(&xe->ufence_wq);
diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h
index f1fbfe916867..fc3c2af3fb7f 100644
--- a/drivers/gpu/drm/xe/xe_device.h
+++ b/drivers/gpu/drm/xe/xe_device.h
@@ -157,8 +157,7 @@ static inline bool xe_device_has_sriov(struct xe_device *xe)
static inline bool xe_device_has_msix(struct xe_device *xe)
{
- /* TODO: change this when MSI-X support is fully integrated */
- return false;
+ return xe->irq.msix.nvec > 0;
}
static inline bool xe_device_has_memirq(struct xe_device *xe)
diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index ace22e35e769..8a7b15972413 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -348,6 +348,14 @@ struct xe_device {
/** @irq.enabled: interrupts enabled on this device */
atomic_t enabled;
+
+ /** @irq.msix: irq info for platforms that support MSI-X */
+ struct {
+ /** @irq.msix.nvec: number of MSI-X interrupts */
+ u16 nvec;
+ /** @irq.msix.indexes: used to allocate MSI-X indexes */
+ struct xarray indexes;
+ } msix;
} irq;
/** @ttm: ttm device */
diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c
index aab9e561153d..8948f50ee58f 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue.c
+++ b/drivers/gpu/drm/xe/xe_exec_queue.c
@@ -8,6 +8,7 @@
#include <linux/nospec.h>
#include <drm/drm_device.h>
+#include <drm/drm_drv.h>
#include <drm/drm_file.h>
#include <uapi/drm/xe_drm.h>
@@ -16,6 +17,7 @@
#include "xe_hw_engine_class_sysfs.h"
#include "xe_hw_engine_group.h"
#include "xe_hw_fence.h"
+#include "xe_irq.h"
#include "xe_lrc.h"
#include "xe_macros.h"
#include "xe_migrate.h"
@@ -68,6 +70,7 @@ static struct xe_exec_queue *__xe_exec_queue_alloc(struct xe_device *xe,
q->gt = gt;
q->class = hwe->class;
q->width = width;
+ q->msix_vec = XE_IRQ_DEFAULT_MSIX;
q->logical_mask = logical_mask;
q->fence_irq = &gt->fence_irq[hwe->class];
q->ring_ops = gt->ring_ops[hwe->class];
@@ -117,7 +120,7 @@ static int __xe_exec_queue_init(struct xe_exec_queue *q)
}
for (i = 0; i < q->width; ++i) {
- q->lrc[i] = xe_lrc_create(q->hwe, q->vm, SZ_16K);
+ q->lrc[i] = xe_lrc_create(q->hwe, q->vm, SZ_16K, q->msix_vec);
if (IS_ERR(q->lrc[i])) {
err = PTR_ERR(q->lrc[i]);
goto err_unlock;
@@ -766,19 +769,21 @@ bool xe_exec_queue_is_idle(struct xe_exec_queue *q)
*/
void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q)
{
- struct xe_file *xef;
+ struct xe_device *xe = gt_to_xe(q->gt);
struct xe_lrc *lrc;
u32 old_ts, new_ts;
+ int idx;
/*
- * Jobs that are run during driver load may use an exec_queue, but are
- * not associated with a user xe file, so avoid accumulating busyness
- * for kernel specific work.
+ * Jobs that are executed by the kernel don't have a corresponding xe_file
+ * and thus are not accounted.
*/
- if (!q->vm || !q->vm->xef)
+ if (!q->xef)
return;
- xef = q->vm->xef;
+ /* Synchronize with unbind while holding the xe file open */
+ if (!drm_dev_enter(&xe->drm, &idx))
+ return;
/*
* Only sample the first LRC. For parallel submission, all of them are
@@ -790,7 +795,9 @@ void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q)
*/
lrc = q->lrc[0];
new_ts = xe_lrc_update_timestamp(lrc, &old_ts);
- xef->run_ticks[q->class] += (new_ts - old_ts) * q->width;
+ q->xef->run_ticks[q->class] += (new_ts - old_ts) * q->width;
+
+ drm_dev_exit(idx);
}
/**
diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h
index 1158b6062a6c..eec8f9935a58 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue_types.h
+++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h
@@ -63,6 +63,8 @@ struct xe_exec_queue {
char name[MAX_FENCE_NAME_LEN];
/** @width: width (number BB submitted per exec) of this exec queue */
u16 width;
+ /** @msix_vec: MSI-X vector (for platforms that support it) */
+ u16 msix_vec;
/** @fence_irq: fence IRQ used to signal job completion */
struct xe_hw_fence_irq *fence_irq;
diff --git a/drivers/gpu/drm/xe/xe_execlist.c b/drivers/gpu/drm/xe/xe_execlist.c
index a8c416a48812..5ef96deaa881 100644
--- a/drivers/gpu/drm/xe/xe_execlist.c
+++ b/drivers/gpu/drm/xe/xe_execlist.c
@@ -17,6 +17,7 @@
#include "xe_exec_queue.h"
#include "xe_gt.h"
#include "xe_hw_fence.h"
+#include "xe_irq.h"
#include "xe_lrc.h"
#include "xe_macros.h"
#include "xe_mmio.h"
@@ -47,6 +48,7 @@ static void __start_lrc(struct xe_hw_engine *hwe, struct xe_lrc *lrc,
struct xe_mmio *mmio = &gt->mmio;
struct xe_device *xe = gt_to_xe(gt);
u64 lrc_desc;
+ u32 ring_mode = _MASKED_BIT_ENABLE(GFX_DISABLE_LEGACY_MODE);
lrc_desc = xe_lrc_descriptor(lrc);
@@ -80,8 +82,10 @@ static void __start_lrc(struct xe_hw_engine *hwe, struct xe_lrc *lrc,
xe_mmio_write32(mmio, RING_HWS_PGA(hwe->mmio_base),
xe_bo_ggtt_addr(hwe->hwsp));
xe_mmio_read32(mmio, RING_HWS_PGA(hwe->mmio_base));
- xe_mmio_write32(mmio, RING_MODE(hwe->mmio_base),
- _MASKED_BIT_ENABLE(GFX_DISABLE_LEGACY_MODE));
+
+ if (xe_device_has_msix(gt_to_xe(hwe->gt)))
+ ring_mode |= _MASKED_BIT_ENABLE(GFX_MSIX_INTERRUPT_ENABLE);
+ xe_mmio_write32(mmio, RING_MODE(hwe->mmio_base), ring_mode);
xe_mmio_write32(mmio, RING_EXECLIST_SQ_CONTENTS_LO(hwe->mmio_base),
lower_32_bits(lrc_desc));
@@ -265,7 +269,7 @@ struct xe_execlist_port *xe_execlist_port_create(struct xe_device *xe,
port->hwe = hwe;
- port->lrc = xe_lrc_create(hwe, NULL, SZ_16K);
+ port->lrc = xe_lrc_create(hwe, NULL, SZ_16K, XE_IRQ_DEFAULT_MSIX);
if (IS_ERR(port->lrc)) {
err = PTR_ERR(port->lrc);
goto err;
diff --git a/drivers/gpu/drm/xe/xe_gt_idle.c b/drivers/gpu/drm/xe/xe_gt_idle.c
index fd80afeef56a..ffd3ba7f6656 100644
--- a/drivers/gpu/drm/xe/xe_gt_idle.c
+++ b/drivers/gpu/drm/xe/xe_gt_idle.c
@@ -122,10 +122,12 @@ void xe_gt_idle_enable_pg(struct xe_gt *gt)
if (!xe_gt_is_media_type(gt))
gtidle->powergate_enable |= RENDER_POWERGATE_ENABLE;
- for (i = XE_HW_ENGINE_VCS0, j = 0; i <= XE_HW_ENGINE_VCS7; ++i, ++j) {
- if ((gt->info.engine_mask & BIT(i)))
- gtidle->powergate_enable |= (VDN_HCP_POWERGATE_ENABLE(j) |
- VDN_MFXVDENC_POWERGATE_ENABLE(j));
+ if (xe->info.platform != XE_DG1) {
+ for (i = XE_HW_ENGINE_VCS0, j = 0; i <= XE_HW_ENGINE_VCS7; ++i, ++j) {
+ if ((gt->info.engine_mask & BIT(i)))
+ gtidle->powergate_enable |= (VDN_HCP_POWERGATE_ENABLE(j) |
+ VDN_MFXVDENC_POWERGATE_ENABLE(j));
+ }
}
fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c
index 65082f12f1a8..bd621df3ab91 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c
@@ -2120,7 +2120,7 @@ static int pf_validate_vf_config(struct xe_gt *gt, unsigned int vfid)
valid_any = valid_any || (valid_ggtt && is_primary);
if (IS_DGFX(xe)) {
- bool valid_lmem = pf_get_vf_config_ggtt(primary_gt, vfid);
+ bool valid_lmem = pf_get_vf_config_lmem(primary_gt, vfid);
valid_any = valid_any || (valid_lmem && is_primary);
valid_all = valid_all && valid_lmem;
diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c
index 4e2868efb620..408365dfe4ee 100644
--- a/drivers/gpu/drm/xe/xe_guc.c
+++ b/drivers/gpu/drm/xe/xe_guc.c
@@ -147,6 +147,34 @@ static u32 guc_ctl_ads_flags(struct xe_guc *guc)
return flags;
}
+/*
+ * Decide whether the GUC_WA_DUAL_QUEUE flag should be set for @gt.
+ * Returns true when WA 22011391025 applies, or when the GT has CCS engines
+ * and the graphics version is 12.70 or newer.
+ */
+static bool needs_wa_dual_queue(struct xe_gt *gt)
+{
+ /*
+ * The DUAL_QUEUE_WA tells the GuC to not allow concurrent submissions
+ * on RCS and CCSes with different address spaces, which on DG2 is
+ * required as a WA for an HW bug.
+ */
+ if (XE_WA(gt, 22011391025))
+ return true;
+
+ /*
+ * On newer platforms, the HW has been updated to not allow parallel
+ * execution of different address spaces, so the RCS/CCS will stall the
+ * context switch if one of the other RCS/CCSes is busy with a different
+ * address space. While functionally correct, having a submission
+ * stalled on the HW limits the GuC ability to shuffle things around and
+ * can cause complications if the non-stalled submission runs for a long
+ * time, because the GuC doesn't know that the stalled submission isn't
+ * actually running and might declare it as hung. Therefore, we enable
+ * the DUAL_QUEUE_WA on all newer platforms on GTs that have CCS engines
+ * to move management back to the GuC.
+ */
+ if (CCS_MASK(gt) && GRAPHICS_VERx100(gt_to_xe(gt)) >= 1270)
+ return true;
+
+ return false;
+}
+
static u32 guc_ctl_wa_flags(struct xe_guc *guc)
{
struct xe_device *xe = guc_to_xe(guc);
@@ -159,7 +187,7 @@ static u32 guc_ctl_wa_flags(struct xe_guc *guc)
if (XE_WA(gt, 14014475959))
flags |= GUC_WA_HOLD_CCS_SWITCHOUT;
- if (XE_WA(gt, 22011391025))
+ if (needs_wa_dual_queue(gt))
flags |= GUC_WA_DUAL_QUEUE;
/*
diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c
index b19366744148..ac9c666a9652 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine.c
+++ b/drivers/gpu/drm/xe/xe_hw_engine.c
@@ -324,6 +324,7 @@ void xe_hw_engine_enable_ring(struct xe_hw_engine *hwe)
{
u32 ccs_mask =
xe_hw_engine_mask_per_class(hwe->gt, XE_ENGINE_CLASS_COMPUTE);
+ u32 ring_mode = _MASKED_BIT_ENABLE(GFX_DISABLE_LEGACY_MODE);
if (hwe->class == XE_ENGINE_CLASS_COMPUTE && ccs_mask)
xe_mmio_write32(&hwe->gt->mmio, RCU_MODE,
@@ -332,8 +333,10 @@ void xe_hw_engine_enable_ring(struct xe_hw_engine *hwe)
xe_hw_engine_mmio_write32(hwe, RING_HWSTAM(0), ~0x0);
xe_hw_engine_mmio_write32(hwe, RING_HWS_PGA(0),
xe_bo_ggtt_addr(hwe->hwsp));
- xe_hw_engine_mmio_write32(hwe, RING_MODE(0),
- _MASKED_BIT_ENABLE(GFX_DISABLE_LEGACY_MODE));
+
+ if (xe_device_has_msix(gt_to_xe(hwe->gt)))
+ ring_mode |= _MASKED_BIT_ENABLE(GFX_MSIX_INTERRUPT_ENABLE);
+ xe_hw_engine_mmio_write32(hwe, RING_MODE(0), ring_mode);
xe_hw_engine_mmio_write32(hwe, RING_MI_MODE(0),
_MASKED_BIT_DISABLE(STOP_RING));
xe_hw_engine_mmio_read32(hwe, RING_MI_MODE(0));
@@ -772,7 +775,7 @@ static void check_gsc_availability(struct xe_gt *gt)
xe_mmio_write32(&gt->mmio, GUNIT_GSC_INTR_ENABLE, 0);
xe_mmio_write32(&gt->mmio, GUNIT_GSC_INTR_MASK, ~0);
- drm_info(&xe->drm, "gsccs disabled due to lack of FW\n");
+ drm_dbg(&xe->drm, "GSC FW not used, disabling gsccs\n");
}
}
diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c
index 1c509e66694d..32f5a67a917b 100644
--- a/drivers/gpu/drm/xe/xe_irq.c
+++ b/drivers/gpu/drm/xe/xe_irq.c
@@ -10,6 +10,7 @@
#include <drm/drm_managed.h>
#include "display/xe_display.h"
+#include "regs/xe_guc_regs.h"
#include "regs/xe_irq_regs.h"
#include "xe_device.h"
#include "xe_drv.h"
@@ -29,6 +30,11 @@
#define IIR(offset) XE_REG(offset + 0x8)
#define IER(offset) XE_REG(offset + 0xc)
+static int xe_irq_msix_init(struct xe_device *xe);
+static void xe_irq_msix_free(struct xe_device *xe);
+static int xe_irq_msix_request_irqs(struct xe_device *xe);
+static void xe_irq_msix_synchronize_irq(struct xe_device *xe);
+
static void assert_iir_is_zero(struct xe_mmio *mmio, struct xe_reg reg)
{
u32 val = xe_mmio_read32(mmio, reg);
@@ -572,6 +578,11 @@ static void xe_irq_reset(struct xe_device *xe)
if (IS_SRIOV_VF(xe))
return vf_irq_reset(xe);
+ if (xe_device_uses_memirq(xe)) {
+ for_each_tile(tile, xe, id)
+ xe_memirq_reset(&tile->memirq);
+ }
+
for_each_tile(tile, xe, id) {
if (GRAPHICS_VERx100(xe) >= 1210)
dg1_irq_reset(tile);
@@ -614,6 +625,14 @@ static void xe_irq_postinstall(struct xe_device *xe)
if (IS_SRIOV_VF(xe))
return vf_irq_postinstall(xe);
+ if (xe_device_uses_memirq(xe)) {
+ struct xe_tile *tile;
+ unsigned int id;
+
+ for_each_tile(tile, xe, id)
+ xe_memirq_postinstall(&tile->memirq);
+ }
+
xe_display_irq_postinstall(xe, xe_root_mmio_gt(xe));
/*
@@ -656,60 +675,83 @@ static irq_handler_t xe_irq_handler(struct xe_device *xe)
return xelp_irq_handler;
}
-static void irq_uninstall(void *arg)
+/*
+ * Request the MSI interrupt (vector 0) with the handler chosen by
+ * xe_irq_handler(), using @xe as the handler cookie.
+ *
+ * Returns 0 on success, -EINVAL if no handler is available, or the negative
+ * error code from request_irq().
+ */
+static int xe_irq_msi_request_irqs(struct xe_device *xe)
+{
+ struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
+ irq_handler_t irq_handler;
+ int irq, err;
+
+ irq_handler = xe_irq_handler(xe);
+ if (!irq_handler) {
+ /* Terminate the message with a newline like the other drm_err() calls */
+ drm_err(&xe->drm, "No supported interrupt handler\n");
+ return -EINVAL;
+ }
+
+ /* The MSI path allocates a single vector, so index 0 is the only one */
+ irq = pci_irq_vector(pdev, 0);
+ err = request_irq(irq, irq_handler, IRQF_SHARED, DRIVER_NAME, xe);
+ if (err < 0) {
+ drm_err(&xe->drm, "Failed to request MSI IRQ %d\n", err);
+ return err;
+ }
+
+ return 0;
+}
+
+static void xe_irq_msi_free(struct xe_device *xe)
{
- struct xe_device *xe = arg;
struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
int irq;
+ irq = pci_irq_vector(pdev, 0);
+ free_irq(irq, xe);
+}
+
+static void irq_uninstall(void *arg)
+{
+ struct xe_device *xe = arg;
+
if (!atomic_xchg(&xe->irq.enabled, 0))
return;
xe_irq_reset(xe);
- irq = pci_irq_vector(pdev, 0);
- free_irq(irq, xe);
+ if (xe_device_has_msix(xe))
+ xe_irq_msix_free(xe);
+ else
+ xe_irq_msi_free(xe);
+}
+
+/*
+ * Early IRQ setup: initialise xe->irq.lock and run the MSI-X discovery in
+ * xe_irq_msix_init(). Returns whatever xe_irq_msix_init() returns.
+ */
+int xe_irq_init(struct xe_device *xe)
+{
+ spin_lock_init(&xe->irq.lock);
+
+ return xe_irq_msix_init(xe);
+}
int xe_irq_install(struct xe_device *xe)
{
struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
- unsigned int irq_flags = PCI_IRQ_MSIX;
- irq_handler_t irq_handler;
- int err, irq, nvec;
-
- irq_handler = xe_irq_handler(xe);
- if (!irq_handler) {
- drm_err(&xe->drm, "No supported interrupt handler");
- return -EINVAL;
- }
+ unsigned int irq_flags = PCI_IRQ_MSI;
+ int nvec = 1;
+ int err;
xe_irq_reset(xe);
- nvec = pci_msix_vec_count(pdev);
- if (nvec <= 0) {
- if (nvec == -EINVAL) {
- /* MSIX capability is not supported in the device, using MSI */
- irq_flags = PCI_IRQ_MSI;
- nvec = 1;
- } else {
- drm_err(&xe->drm, "MSIX: Failed getting count\n");
- return nvec;
- }
+ if (xe_device_has_msix(xe)) {
+ nvec = xe->irq.msix.nvec;
+ irq_flags = PCI_IRQ_MSIX;
}
err = pci_alloc_irq_vectors(pdev, nvec, nvec, irq_flags);
if (err < 0) {
- drm_err(&xe->drm, "MSI/MSIX: Failed to enable support %d\n", err);
+ drm_err(&xe->drm, "Failed to allocate IRQ vectors: %d\n", err);
return err;
}
- irq = pci_irq_vector(pdev, 0);
- err = request_irq(irq, irq_handler, IRQF_SHARED, DRIVER_NAME, xe);
- if (err < 0) {
- drm_err(&xe->drm, "Failed to request MSI/MSIX IRQ %d\n", err);
+ err = xe_device_has_msix(xe) ? xe_irq_msix_request_irqs(xe) :
+ xe_irq_msi_request_irqs(xe);
+ if (err)
return err;
- }
atomic_set(&xe->irq.enabled, 1);
@@ -722,18 +764,28 @@ int xe_irq_install(struct xe_device *xe)
return 0;
free_irq_handler:
- free_irq(irq, xe);
+ if (xe_device_has_msix(xe))
+ xe_irq_msix_free(xe);
+ else
+ xe_irq_msi_free(xe);
return err;
}
-void xe_irq_suspend(struct xe_device *xe)
+static void xe_irq_msi_synchronize_irq(struct xe_device *xe)
{
- int irq = to_pci_dev(xe->drm.dev)->irq;
+ synchronize_irq(to_pci_dev(xe->drm.dev)->irq);
+}
+void xe_irq_suspend(struct xe_device *xe)
+{
atomic_set(&xe->irq.enabled, 0); /* no new irqs */
- synchronize_irq(irq); /* flush irqs */
+ /* flush irqs */
+ if (xe_device_has_msix(xe))
+ xe_irq_msix_synchronize_irq(xe);
+ else
+ xe_irq_msi_synchronize_irq(xe);
xe_irq_reset(xe); /* turn irqs off */
}
@@ -754,3 +806,198 @@ void xe_irq_resume(struct xe_device *xe)
for_each_gt(gt, xe, id)
xe_irq_enable_hwe(gt);
}
+
+/* MSI-X related definitions and functions below. */
+
+/*
+ * Statically assigned MSI-X vector indexes. NUM_OF_STATIC_MSIX is the first
+ * index available for dynamic allocation in xe_irq_msix_alloc_vector().
+ */
+enum xe_irq_msix_static {
+ GUC2HOST_MSIX = 0,
+ DEFAULT_MSIX = XE_IRQ_DEFAULT_MSIX,
+ /* Must be last */
+ NUM_OF_STATIC_MSIX,
+};
+
+/*
+ * Query the MSI-X vector count of the PCI device and, when available, record
+ * it in xe->irq.msix.nvec and initialise the index xarray.
+ *
+ * A count of -EINVAL is treated as "fall back to MSI": 0 is returned and nvec
+ * is left untouched. Any other negative count is logged and returned.
+ */
+static int xe_irq_msix_init(struct xe_device *xe)
+{
+ struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
+ int nvec = pci_msix_vec_count(pdev);
+
+ if (nvec == -EINVAL)
+ return 0; /* MSI */
+
+ if (nvec < 0) {
+ drm_err(&xe->drm, "Failed getting MSI-X vectors count: %d\n", nvec);
+ return nvec;
+ }
+
+ xe->irq.msix.nvec = nvec;
+ /* XA_FLAGS_ALLOC: the xarray is used with xa_alloc() to hand out indexes */
+ xa_init_flags(&xe->irq.msix.indexes, XA_FLAGS_ALLOC);
+ return 0;
+}
+
+/*
+ * Handler for the GUC2HOST_MSIX vector; @arg is the xe_device.
+ * Returns IRQ_NONE while xe->irq.enabled is zero; otherwise calls
+ * xe_guc_irq_handler() with GUC_INTR_GUC2HOST for the primary GT's GuC of
+ * every tile and returns IRQ_HANDLED.
+ */
+static irqreturn_t guc2host_irq_handler(int irq, void *arg)
+{
+ struct xe_device *xe = arg;
+ struct xe_tile *tile;
+ u8 id;
+
+ if (!atomic_read(&xe->irq.enabled))
+ return IRQ_NONE;
+
+ for_each_tile(tile, xe, id)
+ xe_guc_irq_handler(&tile->primary_gt->uc.guc,
+ GUC_INTR_GUC2HOST);
+
+ return IRQ_HANDLED;
+}
+
+/*
+ * Handler for the DEFAULT_MSIX vector; @arg is the xe_device.
+ * Returns IRQ_NONE while xe->irq.enabled is zero. Otherwise, for every tile
+ * whose memirq has a buffer object, runs xe_memirq_hwe_handler() on each
+ * hardware engine of the GTs belonging to that tile, then returns IRQ_HANDLED.
+ */
+static irqreturn_t xe_irq_msix_default_hwe_handler(int irq, void *arg)
+{
+ unsigned int tile_id, gt_id;
+ struct xe_device *xe = arg;
+ struct xe_memirq *memirq;
+ struct xe_hw_engine *hwe;
+ enum xe_hw_engine_id id;
+ struct xe_tile *tile;
+ struct xe_gt *gt;
+
+ if (!atomic_read(&xe->irq.enabled))
+ return IRQ_NONE;
+
+ for_each_tile(tile, xe, tile_id) {
+ memirq = &tile->memirq;
+ /* Skip tiles without a memirq buffer object */
+ if (!memirq->bo)
+ continue;
+
+ for_each_gt(gt, xe, gt_id) {
+ /* for_each_gt walks the whole device; keep only this tile's GTs */
+ if (gt->tile != tile)
+ continue;
+
+ for_each_hw_engine(hwe, gt, id)
+ xe_memirq_hwe_handler(memirq, hwe);
+ }
+ }
+
+ return IRQ_HANDLED;
+}
+
+/*
+ * Reserve an MSI-X index in xe->irq.msix.indexes, storing @irq_buf as the
+ * entry for that index.
+ *
+ * With @dynamic_msix set, an index in [NUM_OF_STATIC_MSIX, nvec - 1] is
+ * picked and written to *@msix. Otherwise exactly the index already in
+ * *@msix is requested. Returns 0 on success or the error from xa_alloc().
+ */
+static int xe_irq_msix_alloc_vector(struct xe_device *xe, void *irq_buf,
+ bool dynamic_msix, u16 *msix)
+{
+ struct xa_limit limit;
+ int ret;
+ u32 id;
+
+ limit = (dynamic_msix) ? XA_LIMIT(NUM_OF_STATIC_MSIX, xe->irq.msix.nvec - 1) :
+ XA_LIMIT(*msix, *msix);
+ ret = xa_alloc(&xe->irq.msix.indexes, &id, irq_buf, limit, GFP_KERNEL);
+ if (ret)
+ return ret;
+
+ /* Static requests already know their index; only report dynamic ones */
+ if (dynamic_msix)
+ *msix = id;
+
+ return 0;
+}
+
+/* Drop the entry for index @msix from xe->irq.msix.indexes. */
+static void xe_irq_msix_release_vector(struct xe_device *xe, u16 msix)
+{
+ xa_erase(&xe->irq.msix.indexes, msix);
+}
+
+/*
+ * Translate MSI-X index @msix into a Linux IRQ number and request it with
+ * @handler, @name and @irq_buf as the cookie.
+ * Returns 0 on success, or the negative value from pci_irq_vector() or
+ * request_irq().
+ */
+static int xe_irq_msix_request_irq_internal(struct xe_device *xe, irq_handler_t handler,
+ void *irq_buf, const char *name, u16 msix)
+{
+ struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
+ int ret, irq;
+
+ irq = pci_irq_vector(pdev, msix);
+ if (irq < 0)
+ return irq;
+
+ ret = request_irq(irq, handler, IRQF_SHARED, name, irq_buf);
+ if (ret < 0)
+ return ret;
+
+ return 0;
+}
+
+/**
+ * xe_irq_msix_request_irq - reserve an MSI-X index and request its IRQ
+ * @xe: the xe device
+ * @handler: interrupt handler to install
+ * @irq_buf: cookie passed to request_irq() and stored against the index
+ * @name: name passed to request_irq()
+ * @dynamic_msix: true to have an index picked, false to use the one in @msix
+ * @msix: in: requested index when !@dynamic_msix; out: the index in use
+ *
+ * If requesting the IRQ fails, the reserved index is released again.
+ *
+ * Return: 0 on success, negative error code otherwise.
+ */
+int xe_irq_msix_request_irq(struct xe_device *xe, irq_handler_t handler, void *irq_buf,
+ const char *name, bool dynamic_msix, u16 *msix)
+{
+ int ret;
+
+ ret = xe_irq_msix_alloc_vector(xe, irq_buf, dynamic_msix, msix);
+ if (ret)
+ return ret;
+
+ ret = xe_irq_msix_request_irq_internal(xe, handler, irq_buf, name, *msix);
+ if (ret) {
+ drm_err(&xe->drm, "Failed to request IRQ for MSI-X %u\n", *msix);
+ xe_irq_msix_release_vector(xe, *msix);
+ return ret;
+ }
+
+ return 0;
+}
+
+/**
+ * xe_irq_msix_free_irq - free the IRQ behind an MSI-X index and release it
+ * @xe: the xe device
+ * @msix: MSI-X index to free
+ *
+ * The cookie handed to free_irq() is the entry stored for @msix in
+ * xe->irq.msix.indexes. Does nothing if no entry is stored for @msix.
+ */
+void xe_irq_msix_free_irq(struct xe_device *xe, u16 msix)
+{
+ struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
+ int irq;
+ void *irq_buf;
+
+ irq_buf = xa_load(&xe->irq.msix.indexes, msix);
+ if (!irq_buf)
+ return;
+
+ irq = pci_irq_vector(pdev, msix);
+ if (irq < 0) {
+ /* NOTE(review): the index stays reserved in the xarray on this path */
+ drm_err(&xe->drm, "MSI-X %u can't be released, there is no matching IRQ\n", msix);
+ return;
+ }
+
+ free_irq(irq, irq_buf);
+ xe_irq_msix_release_vector(xe, msix);
+}
+
+/*
+ * Request the two statically assigned MSI-X interrupts: GUC2HOST_MSIX and
+ * DEFAULT_MSIX, both with @xe as the handler cookie. If the second request
+ * fails, the first one is freed again.
+ *
+ * Marked static to match the forward declaration at the top of the file.
+ * Returns 0 on success or the error from xe_irq_msix_request_irq().
+ */
+static int xe_irq_msix_request_irqs(struct xe_device *xe)
+{
+ int err;
+ u16 msix;
+
+ msix = GUC2HOST_MSIX;
+ err = xe_irq_msix_request_irq(xe, guc2host_irq_handler, xe,
+ DRIVER_NAME "-guc2host", false, &msix);
+ if (err)
+ return err;
+
+ msix = DEFAULT_MSIX;
+ err = xe_irq_msix_request_irq(xe, xe_irq_msix_default_hwe_handler, xe,
+ DRIVER_NAME "-default-msix", false, &msix);
+ if (err) {
+ /* Unwind the vector requested above */
+ xe_irq_msix_free_irq(xe, GUC2HOST_MSIX);
+ return err;
+ }
+
+ return 0;
+}
+
+/*
+ * Free the IRQ of every index currently stored in xe->irq.msix.indexes, then
+ * destroy the xarray.
+ *
+ * Marked static to match the forward declaration at the top of the file.
+ */
+static void xe_irq_msix_free(struct xe_device *xe)
+{
+ unsigned long msix;
+ u32 *dummy;
+
+ /* Only the index is needed; the entry is looked up again by the callee */
+ xa_for_each(&xe->irq.msix.indexes, msix, dummy)
+ xe_irq_msix_free_irq(xe, msix);
+ xa_destroy(&xe->irq.msix.indexes);
+}
+
+/*
+ * Call synchronize_irq() on the IRQ of every index currently stored in
+ * xe->irq.msix.indexes.
+ *
+ * Marked static to match the forward declaration at the top of the file.
+ */
+static void xe_irq_msix_synchronize_irq(struct xe_device *xe)
+{
+ struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
+ unsigned long msix;
+ u32 *dummy;
+
+ xa_for_each(&xe->irq.msix.indexes, msix, dummy)
+ synchronize_irq(pci_irq_vector(pdev, msix));
+}
diff --git a/drivers/gpu/drm/xe/xe_irq.h b/drivers/gpu/drm/xe/xe_irq.h
index 067514e13675..a28bd577ba52 100644
--- a/drivers/gpu/drm/xe/xe_irq.h
+++ b/drivers/gpu/drm/xe/xe_irq.h
@@ -6,13 +6,21 @@
#ifndef _XE_IRQ_H_
#define _XE_IRQ_H_
+#include <linux/interrupt.h>
+
+#define XE_IRQ_DEFAULT_MSIX 1
+
struct xe_device;
struct xe_tile;
struct xe_gt;
+int xe_irq_init(struct xe_device *xe);
int xe_irq_install(struct xe_device *xe);
void xe_irq_suspend(struct xe_device *xe);
void xe_irq_resume(struct xe_device *xe);
void xe_irq_enable_hwe(struct xe_gt *gt);
+int xe_irq_msix_request_irq(struct xe_device *xe, irq_handler_t handler, void *irq_buf,
+ const char *name, bool dynamic_msix, u16 *msix);
+void xe_irq_msix_free_irq(struct xe_device *xe, u16 msix);
#endif
diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c
index 22e58c6e2a35..bbb9ffbf6367 100644
--- a/drivers/gpu/drm/xe/xe_lrc.c
+++ b/drivers/gpu/drm/xe/xe_lrc.c
@@ -584,6 +584,7 @@ static void set_memory_based_intr(u32 *regs, struct xe_hw_engine *hwe)
{
struct xe_memirq *memirq = &gt_to_tile(hwe->gt)->memirq;
struct xe_device *xe = gt_to_xe(hwe->gt);
+ u8 num_regs;
if (!xe_device_uses_memirq(xe))
return;
@@ -593,12 +594,18 @@ static void set_memory_based_intr(u32 *regs, struct xe_hw_engine *hwe)
regs[CTX_INT_MASK_ENABLE_REG] = RING_IMR(0).addr;
regs[CTX_INT_MASK_ENABLE_PTR] = xe_memirq_enable_ptr(memirq);
- regs[CTX_LRI_INT_REPORT_PTR] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(2) |
+ num_regs = xe_device_has_msix(xe) ? 3 : 2;
+ regs[CTX_LRI_INT_REPORT_PTR] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(num_regs) |
MI_LRI_LRM_CS_MMIO | MI_LRI_FORCE_POSTED;
regs[CTX_INT_STATUS_REPORT_REG] = RING_INT_STATUS_RPT_PTR(0).addr;
regs[CTX_INT_STATUS_REPORT_PTR] = xe_memirq_status_ptr(memirq, hwe);
regs[CTX_INT_SRC_REPORT_REG] = RING_INT_SRC_RPT_PTR(0).addr;
regs[CTX_INT_SRC_REPORT_PTR] = xe_memirq_source_ptr(memirq, hwe);
+
+ if (xe_device_has_msix(xe)) {
+ regs[CTX_CS_INT_VEC_REG] = CS_INT_VEC(0).addr;
+ /* CTX_CS_INT_VEC_DATA will be set in xe_lrc_init */
+ }
}
static int lrc_ring_mi_mode(struct xe_hw_engine *hwe)
@@ -876,7 +883,7 @@ static void xe_lrc_finish(struct xe_lrc *lrc)
#define PVC_CTX_ACC_CTR_THOLD (0x2a + 1)
static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
- struct xe_vm *vm, u32 ring_size)
+ struct xe_vm *vm, u32 ring_size, u16 msix_vec)
{
struct xe_gt *gt = hwe->gt;
struct xe_tile *tile = gt_to_tile(gt);
@@ -945,6 +952,14 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
xe_drm_client_add_bo(vm->xef->client, lrc->bo);
}
+ if (xe_device_has_msix(xe)) {
+ xe_lrc_write_ctx_reg(lrc, CTX_INT_STATUS_REPORT_PTR,
+ xe_memirq_status_ptr(&tile->memirq, hwe));
+ xe_lrc_write_ctx_reg(lrc, CTX_INT_SRC_REPORT_PTR,
+ xe_memirq_source_ptr(&tile->memirq, hwe));
+ xe_lrc_write_ctx_reg(lrc, CTX_CS_INT_VEC_DATA, msix_vec << 16 | msix_vec);
+ }
+
if (xe_gt_has_indirect_ring_state(gt)) {
xe_lrc_write_ctx_reg(lrc, CTX_INDIRECT_RING_STATE,
__xe_lrc_indirect_ring_ggtt_addr(lrc));
@@ -1005,6 +1020,7 @@ err_lrc_finish:
* @hwe: Hardware Engine
* @vm: The VM (address space)
* @ring_size: LRC ring size
+ * @msix_vec: MSI-X interrupt vector (for platforms that support it)
*
* Allocate and initialize the Logical Ring Context (LRC).
*
@@ -1012,7 +1028,7 @@ err_lrc_finish:
* upon failure.
*/
struct xe_lrc *xe_lrc_create(struct xe_hw_engine *hwe, struct xe_vm *vm,
- u32 ring_size)
+ u32 ring_size, u16 msix_vec)
{
struct xe_lrc *lrc;
int err;
@@ -1021,7 +1037,7 @@ struct xe_lrc *xe_lrc_create(struct xe_hw_engine *hwe, struct xe_vm *vm,
if (!lrc)
return ERR_PTR(-ENOMEM);
- err = xe_lrc_init(lrc, hwe, vm, ring_size);
+ err = xe_lrc_init(lrc, hwe, vm, ring_size, msix_vec);
if (err) {
kfree(lrc);
return ERR_PTR(err);
diff --git a/drivers/gpu/drm/xe/xe_lrc.h b/drivers/gpu/drm/xe/xe_lrc.h
index b459dcab8787..4206e6a8b50a 100644
--- a/drivers/gpu/drm/xe/xe_lrc.h
+++ b/drivers/gpu/drm/xe/xe_lrc.h
@@ -42,7 +42,7 @@ struct xe_lrc_snapshot {
#define LRC_PPHWSP_SCRATCH_ADDR (0x34 * 4)
struct xe_lrc *xe_lrc_create(struct xe_hw_engine *hwe, struct xe_vm *vm,
- u32 ring_size);
+ u32 ring_size, u16 msix_vec);
void xe_lrc_destroy(struct kref *ref);
/**
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index 1b97d90aadda..8b32fad67878 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -581,7 +581,9 @@ static void emit_pte(struct xe_migrate *m,
while (ptes) {
u32 chunk = min(MAX_PTE_PER_SDI, ptes);
- bb->cs[bb->len++] = MI_STORE_DATA_IMM | MI_SDI_NUM_QW(chunk);
+ bb->cs[bb->len++] = MI_STORE_DATA_IMM |
+ MI_FORCE_WRITE_COMPLETION_CHECK |
+ MI_SDI_NUM_QW(chunk);
bb->cs[bb->len++] = ofs;
bb->cs[bb->len++] = 0;
@@ -1223,7 +1225,9 @@ static void write_pgtable(struct xe_tile *tile, struct xe_bb *bb, u64 ppgtt_ofs,
if (!(bb->len & 1))
bb->cs[bb->len++] = MI_NOOP;
- bb->cs[bb->len++] = MI_STORE_DATA_IMM | MI_SDI_NUM_QW(chunk);
+ bb->cs[bb->len++] = MI_STORE_DATA_IMM |
+ MI_FORCE_WRITE_COMPLETION_CHECK |
+ MI_SDI_NUM_QW(chunk);
bb->cs[bb->len++] = lower_32_bits(addr);
bb->cs[bb->len++] = upper_32_bits(addr);
if (pt_op->bind)
@@ -1388,7 +1392,8 @@ __xe_migrate_update_pgtables(struct xe_migrate *m,
u32 idx = 0;
bb->cs[bb->len++] = MI_STORE_DATA_IMM |
- MI_SDI_NUM_QW(chunk);
+ MI_FORCE_WRITE_COMPLETION_CHECK |
+ MI_SDI_NUM_QW(chunk);
bb->cs[bb->len++] = ofs;
bb->cs[bb->len++] = 0; /* upper_32_bits */
diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c
index ec88b18e9baa..ae94490b0eac 100644
--- a/drivers/gpu/drm/xe/xe_oa.c
+++ b/drivers/gpu/drm/xe/xe_oa.c
@@ -97,6 +97,7 @@ struct xe_oa_open_param {
int num_syncs;
struct xe_sync_entry *syncs;
size_t oa_buffer_size;
+ int wait_num_reports;
};
struct xe_oa_config_bo {
@@ -241,11 +242,10 @@ static void oa_timestamp_clear(struct xe_oa_stream *stream, u32 *report)
static bool xe_oa_buffer_check_unlocked(struct xe_oa_stream *stream)
{
u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo);
+ u32 tail, hw_tail, partial_report_size, available;
int report_size = stream->oa_buffer.format->size;
- u32 tail, hw_tail;
unsigned long flags;
bool pollin;
- u32 partial_report_size;
spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);
@@ -289,8 +289,8 @@ static bool xe_oa_buffer_check_unlocked(struct xe_oa_stream *stream)
stream->oa_buffer.tail = tail;
- pollin = xe_oa_circ_diff(stream, stream->oa_buffer.tail,
- stream->oa_buffer.head) >= report_size;
+ available = xe_oa_circ_diff(stream, stream->oa_buffer.tail, stream->oa_buffer.head);
+ pollin = available >= stream->wait_num_reports * report_size;
spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
@@ -690,7 +690,9 @@ static void xe_oa_store_flex(struct xe_oa_stream *stream, struct xe_lrc *lrc,
u32 offset = xe_bo_ggtt_addr(lrc->bo);
do {
- bb->cs[bb->len++] = MI_STORE_DATA_IMM | MI_SDI_GGTT | MI_SDI_NUM_DW(1);
+ bb->cs[bb->len++] = MI_STORE_DATA_IMM | MI_SDI_GGTT |
+ MI_FORCE_WRITE_COMPLETION_CHECK |
+ MI_SDI_NUM_DW(1);
bb->cs[bb->len++] = offset + flex->offset * sizeof(u32);
bb->cs[bb->len++] = 0;
bb->cs[bb->len++] = flex->value;
@@ -1285,6 +1287,17 @@ static int xe_oa_set_prop_oa_buffer_size(struct xe_oa *oa, u64 value,
return 0;
}
+/*
+ * Property setter for DRM_XE_OA_PROPERTY_WAIT_NUM_REPORTS: validates the
+ * user-supplied count and stores it in param->wait_num_reports.
+ *
+ * Returns 0 on success, or -EINVAL when @value is 0 or does not fit the int
+ * field it is stored in.
+ */
+static int xe_oa_set_prop_wait_num_reports(struct xe_oa *oa, u64 value,
+					   struct xe_oa_open_param *param)
+{
+	int num_reports = value;
+
+	/*
+	 * @value comes from userspace as a u64 but is stored in an int. Refuse
+	 * 0 and any value that turns negative or changes when narrowed, rather
+	 * than silently keeping only its low bits.
+	 */
+	if (num_reports <= 0 || (u64)num_reports != value) {
+		drm_dbg(&oa->xe->drm, "wait_num_reports %llu\n", value);
+		return -EINVAL;
+	}
+
+	param->wait_num_reports = num_reports;
+	return 0;
+}
+
static int xe_oa_set_prop_ret_inval(struct xe_oa *oa, u64 value,
struct xe_oa_open_param *param)
{
@@ -1306,6 +1319,7 @@ static const xe_oa_set_property_fn xe_oa_set_property_funcs_open[] = {
[DRM_XE_OA_PROPERTY_NUM_SYNCS] = xe_oa_set_prop_num_syncs,
[DRM_XE_OA_PROPERTY_SYNCS] = xe_oa_set_prop_syncs_user,
[DRM_XE_OA_PROPERTY_OA_BUFFER_SIZE] = xe_oa_set_prop_oa_buffer_size,
+ [DRM_XE_OA_PROPERTY_WAIT_NUM_REPORTS] = xe_oa_set_prop_wait_num_reports,
};
static const xe_oa_set_property_fn xe_oa_set_property_funcs_config[] = {
@@ -1321,6 +1335,7 @@ static const xe_oa_set_property_fn xe_oa_set_property_funcs_config[] = {
[DRM_XE_OA_PROPERTY_NUM_SYNCS] = xe_oa_set_prop_num_syncs,
[DRM_XE_OA_PROPERTY_SYNCS] = xe_oa_set_prop_syncs_user,
[DRM_XE_OA_PROPERTY_OA_BUFFER_SIZE] = xe_oa_set_prop_ret_inval,
+ [DRM_XE_OA_PROPERTY_WAIT_NUM_REPORTS] = xe_oa_set_prop_ret_inval,
};
static int xe_oa_user_ext_set_property(struct xe_oa *oa, enum xe_oa_user_extn_from from,
@@ -1797,6 +1812,7 @@ static int xe_oa_stream_init(struct xe_oa_stream *stream,
stream->periodic = param->period_exponent > 0;
stream->period_exponent = param->period_exponent;
stream->no_preempt = param->no_preempt;
+ stream->wait_num_reports = param->wait_num_reports;
stream->xef = xe_file_get(param->xef);
stream->num_syncs = param->num_syncs;
@@ -2156,6 +2172,14 @@ int xe_oa_stream_open_ioctl(struct drm_device *dev, u64 data, struct drm_file *f
if (!param.oa_buffer_size)
param.oa_buffer_size = DEFAULT_XE_OA_BUFFER_SIZE;
+ if (!param.wait_num_reports)
+ param.wait_num_reports = 1;
+ if (param.wait_num_reports > param.oa_buffer_size / f->size) {
+ drm_dbg(&oa->xe->drm, "wait_num_reports %d\n", param.wait_num_reports);
+ ret = -EINVAL;
+ goto err_exec_q;
+ }
+
ret = xe_oa_parse_syncs(oa, &param);
if (ret)
goto err_exec_q;
diff --git a/drivers/gpu/drm/xe/xe_oa_types.h b/drivers/gpu/drm/xe/xe_oa_types.h
index df7793915628..2dcd3b9562e9 100644
--- a/drivers/gpu/drm/xe/xe_oa_types.h
+++ b/drivers/gpu/drm/xe/xe_oa_types.h
@@ -218,6 +218,9 @@ struct xe_oa_stream {
/** @pollin: Whether there is data available to read */
bool pollin;
+ /** @wait_num_reports: Number of reports to wait for before signalling pollin */
+ int wait_num_reports;
+
/** @periodic: Whether periodic sampling is currently enabled */
bool periodic;
diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c
index a6761cb769b2..c6e57af0144c 100644
--- a/drivers/gpu/drm/xe/xe_pm.c
+++ b/drivers/gpu/drm/xe/xe_pm.c
@@ -7,6 +7,7 @@
#include <linux/fault-inject.h>
#include <linux/pm_runtime.h>
+#include <linux/suspend.h>
#include <drm/drm_managed.h>
#include <drm/ttm/ttm_placement.h>
@@ -607,7 +608,8 @@ static bool xe_pm_suspending_or_resuming(struct xe_device *xe)
struct device *dev = xe->drm.dev;
return dev->power.runtime_status == RPM_SUSPENDING ||
- dev->power.runtime_status == RPM_RESUMING;
+ dev->power.runtime_status == RPM_RESUMING ||
+ pm_suspend_target_state != PM_SUSPEND_ON;
#else
return false;
#endif
diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c
index d2a816f71bf2..c059639613f7 100644
--- a/drivers/gpu/drm/xe/xe_query.c
+++ b/drivers/gpu/drm/xe/xe_query.c
@@ -672,7 +672,8 @@ static int query_oa_units(struct xe_device *xe,
du->oa_unit_type = u->type;
du->oa_timestamp_freq = xe_oa_timestamp_frequency(gt);
du->capabilities = DRM_XE_OA_CAPS_BASE | DRM_XE_OA_CAPS_SYNCS |
- DRM_XE_OA_CAPS_OA_BUFFER_SIZE;
+ DRM_XE_OA_CAPS_OA_BUFFER_SIZE |
+ DRM_XE_OA_CAPS_WAIT_NUM_REPORTS;
j = 0;
for_each_hw_engine(hwe, gt, hwe_id) {
diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c
index 0be4f489d3e1..3a75a08b6be9 100644
--- a/drivers/gpu/drm/xe/xe_ring_ops.c
+++ b/drivers/gpu/drm/xe/xe_ring_ops.c
@@ -72,7 +72,8 @@ static int emit_user_interrupt(u32 *dw, int i)
static int emit_store_imm_ggtt(u32 addr, u32 value, u32 *dw, int i)
{
- dw[i++] = MI_STORE_DATA_IMM | MI_SDI_GGTT | MI_SDI_NUM_DW(1);
+ dw[i++] = MI_STORE_DATA_IMM | MI_SDI_GGTT |
+ MI_FORCE_WRITE_COMPLETION_CHECK | MI_SDI_NUM_DW(1);
dw[i++] = addr;
dw[i++] = 0;
dw[i++] = value;
@@ -162,7 +163,8 @@ static int emit_pipe_invalidate(u32 mask_flags, bool invalidate_tlb, u32 *dw,
static int emit_store_imm_ppgtt_posted(u64 addr, u64 value,
u32 *dw, int i)
{
- dw[i++] = MI_STORE_DATA_IMM | MI_SDI_NUM_QW(1);
+ dw[i++] = MI_STORE_DATA_IMM | MI_FORCE_WRITE_COMPLETION_CHECK |
+ MI_SDI_NUM_QW(1);
dw[i++] = lower_32_bits(addr);
dw[i++] = upper_32_bits(addr);
dw[i++] = lower_32_bits(value);
diff --git a/include/drm/intel/pciids.h b/include/drm/intel/pciids.h
index c6518b0992cf..77c826589ec1 100644
--- a/include/drm/intel/pciids.h
+++ b/include/drm/intel/pciids.h
@@ -858,6 +858,7 @@
MACRO__(0xB092, ## __VA_ARGS__), \
MACRO__(0xB0A0, ## __VA_ARGS__), \
MACRO__(0xB0A1, ## __VA_ARGS__), \
- MACRO__(0xB0A2, ## __VA_ARGS__)
+ MACRO__(0xB0A2, ## __VA_ARGS__), \
+ MACRO__(0xB0B0, ## __VA_ARGS__)
#endif /* __PCIIDS_H__ */
diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index 0383b52cbd86..f62689ca861a 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -1487,6 +1487,7 @@ struct drm_xe_oa_unit {
#define DRM_XE_OA_CAPS_BASE (1 << 0)
#define DRM_XE_OA_CAPS_SYNCS (1 << 1)
#define DRM_XE_OA_CAPS_OA_BUFFER_SIZE (1 << 2)
+#define DRM_XE_OA_CAPS_WAIT_NUM_REPORTS (1 << 3)
/** @oa_timestamp_freq: OA timestamp freq */
__u64 oa_timestamp_freq;
@@ -1660,6 +1661,12 @@ enum drm_xe_oa_property_id {
* buffer is allocated by default.
*/
DRM_XE_OA_PROPERTY_OA_BUFFER_SIZE,
+
+ /**
+ * @DRM_XE_OA_PROPERTY_WAIT_NUM_REPORTS: Number of reports to wait
+ * for before unblocking poll or read
+ */
+ DRM_XE_OA_PROPERTY_WAIT_NUM_REPORTS,
};
/**