author    Danilo Krummrich <dakr@redhat.com>    2024-05-13 22:28:28 +0200
committer Danilo Krummrich <dakr@redhat.com>    2024-05-13 22:28:28 +0200
commit    badc4e6b9af54d67dfa62aa9721e3d43689065d9 (patch)
tree      c978861e27bcb88b07b08434e626ab6ba1f1c2d8
parent    014aeee56a21c2b210a2808ecccfa1aedfe51bea (diff)
parent    664de50cbfae048d08e9f3c1c0da377d1269e6d1 (diff)
Merge remote-tracking branch 'drm-xe/drm-xe-next' into drm-tip
# Conflicts:
#	drivers/gpu/drm/xe/xe_device.h
#	drivers/gpu/drm/xe/xe_vm.c
-rw-r--r-- drivers/gpu/drm/xe/Makefile | 2
-rw-r--r-- drivers/gpu/drm/xe/abi/guc_klvs_abi.h | 26
-rw-r--r-- drivers/gpu/drm/xe/abi/guc_relay_actions_abi.h | 170
-rw-r--r-- drivers/gpu/drm/xe/display/xe_hdcp_gsc.c | 1
-rw-r--r-- drivers/gpu/drm/xe/instructions/xe_gpu_commands.h (renamed from drivers/gpu/drm/xe/regs/xe_gpu_commands.h) | 0
-rw-r--r-- drivers/gpu/drm/xe/regs/xe_engine_regs.h | 10
-rw-r--r-- drivers/gpu/drm/xe/regs/xe_gt_regs.h | 55
-rw-r--r-- drivers/gpu/drm/xe/regs/xe_lrc_layout.h | 7
-rw-r--r-- drivers/gpu/drm/xe/regs/xe_sriov_regs.h | 3
-rw-r--r-- drivers/gpu/drm/xe/tests/Makefile | 1
-rw-r--r-- drivers/gpu/drm/xe/tests/xe_args_test.c | 221
-rw-r--r-- drivers/gpu/drm/xe/tests/xe_gt_sriov_pf_service_test.c | 232
-rw-r--r-- drivers/gpu/drm/xe/tests/xe_migrate.c | 86
-rw-r--r-- drivers/gpu/drm/xe/xe_args.h | 143
-rw-r--r-- drivers/gpu/drm/xe/xe_bb.c | 2
-rw-r--r-- drivers/gpu/drm/xe/xe_bo.c | 61
-rw-r--r-- drivers/gpu/drm/xe/xe_bo.h | 1
-rw-r--r-- drivers/gpu/drm/xe/xe_debugfs.c | 58
-rw-r--r-- drivers/gpu/drm/xe/xe_device.c | 46
-rw-r--r-- drivers/gpu/drm/xe/xe_device.h | 13
-rw-r--r-- drivers/gpu/drm/xe/xe_device_types.h | 12
-rw-r--r-- drivers/gpu/drm/xe/xe_exec_queue.h | 9
-rw-r--r-- drivers/gpu/drm/xe/xe_execlist.c | 1
-rw-r--r-- drivers/gpu/drm/xe/xe_gsc.c | 1
-rw-r--r-- drivers/gpu/drm/xe/xe_gsc_proxy.c | 1
-rw-r--r-- drivers/gpu/drm/xe/xe_gsc_submit.c | 1
-rw-r--r-- drivers/gpu/drm/xe/xe_gt.c | 21
-rw-r--r-- drivers/gpu/drm/xe/xe_gt.h | 7
-rw-r--r-- drivers/gpu/drm/xe/xe_gt_ccs_mode.c | 4
-rw-r--r-- drivers/gpu/drm/xe/xe_gt_clock.c | 1
-rw-r--r-- drivers/gpu/drm/xe/xe_gt_debugfs.c | 16
-rw-r--r-- drivers/gpu/drm/xe/xe_gt_idle.c | 1
-rw-r--r-- drivers/gpu/drm/xe/xe_gt_mcr.h | 24
-rw-r--r-- drivers/gpu/drm/xe/xe_gt_pagefault.c | 16
-rw-r--r-- drivers/gpu/drm/xe/xe_gt_sriov_pf.c | 32
-rw-r--r-- drivers/gpu/drm/xe/xe_gt_sriov_pf.h | 5
-rw-r--r-- drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c | 18
-rw-r--r-- drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c | 348
-rw-r--r-- drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.h | 18
-rw-r--r-- drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c | 550
-rw-r--r-- drivers/gpu/drm/xe/xe_gt_sriov_pf_service.h | 36
-rw-r--r-- drivers/gpu/drm/xe/xe_gt_sriov_pf_service_types.h | 52
-rw-r--r-- drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h | 5
-rw-r--r-- drivers/gpu/drm/xe/xe_gt_throttle_sysfs.h | 5
-rw-r--r-- drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c | 64
-rw-r--r-- drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h | 3
-rw-r--r-- drivers/gpu/drm/xe/xe_gt_topology.c | 14
-rw-r--r-- drivers/gpu/drm/xe/xe_gt_topology.h | 3
-rw-r--r-- drivers/gpu/drm/xe/xe_gt_types.h | 6
-rw-r--r-- drivers/gpu/drm/xe/xe_guc.c | 50
-rw-r--r-- drivers/gpu/drm/xe/xe_guc.h | 2
-rw-r--r-- drivers/gpu/drm/xe/xe_guc_ads.c | 70
-rw-r--r-- drivers/gpu/drm/xe/xe_guc_ads.h | 3
-rw-r--r-- drivers/gpu/drm/xe/xe_guc_db_mgr.c | 3
-rw-r--r-- drivers/gpu/drm/xe/xe_guc_id_mgr.c | 3
-rw-r--r-- drivers/gpu/drm/xe/xe_guc_pc.c | 4
-rw-r--r-- drivers/gpu/drm/xe/xe_guc_relay.c | 8
-rw-r--r-- drivers/gpu/drm/xe/xe_guc_submit.c | 156
-rw-r--r-- drivers/gpu/drm/xe/xe_guc_submit.h | 2
-rw-r--r-- drivers/gpu/drm/xe/xe_guc_types.h | 9
-rw-r--r-- drivers/gpu/drm/xe/xe_hw_engine.c | 150
-rw-r--r-- drivers/gpu/drm/xe/xe_hw_engine_types.h | 24
-rw-r--r-- drivers/gpu/drm/xe/xe_hwmon.c | 9
-rw-r--r-- drivers/gpu/drm/xe/xe_lrc.c | 192
-rw-r--r-- drivers/gpu/drm/xe/xe_lrc.h | 10
-rw-r--r-- drivers/gpu/drm/xe/xe_lrc_types.h | 4
-rw-r--r-- drivers/gpu/drm/xe/xe_migrate.c | 2
-rw-r--r-- drivers/gpu/drm/xe/xe_mmio.c | 39
-rw-r--r-- drivers/gpu/drm/xe/xe_mmio.h | 7
-rw-r--r-- drivers/gpu/drm/xe/xe_mocs.c | 300
-rw-r--r-- drivers/gpu/drm/xe/xe_mocs.h | 11
-rw-r--r-- drivers/gpu/drm/xe/xe_module.c | 5
-rw-r--r-- drivers/gpu/drm/xe/xe_module.h | 1
-rw-r--r-- drivers/gpu/drm/xe/xe_pat.c | 1
-rw-r--r-- drivers/gpu/drm/xe/xe_pci.c | 11
-rw-r--r-- drivers/gpu/drm/xe/xe_pci_types.h | 4
-rw-r--r-- drivers/gpu/drm/xe/xe_pm.c | 2
-rw-r--r-- drivers/gpu/drm/xe/xe_preempt_fence.c | 14
-rw-r--r-- drivers/gpu/drm/xe/xe_pt.c | 62
-rw-r--r-- drivers/gpu/drm/xe/xe_query.c | 1
-rw-r--r-- drivers/gpu/drm/xe/xe_reg_sr.h | 4
-rw-r--r-- drivers/gpu/drm/xe/xe_ring_ops.c | 2
-rw-r--r-- drivers/gpu/drm/xe/xe_rtp.h | 4
-rw-r--r-- drivers/gpu/drm/xe/xe_rtp_helpers.h | 26
-rw-r--r-- drivers/gpu/drm/xe/xe_sync.c | 15
-rw-r--r-- drivers/gpu/drm/xe/xe_sync.h | 1
-rw-r--r-- drivers/gpu/drm/xe/xe_uc.c | 13
-rw-r--r-- drivers/gpu/drm/xe/xe_uc.h | 2
-rw-r--r-- drivers/gpu/drm/xe/xe_vm.c | 780
-rw-r--r-- drivers/gpu/drm/xe/xe_vm.h | 3
-rw-r--r-- drivers/gpu/drm/xe/xe_vm_doc.h | 24
-rw-r--r-- drivers/gpu/drm/xe/xe_vm_types.h | 29
-rw-r--r-- drivers/gpu/drm/xe/xe_wa.c | 18
93 files changed, 3757 insertions, 735 deletions
diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
index ae579b6c8763..b620389761d5 100644
--- a/drivers/gpu/drm/xe/Makefile
+++ b/drivers/gpu/drm/xe/Makefile
@@ -163,7 +163,9 @@ xe-$(CONFIG_PCI_IOV) += \
xe_gt_sriov_pf.o \
xe_gt_sriov_pf_config.o \
xe_gt_sriov_pf_control.o \
+ xe_gt_sriov_pf_debugfs.o \
xe_gt_sriov_pf_policy.o \
+ xe_gt_sriov_pf_service.o \
xe_lmtt.o \
xe_lmtt_2l.o \
xe_lmtt_ml.o \
diff --git a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h
index 511cf974d585..5c1d40432ca0 100644
--- a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h
+++ b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h
@@ -194,14 +194,18 @@ enum {
* granularity) since the GPUs clock time runs off a different crystal
* from the CPUs clock. Changing this KLV on a VF that is currently
* running a context wont take effect until a new context is scheduled in.
- * That said, when the PF is changing this value from 0xFFFFFFFF to
- * something else, it might never take effect if the VF is running an
- * inifinitely long compute or shader kernel. In such a scenario, the
+ * That said, when the PF is changing this value from 0x0 to
+ * a non-zero value, it might never take effect if the VF is running an
+ * infinitely long compute or shader kernel. In such a scenario, the
* PF would need to trigger a VM PAUSE and then change the KLV to force
* it to take effect. Such cases might typically happen on a 1PF+1VF
* Virtualization config enabled for heavier workloads like AI/ML.
*
+ * The max value for this KLV is 100 seconds; anything exceeding that
+ * will be clamped to the max.
+ *
* :0: infinite exec quantum (default)
+ * :100000: maximum exec quantum (100000ms == 100s)
*
* _`GUC_KLV_VF_CFG_PREEMPT_TIMEOUT` : 0x8A02
* This config sets the VF-preemption-timeout in microseconds.
@@ -211,15 +215,19 @@ enum {
* different crystal from the CPUs clock. Changing this KLV on a VF
* that is currently running a context wont take effect until a new
* context is scheduled in.
- * That said, when the PF is changing this value from 0xFFFFFFFF to
- * something else, it might never take effect if the VF is running an
- * inifinitely long compute or shader kernel.
+ * That said, when the PF is changing this value from 0x0 to
+ * a non-zero value, it might never take effect if the VF is running an
+ * infinitely long compute or shader kernel.
* In this case, the PF would need to trigger a VM PAUSE and then change
* the KLV to force it to take effect. Such cases might typically happen
* on a 1PF+1VF Virtualization config enabled for heavier workloads like
* AI/ML.
*
+ * The max value for this KLV is 100 seconds; anything exceeding that
+ * will be clamped to the max.
+ *
* :0: no preemption timeout (default)
+ * :100000000: maximum preemption timeout (100000000us == 100s)
*
* _`GUC_KLV_VF_CFG_THRESHOLD_CAT_ERR` : 0x8A03
* This config sets threshold for CAT errors caused by the VF.
@@ -291,9 +299,11 @@ enum {
#define GUC_KLV_VF_CFG_EXEC_QUANTUM_KEY 0x8a01
#define GUC_KLV_VF_CFG_EXEC_QUANTUM_LEN 1u
+#define GUC_KLV_VF_CFG_EXEC_QUANTUM_MAX_VALUE 100000u
-#define GUC_KLV_VF_CFG_PREEMPT_TIMEOUT_KEY 0x8a02
-#define GUC_KLV_VF_CFG_PREEMPT_TIMEOUT_LEN 1u
+#define GUC_KLV_VF_CFG_PREEMPT_TIMEOUT_KEY 0x8a02
+#define GUC_KLV_VF_CFG_PREEMPT_TIMEOUT_LEN 1u
+#define GUC_KLV_VF_CFG_PREEMPT_TIMEOUT_MAX_VALUE 100000000u
#define GUC_KLV_VF_CFG_THRESHOLD_CAT_ERR_KEY 0x8a03
#define GUC_KLV_VF_CFG_THRESHOLD_CAT_ERR_LEN 1u
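The clamping behaviour documented above can be expressed directly against the new *_MAX_VALUE limits. A minimal sketch (the helper names are hypothetical and not part of this patch) of how a PF driver could sanitize the values before encoding the KLVs:

#include <linux/minmax.h>
#include <linux/types.h>

#include "abi/guc_klvs_abi.h"

/* Hypothetical helpers -- they only illustrate the documented clamping. */
static u32 pf_clamp_exec_quantum_ms(u32 exec_quantum_ms)
{
	/* 0 (infinite quantum, the default) is left untouched by min_t(). */
	return min_t(u32, exec_quantum_ms, GUC_KLV_VF_CFG_EXEC_QUANTUM_MAX_VALUE);
}

static u32 pf_clamp_preempt_timeout_us(u32 preempt_timeout_us)
{
	/* 0 (no preemption timeout, the default) is likewise preserved. */
	return min_t(u32, preempt_timeout_us,
		     GUC_KLV_VF_CFG_PREEMPT_TIMEOUT_MAX_VALUE);
}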
diff --git a/drivers/gpu/drm/xe/abi/guc_relay_actions_abi.h b/drivers/gpu/drm/xe/abi/guc_relay_actions_abi.h
index 747e428de421..6c2834613081 100644
--- a/drivers/gpu/drm/xe/abi/guc_relay_actions_abi.h
+++ b/drivers/gpu/drm/xe/abi/guc_relay_actions_abi.h
@@ -1,11 +1,179 @@
/* SPDX-License-Identifier: MIT */
/*
- * Copyright © 2023 Intel Corporation
+ * Copyright © 2023-2024 Intel Corporation
*/
#ifndef _ABI_GUC_RELAY_ACTIONS_ABI_H_
#define _ABI_GUC_RELAY_ACTIONS_ABI_H_
+#include "abi/guc_relay_communication_abi.h"
+
+/**
+ * DOC: GuC Relay VF/PF ABI Version
+ *
+ * The _`GUC_RELAY_VERSION_BASE` defines minimum VF/PF ABI version that
+ * drivers must support. Currently this is version 1.0.
+ *
+ * The _`GUC_RELAY_VERSION_LATEST` defines latest VF/PF ABI version that
+ * drivers may use. Currently this is version 1.0.
+ *
+ * Some platforms may require different base VF/PF ABI version.
+ * No supported VF/PF ABI version can be 0.0.
+ */
+
+#define GUC_RELAY_VERSION_BASE_MAJOR 1
+#define GUC_RELAY_VERSION_BASE_MINOR 0
+
+#define GUC_RELAY_VERSION_LATEST_MAJOR 1
+#define GUC_RELAY_VERSION_LATEST_MINOR 0
+
+/**
+ * DOC: GuC Relay Actions
+ *
+ * The following actions are supported from VF/PF ABI version 1.0:
+ *
+ * * `VF2PF_HANDSHAKE`_
+ * * `VF2PF_QUERY_RUNTIME`_
+ */
+
+/**
+ * DOC: VF2PF_HANDSHAKE
+ *
+ * This `Relay Message`_ is used by the VF to establish ABI version with the PF.
+ *
+ * Prior to exchanging any other messages, both VF driver and PF driver must
+ * negotiate the VF/PF ABI version that will be used in their communication.
+ *
+ * The VF driver shall use @MAJOR and @MINOR fields to pass requested ABI version.
+ * The VF driver may use special version 0.0 (both @MAJOR and @MINOR set to 0)
+ * to request latest (or any) ABI version that is supported by the PF driver.
+ *
+ * This message definition shall be supported by all future ABI versions.
+ * This message definition shall not be changed by future ABI versions.
+ *
+ * +---+-------+--------------------------------------------------------------+
+ * | | Bits | Description |
+ * +===+=======+==============================================================+
+ * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_HOST_ |
+ * | +-------+--------------------------------------------------------------+
+ * | | 30:28 | TYPE = GUC_HXG_TYPE_REQUEST_ |
+ * | +-------+--------------------------------------------------------------+
+ * | | 27:16 | DATA0 = MBZ |
+ * | +-------+--------------------------------------------------------------+
+ * | | 15:0 | ACTION = _`GUC_RELAY_ACTION_VF2PF_HANDSHAKE` = 0x0001 |
+ * +---+-------+--------------------------------------------------------------+
+ * | 1 | 31:16 | **MAJOR** - requested major version of the VFPF interface |
+ * | | | (use MAJOR_ANY to request latest version supported by PF) |
+ * | +-------+--------------------------------------------------------------+
+ * | | 15:0 | **MINOR** - requested minor version of the VFPF interface |
+ * | | | (use MINOR_ANY to request latest version supported by PF) |
+ * +---+-------+--------------------------------------------------------------+
+ *
+ * +---+-------+--------------------------------------------------------------+
+ * | | Bits | Description |
+ * +===+=======+==============================================================+
+ * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_HOST_ |
+ * | +-------+--------------------------------------------------------------+
+ * | | 30:28 | TYPE = GUC_HXG_TYPE_RESPONSE_SUCCESS_ |
+ * | +-------+--------------------------------------------------------------+
+ * | | 27:0 | DATA0 = MBZ |
+ * +---+-------+--------------------------------------------------------------+
+ * | 1 | 31:16 | **MAJOR** - agreed major version of the VFPF interface |
+ * | +-------+--------------------------------------------------------------+
+ * | | 15:0 | **MINOR** - agreed minor version of the VFPF interface |
+ * +---+-------+--------------------------------------------------------------+
+ */
+#define GUC_RELAY_ACTION_VF2PF_HANDSHAKE 0x0001u
+
+#define VF2PF_HANDSHAKE_REQUEST_MSG_LEN 2u
+#define VF2PF_HANDSHAKE_REQUEST_MSG_0_MBZ GUC_HXG_REQUEST_MSG_0_DATA0
+#define VF2PF_HANDSHAKE_REQUEST_MSG_1_MAJOR (0xffffu << 16)
+#define VF2PF_HANDSHAKE_MAJOR_ANY 0
+#define VF2PF_HANDSHAKE_REQUEST_MSG_1_MINOR (0xffffu << 0)
+#define VF2PF_HANDSHAKE_MINOR_ANY 0
+
+#define VF2PF_HANDSHAKE_RESPONSE_MSG_LEN 2u
+#define VF2PF_HANDSHAKE_RESPONSE_MSG_0_MBZ GUC_HXG_RESPONSE_MSG_0_DATA0
+#define VF2PF_HANDSHAKE_RESPONSE_MSG_1_MAJOR (0xffffu << 16)
+#define VF2PF_HANDSHAKE_RESPONSE_MSG_1_MINOR (0xffffu << 0)
+
+/**
+ * DOC: VF2PF_QUERY_RUNTIME
+ *
+ * This `Relay Message`_ is used by the VF to query values of runtime registers.
+ *
+ * On some platforms, VF drivers may not have access to some of the fuse registers
+ * (referred here as 'runtime registers') and therefore VF drivers need to ask
+ * the PF driver to obtain their values.
+ *
+ * However, the list of such registers, and their values, is fully owned and
+ * maintained by the PF driver and the VF driver may only initiate the query
+ * sequence and indicate in the @START field the starting index of the next
+ * requested register from this predefined list.
+ *
+ * In the response, the PF driver will return tuple of 32-bit register offset and
+ * the 32-bit value of that register (respectively @REG_OFFSET and @REG_VALUE).
+ *
+ * The VF driver can use @LIMIT field to limit number of returned register tuples.
+ * If @LIMIT is unset then PF decides about number of returned register tuples.
+ *
+ * This message definition is supported from ABI version 1.0.
+ *
+ * +---+-------+--------------------------------------------------------------+
+ * | | Bits | Description |
+ * +===+=======+==============================================================+
+ * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_HOST_ |
+ * | +-------+--------------------------------------------------------------+
+ * | | 30:28 | TYPE = GUC_HXG_TYPE_REQUEST_ |
+ * | +-------+--------------------------------------------------------------+
+ * | | 27:16 | DATA0 = **LIMIT** - limit number of returned entries |
+ * | | | (use zero to not enforce any limits on the response) |
+ * | +-------+--------------------------------------------------------------+
+ * | | 15:0 | ACTION = _`GUC_RELAY_ACTION_VF2PF_QUERY_RUNTIME` = 0x0101 |
+ * +---+-------+--------------------------------------------------------------+
+ * | 1 | 31:0 | DATA1 = **START** - index of the first requested entry |
+ * +---+-------+--------------------------------------------------------------+
+ *
+ * +---+-------+--------------------------------------------------------------+
+ * | | Bits | Description |
+ * +===+=======+==============================================================+
+ * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_HOST_ |
+ * | +-------+--------------------------------------------------------------+
+ * | | 30:28 | TYPE = GUC_HXG_TYPE_RESPONSE_SUCCESS_ |
+ * | +-------+--------------------------------------------------------------+
+ * | | 27:0 | DATA0 = **COUNT** - number of entries included in response |
+ * +---+-------+--------------------------------------------------------------+
+ * | 1 | 31:0 | DATA1 = **REMAINING** - number of remaining entries |
+ * +---+-------+--------------------------------------------------------------+
+ * | 2 | 31:0 | DATA2 = **REG_OFFSET** - offset of register[START] |
+ * +---+-------+--------------------------------------------------------------+
+ * | 3 | 31:0 | DATA3 = **REG_VALUE** - value of register[START] |
+ * +---+-------+--------------------------------------------------------------+
+ * | | | |
+ * +---+-------+--------------------------------------------------------------+
+ * |n-1| 31:0 | REG_OFFSET - offset of register[START + x] |
+ * +---+-------+--------------------------------------------------------------+
+ * | n | 31:0 | REG_VALUE - value of register[START + x] |
+ * +---+-------+--------------------------------------------------------------+
+ */
+#define GUC_RELAY_ACTION_VF2PF_QUERY_RUNTIME 0x0101u
+
+#define VF2PF_QUERY_RUNTIME_REQUEST_MSG_LEN 2u
+#define VF2PF_QUERY_RUNTIME_REQUEST_MSG_0_LIMIT GUC_HXG_REQUEST_MSG_0_DATA0
+#define VF2PF_QUERY_RUNTIME_NO_LIMIT 0u
+#define VF2PF_QUERY_RUNTIME_REQUEST_MSG_1_START GUC_HXG_REQUEST_MSG_n_DATAn
+
+#define VF2PF_QUERY_RUNTIME_RESPONSE_MSG_MIN_LEN (GUC_HXG_MSG_MIN_LEN + 1u)
+#define VF2PF_QUERY_RUNTIME_RESPONSE_MSG_MAX_LEN \
+ (VF2PF_QUERY_RUNTIME_RESPONSE_MSG_MIN_LEN + VF2PF_QUERY_RUNTIME_MAX_COUNT * 2)
+#define VF2PF_QUERY_RUNTIME_RESPONSE_MSG_0_COUNT GUC_HXG_RESPONSE_MSG_0_DATA0
+#define VF2PF_QUERY_RUNTIME_MIN_COUNT 0
+#define VF2PF_QUERY_RUNTIME_MAX_COUNT \
+ ((GUC_RELAY_MSG_MAX_LEN - VF2PF_QUERY_RUNTIME_RESPONSE_MSG_MIN_LEN) / 2)
+#define VF2PF_QUERY_RUNTIME_RESPONSE_MSG_1_REMAINING GUC_HXG_RESPONSE_MSG_n_DATAn
+#define VF2PF_QUERY_RUNTIME_RESPONSE_DATAn_REG_OFFSETx GUC_HXG_RESPONSE_MSG_n_DATAn
+#define VF2PF_QUERY_RUNTIME_RESPONSE_DATAn_REG_VALUEx GUC_HXG_RESPONSE_MSG_n_DATAn
+
/**
* DOC: GuC Relay Debug Actions
*
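To illustrate the handshake flow described in the DOC sections above, a VF driver could assemble the two-dword VF2PF_HANDSHAKE request as sketched below. This is illustrative only: it assumes the usual GUC_HXG_* field helpers from abi/guc_messages_abi.h and does not actually send the message over the relay.

#include <linux/bitfield.h>

#include "abi/guc_messages_abi.h"
#include "abi/guc_relay_actions_abi.h"

/* Sketch: request the latest ("any") ABI version supported by the PF. */
static void vf2pf_handshake_request_example(u32 msg[VF2PF_HANDSHAKE_REQUEST_MSG_LEN])
{
	msg[0] = FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
		 FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) |
		 FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION,
			    GUC_RELAY_ACTION_VF2PF_HANDSHAKE);
	msg[1] = FIELD_PREP(VF2PF_HANDSHAKE_REQUEST_MSG_1_MAJOR,
			    VF2PF_HANDSHAKE_MAJOR_ANY) |
		 FIELD_PREP(VF2PF_HANDSHAKE_REQUEST_MSG_1_MINOR,
			    VF2PF_HANDSHAKE_MINOR_ANY);
}

On success the PF replies with the agreed version in dword 1, which the VF can extract with FIELD_GET() on the VF2PF_HANDSHAKE_RESPONSE_MSG_1_MAJOR/MINOR masks before moving on to VF2PF_QUERY_RUNTIME.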
diff --git a/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c b/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c
index d46f87a039f2..eb67ecf08db2 100644
--- a/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c
+++ b/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c
@@ -13,6 +13,7 @@
#include "xe_bo.h"
#include "xe_device.h"
#include "xe_device_types.h"
+#include "xe_force_wake.h"
#include "xe_gsc_proxy.h"
#include "xe_gsc_submit.h"
#include "xe_gt.h"
diff --git a/drivers/gpu/drm/xe/regs/xe_gpu_commands.h b/drivers/gpu/drm/xe/instructions/xe_gpu_commands.h
index a255946b6f77..a255946b6f77 100644
--- a/drivers/gpu/drm/xe/regs/xe_gpu_commands.h
+++ b/drivers/gpu/drm/xe/instructions/xe_gpu_commands.h
diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
index af71b87d8030..263ffc7bc2ef 100644
--- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
@@ -44,9 +44,10 @@
#define GSCCS_RING_BASE 0x11a000
#define RING_TAIL(base) XE_REG((base) + 0x30)
+#define TAIL_ADDR REG_GENMASK(20, 3)
#define RING_HEAD(base) XE_REG((base) + 0x34)
-#define HEAD_ADDR 0x001FFFFC
+#define HEAD_ADDR REG_GENMASK(20, 2)
#define RING_START(base) XE_REG((base) + 0x38)
@@ -54,6 +55,8 @@
#define RING_CTL_SIZE(size) ((size) - PAGE_SIZE) /* in bytes -> pages */
+#define RING_START_UDW(base) XE_REG((base) + 0x48)
+
#define RING_PSMI_CTL(base) XE_REG((base) + 0x50, XE_REG_OPTION_MASKED)
#define RC_SEMA_IDLE_MSG_DISABLE REG_BIT(12)
#define WAIT_FOR_EVENT_POWER_DOWN_DISABLE REG_BIT(7)
@@ -65,6 +68,7 @@
#define RING_ACTHD_UDW(base) XE_REG((base) + 0x5c)
#define RING_DMA_FADD_UDW(base) XE_REG((base) + 0x60)
#define RING_IPEHR(base) XE_REG((base) + 0x68)
+#define RING_INSTDONE(base) XE_REG((base) + 0x6c)
#define RING_ACTHD(base) XE_REG((base) + 0x74)
#define RING_DMA_FADD(base) XE_REG((base) + 0x78)
#define RING_HWS_PGA(base) XE_REG((base) + 0x80)
@@ -108,6 +112,8 @@
#define FF_DOP_CLOCK_GATE_DISABLE REG_BIT(1)
#define REPLAY_MODE_GRANULARITY REG_BIT(0)
+#define INDIRECT_RING_STATE(base) XE_REG((base) + 0x108)
+
#define RING_BBADDR(base) XE_REG((base) + 0x140)
#define RING_BBADDR_UDW(base) XE_REG((base) + 0x168)
@@ -123,6 +129,7 @@
#define RING_EXECLIST_STATUS_HI(base) XE_REG((base) + 0x234 + 4)
#define RING_CONTEXT_CONTROL(base) XE_REG((base) + 0x244, XE_REG_OPTION_MASKED)
+#define CTX_CTRL_INDIRECT_RING_STATE_ENABLE REG_BIT(4)
#define CTX_CTRL_INHIBIT_SYN_CTX_SWITCH REG_BIT(3)
#define CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT REG_BIT(0)
@@ -135,7 +142,6 @@
#define RING_VALID_MASK 0x00000001
#define RING_VALID 0x00000001
#define STOP_RING REG_BIT(8)
-#define TAIL_ADDR 0x001FFFF8
#define RING_CTX_TIMESTAMP(base) XE_REG((base) + 0x3a8)
#define CSBE_DEBUG_STATUS(base) XE_REG((base) + 0x3fc)
diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
index 26fb4943c79e..116224a9ccae 100644
--- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
@@ -59,6 +59,27 @@
#define XELP_GLOBAL_MOCS(i) XE_REG(0x4000 + (i) * 4)
#define XEHP_GLOBAL_MOCS(i) XE_REG_MCR(0x4000 + (i) * 4)
+#define LE_SSE_MASK REG_GENMASK(18, 17)
+#define LE_SSE(value) REG_FIELD_PREP(LE_SSE_MASK, value)
+#define LE_COS_MASK REG_GENMASK(16, 15)
+#define LE_COS(value) REG_FIELD_PREP(LE_COS_MASK, value)
+#define LE_SCF_MASK REG_BIT(14)
+#define LE_SCF(value) REG_FIELD_PREP(LE_SCF_MASK, value)
+#define LE_PFM_MASK REG_GENMASK(13, 11)
+#define LE_PFM(value) REG_FIELD_PREP(LE_PFM_MASK, value)
+#define LE_SCC_MASK REG_GENMASK(10, 8)
+#define LE_SCC(value) REG_FIELD_PREP(LE_SCC_MASK, value)
+#define LE_RSC_MASK REG_BIT(7)
+#define LE_RSC(value) REG_FIELD_PREP(LE_RSC_MASK, value)
+#define LE_AOM_MASK REG_BIT(6)
+#define LE_AOM(value) REG_FIELD_PREP(LE_AOM_MASK, value)
+#define LE_LRUM_MASK REG_GENMASK(5, 4)
+#define LE_LRUM(value) REG_FIELD_PREP(LE_LRUM_MASK, value)
+#define LE_TGT_CACHE_MASK REG_GENMASK(3, 2)
+#define LE_TGT_CACHE(value) REG_FIELD_PREP(LE_TGT_CACHE_MASK, value)
+#define LE_CACHEABILITY_MASK REG_GENMASK(1, 0)
+#define LE_CACHEABILITY(value) REG_FIELD_PREP(LE_CACHEABILITY_MASK, value)
+
#define CCS_AUX_INV XE_REG(0x4208)
#define VD0_AUX_INV XE_REG(0x4218)
@@ -98,6 +119,8 @@
#define FF_MODE2_TDS_TIMER_MASK REG_GENMASK(23, 16)
#define FF_MODE2_TDS_TIMER_128 REG_FIELD_PREP(FF_MODE2_TDS_TIMER_MASK, 4)
+#define XEHPG_INSTDONE_GEOM_SVGUNIT XE_REG_MCR(0x666c)
+
#define CACHE_MODE_1 XE_REG(0x7004, XE_REG_OPTION_MASKED)
#define MSAA_OPTIMIZATION_REDUC_DISABLE REG_BIT(11)
@@ -115,6 +138,14 @@
#define FLSH_IGNORES_PSD REG_BIT(10)
#define FD_END_COLLECT REG_BIT(5)
+#define SC_INSTDONE XE_REG(0x7100)
+#define SC_INSTDONE_EXTRA XE_REG(0x7104)
+#define SC_INSTDONE_EXTRA2 XE_REG(0x7108)
+
+#define XEHPG_SC_INSTDONE XE_REG_MCR(0x7100)
+#define XEHPG_SC_INSTDONE_EXTRA XE_REG_MCR(0x7104)
+#define XEHPG_SC_INSTDONE_EXTRA2 XE_REG_MCR(0x7108)
+
#define COMMON_SLICE_CHICKEN4 XE_REG(0x7300, XE_REG_OPTION_MASKED)
#define DISABLE_TDC_LOAD_BALANCING_CALC REG_BIT(6)
@@ -173,8 +204,11 @@
#define MAX_MSLICES 4
#define MEML3_EN_MASK REG_GENMASK(3, 0)
+#define MIRROR_FUSE1 XE_REG(0x911c)
+
#define XELP_EU_ENABLE XE_REG(0x9134) /* "_DISABLE" on Xe_LP */
#define XELP_EU_MASK REG_GENMASK(7, 0)
+#define XELP_GT_SLICE_ENABLE XE_REG(0x9138)
#define XELP_GT_GEOMETRY_DSS_ENABLE XE_REG(0x913c)
#define GT_VEBOX_VDBOX_DISABLE XE_REG(0x9140)
@@ -301,9 +335,24 @@
#define XEHPC_OVRLSCCC REG_BIT(0)
/* L3 Cache Control */
+#define LNCFCMOCS_REG_COUNT 32
#define XELP_LNCFCMOCS(i) XE_REG(0xb020 + (i) * 4)
#define XEHP_LNCFCMOCS(i) XE_REG_MCR(0xb020 + (i) * 4)
-#define LNCFCMOCS_REG_COUNT 32
+#define L3_UPPER_LKUP_MASK REG_BIT(23)
+#define L3_UPPER_GLBGO_MASK REG_BIT(22)
+#define L3_UPPER_IDX_CACHEABILITY_MASK REG_GENMASK(21, 20)
+#define L3_UPPER_IDX_SCC_MASK REG_GENMASK(19, 17)
+#define L3_UPPER_IDX_ESC_MASK REG_BIT(16)
+#define L3_LKUP_MASK REG_BIT(7)
+#define L3_LKUP(value) REG_FIELD_PREP(L3_LKUP_MASK, value)
+#define L3_GLBGO_MASK REG_BIT(6)
+#define L3_GLBGO(value) REG_FIELD_PREP(L3_GLBGO_MASK, value)
+#define L3_CACHEABILITY_MASK REG_GENMASK(5, 4)
+#define L3_CACHEABILITY(value) REG_FIELD_PREP(L3_CACHEABILITY_MASK, value)
+#define L3_SCC_MASK REG_GENMASK(3, 1)
+#define L3_SCC(value) REG_FIELD_PREP(L3_SCC_MASK, value)
+#define L3_ESC_MASK REG_BIT(0)
+#define L3_ESC(value) REG_FIELD_PREP(L3_ESC_MASK, value)
#define XEHP_L3NODEARBCFG XE_REG_MCR(0xb0b4)
#define XEHP_LNESPARE REG_BIT(19)
@@ -345,6 +394,9 @@
#define HALF_SLICE_CHICKEN5 XE_REG_MCR(0xe188, XE_REG_OPTION_MASKED)
#define DISABLE_SAMPLE_G_PERFORMANCE REG_BIT(0)
+#define SAMPLER_INSTDONE XE_REG_MCR(0xe160)
+#define ROW_INSTDONE XE_REG_MCR(0xe164)
+
#define SAMPLER_MODE XE_REG_MCR(0xe18c, XE_REG_OPTION_MASKED)
#define ENABLE_SMALLPL REG_BIT(15)
#define SC_DISABLE_POWER_OPTIMIZATION_EBB REG_BIT(9)
@@ -353,6 +405,7 @@
#define HALF_SLICE_CHICKEN7 XE_REG_MCR(0xe194, XE_REG_OPTION_MASKED)
#define DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA REG_BIT(15)
+#define CLEAR_OPTIMIZATION_DISABLE REG_BIT(6)
#define CACHE_MODE_SS XE_REG_MCR(0xe420, XE_REG_OPTION_MASKED)
#define DISABLE_ECC REG_BIT(5)
diff --git a/drivers/gpu/drm/xe/regs/xe_lrc_layout.h b/drivers/gpu/drm/xe/regs/xe_lrc_layout.h
index 1825d8f79db6..e6ca8bbda8f4 100644
--- a/drivers/gpu/drm/xe/regs/xe_lrc_layout.h
+++ b/drivers/gpu/drm/xe/regs/xe_lrc_layout.h
@@ -11,6 +11,7 @@
#define CTX_RING_TAIL (0x06 + 1)
#define CTX_RING_START (0x08 + 1)
#define CTX_RING_CTL (0x0a + 1)
+#define CTX_INDIRECT_RING_STATE (0x26 + 1)
#define CTX_PDP0_UDW (0x30 + 1)
#define CTX_PDP0_LDW (0x32 + 1)
@@ -23,4 +24,10 @@
#define CTX_INT_SRC_REPORT_REG (CTX_LRI_INT_REPORT_PTR + 3)
#define CTX_INT_SRC_REPORT_PTR (CTX_LRI_INT_REPORT_PTR + 4)
+#define INDIRECT_CTX_RING_HEAD (0x02 + 1)
+#define INDIRECT_CTX_RING_TAIL (0x04 + 1)
+#define INDIRECT_CTX_RING_START (0x06 + 1)
+#define INDIRECT_CTX_RING_START_UDW (0x08 + 1)
+#define INDIRECT_CTX_RING_CTL (0x0a + 1)
+
#endif
diff --git a/drivers/gpu/drm/xe/regs/xe_sriov_regs.h b/drivers/gpu/drm/xe/regs/xe_sriov_regs.h
index 617ddb84b7fa..017b4ddd1ecf 100644
--- a/drivers/gpu/drm/xe/regs/xe_sriov_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_sriov_regs.h
@@ -14,6 +14,9 @@
#define LMEM_EN REG_BIT(31)
#define LMTT_DIR_PTR REG_GENMASK(30, 0) /* in multiples of 64KB */
+#define VIRTUAL_CTRL_REG XE_REG(0x10108c)
+#define GUEST_GTT_UPDATE_EN REG_BIT(8)
+
#define VF_CAP_REG XE_REG(0x1901f8, XE_REG_OPTION_VF)
#define VF_CAP REG_BIT(0)
diff --git a/drivers/gpu/drm/xe/tests/Makefile b/drivers/gpu/drm/xe/tests/Makefile
index 8cf2367449d8..6e58931fddd4 100644
--- a/drivers/gpu/drm/xe/tests/Makefile
+++ b/drivers/gpu/drm/xe/tests/Makefile
@@ -11,6 +11,7 @@ xe_live_test-y = xe_live_test_mod.o \
# Normal kunit tests
obj-$(CONFIG_DRM_XE_KUNIT_TEST) += xe_test.o
xe_test-y = xe_test_mod.o \
+ xe_args_test.o \
xe_pci_test.o \
xe_rtp_test.o \
xe_wa_test.o
diff --git a/drivers/gpu/drm/xe/tests/xe_args_test.c b/drivers/gpu/drm/xe/tests/xe_args_test.c
new file mode 100644
index 000000000000..f3fb23aa5d2e
--- /dev/null
+++ b/drivers/gpu/drm/xe/tests/xe_args_test.c
@@ -0,0 +1,221 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright © 2024 Intel Corporation
+ */
+
+#include <kunit/test.h>
+
+#include "xe_args.h"
+
+static void call_args_example(struct kunit *test)
+{
+#define foo X, Y, Z, Q
+#define bar COUNT_ARGS(foo)
+#define buz CALL_ARGS(COUNT_ARGS, foo)
+
+ KUNIT_EXPECT_EQ(test, bar, 1);
+ KUNIT_EXPECT_EQ(test, buz, 4);
+
+#undef foo
+#undef bar
+#undef buz
+}
+
+static void drop_first_arg_example(struct kunit *test)
+{
+#define foo X, Y, Z, Q
+#define bar CALL_ARGS(COUNT_ARGS, DROP_FIRST_ARG(foo))
+
+ KUNIT_EXPECT_EQ(test, bar, 3);
+
+#undef foo
+#undef bar
+}
+
+static void first_arg_example(struct kunit *test)
+{
+ int X = 1;
+
+#define foo X, Y, Z, Q
+#define bar FIRST_ARG(foo)
+
+ KUNIT_EXPECT_EQ(test, bar, X);
+ KUNIT_EXPECT_STREQ(test, __stringify(bar), "X");
+
+#undef foo
+#undef bar
+}
+
+static void last_arg_example(struct kunit *test)
+{
+ int Q = 1;
+
+#define foo X, Y, Z, Q
+#define bar LAST_ARG(foo)
+
+ KUNIT_EXPECT_EQ(test, bar, Q);
+ KUNIT_EXPECT_STREQ(test, __stringify(bar), "Q");
+
+#undef foo
+#undef bar
+}
+
+static void pick_arg_example(struct kunit *test)
+{
+ int Y = 1, Z = 2;
+
+#define foo X, Y, Z, Q
+#define bar PICK_ARG(2, foo)
+#define buz PICK_ARG3(foo)
+
+ KUNIT_EXPECT_EQ(test, bar, Y);
+ KUNIT_EXPECT_STREQ(test, __stringify(bar), "Y");
+ KUNIT_EXPECT_EQ(test, buz, Z);
+ KUNIT_EXPECT_STREQ(test, __stringify(buz), "Z");
+
+#undef foo
+#undef bar
+#undef buz
+}
+
+static void sep_comma_example(struct kunit *test)
+{
+#define foo(f) f(X) f(Y) f(Z) f(Q)
+#define bar DROP_FIRST_ARG(foo(ARGS_SEP_COMMA __stringify))
+#define buz CALL_ARGS(COUNT_ARGS, DROP_FIRST_ARG(foo(ARGS_SEP_COMMA)))
+
+ static const char * const a[] = { bar };
+
+ KUNIT_EXPECT_STREQ(test, a[0], "X");
+ KUNIT_EXPECT_STREQ(test, a[1], "Y");
+ KUNIT_EXPECT_STREQ(test, a[2], "Z");
+ KUNIT_EXPECT_STREQ(test, a[3], "Q");
+
+ KUNIT_EXPECT_EQ(test, buz, 4);
+
+#undef foo
+#undef bar
+#undef buz
+}
+
+#define NO_ARGS
+#define FOO_ARGS X, Y, Z, Q
+#define MAX_ARGS -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12
+
+static void count_args_test(struct kunit *test)
+{
+ int count;
+
+ /* COUNT_ARGS() counts to 12 */
+
+ count = COUNT_ARGS();
+ KUNIT_EXPECT_EQ(test, count, 0);
+
+ count = COUNT_ARGS(1);
+ KUNIT_EXPECT_EQ(test, count, 1);
+
+ count = COUNT_ARGS(a, b, c, d, e);
+ KUNIT_EXPECT_EQ(test, count, 5);
+
+ count = COUNT_ARGS(a, b, c, d, e, f, g, h, i, j, k, l);
+ KUNIT_EXPECT_EQ(test, count, 12);
+
+ /* COUNT_ARGS() does not expand params */
+
+ count = COUNT_ARGS(NO_ARGS);
+ KUNIT_EXPECT_EQ(test, count, 1);
+
+ count = COUNT_ARGS(FOO_ARGS);
+ KUNIT_EXPECT_EQ(test, count, 1);
+}
+
+static void call_args_test(struct kunit *test)
+{
+ int count;
+
+ count = CALL_ARGS(COUNT_ARGS, NO_ARGS);
+ KUNIT_EXPECT_EQ(test, count, 0);
+ KUNIT_EXPECT_EQ(test, CALL_ARGS(COUNT_ARGS, NO_ARGS), 0);
+ KUNIT_EXPECT_EQ(test, CALL_ARGS(COUNT_ARGS, FOO_ARGS), 4);
+ KUNIT_EXPECT_EQ(test, CALL_ARGS(COUNT_ARGS, FOO_ARGS, FOO_ARGS), 8);
+ KUNIT_EXPECT_EQ(test, CALL_ARGS(COUNT_ARGS, MAX_ARGS), 12);
+}
+
+static void drop_first_arg_test(struct kunit *test)
+{
+ int Y = -2, Z = -3, Q = -4;
+ int a[] = { DROP_FIRST_ARG(FOO_ARGS) };
+
+ KUNIT_EXPECT_EQ(test, DROP_FIRST_ARG(0, -1), -1);
+ KUNIT_EXPECT_EQ(test, DROP_FIRST_ARG(DROP_FIRST_ARG(0, -1, -2)), -2);
+
+ KUNIT_EXPECT_EQ(test, CALL_ARGS(COUNT_ARGS, DROP_FIRST_ARG(FOO_ARGS)), 3);
+ KUNIT_EXPECT_EQ(test, DROP_FIRST_ARG(DROP_FIRST_ARG(DROP_FIRST_ARG(FOO_ARGS))), -4);
+ KUNIT_EXPECT_EQ(test, a[0], -2);
+ KUNIT_EXPECT_EQ(test, a[1], -3);
+ KUNIT_EXPECT_EQ(test, a[2], -4);
+
+#define foo DROP_FIRST_ARG(FOO_ARGS)
+#define bar DROP_FIRST_ARG(DROP_FIRST_ARG(FOO_ARGS))
+#define buz DROP_FIRST_ARG(DROP_FIRST_ARG(DROP_FIRST_ARG(FOO_ARGS)))
+
+ KUNIT_EXPECT_EQ(test, CALL_ARGS(COUNT_ARGS, foo), 3);
+ KUNIT_EXPECT_EQ(test, CALL_ARGS(COUNT_ARGS, bar), 2);
+ KUNIT_EXPECT_EQ(test, CALL_ARGS(COUNT_ARGS, buz), 1);
+ KUNIT_EXPECT_STREQ(test, __stringify(buz), "Q");
+
+#undef foo
+#undef bar
+#undef buz
+}
+
+static void first_arg_test(struct kunit *test)
+{
+ int X = -1;
+ int a[] = { FIRST_ARG(FOO_ARGS) };
+
+ KUNIT_EXPECT_EQ(test, FIRST_ARG(-1, -2), -1);
+
+ KUNIT_EXPECT_EQ(test, CALL_ARGS(COUNT_ARGS, FIRST_ARG(FOO_ARGS)), 1);
+ KUNIT_EXPECT_EQ(test, FIRST_ARG(FOO_ARGS), -1);
+ KUNIT_EXPECT_EQ(test, a[0], -1);
+ KUNIT_EXPECT_STREQ(test, __stringify(FIRST_ARG(FOO_ARGS)), "X");
+}
+
+static void last_arg_test(struct kunit *test)
+{
+ int Q = -4;
+ int a[] = { LAST_ARG(FOO_ARGS) };
+
+ KUNIT_EXPECT_EQ(test, LAST_ARG(-1, -2), -2);
+
+ KUNIT_EXPECT_EQ(test, CALL_ARGS(COUNT_ARGS, LAST_ARG(FOO_ARGS)), 1);
+ KUNIT_EXPECT_EQ(test, LAST_ARG(FOO_ARGS), -4);
+ KUNIT_EXPECT_EQ(test, a[0], -4);
+ KUNIT_EXPECT_STREQ(test, __stringify(LAST_ARG(FOO_ARGS)), "Q");
+
+ KUNIT_EXPECT_EQ(test, LAST_ARG(MAX_ARGS), -12);
+ KUNIT_EXPECT_STREQ(test, __stringify(LAST_ARG(MAX_ARGS)), "-12");
+}
+
+static struct kunit_case args_tests[] = {
+ KUNIT_CASE(count_args_test),
+ KUNIT_CASE(call_args_example),
+ KUNIT_CASE(call_args_test),
+ KUNIT_CASE(drop_first_arg_example),
+ KUNIT_CASE(drop_first_arg_test),
+ KUNIT_CASE(first_arg_example),
+ KUNIT_CASE(first_arg_test),
+ KUNIT_CASE(last_arg_example),
+ KUNIT_CASE(last_arg_test),
+ KUNIT_CASE(pick_arg_example),
+ KUNIT_CASE(sep_comma_example),
+ {}
+};
+
+static struct kunit_suite args_test_suite = {
+ .name = "args",
+ .test_cases = args_tests,
+};
+
+kunit_test_suite(args_test_suite);
diff --git a/drivers/gpu/drm/xe/tests/xe_gt_sriov_pf_service_test.c b/drivers/gpu/drm/xe/tests/xe_gt_sriov_pf_service_test.c
new file mode 100644
index 000000000000..b683585db852
--- /dev/null
+++ b/drivers/gpu/drm/xe/tests/xe_gt_sriov_pf_service_test.c
@@ -0,0 +1,232 @@
+// SPDX-License-Identifier: GPL-2.0 AND MIT
+/*
+ * Copyright © 2024 Intel Corporation
+ */
+
+#include <kunit/test.h>
+
+#include "xe_device.h"
+#include "xe_kunit_helpers.h"
+#include "xe_pci_test.h"
+
+static int pf_service_test_init(struct kunit *test)
+{
+ struct xe_pci_fake_data fake = {
+ .sriov_mode = XE_SRIOV_MODE_PF,
+ .platform = XE_TIGERLAKE, /* some random platform */
+ .subplatform = XE_SUBPLATFORM_NONE,
+ };
+ struct xe_device *xe;
+ struct xe_gt *gt;
+
+ test->priv = &fake;
+ xe_kunit_helper_xe_device_test_init(test);
+
+ xe = test->priv;
+ KUNIT_ASSERT_EQ(test, xe_sriov_init(xe), 0);
+
+ gt = xe_device_get_gt(xe, 0);
+ pf_init_versions(gt);
+
+ /*
+ * sanity check:
+ * - all supported platforms VF/PF ABI versions must be defined
+ * - base version can't be newer than latest
+ */
+ KUNIT_ASSERT_NE(test, 0, gt->sriov.pf.service.version.base.major);
+ KUNIT_ASSERT_NE(test, 0, gt->sriov.pf.service.version.latest.major);
+ KUNIT_ASSERT_LE(test, gt->sriov.pf.service.version.base.major,
+ gt->sriov.pf.service.version.latest.major);
+ if (gt->sriov.pf.service.version.base.major == gt->sriov.pf.service.version.latest.major)
+ KUNIT_ASSERT_LE(test, gt->sriov.pf.service.version.base.minor,
+ gt->sriov.pf.service.version.latest.minor);
+
+ test->priv = gt;
+ return 0;
+}
+
+static void pf_negotiate_any(struct kunit *test)
+{
+ struct xe_gt *gt = test->priv;
+ u32 major, minor;
+
+ KUNIT_ASSERT_EQ(test, 0,
+ pf_negotiate_version(gt, VF2PF_HANDSHAKE_MAJOR_ANY,
+ VF2PF_HANDSHAKE_MINOR_ANY,
+ &major, &minor));
+ KUNIT_ASSERT_EQ(test, major, gt->sriov.pf.service.version.latest.major);
+ KUNIT_ASSERT_EQ(test, minor, gt->sriov.pf.service.version.latest.minor);
+}
+
+static void pf_negotiate_base_match(struct kunit *test)
+{
+ struct xe_gt *gt = test->priv;
+ u32 major, minor;
+
+ KUNIT_ASSERT_EQ(test, 0,
+ pf_negotiate_version(gt,
+ gt->sriov.pf.service.version.base.major,
+ gt->sriov.pf.service.version.base.minor,
+ &major, &minor));
+ KUNIT_ASSERT_EQ(test, major, gt->sriov.pf.service.version.base.major);
+ KUNIT_ASSERT_EQ(test, minor, gt->sriov.pf.service.version.base.minor);
+}
+
+static void pf_negotiate_base_newer(struct kunit *test)
+{
+ struct xe_gt *gt = test->priv;
+ u32 major, minor;
+
+ KUNIT_ASSERT_EQ(test, 0,
+ pf_negotiate_version(gt,
+ gt->sriov.pf.service.version.base.major,
+ gt->sriov.pf.service.version.base.minor + 1,
+ &major, &minor));
+ KUNIT_ASSERT_EQ(test, major, gt->sriov.pf.service.version.base.major);
+ KUNIT_ASSERT_GE(test, minor, gt->sriov.pf.service.version.base.minor);
+ if (gt->sriov.pf.service.version.base.major == gt->sriov.pf.service.version.latest.major)
+ KUNIT_ASSERT_LE(test, minor, gt->sriov.pf.service.version.latest.minor);
+ else
+ KUNIT_FAIL(test, "FIXME: don't know how to test multi-version yet!\n");
+}
+
+static void pf_negotiate_base_next(struct kunit *test)
+{
+ struct xe_gt *gt = test->priv;
+ u32 major, minor;
+
+ KUNIT_ASSERT_EQ(test, 0,
+ pf_negotiate_version(gt,
+ gt->sriov.pf.service.version.base.major + 1, 0,
+ &major, &minor));
+ KUNIT_ASSERT_GE(test, major, gt->sriov.pf.service.version.base.major);
+ KUNIT_ASSERT_LE(test, major, gt->sriov.pf.service.version.latest.major);
+ if (major == gt->sriov.pf.service.version.latest.major)
+ KUNIT_ASSERT_LE(test, minor, gt->sriov.pf.service.version.latest.minor);
+ else
+ KUNIT_FAIL(test, "FIXME: don't know how to test multi-version yet!\n");
+}
+
+static void pf_negotiate_base_older(struct kunit *test)
+{
+ struct xe_gt *gt = test->priv;
+ u32 major, minor;
+
+ if (!gt->sriov.pf.service.version.base.minor)
+ kunit_skip(test, "no older minor\n");
+
+ KUNIT_ASSERT_NE(test, 0,
+ pf_negotiate_version(gt,
+ gt->sriov.pf.service.version.base.major,
+ gt->sriov.pf.service.version.base.minor - 1,
+ &major, &minor));
+}
+
+static void pf_negotiate_base_prev(struct kunit *test)
+{
+ struct xe_gt *gt = test->priv;
+ u32 major, minor;
+
+ KUNIT_ASSERT_NE(test, 0,
+ pf_negotiate_version(gt,
+ gt->sriov.pf.service.version.base.major - 1, 1,
+ &major, &minor));
+}
+
+static void pf_negotiate_latest_match(struct kunit *test)
+{
+ struct xe_gt *gt = test->priv;
+ u32 major, minor;
+
+ KUNIT_ASSERT_EQ(test, 0,
+ pf_negotiate_version(gt,
+ gt->sriov.pf.service.version.latest.major,
+ gt->sriov.pf.service.version.latest.minor,
+ &major, &minor));
+ KUNIT_ASSERT_EQ(test, major, gt->sriov.pf.service.version.latest.major);
+ KUNIT_ASSERT_EQ(test, minor, gt->sriov.pf.service.version.latest.minor);
+}
+
+static void pf_negotiate_latest_newer(struct kunit *test)
+{
+ struct xe_gt *gt = test->priv;
+ u32 major, minor;
+
+ KUNIT_ASSERT_EQ(test, 0,
+ pf_negotiate_version(gt,
+ gt->sriov.pf.service.version.latest.major,
+ gt->sriov.pf.service.version.latest.minor + 1,
+ &major, &minor));
+ KUNIT_ASSERT_EQ(test, major, gt->sriov.pf.service.version.latest.major);
+ KUNIT_ASSERT_EQ(test, minor, gt->sriov.pf.service.version.latest.minor);
+}
+
+static void pf_negotiate_latest_next(struct kunit *test)
+{
+ struct xe_gt *gt = test->priv;
+ u32 major, minor;
+
+ KUNIT_ASSERT_EQ(test, 0,
+ pf_negotiate_version(gt,
+ gt->sriov.pf.service.version.latest.major + 1, 0,
+ &major, &minor));
+ KUNIT_ASSERT_EQ(test, major, gt->sriov.pf.service.version.latest.major);
+ KUNIT_ASSERT_EQ(test, minor, gt->sriov.pf.service.version.latest.minor);
+}
+
+static void pf_negotiate_latest_older(struct kunit *test)
+{
+ struct xe_gt *gt = test->priv;
+ u32 major, minor;
+
+ if (!gt->sriov.pf.service.version.latest.minor)
+ kunit_skip(test, "no older minor\n");
+
+ KUNIT_ASSERT_EQ(test, 0,
+ pf_negotiate_version(gt,
+ gt->sriov.pf.service.version.latest.major,
+ gt->sriov.pf.service.version.latest.minor - 1,
+ &major, &minor));
+ KUNIT_ASSERT_EQ(test, major, gt->sriov.pf.service.version.latest.major);
+ KUNIT_ASSERT_EQ(test, minor, gt->sriov.pf.service.version.latest.minor - 1);
+}
+
+static void pf_negotiate_latest_prev(struct kunit *test)
+{
+ struct xe_gt *gt = test->priv;
+ u32 major, minor;
+
+ if (gt->sriov.pf.service.version.base.major == gt->sriov.pf.service.version.latest.major)
+ kunit_skip(test, "no prev major");
+
+ KUNIT_ASSERT_EQ(test, 0,
+ pf_negotiate_version(gt,
+ gt->sriov.pf.service.version.latest.major - 1,
+ gt->sriov.pf.service.version.base.minor + 1,
+ &major, &minor));
+ KUNIT_ASSERT_EQ(test, major, gt->sriov.pf.service.version.latest.major - 1);
+ KUNIT_ASSERT_GE(test, major, gt->sriov.pf.service.version.base.major);
+}
+
+static struct kunit_case pf_service_test_cases[] = {
+ KUNIT_CASE(pf_negotiate_any),
+ KUNIT_CASE(pf_negotiate_base_match),
+ KUNIT_CASE(pf_negotiate_base_newer),
+ KUNIT_CASE(pf_negotiate_base_next),
+ KUNIT_CASE(pf_negotiate_base_older),
+ KUNIT_CASE(pf_negotiate_base_prev),
+ KUNIT_CASE(pf_negotiate_latest_match),
+ KUNIT_CASE(pf_negotiate_latest_newer),
+ KUNIT_CASE(pf_negotiate_latest_next),
+ KUNIT_CASE(pf_negotiate_latest_older),
+ KUNIT_CASE(pf_negotiate_latest_prev),
+ {}
+};
+
+static struct kunit_suite pf_service_suite = {
+ .name = "pf_service",
+ .test_cases = pf_service_test_cases,
+ .init = pf_service_test_init,
+};
+
+kunit_test_suite(pf_service_suite);
diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c
index 977d5f4e4490..b6e7f80c3774 100644
--- a/drivers/gpu/drm/xe/tests/xe_migrate.c
+++ b/drivers/gpu/drm/xe/tests/xe_migrate.c
@@ -62,36 +62,6 @@ static int run_sanity_job(struct xe_migrate *m, struct xe_device *xe,
return 0;
}
-static void
-sanity_populate_cb(struct xe_migrate_pt_update *pt_update,
- struct xe_tile *tile, struct iosys_map *map, void *dst,
- u32 qword_ofs, u32 num_qwords,
- const struct xe_vm_pgtable_update *update)
-{
- struct migrate_test_params *p =
- to_migrate_test_params(xe_cur_kunit_priv(XE_TEST_LIVE_MIGRATE));
- int i;
- u64 *ptr = dst;
- u64 value;
-
- for (i = 0; i < num_qwords; i++) {
- value = (qword_ofs + i - update->ofs) * 0x1111111111111111ULL;
- if (map)
- xe_map_wr(tile_to_xe(tile), map, (qword_ofs + i) *
- sizeof(u64), u64, value);
- else
- ptr[i] = value;
- }
-
- kunit_info(xe_cur_kunit(), "Used %s.\n", map ? "CPU" : "GPU");
- if (p->force_gpu && map)
- KUNIT_FAIL(xe_cur_kunit(), "GPU pagetable update used CPU.\n");
-}
-
-static const struct xe_migrate_pt_update_ops sanity_ops = {
- .populate = sanity_populate_cb,
-};
-
#define check(_retval, _expected, str, _test) \
do { if ((_retval) != (_expected)) { \
KUNIT_FAIL(_test, "Sanity check failed: " str \
@@ -209,57 +179,6 @@ static void test_copy_vram(struct xe_migrate *m, struct xe_bo *bo,
test_copy(m, bo, test, region);
}
-static void test_pt_update(struct xe_migrate *m, struct xe_bo *pt,
- struct kunit *test, bool force_gpu)
-{
- struct xe_device *xe = tile_to_xe(m->tile);
- struct dma_fence *fence;
- u64 retval, expected;
- ktime_t then, now;
- int i;
-
- struct xe_vm_pgtable_update update = {
- .ofs = 1,
- .qwords = 0x10,
- .pt_bo = pt,
- };
- struct xe_migrate_pt_update pt_update = {
- .ops = &sanity_ops,
- };
- struct migrate_test_params p = {
- .base.id = XE_TEST_LIVE_MIGRATE,
- .force_gpu = force_gpu,
- };
-
- test->priv = &p;
- /* Test xe_migrate_update_pgtables() updates the pagetable as expected */
- expected = 0xf0f0f0f0f0f0f0f0ULL;
- xe_map_memset(xe, &pt->vmap, 0, (u8)expected, pt->size);
-
- then = ktime_get();
- fence = xe_migrate_update_pgtables(m, m->q->vm, NULL, m->q, &update, 1,
- NULL, 0, &pt_update);
- now = ktime_get();
- if (sanity_fence_failed(xe, fence, "Migration pagetable update", test))
- return;
-
- kunit_info(test, "Updating without syncing took %llu us,\n",
- (unsigned long long)ktime_to_us(ktime_sub(now, then)));
-
- dma_fence_put(fence);
- retval = xe_map_rd(xe, &pt->vmap, 0, u64);
- check(retval, expected, "PTE[0] must stay untouched", test);
-
- for (i = 0; i < update.qwords; i++) {
- retval = xe_map_rd(xe, &pt->vmap, (update.ofs + i) * 8, u64);
- check(retval, i * 0x1111111111111111ULL, "PTE update", test);
- }
-
- retval = xe_map_rd(xe, &pt->vmap, 8 * (update.ofs + update.qwords),
- u64);
- check(retval, expected, "PTE[0x11] must stay untouched", test);
-}
-
static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test)
{
struct xe_tile *tile = m->tile;
@@ -398,11 +317,6 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test)
test_copy_vram(m, big, test);
}
- kunit_info(test, "Testing page table update using CPU if GPU idle.\n");
- test_pt_update(m, pt, test, false);
- kunit_info(test, "Testing page table update using GPU\n");
- test_pt_update(m, pt, test, true);
-
out:
xe_bb_free(bb, NULL);
free_tiny:
diff --git a/drivers/gpu/drm/xe/xe_args.h b/drivers/gpu/drm/xe/xe_args.h
new file mode 100644
index 000000000000..4dbc7e53c624
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_args.h
@@ -0,0 +1,143 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2024 Intel Corporation
+ */
+
+#ifndef _XE_ARGS_H_
+#define _XE_ARGS_H_
+
+#include <linux/args.h>
+
+/*
+ * Why don't the following macros have the XE prefix?
+ *
+ * Once we find more potential users outside of the Xe driver, we plan to move
+ * all of the following macros unchanged to linux/args.h.
+ */
+
+/**
+ * CALL_ARGS - Invoke a macro, but allow parameters to be expanded beforehand.
+ * @f: name of the macro to invoke
+ * @args: arguments for the macro
+ *
+ * This macro allows calling macros whose names might be generated, or where
+ * we want to make sure their arguments will be correctly expanded.
+ *
+ * Example:
+ *
+ * #define foo X,Y,Z,Q
+ * #define bar COUNT_ARGS(foo)
+ * #define buz CALL_ARGS(COUNT_ARGS, foo)
+ *
+ * With above definitions bar expands to 1 while buz expands to 4.
+ */
+#define CALL_ARGS(f, args...) __CALL_ARGS(f, args)
+#define __CALL_ARGS(f, args...) f(args)
+
+/**
+ * DROP_FIRST_ARG - Returns all arguments except the first one.
+ * @args: arguments
+ *
+ * This helper macro allows manipulating the argument list before passing it
+ * to the next level macro.
+ *
+ * Example:
+ *
+ * #define foo X,Y,Z,Q
+ * #define bar CALL_ARGS(COUNT_ARGS, DROP_FIRST_ARG(foo))
+ *
+ * With above definitions bar expands to 3.
+ */
+#define DROP_FIRST_ARG(args...) __DROP_FIRST_ARG(args)
+#define __DROP_FIRST_ARG(a, b...) b
+
+/**
+ * FIRST_ARG - Returns the first argument.
+ * @args: arguments
+ *
+ * This helper macro allows manipulating the argument list before passing it
+ * to the next level macro.
+ *
+ * Example:
+ *
+ * #define foo X,Y,Z,Q
+ * #define bar FIRST_ARG(foo)
+ *
+ * With above definitions bar expands to X.
+ */
+#define FIRST_ARG(args...) __FIRST_ARG(args)
+#define __FIRST_ARG(a, b...) a
+
+/**
+ * LAST_ARG - Returns the last argument.
+ * @args: arguments
+ *
+ * This helper macro allows manipulating the argument list before passing it
+ * to the next level macro.
+ *
+ * Like COUNT_ARGS() this macro works up to 12 arguments.
+ *
+ * Example:
+ *
+ * #define foo X,Y,Z,Q
+ * #define bar LAST_ARG(foo)
+ *
+ * With above definitions bar expands to Q.
+ */
+#define LAST_ARG(args...) __LAST_ARG(args)
+#define __LAST_ARG(args...) PICK_ARG(COUNT_ARGS(args), args)
+
+/**
+ * PICK_ARG - Returns the n-th argument.
+ * @n: argument number to be returned
+ * @args: arguments
+ *
+ * This helper macro allows manipulating the argument list before passing it
+ * to the next level macro.
+ *
+ * Like COUNT_ARGS() this macro supports n up to 12.
+ * Specialized macros PICK_ARG1() to PICK_ARG12() are also available.
+ *
+ * Example:
+ *
+ * #define foo X,Y,Z,Q
+ * #define bar PICK_ARG(2, foo)
+ * #define buz PICK_ARG3(foo)
+ *
+ * With above definitions bar expands to Y and buz expands to Z.
+ */
+#define PICK_ARG(n, args...) __PICK_ARG(n, args)
+#define __PICK_ARG(n, args...) CALL_ARGS(CONCATENATE(PICK_ARG, n), args)
+#define PICK_ARG1(args...) FIRST_ARG(args)
+#define PICK_ARG2(args...) PICK_ARG1(DROP_FIRST_ARG(args))
+#define PICK_ARG3(args...) PICK_ARG2(DROP_FIRST_ARG(args))
+#define PICK_ARG4(args...) PICK_ARG3(DROP_FIRST_ARG(args))
+#define PICK_ARG5(args...) PICK_ARG4(DROP_FIRST_ARG(args))
+#define PICK_ARG6(args...) PICK_ARG5(DROP_FIRST_ARG(args))
+#define PICK_ARG7(args...) PICK_ARG6(DROP_FIRST_ARG(args))
+#define PICK_ARG8(args...) PICK_ARG7(DROP_FIRST_ARG(args))
+#define PICK_ARG9(args...) PICK_ARG8(DROP_FIRST_ARG(args))
+#define PICK_ARG10(args...) PICK_ARG9(DROP_FIRST_ARG(args))
+#define PICK_ARG11(args...) PICK_ARG10(DROP_FIRST_ARG(args))
+#define PICK_ARG12(args...) PICK_ARG11(DROP_FIRST_ARG(args))
+
+/**
+ * ARGS_SEP_COMMA - Definition of a comma character.
+ *
+ * This definition can be used in cases where any intermediate macro expects
+ * fixed number of arguments, but we want to pass more arguments which can
+ * be properly evaluated only by the next level macro.
+ *
+ * Example:
+ *
+ * #define foo(f) f(X) f(Y) f(Z) f(Q)
+ * #define bar DROP_FIRST_ARG(foo(ARGS_SEP_COMMA __stringify))
+ * #define buz CALL_ARGS(COUNT_ARGS, DROP_FIRST_ARG(foo(ARGS_SEP_COMMA)))
+ *
+ * With above definitions bar expands to
+ * "X", "Y", "Z", "Q"
+ * and buz expands to 4.
+ */
+#define ARGS_SEP_COMMA ,
+
+#endif
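The kernel-doc above already carries per-macro examples; as a combined, purely illustrative usage sketch (MY_RUNTIME_REGS and its offsets are made up), the helpers let a macro-defined list and the code derived from it stay in sync:

#include <linux/build_bug.h>
#include <linux/kernel.h>

#include "xe_args.h"

#define MY_RUNTIME_REGS 0x2340, 0x2358, 0x2384	/* hypothetical offsets */

static const u32 my_runtime_regs[] = { MY_RUNTIME_REGS };

/* CALL_ARGS() forces expansion first, so this evaluates to 3, not 1. */
#define MY_RUNTIME_REGS_COUNT CALL_ARGS(COUNT_ARGS, MY_RUNTIME_REGS)

static_assert(ARRAY_SIZE(my_runtime_regs) == MY_RUNTIME_REGS_COUNT);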
diff --git a/drivers/gpu/drm/xe/xe_bb.c b/drivers/gpu/drm/xe/xe_bb.c
index 541361caff3b..a13e0b3a169e 100644
--- a/drivers/gpu/drm/xe/xe_bb.c
+++ b/drivers/gpu/drm/xe/xe_bb.c
@@ -6,7 +6,7 @@
#include "xe_bb.h"
#include "instructions/xe_mi_commands.h"
-#include "regs/xe_gpu_commands.h"
+#include "xe_assert.h"
#include "xe_device.h"
#include "xe_exec_queue_types.h"
#include "xe_gt.h"
diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index bc1f794e3e61..03f7fe7acf8c 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -96,6 +96,20 @@ bool xe_bo_is_stolen(struct xe_bo *bo)
}
/**
+ * xe_bo_has_single_placement - check if BO is placed only in one memory location
+ * @bo: The BO
+ *
+ * This function checks whether a given BO is placed in only one memory location.
+ *
+ * Returns: true if the BO is placed in a single memory location, false otherwise.
+ *
+ */
+bool xe_bo_has_single_placement(struct xe_bo *bo)
+{
+ return bo->placement.num_placement == 1;
+}
+
+/**
* xe_bo_is_stolen_devmem - check if BO is of stolen type accessed via PCI BAR
* @bo: The BO
*
@@ -302,6 +316,18 @@ static int xe_tt_map_sg(struct ttm_tt *tt)
return 0;
}
+static void xe_tt_unmap_sg(struct ttm_tt *tt)
+{
+ struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
+
+ if (xe_tt->sg) {
+ dma_unmap_sgtable(xe_tt->dev, xe_tt->sg,
+ DMA_BIDIRECTIONAL, 0);
+ sg_free_table(xe_tt->sg);
+ xe_tt->sg = NULL;
+ }
+}
+
struct sg_table *xe_bo_sg(struct xe_bo *bo)
{
struct ttm_tt *tt = bo->ttm.ttm;
@@ -377,27 +403,15 @@ static int xe_ttm_tt_populate(struct ttm_device *ttm_dev, struct ttm_tt *tt,
if (err)
return err;
- /* A follow up may move this xe_bo_move when BO is moved to XE_PL_TT */
- err = xe_tt_map_sg(tt);
- if (err)
- ttm_pool_free(&ttm_dev->pool, tt);
-
return err;
}
static void xe_ttm_tt_unpopulate(struct ttm_device *ttm_dev, struct ttm_tt *tt)
{
- struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
-
if (tt->page_flags & TTM_TT_FLAG_EXTERNAL)
return;
- if (xe_tt->sg) {
- dma_unmap_sgtable(xe_tt->dev, xe_tt->sg,
- DMA_BIDIRECTIONAL, 0);
- sg_free_table(xe_tt->sg);
- xe_tt->sg = NULL;
- }
+ xe_tt_unmap_sg(tt);
return ttm_pool_free(&ttm_dev->pool, tt);
}
@@ -628,17 +642,21 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
bool handle_system_ccs = (!IS_DGFX(xe) && xe_bo_needs_ccs_pages(bo) &&
ttm && ttm_tt_is_populated(ttm)) ? true : false;
int ret = 0;
+
/* Bo creation path, moving to system or TT. */
if ((!old_mem && ttm) && !handle_system_ccs) {
- ttm_bo_move_null(ttm_bo, new_mem);
- return 0;
+ if (new_mem->mem_type == XE_PL_TT)
+ ret = xe_tt_map_sg(ttm);
+ if (!ret)
+ ttm_bo_move_null(ttm_bo, new_mem);
+ goto out;
}
if (ttm_bo->type == ttm_bo_type_sg) {
ret = xe_bo_move_notify(bo, ctx);
if (!ret)
ret = xe_bo_move_dmabuf(ttm_bo, new_mem);
- goto out;
+ return ret;
}
tt_has_data = ttm && (ttm_tt_is_populated(ttm) ||
@@ -650,6 +668,12 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
needs_clear = (ttm && ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC) ||
(!ttm && ttm_bo->type == ttm_bo_type_device);
+ if (new_mem->mem_type == XE_PL_TT) {
+ ret = xe_tt_map_sg(ttm);
+ if (ret)
+ goto out;
+ }
+
if ((move_lacks_source && !needs_clear)) {
ttm_bo_move_null(ttm_bo, new_mem);
goto out;
@@ -786,8 +810,11 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
xe_pm_runtime_put(xe);
out:
- return ret;
+ if ((!ttm_bo->resource || ttm_bo->resource->mem_type == XE_PL_SYSTEM) &&
+ ttm_bo->ttm)
+ xe_tt_unmap_sg(ttm_bo->ttm);
+ return ret;
}
/**
diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h
index a885b14bf595..6de894c728f5 100644
--- a/drivers/gpu/drm/xe/xe_bo.h
+++ b/drivers/gpu/drm/xe/xe_bo.h
@@ -206,6 +206,7 @@ bool mem_type_is_vram(u32 mem_type);
bool xe_bo_is_vram(struct xe_bo *bo);
bool xe_bo_is_stolen(struct xe_bo *bo);
bool xe_bo_is_stolen_devmem(struct xe_bo *bo);
+bool xe_bo_has_single_placement(struct xe_bo *bo);
uint64_t vram_region_gpu_offset(struct ttm_resource *res);
bool xe_bo_can_migrate(struct xe_bo *bo, u32 mem_type);
diff --git a/drivers/gpu/drm/xe/xe_debugfs.c b/drivers/gpu/drm/xe/xe_debugfs.c
index 0b7aebaae843..1011e5d281fa 100644
--- a/drivers/gpu/drm/xe/xe_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_debugfs.c
@@ -12,7 +12,10 @@
#include "xe_bo.h"
#include "xe_device.h"
+#include "xe_force_wake.h"
#include "xe_gt_debugfs.h"
+#include "xe_gt_printk.h"
+#include "xe_guc_ads.h"
#include "xe_pm.h"
#include "xe_sriov.h"
#include "xe_step.h"
@@ -118,6 +121,58 @@ static const struct file_operations forcewake_all_fops = {
.release = forcewake_release,
};
+static ssize_t wedged_mode_show(struct file *f, char __user *ubuf,
+ size_t size, loff_t *pos)
+{
+ struct xe_device *xe = file_inode(f)->i_private;
+ char buf[32];
+ int len = 0;
+
+ len = scnprintf(buf, sizeof(buf), "%d\n", xe->wedged.mode);
+
+ return simple_read_from_buffer(ubuf, size, pos, buf, len);
+}
+
+static ssize_t wedged_mode_set(struct file *f, const char __user *ubuf,
+ size_t size, loff_t *pos)
+{
+ struct xe_device *xe = file_inode(f)->i_private;
+ struct xe_gt *gt;
+ u32 wedged_mode;
+ ssize_t ret;
+ u8 id;
+
+ ret = kstrtouint_from_user(ubuf, size, 0, &wedged_mode);
+ if (ret)
+ return ret;
+
+ if (wedged_mode > 2)
+ return -EINVAL;
+
+ if (xe->wedged.mode == wedged_mode)
+ return 0;
+
+ xe->wedged.mode = wedged_mode;
+
+ xe_pm_runtime_get(xe);
+ for_each_gt(gt, xe, id) {
+ ret = xe_guc_ads_scheduler_policy_toggle_reset(&gt->uc.guc.ads);
+ if (ret) {
+ xe_gt_err(gt, "Failed to update GuC ADS scheduler policy. GuC may still cause engine reset even with wedged_mode=2\n");
+ xe_pm_runtime_put(xe);
+ return -EIO;
+ }
+ }
+ xe_pm_runtime_put(xe);
+
+ return size;
+}
+
+static const struct file_operations wedged_mode_fops = {
+ .owner = THIS_MODULE,
+ .read = wedged_mode_show,
+ .write = wedged_mode_set,
+};
+
void xe_debugfs_register(struct xe_device *xe)
{
struct ttm_device *bdev = &xe->ttm;
@@ -135,6 +190,9 @@ void xe_debugfs_register(struct xe_device *xe)
debugfs_create_file("forcewake_all", 0400, root, xe,
&forcewake_all_fops);
+ debugfs_create_file("wedged_mode", 0400, root, xe,
+ &wedged_mode_fops);
+
for (mem_type = XE_PL_VRAM0; mem_type <= XE_PL_VRAM1; ++mem_type) {
man = ttm_manager_type(bdev, mem_type);
diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index c3267a21957b..f89c781986a5 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -17,6 +17,7 @@
#include <drm/xe_drm.h>
#include "display/xe_display.h"
+#include "instructions/xe_gpu_commands.h"
#include "regs/xe_gt_regs.h"
#include "regs/xe_regs.h"
#include "xe_bo.h"
@@ -27,6 +28,7 @@
#include "xe_drv.h"
#include "xe_exec.h"
#include "xe_exec_queue.h"
+#include "xe_force_wake.h"
#include "xe_ggtt.h"
#include "xe_gsc_proxy.h"
#include "xe_gt.h"
@@ -138,6 +140,9 @@ static long xe_drm_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
struct xe_device *xe = to_xe_device(file_priv->minor->dev);
long ret;
+ if (xe_device_wedged(xe))
+ return -ECANCELED;
+
ret = xe_pm_runtime_get_ioctl(xe);
if (ret >= 0)
ret = drm_ioctl(file, cmd, arg);
@@ -153,6 +158,9 @@ static long xe_drm_compat_ioctl(struct file *file, unsigned int cmd, unsigned lo
struct xe_device *xe = to_xe_device(file_priv->minor->dev);
long ret;
+ if (xe_device_wedged(xe))
+ return -ECANCELED;
+
ret = xe_pm_runtime_get_ioctl(xe);
if (ret >= 0)
ret = drm_compat_ioctl(file, cmd, arg);
@@ -269,7 +277,10 @@ struct xe_device *xe_device_create(struct pci_dev *pdev,
init_waitqueue_head(&xe->ufence_wq);
- drmm_mutex_init(&xe->drm, &xe->usm.lock);
+ err = drmm_mutex_init(&xe->drm, &xe->usm.lock);
+ if (err)
+ goto err;
+
xa_init_flags(&xe->usm.asid_to_vm, XA_FLAGS_ALLOC);
if (IS_ENABLED(CONFIG_DRM_XE_DEBUG)) {
@@ -501,6 +512,8 @@ int xe_device_probe_early(struct xe_device *xe)
if (err)
return err;
+ xe->wedged.mode = xe_modparam.wedged_mode;
+
return 0;
}
@@ -808,3 +821,34 @@ u64 xe_device_uncanonicalize_addr(struct xe_device *xe, u64 address)
{
return address & GENMASK_ULL(xe->info.va_bits - 1, 0);
}
+
+/**
+ * xe_device_declare_wedged - Declare device wedged
+ * @xe: xe device instance
+ *
+ * This is a final state that can only be cleared with a module
+ * re-probe (unbind + bind).
+ * In this state every IOCTL will be blocked so the GT cannot be used.
+ * In general it will be called upon any critical error such as gt reset
+ * failure or guc loading failure.
+ * If xe.wedged module parameter is set to 2, this function will be called
+ * on every single execution timeout (a.k.a. GPU hang) right after devcoredump
+ * snapshot capture. In this mode, GT reset won't be attempted so the state of
+ * the issue is preserved for further debugging.
+ */
+void xe_device_declare_wedged(struct xe_device *xe)
+{
+ if (xe->wedged.mode == 0) {
+ drm_dbg(&xe->drm, "Wedged mode is forcibly disabled\n");
+ return;
+ }
+
+ if (!atomic_xchg(&xe->wedged.flag, 1)) {
+ xe->needs_flr_on_fini = true;
+ drm_err(&xe->drm,
+ "CRITICAL: Xe has declared device %s as wedged.\n"
+ "IOCTLs and executions are blocked. Only a rebind may clear the failure\n"
+ "Please file a _new_ bug report at https://gitlab.freedesktop.org/drm/xe/kernel/issues/new\n",
+ dev_name(xe->drm.dev));
+ }
+}
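A hedged sketch of the intended usage follows; the real call sites for xe_device_declare_wedged() are added by other patches in this series, the error-path function below is made up for illustration, and gt_to_xe()/xe_gt_err() are assumed from the existing xe_gt.h and xe_gt_printk.h helpers.

#include <linux/err.h>

#include "xe_device.h"
#include "xe_gt.h"
#include "xe_gt_printk.h"

/* Illustrative only: a critical GT failure path declaring the device wedged. */
static void example_handle_critical_gt_error(struct xe_gt *gt, int err)
{
	if (!err)
		return;

	xe_gt_err(gt, "critical error %pe, declaring device wedged\n", ERR_PTR(err));
	xe_device_declare_wedged(gt_to_xe(gt));
}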
diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h
index 7524a71c0d84..bb07f5669dbb 100644
--- a/drivers/gpu/drm/xe/xe_device.h
+++ b/drivers/gpu/drm/xe/xe_device.h
@@ -6,15 +6,9 @@
#ifndef _XE_DEVICE_H_
#define _XE_DEVICE_H_
-struct xe_exec_queue;
-struct xe_file;
-
#include <drm/drm_util.h>
-#include "regs/xe_gpu_commands.h"
#include "xe_device_types.h"
-#include "xe_force_wake.h"
-#include "xe_macros.h"
static inline struct xe_device *to_xe_device(const struct drm_device *dev)
{
@@ -169,4 +163,11 @@ u64 xe_device_uncanonicalize_addr(struct xe_device *xe, u64 address);
void xe_device_td_flush(struct xe_device *xe);
+static inline bool xe_device_wedged(struct xe_device *xe)
+{
+ return atomic_read(&xe->wedged.flag);
+}
+
+void xe_device_declare_wedged(struct xe_device *xe);
+
#endif
diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index 7674afbb3a30..5c5e36de452a 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -281,6 +281,10 @@ struct xe_device {
u8 has_heci_gscfi:1;
/** @info.skip_guc_pc: Skip GuC based PM feature init */
u8 skip_guc_pc:1;
+ /** @info.has_atomic_enable_pte_bit: Device has atomic enable PTE bit */
+ u8 has_atomic_enable_pte_bit:1;
+ /** @info.has_device_atomics_on_smem: Supports device atomics on SMEM */
+ u8 has_device_atomics_on_smem:1;
#if IS_ENABLED(CONFIG_DRM_XE_DISPLAY)
struct {
@@ -459,6 +463,14 @@ struct xe_device {
/** @needs_flr_on_fini: requests function-reset on fini */
bool needs_flr_on_fini;
+ /** @wedged: Struct to control Wedged States and mode */
+ struct {
+ /** @wedged.flag: Xe device faced a critical error and is now blocked. */
+ atomic_t flag;
+ /** @wedged.mode: Mode controlled by kernel parameter and debugfs */
+ int mode;
+ } wedged;
+
/* private: */
#if IS_ENABLED(CONFIG_DRM_XE_DISPLAY)
diff --git a/drivers/gpu/drm/xe/xe_exec_queue.h b/drivers/gpu/drm/xe/xe_exec_queue.h
index 02ce8d204622..48f6da53a292 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue.h
+++ b/drivers/gpu/drm/xe/xe_exec_queue.h
@@ -26,6 +26,15 @@ void xe_exec_queue_fini(struct xe_exec_queue *q);
void xe_exec_queue_destroy(struct kref *ref);
void xe_exec_queue_assign_name(struct xe_exec_queue *q, u32 instance);
+static inline struct xe_exec_queue *
+xe_exec_queue_get_unless_zero(struct xe_exec_queue *q)
+{
+ if (kref_get_unless_zero(&q->refcount))
+ return q;
+
+ return NULL;
+}
+
struct xe_exec_queue *xe_exec_queue_lookup(struct xe_file *xef, u32 id);
static inline struct xe_exec_queue *xe_exec_queue_get(struct xe_exec_queue *q)
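
The new xe_exec_queue_get_unless_zero() helper is the usual kref_get_unless_zero() idiom: during a lockless lookup the object may be racing with its final put, so a reference is taken only if the count is still non-zero. A standalone sketch of that idiom follows (plain C11 atomics; demo_obj, demo_get_unless_zero and friends are illustrative names, not kernel API).

/*
 * Standalone sketch of the "get unless zero" refcount idiom: take a reference
 * only if the object has not already dropped to zero.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct demo_obj {
	atomic_uint refcount;
};

static bool demo_get_unless_zero(struct demo_obj *obj)
{
	unsigned int old = atomic_load(&obj->refcount);

	/* CAS loop: never resurrect an object whose count already hit zero. */
	while (old != 0) {
		if (atomic_compare_exchange_weak(&obj->refcount, &old, old + 1))
			return true;
	}
	return false;
}

static bool demo_put(struct demo_obj *obj)
{
	/* Returns true when this was the last reference. */
	return atomic_fetch_sub(&obj->refcount, 1) == 1;
}

int main(void)
{
	struct demo_obj obj = { .refcount = 1 };

	printf("get: %d\n", demo_get_unless_zero(&obj));	/* 1 -> 2, succeeds */
	demo_put(&obj);
	demo_put(&obj);						/* count is now 0 */
	printf("get: %d\n", demo_get_unless_zero(&obj));	/* fails, stays 0 */
	return 0;
}
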
diff --git a/drivers/gpu/drm/xe/xe_execlist.c b/drivers/gpu/drm/xe/xe_execlist.c
index dece2785933c..e9dee1e14fef 100644
--- a/drivers/gpu/drm/xe/xe_execlist.c
+++ b/drivers/gpu/drm/xe/xe_execlist.c
@@ -9,7 +9,6 @@
#include "instructions/xe_mi_commands.h"
#include "regs/xe_engine_regs.h"
-#include "regs/xe_gpu_commands.h"
#include "regs/xe_gt_regs.h"
#include "regs/xe_lrc_layout.h"
#include "xe_assert.h"
diff --git a/drivers/gpu/drm/xe/xe_gsc.c b/drivers/gpu/drm/xe/xe_gsc.c
index 60202b903687..8cc6420a9e7f 100644
--- a/drivers/gpu/drm/xe/xe_gsc.c
+++ b/drivers/gpu/drm/xe/xe_gsc.c
@@ -14,6 +14,7 @@
#include "xe_bo.h"
#include "xe_device.h"
#include "xe_exec_queue.h"
+#include "xe_force_wake.h"
#include "xe_gsc_proxy.h"
#include "xe_gsc_submit.h"
#include "xe_gt.h"
diff --git a/drivers/gpu/drm/xe/xe_gsc_proxy.c b/drivers/gpu/drm/xe/xe_gsc_proxy.c
index 1b908d238bd1..6d6d1068cf23 100644
--- a/drivers/gpu/drm/xe/xe_gsc_proxy.c
+++ b/drivers/gpu/drm/xe/xe_gsc_proxy.c
@@ -15,6 +15,7 @@
#include "abi/gsc_proxy_commands_abi.h"
#include "regs/xe_gsc_regs.h"
#include "xe_bo.h"
+#include "xe_force_wake.h"
#include "xe_gsc.h"
#include "xe_gsc_submit.h"
#include "xe_gt.h"
diff --git a/drivers/gpu/drm/xe/xe_gsc_submit.c b/drivers/gpu/drm/xe/xe_gsc_submit.c
index d34d03248843..9ede483d37ef 100644
--- a/drivers/gpu/drm/xe/xe_gsc_submit.c
+++ b/drivers/gpu/drm/xe/xe_gsc_submit.c
@@ -8,6 +8,7 @@
#include <linux/poison.h>
#include "abi/gsc_command_header_abi.h"
+#include "xe_assert.h"
#include "xe_bb.h"
#include "xe_exec_queue.h"
#include "xe_gt_printk.h"
diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index 491d0413de15..36c7b1631fa6 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -160,7 +160,7 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q)
if (q->hwe->class == XE_ENGINE_CLASS_RENDER)
/* Big enough to emit all of the context's 3DSTATE */
- bb = xe_bb_new(gt, xe_lrc_size(gt_to_xe(gt), q->hwe->class), false);
+ bb = xe_bb_new(gt, xe_gt_lrc_size(gt, q->hwe->class), false);
else
/* Just pick a large BB size */
bb = xe_bb_new(gt, SZ_4K, false);
@@ -244,7 +244,7 @@ int xe_gt_record_default_lrcs(struct xe_gt *gt)
xe_tuning_process_lrc(hwe);
default_lrc = drmm_kzalloc(&xe->drm,
- xe_lrc_size(xe, hwe->class),
+ xe_gt_lrc_size(gt, hwe->class),
GFP_KERNEL);
if (!default_lrc)
return -ENOMEM;
@@ -294,7 +294,7 @@ int xe_gt_record_default_lrcs(struct xe_gt *gt)
xe_map_memcpy_from(xe, default_lrc,
&q->lrc[0].bo->vmap,
xe_lrc_pphwsp_offset(&q->lrc[0]),
- xe_lrc_size(xe, hwe->class));
+ xe_gt_lrc_size(gt, hwe->class));
gt->default_lrc[hwe->class] = default_lrc;
put_nop_q:
@@ -477,6 +477,9 @@ static int all_fw_domain_init(struct xe_gt *gt)
if (IS_SRIOV_PF(gt_to_xe(gt)) && !xe_gt_is_media_type(gt))
xe_lmtt_init_hw(&gt_to_tile(gt)->sriov.pf.lmtt);
+ if (IS_SRIOV_PF(gt_to_xe(gt)))
+ xe_gt_sriov_pf_init_hw(gt);
+
err = xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL);
XE_WARN_ON(err);
@@ -613,6 +616,9 @@ static int do_gt_restart(struct xe_gt *gt)
if (IS_SRIOV_PF(gt_to_xe(gt)) && !xe_gt_is_media_type(gt))
xe_lmtt_init_hw(&gt_to_tile(gt)->sriov.pf.lmtt);
+ if (IS_SRIOV_PF(gt_to_xe(gt)))
+ xe_gt_sriov_pf_init_hw(gt);
+
xe_mocs_init(gt);
err = xe_uc_start(&gt->uc);
if (err)
@@ -633,6 +639,9 @@ static int gt_reset(struct xe_gt *gt)
{
int err;
+ if (xe_device_wedged(gt_to_xe(gt)))
+ return -ECANCELED;
+
/* We only support GT resets with GuC submission */
if (!xe_device_uc_enabled(gt_to_xe(gt)))
return -ENODEV;
@@ -655,9 +664,7 @@ static int gt_reset(struct xe_gt *gt)
xe_uc_stop_prepare(&gt->uc);
xe_gt_pagefault_reset(gt);
- err = xe_uc_stop(&gt->uc);
- if (err)
- goto err_out;
+ xe_uc_stop(&gt->uc);
xe_gt_tlb_invalidation_reset(gt);
@@ -685,7 +692,7 @@ err_msg:
err_fail:
xe_gt_err(gt, "reset failed (%pe)\n", ERR_PTR(err));
- gt_to_xe(gt)->needs_flr_on_fini = true;
+ xe_device_declare_wedged(gt_to_xe(gt));
return err;
}
diff --git a/drivers/gpu/drm/xe/xe_gt.h b/drivers/gpu/drm/xe/xe_gt.h
index ed6ea8057e35..8474c50b1b30 100644
--- a/drivers/gpu/drm/xe/xe_gt.h
+++ b/drivers/gpu/drm/xe/xe_gt.h
@@ -8,6 +8,7 @@
#include <drm/drm_util.h>
+#include "xe_device.h"
#include "xe_device_types.h"
#include "xe_hw_engine.h"
@@ -58,6 +59,12 @@ struct xe_hw_engine *xe_gt_hw_engine(struct xe_gt *gt,
u16 instance,
bool logical);
+static inline bool xe_gt_has_indirect_ring_state(struct xe_gt *gt)
+{
+ return gt->info.has_indirect_ring_state &&
+ xe_device_uc_enabled(gt_to_xe(gt));
+}
+
static inline bool xe_gt_is_media_type(struct xe_gt *gt)
{
return gt->info.type == XE_GT_TYPE_MEDIA;
diff --git a/drivers/gpu/drm/xe/xe_gt_ccs_mode.c b/drivers/gpu/drm/xe/xe_gt_ccs_mode.c
index 396aeb5b9924..a34c9a24dafc 100644
--- a/drivers/gpu/drm/xe/xe_gt_ccs_mode.c
+++ b/drivers/gpu/drm/xe/xe_gt_ccs_mode.c
@@ -68,8 +68,8 @@ static void __xe_gt_apply_ccs_mode(struct xe_gt *gt, u32 num_engines)
xe_mmio_write32(gt, CCS_MODE, mode);
- xe_gt_info(gt, "CCS_MODE=%x config:%08x, num_engines:%d, num_slices:%d\n",
- mode, config, num_engines, num_slices);
+ xe_gt_dbg(gt, "CCS_MODE=%x config:%08x, num_engines:%d, num_slices:%d\n",
+ mode, config, num_engines, num_slices);
}
void xe_gt_apply_ccs_mode(struct xe_gt *gt)
diff --git a/drivers/gpu/drm/xe/xe_gt_clock.c b/drivers/gpu/drm/xe/xe_gt_clock.c
index c7bca20f6b65..9ff2061133df 100644
--- a/drivers/gpu/drm/xe/xe_gt_clock.c
+++ b/drivers/gpu/drm/xe/xe_gt_clock.c
@@ -7,6 +7,7 @@
#include "regs/xe_gt_regs.h"
#include "regs/xe_regs.h"
+#include "xe_assert.h"
#include "xe_device.h"
#include "xe_gt.h"
#include "xe_macros.h"
diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c
index 8cf0b2625efc..c5e562e143fd 100644
--- a/drivers/gpu/drm/xe/xe_gt_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c
@@ -15,14 +15,17 @@
#include "xe_ggtt.h"
#include "xe_gt.h"
#include "xe_gt_mcr.h"
+#include "xe_gt_sriov_pf_debugfs.h"
#include "xe_gt_topology.h"
#include "xe_hw_engine.h"
#include "xe_lrc.h"
#include "xe_macros.h"
+#include "xe_mocs.h"
#include "xe_pat.h"
#include "xe_pm.h"
#include "xe_reg_sr.h"
#include "xe_reg_whitelist.h"
+#include "xe_sriov.h"
#include "xe_uc_debugfs.h"
#include "xe_wa.h"
@@ -200,6 +203,15 @@ static int pat(struct xe_gt *gt, struct drm_printer *p)
return 0;
}
+static int mocs(struct xe_gt *gt, struct drm_printer *p)
+{
+ xe_pm_runtime_get(gt_to_xe(gt));
+ xe_mocs_dump(gt, p);
+ xe_pm_runtime_put(gt_to_xe(gt));
+
+ return 0;
+}
+
static int rcs_default_lrc(struct xe_gt *gt, struct drm_printer *p)
{
xe_pm_runtime_get(gt_to_xe(gt));
@@ -255,6 +267,7 @@ static const struct drm_info_list debugfs_list[] = {
{"register-save-restore", .show = xe_gt_debugfs_simple_show, .data = register_save_restore},
{"workarounds", .show = xe_gt_debugfs_simple_show, .data = workarounds},
{"pat", .show = xe_gt_debugfs_simple_show, .data = pat},
+ {"mocs", .show = xe_gt_debugfs_simple_show, .data = mocs},
{"default_lrc_rcs", .show = xe_gt_debugfs_simple_show, .data = rcs_default_lrc},
{"default_lrc_ccs", .show = xe_gt_debugfs_simple_show, .data = ccs_default_lrc},
{"default_lrc_bcs", .show = xe_gt_debugfs_simple_show, .data = bcs_default_lrc},
@@ -290,4 +303,7 @@ void xe_gt_debugfs_register(struct xe_gt *gt)
root, minor);
xe_uc_debugfs_register(&gt->uc, root);
+
+ if (IS_SRIOV_PF(xe))
+ xe_gt_sriov_pf_debugfs_register(gt, root);
}
diff --git a/drivers/gpu/drm/xe/xe_gt_idle.c b/drivers/gpu/drm/xe/xe_gt_idle.c
index 8fc0f3f6ecc5..a4f6f0a96d05 100644
--- a/drivers/gpu/drm/xe/xe_gt_idle.c
+++ b/drivers/gpu/drm/xe/xe_gt_idle.c
@@ -5,6 +5,7 @@
#include <drm/drm_managed.h>
+#include "xe_force_wake.h"
#include "xe_device.h"
#include "xe_gt.h"
#include "xe_gt_idle.h"
diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.h b/drivers/gpu/drm/xe/xe_gt_mcr.h
index a7f4ab1aa584..e7d03e001a49 100644
--- a/drivers/gpu/drm/xe/xe_gt_mcr.h
+++ b/drivers/gpu/drm/xe/xe_gt_mcr.h
@@ -40,4 +40,28 @@ void xe_gt_mcr_get_dss_steering(struct xe_gt *gt, unsigned int dss, u16 *group,
for_each_dss((dss), (gt)) \
for_each_if((xe_gt_mcr_get_dss_steering((gt), (dss), &(group), &(instance)), true))
+/*
+ * Loop over each DSS available for geometry and determine the group and
+ * instance IDs that should be used to steer MCR accesses toward this DSS.
+ * @dss: DSS ID to obtain steering for
+ * @gt: GT structure
+ * @group: steering group ID, data type: u16
+ * @instance: steering instance ID, data type: u16
+ */
+#define for_each_geometry_dss(dss, gt, group, instance) \
+ for_each_dss_steering(dss, gt, group, instance) \
+ if (xe_gt_has_geometry_dss(gt, dss))
+
+/*
+ * Loop over each DSS available for compute and determine the group and
+ * instance IDs that should be used to steer MCR accesses toward this DSS.
+ * @dss: DSS ID to obtain steering for
+ * @gt: GT structure
+ * @group: steering group ID, data type: u16
+ * @instance: steering instance ID, data type: u16
+ */
+#define for_each_compute_dss(dss, gt, group, instance) \
+ for_each_dss_steering(dss, gt, group, instance) \
+ if (xe_gt_has_compute_dss(gt, dss))
+
#endif /* _XE_GT_MCR_H_ */
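
The two macros added above simply compose the existing for_each_dss_steering() iterator with a per-DSS predicate by appending a plain if, which keeps the loop body syntax unchanged for callers. The short standalone sketch below shows the same composition trick with made-up names (for_each_set_bit_demo and so on); it only illustrates the macro pattern and is not the Xe macros.

/*
 * Standalone illustration of composing an iterator macro with a filter
 * predicate via a trailing "if". Names are illustrative only.
 */
#include <stdio.h>

#define MAX_BITS 32

/* Base iterator: visit every set bit in @mask. */
#define for_each_set_bit_demo(bit, mask) \
	for ((bit) = 0; (bit) < MAX_BITS; (bit)++) \
		if ((mask) & (1u << (bit)))

/* Filtered iterator: only visit set bits that also satisfy a predicate. */
#define for_each_even_set_bit_demo(bit, mask) \
	for_each_set_bit_demo(bit, mask) \
		if (((bit) % 2) == 0)

int main(void)
{
	unsigned int mask = 0x2d;	/* bits 0, 2, 3, 5 */
	unsigned int bit;

	for_each_set_bit_demo(bit, mask)
		printf("set bit: %u\n", bit);

	for_each_even_set_bit_demo(bit, mask)
		printf("even set bit: %u\n", bit);

	return 0;
}
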
diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c
index fa9e9853c53b..040dd142c49c 100644
--- a/drivers/gpu/drm/xe/xe_gt_pagefault.c
+++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c
@@ -19,7 +19,6 @@
#include "xe_guc.h"
#include "xe_guc_ct.h"
#include "xe_migrate.h"
-#include "xe_pt.h"
#include "xe_trace.h"
#include "xe_vm.h"
@@ -204,15 +203,14 @@ retry_userptr:
drm_exec_retry_on_contention(&exec);
if (ret)
goto unlock_dma_resv;
- }
- /* Bind VMA only to the GT that has faulted */
- trace_xe_vma_pf_bind(vma);
- fence = __xe_pt_bind_vma(tile, vma, xe_tile_migrate_engine(tile), NULL, 0,
- vma->tile_present & BIT(tile->id));
- if (IS_ERR(fence)) {
- ret = PTR_ERR(fence);
- goto unlock_dma_resv;
+ /* Bind VMA only to the GT that has faulted */
+ trace_xe_vma_pf_bind(vma);
+ fence = xe_vma_rebind(vm, vma, BIT(tile->id));
+ if (IS_ERR(fence)) {
+ ret = PTR_ERR(fence);
+ goto unlock_dma_resv;
+ }
}
/*
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf.c
index 791dcdd767e2..7decf71c2b7d 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf.c
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf.c
@@ -5,8 +5,12 @@
#include <drm/drm_managed.h>
+#include "regs/xe_sriov_regs.h"
+
#include "xe_gt_sriov_pf.h"
#include "xe_gt_sriov_pf_helpers.h"
+#include "xe_gt_sriov_pf_service.h"
+#include "xe_mmio.h"
/*
* VF's metadata is maintained in the flexible array where:
@@ -48,5 +52,33 @@ int xe_gt_sriov_pf_init_early(struct xe_gt *gt)
if (err)
return err;
+ err = xe_gt_sriov_pf_service_init(gt);
+ if (err)
+ return err;
+
return 0;
}
+
+static bool pf_needs_enable_ggtt_guest_update(struct xe_device *xe)
+{
+ return GRAPHICS_VERx100(xe) == 1200;
+}
+
+static void pf_enable_ggtt_guest_update(struct xe_gt *gt)
+{
+ xe_mmio_write32(gt, VIRTUAL_CTRL_REG, GUEST_GTT_UPDATE_EN);
+}
+
+/**
+ * xe_gt_sriov_pf_init_hw - Initialize SR-IOV hardware support.
+ * @gt: the &xe_gt to initialize
+ *
+ * On some platforms the PF must explicitly enable VF's access to the GGTT.
+ */
+void xe_gt_sriov_pf_init_hw(struct xe_gt *gt)
+{
+ if (pf_needs_enable_ggtt_guest_update(gt_to_xe(gt)))
+ pf_enable_ggtt_guest_update(gt);
+
+ xe_gt_sriov_pf_service_update(gt);
+}
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf.h
index 05142ffc4319..37d7d6c3df03 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf.h
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf.h
@@ -10,11 +10,16 @@ struct xe_gt;
#ifdef CONFIG_PCI_IOV
int xe_gt_sriov_pf_init_early(struct xe_gt *gt);
+void xe_gt_sriov_pf_init_hw(struct xe_gt *gt);
#else
static inline int xe_gt_sriov_pf_init_early(struct xe_gt *gt)
{
return 0;
}
+
+static inline void xe_gt_sriov_pf_init_hw(struct xe_gt *gt)
+{
+}
#endif
#endif
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c
index 79116ad58620..7eac01e04cc5 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c
@@ -187,14 +187,20 @@ static int pf_push_vf_cfg_dbs(struct xe_gt *gt, unsigned int vfid, u32 begin, u3
return pf_push_vf_cfg_klvs(gt, vfid, 2, klvs, ARRAY_SIZE(klvs));
}
-static int pf_push_vf_cfg_exec_quantum(struct xe_gt *gt, unsigned int vfid, u32 exec_quantum)
+static int pf_push_vf_cfg_exec_quantum(struct xe_gt *gt, unsigned int vfid, u32 *exec_quantum)
{
- return pf_push_vf_cfg_u32(gt, vfid, GUC_KLV_VF_CFG_EXEC_QUANTUM_KEY, exec_quantum);
+ /* GuC will silently clamp values exceeding max */
+ *exec_quantum = min_t(u32, *exec_quantum, GUC_KLV_VF_CFG_EXEC_QUANTUM_MAX_VALUE);
+
+ return pf_push_vf_cfg_u32(gt, vfid, GUC_KLV_VF_CFG_EXEC_QUANTUM_KEY, *exec_quantum);
}
-static int pf_push_vf_cfg_preempt_timeout(struct xe_gt *gt, unsigned int vfid, u32 preempt_timeout)
+static int pf_push_vf_cfg_preempt_timeout(struct xe_gt *gt, unsigned int vfid, u32 *preempt_timeout)
{
- return pf_push_vf_cfg_u32(gt, vfid, GUC_KLV_VF_CFG_PREEMPT_TIMEOUT_KEY, preempt_timeout);
+ /* GuC will silently clamp values exceeding max */
+ *preempt_timeout = min_t(u32, *preempt_timeout, GUC_KLV_VF_CFG_PREEMPT_TIMEOUT_MAX_VALUE);
+
+ return pf_push_vf_cfg_u32(gt, vfid, GUC_KLV_VF_CFG_PREEMPT_TIMEOUT_KEY, *preempt_timeout);
}
static int pf_push_vf_cfg_lmem(struct xe_gt *gt, unsigned int vfid, u64 size)
@@ -1604,7 +1610,7 @@ static int pf_provision_exec_quantum(struct xe_gt *gt, unsigned int vfid,
struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid);
int err;
- err = pf_push_vf_cfg_exec_quantum(gt, vfid, exec_quantum);
+ err = pf_push_vf_cfg_exec_quantum(gt, vfid, &exec_quantum);
if (unlikely(err))
return err;
@@ -1674,7 +1680,7 @@ static int pf_provision_preempt_timeout(struct xe_gt *gt, unsigned int vfid,
struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid);
int err;
- err = pf_push_vf_cfg_preempt_timeout(gt, vfid, preempt_timeout);
+ err = pf_push_vf_cfg_preempt_timeout(gt, vfid, &preempt_timeout);
if (unlikely(err))
return err;
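
The reason the exec-quantum and preempt-timeout pushers now take pointers is visible in the comments above: the GuC silently clamps out-of-range values, so the PF clamps first and writes the clamped value back, keeping its cached provisioning data in sync with what the firmware will actually use. A tiny standalone sketch of that clamp-and-write-back pattern follows; the limit and helper names are invented for illustration.

/*
 * Standalone sketch: clamp the requested value to the firmware maximum
 * *before* sending it, and report the clamped value back through the pointer
 * so the cached config stays in sync.
 */
#include <stdint.h>
#include <stdio.h>

#define DEMO_EXEC_QUANTUM_MAX 100000u	/* hypothetical firmware limit */

static int demo_push_to_firmware(uint32_t value)
{
	/* Stand-in for the real KLV push; pretend it always succeeds. */
	printf("pushed exec quantum = %u\n", value);
	return 0;
}

static int demo_push_exec_quantum(uint32_t *exec_quantum)
{
	/* Firmware would silently clamp anyway - do it explicitly here. */
	if (*exec_quantum > DEMO_EXEC_QUANTUM_MAX)
		*exec_quantum = DEMO_EXEC_QUANTUM_MAX;

	return demo_push_to_firmware(*exec_quantum);
}

int main(void)
{
	uint32_t requested = 5000000;	/* caller asked for more than allowed */

	if (!demo_push_exec_quantum(&requested))
		printf("cached value after push = %u\n", requested);
	return 0;
}
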
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c
new file mode 100644
index 000000000000..5102035faa7e
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c
@@ -0,0 +1,348 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023-2024 Intel Corporation
+ */
+
+#include <linux/debugfs.h>
+
+#include <drm/drm_print.h>
+#include <drm/drm_debugfs.h>
+
+#include "xe_bo.h"
+#include "xe_debugfs.h"
+#include "xe_device.h"
+#include "xe_gt.h"
+#include "xe_gt_debugfs.h"
+#include "xe_gt_sriov_pf_config.h"
+#include "xe_gt_sriov_pf_control.h"
+#include "xe_gt_sriov_pf_debugfs.h"
+#include "xe_gt_sriov_pf_helpers.h"
+#include "xe_gt_sriov_pf_policy.h"
+#include "xe_gt_sriov_pf_service.h"
+#include "xe_pm.h"
+
+/*
+ * /sys/kernel/debug/dri/0/
+ * ├── gt0 # d_inode->i_private = gt
+ * │   ├── pf # d_inode->i_private = gt
+ * │   ├── vf1 # d_inode->i_private = VFID(1)
+ * :   :
+ * │   ├── vfN # d_inode->i_private = VFID(N)
+ */
+
+static void *extract_priv(struct dentry *d)
+{
+ return d->d_inode->i_private;
+}
+
+static struct xe_gt *extract_gt(struct dentry *d)
+{
+ return extract_priv(d->d_parent);
+}
+
+static unsigned int extract_vfid(struct dentry *d)
+{
+ return extract_priv(d) == extract_gt(d) ? PFID : (uintptr_t)extract_priv(d);
+}
+
+/*
+ * /sys/kernel/debug/dri/0/
+ * ├── gt0
+ * │   ├── pf
+ * │   │   ├── ggtt_available
+ * │   │   ├── ggtt_provisioned
+ * │   │   ├── contexts_provisioned
+ * │   │   ├── doorbells_provisioned
+ * │   │   ├── runtime_registers
+ * │   │   ├── negotiated_versions
+ */
+
+static const struct drm_info_list pf_info[] = {
+ {
+ "ggtt_available",
+ .show = xe_gt_debugfs_simple_show,
+ .data = xe_gt_sriov_pf_config_print_available_ggtt,
+ },
+ {
+ "ggtt_provisioned",
+ .show = xe_gt_debugfs_simple_show,
+ .data = xe_gt_sriov_pf_config_print_ggtt,
+ },
+ {
+ "contexts_provisioned",
+ .show = xe_gt_debugfs_simple_show,
+ .data = xe_gt_sriov_pf_config_print_ctxs,
+ },
+ {
+ "doorbells_provisioned",
+ .show = xe_gt_debugfs_simple_show,
+ .data = xe_gt_sriov_pf_config_print_dbs,
+ },
+ {
+ "runtime_registers",
+ .show = xe_gt_debugfs_simple_show,
+ .data = xe_gt_sriov_pf_service_print_runtime,
+ },
+ {
+ "negotiated_versions",
+ .show = xe_gt_debugfs_simple_show,
+ .data = xe_gt_sriov_pf_service_print_version,
+ },
+};
+
+/*
+ * /sys/kernel/debug/dri/0/
+ * ├── gt0
+ * │   ├── pf
+ * │   │   ├── reset_engine
+ * │   │   ├── sample_period
+ * │   │   ├── sched_if_idle
+ */
+
+#define DEFINE_SRIOV_GT_POLICY_DEBUGFS_ATTRIBUTE(POLICY, TYPE, FORMAT) \
+ \
+static int POLICY##_set(void *data, u64 val) \
+{ \
+ struct xe_gt *gt = extract_gt(data); \
+ struct xe_device *xe = gt_to_xe(gt); \
+ int err; \
+ \
+ if (val > (TYPE)~0ull) \
+ return -EOVERFLOW; \
+ \
+ xe_pm_runtime_get(xe); \
+ err = xe_gt_sriov_pf_policy_set_##POLICY(gt, val); \
+ xe_pm_runtime_put(xe); \
+ \
+ return err; \
+} \
+ \
+static int POLICY##_get(void *data, u64 *val) \
+{ \
+ struct xe_gt *gt = extract_gt(data); \
+ \
+ *val = xe_gt_sriov_pf_policy_get_##POLICY(gt); \
+ return 0; \
+} \
+ \
+DEFINE_DEBUGFS_ATTRIBUTE(POLICY##_fops, POLICY##_get, POLICY##_set, FORMAT)
+
+DEFINE_SRIOV_GT_POLICY_DEBUGFS_ATTRIBUTE(reset_engine, bool, "%llu\n");
+DEFINE_SRIOV_GT_POLICY_DEBUGFS_ATTRIBUTE(sched_if_idle, bool, "%llu\n");
+DEFINE_SRIOV_GT_POLICY_DEBUGFS_ATTRIBUTE(sample_period, u32, "%llu\n");
+
+static void pf_add_policy_attrs(struct xe_gt *gt, struct dentry *parent)
+{
+ xe_gt_assert(gt, gt == extract_gt(parent));
+ xe_gt_assert(gt, PFID == extract_vfid(parent));
+
+ debugfs_create_file_unsafe("reset_engine", 0644, parent, parent, &reset_engine_fops);
+ debugfs_create_file_unsafe("sched_if_idle", 0644, parent, parent, &sched_if_idle_fops);
+ debugfs_create_file_unsafe("sample_period_ms", 0644, parent, parent, &sample_period_fops);
+}
+
+/*
+ * /sys/kernel/debug/dri/0/
+ * ├── gt0
+ * │   ├── pf
+ * │   │   ├── ggtt_spare
+ * │   │   ├── lmem_spare
+ * │   │   ├── doorbells_spare
+ * │   │   ├── contexts_spare
+ * │   │   ├── exec_quantum_ms
+ * │   │   ├── preempt_timeout_us
+ * │   ├── vf1
+ * │   │   ├── ggtt_quota
+ * │   │   ├── lmem_quota
+ * │   │   ├── doorbells_quota
+ * │   │   ├── contexts_quota
+ * │   │   ├── exec_quantum_ms
+ * │   │   ├── preempt_timeout_us
+ */
+
+#define DEFINE_SRIOV_GT_CONFIG_DEBUGFS_ATTRIBUTE(CONFIG, TYPE, FORMAT) \
+ \
+static int CONFIG##_set(void *data, u64 val) \
+{ \
+ struct xe_gt *gt = extract_gt(data); \
+ unsigned int vfid = extract_vfid(data); \
+ struct xe_device *xe = gt_to_xe(gt); \
+ int err; \
+ \
+ if (val > (TYPE)~0ull) \
+ return -EOVERFLOW; \
+ \
+ xe_pm_runtime_get(xe); \
+ err = xe_gt_sriov_pf_config_set_##CONFIG(gt, vfid, val); \
+ xe_pm_runtime_put(xe); \
+ \
+ return err; \
+} \
+ \
+static int CONFIG##_get(void *data, u64 *val) \
+{ \
+ struct xe_gt *gt = extract_gt(data); \
+ unsigned int vfid = extract_vfid(data); \
+ \
+ *val = xe_gt_sriov_pf_config_get_##CONFIG(gt, vfid); \
+ return 0; \
+} \
+ \
+DEFINE_DEBUGFS_ATTRIBUTE(CONFIG##_fops, CONFIG##_get, CONFIG##_set, FORMAT)
+
+DEFINE_SRIOV_GT_CONFIG_DEBUGFS_ATTRIBUTE(ggtt, u64, "%llu\n");
+DEFINE_SRIOV_GT_CONFIG_DEBUGFS_ATTRIBUTE(lmem, u64, "%llu\n");
+DEFINE_SRIOV_GT_CONFIG_DEBUGFS_ATTRIBUTE(ctxs, u32, "%llu\n");
+DEFINE_SRIOV_GT_CONFIG_DEBUGFS_ATTRIBUTE(dbs, u32, "%llu\n");
+DEFINE_SRIOV_GT_CONFIG_DEBUGFS_ATTRIBUTE(exec_quantum, u32, "%llu\n");
+DEFINE_SRIOV_GT_CONFIG_DEBUGFS_ATTRIBUTE(preempt_timeout, u32, "%llu\n");
+
+static void pf_add_config_attrs(struct xe_gt *gt, struct dentry *parent, unsigned int vfid)
+{
+ xe_gt_assert(gt, gt == extract_gt(parent));
+ xe_gt_assert(gt, vfid == extract_vfid(parent));
+
+ if (!xe_gt_is_media_type(gt)) {
+ debugfs_create_file_unsafe(vfid ? "ggtt_quota" : "ggtt_spare",
+ 0644, parent, parent, &ggtt_fops);
+ if (IS_DGFX(gt_to_xe(gt)))
+ debugfs_create_file_unsafe(vfid ? "lmem_quota" : "lmem_spare",
+ 0644, parent, parent, &lmem_fops);
+ }
+ debugfs_create_file_unsafe(vfid ? "doorbells_quota" : "doorbells_spare",
+ 0644, parent, parent, &dbs_fops);
+ debugfs_create_file_unsafe(vfid ? "contexts_quota" : "contexts_spare",
+ 0644, parent, parent, &ctxs_fops);
+ debugfs_create_file_unsafe("exec_quantum_ms", 0644, parent, parent,
+ &exec_quantum_fops);
+ debugfs_create_file_unsafe("preempt_timeout_us", 0644, parent, parent,
+ &preempt_timeout_fops);
+}
+
+/*
+ * /sys/kernel/debug/dri/0/
+ * ├── gt0
+ * │   ├── vf1
+ * │   │   ├── control { stop, pause, resume }
+ */
+
+static const struct {
+ const char *cmd;
+ int (*fn)(struct xe_gt *gt, unsigned int vfid);
+} control_cmds[] = {
+ { "stop", xe_gt_sriov_pf_control_stop_vf },
+ { "pause", xe_gt_sriov_pf_control_pause_vf },
+ { "resume", xe_gt_sriov_pf_control_resume_vf },
+};
+
+static ssize_t control_write(struct file *file, const char __user *buf, size_t count, loff_t *pos)
+{
+ struct dentry *dent = file_dentry(file);
+ struct dentry *parent = dent->d_parent;
+ struct xe_gt *gt = extract_gt(parent);
+ struct xe_device *xe = gt_to_xe(gt);
+ unsigned int vfid = extract_vfid(parent);
+ int ret = -EINVAL;
+ char cmd[32];
+ size_t n;
+
+ xe_gt_assert(gt, vfid);
+ xe_gt_sriov_pf_assert_vfid(gt, vfid);
+
+ if (*pos)
+ return -ESPIPE;
+
+ if (count > sizeof(cmd) - 1)
+ return -EINVAL;
+
+ ret = simple_write_to_buffer(cmd, sizeof(cmd) - 1, pos, buf, count);
+ if (ret < 0)
+ return ret;
+ cmd[ret] = '\0';
+
+ for (n = 0; n < ARRAY_SIZE(control_cmds); n++) {
+ xe_gt_assert(gt, sizeof(cmd) > strlen(control_cmds[n].cmd));
+
+ if (sysfs_streq(cmd, control_cmds[n].cmd)) {
+ xe_pm_runtime_get(xe);
+ ret = control_cmds[n].fn ? (*control_cmds[n].fn)(gt, vfid) : 0;
+ xe_pm_runtime_put(xe);
+ break;
+ }
+ }
+
+ return (ret < 0) ? ret : count;
+}
+
+static ssize_t control_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
+{
+ char help[128];
+ size_t n;
+
+ help[0] = '\0';
+ for (n = 0; n < ARRAY_SIZE(control_cmds); n++) {
+ strlcat(help, control_cmds[n].cmd, sizeof(help));
+ strlcat(help, "\n", sizeof(help));
+ }
+
+ return simple_read_from_buffer(buf, count, ppos, help, strlen(help));
+}
+
+static const struct file_operations control_ops = {
+ .owner = THIS_MODULE,
+ .open = simple_open,
+ .write = control_write,
+ .read = control_read,
+ .llseek = default_llseek,
+};
+
+/**
+ * xe_gt_sriov_pf_debugfs_register - Register SR-IOV PF specific entries in GT debugfs.
+ * @gt: the &xe_gt to register
+ * @root: the &dentry that represents the GT directory
+ *
+ * Register SR-IOV PF entries that are GT related and must be shown under GT debugfs.
+ */
+void xe_gt_sriov_pf_debugfs_register(struct xe_gt *gt, struct dentry *root)
+{
+ struct xe_device *xe = gt_to_xe(gt);
+ struct drm_minor *minor = xe->drm.primary;
+ int n, totalvfs = xe_sriov_pf_get_totalvfs(xe);
+ struct dentry *pfdentry;
+ struct dentry *vfdentry;
+ char buf[14]; /* should be enough up to "vf%u\0" for 2^32 - 1 */
+
+ xe_gt_assert(gt, IS_SRIOV_PF(xe));
+ xe_gt_assert(gt, root->d_inode->i_private == gt);
+
+ /*
+ * /sys/kernel/debug/dri/0/
+ * ├── gt0
+ * │   ├── pf
+ */
+ pfdentry = debugfs_create_dir("pf", root);
+ if (IS_ERR(pfdentry))
+ return;
+ pfdentry->d_inode->i_private = gt;
+
+ drm_debugfs_create_files(pf_info, ARRAY_SIZE(pf_info), pfdentry, minor);
+ pf_add_policy_attrs(gt, pfdentry);
+ pf_add_config_attrs(gt, pfdentry, PFID);
+
+ for (n = 1; n <= totalvfs; n++) {
+ /*
+ * /sys/kernel/debug/dri/0/
+ * ├── gt0
+ * │   ├── vf1
+ * │   ├── vf2
+ */
+ snprintf(buf, sizeof(buf), "vf%u", n);
+ vfdentry = debugfs_create_dir(buf, root);
+ if (IS_ERR(vfdentry))
+ break;
+ vfdentry->d_inode->i_private = (void *)(uintptr_t)n;
+
+ pf_add_config_attrs(gt, vfdentry, VFID(n));
+ debugfs_create_file("control", 0600, vfdentry, NULL, &control_ops);
+ }
+}
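
One non-obvious detail in the new debugfs code is how extract_vfid() tells the pf directory apart from the vfN directories: the pf directory reuses the gt pointer as its i_private, while each vfN directory stores the VF number cast through uintptr_t, so a child whose private data equals its parent's must be the PF. The standalone sketch below reproduces just that encoding with invented names (demo_dir, demo_extract_vfid) and assumes PFID is 0, as the comparison in the driver implies.

/*
 * Standalone sketch of the void* encoding: a directory's private pointer
 * holds either the owning object (PF case) or a small integer id cast
 * through uintptr_t (VF case).
 */
#include <stdint.h>
#include <stdio.h>

#define DEMO_PFID 0u

struct demo_gt { int dummy; };

struct demo_dir {
	void *priv;			/* plays the role of d_inode->i_private */
	const struct demo_dir *parent;
};

static unsigned int demo_extract_vfid(const struct demo_dir *d)
{
	/* The parent always holds the gt pointer; equal private data means "pf". */
	return d->priv == d->parent->priv ? DEMO_PFID : (unsigned int)(uintptr_t)d->priv;
}

int main(void)
{
	struct demo_gt gt;
	struct demo_dir root = { .priv = &gt, .parent = NULL };
	struct demo_dir pf   = { .priv = &gt, .parent = &root };
	struct demo_dir vf3  = { .priv = (void *)(uintptr_t)3, .parent = &root };

	printf("pf  -> vfid %u\n", demo_extract_vfid(&pf));	/* 0 (PFID) */
	printf("vf3 -> vfid %u\n", demo_extract_vfid(&vf3));	/* 3 */
	return 0;
}
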
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.h
new file mode 100644
index 000000000000..038cc8ddc244
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023-2024 Intel Corporation
+ */
+
+#ifndef _XE_GT_SRIOV_PF_DEBUGFS_H_
+#define _XE_GT_SRIOV_PF_DEBUGFS_H_
+
+struct xe_gt;
+struct dentry;
+
+#ifdef CONFIG_PCI_IOV
+void xe_gt_sriov_pf_debugfs_register(struct xe_gt *gt, struct dentry *root);
+#else
+static inline void xe_gt_sriov_pf_debugfs_register(struct xe_gt *gt, struct dentry *root) { }
+#endif
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c
new file mode 100644
index 000000000000..0e23b7ea4f3e
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c
@@ -0,0 +1,550 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023-2024 Intel Corporation
+ */
+
+#include <drm/drm_managed.h>
+
+#include "abi/guc_actions_sriov_abi.h"
+#include "abi/guc_relay_actions_abi.h"
+
+#include "regs/xe_gt_regs.h"
+#include "regs/xe_guc_regs.h"
+#include "regs/xe_regs.h"
+
+#include "xe_mmio.h"
+#include "xe_gt_sriov_printk.h"
+#include "xe_gt_sriov_pf_helpers.h"
+#include "xe_gt_sriov_pf_service.h"
+#include "xe_gt_sriov_pf_service_types.h"
+#include "xe_guc_ct.h"
+#include "xe_guc_hxg_helpers.h"
+
+static void pf_init_versions(struct xe_gt *gt)
+{
+ BUILD_BUG_ON(!GUC_RELAY_VERSION_BASE_MAJOR && !GUC_RELAY_VERSION_BASE_MINOR);
+ BUILD_BUG_ON(GUC_RELAY_VERSION_BASE_MAJOR > GUC_RELAY_VERSION_LATEST_MAJOR);
+
+ /* base versions may differ between platforms */
+ gt->sriov.pf.service.version.base.major = GUC_RELAY_VERSION_BASE_MAJOR;
+ gt->sriov.pf.service.version.base.minor = GUC_RELAY_VERSION_BASE_MINOR;
+
+ /* the latest version is the same for all platforms */
+ gt->sriov.pf.service.version.latest.major = GUC_RELAY_VERSION_LATEST_MAJOR;
+ gt->sriov.pf.service.version.latest.minor = GUC_RELAY_VERSION_LATEST_MINOR;
+}
+
+/* Return: 0 on success or a negative error code on failure. */
+static int pf_negotiate_version(struct xe_gt *gt,
+ u32 wanted_major, u32 wanted_minor,
+ u32 *major, u32 *minor)
+{
+ struct xe_gt_sriov_pf_service_version base = gt->sriov.pf.service.version.base;
+ struct xe_gt_sriov_pf_service_version latest = gt->sriov.pf.service.version.latest;
+
+ xe_gt_assert(gt, base.major);
+ xe_gt_assert(gt, base.major <= latest.major);
+ xe_gt_assert(gt, (base.major < latest.major) || (base.minor <= latest.minor));
+
+ /* VF doesn't care - return our latest */
+ if (wanted_major == VF2PF_HANDSHAKE_MAJOR_ANY &&
+ wanted_minor == VF2PF_HANDSHAKE_MINOR_ANY) {
+ *major = latest.major;
+ *minor = latest.minor;
+ return 0;
+ }
+
+ /* VF wants newer than ours - return our latest */
+ if (wanted_major > latest.major) {
+ *major = latest.major;
+ *minor = latest.minor;
+ return 0;
+ }
+
+ /* VF wants older than min required - reject */
+ if (wanted_major < base.major ||
+ (wanted_major == base.major && wanted_minor < base.minor)) {
+ return -EPERM;
+ }
+
+ /* previous major - return wanted, as we should still support it */
+ if (wanted_major < latest.major) {
+ /* XXX: we are not prepared for multi-versions yet */
+ xe_gt_assert(gt, base.major == latest.major);
+ return -ENOPKG;
+ }
+
+ /* same major - return common minor */
+ *major = wanted_major;
+ *minor = min_t(u32, latest.minor, wanted_minor);
+ return 0;
+}
+
+static void pf_connect(struct xe_gt *gt, u32 vfid, u32 major, u32 minor)
+{
+ xe_gt_sriov_pf_assert_vfid(gt, vfid);
+ xe_gt_assert(gt, major || minor);
+
+ gt->sriov.pf.vfs[vfid].version.major = major;
+ gt->sriov.pf.vfs[vfid].version.minor = minor;
+}
+
+static void pf_disconnect(struct xe_gt *gt, u32 vfid)
+{
+ xe_gt_sriov_pf_assert_vfid(gt, vfid);
+
+ gt->sriov.pf.vfs[vfid].version.major = 0;
+ gt->sriov.pf.vfs[vfid].version.minor = 0;
+}
+
+static bool pf_is_negotiated(struct xe_gt *gt, u32 vfid, u32 major, u32 minor)
+{
+ xe_gt_sriov_pf_assert_vfid(gt, vfid);
+
+ return major == gt->sriov.pf.vfs[vfid].version.major &&
+ minor <= gt->sriov.pf.vfs[vfid].version.minor;
+}
+
+static const struct xe_reg tgl_runtime_regs[] = {
+ RPM_CONFIG0, /* _MMIO(0x0d00) */
+ MIRROR_FUSE3, /* _MMIO(0x9118) */
+ XELP_EU_ENABLE, /* _MMIO(0x9134) */
+ XELP_GT_SLICE_ENABLE, /* _MMIO(0x9138) */
+ XELP_GT_GEOMETRY_DSS_ENABLE, /* _MMIO(0x913c) */
+ GT_VEBOX_VDBOX_DISABLE, /* _MMIO(0x9140) */
+ CTC_MODE, /* _MMIO(0xa26c) */
+ HUC_KERNEL_LOAD_INFO, /* _MMIO(0xc1dc) */
+ TIMESTAMP_OVERRIDE, /* _MMIO(0x44074) */
+};
+
+static const struct xe_reg ats_m_runtime_regs[] = {
+ RPM_CONFIG0, /* _MMIO(0x0d00) */
+ MIRROR_FUSE3, /* _MMIO(0x9118) */
+ MIRROR_FUSE1, /* _MMIO(0x911c) */
+ XELP_EU_ENABLE, /* _MMIO(0x9134) */
+ XELP_GT_GEOMETRY_DSS_ENABLE, /* _MMIO(0x913c) */
+ GT_VEBOX_VDBOX_DISABLE, /* _MMIO(0x9140) */
+ XEHP_GT_COMPUTE_DSS_ENABLE, /* _MMIO(0x9144) */
+ CTC_MODE, /* _MMIO(0xa26c) */
+ HUC_KERNEL_LOAD_INFO, /* _MMIO(0xc1dc) */
+ TIMESTAMP_OVERRIDE, /* _MMIO(0x44074) */
+};
+
+static const struct xe_reg pvc_runtime_regs[] = {
+ RPM_CONFIG0, /* _MMIO(0x0d00) */
+ MIRROR_FUSE3, /* _MMIO(0x9118) */
+ XELP_EU_ENABLE, /* _MMIO(0x9134) */
+ XELP_GT_GEOMETRY_DSS_ENABLE, /* _MMIO(0x913c) */
+ GT_VEBOX_VDBOX_DISABLE, /* _MMIO(0x9140) */
+ XEHP_GT_COMPUTE_DSS_ENABLE, /* _MMIO(0x9144) */
+ XEHPC_GT_COMPUTE_DSS_ENABLE_EXT,/* _MMIO(0x9148) */
+ CTC_MODE, /* _MMIO(0xA26C) */
+ HUC_KERNEL_LOAD_INFO, /* _MMIO(0xc1dc) */
+ TIMESTAMP_OVERRIDE, /* _MMIO(0x44074) */
+};
+
+static const struct xe_reg ver_1270_runtime_regs[] = {
+ RPM_CONFIG0, /* _MMIO(0x0d00) */
+ XEHP_FUSE4, /* _MMIO(0x9114) */
+ MIRROR_FUSE3, /* _MMIO(0x9118) */
+ MIRROR_FUSE1, /* _MMIO(0x911c) */
+ XELP_EU_ENABLE, /* _MMIO(0x9134) */
+ XELP_GT_GEOMETRY_DSS_ENABLE, /* _MMIO(0x913c) */
+ GT_VEBOX_VDBOX_DISABLE, /* _MMIO(0x9140) */
+ XEHP_GT_COMPUTE_DSS_ENABLE, /* _MMIO(0x9144) */
+ XEHPC_GT_COMPUTE_DSS_ENABLE_EXT,/* _MMIO(0x9148) */
+ CTC_MODE, /* _MMIO(0xa26c) */
+ HUC_KERNEL_LOAD_INFO, /* _MMIO(0xc1dc) */
+ TIMESTAMP_OVERRIDE, /* _MMIO(0x44074) */
+};
+
+static const struct xe_reg ver_2000_runtime_regs[] = {
+ RPM_CONFIG0, /* _MMIO(0x0d00) */
+ XEHP_FUSE4, /* _MMIO(0x9114) */
+ MIRROR_FUSE3, /* _MMIO(0x9118) */
+ MIRROR_FUSE1, /* _MMIO(0x911c) */
+ XELP_EU_ENABLE, /* _MMIO(0x9134) */
+ XELP_GT_GEOMETRY_DSS_ENABLE, /* _MMIO(0x913c) */
+ GT_VEBOX_VDBOX_DISABLE, /* _MMIO(0x9140) */
+ XEHP_GT_COMPUTE_DSS_ENABLE, /* _MMIO(0x9144) */
+ XEHPC_GT_COMPUTE_DSS_ENABLE_EXT,/* _MMIO(0x9148) */
+ XE2_GT_COMPUTE_DSS_2, /* _MMIO(0x914c) */
+ XE2_GT_GEOMETRY_DSS_1, /* _MMIO(0x9150) */
+ XE2_GT_GEOMETRY_DSS_2, /* _MMIO(0x9154) */
+ CTC_MODE, /* _MMIO(0xa26c) */
+ HUC_KERNEL_LOAD_INFO, /* _MMIO(0xc1dc) */
+ TIMESTAMP_OVERRIDE, /* _MMIO(0x44074) */
+};
+
+static const struct xe_reg *pick_runtime_regs(struct xe_device *xe, unsigned int *count)
+{
+ const struct xe_reg *regs;
+
+ if (GRAPHICS_VERx100(xe) >= 2000) {
+ *count = ARRAY_SIZE(ver_2000_runtime_regs);
+ regs = ver_2000_runtime_regs;
+ } else if (GRAPHICS_VERx100(xe) >= 1270) {
+ *count = ARRAY_SIZE(ver_1270_runtime_regs);
+ regs = ver_1270_runtime_regs;
+ } else if (GRAPHICS_VERx100(xe) == 1260) {
+ *count = ARRAY_SIZE(pvc_runtime_regs);
+ regs = pvc_runtime_regs;
+ } else if (GRAPHICS_VERx100(xe) == 1255) {
+ *count = ARRAY_SIZE(ats_m_runtime_regs);
+ regs = ats_m_runtime_regs;
+ } else if (GRAPHICS_VERx100(xe) == 1200) {
+ *count = ARRAY_SIZE(tgl_runtime_regs);
+ regs = tgl_runtime_regs;
+ } else {
+ regs = ERR_PTR(-ENOPKG);
+ *count = 0;
+ }
+
+ return regs;
+}
+
+static int pf_alloc_runtime_info(struct xe_gt *gt)
+{
+ struct xe_device *xe = gt_to_xe(gt);
+ const struct xe_reg *regs;
+ unsigned int size;
+ u32 *values;
+
+ xe_gt_assert(gt, IS_SRIOV_PF(xe));
+ xe_gt_assert(gt, !gt->sriov.pf.service.runtime.size);
+ xe_gt_assert(gt, !gt->sriov.pf.service.runtime.regs);
+ xe_gt_assert(gt, !gt->sriov.pf.service.runtime.values);
+
+ regs = pick_runtime_regs(xe, &size);
+ if (IS_ERR(regs))
+ return PTR_ERR(regs);
+
+ if (unlikely(!size))
+ return 0;
+
+ values = drmm_kcalloc(&xe->drm, size, sizeof(u32), GFP_KERNEL);
+ if (!values)
+ return -ENOMEM;
+
+ gt->sriov.pf.service.runtime.size = size;
+ gt->sriov.pf.service.runtime.regs = regs;
+ gt->sriov.pf.service.runtime.values = values;
+
+ return 0;
+}
+
+static void read_many(struct xe_gt *gt, unsigned int count,
+ const struct xe_reg *regs, u32 *values)
+{
+ while (count--)
+ *values++ = xe_mmio_read32(gt, *regs++);
+}
+
+static void pf_prepare_runtime_info(struct xe_gt *gt)
+{
+ const struct xe_reg *regs;
+ unsigned int size;
+ u32 *values;
+
+ if (!gt->sriov.pf.service.runtime.size)
+ return;
+
+ size = gt->sriov.pf.service.runtime.size;
+ regs = gt->sriov.pf.service.runtime.regs;
+ values = gt->sriov.pf.service.runtime.values;
+
+ read_many(gt, size, regs, values);
+
+ if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_SRIOV)) {
+ struct drm_printer p = xe_gt_info_printer(gt);
+
+ xe_gt_sriov_pf_service_print_runtime(gt, &p);
+ }
+}
+
+/**
+ * xe_gt_sriov_pf_service_init - Early initialization of the GT SR-IOV PF services.
+ * @gt: the &xe_gt to initialize
+ *
+ * Performs early initialization of the GT SR-IOV PF services, including preparation
+ * of the runtime info that will be shared with VFs.
+ *
+ * This function can only be called on PF.
+ */
+int xe_gt_sriov_pf_service_init(struct xe_gt *gt)
+{
+ int err;
+
+ pf_init_versions(gt);
+
+ err = pf_alloc_runtime_info(gt);
+ if (unlikely(err))
+ goto failed;
+
+ return 0;
+failed:
+ xe_gt_sriov_err(gt, "Failed to initialize service (%pe)\n", ERR_PTR(err));
+ return err;
+}
+
+/**
+ * xe_gt_sriov_pf_service_update - Update PF SR-IOV services.
+ * @gt: the &xe_gt to update
+ *
+ * Updates runtime data shared with VFs.
+ *
+ * This function can be called more than once.
+ * This function can only be called on PF.
+ */
+void xe_gt_sriov_pf_service_update(struct xe_gt *gt)
+{
+ pf_prepare_runtime_info(gt);
+}
+
+/**
+ * xe_gt_sriov_pf_service_reset - Reset a connection with the VF.
+ * @gt: the &xe_gt
+ * @vfid: the VF identifier
+ *
+ * Reset the VF/PF ABI version negotiated with the VF driver.
+ * After that point, the VF driver will have to perform a new version handshake
+ * to continue using the PF services.
+ *
+ * This function can only be called on PF.
+ */
+void xe_gt_sriov_pf_service_reset(struct xe_gt *gt, unsigned int vfid)
+{
+ pf_disconnect(gt, vfid);
+}
+
+/* Return: 0 on success or a negative error code on failure. */
+static int pf_process_handshake(struct xe_gt *gt, u32 vfid,
+ u32 wanted_major, u32 wanted_minor,
+ u32 *major, u32 *minor)
+{
+ int err;
+
+ xe_gt_sriov_dbg_verbose(gt, "VF%u wants ABI version %u.%u\n",
+ vfid, wanted_major, wanted_minor);
+
+ err = pf_negotiate_version(gt, wanted_major, wanted_minor, major, minor);
+
+ if (err < 0) {
+ xe_gt_sriov_notice(gt, "VF%u failed to negotiate ABI %u.%u (%pe)\n",
+ vfid, wanted_major, wanted_minor, ERR_PTR(err));
+ pf_disconnect(gt, vfid);
+ } else {
+ xe_gt_sriov_dbg(gt, "VF%u negotiated ABI version %u.%u\n",
+ vfid, *major, *minor);
+ pf_connect(gt, vfid, *major, *minor);
+ }
+
+ return 0;
+}
+
+/* Return: length of the response message or a negative error code on failure. */
+static int pf_process_handshake_msg(struct xe_gt *gt, u32 origin,
+ const u32 *request, u32 len, u32 *response, u32 size)
+{
+ u32 wanted_major, wanted_minor;
+ u32 major, minor;
+ u32 mbz;
+ int err;
+
+ if (unlikely(len != VF2PF_HANDSHAKE_REQUEST_MSG_LEN))
+ return -EMSGSIZE;
+
+ mbz = FIELD_GET(VF2PF_HANDSHAKE_REQUEST_MSG_0_MBZ, request[0]);
+ if (unlikely(mbz))
+ return -EPFNOSUPPORT;
+
+ wanted_major = FIELD_GET(VF2PF_HANDSHAKE_REQUEST_MSG_1_MAJOR, request[1]);
+ wanted_minor = FIELD_GET(VF2PF_HANDSHAKE_REQUEST_MSG_1_MINOR, request[1]);
+
+ err = pf_process_handshake(gt, origin, wanted_major, wanted_minor, &major, &minor);
+ if (err < 0)
+ return err;
+
+ xe_gt_assert(gt, major || minor);
+ xe_gt_assert(gt, size >= VF2PF_HANDSHAKE_RESPONSE_MSG_LEN);
+
+ response[0] = FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
+ FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_RESPONSE_SUCCESS) |
+ FIELD_PREP(GUC_HXG_RESPONSE_MSG_0_DATA0, 0);
+ response[1] = FIELD_PREP(VF2PF_HANDSHAKE_RESPONSE_MSG_1_MAJOR, major) |
+ FIELD_PREP(VF2PF_HANDSHAKE_RESPONSE_MSG_1_MINOR, minor);
+
+ return VF2PF_HANDSHAKE_RESPONSE_MSG_LEN;
+}
+
+struct reg_data {
+ u32 offset;
+ u32 value;
+} __packed;
+static_assert(hxg_sizeof(struct reg_data) == 2);
+
+/* Return: number of entries copied or negative error code on failure. */
+static int pf_service_runtime_query(struct xe_gt *gt, u32 start, u32 limit,
+ struct reg_data *data, u32 *remaining)
+{
+ struct xe_gt_sriov_pf_service_runtime_regs *runtime;
+ unsigned int count, i;
+ u32 addr;
+
+ xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+
+ runtime = &gt->sriov.pf.service.runtime;
+
+ if (start > runtime->size)
+ return -ERANGE;
+
+ count = min_t(u32, runtime->size - start, limit);
+
+ for (i = 0; i < count; ++i, ++data) {
+ addr = runtime->regs[start + i].addr;
+ data->offset = xe_mmio_adjusted_addr(gt, addr);
+ data->value = runtime->values[start + i];
+ }
+
+ *remaining = runtime->size - start - count;
+ return count;
+}
+
+/* Return: length of the response message or a negative error code on failure. */
+static int pf_process_runtime_query_msg(struct xe_gt *gt, u32 origin,
+ const u32 *msg, u32 msg_len, u32 *response, u32 resp_size)
+{
+ const u32 chunk_size = hxg_sizeof(struct reg_data);
+ struct reg_data *reg_data_buf;
+ u32 limit, start, max_chunks;
+ u32 remaining = 0;
+ int ret;
+
+ if (!pf_is_negotiated(gt, origin, 1, 0))
+ return -EACCES;
+ if (unlikely(msg_len > VF2PF_QUERY_RUNTIME_REQUEST_MSG_LEN))
+ return -EMSGSIZE;
+ if (unlikely(msg_len < VF2PF_QUERY_RUNTIME_REQUEST_MSG_LEN))
+ return -EPROTO;
+ if (unlikely(resp_size < VF2PF_QUERY_RUNTIME_RESPONSE_MSG_MIN_LEN))
+ return -EINVAL;
+
+ limit = FIELD_GET(VF2PF_QUERY_RUNTIME_REQUEST_MSG_0_LIMIT, msg[0]);
+ start = FIELD_GET(VF2PF_QUERY_RUNTIME_REQUEST_MSG_1_START, msg[1]);
+
+ resp_size = min_t(u32, resp_size, VF2PF_QUERY_RUNTIME_RESPONSE_MSG_MAX_LEN);
+ max_chunks = (resp_size - VF2PF_QUERY_RUNTIME_RESPONSE_MSG_MIN_LEN) / chunk_size;
+ limit = limit == VF2PF_QUERY_RUNTIME_NO_LIMIT ? max_chunks : min_t(u32, max_chunks, limit);
+ reg_data_buf = (void *)(response + VF2PF_QUERY_RUNTIME_RESPONSE_MSG_MIN_LEN);
+
+ ret = pf_service_runtime_query(gt, start, limit, reg_data_buf, &remaining);
+ if (ret < 0)
+ return ret;
+
+ response[0] = FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
+ FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_RESPONSE_SUCCESS) |
+ FIELD_PREP(VF2PF_QUERY_RUNTIME_RESPONSE_MSG_0_COUNT, ret);
+ response[1] = FIELD_PREP(VF2PF_QUERY_RUNTIME_RESPONSE_MSG_1_REMAINING, remaining);
+
+ return VF2PF_QUERY_RUNTIME_RESPONSE_MSG_MIN_LEN + ret * hxg_sizeof(struct reg_data);
+}
+
+/**
+ * xe_gt_sriov_pf_service_process_request - Service GT level SR-IOV request message from the VF.
+ * @gt: the &xe_gt that provides the service
+ * @origin: VF number that is requesting the service
+ * @msg: request message
+ * @msg_len: length of the request message (in dwords)
+ * @response: placeholder for the response message
+ * @resp_size: length of the response message buffer (in dwords)
+ *
+ * This function processes a `Relay Message`_ request from the VF.
+ *
+ * Return: length of the response message or a negative error code on failure.
+ */
+int xe_gt_sriov_pf_service_process_request(struct xe_gt *gt, u32 origin,
+ const u32 *msg, u32 msg_len,
+ u32 *response, u32 resp_size)
+{
+ u32 action, data __maybe_unused;
+ int ret;
+
+ xe_gt_assert(gt, msg_len >= GUC_HXG_MSG_MIN_LEN);
+ xe_gt_assert(gt, FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[0]) == GUC_HXG_TYPE_REQUEST);
+
+ action = FIELD_GET(GUC_HXG_REQUEST_MSG_0_ACTION, msg[0]);
+ data = FIELD_GET(GUC_HXG_REQUEST_MSG_0_DATA0, msg[0]);
+ xe_gt_sriov_dbg_verbose(gt, "service action %#x:%u from VF%u\n",
+ action, data, origin);
+
+ switch (action) {
+ case GUC_RELAY_ACTION_VF2PF_HANDSHAKE:
+ ret = pf_process_handshake_msg(gt, origin, msg, msg_len, response, resp_size);
+ break;
+ case GUC_RELAY_ACTION_VF2PF_QUERY_RUNTIME:
+ ret = pf_process_runtime_query_msg(gt, origin, msg, msg_len, response, resp_size);
+ break;
+ default:
+ ret = -EOPNOTSUPP;
+ break;
+ }
+
+ return ret;
+}
+
+/**
+ * xe_gt_sriov_pf_service_print_runtime - Print PF runtime data shared with VFs.
+ * @gt: the &xe_gt
+ * @p: the &drm_printer
+ *
+ * This function is for PF use only.
+ */
+int xe_gt_sriov_pf_service_print_runtime(struct xe_gt *gt, struct drm_printer *p)
+{
+ const struct xe_reg *regs;
+ unsigned int size;
+ u32 *values;
+
+ xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+
+ size = gt->sriov.pf.service.runtime.size;
+ regs = gt->sriov.pf.service.runtime.regs;
+ values = gt->sriov.pf.service.runtime.values;
+
+ for (; size--; regs++, values++) {
+ drm_printf(p, "reg[%#x] = %#x\n",
+ xe_mmio_adjusted_addr(gt, regs->addr), *values);
+ }
+
+ return 0;
+}
+
+/**
+ * xe_gt_sriov_pf_service_print_version - Print ABI versions negotiated with VFs.
+ * @gt: the &xe_gt
+ * @p: the &drm_printer
+ *
+ * This function is for PF use only.
+ */
+int xe_gt_sriov_pf_service_print_version(struct xe_gt *gt, struct drm_printer *p)
+{
+ struct xe_device *xe = gt_to_xe(gt);
+ unsigned int n, total_vfs = xe_sriov_pf_get_totalvfs(xe);
+ struct xe_gt_sriov_pf_service_version *version;
+
+ xe_gt_assert(gt, IS_SRIOV_PF(xe));
+
+ for (n = 1; n <= total_vfs; n++) {
+ version = &gt->sriov.pf.vfs[n].version;
+ if (!version->major && !version->minor)
+ continue;
+
+ drm_printf(p, "VF%u:\t%u.%u\n", n, version->major, version->minor);
+ }
+
+ return 0;
+}
+
+#if IS_BUILTIN(CONFIG_DRM_XE_KUNIT_TEST)
+#include "tests/xe_gt_sriov_pf_service_test.c"
+#endif
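
The version negotiation in pf_negotiate_version() follows a small set of rules: an "any/any" or too-new request gets the PF's latest version, anything older than the base version is rejected, and a matching major yields the common minor. The standalone sketch below restates those rules in plain C with invented names and error codes (the unsupported older-major case is reduced to a bare error, as in the PF code today); it is an illustration, not the driver function.

/* Standalone sketch of the VF/PF ABI negotiation rules. */
#include <stdio.h>

#define DEMO_ANY 0xffffu

struct demo_version { unsigned int major, minor; };

static int demo_negotiate(struct demo_version base, struct demo_version latest,
			  unsigned int wanted_major, unsigned int wanted_minor,
			  struct demo_version *out)
{
	/* VF doesn't care - return our latest. */
	if (wanted_major == DEMO_ANY && wanted_minor == DEMO_ANY) {
		*out = latest;
		return 0;
	}

	/* VF wants newer than ours - return our latest. */
	if (wanted_major > latest.major) {
		*out = latest;
		return 0;
	}

	/* VF wants older than the minimum required - reject. */
	if (wanted_major < base.major ||
	    (wanted_major == base.major && wanted_minor < base.minor))
		return -1;

	/* Older-but-supported majors would need per-major handling. */
	if (wanted_major < latest.major)
		return -2;

	/* Same major - return the common minor. */
	out->major = wanted_major;
	out->minor = wanted_minor < latest.minor ? wanted_minor : latest.minor;
	return 0;
}

int main(void)
{
	struct demo_version base = { 1, 0 }, latest = { 1, 9 }, got;

	if (!demo_negotiate(base, latest, DEMO_ANY, DEMO_ANY, &got))
		printf("any/any    -> %u.%u\n", got.major, got.minor);
	if (!demo_negotiate(base, latest, 1, 4, &got))
		printf("wanted 1.4 -> %u.%u\n", got.major, got.minor);
	if (demo_negotiate(base, latest, 0, 9, &got))
		printf("wanted 0.9 -> rejected\n");
	return 0;
}
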
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.h
new file mode 100644
index 000000000000..56aaadf0360d
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023-2024 Intel Corporation
+ */
+
+#ifndef _XE_GT_SRIOV_PF_SERVICE_H_
+#define _XE_GT_SRIOV_PF_SERVICE_H_
+
+#include <linux/errno.h>
+#include <linux/types.h>
+
+struct drm_printer;
+struct xe_gt;
+
+int xe_gt_sriov_pf_service_init(struct xe_gt *gt);
+void xe_gt_sriov_pf_service_update(struct xe_gt *gt);
+void xe_gt_sriov_pf_service_reset(struct xe_gt *gt, unsigned int vfid);
+
+int xe_gt_sriov_pf_service_print_version(struct xe_gt *gt, struct drm_printer *p);
+int xe_gt_sriov_pf_service_print_runtime(struct xe_gt *gt, struct drm_printer *p);
+
+#ifdef CONFIG_PCI_IOV
+int xe_gt_sriov_pf_service_process_request(struct xe_gt *gt, u32 origin,
+ const u32 *msg, u32 msg_len,
+ u32 *response, u32 resp_size);
+#else
+static inline int
+xe_gt_sriov_pf_service_process_request(struct xe_gt *gt, u32 origin,
+ const u32 *msg, u32 msg_len,
+ u32 *response, u32 resp_size)
+{
+ return -EPROTO;
+}
+#endif
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_service_types.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_service_types.h
new file mode 100644
index 000000000000..ad6dd75f0056
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_service_types.h
@@ -0,0 +1,52 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023-2024 Intel Corporation
+ */
+
+#ifndef _XE_GT_SRIOV_PF_SERVICE_TYPES_H_
+#define _XE_GT_SRIOV_PF_SERVICE_TYPES_H_
+
+#include <linux/types.h>
+
+struct xe_reg;
+
+/**
+ * struct xe_gt_sriov_pf_service_version - VF/PF ABI Version.
+ * @major: the major version of the VF/PF ABI
+ * @minor: the minor version of the VF/PF ABI
+ *
+ * See `GuC Relay Communication`_.
+ */
+struct xe_gt_sriov_pf_service_version {
+ u16 major;
+ u16 minor;
+};
+
+/**
+ * struct xe_gt_sriov_pf_service_runtime_regs - Runtime data shared with VFs.
+ * @regs: pointer to static array with register offsets.
+ * @values: pointer to array with captured register values.
+ * @size: size of the regs and value arrays.
+ */
+struct xe_gt_sriov_pf_service_runtime_regs {
+ const struct xe_reg *regs;
+ u32 *values;
+ u32 size;
+};
+
+/**
+ * struct xe_gt_sriov_pf_service - Data used by the PF service.
+ * @version: information about VF/PF ABI versions for current platform.
+ * @version.base: lowest VF/PF ABI version that could be negotiated with VF.
+ * @version.latest: latest VF/PF ABI version supported by the PF driver.
+ * @runtime: runtime data shared with VFs.
+ */
+struct xe_gt_sriov_pf_service {
+ struct {
+ struct xe_gt_sriov_pf_service_version base;
+ struct xe_gt_sriov_pf_service_version latest;
+ } version;
+ struct xe_gt_sriov_pf_service_runtime_regs runtime;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h
index faf9ee8266ce..880754f3e215 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h
@@ -10,6 +10,7 @@
#include "xe_gt_sriov_pf_config_types.h"
#include "xe_gt_sriov_pf_policy_types.h"
+#include "xe_gt_sriov_pf_service_types.h"
/**
* struct xe_gt_sriov_metadata - GT level per-VF metadata.
@@ -17,15 +18,19 @@
struct xe_gt_sriov_metadata {
/** @config: per-VF provisioning data. */
struct xe_gt_sriov_config config;
+ /** @version: negotiated VF/PF ABI version */
+ struct xe_gt_sriov_pf_service_version version;
};
/**
* struct xe_gt_sriov_pf - GT level PF virtualization data.
+ * @service: service data.
* @policy: policy data.
* @spare: PF-only provisioning configuration.
* @vfs: metadata for all VFs.
*/
struct xe_gt_sriov_pf {
+ struct xe_gt_sriov_pf_service service;
struct xe_gt_sriov_pf_policy policy;
struct xe_gt_sriov_spare_config spare;
struct xe_gt_sriov_metadata *vfs;
diff --git a/drivers/gpu/drm/xe/xe_gt_throttle_sysfs.h b/drivers/gpu/drm/xe/xe_gt_throttle_sysfs.h
index 6c61e6f228a8..335c402b51a6 100644
--- a/drivers/gpu/drm/xe/xe_gt_throttle_sysfs.h
+++ b/drivers/gpu/drm/xe/xe_gt_throttle_sysfs.h
@@ -6,11 +6,8 @@
#ifndef _XE_GT_THROTTLE_SYSFS_H_
#define _XE_GT_THROTTLE_SYSFS_H_
-#include <drm/drm_managed.h>
-
struct xe_gt;
int xe_gt_throttle_sysfs_init(struct xe_gt *gt);
-#endif /* _XE_GT_THROTTLE_SYSFS_H_ */
-
+#endif
diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
index 93df2d7969b3..105797776a6c 100644
--- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
+++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
@@ -7,6 +7,7 @@
#include "abi/guc_actions_abi.h"
#include "xe_device.h"
+#include "xe_force_wake.h"
#include "xe_gt.h"
#include "xe_gt_printk.h"
#include "xe_guc.h"
@@ -245,7 +246,7 @@ int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt)
return seqno;
xe_gt_tlb_invalidation_wait(gt, seqno);
- } else if (xe_device_uc_enabled(xe)) {
+ } else if (xe_device_uc_enabled(xe) && !xe_device_wedged(xe)) {
xe_gt_WARN_ON(gt, xe_force_wake_get(gt_to_fw(gt), XE_FW_GT));
if (xe->info.platform == XE_PVC || GRAPHICS_VER(xe) >= 20) {
xe_mmio_write32(gt, PVC_GUC_TLB_INV_DESC1,
@@ -263,11 +264,15 @@ int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt)
}
/**
- * xe_gt_tlb_invalidation_vma - Issue a TLB invalidation on this GT for a VMA
+ * xe_gt_tlb_invalidation_range - Issue a TLB invalidation on this GT for an
+ * address range
+ *
* @gt: graphics tile
* @fence: invalidation fence which will be signaled on TLB invalidation
* completion, can be NULL
- * @vma: VMA to invalidate
+ * @start: start address
+ * @end: end address
+ * @asid: address space id
*
* Issue a range based TLB invalidation if supported, if not fall back to a full
* TLB invalidation. Completion of the TLB invalidation is asynchronous and the caller can either use
@@ -277,17 +282,15 @@ int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt)
* Return: Seqno which can be passed to xe_gt_tlb_invalidation_wait on success,
* negative error code on error.
*/
-int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
- struct xe_gt_tlb_invalidation_fence *fence,
- struct xe_vma *vma)
+int xe_gt_tlb_invalidation_range(struct xe_gt *gt,
+ struct xe_gt_tlb_invalidation_fence *fence,
+ u64 start, u64 end, u32 asid)
{
struct xe_device *xe = gt_to_xe(gt);
#define MAX_TLB_INVALIDATION_LEN 7
u32 action[MAX_TLB_INVALIDATION_LEN];
int len = 0;
- xe_gt_assert(gt, vma);
-
/* Execlists not supported */
if (gt_to_xe(gt)->info.force_execlist) {
if (fence)
@@ -301,9 +304,9 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
if (!xe->info.has_range_tlb_invalidation) {
action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_FULL);
} else {
- u64 start = xe_vma_start(vma);
- u64 length = xe_vma_size(vma);
- u64 align, end;
+ u64 orig_start = start;
+ u64 length = end - start;
+ u64 align;
if (length < SZ_4K)
length = SZ_4K;
@@ -315,12 +318,12 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
* address mask covering the required range.
*/
align = roundup_pow_of_two(length);
- start = ALIGN_DOWN(xe_vma_start(vma), align);
- end = ALIGN(xe_vma_end(vma), align);
+ start = ALIGN_DOWN(start, align);
+ end = ALIGN(end, align);
length = align;
while (start + length < end) {
length <<= 1;
- start = ALIGN_DOWN(xe_vma_start(vma), length);
+ start = ALIGN_DOWN(orig_start, length);
}
/*
@@ -329,16 +332,17 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
*/
if (length >= SZ_2M) {
length = max_t(u64, SZ_16M, length);
- start = ALIGN_DOWN(xe_vma_start(vma), length);
+ start = ALIGN_DOWN(orig_start, length);
}
xe_gt_assert(gt, length >= SZ_4K);
xe_gt_assert(gt, is_power_of_2(length));
- xe_gt_assert(gt, !(length & GENMASK(ilog2(SZ_16M) - 1, ilog2(SZ_2M) + 1)));
+ xe_gt_assert(gt, !(length & GENMASK(ilog2(SZ_16M) - 1,
+ ilog2(SZ_2M) + 1)));
xe_gt_assert(gt, IS_ALIGNED(start, length));
action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_PAGE_SELECTIVE);
- action[len++] = xe_vma_vm(vma)->usm.asid;
+ action[len++] = asid;
action[len++] = lower_32_bits(start);
action[len++] = upper_32_bits(start);
action[len++] = ilog2(length) - ilog2(SZ_4K);
@@ -350,6 +354,32 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
}
/**
+ * xe_gt_tlb_invalidation_vma - Issue a TLB invalidation on this GT for a VMA
+ * @gt: graphics tile
+ * @fence: invalidation fence which will be signaled on TLB invalidation
+ * completion, can be NULL
+ * @vma: VMA to invalidate
+ *
+ * Issue a range based TLB invalidation if supported, if not fall back to a
+ * full TLB invalidation. Completion of the TLB invalidation is asynchronous;
+ * the caller can either use the invalidation fence or seqno +
+ * xe_gt_tlb_invalidation_wait to wait for completion.
+ *
+ * Return: Seqno which can be passed to xe_gt_tlb_invalidation_wait on success,
+ * negative error code on error.
+ */
+int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
+ struct xe_gt_tlb_invalidation_fence *fence,
+ struct xe_vma *vma)
+{
+ xe_gt_assert(gt, vma);
+
+ return xe_gt_tlb_invalidation_range(gt, fence, xe_vma_start(vma),
+ xe_vma_end(vma),
+ xe_vma_vm(vma)->usm.asid);
+}
+
+/**
* xe_gt_tlb_invalidation_wait - Wait for TLB to complete
* @gt: graphics tile
* @seqno: seqno to wait which was returned from xe_gt_tlb_invalidation
diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h
index fbb743d80d2c..bf3bebd9f985 100644
--- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h
+++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h
@@ -20,6 +20,9 @@ int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt);
int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
struct xe_gt_tlb_invalidation_fence *fence,
struct xe_vma *vma);
+int xe_gt_tlb_invalidation_range(struct xe_gt *gt,
+ struct xe_gt_tlb_invalidation_fence *fence,
+ u64 start, u64 end, u32 asid);
int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno);
int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len);
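
The heart of the new xe_gt_tlb_invalidation_range() helper is the widening math: the hardware takes an (address, length) pair where length is a power of two and the address is length-aligned, so the requested [start, end) range is grown until one such block covers it. The standalone sketch below reimplements that math with invented helper names (align_down, roundup_pow_of_two_demo, ...) and leaves out the driver's extra 2M-to-16M special case; it is for illustration only.

/*
 * Standalone sketch of widening [start, end) to a single power-of-two,
 * naturally aligned block.
 */
#include <stdint.h>
#include <stdio.h>

#define SZ_4K_DEMO 0x1000ull

static uint64_t align_down(uint64_t x, uint64_t a) { return x & ~(a - 1); }
static uint64_t align_up(uint64_t x, uint64_t a)   { return (x + a - 1) & ~(a - 1); }

static uint64_t roundup_pow_of_two_demo(uint64_t x)
{
	uint64_t p = 1;

	while (p < x)
		p <<= 1;
	return p;
}

static void demo_inval_range(uint64_t orig_start, uint64_t orig_end,
			     uint64_t *out_start, uint64_t *out_length)
{
	uint64_t length = orig_end - orig_start;
	uint64_t start, end, align;

	if (length < SZ_4K_DEMO)
		length = SZ_4K_DEMO;

	/* First guess: smallest power-of-two length, aligned down. */
	align = roundup_pow_of_two_demo(length);
	start = align_down(orig_start, align);
	end = align_up(orig_end, align);
	length = align;

	/* Keep doubling until one aligned block covers the whole range. */
	while (start + length < end) {
		length <<= 1;
		start = align_down(orig_start, length);
	}

	*out_start = start;
	*out_length = length;
}

int main(void)
{
	uint64_t start, length;

	demo_inval_range(0x12345000ull, 0x12349000ull, &start, &length);
	printf("start=%#llx length=%#llx\n",
	       (unsigned long long)start, (unsigned long long)length);
	return 0;
}
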
diff --git a/drivers/gpu/drm/xe/xe_gt_topology.c b/drivers/gpu/drm/xe/xe_gt_topology.c
index 3733e7a6860d..25ff03ab8448 100644
--- a/drivers/gpu/drm/xe/xe_gt_topology.c
+++ b/drivers/gpu/drm/xe/xe_gt_topology.c
@@ -108,7 +108,9 @@ gen_l3_mask_from_pattern(struct xe_device *xe, xe_l3_bank_mask_t dst,
{
unsigned long bit;
- xe_assert(xe, fls(mask) <= patternbits);
+ xe_assert(xe, find_last_bit(pattern, XE_MAX_L3_BANK_MASK_BITS) < patternbits ||
+ bitmap_empty(pattern, XE_MAX_L3_BANK_MASK_BITS));
+ xe_assert(xe, !mask || patternbits * (__fls(mask) + 1) <= XE_MAX_L3_BANK_MASK_BITS);
for_each_set_bit(bit, &mask, 32) {
xe_l3_bank_mask_t shifted_pattern = {};
@@ -278,3 +280,13 @@ bool xe_gt_topology_has_dss_in_quadrant(struct xe_gt *gt, int quad)
return quad_first < (quad + 1) * dss_per_quad;
}
+
+bool xe_gt_has_geometry_dss(struct xe_gt *gt, unsigned int dss)
+{
+ return test_bit(dss, gt->fuse_topo.g_dss_mask);
+}
+
+bool xe_gt_has_compute_dss(struct xe_gt *gt, unsigned int dss)
+{
+ return test_bit(dss, gt->fuse_topo.c_dss_mask);
+}
diff --git a/drivers/gpu/drm/xe/xe_gt_topology.h b/drivers/gpu/drm/xe/xe_gt_topology.h
index b3e357777a6e..746b325bbf6e 100644
--- a/drivers/gpu/drm/xe/xe_gt_topology.h
+++ b/drivers/gpu/drm/xe/xe_gt_topology.h
@@ -33,4 +33,7 @@ bool xe_dss_mask_empty(const xe_dss_mask_t mask);
bool
xe_gt_topology_has_dss_in_quadrant(struct xe_gt *gt, int quad);
+bool xe_gt_has_geometry_dss(struct xe_gt *gt, unsigned int dss);
+bool xe_gt_has_compute_dss(struct xe_gt *gt, unsigned int dss);
+
#endif /* _XE_GT_TOPOLOGY_H_ */
diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h
index cfdc761ff7f4..8dc203413a27 100644
--- a/drivers/gpu/drm/xe/xe_gt_types.h
+++ b/drivers/gpu/drm/xe/xe_gt_types.h
@@ -110,8 +110,6 @@ struct xe_gt {
struct {
/** @info.type: type of GT */
enum xe_gt_type type;
- /** @info.id: Unique ID of this GT within the PCI Device */
- u8 id;
/** @info.reference_clock: clock frequency */
u32 reference_clock;
/** @info.engine_mask: mask of engines present on GT */
@@ -124,6 +122,10 @@ struct xe_gt {
u64 __engine_mask;
/** @info.gmdid: raw GMD_ID value from hardware */
u32 gmdid;
+ /** @info.id: Unique ID of this GT within the PCI Device */
+ u8 id;
+ /** @info.has_indirect_ring_state: GT has indirect ring state support */
+ u8 has_indirect_ring_state:1;
} info;
/**
diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c
index 240e7a4bbff1..0c9938e0ab8c 100644
--- a/drivers/gpu/drm/xe/xe_guc.c
+++ b/drivers/gpu/drm/xe/xe_guc.c
@@ -451,7 +451,7 @@ static int guc_xfer_rsa(struct xe_guc *guc)
return 0;
}
-static int guc_wait_ucode(struct xe_guc *guc)
+static void guc_wait_ucode(struct xe_guc *guc)
{
struct xe_gt *gt = guc_to_gt(guc);
u32 status;
@@ -479,30 +479,26 @@ static int guc_wait_ucode(struct xe_guc *guc)
200000, &status, false);
if (ret) {
- xe_gt_info(gt, "GuC load failed: status = 0x%08X\n", status);
- xe_gt_info(gt, "GuC status: Reset = %u, BootROM = %#X, UKernel = %#X, MIA = %#X, Auth = %#X\n",
- REG_FIELD_GET(GS_MIA_IN_RESET, status),
- REG_FIELD_GET(GS_BOOTROM_MASK, status),
- REG_FIELD_GET(GS_UKERNEL_MASK, status),
- REG_FIELD_GET(GS_MIA_MASK, status),
- REG_FIELD_GET(GS_AUTH_STATUS_MASK, status));
-
- if ((status & GS_BOOTROM_MASK) == GS_BOOTROM_RSA_FAILED) {
- xe_gt_info(gt, "GuC firmware signature verification failed\n");
- ret = -ENOEXEC;
- }
+ xe_gt_err(gt, "GuC load failed: status = 0x%08X\n", status);
+ xe_gt_err(gt, "GuC status: Reset = %u, BootROM = %#X, UKernel = %#X, MIA = %#X, Auth = %#X\n",
+ REG_FIELD_GET(GS_MIA_IN_RESET, status),
+ REG_FIELD_GET(GS_BOOTROM_MASK, status),
+ REG_FIELD_GET(GS_UKERNEL_MASK, status),
+ REG_FIELD_GET(GS_MIA_MASK, status),
+ REG_FIELD_GET(GS_AUTH_STATUS_MASK, status));
+
+ if ((status & GS_BOOTROM_MASK) == GS_BOOTROM_RSA_FAILED)
+ xe_gt_err(gt, "GuC firmware signature verification failed\n");
if (REG_FIELD_GET(GS_UKERNEL_MASK, status) ==
- XE_GUC_LOAD_STATUS_EXCEPTION) {
- xe_gt_info(gt, "GuC firmware exception. EIP: %#x\n",
- xe_mmio_read32(gt, SOFT_SCRATCH(13)));
- ret = -ENXIO;
- }
+ XE_GUC_LOAD_STATUS_EXCEPTION)
+ xe_gt_err(gt, "GuC firmware exception. EIP: %#x\n",
+ xe_mmio_read32(gt, SOFT_SCRATCH(13)));
+
+ xe_device_declare_wedged(gt_to_xe(gt));
} else {
xe_gt_dbg(gt, "GuC successfully loaded\n");
}
-
- return ret;
}
static int __xe_guc_upload(struct xe_guc *guc)
@@ -532,9 +528,7 @@ static int __xe_guc_upload(struct xe_guc *guc)
goto out;
/* Wait for authentication */
- ret = guc_wait_ucode(guc);
- if (ret)
- goto out;
+ guc_wait_ucode(guc);
xe_uc_fw_change_status(&guc->fw, XE_UC_FIRMWARE_RUNNING);
return 0;
@@ -891,17 +885,11 @@ void xe_guc_stop_prepare(struct xe_guc *guc)
XE_WARN_ON(xe_guc_pc_stop(&guc->pc));
}
-int xe_guc_stop(struct xe_guc *guc)
+void xe_guc_stop(struct xe_guc *guc)
{
- int ret;
-
xe_guc_ct_stop(&guc->ct);
- ret = xe_guc_submit_stop(guc);
- if (ret)
- return ret;
-
- return 0;
+ xe_guc_submit_stop(guc);
}
int xe_guc_start(struct xe_guc *guc)
diff --git a/drivers/gpu/drm/xe/xe_guc.h b/drivers/gpu/drm/xe/xe_guc.h
index 94f2dc5f6f90..a3c92b74a3d5 100644
--- a/drivers/gpu/drm/xe/xe_guc.h
+++ b/drivers/gpu/drm/xe/xe_guc.h
@@ -35,7 +35,7 @@ void xe_guc_print_info(struct xe_guc *guc, struct drm_printer *p);
int xe_guc_reset_prepare(struct xe_guc *guc);
void xe_guc_reset_wait(struct xe_guc *guc);
void xe_guc_stop_prepare(struct xe_guc *guc);
-int xe_guc_stop(struct xe_guc *guc);
+void xe_guc_stop(struct xe_guc *guc);
int xe_guc_start(struct xe_guc *guc);
bool xe_guc_in_reset(struct xe_guc *guc);
diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c
index 7f5a523795c8..9c33cca4e370 100644
--- a/drivers/gpu/drm/xe/xe_guc_ads.c
+++ b/drivers/gpu/drm/xe/xe_guc_ads.c
@@ -9,6 +9,7 @@
#include <generated/xe_wa_oob.h>
+#include "abi/guc_actions_abi.h"
#include "regs/xe_engine_regs.h"
#include "regs/xe_gt_regs.h"
#include "regs/xe_guc_regs.h"
@@ -16,6 +17,7 @@
#include "xe_gt.h"
#include "xe_gt_ccs_mode.h"
#include "xe_guc.h"
+#include "xe_guc_ct.h"
#include "xe_hw_engine.h"
#include "xe_lrc.h"
#include "xe_map.h"
@@ -265,7 +267,6 @@ static u32 engine_enable_mask(struct xe_gt *gt, enum xe_engine_class class)
static size_t calculate_golden_lrc_size(struct xe_guc_ads *ads)
{
- struct xe_device *xe = ads_to_xe(ads);
struct xe_gt *gt = ads_to_gt(ads);
size_t total_size = 0, alloc_size, real_size;
int class;
@@ -274,7 +275,7 @@ static size_t calculate_golden_lrc_size(struct xe_guc_ads *ads)
if (!engine_enable_mask(gt, class))
continue;
- real_size = xe_lrc_size(xe, class);
+ real_size = xe_gt_lrc_size(gt, class);
alloc_size = PAGE_ALIGN(real_size);
total_size += alloc_size;
}
@@ -440,11 +441,18 @@ int xe_guc_ads_init_post_hwconfig(struct xe_guc_ads *ads)
static void guc_policies_init(struct xe_guc_ads *ads)
{
+ struct xe_device *xe = ads_to_xe(ads);
+ u32 global_flags = 0;
+
ads_blob_write(ads, policies.dpc_promote_time,
GLOBAL_POLICY_DEFAULT_DPC_PROMOTE_TIME_US);
ads_blob_write(ads, policies.max_num_work_items,
GLOBAL_POLICY_MAX_NUM_WI);
- ads_blob_write(ads, policies.global_flags, 0);
+
+ if (xe->wedged.mode == 2)
+ global_flags |= GLOBAL_POLICY_DISABLE_ENGINE_RESET;
+
+ ads_blob_write(ads, policies.global_flags, global_flags);
ads_blob_write(ads, policies.is_valid, 1);
}
@@ -765,7 +773,7 @@ static void guc_populate_golden_lrc(struct xe_guc_ads *ads)
xe_gt_assert(gt, gt->default_lrc[class]);
- real_size = xe_lrc_size(xe, class);
+ real_size = xe_gt_lrc_size(gt, class);
alloc_size = PAGE_ALIGN(real_size);
total_size += alloc_size;
@@ -799,3 +807,57 @@ void xe_guc_ads_populate_post_load(struct xe_guc_ads *ads)
{
guc_populate_golden_lrc(ads);
}
+
+static int guc_ads_action_update_policies(struct xe_guc_ads *ads, u32 policy_offset)
+{
+ struct xe_guc_ct *ct = &ads_to_guc(ads)->ct;
+ u32 action[] = {
+ XE_GUC_ACTION_GLOBAL_SCHED_POLICY_CHANGE,
+ policy_offset
+ };
+
+ return xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 0, 0);
+}
+
+/**
+ * xe_guc_ads_scheduler_policy_toggle_reset - Toggle reset policy
+ * @ads: Additional data structures object
+ *
+ * This function updates the GuC's engine reset policy based on wedged.mode.
+ *
+ * Return: 0 on success, and negative error code otherwise.
+ */
+int xe_guc_ads_scheduler_policy_toggle_reset(struct xe_guc_ads *ads)
+{
+ struct xe_device *xe = ads_to_xe(ads);
+ struct xe_gt *gt = ads_to_gt(ads);
+ struct xe_tile *tile = gt_to_tile(gt);
+ struct guc_policies *policies;
+ struct xe_bo *bo;
+ int ret = 0;
+
+ policies = kmalloc(sizeof(*policies), GFP_KERNEL);
+ if (!policies)
+ return -ENOMEM;
+
+ policies->dpc_promote_time = ads_blob_read(ads, policies.dpc_promote_time);
+ policies->max_num_work_items = ads_blob_read(ads, policies.max_num_work_items);
+ policies->is_valid = 1;
+ if (xe->wedged.mode == 2)
+ policies->global_flags |= GLOBAL_POLICY_DISABLE_ENGINE_RESET;
+ else
+ policies->global_flags &= ~GLOBAL_POLICY_DISABLE_ENGINE_RESET;
+
+ bo = xe_managed_bo_create_from_data(xe, tile, policies, sizeof(struct guc_policies),
+ XE_BO_FLAG_VRAM_IF_DGFX(tile) |
+ XE_BO_FLAG_GGTT);
+ if (IS_ERR(bo)) {
+ ret = PTR_ERR(bo);
+ goto out;
+ }
+
+ ret = guc_ads_action_update_policies(ads, xe_bo_ggtt_addr(bo));
+out:
+ kfree(policies);
+ return ret;
+}
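For context only, here is a minimal, hypothetical sketch of how a caller might push the new policy to every GT after changing the wedged mode at runtime. The helper name example_set_wedged_mode and the direct assignment to xe->wedged.mode are illustrative assumptions; only xe_guc_ads_scheduler_policy_toggle_reset() comes from the change above.

	/* Illustrative sketch, not part of this patch. */
	static int example_set_wedged_mode(struct xe_device *xe, u32 mode)
	{
		struct xe_gt *gt;
		u8 id;
		int err;

		xe->wedged.mode = mode;	/* 2 == wedge on any engine hang */

		for_each_gt(gt, xe, id) {
			/* Re-sends GLOBAL_SCHED_POLICY_CHANGE with the updated flags */
			err = xe_guc_ads_scheduler_policy_toggle_reset(&gt->uc.guc.ads);
			if (err)
				return err;
		}

		return 0;
	}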
diff --git a/drivers/gpu/drm/xe/xe_guc_ads.h b/drivers/gpu/drm/xe/xe_guc_ads.h
index 138ef6267671..2e6674c760ff 100644
--- a/drivers/gpu/drm/xe/xe_guc_ads.h
+++ b/drivers/gpu/drm/xe/xe_guc_ads.h
@@ -6,12 +6,13 @@
#ifndef _XE_GUC_ADS_H_
#define _XE_GUC_ADS_H_
-#include "xe_guc_ads_types.h"
+struct xe_guc_ads;
int xe_guc_ads_init(struct xe_guc_ads *ads);
int xe_guc_ads_init_post_hwconfig(struct xe_guc_ads *ads);
void xe_guc_ads_populate(struct xe_guc_ads *ads);
void xe_guc_ads_populate_minimal(struct xe_guc_ads *ads);
void xe_guc_ads_populate_post_load(struct xe_guc_ads *ads);
+int xe_guc_ads_scheduler_policy_toggle_reset(struct xe_guc_ads *ads);
#endif
diff --git a/drivers/gpu/drm/xe/xe_guc_db_mgr.c b/drivers/gpu/drm/xe/xe_guc_db_mgr.c
index 8d9a0287df6b..6767e8076e6b 100644
--- a/drivers/gpu/drm/xe/xe_guc_db_mgr.c
+++ b/drivers/gpu/drm/xe/xe_guc_db_mgr.c
@@ -106,7 +106,8 @@ int xe_guc_db_mgr_init(struct xe_guc_db_mgr *dbm, unsigned int count)
if (ret)
return ret;
done:
- xe_gt_dbg(dbm_to_gt(dbm), "using %u doorbell(s)\n", dbm->count);
+ xe_gt_dbg(dbm_to_gt(dbm), "using %u doorbell%s\n",
+ dbm->count, str_plural(dbm->count));
return 0;
}
diff --git a/drivers/gpu/drm/xe/xe_guc_id_mgr.c b/drivers/gpu/drm/xe/xe_guc_id_mgr.c
index 0fb7c6b78c31..cd0549d0ef89 100644
--- a/drivers/gpu/drm/xe/xe_guc_id_mgr.c
+++ b/drivers/gpu/drm/xe/xe_guc_id_mgr.c
@@ -97,7 +97,8 @@ int xe_guc_id_mgr_init(struct xe_guc_id_mgr *idm, unsigned int limit)
if (ret)
return ret;
- xe_gt_info(idm_to_gt(idm), "using %u GUC ID(s)\n", idm->total);
+ xe_gt_info(idm_to_gt(idm), "using %u GUC ID%s\n",
+ idm->total, str_plural(idm->total));
return 0;
}
diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c
index 509649d0e65e..d10aab29651e 100644
--- a/drivers/gpu/drm/xe/xe_guc_pc.c
+++ b/drivers/gpu/drm/xe/xe_guc_pc.c
@@ -15,6 +15,7 @@
#include "regs/xe_regs.h"
#include "xe_bo.h"
#include "xe_device.h"
+#include "xe_force_wake.h"
#include "xe_gt.h"
#include "xe_gt_idle.h"
#include "xe_gt_sysfs.h"
@@ -902,6 +903,9 @@ static void xe_guc_pc_fini(struct drm_device *drm, void *arg)
return;
}
+ if (xe_device_wedged(xe))
+ return;
+
XE_WARN_ON(xe_force_wake_get(gt_to_fw(pc_to_gt(pc)), XE_FORCEWAKE_ALL));
XE_WARN_ON(xe_guc_pc_gucrc_disable(pc));
XE_WARN_ON(xe_guc_pc_stop(pc));
diff --git a/drivers/gpu/drm/xe/xe_guc_relay.c b/drivers/gpu/drm/xe/xe_guc_relay.c
index c0a2d8d5d3b3..c3bbaf474f9a 100644
--- a/drivers/gpu/drm/xe/xe_guc_relay.c
+++ b/drivers/gpu/drm/xe/xe_guc_relay.c
@@ -19,6 +19,7 @@
#include "xe_device.h"
#include "xe_gt.h"
#include "xe_gt_sriov_printk.h"
+#include "xe_gt_sriov_pf_service.h"
#include "xe_guc.h"
#include "xe_guc_ct.h"
#include "xe_guc_hxg_helpers.h"
@@ -664,6 +665,7 @@ static int relay_testloop_action_handler(struct xe_guc_relay *relay, u32 origin,
static int relay_action_handler(struct xe_guc_relay *relay, u32 origin,
const u32 *msg, u32 len, u32 *response, u32 size)
{
+ struct xe_gt *gt = relay_to_gt(relay);
u32 type;
int ret;
@@ -674,8 +676,10 @@ static int relay_action_handler(struct xe_guc_relay *relay, u32 origin,
type = FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[0]);
- /* XXX: PF services will be added later */
- ret = -EOPNOTSUPP;
+ if (IS_SRIOV_PF(relay_to_xe(relay)))
+ ret = xe_gt_sriov_pf_service_process_request(gt, origin, msg, len, response, size);
+ else
+ ret = -EOPNOTSUPP;
if (type == GUC_HXG_TYPE_EVENT)
relay_assert(relay, ret <= 0);
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
index c7d38469fb46..fde527d34f58 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -52,13 +52,14 @@ exec_queue_to_guc(struct xe_exec_queue *q)
* engine done being processed).
*/
#define EXEC_QUEUE_STATE_REGISTERED (1 << 0)
-#define ENGINE_STATE_ENABLED (1 << 1)
-#define EXEC_QUEUE_STATE_PENDING_ENABLE (1 << 2)
+#define EXEC_QUEUE_STATE_ENABLED (1 << 1)
+#define EXEC_QUEUE_STATE_PENDING_ENABLE (1 << 2)
#define EXEC_QUEUE_STATE_PENDING_DISABLE (1 << 3)
#define EXEC_QUEUE_STATE_DESTROYED (1 << 4)
-#define ENGINE_STATE_SUSPENDED (1 << 5)
-#define EXEC_QUEUE_STATE_RESET (1 << 6)
-#define ENGINE_STATE_KILLED (1 << 7)
+#define EXEC_QUEUE_STATE_SUSPENDED (1 << 5)
+#define EXEC_QUEUE_STATE_RESET (1 << 6)
+#define EXEC_QUEUE_STATE_KILLED (1 << 7)
+#define EXEC_QUEUE_STATE_WEDGED (1 << 8)
static bool exec_queue_registered(struct xe_exec_queue *q)
{
@@ -77,17 +78,17 @@ static void clear_exec_queue_registered(struct xe_exec_queue *q)
static bool exec_queue_enabled(struct xe_exec_queue *q)
{
- return atomic_read(&q->guc->state) & ENGINE_STATE_ENABLED;
+ return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_ENABLED;
}
static void set_exec_queue_enabled(struct xe_exec_queue *q)
{
- atomic_or(ENGINE_STATE_ENABLED, &q->guc->state);
+ atomic_or(EXEC_QUEUE_STATE_ENABLED, &q->guc->state);
}
static void clear_exec_queue_enabled(struct xe_exec_queue *q)
{
- atomic_and(~ENGINE_STATE_ENABLED, &q->guc->state);
+ atomic_and(~EXEC_QUEUE_STATE_ENABLED, &q->guc->state);
}
static bool exec_queue_pending_enable(struct xe_exec_queue *q)
@@ -142,17 +143,17 @@ static void set_exec_queue_banned(struct xe_exec_queue *q)
static bool exec_queue_suspended(struct xe_exec_queue *q)
{
- return atomic_read(&q->guc->state) & ENGINE_STATE_SUSPENDED;
+ return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_SUSPENDED;
}
static void set_exec_queue_suspended(struct xe_exec_queue *q)
{
- atomic_or(ENGINE_STATE_SUSPENDED, &q->guc->state);
+ atomic_or(EXEC_QUEUE_STATE_SUSPENDED, &q->guc->state);
}
static void clear_exec_queue_suspended(struct xe_exec_queue *q)
{
- atomic_and(~ENGINE_STATE_SUSPENDED, &q->guc->state);
+ atomic_and(~EXEC_QUEUE_STATE_SUSPENDED, &q->guc->state);
}
static bool exec_queue_reset(struct xe_exec_queue *q)
@@ -167,17 +168,28 @@ static void set_exec_queue_reset(struct xe_exec_queue *q)
static bool exec_queue_killed(struct xe_exec_queue *q)
{
- return atomic_read(&q->guc->state) & ENGINE_STATE_KILLED;
+ return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_KILLED;
}
static void set_exec_queue_killed(struct xe_exec_queue *q)
{
- atomic_or(ENGINE_STATE_KILLED, &q->guc->state);
+ atomic_or(EXEC_QUEUE_STATE_KILLED, &q->guc->state);
}
-static bool exec_queue_killed_or_banned(struct xe_exec_queue *q)
+static bool exec_queue_wedged(struct xe_exec_queue *q)
{
- return exec_queue_killed(q) || exec_queue_banned(q);
+ return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_WEDGED;
+}
+
+static void set_exec_queue_wedged(struct xe_exec_queue *q)
+{
+ atomic_or(EXEC_QUEUE_STATE_WEDGED, &q->guc->state);
+}
+
+static bool exec_queue_killed_or_banned_or_wedged(struct xe_exec_queue *q)
+{
+ return exec_queue_banned(q) || (atomic_read(&q->guc->state) &
+ (EXEC_QUEUE_STATE_WEDGED | EXEC_QUEUE_STATE_KILLED));
}
#ifdef CONFIG_PROVE_LOCKING
@@ -240,6 +252,17 @@ static void guc_submit_fini(struct drm_device *drm, void *arg)
free_submit_wq(guc);
}
+static void guc_submit_wedged_fini(struct drm_device *drm, void *arg)
+{
+ struct xe_guc *guc = arg;
+ struct xe_exec_queue *q;
+ unsigned long index;
+
+ xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
+ if (exec_queue_wedged(q))
+ xe_exec_queue_put(q);
+}
+
static const struct xe_exec_queue_ops guc_exec_queue_ops;
static void primelockdep(struct xe_guc *guc)
@@ -250,7 +273,6 @@ static void primelockdep(struct xe_guc *guc)
fs_reclaim_acquire(GFP_KERNEL);
mutex_lock(&guc->submission_state.lock);
- might_lock(&guc->submission_state.suspend.lock);
mutex_unlock(&guc->submission_state.lock);
fs_reclaim_release(GFP_KERNEL);
@@ -278,9 +300,6 @@ int xe_guc_submit_init(struct xe_guc *guc)
xa_init(&guc->submission_state.exec_queue_lookup);
- spin_lock_init(&guc->submission_state.suspend.lock);
- guc->submission_state.suspend.context = dma_fence_context_alloc(1);
-
primelockdep(guc);
return drmm_add_action_or_reset(&xe->drm, guc_submit_fini, guc);
@@ -430,9 +449,9 @@ static void set_min_preemption_timeout(struct xe_guc *guc, struct xe_exec_queue
xe_map_wr_field(xe_, &map_, 0, struct guc_submit_parallel_scratch, \
field_, val_)
-static void __register_mlrc_engine(struct xe_guc *guc,
- struct xe_exec_queue *q,
- struct guc_ctxt_registration_info *info)
+static void __register_mlrc_exec_queue(struct xe_guc *guc,
+ struct xe_exec_queue *q,
+ struct guc_ctxt_registration_info *info)
{
#define MAX_MLRC_REG_SIZE (13 + XE_HW_ENGINE_MAX_INSTANCE * 2)
struct xe_device *xe = guc_to_xe(guc);
@@ -469,8 +488,8 @@ static void __register_mlrc_engine(struct xe_guc *guc,
xe_guc_ct_send(&guc->ct, action, len, 0, 0);
}
-static void __register_engine(struct xe_guc *guc,
- struct guc_ctxt_registration_info *info)
+static void __register_exec_queue(struct xe_guc *guc,
+ struct guc_ctxt_registration_info *info)
{
u32 action[] = {
XE_GUC_ACTION_REGISTER_CONTEXT,
@@ -490,7 +509,7 @@ static void __register_engine(struct xe_guc *guc,
xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 0, 0);
}
-static void register_engine(struct xe_exec_queue *q)
+static void register_exec_queue(struct xe_exec_queue *q)
{
struct xe_guc *guc = exec_queue_to_guc(q);
struct xe_device *xe = guc_to_xe(guc);
@@ -538,9 +557,9 @@ static void register_engine(struct xe_exec_queue *q)
set_exec_queue_registered(q);
trace_xe_exec_queue_register(q);
if (xe_exec_queue_is_parallel(q))
- __register_mlrc_engine(guc, q, &info);
+ __register_mlrc_exec_queue(guc, q, &info);
else
- __register_engine(guc, &info);
+ __register_exec_queue(guc, &info);
init_policies(guc, q);
}
@@ -658,7 +677,7 @@ static void submit_exec_queue(struct xe_exec_queue *q)
if (xe_exec_queue_is_parallel(q))
wq_item_append(q);
else
- xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail);
+ xe_lrc_set_ring_tail(lrc, lrc->ring.tail);
if (exec_queue_suspended(q) && !xe_exec_queue_is_parallel(q))
return;
@@ -708,9 +727,9 @@ guc_exec_queue_run_job(struct drm_sched_job *drm_job)
trace_xe_sched_job_run(job);
- if (!exec_queue_killed_or_banned(q) && !xe_sched_job_is_error(job)) {
+ if (!exec_queue_killed_or_banned_or_wedged(q) && !xe_sched_job_is_error(job)) {
if (!exec_queue_registered(q))
- register_engine(q);
+ register_exec_queue(q);
if (!lr) /* LR jobs are emitted in the exec IOCTL */
q->ring_ops->emit_job(job);
submit_exec_queue(q);
@@ -844,6 +863,40 @@ static void xe_guc_exec_queue_trigger_cleanup(struct xe_exec_queue *q)
xe_sched_tdr_queue_imm(&q->guc->sched);
}
+static bool guc_submit_hint_wedged(struct xe_guc *guc)
+{
+ struct xe_device *xe = guc_to_xe(guc);
+ struct xe_exec_queue *q;
+ unsigned long index;
+ int err;
+
+ if (xe->wedged.mode != 2)
+ return false;
+
+ if (xe_device_wedged(xe))
+ return true;
+
+ xe_device_declare_wedged(xe);
+
+ xe_guc_submit_reset_prepare(guc);
+ xe_guc_ct_stop(&guc->ct);
+
+ err = drmm_add_action_or_reset(&guc_to_xe(guc)->drm,
+ guc_submit_wedged_fini, guc);
+ if (err) {
+ drm_err(&xe->drm, "Failed to register xe_guc_submit clean-up on wedged.mode=2; device is wedged anyway\n");
+ return true; /* Device is wedged anyway */
+ }
+
+ mutex_lock(&guc->submission_state.lock);
+ xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
+ if (xe_exec_queue_get_unless_zero(q))
+ set_exec_queue_wedged(q);
+ mutex_unlock(&guc->submission_state.lock);
+
+ return true;
+}
+
static void xe_guc_exec_queue_lr_cleanup(struct work_struct *w)
{
struct xe_guc_exec_queue *ge =
@@ -852,10 +905,13 @@ static void xe_guc_exec_queue_lr_cleanup(struct work_struct *w)
struct xe_guc *guc = exec_queue_to_guc(q);
struct xe_device *xe = guc_to_xe(guc);
struct xe_gpu_scheduler *sched = &ge->sched;
+ bool wedged;
xe_assert(xe, xe_exec_queue_is_lr(q));
trace_xe_exec_queue_lr_cleanup(q);
+ wedged = guc_submit_hint_wedged(exec_queue_to_guc(q));
+
/* Kill the run_job / process_msg entry points */
xe_sched_submission_stop(sched);
@@ -870,7 +926,7 @@ static void xe_guc_exec_queue_lr_cleanup(struct work_struct *w)
* xe_guc_deregister_done_handler() which treats it as an unexpected
* state.
*/
- if (exec_queue_registered(q) && !exec_queue_destroyed(q)) {
+ if (!wedged && exec_queue_registered(q) && !exec_queue_destroyed(q)) {
struct xe_guc *guc = exec_queue_to_guc(q);
int ret;
@@ -905,6 +961,7 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job)
struct xe_device *xe = guc_to_xe(exec_queue_to_guc(q));
int err = -ETIME;
int i = 0;
+ bool wedged;
/*
* TDR has fired before free job worker. Common if exec queue
@@ -923,11 +980,15 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job)
xe_gt_WARN(q->gt, q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q),
"VM job timed out on non-killed execqueue\n");
- simple_error_capture(q);
- xe_devcoredump(job);
+ if (!exec_queue_killed(q)) {
+ simple_error_capture(q);
+ xe_devcoredump(job);
+ }
trace_xe_sched_job_timedout(job);
+ wedged = guc_submit_hint_wedged(exec_queue_to_guc(q));
+
/* Kill the run_job entry point */
xe_sched_submission_stop(sched);
@@ -935,8 +996,8 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job)
* Kernel jobs should never fail, nor should VM jobs; if they do,
* something has gone wrong and the GT needs a reset
*/
- if (q->flags & EXEC_QUEUE_FLAG_KERNEL ||
- (q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q))) {
+ if (!wedged && (q->flags & EXEC_QUEUE_FLAG_KERNEL ||
+ (q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q)))) {
if (!xe_sched_invalidate_job(job, 2)) {
xe_sched_add_pending_job(sched, job);
xe_sched_submission_start(sched);
@@ -946,7 +1007,7 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job)
}
/* Engine state now stable, disable scheduling if needed */
- if (exec_queue_registered(q)) {
+ if (!wedged && exec_queue_registered(q)) {
struct xe_guc *guc = exec_queue_to_guc(q);
int ret;
@@ -989,6 +1050,7 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job)
*/
xe_sched_add_pending_job(sched, job);
xe_sched_submission_start(sched);
+
xe_guc_exec_queue_trigger_cleanup(q);
/* Mark all outstanding jobs as bad, thus completing them */
@@ -1028,7 +1090,7 @@ static void guc_exec_queue_fini_async(struct xe_exec_queue *q)
INIT_WORK(&q->guc->fini_async, __guc_exec_queue_fini_async);
/* We must block on kernel engines so slabs are empty on driver unload */
- if (q->flags & EXEC_QUEUE_FLAG_PERMANENT)
+ if (q->flags & EXEC_QUEUE_FLAG_PERMANENT || exec_queue_wedged(q))
__guc_exec_queue_fini_async(&q->guc->fini_async);
else
queue_work(system_wq, &q->guc->fini_async);
@@ -1063,7 +1125,7 @@ static void __guc_exec_queue_process_msg_cleanup(struct xe_sched_msg *msg)
static bool guc_exec_queue_allowed_to_change_state(struct xe_exec_queue *q)
{
- return !exec_queue_killed_or_banned(q) && exec_queue_registered(q);
+ return !exec_queue_killed_or_banned_or_wedged(q) && exec_queue_registered(q);
}
static void __guc_exec_queue_process_msg_set_sched_props(struct xe_sched_msg *msg)
@@ -1274,7 +1336,7 @@ static void guc_exec_queue_fini(struct xe_exec_queue *q)
{
struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_CLEANUP;
- if (!(q->flags & EXEC_QUEUE_FLAG_PERMANENT))
+ if (!(q->flags & EXEC_QUEUE_FLAG_PERMANENT) && !exec_queue_wedged(q))
guc_exec_queue_add_msg(q, msg, CLEANUP);
else
__guc_exec_queue_fini(exec_queue_to_guc(q), q);
@@ -1285,7 +1347,8 @@ static int guc_exec_queue_set_priority(struct xe_exec_queue *q,
{
struct xe_sched_msg *msg;
- if (q->sched_props.priority == priority || exec_queue_killed_or_banned(q))
+ if (q->sched_props.priority == priority ||
+ exec_queue_killed_or_banned_or_wedged(q))
return 0;
msg = kmalloc(sizeof(*msg), GFP_KERNEL);
@@ -1303,7 +1366,7 @@ static int guc_exec_queue_set_timeslice(struct xe_exec_queue *q, u32 timeslice_u
struct xe_sched_msg *msg;
if (q->sched_props.timeslice_us == timeslice_us ||
- exec_queue_killed_or_banned(q))
+ exec_queue_killed_or_banned_or_wedged(q))
return 0;
msg = kmalloc(sizeof(*msg), GFP_KERNEL);
@@ -1322,7 +1385,7 @@ static int guc_exec_queue_set_preempt_timeout(struct xe_exec_queue *q,
struct xe_sched_msg *msg;
if (q->sched_props.preempt_timeout_us == preempt_timeout_us ||
- exec_queue_killed_or_banned(q))
+ exec_queue_killed_or_banned_or_wedged(q))
return 0;
msg = kmalloc(sizeof(*msg), GFP_KERNEL);
@@ -1339,7 +1402,7 @@ static int guc_exec_queue_suspend(struct xe_exec_queue *q)
{
struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_SUSPEND;
- if (exec_queue_killed_or_banned(q) || q->guc->suspend_pending)
+ if (exec_queue_killed_or_banned_or_wedged(q) || q->guc->suspend_pending)
return -EINVAL;
q->guc->suspend_pending = true;
@@ -1410,7 +1473,7 @@ static void guc_exec_queue_stop(struct xe_guc *guc, struct xe_exec_queue *q)
set_exec_queue_suspended(q);
suspend_fence_signal(q);
}
- atomic_and(EXEC_QUEUE_STATE_DESTROYED | ENGINE_STATE_SUSPENDED,
+ atomic_and(EXEC_QUEUE_STATE_DESTROYED | EXEC_QUEUE_STATE_SUSPENDED,
&q->guc->state);
q->guc->resume_time = 0;
trace_xe_exec_queue_stop(q);
@@ -1458,7 +1521,7 @@ void xe_guc_submit_reset_wait(struct xe_guc *guc)
wait_event(guc->ct.wq, !guc_read_stopped(guc));
}
-int xe_guc_submit_stop(struct xe_guc *guc)
+void xe_guc_submit_stop(struct xe_guc *guc)
{
struct xe_exec_queue *q;
unsigned long index;
@@ -1478,14 +1541,13 @@ int xe_guc_submit_stop(struct xe_guc *guc)
* creation which is protected by guc->submission_state.lock.
*/
- return 0;
}
static void guc_exec_queue_start(struct xe_exec_queue *q)
{
struct xe_gpu_scheduler *sched = &q->guc->sched;
- if (!exec_queue_killed_or_banned(q)) {
+ if (!exec_queue_killed_or_banned_or_wedged(q)) {
int i;
trace_xe_exec_queue_resubmit(q);
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.h b/drivers/gpu/drm/xe/xe_guc_submit.h
index fad0421ead36..4275b7da9df5 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.h
+++ b/drivers/gpu/drm/xe/xe_guc_submit.h
@@ -16,7 +16,7 @@ int xe_guc_submit_init(struct xe_guc *guc);
int xe_guc_submit_reset_prepare(struct xe_guc *guc);
void xe_guc_submit_reset_wait(struct xe_guc *guc);
-int xe_guc_submit_stop(struct xe_guc *guc);
+void xe_guc_submit_stop(struct xe_guc *guc);
int xe_guc_submit_start(struct xe_guc *guc);
int xe_guc_sched_done_handler(struct xe_guc *guc, u32 *msg, u32 len);
diff --git a/drivers/gpu/drm/xe/xe_guc_types.h b/drivers/gpu/drm/xe/xe_guc_types.h
index 82bd93f7867d..546ac6350a31 100644
--- a/drivers/gpu/drm/xe/xe_guc_types.h
+++ b/drivers/gpu/drm/xe/xe_guc_types.h
@@ -72,15 +72,6 @@ struct xe_guc {
atomic_t stopped;
/** @submission_state.lock: protects submission state */
struct mutex lock;
- /** @submission_state.suspend: suspend fence state */
- struct {
- /** @submission_state.suspend.lock: suspend fences lock */
- spinlock_t lock;
- /** @submission_state.suspend.context: suspend fences context */
- u64 context;
- /** @submission_state.suspend.seqno: suspend fences seqno */
- u32 seqno;
- } suspend;
#ifdef CONFIG_PROVE_LOCKING
#define NUM_SUBMIT_WQ 256
/** @submission_state.submit_wq_pool: submission ordered workqueues pool */
diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c
index 455f375c1cbd..45f582a7caaa 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine.c
+++ b/drivers/gpu/drm/xe/xe_hw_engine.c
@@ -18,6 +18,7 @@
#include "xe_gt.h"
#include "xe_gt_ccs_mode.h"
#include "xe_gt_printk.h"
+#include "xe_gt_mcr.h"
#include "xe_gt_topology.h"
#include "xe_hw_fence.h"
#include "xe_irq.h"
@@ -716,6 +717,11 @@ static void check_gsc_availability(struct xe_gt *gt)
*/
if (!xe_uc_fw_is_available(&gt->uc.gsc.fw)) {
gt->info.engine_mask &= ~BIT(XE_HW_ENGINE_GSCCS0);
+
+ /* interrupts were previously enabled, so turn them off */
+ xe_mmio_write32(gt, GUNIT_GSC_INTR_ENABLE, 0);
+ xe_mmio_write32(gt, GUNIT_GSC_INTR_MASK, ~0);
+
drm_info(&xe->drm, "gsccs disabled due to lack of FW\n");
}
}
@@ -766,6 +772,57 @@ void xe_hw_engine_handle_irq(struct xe_hw_engine *hwe, u16 intr_vec)
xe_hw_fence_irq_run(hwe->fence_irq);
}
+static bool
+is_slice_common_per_gslice(struct xe_device *xe)
+{
+ return GRAPHICS_VERx100(xe) >= 1255;
+}
+
+static void
+xe_hw_engine_snapshot_instdone_capture(struct xe_hw_engine *hwe,
+ struct xe_hw_engine_snapshot *snapshot)
+{
+ struct xe_gt *gt = hwe->gt;
+ struct xe_device *xe = gt_to_xe(gt);
+ unsigned int dss;
+ u16 group, instance;
+
+ snapshot->reg.instdone.ring = hw_engine_mmio_read32(hwe, RING_INSTDONE(0));
+
+ if (snapshot->hwe->class != XE_ENGINE_CLASS_RENDER)
+ return;
+
+ if (is_slice_common_per_gslice(xe) == false) {
+ snapshot->reg.instdone.slice_common[0] =
+ xe_mmio_read32(gt, SC_INSTDONE);
+ snapshot->reg.instdone.slice_common_extra[0] =
+ xe_mmio_read32(gt, SC_INSTDONE_EXTRA);
+ snapshot->reg.instdone.slice_common_extra2[0] =
+ xe_mmio_read32(gt, SC_INSTDONE_EXTRA2);
+ } else {
+ for_each_geometry_dss(dss, gt, group, instance) {
+ snapshot->reg.instdone.slice_common[dss] =
+ xe_gt_mcr_unicast_read(gt, XEHPG_SC_INSTDONE, group, instance);
+ snapshot->reg.instdone.slice_common_extra[dss] =
+ xe_gt_mcr_unicast_read(gt, XEHPG_SC_INSTDONE_EXTRA, group, instance);
+ snapshot->reg.instdone.slice_common_extra2[dss] =
+ xe_gt_mcr_unicast_read(gt, XEHPG_SC_INSTDONE_EXTRA2, group, instance);
+ }
+ }
+
+ for_each_geometry_dss(dss, gt, group, instance) {
+ snapshot->reg.instdone.sampler[dss] =
+ xe_gt_mcr_unicast_read(gt, SAMPLER_INSTDONE, group, instance);
+ snapshot->reg.instdone.row[dss] =
+ xe_gt_mcr_unicast_read(gt, ROW_INSTDONE, group, instance);
+
+ if (GRAPHICS_VERx100(xe) >= 1255)
+ snapshot->reg.instdone.geom_svg[dss] =
+ xe_gt_mcr_unicast_read(gt, XEHPG_INSTDONE_GEOM_SVGUNIT,
+ group, instance);
+ }
+}
+
/**
* xe_hw_engine_snapshot_capture - Take a quick snapshot of the HW Engine.
* @hwe: Xe HW Engine.
@@ -780,6 +837,7 @@ struct xe_hw_engine_snapshot *
xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe)
{
struct xe_hw_engine_snapshot *snapshot;
+ size_t len;
u64 val;
if (!xe_hw_engine_is_valid(hwe))
@@ -790,8 +848,30 @@ xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe)
if (!snapshot)
return NULL;
+ /* Because XE_MAX_DSS_FUSE_BITS is defined in xe_gt_types.h, and that header
+ * includes xe_hw_engine_types.h, the length of these registers can't be
+ * set in struct xe_hw_engine_snapshot, so do the additional
+ * allocations here.
+ */
+ len = (XE_MAX_DSS_FUSE_BITS * sizeof(u32));
+ snapshot->reg.instdone.slice_common = kzalloc(len, GFP_ATOMIC);
+ snapshot->reg.instdone.slice_common_extra = kzalloc(len, GFP_ATOMIC);
+ snapshot->reg.instdone.slice_common_extra2 = kzalloc(len, GFP_ATOMIC);
+ snapshot->reg.instdone.sampler = kzalloc(len, GFP_ATOMIC);
+ snapshot->reg.instdone.row = kzalloc(len, GFP_ATOMIC);
+ snapshot->reg.instdone.geom_svg = kzalloc(len, GFP_ATOMIC);
+ if (!snapshot->reg.instdone.slice_common ||
+ !snapshot->reg.instdone.slice_common_extra ||
+ !snapshot->reg.instdone.slice_common_extra2 ||
+ !snapshot->reg.instdone.sampler ||
+ !snapshot->reg.instdone.row ||
+ !snapshot->reg.instdone.geom_svg) {
+ xe_hw_engine_snapshot_free(snapshot);
+ return NULL;
+ }
+
snapshot->name = kstrdup(hwe->name, GFP_ATOMIC);
- snapshot->class = hwe->class;
+ snapshot->hwe = hwe;
snapshot->logical_instance = hwe->logical_instance;
snapshot->forcewake.domain = hwe->domain;
snapshot->forcewake.ref = xe_force_wake_ref(gt_to_fw(hwe->gt),
@@ -828,6 +908,13 @@ xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe)
snapshot->reg.ring_hwstam = hw_engine_mmio_read32(hwe, RING_HWSTAM(0));
snapshot->reg.ring_hws_pga = hw_engine_mmio_read32(hwe, RING_HWS_PGA(0));
snapshot->reg.ring_start = hw_engine_mmio_read32(hwe, RING_START(0));
+ if (xe_gt_has_indirect_ring_state(hwe->gt)) {
+ snapshot->reg.indirect_ring_state =
+ hw_engine_mmio_read32(hwe, INDIRECT_RING_STATE(0));
+ snapshot->reg.ring_start_udw =
+ hw_engine_mmio_read32(hwe, RING_START_UDW(0));
+ }
+
snapshot->reg.ring_head =
hw_engine_mmio_read32(hwe, RING_HEAD(0)) & HEAD_ADDR;
snapshot->reg.ring_tail =
@@ -841,13 +928,57 @@ xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe)
snapshot->reg.ring_emr = hw_engine_mmio_read32(hwe, RING_EMR(0));
snapshot->reg.ring_eir = hw_engine_mmio_read32(hwe, RING_EIR(0));
snapshot->reg.ipehr = hw_engine_mmio_read32(hwe, RING_IPEHR(0));
+ xe_hw_engine_snapshot_instdone_capture(hwe, snapshot);
- if (snapshot->class == XE_ENGINE_CLASS_COMPUTE)
+ if (snapshot->hwe->class == XE_ENGINE_CLASS_COMPUTE)
snapshot->reg.rcu_mode = xe_mmio_read32(hwe->gt, RCU_MODE);
return snapshot;
}
+static void
+xe_hw_engine_snapshot_instdone_print(struct xe_hw_engine_snapshot *snapshot, struct drm_printer *p)
+{
+ struct xe_gt *gt = snapshot->hwe->gt;
+ struct xe_device *xe = gt_to_xe(gt);
+ u16 group, instance;
+ unsigned int dss;
+
+ drm_printf(p, "\tRING_INSTDONE: 0x%08x\n", snapshot->reg.instdone.ring);
+
+ if (snapshot->hwe->class != XE_ENGINE_CLASS_RENDER)
+ return;
+
+ if (is_slice_common_per_gslice(xe) == false) {
+ drm_printf(p, "\tSC_INSTDONE[0]: 0x%08x\n",
+ snapshot->reg.instdone.slice_common[0]);
+ drm_printf(p, "\tSC_INSTDONE_EXTRA[0]: 0x%08x\n",
+ snapshot->reg.instdone.slice_common_extra[0]);
+ drm_printf(p, "\tSC_INSTDONE_EXTRA2[0]: 0x%08x\n",
+ snapshot->reg.instdone.slice_common_extra2[0]);
+ } else {
+ for_each_geometry_dss(dss, gt, group, instance) {
+ drm_printf(p, "\tSC_INSTDONE[%u]: 0x%08x\n", dss,
+ snapshot->reg.instdone.slice_common[dss]);
+ drm_printf(p, "\tSC_INSTDONE_EXTRA[%u]: 0x%08x\n", dss,
+ snapshot->reg.instdone.slice_common_extra[dss]);
+ drm_printf(p, "\tSC_INSTDONE_EXTRA2[%u]: 0x%08x\n", dss,
+ snapshot->reg.instdone.slice_common_extra2[dss]);
+ }
+ }
+
+ for_each_geometry_dss(dss, gt, group, instance) {
+ drm_printf(p, "\tSAMPLER_INSTDONE[%u]: 0x%08x\n", dss,
+ snapshot->reg.instdone.sampler[dss]);
+ drm_printf(p, "\tROW_INSTDONE[%u]: 0x%08x\n", dss,
+ snapshot->reg.instdone.row[dss]);
+
+ if (GRAPHICS_VERx100(xe) >= 1255)
+ drm_printf(p, "\tINSTDONE_GEOM_SVGUNIT[%u]: 0x%08x\n",
+ dss, snapshot->reg.instdone.geom_svg[dss]);
+ }
+}
+
/**
* xe_hw_engine_snapshot_print - Print out a given Xe HW Engine snapshot.
* @snapshot: Xe HW Engine snapshot object.
@@ -873,6 +1004,8 @@ void xe_hw_engine_snapshot_print(struct xe_hw_engine_snapshot *snapshot,
drm_printf(p, "\tRING_EXECLIST_SQ_CONTENTS: 0x%016llx\n",
snapshot->reg.ring_execlist_sq_contents);
drm_printf(p, "\tRING_START: 0x%08x\n", snapshot->reg.ring_start);
+ drm_printf(p, "\tRING_START_UDW: 0x%08x\n",
+ snapshot->reg.ring_start_udw);
drm_printf(p, "\tRING_HEAD: 0x%08x\n", snapshot->reg.ring_head);
drm_printf(p, "\tRING_TAIL: 0x%08x\n", snapshot->reg.ring_tail);
drm_printf(p, "\tRING_CTL: 0x%08x\n", snapshot->reg.ring_ctl);
@@ -886,10 +1019,15 @@ void xe_hw_engine_snapshot_print(struct xe_hw_engine_snapshot *snapshot,
drm_printf(p, "\tACTHD: 0x%016llx\n", snapshot->reg.ring_acthd);
drm_printf(p, "\tBBADDR: 0x%016llx\n", snapshot->reg.ring_bbaddr);
drm_printf(p, "\tDMA_FADDR: 0x%016llx\n", snapshot->reg.ring_dma_fadd);
+ drm_printf(p, "\tINDIRECT_RING_STATE: 0x%08x\n",
+ snapshot->reg.indirect_ring_state);
drm_printf(p, "\tIPEHR: 0x%08x\n", snapshot->reg.ipehr);
- if (snapshot->class == XE_ENGINE_CLASS_COMPUTE)
+ xe_hw_engine_snapshot_instdone_print(snapshot, p);
+
+ if (snapshot->hwe->class == XE_ENGINE_CLASS_COMPUTE)
drm_printf(p, "\tRCU_MODE: 0x%08x\n",
snapshot->reg.rcu_mode);
+ drm_puts(p, "\n");
}
/**
@@ -904,6 +1042,12 @@ void xe_hw_engine_snapshot_free(struct xe_hw_engine_snapshot *snapshot)
if (!snapshot)
return;
+ kfree(snapshot->reg.instdone.slice_common);
+ kfree(snapshot->reg.instdone.slice_common_extra);
+ kfree(snapshot->reg.instdone.slice_common_extra2);
+ kfree(snapshot->reg.instdone.sampler);
+ kfree(snapshot->reg.instdone.row);
+ kfree(snapshot->reg.instdone.geom_svg);
kfree(snapshot->name);
kfree(snapshot);
}
diff --git a/drivers/gpu/drm/xe/xe_hw_engine_types.h b/drivers/gpu/drm/xe/xe_hw_engine_types.h
index d7f828c76cc5..5f4b67acba99 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine_types.h
+++ b/drivers/gpu/drm/xe/xe_hw_engine_types.h
@@ -158,8 +158,8 @@ struct xe_hw_engine {
struct xe_hw_engine_snapshot {
/** @name: name of the hw engine */
char *name;
- /** @class: class of this hw engine */
- enum xe_engine_class class;
+ /** @hwe: hw engine */
+ struct xe_hw_engine *hwe;
/** @logical_instance: logical instance of this hw engine */
u16 logical_instance;
/** @forcewake: Force Wake information snapshot */
@@ -189,6 +189,8 @@ struct xe_hw_engine_snapshot {
u32 ring_hws_pga;
/** @reg.ring_start: RING_START */
u32 ring_start;
+ /** @reg.ring_start_udw: RING_START_UDW */
+ u32 ring_start_udw;
/** @reg.ring_head: RING_HEAD */
u32 ring_head;
/** @reg.ring_tail: RING_TAIL */
@@ -207,10 +209,28 @@ struct xe_hw_engine_snapshot {
u32 ring_emr;
/** @reg.ring_eir: RING_EIR */
u32 ring_eir;
+ /** @reg.indirect_ring_state: INDIRECT_RING_STATE */
+ u32 indirect_ring_state;
/** @reg.ipehr: IPEHR */
u32 ipehr;
/** @reg.rcu_mode: RCU_MODE */
u32 rcu_mode;
+ struct {
+ /** @reg.instdone.ring: RING_INSTDONE */
+ u32 ring;
+ /** @reg.instdone.slice_common: SC_INSTDONE */
+ u32 *slice_common;
+ /** @reg.instdone.slice_common_extra: SC_INSTDONE_EXTRA */
+ u32 *slice_common_extra;
+ /** @reg.instdone.slice_common_extra2: SC_INSTDONE_EXTRA2 */
+ u32 *slice_common_extra2;
+ /** @reg.instdone.sampler: SAMPLER_INSTDONE */
+ u32 *sampler;
+ /** @reg.instdone.row: ROW_INSTDONE */
+ u32 *row;
+ /** @reg.instdone.geom_svg: INSTDONE_GEOM_SVGUNIT */
+ u32 *geom_svg;
+ } instdone;
} reg;
};
diff --git a/drivers/gpu/drm/xe/xe_hwmon.c b/drivers/gpu/drm/xe/xe_hwmon.c
index 453e601ddd5e..dca275117232 100644
--- a/drivers/gpu/drm/xe/xe_hwmon.c
+++ b/drivers/gpu/drm/xe/xe_hwmon.c
@@ -550,12 +550,17 @@ xe_hwmon_curr_is_visible(const struct xe_hwmon *hwmon, u32 attr, int channel)
{
u32 uval;
+ /* hwmon sysfs attributes for current are available only for the package channel */
+ if (channel != CHANNEL_PKG)
+ return 0;
+
switch (attr) {
case hwmon_curr_crit:
- case hwmon_curr_label:
- if (channel == CHANNEL_PKG)
return (xe_hwmon_pcode_read_i1(hwmon->gt, &uval) ||
(uval & POWER_SETUP_I1_WATTS)) ? 0 : 0644;
+ case hwmon_curr_label:
+ return (xe_hwmon_pcode_read_i1(hwmon->gt, &uval) ||
+ (uval & POWER_SETUP_I1_WATTS)) ? 0 : 0444;
break;
default:
return 0;
diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c
index 615bbc372ac6..9b0a4078add3 100644
--- a/drivers/gpu/drm/xe/xe_lrc.c
+++ b/drivers/gpu/drm/xe/xe_lrc.c
@@ -11,7 +11,6 @@
#include "instructions/xe_gfxpipe_commands.h"
#include "instructions/xe_gfx_state_commands.h"
#include "regs/xe_engine_regs.h"
-#include "regs/xe_gpu_commands.h"
#include "regs/xe_lrc_layout.h"
#include "xe_bb.h"
#include "xe_bo.h"
@@ -34,12 +33,15 @@
#define LRC_ENGINE_CLASS GENMASK_ULL(63, 61)
#define LRC_ENGINE_INSTANCE GENMASK_ULL(53, 48)
+#define LRC_INDIRECT_RING_STATE_SIZE SZ_4K
+
struct xe_lrc_snapshot {
struct xe_bo *lrc_bo;
void *lrc_snapshot;
unsigned long lrc_size, lrc_offset;
u32 context_desc;
+ u32 indirect_context_desc;
u32 head;
struct {
u32 internal;
@@ -55,20 +57,25 @@ lrc_to_xe(struct xe_lrc *lrc)
return gt_to_xe(lrc->fence_ctx.gt);
}
-size_t xe_lrc_size(struct xe_device *xe, enum xe_engine_class class)
+size_t xe_gt_lrc_size(struct xe_gt *gt, enum xe_engine_class class)
{
+ struct xe_device *xe = gt_to_xe(gt);
+ size_t size;
+
switch (class) {
case XE_ENGINE_CLASS_RENDER:
if (GRAPHICS_VER(xe) >= 20)
- return 4 * SZ_4K;
+ size = 4 * SZ_4K;
else
- return 14 * SZ_4K;
+ size = 14 * SZ_4K;
+ break;
case XE_ENGINE_CLASS_COMPUTE:
/* 14 pages since graphics_ver == 11 */
if (GRAPHICS_VER(xe) >= 20)
- return 3 * SZ_4K;
+ size = 3 * SZ_4K;
else
- return 14 * SZ_4K;
+ size = 14 * SZ_4K;
+ break;
default:
WARN(1, "Unknown engine class: %d", class);
fallthrough;
@@ -76,8 +83,14 @@ size_t xe_lrc_size(struct xe_device *xe, enum xe_engine_class class)
case XE_ENGINE_CLASS_VIDEO_DECODE:
case XE_ENGINE_CLASS_VIDEO_ENHANCE:
case XE_ENGINE_CLASS_OTHER:
- return 2 * SZ_4K;
+ size = 2 * SZ_4K;
}
+
+ /* Add indirect ring state page */
+ if (xe_gt_has_indirect_ring_state(gt))
+ size += LRC_INDIRECT_RING_STATE_SIZE;
+
+ return size;
}
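For illustration only (the numbers simply follow the branches above): a render engine on GRAPHICS_VER >= 20 with indirect ring state enabled ends up with 4 * SZ_4K + SZ_4K = 20 KiB, while a render engine on an older GT without indirect ring state keeps 14 * SZ_4K = 56 KiB, matching the old xe_lrc_size() result.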
/*
@@ -508,6 +521,32 @@ static const u8 xe2_xcs_offsets[] = {
0
};
+static const u8 xe2_indirect_ring_state_offsets[] = {
+ NOP(1), /* [0x00] */
+ LRI(5, POSTED), /* [0x01] */
+ REG(0x034), /* [0x02] RING_BUFFER_HEAD */
+ REG(0x030), /* [0x04] RING_BUFFER_TAIL */
+ REG(0x038), /* [0x06] RING_BUFFER_START */
+ REG(0x048), /* [0x08] RING_BUFFER_START_UDW */
+ REG(0x03c), /* [0x0a] RING_BUFFER_CONTROL */
+
+ NOP(5), /* [0x0c] */
+ LRI(9, POSTED), /* [0x11] */
+ REG(0x168), /* [0x12] BB_ADDR_UDW */
+ REG(0x140), /* [0x14] BB_ADDR */
+ REG(0x110), /* [0x16] BB_STATE */
+ REG16(0x588), /* [0x18] BB_STACK_WRITE_PORT */
+ REG16(0x588), /* [0x20] BB_STACK_WRITE_PORT */
+ REG16(0x588), /* [0x22] BB_STACK_WRITE_PORT */
+ REG16(0x588), /* [0x24] BB_STACK_WRITE_PORT */
+ REG16(0x588), /* [0x26] BB_STACK_WRITE_PORT */
+ REG16(0x588), /* [0x28] BB_STACK_WRITE_PORT */
+
+ NOP(12), /* [0x00] */
+
+ 0
+};
+
#undef REG16
#undef REG
#undef LRI
@@ -546,6 +585,10 @@ static void set_context_control(u32 *regs, struct xe_hw_engine *hwe)
regs[CTX_CONTEXT_CONTROL] = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH |
CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
+ if (xe_gt_has_indirect_ring_state(hwe->gt))
+ regs[CTX_CONTEXT_CONTROL] |=
+ _MASKED_BIT_ENABLE(CTX_CTRL_INDIRECT_RING_STATE_ENABLE);
+
/* TODO: Timestamp */
}
@@ -589,6 +632,11 @@ static void reset_stop_ring(u32 *regs, struct xe_hw_engine *hwe)
regs[x + 1] |= STOP_RING << 16;
}
+static inline bool xe_lrc_has_indirect_ring_state(struct xe_lrc *lrc)
+{
+ return lrc->flags & XE_LRC_FLAG_INDIRECT_RING_STATE;
+}
+
static inline u32 __xe_lrc_ring_offset(struct xe_lrc *lrc)
{
return 0;
@@ -643,6 +691,12 @@ static inline u32 __xe_lrc_regs_offset(struct xe_lrc *lrc)
return xe_lrc_pphwsp_offset(lrc) + LRC_PPHWSP_SIZE;
}
+static inline u32 __xe_lrc_indirect_ring_offset(struct xe_lrc *lrc)
+{
+ /* Indirect ring state page is at the very end of LRC */
+ return lrc->size - LRC_INDIRECT_RING_STATE_SIZE;
+}
+
#define DECL_MAP_ADDR_HELPERS(elem) \
static inline struct iosys_map __xe_lrc_##elem##_map(struct xe_lrc *lrc) \
{ \
@@ -663,6 +717,7 @@ DECL_MAP_ADDR_HELPERS(seqno)
DECL_MAP_ADDR_HELPERS(regs)
DECL_MAP_ADDR_HELPERS(start_seqno)
DECL_MAP_ADDR_HELPERS(parallel)
+DECL_MAP_ADDR_HELPERS(indirect_ring)
#undef DECL_MAP_ADDR_HELPERS
@@ -671,6 +726,35 @@ u32 xe_lrc_ggtt_addr(struct xe_lrc *lrc)
return __xe_lrc_pphwsp_ggtt_addr(lrc);
}
+u32 xe_lrc_indirect_ring_ggtt_addr(struct xe_lrc *lrc)
+{
+ if (!xe_lrc_has_indirect_ring_state(lrc))
+ return 0;
+
+ return __xe_lrc_indirect_ring_ggtt_addr(lrc);
+}
+
+static u32 xe_lrc_read_indirect_ctx_reg(struct xe_lrc *lrc, int reg_nr)
+{
+ struct xe_device *xe = lrc_to_xe(lrc);
+ struct iosys_map map;
+
+ map = __xe_lrc_indirect_ring_map(lrc);
+ iosys_map_incr(&map, reg_nr * sizeof(u32));
+ return xe_map_read32(xe, &map);
+}
+
+static void xe_lrc_write_indirect_ctx_reg(struct xe_lrc *lrc,
+ int reg_nr, u32 val)
+{
+ struct xe_device *xe = lrc_to_xe(lrc);
+ struct iosys_map map;
+
+ map = __xe_lrc_indirect_ring_map(lrc);
+ iosys_map_incr(&map, reg_nr * sizeof(u32));
+ xe_map_write32(xe, &map, val);
+}
+
u32 xe_lrc_read_ctx_reg(struct xe_lrc *lrc, int reg_nr)
{
struct xe_device *xe = lrc_to_xe(lrc);
@@ -693,20 +777,25 @@ void xe_lrc_write_ctx_reg(struct xe_lrc *lrc, int reg_nr, u32 val)
static void *empty_lrc_data(struct xe_hw_engine *hwe)
{
- struct xe_device *xe = gt_to_xe(hwe->gt);
+ struct xe_gt *gt = hwe->gt;
void *data;
u32 *regs;
- data = kzalloc(xe_lrc_size(xe, hwe->class), GFP_KERNEL);
+ data = kzalloc(xe_gt_lrc_size(gt, hwe->class), GFP_KERNEL);
if (!data)
return NULL;
/* 1st page: Per-Process of HW status Page */
regs = data + LRC_PPHWSP_SIZE;
- set_offsets(regs, reg_offsets(xe, hwe->class), hwe);
+ set_offsets(regs, reg_offsets(gt_to_xe(gt), hwe->class), hwe);
set_context_control(regs, hwe);
set_memory_based_intr(regs, hwe);
reset_stop_ring(regs, hwe);
+ if (xe_gt_has_indirect_ring_state(gt)) {
+ regs = data + xe_gt_lrc_size(gt, hwe->class) -
+ LRC_INDIRECT_RING_STATE_SIZE;
+ set_offsets(regs, xe2_indirect_ring_state_offsets, hwe);
+ }
return data;
}
@@ -731,23 +820,27 @@ int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
struct iosys_map map;
void *init_data = NULL;
u32 arb_enable;
+ u32 lrc_size;
int err;
lrc->flags = 0;
+ lrc_size = ring_size + xe_gt_lrc_size(gt, hwe->class);
+ if (xe_gt_has_indirect_ring_state(gt))
+ lrc->flags |= XE_LRC_FLAG_INDIRECT_RING_STATE;
/*
* FIXME: Perma-pinning LRC as we don't yet support moving GGTT address
* via VM bind calls.
*/
- lrc->bo = xe_bo_create_pin_map(xe, tile, vm,
- ring_size + xe_lrc_size(xe, hwe->class),
- ttm_bo_type_kernel,
- XE_BO_FLAG_VRAM_IF_DGFX(tile) |
- XE_BO_FLAG_GGTT |
- XE_BO_FLAG_GGTT_INVALIDATE);
+ lrc->bo = xe_bo_create_pin_map(xe, tile, vm, lrc_size,
+ ttm_bo_type_kernel,
+ XE_BO_FLAG_VRAM_IF_DGFX(tile) |
+ XE_BO_FLAG_GGTT |
+ XE_BO_FLAG_GGTT_INVALIDATE);
if (IS_ERR(lrc->bo))
return PTR_ERR(lrc->bo);
+ lrc->size = lrc_size;
lrc->tile = gt_to_tile(hwe->gt);
lrc->ring.size = ring_size;
lrc->ring.tail = 0;
@@ -772,10 +865,10 @@ int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
xe_map_memset(xe, &map, 0, 0, LRC_PPHWSP_SIZE); /* PPHWSP */
xe_map_memcpy_to(xe, &map, LRC_PPHWSP_SIZE,
gt->default_lrc[hwe->class] + LRC_PPHWSP_SIZE,
- xe_lrc_size(xe, hwe->class) - LRC_PPHWSP_SIZE);
+ xe_gt_lrc_size(gt, hwe->class) - LRC_PPHWSP_SIZE);
} else {
xe_map_memcpy_to(xe, &map, 0, init_data,
- xe_lrc_size(xe, hwe->class));
+ xe_gt_lrc_size(gt, hwe->class));
kfree(init_data);
}
@@ -786,11 +879,25 @@ int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
xe_drm_client_add_bo(vm->xef->client, lrc->bo);
}
- xe_lrc_write_ctx_reg(lrc, CTX_RING_START, __xe_lrc_ring_ggtt_addr(lrc));
- xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, 0);
- xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail);
- xe_lrc_write_ctx_reg(lrc, CTX_RING_CTL,
- RING_CTL_SIZE(lrc->ring.size) | RING_VALID);
+ if (xe_gt_has_indirect_ring_state(gt)) {
+ xe_lrc_write_ctx_reg(lrc, CTX_INDIRECT_RING_STATE,
+ __xe_lrc_indirect_ring_ggtt_addr(lrc));
+
+ xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_START,
+ __xe_lrc_ring_ggtt_addr(lrc));
+ xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_START_UDW, 0);
+ xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_HEAD, 0);
+ xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_TAIL, lrc->ring.tail);
+ xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_CTL,
+ RING_CTL_SIZE(lrc->ring.size) | RING_VALID);
+ } else {
+ xe_lrc_write_ctx_reg(lrc, CTX_RING_START, __xe_lrc_ring_ggtt_addr(lrc));
+ xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, 0);
+ xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail);
+ xe_lrc_write_ctx_reg(lrc, CTX_RING_CTL,
+ RING_CTL_SIZE(lrc->ring.size) | RING_VALID);
+ }
+
if (xe->info.has_asid && vm)
xe_lrc_write_ctx_reg(lrc, PVC_CTX_ASID, vm->usm.asid);
@@ -834,14 +941,36 @@ void xe_lrc_finish(struct xe_lrc *lrc)
xe_bo_put(lrc->bo);
}
+void xe_lrc_set_ring_tail(struct xe_lrc *lrc, u32 tail)
+{
+ if (xe_lrc_has_indirect_ring_state(lrc))
+ xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_TAIL, tail);
+ else
+ xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, tail);
+}
+
+u32 xe_lrc_ring_tail(struct xe_lrc *lrc)
+{
+ if (xe_lrc_has_indirect_ring_state(lrc))
+ return xe_lrc_read_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_TAIL) & TAIL_ADDR;
+ else
+ return xe_lrc_read_ctx_reg(lrc, CTX_RING_TAIL) & TAIL_ADDR;
+}
+
void xe_lrc_set_ring_head(struct xe_lrc *lrc, u32 head)
{
- xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, head);
+ if (xe_lrc_has_indirect_ring_state(lrc))
+ xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_HEAD, head);
+ else
+ xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, head);
}
u32 xe_lrc_ring_head(struct xe_lrc *lrc)
{
- return xe_lrc_read_ctx_reg(lrc, CTX_RING_HEAD) & HEAD_ADDR;
+ if (xe_lrc_has_indirect_ring_state(lrc))
+ return xe_lrc_read_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_HEAD) & HEAD_ADDR;
+ else
+ return xe_lrc_read_ctx_reg(lrc, CTX_RING_HEAD) & HEAD_ADDR;
}
u32 xe_lrc_ring_space(struct xe_lrc *lrc)
@@ -1214,7 +1343,7 @@ void xe_lrc_dump_default(struct drm_printer *p,
* hardware status page.
*/
dw = gt->default_lrc[hwe_class] + LRC_PPHWSP_SIZE;
- remaining_dw = (xe_lrc_size(gt_to_xe(gt), hwe_class) - LRC_PPHWSP_SIZE) / 4;
+ remaining_dw = (xe_gt_lrc_size(gt, hwe_class) - LRC_PPHWSP_SIZE) / 4;
while (remaining_dw > 0) {
if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_MI) {
@@ -1354,10 +1483,11 @@ struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc)
if (!snapshot)
return NULL;
- snapshot->context_desc = lower_32_bits(xe_lrc_ggtt_addr(lrc));
+ snapshot->context_desc = xe_lrc_ggtt_addr(lrc);
+ snapshot->indirect_context_desc = xe_lrc_indirect_ring_ggtt_addr(lrc);
snapshot->head = xe_lrc_ring_head(lrc);
snapshot->tail.internal = lrc->ring.tail;
- snapshot->tail.memory = xe_lrc_read_ctx_reg(lrc, CTX_RING_TAIL);
+ snapshot->tail.memory = xe_lrc_ring_tail(lrc);
snapshot->start_seqno = xe_lrc_start_seqno(lrc);
snapshot->seqno = xe_lrc_seqno(lrc);
snapshot->lrc_bo = xe_bo_get(lrc->bo);
@@ -1382,7 +1512,7 @@ void xe_lrc_snapshot_capture_delayed(struct xe_lrc_snapshot *snapshot)
if (!snapshot->lrc_snapshot)
goto put_bo;
- dma_resv_lock(bo->ttm.base.resv, NULL);
+ xe_bo_lock(bo, false);
if (!ttm_bo_vmap(&bo->ttm, &src)) {
xe_map_memcpy_from(xe_bo_device(bo),
snapshot->lrc_snapshot, &src, snapshot->lrc_offset,
@@ -1392,7 +1522,7 @@ void xe_lrc_snapshot_capture_delayed(struct xe_lrc_snapshot *snapshot)
kvfree(snapshot->lrc_snapshot);
snapshot->lrc_snapshot = NULL;
}
- dma_resv_unlock(bo->ttm.base.resv);
+ xe_bo_unlock(bo);
put_bo:
xe_bo_put(bo);
}
@@ -1405,6 +1535,8 @@ void xe_lrc_snapshot_print(struct xe_lrc_snapshot *snapshot, struct drm_printer
return;
drm_printf(p, "\tHW Context Desc: 0x%08x\n", snapshot->context_desc);
+ drm_printf(p, "\tHW Indirect Ring State: 0x%08x\n",
+ snapshot->indirect_context_desc);
drm_printf(p, "\tLRC Head: (memory) %u\n", snapshot->head);
drm_printf(p, "\tLRC Tail: (internal) %u, (memory) %u\n",
snapshot->tail.internal, snapshot->tail.memory);
diff --git a/drivers/gpu/drm/xe/xe_lrc.h b/drivers/gpu/drm/xe/xe_lrc.h
index d32fa31faa2c..e0e841963c23 100644
--- a/drivers/gpu/drm/xe/xe_lrc.h
+++ b/drivers/gpu/drm/xe/xe_lrc.h
@@ -5,14 +5,17 @@
#ifndef _XE_LRC_H_
#define _XE_LRC_H_
-#include "xe_lrc_types.h"
+#include <linux/types.h>
struct drm_printer;
struct xe_bb;
struct xe_device;
struct xe_exec_queue;
enum xe_engine_class;
+struct xe_gt;
struct xe_hw_engine;
+struct xe_lrc;
+struct xe_lrc_snapshot;
struct xe_vm;
#define LRC_PPHWSP_SCRATCH_ADDR (0x34 * 4)
@@ -21,14 +24,17 @@ int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
struct xe_exec_queue *q, struct xe_vm *vm, u32 ring_size);
void xe_lrc_finish(struct xe_lrc *lrc);
-size_t xe_lrc_size(struct xe_device *xe, enum xe_engine_class class);
+size_t xe_gt_lrc_size(struct xe_gt *gt, enum xe_engine_class class);
u32 xe_lrc_pphwsp_offset(struct xe_lrc *lrc);
+void xe_lrc_set_ring_tail(struct xe_lrc *lrc, u32 tail);
+u32 xe_lrc_ring_tail(struct xe_lrc *lrc);
void xe_lrc_set_ring_head(struct xe_lrc *lrc, u32 head);
u32 xe_lrc_ring_head(struct xe_lrc *lrc);
u32 xe_lrc_ring_space(struct xe_lrc *lrc);
void xe_lrc_write_ring(struct xe_lrc *lrc, const void *data, size_t size);
+u32 xe_lrc_indirect_ring_ggtt_addr(struct xe_lrc *lrc);
u32 xe_lrc_ggtt_addr(struct xe_lrc *lrc);
u32 *xe_lrc_regs(struct xe_lrc *lrc);
diff --git a/drivers/gpu/drm/xe/xe_lrc_types.h b/drivers/gpu/drm/xe/xe_lrc_types.h
index b716df0dfb4e..cdbf03faef15 100644
--- a/drivers/gpu/drm/xe/xe_lrc_types.h
+++ b/drivers/gpu/drm/xe/xe_lrc_types.h
@@ -20,10 +20,14 @@ struct xe_lrc {
*/
struct xe_bo *bo;
+ /** @size: size of lrc including any indirect ring state page */
+ u32 size;
+
/** @tile: tile which this LRC belongs to */
struct xe_tile *tile;
/** @flags: LRC flags */
+#define XE_LRC_FLAG_INDIRECT_RING_STATE 0x1
u32 flags;
/** @ring: submission ring state */
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index 9f6e9b7f11c8..36db5ed1a572 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -14,8 +14,8 @@
#include <generated/xe_wa_oob.h>
+#include "instructions/xe_gpu_commands.h"
#include "instructions/xe_mi_commands.h"
-#include "regs/xe_gpu_commands.h"
#include "regs/xe_gtt_defs.h"
#include "tests/xe_test.h"
#include "xe_assert.h"
diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c
index 334637511e75..05edab0e085d 100644
--- a/drivers/gpu/drm/xe/xe_mmio.c
+++ b/drivers/gpu/drm/xe/xe_mmio.c
@@ -15,6 +15,7 @@
#include "regs/xe_regs.h"
#include "xe_bo.h"
#include "xe_device.h"
+#include "xe_force_wake.h"
#include "xe_ggtt.h"
#include "xe_gt.h"
#include "xe_gt_mcr.h"
@@ -423,41 +424,33 @@ int xe_mmio_init(struct xe_device *xe)
u8 xe_mmio_read8(struct xe_gt *gt, struct xe_reg reg)
{
struct xe_tile *tile = gt_to_tile(gt);
+ u32 addr = xe_mmio_adjusted_addr(gt, reg.addr);
- if (reg.addr < gt->mmio.adj_limit)
- reg.addr += gt->mmio.adj_offset;
-
- return readb((reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + reg.addr);
+ return readb((reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + addr);
}
u16 xe_mmio_read16(struct xe_gt *gt, struct xe_reg reg)
{
struct xe_tile *tile = gt_to_tile(gt);
+ u32 addr = xe_mmio_adjusted_addr(gt, reg.addr);
- if (reg.addr < gt->mmio.adj_limit)
- reg.addr += gt->mmio.adj_offset;
-
- return readw((reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + reg.addr);
+ return readw((reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + addr);
}
void xe_mmio_write32(struct xe_gt *gt, struct xe_reg reg, u32 val)
{
struct xe_tile *tile = gt_to_tile(gt);
+ u32 addr = xe_mmio_adjusted_addr(gt, reg.addr);
- if (reg.addr < gt->mmio.adj_limit)
- reg.addr += gt->mmio.adj_offset;
-
- writel(val, (reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + reg.addr);
+ writel(val, (reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + addr);
}
u32 xe_mmio_read32(struct xe_gt *gt, struct xe_reg reg)
{
struct xe_tile *tile = gt_to_tile(gt);
+ u32 addr = xe_mmio_adjusted_addr(gt, reg.addr);
- if (reg.addr < gt->mmio.adj_limit)
- reg.addr += gt->mmio.adj_offset;
-
- return readl((reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + reg.addr);
+ return readl((reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + addr);
}
u32 xe_mmio_rmw32(struct xe_gt *gt, struct xe_reg reg, u32 clr, u32 set)
@@ -486,10 +479,9 @@ bool xe_mmio_in_range(const struct xe_gt *gt,
const struct xe_mmio_range *range,
struct xe_reg reg)
{
- if (reg.addr < gt->mmio.adj_limit)
- reg.addr += gt->mmio.adj_offset;
+ u32 addr = xe_mmio_adjusted_addr(gt, reg.addr);
- return range && reg.addr >= range->start && reg.addr <= range->end;
+ return range && addr >= range->start && addr <= range->end;
}
/**
@@ -519,10 +511,11 @@ u64 xe_mmio_read64_2x32(struct xe_gt *gt, struct xe_reg reg)
struct xe_reg reg_udw = { .addr = reg.addr + 0x4 };
u32 ldw, udw, oldudw, retries;
- if (reg.addr < gt->mmio.adj_limit) {
- reg.addr += gt->mmio.adj_offset;
- reg_udw.addr += gt->mmio.adj_offset;
- }
+ reg.addr = xe_mmio_adjusted_addr(gt, reg.addr);
+ reg_udw.addr = xe_mmio_adjusted_addr(gt, reg_udw.addr);
+
+ /* we shouldn't adjust just one register address */
+ xe_gt_assert(gt, reg_udw.addr == reg.addr + 0x4);
oldudw = xe_mmio_read32(gt, reg_udw);
for (retries = 5; retries; --retries) {
diff --git a/drivers/gpu/drm/xe/xe_mmio.h b/drivers/gpu/drm/xe/xe_mmio.h
index a3cd7b3036c7..445ec6a0753e 100644
--- a/drivers/gpu/drm/xe/xe_mmio.h
+++ b/drivers/gpu/drm/xe/xe_mmio.h
@@ -36,4 +36,11 @@ u64 xe_mmio_read64_2x32(struct xe_gt *gt, struct xe_reg reg);
int xe_mmio_wait32(struct xe_gt *gt, struct xe_reg reg, u32 mask, u32 val, u32 timeout_us,
u32 *out_val, bool atomic);
+static inline u32 xe_mmio_adjusted_addr(const struct xe_gt *gt, u32 addr)
+{
+ if (addr < gt->mmio.adj_limit)
+ addr += gt->mmio.adj_offset;
+ return addr;
+}
+
#endif
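As a worked illustration of the new helper (the adj_limit/adj_offset values below are made-up examples, not taken from any spec): if a media GT had mmio.adj_limit = 0x10000 and mmio.adj_offset = 0x380000, then xe_mmio_adjusted_addr(gt, 0x2340) would return 0x382340, while an address at or above adj_limit, say 0x138128, would come back unchanged; this is the same adjustment the read/write paths above previously open-coded.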
diff --git a/drivers/gpu/drm/xe/xe_mocs.c b/drivers/gpu/drm/xe/xe_mocs.c
index 1e92f8ee07ba..f04754ad911b 100644
--- a/drivers/gpu/drm/xe/xe_mocs.c
+++ b/drivers/gpu/drm/xe/xe_mocs.c
@@ -9,10 +9,12 @@
#include "xe_bo.h"
#include "xe_device.h"
#include "xe_exec_queue.h"
+#include "xe_force_wake.h"
#include "xe_gt.h"
#include "xe_gt_mcr.h"
#include "xe_mmio.h"
#include "xe_platform_types.h"
+#include "xe_pm.h"
#include "xe_sriov.h"
#include "xe_step_types.h"
@@ -36,34 +38,23 @@ struct xe_mocs_entry {
u16 used;
};
+struct xe_mocs_info;
+
+struct xe_mocs_ops {
+ void (*dump)(struct xe_mocs_info *mocs, unsigned int flags,
+ struct xe_gt *gt, struct drm_printer *p);
+};
+
struct xe_mocs_info {
unsigned int size;
unsigned int n_entries;
const struct xe_mocs_entry *table;
+ const struct xe_mocs_ops *ops;
u8 uc_index;
u8 wb_index;
u8 unused_entries_index;
};
-/* Defines for the tables (XXX_MOCS_0 - XXX_MOCS_63) */
-#define _LE_CACHEABILITY(value) ((value) << 0)
-#define _LE_TGT_CACHE(value) ((value) << 2)
-#define LE_LRUM(value) ((value) << 4)
-#define LE_AOM(value) ((value) << 6)
-#define LE_RSC(value) ((value) << 7)
-#define LE_SCC(value) ((value) << 8)
-#define LE_PFM(value) ((value) << 11)
-#define LE_SCF(value) ((value) << 14)
-#define LE_COS(value) ((value) << 15)
-#define LE_SSE(value) ((value) << 17)
-
-/* Defines for the tables (LNCFMOCS0 - LNCFMOCS31) - two entries per word */
-#define L3_ESC(value) ((value) << 0)
-#define L3_SCC(value) ((value) << 1)
-#define _L3_CACHEABILITY(value) ((value) << 4)
-#define L3_GLBGO(value) ((value) << 6)
-#define L3_LKUP(value) ((value) << 7)
-
/* Defines for the tables (GLOB_MOCS_0 - GLOB_MOCS_16) */
#define IG_PAT REG_BIT(8)
#define L3_CACHE_POLICY_MASK REG_GENMASK(5, 4)
@@ -80,22 +71,22 @@ struct xe_mocs_info {
* Note: LE_0_PAGETABLE works only up to Gen11; for newer gens it means
* the same as LE_UC
*/
-#define LE_0_PAGETABLE _LE_CACHEABILITY(0)
-#define LE_1_UC _LE_CACHEABILITY(1)
-#define LE_2_WT _LE_CACHEABILITY(2)
-#define LE_3_WB _LE_CACHEABILITY(3)
+#define LE_0_PAGETABLE LE_CACHEABILITY(0)
+#define LE_1_UC LE_CACHEABILITY(1)
+#define LE_2_WT LE_CACHEABILITY(2)
+#define LE_3_WB LE_CACHEABILITY(3)
/* Target cache */
-#define LE_TC_0_PAGETABLE _LE_TGT_CACHE(0)
-#define LE_TC_1_LLC _LE_TGT_CACHE(1)
-#define LE_TC_2_LLC_ELLC _LE_TGT_CACHE(2)
-#define LE_TC_3_LLC_ELLC_ALT _LE_TGT_CACHE(3)
+#define LE_TC_0_PAGETABLE LE_TGT_CACHE(0)
+#define LE_TC_1_LLC LE_TGT_CACHE(1)
+#define LE_TC_2_LLC_ELLC LE_TGT_CACHE(2)
+#define LE_TC_3_LLC_ELLC_ALT LE_TGT_CACHE(3)
/* L3 caching options */
-#define L3_0_DIRECT _L3_CACHEABILITY(0)
-#define L3_1_UC _L3_CACHEABILITY(1)
-#define L3_2_RESERVED _L3_CACHEABILITY(2)
-#define L3_3_WB _L3_CACHEABILITY(3)
+#define L3_0_DIRECT L3_CACHEABILITY(0)
+#define L3_1_UC L3_CACHEABILITY(1)
+#define L3_2_RESERVED L3_CACHEABILITY(2)
+#define L3_3_WB L3_CACHEABILITY(3)
/* L4 caching options */
#define L4_0_WB REG_FIELD_PREP(L4_CACHE_POLICY_MASK, 0)
@@ -107,6 +98,8 @@ struct xe_mocs_info {
#define XE2_L3_1_XD REG_FIELD_PREP(L3_CACHE_POLICY_MASK, 1)
#define XE2_L3_3_UC REG_FIELD_PREP(L3_CACHE_POLICY_MASK, 3)
+#define XE2_L3_CLOS_MASK REG_GENMASK(7, 6)
+
#define MOCS_ENTRY(__idx, __control_value, __l3cc_value) \
[__idx] = { \
.control_value = __control_value, \
@@ -255,6 +248,84 @@ static const struct xe_mocs_entry gen12_mocs_desc[] = {
L3_1_UC)
};
+static bool regs_are_mcr(struct xe_gt *gt)
+{
+ struct xe_device *xe = gt_to_xe(gt);
+
+ if (xe_gt_is_media_type(gt))
+ return MEDIA_VER(xe) >= 20;
+ else
+ return GRAPHICS_VERx100(xe) >= 1250;
+}
+
+static void xelp_lncf_dump(struct xe_mocs_info *info, struct xe_gt *gt, struct drm_printer *p)
+{
+ unsigned int i, j;
+ u32 reg_val;
+
+ drm_printf(p, "LNCFCMOCS[idx] = [ESC, SCC, L3CC] (value)\n\n");
+
+ for (i = 0, j = 0; i < (info->n_entries + 1) / 2; i++, j++) {
+ if (regs_are_mcr(gt))
+ reg_val = xe_gt_mcr_unicast_read_any(gt, XEHP_LNCFCMOCS(i));
+ else
+ reg_val = xe_mmio_read32(gt, XELP_LNCFCMOCS(i));
+
+ drm_printf(p, "LNCFCMOCS[%2d] = [%u, %u, %u] (%#8x)\n",
+ j++,
+ !!(reg_val & L3_ESC_MASK),
+ REG_FIELD_GET(L3_SCC_MASK, reg_val),
+ REG_FIELD_GET(L3_CACHEABILITY_MASK, reg_val),
+ reg_val);
+
+ drm_printf(p, "LNCFCMOCS[%2d] = [%u, %u, %u] (%#8x)\n",
+ j,
+ !!(reg_val & L3_UPPER_IDX_ESC_MASK),
+ REG_FIELD_GET(L3_UPPER_IDX_SCC_MASK, reg_val),
+ REG_FIELD_GET(L3_UPPER_IDX_CACHEABILITY_MASK, reg_val),
+ reg_val);
+ }
+}
+
+static void xelp_mocs_dump(struct xe_mocs_info *info, unsigned int flags,
+ struct xe_gt *gt, struct drm_printer *p)
+{
+ unsigned int i;
+ u32 reg_val;
+
+ if (flags & HAS_GLOBAL_MOCS) {
+ drm_printf(p, "Global mocs table configuration:\n");
+ drm_printf(p, "GLOB_MOCS[idx] = [LeCC, TC, LRUM, AOM, RSC, SCC, PFM, SCF, CoS, SSE] (value)\n\n");
+
+ for (i = 0; i < info->n_entries; i++) {
+ if (regs_are_mcr(gt))
+ reg_val = xe_gt_mcr_unicast_read_any(gt, XEHP_GLOBAL_MOCS(i));
+ else
+ reg_val = xe_mmio_read32(gt, XELP_GLOBAL_MOCS(i));
+
+ drm_printf(p, "GLOB_MOCS[%2d] = [%u, %u, %u, %u, %u, %u, %u, %u, %u, %u ] (%#8x)\n",
+ i,
+ REG_FIELD_GET(LE_CACHEABILITY_MASK, reg_val),
+ REG_FIELD_GET(LE_TGT_CACHE_MASK, reg_val),
+ REG_FIELD_GET(LE_LRUM_MASK, reg_val),
+ !!(reg_val & LE_AOM_MASK),
+ !!(reg_val & LE_RSC_MASK),
+ REG_FIELD_GET(LE_SCC_MASK, reg_val),
+ REG_FIELD_GET(LE_PFM_MASK, reg_val),
+ !!(reg_val & LE_SCF_MASK),
+ REG_FIELD_GET(LE_COS_MASK, reg_val),
+ REG_FIELD_GET(LE_SSE_MASK, reg_val),
+ reg_val);
+ }
+ }
+
+ xelp_lncf_dump(info, gt, p);
+}
+
+static const struct xe_mocs_ops xelp_mocs_ops = {
+ .dump = xelp_mocs_dump,
+};
+
static const struct xe_mocs_entry dg1_mocs_desc[] = {
/* UC */
MOCS_ENTRY(1, 0, L3_1_UC),
@@ -291,6 +362,40 @@ static const struct xe_mocs_entry dg2_mocs_desc[] = {
MOCS_ENTRY(3, 0, L3_3_WB | L3_LKUP(1)),
};
+static void xehp_lncf_dump(struct xe_mocs_info *info, unsigned int flags,
+ struct xe_gt *gt, struct drm_printer *p)
+{
+ unsigned int i, j;
+ u32 reg_val;
+
+ drm_printf(p, "LNCFCMOCS[idx] = [UCL3LOOKUP, GLBGO, L3CC] (value)\n\n");
+
+ for (i = 0, j = 0; i < (info->n_entries + 1) / 2; i++, j++) {
+ if (regs_are_mcr(gt))
+ reg_val = xe_gt_mcr_unicast_read_any(gt, XEHP_LNCFCMOCS(i));
+ else
+ reg_val = xe_mmio_read32(gt, XELP_LNCFCMOCS(i));
+
+ drm_printf(p, "LNCFCMOCS[%2d] = [%u, %u, %u] (%#8x)\n",
+ j++,
+ !!(reg_val & L3_LKUP_MASK),
+ !!(reg_val & L3_GLBGO_MASK),
+ REG_FIELD_GET(L3_CACHEABILITY_MASK, reg_val),
+ reg_val);
+
+ drm_printf(p, "LNCFCMOCS[%2d] = [%u, %u, %u] (%#8x)\n",
+ j,
+ !!(reg_val & L3_UPPER_LKUP_MASK),
+ !!(reg_val & L3_UPPER_GLBGO_MASK),
+ REG_FIELD_GET(L3_UPPER_IDX_CACHEABILITY_MASK, reg_val),
+ reg_val);
+ }
+}
+
+static const struct xe_mocs_ops xehp_mocs_ops = {
+ .dump = xehp_lncf_dump,
+};
+
static const struct xe_mocs_entry pvc_mocs_desc[] = {
/* Error */
MOCS_ENTRY(0, 0, L3_3_WB),
@@ -302,6 +407,36 @@ static const struct xe_mocs_entry pvc_mocs_desc[] = {
MOCS_ENTRY(2, 0, L3_3_WB),
};
+static void pvc_mocs_dump(struct xe_mocs_info *info, unsigned int flags, struct xe_gt *gt,
+ struct drm_printer *p)
+{
+ unsigned int i, j;
+ u32 reg_val;
+
+ drm_printf(p, "LNCFCMOCS[idx] = [ L3CC ] (value)\n\n");
+
+ for (i = 0, j = 0; i < (info->n_entries + 1) / 2; i++, j++) {
+ if (regs_are_mcr(gt))
+ reg_val = xe_gt_mcr_unicast_read_any(gt, XEHP_LNCFCMOCS(i));
+ else
+ reg_val = xe_mmio_read32(gt, XELP_LNCFCMOCS(i));
+
+ drm_printf(p, "LNCFCMOCS[%2d] = [ %u ] (%#8x)\n",
+ j++,
+ REG_FIELD_GET(L3_CACHEABILITY_MASK, reg_val),
+ reg_val);
+
+ drm_printf(p, "LNCFCMOCS[%2d] = [ %u ] (%#8x)\n",
+ j,
+ REG_FIELD_GET(L3_UPPER_IDX_CACHEABILITY_MASK, reg_val),
+ reg_val);
+ }
+}
+
+static const struct xe_mocs_ops pvc_mocs_ops = {
+ .dump = pvc_mocs_dump,
+};
+
static const struct xe_mocs_entry mtl_mocs_desc[] = {
/* Error - Reserved for Non-Use */
MOCS_ENTRY(0,
@@ -353,6 +488,36 @@ static const struct xe_mocs_entry mtl_mocs_desc[] = {
L3_GLBGO(1) | L3_1_UC),
};
+static void mtl_mocs_dump(struct xe_mocs_info *info, unsigned int flags,
+ struct xe_gt *gt, struct drm_printer *p)
+{
+ unsigned int i;
+ u32 reg_val;
+
+ drm_printf(p, "Global mocs table configuration:\n");
+ drm_printf(p, "GLOB_MOCS[idx] = [IG_PAT, L4_CACHE_POLICY] (value)\n\n");
+
+ for (i = 0; i < info->n_entries; i++) {
+ if (regs_are_mcr(gt))
+ reg_val = xe_gt_mcr_unicast_read_any(gt, XEHP_GLOBAL_MOCS(i));
+ else
+ reg_val = xe_mmio_read32(gt, XELP_GLOBAL_MOCS(i));
+
+ drm_printf(p, "GLOB_MOCS[%2d] = [%u, %u] (%#8x)\n",
+ i,
+ !!(reg_val & IG_PAT),
+ REG_FIELD_GET(L4_CACHE_POLICY_MASK, reg_val),
+ reg_val);
+ }
+
+ /* The MTL LNCF MOCS table pattern is similar to that of XeHP */
+ xehp_lncf_dump(info, flags, gt, p);
+}
+
+static const struct xe_mocs_ops mtl_mocs_ops = {
+ .dump = mtl_mocs_dump,
+};
+
static const struct xe_mocs_entry xe2_mocs_table[] = {
/* Defer to PAT */
MOCS_ENTRY(0, XE2_L3_0_WB | L4_3_UC, 0),
@@ -366,6 +531,34 @@ static const struct xe_mocs_entry xe2_mocs_table[] = {
MOCS_ENTRY(4, IG_PAT | XE2_L3_0_WB | L4_0_WB, 0),
};
+static void xe2_mocs_dump(struct xe_mocs_info *info, unsigned int flags,
+ struct xe_gt *gt, struct drm_printer *p)
+{
+ unsigned int i;
+ u32 reg_val;
+
+ drm_printf(p, "Global mocs table configuration:\n");
+ drm_printf(p, "GLOB_MOCS[idx] = [IG_PAT, L3_CLOS, L3_CACHE_POLICY, L4_CACHE_POLICY] (value)\n\n");
+
+ for (i = 0; i < info->n_entries; i++) {
+ if (regs_are_mcr(gt))
+ reg_val = xe_gt_mcr_unicast_read_any(gt, XEHP_GLOBAL_MOCS(i));
+ else
+ reg_val = xe_mmio_read32(gt, XELP_GLOBAL_MOCS(i));
+
+ drm_printf(p, "GLOB_MOCS[%2d] = [%u, %u, %u] (%#8x)\n",
+ i,
+ !!(reg_val & IG_PAT),
+ REG_FIELD_GET(XE2_L3_CLOS_MASK, reg_val),
+ REG_FIELD_GET(L4_CACHE_POLICY_MASK, reg_val),
+ reg_val);
+ }
+}
+
+static const struct xe_mocs_ops xe2_mocs_ops = {
+ .dump = xe2_mocs_dump,
+};
+
static unsigned int get_mocs_settings(struct xe_device *xe,
struct xe_mocs_info *info)
{
@@ -376,6 +569,7 @@ static unsigned int get_mocs_settings(struct xe_device *xe,
switch (xe->info.platform) {
case XE_LUNARLAKE:
case XE_BATTLEMAGE:
+ info->ops = &xe2_mocs_ops;
info->size = ARRAY_SIZE(xe2_mocs_table);
info->table = xe2_mocs_table;
info->n_entries = XE2_NUM_MOCS_ENTRIES;
@@ -384,6 +578,7 @@ static unsigned int get_mocs_settings(struct xe_device *xe,
info->unused_entries_index = 4;
break;
case XE_PVC:
+ info->ops = &pvc_mocs_ops;
info->size = ARRAY_SIZE(pvc_mocs_desc);
info->table = pvc_mocs_desc;
info->n_entries = PVC_NUM_MOCS_ENTRIES;
@@ -392,6 +587,7 @@ static unsigned int get_mocs_settings(struct xe_device *xe,
info->unused_entries_index = 2;
break;
case XE_METEORLAKE:
+ info->ops = &mtl_mocs_ops;
info->size = ARRAY_SIZE(mtl_mocs_desc);
info->table = mtl_mocs_desc;
info->n_entries = MTL_NUM_MOCS_ENTRIES;
@@ -399,6 +595,7 @@ static unsigned int get_mocs_settings(struct xe_device *xe,
info->unused_entries_index = 1;
break;
case XE_DG2:
+ info->ops = &xehp_mocs_ops;
info->size = ARRAY_SIZE(dg2_mocs_desc);
info->table = dg2_mocs_desc;
info->uc_index = 1;
@@ -410,6 +607,7 @@ static unsigned int get_mocs_settings(struct xe_device *xe,
info->unused_entries_index = 3;
break;
case XE_DG1:
+ info->ops = &xelp_mocs_ops;
info->size = ARRAY_SIZE(dg1_mocs_desc);
info->table = dg1_mocs_desc;
info->uc_index = 1;
@@ -421,6 +619,7 @@ static unsigned int get_mocs_settings(struct xe_device *xe,
case XE_ALDERLAKE_S:
case XE_ALDERLAKE_P:
case XE_ALDERLAKE_N:
+ info->ops = &xelp_mocs_ops;
info->size = ARRAY_SIZE(gen12_mocs_desc);
info->table = gen12_mocs_desc;
info->n_entries = XELP_NUM_MOCS_ENTRIES;
@@ -442,6 +641,8 @@ static unsigned int get_mocs_settings(struct xe_device *xe,
*/
xe_assert(xe, info->unused_entries_index != 0);
+ xe_assert(xe, !info->ops || info->ops->dump);
+
if (XE_WARN_ON(info->size > info->n_entries)) {
info->table = NULL;
return 0;
@@ -467,16 +668,6 @@ static u32 get_entry_control(const struct xe_mocs_info *info,
return info->table[info->unused_entries_index].control_value;
}
-static bool regs_are_mcr(struct xe_gt *gt)
-{
- struct xe_device *xe = gt_to_xe(gt);
-
- if (xe_gt_is_media_type(gt))
- return MEDIA_VER(xe) >= 20;
- else
- return GRAPHICS_VERx100(xe) >= 1250;
-}
-
static void __init_mocs_table(struct xe_gt *gt,
const struct xe_mocs_info *info)
{
@@ -578,6 +769,33 @@ void xe_mocs_init(struct xe_gt *gt)
init_l3cc_table(gt, &table);
}
+void xe_mocs_dump(struct xe_gt *gt, struct drm_printer *p)
+{
+ struct xe_mocs_info table;
+ unsigned int flags;
+ u32 ret;
+ struct xe_device *xe = gt_to_xe(gt);
+
+ flags = get_mocs_settings(xe, &table);
+
+ if (!table.ops || !table.ops->dump)
+ return;
+
+ xe_pm_runtime_get_noresume(xe);
+ ret = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+
+ if (ret)
+ goto err_fw;
+
+ table.ops->dump(&table, flags, gt, p);
+
+ xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+
+err_fw:
+ xe_assert(xe, !ret);
+ xe_pm_runtime_put(xe);
+}
+
#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
#include "tests/xe_mocs.c"
#endif
diff --git a/drivers/gpu/drm/xe/xe_mocs.h b/drivers/gpu/drm/xe/xe_mocs.h
index 053754c5a94e..dc972ffd4d07 100644
--- a/drivers/gpu/drm/xe/xe_mocs.h
+++ b/drivers/gpu/drm/xe/xe_mocs.h
@@ -6,12 +6,17 @@
#ifndef _XE_MOCS_H_
#define _XE_MOCS_H_
-#include <linux/types.h>
-
-struct xe_exec_queue;
+struct drm_printer;
struct xe_gt;
void xe_mocs_init_early(struct xe_gt *gt);
void xe_mocs_init(struct xe_gt *gt);
+/**
+ * xe_mocs_dump - Dump mocs table
+ * @gt: GT structure
+ * @p: Printer to dump info to
+ */
+void xe_mocs_dump(struct xe_gt *gt, struct drm_printer *p);
+
#endif
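
For reference, the new xe_mocs_dump() only needs a struct drm_printer, so hooking it up to a debugfs node is a thin wrapper around a seq_file. The sketch below is purely illustrative and not part of this diff; it assumes the GT pointer has already been stashed in the seq_file private data and uses only the existing drm_seq_file_printer() helper:

#include <drm/drm_print.h>
#include <linux/seq_file.h>

#include "xe_gt.h"
#include "xe_mocs.h"

/* Hypothetical debugfs show callback dumping the MOCS tables of one GT. */
static int mocs_show(struct seq_file *m, void *data)
{
	struct xe_gt *gt = m->private;	/* assumed to be wired up by the caller */
	struct drm_printer p = drm_seq_file_printer(m);

	xe_mocs_dump(gt, &p);
	return 0;
}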
diff --git a/drivers/gpu/drm/xe/xe_module.c b/drivers/gpu/drm/xe/xe_module.c
index ceb8345cbca6..3edeb30d5ccb 100644
--- a/drivers/gpu/drm/xe/xe_module.c
+++ b/drivers/gpu/drm/xe/xe_module.c
@@ -17,6 +17,7 @@ struct xe_modparam xe_modparam = {
.enable_display = true,
.guc_log_level = 5,
.force_probe = CONFIG_DRM_XE_FORCE_PROBE,
+ .wedged_mode = 1,
/* the rest are 0 by default */
};
@@ -55,6 +56,10 @@ MODULE_PARM_DESC(max_vfs,
"(0 = no VFs [default]; N = allow up to N VFs)");
#endif
+module_param_named_unsafe(wedged_mode, xe_modparam.wedged_mode, int, 0600);
+MODULE_PARM_DESC(wedged_mode,
+ "Module's default policy for the wedged mode - 0=never, 1=upon-critical-errors[default], 2=upon-any-hang");
+
struct init_funcs {
int (*init)(void);
void (*exit)(void);
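
For reference (not part of this diff): because wedged_mode is registered with module_param_named_unsafe() and mode 0600, it should be settable at load time (e.g. xe.wedged_mode=2 on the kernel command line) as well as at runtime by root via /sys/module/xe/parameters/wedged_mode, and, as with any *_unsafe module parameter, setting it is expected to taint the kernel.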
diff --git a/drivers/gpu/drm/xe/xe_module.h b/drivers/gpu/drm/xe/xe_module.h
index b369984f08ec..61a0d28a28c8 100644
--- a/drivers/gpu/drm/xe/xe_module.h
+++ b/drivers/gpu/drm/xe/xe_module.h
@@ -21,6 +21,7 @@ struct xe_modparam {
#ifdef CONFIG_PCI_IOV
unsigned int max_vfs;
#endif
+ int wedged_mode;
};
extern struct xe_modparam xe_modparam;
diff --git a/drivers/gpu/drm/xe/xe_pat.c b/drivers/gpu/drm/xe/xe_pat.c
index d5b516f115ad..4ee32ee1cc88 100644
--- a/drivers/gpu/drm/xe/xe_pat.c
+++ b/drivers/gpu/drm/xe/xe_pat.c
@@ -10,6 +10,7 @@
#include "regs/xe_reg_defs.h"
#include "xe_assert.h"
#include "xe_device.h"
+#include "xe_force_wake.h"
#include "xe_gt.h"
#include "xe_gt_mcr.h"
#include "xe_mmio.h"
diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index d6859108cc69..8dc82ee7ce6a 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -146,6 +146,7 @@ static const struct xe_graphics_desc graphics_xehpc = {
.vram_flags = XE_VRAM_FLAGS_NEED64K,
.has_asid = 1,
+ .has_atomic_enable_pte_bit = 1,
.has_flat_ccs = 0,
.has_usm = 1,
};
@@ -163,7 +164,9 @@ static const struct xe_graphics_desc graphics_xelpg = {
#define XE2_GFX_FEATURES \
.dma_mask_size = 46, \
.has_asid = 1, \
+ .has_atomic_enable_pte_bit = 1, \
.has_flat_ccs = 1, \
+ .has_indirect_ring_state = 1, \
.has_range_tlb_invalidation = 1, \
.has_usm = 1, \
.va_bits = 48, \
@@ -211,7 +214,8 @@ static const struct xe_media_desc media_xe2 = {
.name = "Xe2_LPM / Xe2_HPM",
.hw_engine_mask =
GENMASK(XE_HW_ENGINE_VCS7, XE_HW_ENGINE_VCS0) |
- GENMASK(XE_HW_ENGINE_VECS3, XE_HW_ENGINE_VECS0), /* TODO: GSC0 */
+ GENMASK(XE_HW_ENGINE_VECS3, XE_HW_ENGINE_VECS0) |
+ BIT(XE_HW_ENGINE_GSCCS0)
};
static const struct xe_device_desc tgl_desc = {
@@ -629,6 +633,9 @@ static int xe_info_init(struct xe_device *xe,
xe->info.va_bits = graphics_desc->va_bits;
xe->info.vm_max_level = graphics_desc->vm_max_level;
xe->info.has_asid = graphics_desc->has_asid;
+ xe->info.has_atomic_enable_pte_bit = graphics_desc->has_atomic_enable_pte_bit;
+ if (xe->info.platform != XE_PVC)
+ xe->info.has_device_atomics_on_smem = 1;
xe->info.has_flat_ccs = graphics_desc->has_flat_ccs;
xe->info.has_range_tlb_invalidation = graphics_desc->has_range_tlb_invalidation;
xe->info.has_usm = graphics_desc->has_usm;
@@ -656,6 +663,7 @@ static int xe_info_init(struct xe_device *xe,
gt = tile->primary_gt;
gt->info.id = xe->info.gt_count++;
gt->info.type = XE_GT_TYPE_MAIN;
+ gt->info.has_indirect_ring_state = graphics_desc->has_indirect_ring_state;
gt->info.__engine_mask = graphics_desc->hw_engine_mask;
if (MEDIA_VER(xe) < 13 && media_desc)
gt->info.__engine_mask |= media_desc->hw_engine_mask;
@@ -673,6 +681,7 @@ static int xe_info_init(struct xe_device *xe,
gt = tile->media_gt;
gt->info.type = XE_GT_TYPE_MEDIA;
+ gt->info.has_indirect_ring_state = media_desc->has_indirect_ring_state;
gt->info.__engine_mask = media_desc->hw_engine_mask;
gt->mmio.adj_offset = MEDIA_GT_GSI_OFFSET;
gt->mmio.adj_limit = MEDIA_GT_GSI_LENGTH;
diff --git a/drivers/gpu/drm/xe/xe_pci_types.h b/drivers/gpu/drm/xe/xe_pci_types.h
index b1ad12fa22d6..79b0f80376a4 100644
--- a/drivers/gpu/drm/xe/xe_pci_types.h
+++ b/drivers/gpu/drm/xe/xe_pci_types.h
@@ -25,7 +25,9 @@ struct xe_graphics_desc {
u8 max_remote_tiles:2;
u8 has_asid:1;
+ u8 has_atomic_enable_pte_bit:1;
u8 has_flat_ccs:1;
+ u8 has_indirect_ring_state:1;
u8 has_range_tlb_invalidation:1;
u8 has_usm:1;
};
@@ -36,6 +38,8 @@ struct xe_media_desc {
u8 rel;
u64 hw_engine_mask; /* hardware engines provided by media IP */
+
+ u8 has_indirect_ring_state:1;
};
struct gmdid_map {
diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c
index 37fbeda12d3b..c1831106ea4b 100644
--- a/drivers/gpu/drm/xe/xe_pm.c
+++ b/drivers/gpu/drm/xe/xe_pm.c
@@ -69,7 +69,7 @@
*/
#ifdef CONFIG_LOCKDEP
-struct lockdep_map xe_pm_runtime_lockdep_map = {
+static struct lockdep_map xe_pm_runtime_lockdep_map = {
.name = "xe_pm_runtime_lockdep_map"
};
#endif
diff --git a/drivers/gpu/drm/xe/xe_preempt_fence.c b/drivers/gpu/drm/xe/xe_preempt_fence.c
index 7d50c6e89d8e..5b243b7feb59 100644
--- a/drivers/gpu/drm/xe/xe_preempt_fence.c
+++ b/drivers/gpu/drm/xe/xe_preempt_fence.c
@@ -23,11 +23,19 @@ static void preempt_fence_work_func(struct work_struct *w)
q->ops->suspend_wait(q);
dma_fence_signal(&pfence->base);
- dma_fence_end_signalling(cookie);
-
+ /*
+ * Opt to keep everything in the fence critical section. This looks strange since we
+ * have just signalled the fence; however, the preempt fences are all signalled via a
+ * single global ordered-wq, so anything that happens in this callback can easily block
+ * progress on the entire wq, which in turn may prevent other published preempt fences
+ * from ever signalling. Therefore try to keep everything here in the callback within
+ * the fence critical section. For example, if something below grabs a scary lock like
+ * vm->lock, lockdep should complain since we also hold that lock whilst waiting on
+ * preempt fences to complete.
+ */
xe_vm_queue_rebind_worker(q->vm);
-
xe_exec_queue_put(q);
+ dma_fence_end_signalling(cookie);
}
static const char *
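
The comment added above relies on lockdep's fence signalling annotations. As a minimal illustration (not part of this diff) of that pattern: everything executed between dma_fence_begin_signalling() and dma_fence_end_signalling() is treated as part of the fence signalling path, so taking a lock there that is elsewhere held while waiting on those fences should produce a lockdep report.

#include <linux/dma-fence.h>

/* Illustrative only: the basic shape of a fence signalling critical section. */
static void signalling_section_example(struct dma_fence *fence)
{
	bool cookie = dma_fence_begin_signalling();

	dma_fence_signal(fence);
	/*
	 * Work placed here is still inside the signalling critical section;
	 * lockdep complains if it grabs a lock that another path holds while
	 * waiting for fences to signal (e.g. vm->lock in the code above).
	 */
	dma_fence_end_signalling(cookie);
}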
diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
index 5b7930f46cf3..87975e45622a 100644
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -619,9 +619,40 @@ xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma,
struct xe_pt *pt = xe_vma_vm(vma)->pt_root[tile->id];
int ret;
- if ((vma->gpuva.flags & XE_VMA_ATOMIC_PTE_BIT) &&
- (is_devmem || !IS_DGFX(xe)))
- xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE;
+ /*
+ * Default atomic expectations for the different allocation scenarios are as follows:
+ *
+ * 1. Traditional API: when the VM is not in LR mode:
+ * - Device atomics are expected to function with all allocations.
+ *
+ * 2. Compute/SVM API: when the VM is in LR mode:
+ * - Device atomics are the default behavior when the bo is placed in a single region.
+ * - In all other cases device atomics will be disabled with AE=0 until an application
+ * requests otherwise using an ioctl like madvise.
+ */
+ if (vma->gpuva.flags & XE_VMA_ATOMIC_PTE_BIT) {
+ if (xe_vm_in_lr_mode(xe_vma_vm(vma))) {
+ if (bo && xe_bo_has_single_placement(bo))
+ xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE;
+ /*
+ * If a SMEM+LMEM allocation is backed by SMEM, a device
+ * atomic will cause a GPU page fault and the allocation
+ * then gets migrated to LMEM, so bind such allocations
+ * with device atomics enabled.
+ */
+ else if (is_devmem && !xe_bo_has_single_placement(bo))
+ xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE;
+ } else {
+ xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE;
+ }
+
+ /*
+ * Unset AE if the platform (PVC) doesn't support it on an
+ * allocation.
+ */
+ if (!xe->info.has_device_atomics_on_smem && !is_devmem)
+ xe_walk.default_pte &= ~XE_USM_PPGTT_PTE_AE;
+ }
if (is_devmem) {
xe_walk.default_pte |= XE_PPGTT_PTE_DM;
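
In short, assuming the helpers above behave as their names suggest, the resulting AE policy is: non-LR VMs always get AE set; LR VMs get AE when the BO has a single placement, or when a multi-placement BO is currently backed by VRAM; and AE is cleared again for SMEM-backed allocations on platforms without has_device_atomics_on_smem (i.e. PVC).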
@@ -1075,10 +1106,12 @@ static const struct xe_migrate_pt_update_ops userptr_bind_ops = {
struct invalidation_fence {
struct xe_gt_tlb_invalidation_fence base;
struct xe_gt *gt;
- struct xe_vma *vma;
struct dma_fence *fence;
struct dma_fence_cb cb;
struct work_struct work;
+ u64 start;
+ u64 end;
+ u32 asid;
};
static const char *
@@ -1121,13 +1154,14 @@ static void invalidation_fence_work_func(struct work_struct *w)
container_of(w, struct invalidation_fence, work);
trace_xe_gt_tlb_invalidation_fence_work_func(&ifence->base);
- xe_gt_tlb_invalidation_vma(ifence->gt, &ifence->base, ifence->vma);
+ xe_gt_tlb_invalidation_range(ifence->gt, &ifence->base, ifence->start,
+ ifence->end, ifence->asid);
}
static int invalidation_fence_init(struct xe_gt *gt,
struct invalidation_fence *ifence,
struct dma_fence *fence,
- struct xe_vma *vma)
+ u64 start, u64 end, u32 asid)
{
int ret;
@@ -1144,7 +1178,9 @@ static int invalidation_fence_init(struct xe_gt *gt,
dma_fence_get(&ifence->base.base); /* Ref for caller */
ifence->fence = fence;
ifence->gt = gt;
- ifence->vma = vma;
+ ifence->start = start;
+ ifence->end = end;
+ ifence->asid = asid;
INIT_WORK(&ifence->work, invalidation_fence_work_func);
ret = dma_fence_add_callback(fence, &ifence->cb, invalidation_fence_cb);
@@ -1295,8 +1331,11 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue
/* TLB invalidation must be done before signaling rebind */
if (ifence) {
- int err = invalidation_fence_init(tile->primary_gt, ifence, fence,
- vma);
+ int err = invalidation_fence_init(tile->primary_gt,
+ ifence, fence,
+ xe_vma_start(vma),
+ xe_vma_end(vma),
+ xe_vma_vm(vma)->usm.asid);
if (err) {
dma_fence_put(fence);
kfree(ifence);
@@ -1641,7 +1680,10 @@ __xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queu
dma_fence_wait(fence, false);
/* TLB invalidation must be done before signaling unbind */
- err = invalidation_fence_init(tile->primary_gt, ifence, fence, vma);
+ err = invalidation_fence_init(tile->primary_gt, ifence, fence,
+ xe_vma_start(vma),
+ xe_vma_end(vma),
+ xe_vma_vm(vma)->usm.asid);
if (err) {
dma_fence_put(fence);
kfree(ifence);
diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c
index df407d73e5f5..29f847debb5c 100644
--- a/drivers/gpu/drm/xe/xe_query.c
+++ b/drivers/gpu/drm/xe/xe_query.c
@@ -16,6 +16,7 @@
#include "xe_bo.h"
#include "xe_device.h"
#include "xe_exec_queue.h"
+#include "xe_force_wake.h"
#include "xe_ggtt.h"
#include "xe_gt.h"
#include "xe_guc_hwconfig.h"
diff --git a/drivers/gpu/drm/xe/xe_reg_sr.h b/drivers/gpu/drm/xe/xe_reg_sr.h
index e3197c33afe2..51fbba423e27 100644
--- a/drivers/gpu/drm/xe/xe_reg_sr.h
+++ b/drivers/gpu/drm/xe/xe_reg_sr.h
@@ -6,8 +6,6 @@
#ifndef _XE_REG_SR_
#define _XE_REG_SR_
-#include "xe_reg_sr_types.h"
-
/*
* Reg save/restore bookkeeping
*/
@@ -15,6 +13,8 @@
struct xe_device;
struct xe_gt;
struct xe_hw_engine;
+struct xe_reg_sr;
+struct xe_reg_sr_entry;
struct drm_printer;
int xe_reg_sr_init(struct xe_reg_sr *sr, const char *name, struct xe_device *xe);
diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c
index d42b3f33bd7a..a3ca718456f6 100644
--- a/drivers/gpu/drm/xe/xe_ring_ops.c
+++ b/drivers/gpu/drm/xe/xe_ring_ops.c
@@ -7,9 +7,9 @@
#include <generated/xe_wa_oob.h>
+#include "instructions/xe_gpu_commands.h"
#include "instructions/xe_mi_commands.h"
#include "regs/xe_engine_regs.h"
-#include "regs/xe_gpu_commands.h"
#include "regs/xe_gt_regs.h"
#include "regs/xe_lrc_layout.h"
#include "xe_exec_queue_types.h"
diff --git a/drivers/gpu/drm/xe/xe_rtp.h b/drivers/gpu/drm/xe/xe_rtp.h
index c56fedd126e6..337b1ef1959c 100644
--- a/drivers/gpu/drm/xe/xe_rtp.h
+++ b/drivers/gpu/drm/xe/xe_rtp.h
@@ -341,7 +341,7 @@ struct xe_reg_sr;
* };
*/
#define XE_RTP_RULES(...) \
- .n_rules = _XE_COUNT_ARGS(__VA_ARGS__), \
+ .n_rules = COUNT_ARGS(__VA_ARGS__), \
.rules = (const struct xe_rtp_rule[]) { \
XE_RTP_PASTE_FOREACH(RULE_, COMMA, (__VA_ARGS__)) \
}
@@ -366,7 +366,7 @@ struct xe_reg_sr;
* };
*/
#define XE_RTP_ACTIONS(...) \
- .n_actions = _XE_COUNT_ARGS(__VA_ARGS__), \
+ .n_actions = COUNT_ARGS(__VA_ARGS__), \
.actions = (const struct xe_rtp_action[]) { \
XE_RTP_PASTE_FOREACH(ACTION_, COMMA, (__VA_ARGS__)) \
}
diff --git a/drivers/gpu/drm/xe/xe_rtp_helpers.h b/drivers/gpu/drm/xe/xe_rtp_helpers.h
index 181b6290fac3..7735f217ba71 100644
--- a/drivers/gpu/drm/xe/xe_rtp_helpers.h
+++ b/drivers/gpu/drm/xe/xe_rtp_helpers.h
@@ -10,22 +10,16 @@
#error "This header is supposed to be included by xe_rtp.h only"
#endif
+#include "xe_args.h"
+
/*
* Helper macros - not to be used outside this header.
*/
#define _XE_ESC(...) __VA_ARGS__
-#define _XE_COUNT_ARGS(...) _XE_ESC(__XE_COUNT_ARGS(__VA_ARGS__, 5, 4, 3, 2, 1,))
-#define __XE_COUNT_ARGS(_, _5, _4, _3, _2, X_, ...) X_
-
-#define _XE_FIRST(...) _XE_ESC(__XE_FIRST(__VA_ARGS__,))
-#define __XE_FIRST(x_, ...) x_
-#define _XE_TUPLE_TAIL(...) _XE_ESC(__XE_TUPLE_TAIL(__VA_ARGS__))
-#define __XE_TUPLE_TAIL(x_, ...) (__VA_ARGS__)
-#define _XE_DROP_FIRST(x_, ...) __VA_ARGS__
+#define _XE_TUPLE_TAIL(...) (DROP_FIRST_ARG(__VA_ARGS__))
-#define _XE_RTP_CONCAT(a, b) __XE_RTP_CONCAT(a, b)
-#define __XE_RTP_CONCAT(a, b) XE_RTP_ ## a ## b
+#define _XE_RTP_CONCAT(a, b) CONCATENATE(XE_RTP_, CONCATENATE(a, b))
#define __XE_RTP_PASTE_SEP_COMMA ,
#define __XE_RTP_PASTE_SEP_BITWISE_OR |
@@ -59,11 +53,11 @@
*
* XE_RTP_TEST_FOO BANANA XE_RTP_TEST_BAR
*/
-#define XE_RTP_PASTE_FOREACH(prefix_, sep_, args_) _XE_ESC(_XE_RTP_CONCAT(PASTE_, _XE_COUNT_ARGS args_)(prefix_, sep_, args_))
-#define XE_RTP_PASTE_1(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, _XE_FIRST args_)
-#define XE_RTP_PASTE_2(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, _XE_FIRST args_) __XE_RTP_PASTE_SEP_ ## sep_ XE_RTP_PASTE_1(prefix_, sep_, _XE_TUPLE_TAIL args_)
-#define XE_RTP_PASTE_3(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, _XE_FIRST args_) __XE_RTP_PASTE_SEP_ ## sep_ XE_RTP_PASTE_2(prefix_, sep_, _XE_TUPLE_TAIL args_)
-#define XE_RTP_PASTE_4(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, _XE_FIRST args_) __XE_RTP_PASTE_SEP_ ## sep_ XE_RTP_PASTE_3(prefix_, sep_, _XE_TUPLE_TAIL args_)
+#define XE_RTP_PASTE_FOREACH(prefix_, sep_, args_) _XE_RTP_CONCAT(PASTE_, COUNT_ARGS args_)(prefix_, sep_, args_)
+#define XE_RTP_PASTE_1(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, FIRST_ARG args_)
+#define XE_RTP_PASTE_2(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, FIRST_ARG args_) __XE_RTP_PASTE_SEP_ ## sep_ XE_RTP_PASTE_1(prefix_, sep_, _XE_TUPLE_TAIL args_)
+#define XE_RTP_PASTE_3(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, FIRST_ARG args_) __XE_RTP_PASTE_SEP_ ## sep_ XE_RTP_PASTE_2(prefix_, sep_, _XE_TUPLE_TAIL args_)
+#define XE_RTP_PASTE_4(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, FIRST_ARG args_) __XE_RTP_PASTE_SEP_ ## sep_ XE_RTP_PASTE_3(prefix_, sep_, _XE_TUPLE_TAIL args_)
/*
* XE_RTP_DROP_CAST - Drop cast to convert a compound statement to a initializer
@@ -76,6 +70,6 @@
*
* { .a = 10 }
*/
-#define XE_RTP_DROP_CAST(...) _XE_ESC(_XE_DROP_FIRST _XE_ESC __VA_ARGS__)
+#define XE_RTP_DROP_CAST(...) _XE_ESC(DROP_FIRST_ARG _XE_ESC __VA_ARGS__)
#endif
diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c
index 65f1f1628235..2883d9aca404 100644
--- a/drivers/gpu/drm/xe/xe_sync.c
+++ b/drivers/gpu/drm/xe/xe_sync.c
@@ -339,6 +339,21 @@ err_out:
}
/**
+ * __xe_sync_ufence_get() - Get a reference to a user fence
+ * @ufence: input user fence
+ *
+ * Take an additional reference on the given user fence
+ *
+ * Return: xe_user_fence pointer with reference
+ */
+struct xe_user_fence *__xe_sync_ufence_get(struct xe_user_fence *ufence)
+{
+ user_fence_get(ufence);
+
+ return ufence;
+}
+
+/**
* xe_sync_ufence_get() - Get user fence from sync
* @sync: input sync
*
diff --git a/drivers/gpu/drm/xe/xe_sync.h b/drivers/gpu/drm/xe/xe_sync.h
index 3e03396af2c6..006dbf780793 100644
--- a/drivers/gpu/drm/xe/xe_sync.h
+++ b/drivers/gpu/drm/xe/xe_sync.h
@@ -37,6 +37,7 @@ static inline bool xe_sync_is_ufence(struct xe_sync_entry *sync)
return !!sync->ufence;
}
+struct xe_user_fence *__xe_sync_ufence_get(struct xe_user_fence *ufence);
struct xe_user_fence *xe_sync_ufence_get(struct xe_sync_entry *sync);
void xe_sync_ufence_put(struct xe_user_fence *ufence);
int xe_sync_ufence_get_status(struct xe_user_fence *ufence);
diff --git a/drivers/gpu/drm/xe/xe_uc.c b/drivers/gpu/drm/xe/xe_uc.c
index 4feb35c95a1c..45035e38388b 100644
--- a/drivers/gpu/drm/xe/xe_uc.c
+++ b/drivers/gpu/drm/xe/xe_uc.c
@@ -5,6 +5,7 @@
#include "xe_uc.h"
+#include "xe_assert.h"
#include "xe_device.h"
#include "xe_gsc.h"
#include "xe_gsc_proxy.h"
@@ -215,13 +216,13 @@ void xe_uc_stop_prepare(struct xe_uc *uc)
xe_guc_stop_prepare(&uc->guc);
}
-int xe_uc_stop(struct xe_uc *uc)
+void xe_uc_stop(struct xe_uc *uc)
{
/* GuC submission not enabled, nothing to do */
if (!xe_device_uc_enabled(uc_to_xe(uc)))
- return 0;
+ return;
- return xe_guc_stop(&uc->guc);
+ xe_guc_stop(&uc->guc);
}
int xe_uc_start(struct xe_uc *uc)
@@ -247,17 +248,13 @@ again:
int xe_uc_suspend(struct xe_uc *uc)
{
- int ret;
-
/* GuC submission not enabled, nothing to do */
if (!xe_device_uc_enabled(uc_to_xe(uc)))
return 0;
uc_reset_wait(uc);
- ret = xe_uc_stop(uc);
- if (ret)
- return ret;
+ xe_uc_stop(uc);
return xe_guc_suspend(&uc->guc);
}
diff --git a/drivers/gpu/drm/xe/xe_uc.h b/drivers/gpu/drm/xe/xe_uc.h
index e4d4e3c99f0e..5dfa7725483d 100644
--- a/drivers/gpu/drm/xe/xe_uc.h
+++ b/drivers/gpu/drm/xe/xe_uc.h
@@ -16,7 +16,7 @@ int xe_uc_fini_hw(struct xe_uc *uc);
void xe_uc_gucrc_disable(struct xe_uc *uc);
int xe_uc_reset_prepare(struct xe_uc *uc);
void xe_uc_stop_prepare(struct xe_uc *uc);
-int xe_uc_stop(struct xe_uc *uc);
+void xe_uc_stop(struct xe_uc *uc);
int xe_uc_start(struct xe_uc *uc);
int xe_uc_suspend(struct xe_uc *uc);
int xe_uc_sanitize_reset(struct xe_uc *uc);
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 4aa3943e6f29..c5b1694b292f 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -315,19 +315,23 @@ int __xe_vm_userptr_needs_repin(struct xe_vm *vm)
#define XE_VM_REBIND_RETRY_TIMEOUT_MS 1000
-static void xe_vm_kill(struct xe_vm *vm)
+static void xe_vm_kill(struct xe_vm *vm, bool unlocked)
{
struct xe_exec_queue *q;
lockdep_assert_held(&vm->lock);
- xe_vm_lock(vm, false);
+ if (unlocked)
+ xe_vm_lock(vm, false);
+
vm->flags |= XE_VM_FLAG_BANNED;
trace_xe_vm_kill(vm);
list_for_each_entry(q, &vm->preempt.exec_queues, compute.link)
q->ops->kill(q);
- xe_vm_unlock(vm);
+
+ if (unlocked)
+ xe_vm_unlock(vm);
/* TODO: Inform user the VM is banned */
}
@@ -557,7 +561,7 @@ out_unlock_outer:
if (err) {
drm_warn(&vm->xe->drm, "VM worker error: %d\n", err);
- xe_vm_kill(vm);
+ xe_vm_kill(vm, true);
}
up_write(&vm->lock);
@@ -708,37 +712,116 @@ int xe_vm_userptr_check_repin(struct xe_vm *vm)
list_empty_careful(&vm->userptr.invalidated)) ? 0 : -EAGAIN;
}
-static struct dma_fence *
-xe_vm_bind_vma(struct xe_vma *vma, struct xe_exec_queue *q,
- struct xe_sync_entry *syncs, u32 num_syncs,
- bool first_op, bool last_op);
+static void xe_vm_populate_rebind(struct xe_vma_op *op, struct xe_vma *vma,
+ u8 tile_mask)
+{
+ INIT_LIST_HEAD(&op->link);
+ op->tile_mask = tile_mask;
+ op->base.op = DRM_GPUVA_OP_MAP;
+ op->base.map.va.addr = vma->gpuva.va.addr;
+ op->base.map.va.range = vma->gpuva.va.range;
+ op->base.map.gem.obj = vma->gpuva.gem.obj;
+ op->base.map.gem.offset = vma->gpuva.gem.offset;
+ op->map.vma = vma;
+ op->map.immediate = true;
+ op->map.dumpable = vma->gpuva.flags & XE_VMA_DUMPABLE;
+ op->map.is_null = xe_vma_is_null(vma);
+}
+
+static int xe_vm_ops_add_rebind(struct xe_vma_ops *vops, struct xe_vma *vma,
+ u8 tile_mask)
+{
+ struct xe_vma_op *op;
+
+ op = kzalloc(sizeof(*op), GFP_KERNEL);
+ if (!op)
+ return -ENOMEM;
+
+ xe_vm_populate_rebind(op, vma, tile_mask);
+ list_add_tail(&op->link, &vops->list);
+
+ return 0;
+}
+
+static struct dma_fence *ops_execute(struct xe_vm *vm,
+ struct xe_vma_ops *vops);
+static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm,
+ struct xe_exec_queue *q,
+ struct xe_sync_entry *syncs, u32 num_syncs);
int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker)
{
struct dma_fence *fence;
struct xe_vma *vma, *next;
+ struct xe_vma_ops vops;
+ struct xe_vma_op *op, *next_op;
+ int err;
lockdep_assert_held(&vm->lock);
- if (xe_vm_in_lr_mode(vm) && !rebind_worker)
+ if ((xe_vm_in_lr_mode(vm) && !rebind_worker) ||
+ list_empty(&vm->rebind_list))
return 0;
+ xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
+
xe_vm_assert_held(vm);
- list_for_each_entry_safe(vma, next, &vm->rebind_list,
- combined_links.rebind) {
+ list_for_each_entry(vma, &vm->rebind_list, combined_links.rebind) {
xe_assert(vm->xe, vma->tile_present);
- list_del_init(&vma->combined_links.rebind);
if (rebind_worker)
trace_xe_vma_rebind_worker(vma);
else
trace_xe_vma_rebind_exec(vma);
- fence = xe_vm_bind_vma(vma, NULL, NULL, 0, false, false);
- if (IS_ERR(fence))
- return PTR_ERR(fence);
+
+ err = xe_vm_ops_add_rebind(&vops, vma,
+ vma->tile_present);
+ if (err)
+ goto free_ops;
+ }
+
+ fence = ops_execute(vm, &vops);
+ if (IS_ERR(fence)) {
+ err = PTR_ERR(fence);
+ } else {
dma_fence_put(fence);
+ list_for_each_entry_safe(vma, next, &vm->rebind_list,
+ combined_links.rebind)
+ list_del_init(&vma->combined_links.rebind);
+ }
+free_ops:
+ list_for_each_entry_safe(op, next_op, &vops.list, link) {
+ list_del(&op->link);
+ kfree(op);
}
- return 0;
+ return err;
+}
+
+struct dma_fence *xe_vma_rebind(struct xe_vm *vm, struct xe_vma *vma, u8 tile_mask)
+{
+ struct dma_fence *fence = NULL;
+ struct xe_vma_ops vops;
+ struct xe_vma_op *op, *next_op;
+ int err;
+
+ lockdep_assert_held(&vm->lock);
+ xe_vm_assert_held(vm);
+ xe_assert(vm->xe, xe_vm_in_fault_mode(vm));
+
+ xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
+
+ err = xe_vm_ops_add_rebind(&vops, vma, tile_mask);
+ if (err)
+ return ERR_PTR(err);
+
+ fence = ops_execute(vm, &vops);
+
+ list_for_each_entry_safe(op, next_op, &vops.list, link) {
+ list_del(&op->link);
+ kfree(op);
+ }
+
+ return fence;
}
static void xe_vma_free(struct xe_vma *vma)
@@ -805,7 +888,7 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm,
for_each_tile(tile, vm->xe, id)
vma->tile_mask |= 0x1 << id;
- if (GRAPHICS_VER(vm->xe) >= 20 || vm->xe->info.platform == XE_PVC)
+ if (vm->xe->info.has_atomic_enable_pte_bit)
vma->gpuva.flags |= XE_VMA_ATOMIC_PTE_BIT;
vma->pat_index = pat_index;
@@ -1173,6 +1256,8 @@ static const struct xe_pt_ops xelp_pt_ops = {
.pde_encode_bo = xelp_pde_encode_bo,
};
+static void vm_destroy_work_func(struct work_struct *w);
+
/**
* xe_vm_create_scratch() - Setup a scratch memory pagetable tree for the
* given tile and vm.
@@ -1252,6 +1337,8 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
init_rwsem(&vm->userptr.notifier_lock);
spin_lock_init(&vm->userptr.invalidated_lock);
+ INIT_WORK(&vm->destroy_work, vm_destroy_work_func);
+
INIT_LIST_HEAD(&vm->preempt.exec_queues);
vm->preempt.min_run_period_ms = 10; /* FIXME: Wire up to uAPI */
@@ -1274,7 +1361,7 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
drm_gem_object_put(vm_resv_obj);
- err = dma_resv_lock_interruptible(xe_vm_resv(vm), NULL);
+ err = xe_vm_lock(vm, true);
if (err)
goto err_close;
@@ -1318,7 +1405,7 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
xe_pt_populate_empty(tile, vm, vm->pt_root[id]);
}
- dma_resv_unlock(xe_vm_resv(vm));
+ xe_vm_unlock(vm);
/* Kernel migration VM shouldn't have a circular loop.. */
if (!(flags & XE_VM_FLAG_MIGRATION)) {
@@ -1360,7 +1447,7 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
return vm;
err_unlock_close:
- dma_resv_unlock(xe_vm_resv(vm));
+ xe_vm_unlock(vm);
err_close:
xe_vm_close_and_put(vm);
return ERR_PTR(err);
@@ -1489,9 +1576,10 @@ void xe_vm_close_and_put(struct xe_vm *vm)
xe_vm_put(vm);
}
-static void xe_vm_free(struct drm_gpuvm *gpuvm)
+static void vm_destroy_work_func(struct work_struct *w)
{
- struct xe_vm *vm = container_of(gpuvm, struct xe_vm, gpuvm);
+ struct xe_vm *vm =
+ container_of(w, struct xe_vm, destroy_work);
struct xe_device *xe = vm->xe;
struct xe_tile *tile;
u8 id;
@@ -1514,6 +1602,14 @@ static void xe_vm_free(struct drm_gpuvm *gpuvm)
kfree(vm);
}
+static void xe_vm_free(struct drm_gpuvm *gpuvm)
+{
+ struct xe_vm *vm = container_of(gpuvm, struct xe_vm, gpuvm);
+
+ /* To destroy the VM we need to be able to sleep */
+ queue_work(system_unbound_wq, &vm->destroy_work);
+}
+
struct xe_vm *xe_vm_lookup(struct xe_file *xef, u32 id)
{
struct xe_vm *vm;
@@ -1550,23 +1646,13 @@ xe_vm_unbind_vma(struct xe_vma *vma, struct xe_exec_queue *q,
struct dma_fence *fence = NULL;
struct dma_fence **fences = NULL;
struct dma_fence_array *cf = NULL;
- int cur_fence = 0, i;
+ int cur_fence = 0;
int number_tiles = hweight8(vma->tile_present);
int err;
u8 id;
trace_xe_vma_unbind(vma);
- if (vma->ufence) {
- struct xe_user_fence * const f = vma->ufence;
-
- if (!xe_sync_ufence_get_status(f))
- return ERR_PTR(-EBUSY);
-
- vma->ufence = NULL;
- xe_sync_ufence_put(f);
- }
-
if (number_tiles > 1) {
fences = kmalloc_array(number_tiles, sizeof(*fences),
GFP_KERNEL);
@@ -1608,10 +1694,6 @@ next:
fence = cf ? &cf->base : !fence ?
xe_exec_queue_last_fence_get(wait_exec_queue, vm) : fence;
- if (last_op) {
- for (i = 0; i < num_syncs; i++)
- xe_sync_entry_signal(&syncs[i], fence);
- }
return fence;
@@ -1628,15 +1710,15 @@ err_fences:
static struct dma_fence *
xe_vm_bind_vma(struct xe_vma *vma, struct xe_exec_queue *q,
struct xe_sync_entry *syncs, u32 num_syncs,
- bool first_op, bool last_op)
+ u8 tile_mask, bool first_op, bool last_op)
{
struct xe_tile *tile;
struct dma_fence *fence;
struct dma_fence **fences = NULL;
struct dma_fence_array *cf = NULL;
struct xe_vm *vm = xe_vma_vm(vma);
- int cur_fence = 0, i;
- int number_tiles = hweight8(vma->tile_mask);
+ int cur_fence = 0;
+ int number_tiles = hweight8(tile_mask);
int err;
u8 id;
@@ -1650,7 +1732,7 @@ xe_vm_bind_vma(struct xe_vma *vma, struct xe_exec_queue *q,
}
for_each_tile(tile, vm->xe, id) {
- if (!(vma->tile_mask & BIT(id)))
+ if (!(tile_mask & BIT(id)))
goto next;
fence = __xe_pt_bind_vma(tile, vma, q ? q : vm->q[id],
@@ -1682,12 +1764,6 @@ next:
}
}
- if (last_op) {
- for (i = 0; i < num_syncs; i++)
- xe_sync_entry_signal(&syncs[i],
- cf ? &cf->base : fence);
- }
-
return cf ? &cf->base : fence;
err_fences:
@@ -1715,87 +1791,46 @@ find_ufence_get(struct xe_sync_entry *syncs, u32 num_syncs)
return NULL;
}
-static int __xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma,
- struct xe_exec_queue *q, struct xe_sync_entry *syncs,
- u32 num_syncs, bool immediate, bool first_op,
- bool last_op)
+static struct dma_fence *
+xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, struct xe_exec_queue *q,
+ struct xe_bo *bo, struct xe_sync_entry *syncs, u32 num_syncs,
+ u8 tile_mask, bool immediate, bool first_op, bool last_op)
{
struct dma_fence *fence;
struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, q);
- struct xe_user_fence *ufence;
xe_vm_assert_held(vm);
-
- ufence = find_ufence_get(syncs, num_syncs);
- if (vma->ufence && ufence)
- xe_sync_ufence_put(vma->ufence);
-
- vma->ufence = ufence ?: vma->ufence;
+ xe_bo_assert_held(bo);
if (immediate) {
- fence = xe_vm_bind_vma(vma, q, syncs, num_syncs, first_op,
- last_op);
+ fence = xe_vm_bind_vma(vma, q, syncs, num_syncs, tile_mask,
+ first_op, last_op);
if (IS_ERR(fence))
- return PTR_ERR(fence);
+ return fence;
} else {
- int i;
-
xe_assert(vm->xe, xe_vm_in_fault_mode(vm));
fence = xe_exec_queue_last_fence_get(wait_exec_queue, vm);
- if (last_op) {
- for (i = 0; i < num_syncs; i++)
- xe_sync_entry_signal(&syncs[i], fence);
- }
- }
-
- if (last_op)
- xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence);
- dma_fence_put(fence);
-
- return 0;
-}
-
-static int xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, struct xe_exec_queue *q,
- struct xe_bo *bo, struct xe_sync_entry *syncs,
- u32 num_syncs, bool immediate, bool first_op,
- bool last_op)
-{
- int err;
-
- xe_vm_assert_held(vm);
- xe_bo_assert_held(bo);
-
- if (bo && immediate) {
- err = xe_bo_validate(bo, vm, true);
- if (err)
- return err;
}
- return __xe_vm_bind(vm, vma, q, syncs, num_syncs, immediate, first_op,
- last_op);
+ return fence;
}
-static int xe_vm_unbind(struct xe_vm *vm, struct xe_vma *vma,
- struct xe_exec_queue *q, struct xe_sync_entry *syncs,
- u32 num_syncs, bool first_op, bool last_op)
+static struct dma_fence *
+xe_vm_unbind(struct xe_vm *vm, struct xe_vma *vma,
+ struct xe_exec_queue *q, struct xe_sync_entry *syncs,
+ u32 num_syncs, bool first_op, bool last_op)
{
struct dma_fence *fence;
- struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, q);
xe_vm_assert_held(vm);
xe_bo_assert_held(xe_vma_bo(vma));
fence = xe_vm_unbind_vma(vma, q, syncs, num_syncs, first_op, last_op);
if (IS_ERR(fence))
- return PTR_ERR(fence);
-
- xe_vma_destroy(vma, fence);
- if (last_op)
- xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence);
- dma_fence_put(fence);
+ return fence;
- return 0;
+ return fence;
}
#define ALL_DRM_XE_VM_CREATE_FLAGS (DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE | \
@@ -1938,40 +1973,18 @@ static const u32 region_to_mem_type[] = {
XE_PL_VRAM1,
};
-static int xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma,
- struct xe_exec_queue *q, u32 region,
- struct xe_sync_entry *syncs, u32 num_syncs,
- bool first_op, bool last_op)
+static struct dma_fence *
+xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma,
+ struct xe_exec_queue *q, struct xe_sync_entry *syncs,
+ u32 num_syncs, bool first_op, bool last_op)
{
struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, q);
- int err;
-
- xe_assert(vm->xe, region < ARRAY_SIZE(region_to_mem_type));
-
- if (!xe_vma_has_no_bo(vma)) {
- err = xe_bo_migrate(xe_vma_bo(vma), region_to_mem_type[region]);
- if (err)
- return err;
- }
if (vma->tile_mask != (vma->tile_present & ~vma->tile_invalidated)) {
return xe_vm_bind(vm, vma, q, xe_vma_bo(vma), syncs, num_syncs,
- true, first_op, last_op);
+ vma->tile_mask, true, first_op, last_op);
} else {
- int i;
-
- /* Nothing to do, signal fences now */
- if (last_op) {
- for (i = 0; i < num_syncs; i++) {
- struct dma_fence *fence =
- xe_exec_queue_last_fence_get(wait_exec_queue, vm);
-
- xe_sync_entry_signal(&syncs[i], fence);
- dma_fence_put(fence);
- }
- }
-
- return 0;
+ return xe_exec_queue_last_fence_get(wait_exec_queue, vm);
}
}
@@ -2267,23 +2280,28 @@ static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op)
static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q,
struct drm_gpuva_ops *ops,
struct xe_sync_entry *syncs, u32 num_syncs,
- struct list_head *ops_list, bool last)
+ struct xe_vma_ops *vops, bool last)
{
struct xe_device *xe = vm->xe;
struct xe_vma_op *last_op = NULL;
struct drm_gpuva_op *__op;
+ struct xe_tile *tile;
+ u8 id, tile_mask = 0;
int err = 0;
lockdep_assert_held_write(&vm->lock);
+ for_each_tile(tile, vm->xe, id)
+ tile_mask |= 0x1 << id;
+
drm_gpuva_for_each_op(__op, ops) {
struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
struct xe_vma *vma;
- bool first = list_empty(ops_list);
+ bool first = list_empty(&vops->list);
unsigned int flags = 0;
INIT_LIST_HEAD(&op->link);
- list_add_tail(&op->link, ops_list);
+ list_add_tail(&op->link, &vops->list);
if (first) {
op->flags |= XE_VMA_OP_FIRST;
@@ -2292,6 +2310,7 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q,
}
op->q = q;
+ op->tile_mask = tile_mask;
switch (op->base.op) {
case DRM_GPUVA_OP_MAP:
@@ -2409,12 +2428,11 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q,
}
/* FIXME: Unhandled corner case */
- XE_WARN_ON(!last_op && last && !list_empty(ops_list));
+ XE_WARN_ON(!last_op && last && !list_empty(&vops->list));
if (!last_op)
return 0;
- last_op->ops = ops;
if (last) {
last_op->flags |= XE_VMA_OP_LAST;
last_op->num_syncs = num_syncs;
@@ -2424,27 +2442,24 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q,
return 0;
}
-static int op_execute(struct drm_exec *exec, struct xe_vm *vm,
- struct xe_vma *vma, struct xe_vma_op *op)
+static struct dma_fence *op_execute(struct xe_vm *vm, struct xe_vma *vma,
+ struct xe_vma_op *op)
{
- int err;
-
- lockdep_assert_held_write(&vm->lock);
+ struct dma_fence *fence = NULL;
- err = xe_vm_lock_vma(exec, vma);
- if (err)
- return err;
+ lockdep_assert_held(&vm->lock);
xe_vm_assert_held(vm);
xe_bo_assert_held(xe_vma_bo(vma));
switch (op->base.op) {
case DRM_GPUVA_OP_MAP:
- err = xe_vm_bind(vm, vma, op->q, xe_vma_bo(vma),
- op->syncs, op->num_syncs,
- op->map.immediate || !xe_vm_in_fault_mode(vm),
- op->flags & XE_VMA_OP_FIRST,
- op->flags & XE_VMA_OP_LAST);
+ fence = xe_vm_bind(vm, vma, op->q, xe_vma_bo(vma),
+ op->syncs, op->num_syncs,
+ op->tile_mask,
+ op->map.immediate || !xe_vm_in_fault_mode(vm),
+ op->flags & XE_VMA_OP_FIRST,
+ op->flags & XE_VMA_OP_LAST);
break;
case DRM_GPUVA_OP_REMAP:
{
@@ -2454,37 +2469,41 @@ static int op_execute(struct drm_exec *exec, struct xe_vm *vm,
if (!op->remap.unmap_done) {
if (prev || next)
vma->gpuva.flags |= XE_VMA_FIRST_REBIND;
- err = xe_vm_unbind(vm, vma, op->q, op->syncs,
- op->num_syncs,
- op->flags & XE_VMA_OP_FIRST,
- op->flags & XE_VMA_OP_LAST &&
- !prev && !next);
- if (err)
+ fence = xe_vm_unbind(vm, vma, op->q, op->syncs,
+ op->num_syncs,
+ op->flags & XE_VMA_OP_FIRST,
+ op->flags & XE_VMA_OP_LAST &&
+ !prev && !next);
+ if (IS_ERR(fence))
break;
op->remap.unmap_done = true;
}
if (prev) {
op->remap.prev->gpuva.flags |= XE_VMA_LAST_REBIND;
- err = xe_vm_bind(vm, op->remap.prev, op->q,
- xe_vma_bo(op->remap.prev), op->syncs,
- op->num_syncs, true, false,
- op->flags & XE_VMA_OP_LAST && !next);
+ dma_fence_put(fence);
+ fence = xe_vm_bind(vm, op->remap.prev, op->q,
+ xe_vma_bo(op->remap.prev), op->syncs,
+ op->num_syncs,
+ op->remap.prev->tile_mask, true,
+ false,
+ op->flags & XE_VMA_OP_LAST && !next);
op->remap.prev->gpuva.flags &= ~XE_VMA_LAST_REBIND;
- if (err)
+ if (IS_ERR(fence))
break;
op->remap.prev = NULL;
}
if (next) {
op->remap.next->gpuva.flags |= XE_VMA_LAST_REBIND;
- err = xe_vm_bind(vm, op->remap.next, op->q,
- xe_vma_bo(op->remap.next),
- op->syncs, op->num_syncs,
- true, false,
- op->flags & XE_VMA_OP_LAST);
+ dma_fence_put(fence);
+ fence = xe_vm_bind(vm, op->remap.next, op->q,
+ xe_vma_bo(op->remap.next),
+ op->syncs, op->num_syncs,
+ op->remap.next->tile_mask, true,
+ false, op->flags & XE_VMA_OP_LAST);
op->remap.next->gpuva.flags &= ~XE_VMA_LAST_REBIND;
- if (err)
+ if (IS_ERR(fence))
break;
op->remap.next = NULL;
}
@@ -2492,43 +2511,35 @@ static int op_execute(struct drm_exec *exec, struct xe_vm *vm,
break;
}
case DRM_GPUVA_OP_UNMAP:
- err = xe_vm_unbind(vm, vma, op->q, op->syncs,
- op->num_syncs, op->flags & XE_VMA_OP_FIRST,
- op->flags & XE_VMA_OP_LAST);
+ fence = xe_vm_unbind(vm, vma, op->q, op->syncs,
+ op->num_syncs, op->flags & XE_VMA_OP_FIRST,
+ op->flags & XE_VMA_OP_LAST);
break;
case DRM_GPUVA_OP_PREFETCH:
- err = xe_vm_prefetch(vm, vma, op->q, op->prefetch.region,
- op->syncs, op->num_syncs,
- op->flags & XE_VMA_OP_FIRST,
- op->flags & XE_VMA_OP_LAST);
+ fence = xe_vm_prefetch(vm, vma, op->q, op->syncs, op->num_syncs,
+ op->flags & XE_VMA_OP_FIRST,
+ op->flags & XE_VMA_OP_LAST);
break;
default:
drm_warn(&vm->xe->drm, "NOT POSSIBLE");
}
- if (err)
+ if (IS_ERR(fence))
trace_xe_vma_fail(vma);
- return err;
+ return fence;
}
-static int __xe_vma_op_execute(struct xe_vm *vm, struct xe_vma *vma,
- struct xe_vma_op *op)
+static struct dma_fence *
+__xe_vma_op_execute(struct xe_vm *vm, struct xe_vma *vma,
+ struct xe_vma_op *op)
{
- struct drm_exec exec;
+ struct dma_fence *fence;
int err;
retry_userptr:
- drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
- drm_exec_until_all_locked(&exec) {
- err = op_execute(&exec, vm, vma, op);
- drm_exec_retry_on_contention(&exec);
- if (err)
- break;
- }
- drm_exec_fini(&exec);
-
- if (err == -EAGAIN) {
+ fence = op_execute(vm, vma, op);
+ if (IS_ERR(fence) && PTR_ERR(fence) == -EAGAIN) {
lockdep_assert_held_write(&vm->lock);
if (op->base.op == DRM_GPUVA_OP_REMAP) {
@@ -2545,22 +2556,24 @@ retry_userptr:
if (!err)
goto retry_userptr;
+ fence = ERR_PTR(err);
trace_xe_vma_fail(vma);
}
}
- return err;
+ return fence;
}
-static int xe_vma_op_execute(struct xe_vm *vm, struct xe_vma_op *op)
+static struct dma_fence *
+xe_vma_op_execute(struct xe_vm *vm, struct xe_vma_op *op)
{
- int ret = 0;
+ struct dma_fence *fence = ERR_PTR(-ENOMEM);
- lockdep_assert_held_write(&vm->lock);
+ lockdep_assert_held(&vm->lock);
switch (op->base.op) {
case DRM_GPUVA_OP_MAP:
- ret = __xe_vma_op_execute(vm, op->map.vma, op);
+ fence = __xe_vma_op_execute(vm, op->map.vma, op);
break;
case DRM_GPUVA_OP_REMAP:
{
@@ -2573,42 +2586,23 @@ static int xe_vma_op_execute(struct xe_vm *vm, struct xe_vma_op *op)
else
vma = op->remap.next;
- ret = __xe_vma_op_execute(vm, vma, op);
+ fence = __xe_vma_op_execute(vm, vma, op);
break;
}
case DRM_GPUVA_OP_UNMAP:
- ret = __xe_vma_op_execute(vm, gpuva_to_vma(op->base.unmap.va),
- op);
+ fence = __xe_vma_op_execute(vm, gpuva_to_vma(op->base.unmap.va),
+ op);
break;
case DRM_GPUVA_OP_PREFETCH:
- ret = __xe_vma_op_execute(vm,
- gpuva_to_vma(op->base.prefetch.va),
- op);
+ fence = __xe_vma_op_execute(vm,
+ gpuva_to_vma(op->base.prefetch.va),
+ op);
break;
default:
drm_warn(&vm->xe->drm, "NOT POSSIBLE");
}
- return ret;
-}
-
-static void xe_vma_op_cleanup(struct xe_vm *vm, struct xe_vma_op *op)
-{
- bool last = op->flags & XE_VMA_OP_LAST;
-
- if (last) {
- while (op->num_syncs--)
- xe_sync_entry_cleanup(&op->syncs[op->num_syncs]);
- kfree(op->syncs);
- if (op->q)
- xe_exec_queue_put(op->q);
- }
- if (!list_empty(&op->link))
- list_del(&op->link);
- if (op->ops)
- drm_gpuva_ops_free(&vm->gpuvm, op->ops);
- if (last)
- xe_vm_put(vm);
+ return fence;
}
static void xe_vma_op_unwind(struct xe_vm *vm, struct xe_vma_op *op,
@@ -2687,34 +2681,223 @@ static void vm_bind_ioctl_ops_unwind(struct xe_vm *vm,
op->flags & XE_VMA_OP_PREV_COMMITTED,
op->flags & XE_VMA_OP_NEXT_COMMITTED);
}
+ }
+}
+
+static int vma_lock_and_validate(struct drm_exec *exec, struct xe_vma *vma,
+ bool validate)
+{
+ struct xe_bo *bo = xe_vma_bo(vma);
+ int err = 0;
- drm_gpuva_ops_free(&vm->gpuvm, __ops);
+ if (bo) {
+ if (!bo->vm)
+ err = drm_exec_lock_obj(exec, &bo->ttm.base);
+ if (!err && validate)
+ err = xe_bo_validate(bo, xe_vma_vm(vma), true);
}
+
+ return err;
}
-static int vm_bind_ioctl_ops_execute(struct xe_vm *vm,
- struct list_head *ops_list)
+static int check_ufence(struct xe_vma *vma)
+{
+ if (vma->ufence) {
+ struct xe_user_fence * const f = vma->ufence;
+
+ if (!xe_sync_ufence_get_status(f))
+ return -EBUSY;
+
+ vma->ufence = NULL;
+ xe_sync_ufence_put(f);
+ }
+
+ return 0;
+}
+
+static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm,
+ struct xe_vma_op *op)
+{
+ int err = 0;
+
+ switch (op->base.op) {
+ case DRM_GPUVA_OP_MAP:
+ err = vma_lock_and_validate(exec, op->map.vma,
+ !xe_vm_in_fault_mode(vm) ||
+ op->map.immediate);
+ break;
+ case DRM_GPUVA_OP_REMAP:
+ err = check_ufence(gpuva_to_vma(op->base.remap.unmap->va));
+ if (err)
+ break;
+
+ err = vma_lock_and_validate(exec,
+ gpuva_to_vma(op->base.remap.unmap->va),
+ false);
+ if (!err && op->remap.prev)
+ err = vma_lock_and_validate(exec, op->remap.prev, true);
+ if (!err && op->remap.next)
+ err = vma_lock_and_validate(exec, op->remap.next, true);
+ break;
+ case DRM_GPUVA_OP_UNMAP:
+ err = check_ufence(gpuva_to_vma(op->base.unmap.va));
+ if (err)
+ break;
+
+ err = vma_lock_and_validate(exec,
+ gpuva_to_vma(op->base.unmap.va),
+ false);
+ break;
+ case DRM_GPUVA_OP_PREFETCH:
+ {
+ struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);
+ u32 region = op->prefetch.region;
+
+ xe_assert(vm->xe, region < ARRAY_SIZE(region_to_mem_type));
+
+ err = vma_lock_and_validate(exec,
+ gpuva_to_vma(op->base.prefetch.va),
+ false);
+ if (!err && !xe_vma_has_no_bo(vma))
+ err = xe_bo_migrate(xe_vma_bo(vma),
+ region_to_mem_type[region]);
+ break;
+ }
+ default:
+ drm_warn(&vm->xe->drm, "NOT POSSIBLE");
+ }
+
+ return err;
+}
+
+static int vm_bind_ioctl_ops_lock_and_prep(struct drm_exec *exec,
+ struct xe_vm *vm,
+ struct xe_vma_ops *vops)
+{
+ struct xe_vma_op *op;
+ int err;
+
+ err = drm_exec_lock_obj(exec, xe_vm_obj(vm));
+ if (err)
+ return err;
+
+ list_for_each_entry(op, &vops->list, link) {
+ err = op_lock_and_prep(exec, vm, op);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static struct dma_fence *ops_execute(struct xe_vm *vm,
+ struct xe_vma_ops *vops)
{
struct xe_vma_op *op, *next;
+ struct dma_fence *fence = NULL;
+
+ list_for_each_entry_safe(op, next, &vops->list, link) {
+ dma_fence_put(fence);
+ fence = xe_vma_op_execute(vm, op);
+ if (IS_ERR(fence)) {
+ drm_warn(&vm->xe->drm, "VM op(%d) failed with %ld",
+ op->base.op, PTR_ERR(fence));
+ fence = ERR_PTR(-ENOSPC);
+ break;
+ }
+ }
+
+ return fence;
+}
+
+static void vma_add_ufence(struct xe_vma *vma, struct xe_user_fence *ufence)
+{
+ if (vma->ufence)
+ xe_sync_ufence_put(vma->ufence);
+ vma->ufence = __xe_sync_ufence_get(ufence);
+}
+
+static void op_add_ufence(struct xe_vm *vm, struct xe_vma_op *op,
+ struct xe_user_fence *ufence)
+{
+ switch (op->base.op) {
+ case DRM_GPUVA_OP_MAP:
+ vma_add_ufence(op->map.vma, ufence);
+ break;
+ case DRM_GPUVA_OP_REMAP:
+ if (op->remap.prev)
+ vma_add_ufence(op->remap.prev, ufence);
+ if (op->remap.next)
+ vma_add_ufence(op->remap.next, ufence);
+ break;
+ case DRM_GPUVA_OP_UNMAP:
+ break;
+ case DRM_GPUVA_OP_PREFETCH:
+ vma_add_ufence(gpuva_to_vma(op->base.prefetch.va), ufence);
+ break;
+ default:
+ drm_warn(&vm->xe->drm, "NOT POSSIBLE");
+ }
+}
+
+static void vm_bind_ioctl_ops_fini(struct xe_vm *vm, struct xe_vma_ops *vops,
+ struct dma_fence *fence)
+{
+ struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, vops->q);
+ struct xe_user_fence *ufence;
+ struct xe_vma_op *op;
+ int i;
+
+ ufence = find_ufence_get(vops->syncs, vops->num_syncs);
+ list_for_each_entry(op, &vops->list, link) {
+ if (ufence)
+ op_add_ufence(vm, op, ufence);
+
+ if (op->base.op == DRM_GPUVA_OP_UNMAP)
+ xe_vma_destroy(gpuva_to_vma(op->base.unmap.va), fence);
+ else if (op->base.op == DRM_GPUVA_OP_REMAP)
+ xe_vma_destroy(gpuva_to_vma(op->base.remap.unmap->va),
+ fence);
+ }
+ if (ufence)
+ xe_sync_ufence_put(ufence);
+ for (i = 0; i < vops->num_syncs; i++)
+ xe_sync_entry_signal(vops->syncs + i, fence);
+ xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence);
+ dma_fence_put(fence);
+}
+
+static int vm_bind_ioctl_ops_execute(struct xe_vm *vm,
+ struct xe_vma_ops *vops)
+{
+ struct drm_exec exec;
+ struct dma_fence *fence;
int err;
lockdep_assert_held_write(&vm->lock);
- list_for_each_entry_safe(op, next, ops_list, link) {
- err = xe_vma_op_execute(vm, op);
- if (err) {
- drm_warn(&vm->xe->drm, "VM op(%d) failed with %d",
- op->base.op, err);
- /*
- * FIXME: Killing VM rather than proper error handling
- */
- xe_vm_kill(vm);
- return -ENOSPC;
+ drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT |
+ DRM_EXEC_IGNORE_DUPLICATES, 0);
+ drm_exec_until_all_locked(&exec) {
+ err = vm_bind_ioctl_ops_lock_and_prep(&exec, vm, vops);
+ drm_exec_retry_on_contention(&exec);
+ if (err)
+ goto unlock;
+
+ fence = ops_execute(vm, vops);
+ if (IS_ERR(fence)) {
+ err = PTR_ERR(fence);
+ /* FIXME: Killing VM rather than proper error handling */
+ xe_vm_kill(vm, false);
+ goto unlock;
+ } else {
+ vm_bind_ioctl_ops_fini(vm, vops, fence);
}
- xe_vma_op_cleanup(vm, op);
}
- return 0;
+unlock:
+ drm_exec_fini(&exec);
+ return err;
}
#define SUPPORTED_FLAGS \
@@ -2862,6 +3045,58 @@ static int vm_bind_ioctl_signal_fences(struct xe_vm *vm,
return err;
}
+static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm,
+ struct xe_exec_queue *q,
+ struct xe_sync_entry *syncs, u32 num_syncs)
+{
+ memset(vops, 0, sizeof(*vops));
+ INIT_LIST_HEAD(&vops->list);
+ vops->vm = vm;
+ vops->q = q;
+ vops->syncs = syncs;
+ vops->num_syncs = num_syncs;
+}
+
+static int xe_vm_bind_ioctl_validate_bo(struct xe_device *xe, struct xe_bo *bo,
+ u64 addr, u64 range, u64 obj_offset,
+ u16 pat_index)
+{
+ u16 coh_mode;
+
+ if (XE_IOCTL_DBG(xe, range > bo->size) ||
+ XE_IOCTL_DBG(xe, obj_offset >
+ bo->size - range)) {
+ return -EINVAL;
+ }
+
+ if (bo->flags & XE_BO_FLAG_INTERNAL_64K) {
+ if (XE_IOCTL_DBG(xe, obj_offset &
+ XE_64K_PAGE_MASK) ||
+ XE_IOCTL_DBG(xe, addr & XE_64K_PAGE_MASK) ||
+ XE_IOCTL_DBG(xe, range & XE_64K_PAGE_MASK)) {
+ return -EINVAL;
+ }
+ }
+
+ coh_mode = xe_pat_index_get_coh_mode(xe, pat_index);
+ if (bo->cpu_caching) {
+ if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE &&
+ bo->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB)) {
+ return -EINVAL;
+ }
+ } else if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE)) {
+ /*
+ * Imported dma-buf from a different device should
+ * require 1way or 2way coherency since we don't know
+ * how it was mapped on the CPU. Just assume it is
+ * potentially cached on the CPU side.
+ */
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
{
struct xe_device *xe = to_xe_device(dev);
@@ -2875,7 +3110,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
u32 num_syncs, num_ufence = 0;
struct xe_sync_entry *syncs = NULL;
struct drm_xe_vm_bind_op *bind_ops;
- LIST_HEAD(ops_list);
+ struct xe_vma_ops vops;
int err;
int i;
@@ -2945,7 +3180,6 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
u32 obj = bind_ops[i].obj;
u64 obj_offset = bind_ops[i].obj_offset;
u16 pat_index = bind_ops[i].pat_index;
- u16 coh_mode;
if (!obj)
continue;
@@ -2957,40 +3191,10 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
}
bos[i] = gem_to_xe_bo(gem_obj);
- if (XE_IOCTL_DBG(xe, range > bos[i]->size) ||
- XE_IOCTL_DBG(xe, obj_offset >
- bos[i]->size - range)) {
- err = -EINVAL;
- goto put_obj;
- }
-
- if (bos[i]->flags & XE_BO_FLAG_INTERNAL_64K) {
- if (XE_IOCTL_DBG(xe, obj_offset &
- XE_64K_PAGE_MASK) ||
- XE_IOCTL_DBG(xe, addr & XE_64K_PAGE_MASK) ||
- XE_IOCTL_DBG(xe, range & XE_64K_PAGE_MASK)) {
- err = -EINVAL;
- goto put_obj;
- }
- }
-
- coh_mode = xe_pat_index_get_coh_mode(xe, pat_index);
- if (bos[i]->cpu_caching) {
- if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE &&
- bos[i]->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB)) {
- err = -EINVAL;
- goto put_obj;
- }
- } else if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE)) {
- /*
- * Imported dma-buf from a different device should
- * require 1way or 2way coherency since we don't know
- * how it was mapped on the CPU. Just assume is it
- * potentially cached on CPU side.
- */
- err = -EINVAL;
+ err = xe_vm_bind_ioctl_validate_bo(xe, bos[i], addr, range,
+ obj_offset, pat_index);
+ if (err)
goto put_obj;
- }
}
if (args->num_syncs) {
@@ -3026,6 +3230,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
goto free_syncs;
}
+ xe_vma_ops_init(&vops, vm, q, syncs, num_syncs);
for (i = 0; i < args->num_binds; ++i) {
u64 range = bind_ops[i].range;
u64 addr = bind_ops[i].addr;
@@ -3045,42 +3250,25 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
}
err = vm_bind_ioctl_ops_parse(vm, q, ops[i], syncs, num_syncs,
- &ops_list,
- i == args->num_binds - 1);
+ &vops, i == args->num_binds - 1);
if (err)
goto unwind_ops;
}
/* Nothing to do */
- if (list_empty(&ops_list)) {
+ if (list_empty(&vops.list)) {
err = -ENODATA;
goto unwind_ops;
}
- xe_vm_get(vm);
- if (q)
- xe_exec_queue_get(q);
-
- err = vm_bind_ioctl_ops_execute(vm, &ops_list);
-
- up_write(&vm->lock);
-
- if (q)
- xe_exec_queue_put(q);
- xe_vm_put(vm);
-
- for (i = 0; bos && i < args->num_binds; ++i)
- xe_bo_put(bos[i]);
-
- kvfree(bos);
- kvfree(ops);
- if (args->num_binds > 1)
- kvfree(bind_ops);
-
- return err;
+ err = vm_bind_ioctl_ops_execute(vm, &vops);
unwind_ops:
- vm_bind_ioctl_ops_unwind(vm, ops, args->num_binds);
+ if (err && err != -ENODATA)
+ vm_bind_ioctl_ops_unwind(vm, ops, args->num_binds);
+ for (i = args->num_binds - 1; i >= 0; --i)
+ if (ops[i])
+ drm_gpuva_ops_free(&vm->gpuvm, ops[i]);
free_syncs:
if (err == -ENODATA)
err = vm_bind_ioctl_signal_fences(vm, q, syncs, num_syncs);
@@ -3339,7 +3527,7 @@ void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap)
}
if (bo) {
- dma_resv_lock(bo->ttm.base.resv, NULL);
+ xe_bo_lock(bo, false);
err = ttm_bo_vmap(&bo->ttm, &src);
if (!err) {
xe_map_memcpy_from(xe_bo_device(bo),
@@ -3348,7 +3536,7 @@ void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap)
snap->snap[i].len);
ttm_bo_vunmap(&bo->ttm, &src);
}
- dma_resv_unlock(bo->ttm.base.resv);
+ xe_bo_unlock(bo);
} else {
void __user *userptr = (void __user *)(size_t)snap->snap[i].bo_ofs;
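For orientation, the xe_bo_lock()/xe_bo_unlock() helpers used above are thin wrappers around the BO's dma-resv lock, so the behaviour of this path is unchanged. A minimal sketch of the equivalence, assuming the usual xe_bo.c behaviour (the real helpers may differ in detail):

	/* Rough equivalence only; see xe_bo.c for the real xe_bo_lock()/xe_bo_unlock(). */
	static inline int example_bo_lock(struct xe_bo *bo, bool intr)
	{
		if (intr)
			return dma_resv_lock_interruptible(bo->ttm.base.resv, NULL);

		dma_resv_lock(bo->ttm.base.resv, NULL);
		return 0;
	}

	static inline void example_bo_unlock(struct xe_bo *bo)
	{
		dma_resv_unlock(bo->ttm.base.resv);
	}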
diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h
index 306cd0934a19..3ac9021f970e 100644
--- a/drivers/gpu/drm/xe/xe_vm.h
+++ b/drivers/gpu/drm/xe/xe_vm.h
@@ -6,6 +6,7 @@
#ifndef _XE_VM_H_
#define _XE_VM_H_
+#include "xe_assert.h"
#include "xe_bo_types.h"
#include "xe_macros.h"
#include "xe_map.h"
@@ -208,6 +209,8 @@ int __xe_vm_userptr_needs_repin(struct xe_vm *vm);
int xe_vm_userptr_check_repin(struct xe_vm *vm);
int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker);
+struct dma_fence *xe_vma_rebind(struct xe_vm *vm, struct xe_vma *vma,
+ u8 tile_mask);
int xe_vm_invalidate_vma(struct xe_vma *vma);
diff --git a/drivers/gpu/drm/xe/xe_vm_doc.h b/drivers/gpu/drm/xe/xe_vm_doc.h
index bdc6659891a5..4d33f310b653 100644
--- a/drivers/gpu/drm/xe/xe_vm_doc.h
+++ b/drivers/gpu/drm/xe/xe_vm_doc.h
@@ -25,7 +25,7 @@
* VM bind (create GPU mapping for a BO or userptr)
* ================================================
*
- * Creates GPU mapings for a BO or userptr within a VM. VM binds uses the same
+ * Creates GPU mappings for a BO or userptr within a VM. VM binds use the same
* in / out fence interface (struct drm_xe_sync) as execs which allows users to
* think of binds and execs as more or less the same operation.
*
@@ -190,8 +190,8 @@
* Deferred binds in fault mode
* ----------------------------
*
- * In a VM is in fault mode (TODO: link to fault mode), new bind operations that
- * create mappings are by default are deferred to the page fault handler (first
+ * If a VM is in fault mode (TODO: link to fault mode), new bind operations that
+ * create mappings are by default deferred to the page fault handler (first
* use). This behavior can be overriden by setting the flag
* DRM_XE_VM_BIND_FLAG_IMMEDIATE which indicates to creating the mapping
* immediately.
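As a concrete illustration of the flag discussed above, a userspace bind that asks for an immediate mapping on a faulting VM could look roughly like the sketch below. This is a hedged example against the xe uAPI: fd, vm_id, bo_handle and pat_idx are assumed to exist already, sync setup and error handling are omitted, and field usage should be double-checked against xe_drm.h.

	#include <stdint.h>
	#include <xf86drm.h>
	#include "xe_drm.h"	/* uAPI header; the include path depends on the build setup */

	/* Sketch only: syncs and error handling are left out for brevity. */
	static int bind_immediate(int fd, uint32_t vm_id, uint32_t bo_handle,
				  uint64_t gpu_addr, uint64_t size, uint16_t pat_idx)
	{
		struct drm_xe_vm_bind bind = {
			.vm_id = vm_id,
			.num_binds = 1,
			.bind = {
				.obj = bo_handle,
				.obj_offset = 0,
				.range = size,
				.addr = gpu_addr,
				.op = DRM_XE_VM_BIND_OP_MAP,
				.flags = DRM_XE_VM_BIND_FLAG_IMMEDIATE,
				.pat_index = pat_idx,
			},
		};

		/* Create the mapping now instead of deferring it to first fault. */
		return drmIoctl(fd, DRM_IOCTL_XE_VM_BIND, &bind);
	}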
@@ -225,7 +225,7 @@
*
* A VM in compute mode enables long running workloads and ultra low latency
* submission (ULLS). ULLS is implemented via a continuously running batch +
- * semaphores. This enables to the user to insert jump to new batch commands
+ * semaphores. This enables the user to insert jump-to-new-batch commands
* into the continuously running batch. In both cases these batches exceed the
* time a dma fence is allowed to exist for before signaling, as such dma fences
* are not used when a VM is in compute mode. User fences (TODO: link user fence
@@ -244,7 +244,7 @@
* Once all preempt fences are signaled for a VM the kernel can safely move the
* memory and kick the rebind worker which resumes all the engines execution.
*
- * A preempt fence, for every engine using the VM, is installed the VM's
+ * A preempt fence, for every engine using the VM, is installed into the VM's
* dma-resv DMA_RESV_USAGE_PREEMPT_FENCE slot. The same preempt fence, for every
* engine using the VM, is also installed into the same dma-resv slot of every
* external BO mapped in the VM.
@@ -314,7 +314,7 @@
* signaling, and memory allocation is usually required to resolve a page
* fault, but memory allocation is not allowed to gate dma fence signaling. As
* such, dma fences are not allowed when VM is in fault mode. Because dma-fences
- * are not allowed, long running workloads and ULLS are enabled on a faulting
+ * are not allowed, only long running workloads and ULLS are enabled on a faulting
* VM.
*
* Defered VM binds
@@ -399,14 +399,14 @@
* Notice no rebind is issued in the access counter handler as the rebind will
* be issued on next page fault.
*
- * Cavets with eviction / user pointer invalidation
- * ------------------------------------------------
+ * Caveats with eviction / user pointer invalidation
+ * -------------------------------------------------
*
* In the case of eviction and user pointer invalidation on a faulting VM, there
* is no need to issue a rebind rather we just need to blow away the page tables
* for the VMAs and the page fault handler will rebind the VMAs when they fault.
- * The cavet is to update / read the page table structure the VM global lock is
- * neeeed. In both the case of eviction and user pointer invalidation locks are
+ * The caveat is that the VM global lock is needed to update / read the
+ * page table structure. In both eviction and user pointer invalidation, locks are
* held which make acquiring the VM global lock impossible. To work around this
* every VMA maintains a list of leaf page table entries which should be written
* to zero to blow away the VMA's page tables. After writing zero to these
@@ -427,9 +427,9 @@
* VM global lock (vm->lock) - rw semaphore lock. Outer most lock which protects
* the list of userptrs mapped in the VM, the list of engines using this VM, and
* the array of external BOs mapped in the VM. When adding or removing any of the
- * aforemented state from the VM should acquire this lock in write mode. The VM
+ * aforementioned state from the VM, this lock should be acquired in write mode. The VM
* bind path also acquires this lock in write while the exec / compute mode
- * rebind worker acquire this lock in read mode.
+ * rebind worker acquires this lock in read mode.
*
* VM dma-resv lock (vm->ttm.base.resv->lock) - WW lock. Protects VM dma-resv
* slots which is shared with any private BO in the VM. Expected to be acquired
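To make the documented ordering concrete, a bind-path style acquisition would look roughly like the hypothetical sketch below (the real paths add interruptible waits, validation and error handling that are omitted here; xe_vm_lock()/xe_vm_unlock() are assumed to take and drop the VM dma-resv lock):

	/* Hypothetical helper illustrating the documented lock ordering. */
	static int example_modify_vm_state(struct xe_vm *vm)
	{
		int err;

		down_write(&vm->lock);		/* outer: VM global rw semaphore, write mode */

		err = xe_vm_lock(vm, true);	/* inner: VM dma-resv (WW) lock */
		if (!err) {
			/* ... touch VMA / page table state here ... */
			xe_vm_unlock(vm);
		}

		up_write(&vm->lock);

		return err;
	}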
diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
index 72a100671e5d..ce1a63a5e3e7 100644
--- a/drivers/gpu/drm/xe/xe_vm_types.h
+++ b/drivers/gpu/drm/xe/xe_vm_types.h
@@ -178,6 +178,13 @@ struct xe_vm {
struct list_head rebind_list;
/**
+	 * @destroy_work: worker to destroy VM, needed because the last put can
+	 * happen from a dma_fence signaling irq context and the destroy needs
+	 * to be able to sleep.
+ */
+ struct work_struct destroy_work;
+
+ /**
* @rftree: range fence tree to track updates to page table structure.
* Used to implement conflict tracking between independent bind engines.
*/
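The @destroy_work field added above is the usual deferred-teardown pattern: the final reference can be dropped from a dma-fence signaling (irq) context, while tearing the VM down needs to sleep, so the last put only queues work. A generic sketch of that pattern with hypothetical names (the actual handling lives in xe_vm.c):

	/* Illustrative only; function names here are hypothetical. */
	static void example_vm_destroy_work(struct work_struct *w)
	{
		struct xe_vm *vm = container_of(w, struct xe_vm, destroy_work);

		/* Process context: sleeping teardown (page tables, BO puts) is fine. */
		kfree(vm);	/* placeholder for the real teardown */
	}

	static void example_vm_last_put(struct xe_vm *vm)
	{
		/*
		 * May run from a dma-fence callback in irq context, so defer to the
		 * worker set up with INIT_WORK(&vm->destroy_work, example_vm_destroy_work).
		 */
		queue_work(system_unbound_wq, &vm->destroy_work);
	}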
@@ -323,11 +330,6 @@ enum xe_vma_op_flags {
struct xe_vma_op {
/** @base: GPUVA base operation */
struct drm_gpuva_op base;
- /**
- * @ops: GPUVA ops, when set call drm_gpuva_ops_free after this
- * operations is processed
- */
- struct drm_gpuva_ops *ops;
/** @q: exec queue for this operation */
struct xe_exec_queue *q;
/**
@@ -341,6 +343,8 @@ struct xe_vma_op {
struct list_head link;
/** @flags: operation flags */
enum xe_vma_op_flags flags;
+ /** @tile_mask: Tile mask for operation */
+ u8 tile_mask;
union {
/** @map: VMA map operation specific data */
@@ -351,4 +355,19 @@ struct xe_vma_op {
struct xe_vma_op_prefetch prefetch;
};
};
+
+/** struct xe_vma_ops - VMA operations */
+struct xe_vma_ops {
+ /** @list: list of VMA operations */
+ struct list_head list;
+ /** @vm: VM */
+ struct xe_vm *vm;
+	/** @q: exec queue for these operations */
+ struct xe_exec_queue *q;
+	/** @syncs: syncs for these operations */
+ struct xe_sync_entry *syncs;
+ /** @num_syncs: number of syncs */
+ u32 num_syncs;
+};
+
#endif
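The bind ioctl earlier in this patch seeds this container with xe_vma_ops_init(&vops, vm, q, syncs, num_syncs). Given the fields above, that helper plausibly reduces to something like the sketch below (the real definition lives in xe_vm.c and may differ):

	/* Plausible shape of xe_vma_ops_init(), inferred from the call site and struct above. */
	static void example_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm,
					 struct xe_exec_queue *q,
					 struct xe_sync_entry *syncs, u32 num_syncs)
	{
		memset(vops, 0, sizeof(*vops));
		INIT_LIST_HEAD(&vops->list);
		vops->vm = vm;
		vops->q = q;
		vops->syncs = syncs;
		vops->num_syncs = num_syncs;
	}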
diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c
index dd214d95e4b6..05db53c1448c 100644
--- a/drivers/gpu/drm/xe/xe_wa.c
+++ b/drivers/gpu/drm/xe/xe_wa.c
@@ -533,6 +533,10 @@ static const struct xe_rtp_entry_sr engine_was[] = {
FUNC(xe_rtp_match_first_render_or_compute)),
XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0, WR_REQ_CHAINING_DIS))
},
+ { XE_RTP_NAME("14021402888"),
+ XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)),
+ XE_RTP_ACTIONS(SET(HALF_SLICE_CHICKEN7, CLEAR_OPTIMIZATION_DISABLE))
+ },
/* Xe2_HPM */
@@ -668,6 +672,11 @@ static const struct xe_rtp_entry_sr lrc_was[] = {
XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2004), ENGINE_CLASS(RENDER)),
XE_RTP_ACTIONS(SET(COMMON_SLICE_CHICKEN1, DISABLE_BOTTOM_CLIP_RECTANGLE_TEST))
},
+ { XE_RTP_NAME("14021567978"),
+ XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED),
+ ENGINE_CLASS(RENDER)),
+ XE_RTP_ACTIONS(SET(CHICKEN_RASTER_2, TBIMR_FAST_CLIP))
+ },
/* Xe2_HPG */
{ XE_RTP_NAME("15010599737"),
@@ -682,6 +691,15 @@ static const struct xe_rtp_entry_sr lrc_was[] = {
XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)),
XE_RTP_ACTIONS(SET(WM_CHICKEN3, HIZ_PLANE_COMPRESSION_DIS))
},
+ { XE_RTP_NAME("14021490052"),
+ XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)),
+ XE_RTP_ACTIONS(SET(FF_MODE,
+ DIS_MESH_PARTIAL_AUTOSTRIP |
+ DIS_MESH_AUTOSTRIP),
+ SET(VFLSKPD,
+ DIS_PARTIAL_AUTOSTRIP |
+ DIS_AUTOSTRIP))
+ },
{}
};