diff options
author | Danilo Krummrich <dakr@redhat.com> | 2024-05-13 22:28:28 +0200 |
---|---|---|
committer | Danilo Krummrich <dakr@redhat.com> | 2024-05-13 22:28:28 +0200 |
commit | badc4e6b9af54d67dfa62aa9721e3d43689065d9 (patch) | |
tree | c978861e27bcb88b07b08434e626ab6ba1f1c2d8 | |
parent | 014aeee56a21c2b210a2808ecccfa1aedfe51bea (diff) | |
parent | 664de50cbfae048d08e9f3c1c0da377d1269e6d1 (diff) |
Merge remote-tracking branch 'drm-xe/drm-xe-next' into drm-tip
# Conflicts:
# drivers/gpu/drm/xe/xe_device.h
# drivers/gpu/drm/xe/xe_vm.c
93 files changed, 3757 insertions, 735 deletions
diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index ae579b6c8763..b620389761d5 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -163,7 +163,9 @@ xe-$(CONFIG_PCI_IOV) += \ xe_gt_sriov_pf.o \ xe_gt_sriov_pf_config.o \ xe_gt_sriov_pf_control.o \ + xe_gt_sriov_pf_debugfs.o \ xe_gt_sriov_pf_policy.o \ + xe_gt_sriov_pf_service.o \ xe_lmtt.o \ xe_lmtt_2l.o \ xe_lmtt_ml.o \ diff --git a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h index 511cf974d585..5c1d40432ca0 100644 --- a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h +++ b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h @@ -194,14 +194,18 @@ enum { * granularity) since the GPUs clock time runs off a different crystal * from the CPUs clock. Changing this KLV on a VF that is currently * running a context wont take effect until a new context is scheduled in. - * That said, when the PF is changing this value from 0xFFFFFFFF to - * something else, it might never take effect if the VF is running an - * inifinitely long compute or shader kernel. In such a scenario, the + * That said, when the PF is changing this value from 0x0 to + * a non-zero value, it might never take effect if the VF is running an + * infinitely long compute or shader kernel. In such a scenario, the * PF would need to trigger a VM PAUSE and then change the KLV to force * it to take effect. Such cases might typically happen on a 1PF+1VF * Virtualization config enabled for heavier workloads like AI/ML. * + * The max value for this KLV is 100 seconds, anything exceeding that + * will be clamped to the max. + * * :0: infinite exec quantum (default) + * :100000: maximum exec quantum (100000ms == 100s) * * _`GUC_KLV_VF_CFG_PREEMPT_TIMEOUT` : 0x8A02 * This config sets the VF-preemption-timeout in microseconds. @@ -211,15 +215,19 @@ enum { * different crystal from the CPUs clock. Changing this KLV on a VF * that is currently running a context wont take effect until a new * context is scheduled in. - * That said, when the PF is changing this value from 0xFFFFFFFF to - * something else, it might never take effect if the VF is running an - * inifinitely long compute or shader kernel. + * That said, when the PF is changing this value from 0x0 to + * a non-zero value, it might never take effect if the VF is running an + * infinitely long compute or shader kernel. * In this case, the PF would need to trigger a VM PAUSE and then change * the KLV to force it to take effect. Such cases might typically happen * on a 1PF+1VF Virtualization config enabled for heavier workloads like * AI/ML. * + * The max value for this KLV is 100 seconds, anything exceeding that + * will be clamped to the max. + * * :0: no preemption timeout (default) + * :100000000: maximum preemption timeout (100000000us == 100s) * * _`GUC_KLV_VF_CFG_THRESHOLD_CAT_ERR` : 0x8A03 * This config sets threshold for CAT errors caused by the VF. @@ -291,9 +299,11 @@ enum { #define GUC_KLV_VF_CFG_EXEC_QUANTUM_KEY 0x8a01 #define GUC_KLV_VF_CFG_EXEC_QUANTUM_LEN 1u +#define GUC_KLV_VF_CFG_EXEC_QUANTUM_MAX_VALUE 100000u -#define GUC_KLV_VF_CFG_PREEMPT_TIMEOUT_KEY 0x8a02 -#define GUC_KLV_VF_CFG_PREEMPT_TIMEOUT_LEN 1u +#define GUC_KLV_VF_CFG_PREEMPT_TIMEOUT_KEY 0x8a02 +#define GUC_KLV_VF_CFG_PREEMPT_TIMEOUT_LEN 1u +#define GUC_KLV_VF_CFG_PREEMPT_TIMEOUT_MAX_VALUE 100000000u #define GUC_KLV_VF_CFG_THRESHOLD_CAT_ERR_KEY 0x8a03 #define GUC_KLV_VF_CFG_THRESHOLD_CAT_ERR_LEN 1u diff --git a/drivers/gpu/drm/xe/abi/guc_relay_actions_abi.h b/drivers/gpu/drm/xe/abi/guc_relay_actions_abi.h index 747e428de421..6c2834613081 100644 --- a/drivers/gpu/drm/xe/abi/guc_relay_actions_abi.h +++ b/drivers/gpu/drm/xe/abi/guc_relay_actions_abi.h @@ -1,11 +1,179 @@ /* SPDX-License-Identifier: MIT */ /* - * Copyright © 2023 Intel Corporation + * Copyright © 2023-2024 Intel Corporation */ #ifndef _ABI_GUC_RELAY_ACTIONS_ABI_H_ #define _ABI_GUC_RELAY_ACTIONS_ABI_H_ +#include "abi/guc_relay_communication_abi.h" + +/** + * DOC: GuC Relay VF/PF ABI Version + * + * The _`GUC_RELAY_VERSION_BASE` defines minimum VF/PF ABI version that + * drivers must support. Currently this is version 1.0. + * + * The _`GUC_RELAY_VERSION_LATEST` defines latest VF/PF ABI version that + * drivers may use. Currently this is version 1.0. + * + * Some platforms may require different base VF/PF ABI version. + * No supported VF/PF ABI version can be 0.0. + */ + +#define GUC_RELAY_VERSION_BASE_MAJOR 1 +#define GUC_RELAY_VERSION_BASE_MINOR 0 + +#define GUC_RELAY_VERSION_LATEST_MAJOR 1 +#define GUC_RELAY_VERSION_LATEST_MINOR 0 + +/** + * DOC: GuC Relay Actions + * + * The following actions are supported from VF/PF ABI version 1.0: + * + * * `VF2PF_HANDSHAKE`_ + * * `VF2PF_QUERY_RUNTIME`_ + */ + +/** + * DOC: VF2PF_HANDSHAKE + * + * This `Relay Message`_ is used by the VF to establish ABI version with the PF. + * + * Prior to exchanging any other messages, both VF driver and PF driver must + * negotiate the VF/PF ABI version that will be used in their communication. + * + * The VF driver shall use @MAJOR and @MINOR fields to pass requested ABI version. + * The VF driver may use special version 0.0 (both @MAJOR and @MINOR set to 0) + * to request latest (or any) ABI version that is supported by the PF driver. + * + * This message definition shall be supported by all future ABI versions. + * This message definition shall not be changed by future ABI versions. + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_HOST_ | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_REQUEST_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:16 | DATA0 = MBZ | + * | +-------+--------------------------------------------------------------+ + * | | 15:0 | ACTION = _`GUC_RELAY_ACTION_VF2PF_HANDSHAKE` = 0x0001 | + * +---+-------+--------------------------------------------------------------+ + * | 1 | 31:16 | **MAJOR** - requested major version of the VFPF interface | + * | | | (use MAJOR_ANY to request latest version supported by PF) | + * | +-------+--------------------------------------------------------------+ + * | | 15:0 | **MINOR** - requested minor version of the VFPF interface | + * | | | (use MINOR_ANY to request latest version supported by PF) | + * +---+-------+--------------------------------------------------------------+ + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_HOST_ | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_RESPONSE_SUCCESS_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:0 | DATA0 = MBZ | + * +---+-------+--------------------------------------------------------------+ + * | 1 | 31:16 | **MAJOR** - agreed major version of the VFPF interface | + * | +-------+--------------------------------------------------------------+ + * | | 15:0 | **MINOR** - agreed minor version of the VFPF interface | + * +---+-------+--------------------------------------------------------------+ + */ +#define GUC_RELAY_ACTION_VF2PF_HANDSHAKE 0x0001u + +#define VF2PF_HANDSHAKE_REQUEST_MSG_LEN 2u +#define VF2PF_HANDSHAKE_REQUEST_MSG_0_MBZ GUC_HXG_REQUEST_MSG_0_DATA0 +#define VF2PF_HANDSHAKE_REQUEST_MSG_1_MAJOR (0xffffu << 16) +#define VF2PF_HANDSHAKE_MAJOR_ANY 0 +#define VF2PF_HANDSHAKE_REQUEST_MSG_1_MINOR (0xffffu << 0) +#define VF2PF_HANDSHAKE_MINOR_ANY 0 + +#define VF2PF_HANDSHAKE_RESPONSE_MSG_LEN 2u +#define VF2PF_HANDSHAKE_RESPONSE_MSG_0_MBZ GUC_HXG_RESPONSE_MSG_0_DATA0 +#define VF2PF_HANDSHAKE_RESPONSE_MSG_1_MAJOR (0xffffu << 16) +#define VF2PF_HANDSHAKE_RESPONSE_MSG_1_MINOR (0xffffu << 0) + +/** + * DOC: VF2PF_QUERY_RUNTIME + * + * This `Relay Message`_ is used by the VF to query values of runtime registers. + * + * On some platforms, VF drivers may not have access to the some fuse registers + * (referred here as 'runtime registers') and therefore VF drivers need to ask + * the PF driver to obtain their values. + * + * However, the list of such registers, and their values, is fully owned and + * maintained by the PF driver and the VF driver may only initiate the query + * sequence and indicate in the @START field the starting index of the next + * requested register from this predefined list. + * + * In the response, the PF driver will return tuple of 32-bit register offset and + * the 32-bit value of that register (respectively @REG_OFFSET and @REG_VALUE). + * + * The VF driver can use @LIMIT field to limit number of returned register tuples. + * If @LIMIT is unset then PF decides about number of returned register tuples. + * + * This message definition is supported from ABI version 1.0. + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_HOST_ | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_REQUEST_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:16 | DATA0 = **LIMIT** - limit number of returned entries | + * | | | (use zero to not enforce any limits on the response) | + * | +-------+--------------------------------------------------------------+ + * | | 15:0 | ACTION = _`GUC_RELAY_ACTION_VF2PF_QUERY_RUNTIME` = 0x0101 | + * +---+-------+--------------------------------------------------------------+ + * | 1 | 31:0 | DATA1 = **START** - index of the first requested entry | + * +---+-------+--------------------------------------------------------------+ + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_HOST_ | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_RESPONSE_SUCCESS_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:0 | DATA0 = **COUNT** - number of entries included in response | + * +---+-------+--------------------------------------------------------------+ + * | 1 | 31:0 | DATA1 = **REMAINING** - number of remaining entries | + * +---+-------+--------------------------------------------------------------+ + * | 2 | 31:0 | DATA2 = **REG_OFFSET** - offset of register[START] | + * +---+-------+--------------------------------------------------------------+ + * | 3 | 31:0 | DATA3 = **REG_VALUE** - value of register[START] | + * +---+-------+--------------------------------------------------------------+ + * | | | | + * +---+-------+--------------------------------------------------------------+ + * |n-1| 31:0 | REG_OFFSET - offset of register[START + x] | + * +---+-------+--------------------------------------------------------------+ + * | n | 31:0 | REG_VALUE - value of register[START + x] | + * +---+-------+--------------------------------------------------------------+ + */ +#define GUC_RELAY_ACTION_VF2PF_QUERY_RUNTIME 0x0101u + +#define VF2PF_QUERY_RUNTIME_REQUEST_MSG_LEN 2u +#define VF2PF_QUERY_RUNTIME_REQUEST_MSG_0_LIMIT GUC_HXG_REQUEST_MSG_0_DATA0 +#define VF2PF_QUERY_RUNTIME_NO_LIMIT 0u +#define VF2PF_QUERY_RUNTIME_REQUEST_MSG_1_START GUC_HXG_REQUEST_MSG_n_DATAn + +#define VF2PF_QUERY_RUNTIME_RESPONSE_MSG_MIN_LEN (GUC_HXG_MSG_MIN_LEN + 1u) +#define VF2PF_QUERY_RUNTIME_RESPONSE_MSG_MAX_LEN \ + (VF2PF_QUERY_RUNTIME_RESPONSE_MSG_MIN_LEN + VF2PF_QUERY_RUNTIME_MAX_COUNT * 2) +#define VF2PF_QUERY_RUNTIME_RESPONSE_MSG_0_COUNT GUC_HXG_RESPONSE_MSG_0_DATA0 +#define VF2PF_QUERY_RUNTIME_MIN_COUNT 0 +#define VF2PF_QUERY_RUNTIME_MAX_COUNT \ + ((GUC_RELAY_MSG_MAX_LEN - VF2PF_QUERY_RUNTIME_RESPONSE_MSG_MIN_LEN) / 2) +#define VF2PF_QUERY_RUNTIME_RESPONSE_MSG_1_REMAINING GUC_HXG_RESPONSE_MSG_n_DATAn +#define VF2PF_QUERY_RUNTIME_RESPONSE_DATAn_REG_OFFSETx GUC_HXG_RESPONSE_MSG_n_DATAn +#define VF2PF_QUERY_RUNTIME_RESPONSE_DATAn_REG_VALUEx GUC_HXG_RESPONSE_MSG_n_DATAn + /** * DOC: GuC Relay Debug Actions * diff --git a/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c b/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c index d46f87a039f2..eb67ecf08db2 100644 --- a/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c +++ b/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c @@ -13,6 +13,7 @@ #include "xe_bo.h" #include "xe_device.h" #include "xe_device_types.h" +#include "xe_force_wake.h" #include "xe_gsc_proxy.h" #include "xe_gsc_submit.h" #include "xe_gt.h" diff --git a/drivers/gpu/drm/xe/regs/xe_gpu_commands.h b/drivers/gpu/drm/xe/instructions/xe_gpu_commands.h index a255946b6f77..a255946b6f77 100644 --- a/drivers/gpu/drm/xe/regs/xe_gpu_commands.h +++ b/drivers/gpu/drm/xe/instructions/xe_gpu_commands.h diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h index af71b87d8030..263ffc7bc2ef 100644 --- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h @@ -44,9 +44,10 @@ #define GSCCS_RING_BASE 0x11a000 #define RING_TAIL(base) XE_REG((base) + 0x30) +#define TAIL_ADDR REG_GENMASK(20, 3) #define RING_HEAD(base) XE_REG((base) + 0x34) -#define HEAD_ADDR 0x001FFFFC +#define HEAD_ADDR REG_GENMASK(20, 2) #define RING_START(base) XE_REG((base) + 0x38) @@ -54,6 +55,8 @@ #define RING_CTL_SIZE(size) ((size) - PAGE_SIZE) /* in bytes -> pages */ #define RING_CTL_SIZE(size) ((size) - PAGE_SIZE) /* in bytes -> pages */ +#define RING_START_UDW(base) XE_REG((base) + 0x48) + #define RING_PSMI_CTL(base) XE_REG((base) + 0x50, XE_REG_OPTION_MASKED) #define RC_SEMA_IDLE_MSG_DISABLE REG_BIT(12) #define WAIT_FOR_EVENT_POWER_DOWN_DISABLE REG_BIT(7) @@ -65,6 +68,7 @@ #define RING_ACTHD_UDW(base) XE_REG((base) + 0x5c) #define RING_DMA_FADD_UDW(base) XE_REG((base) + 0x60) #define RING_IPEHR(base) XE_REG((base) + 0x68) +#define RING_INSTDONE(base) XE_REG((base) + 0x6c) #define RING_ACTHD(base) XE_REG((base) + 0x74) #define RING_DMA_FADD(base) XE_REG((base) + 0x78) #define RING_HWS_PGA(base) XE_REG((base) + 0x80) @@ -108,6 +112,8 @@ #define FF_DOP_CLOCK_GATE_DISABLE REG_BIT(1) #define REPLAY_MODE_GRANULARITY REG_BIT(0) +#define INDIRECT_RING_STATE(base) XE_REG((base) + 0x108) + #define RING_BBADDR(base) XE_REG((base) + 0x140) #define RING_BBADDR_UDW(base) XE_REG((base) + 0x168) @@ -123,6 +129,7 @@ #define RING_EXECLIST_STATUS_HI(base) XE_REG((base) + 0x234 + 4) #define RING_CONTEXT_CONTROL(base) XE_REG((base) + 0x244, XE_REG_OPTION_MASKED) +#define CTX_CTRL_INDIRECT_RING_STATE_ENABLE REG_BIT(4) #define CTX_CTRL_INHIBIT_SYN_CTX_SWITCH REG_BIT(3) #define CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT REG_BIT(0) @@ -135,7 +142,6 @@ #define RING_VALID_MASK 0x00000001 #define RING_VALID 0x00000001 #define STOP_RING REG_BIT(8) -#define TAIL_ADDR 0x001FFFF8 #define RING_CTX_TIMESTAMP(base) XE_REG((base) + 0x3a8) #define CSBE_DEBUG_STATUS(base) XE_REG((base) + 0x3fc) diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index 26fb4943c79e..116224a9ccae 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -59,6 +59,27 @@ #define XELP_GLOBAL_MOCS(i) XE_REG(0x4000 + (i) * 4) #define XEHP_GLOBAL_MOCS(i) XE_REG_MCR(0x4000 + (i) * 4) +#define LE_SSE_MASK REG_GENMASK(18, 17) +#define LE_SSE(value) REG_FIELD_PREP(LE_SSE_MASK, value) +#define LE_COS_MASK REG_GENMASK(16, 15) +#define LE_COS(value) REG_FIELD_PREP(LE_COS_MASK) +#define LE_SCF_MASK REG_BIT(14) +#define LE_SCF(value) REG_FIELD_PREP(LE_SCF_MASK, value) +#define LE_PFM_MASK REG_GENMASK(13, 11) +#define LE_PFM(value) REG_FIELD_PREP(LE_PFM_MASK, value) +#define LE_SCC_MASK REG_GENMASK(10, 8) +#define LE_SCC(value) REG_FIELD_PREP(LE_SCC_MASK, value) +#define LE_RSC_MASK REG_BIT(7) +#define LE_RSC(value) REG_FIELD_PREP(LE_RSC_MASK, value) +#define LE_AOM_MASK REG_BIT(6) +#define LE_AOM(value) REG_FIELD_PREP(LE_AOM_MASK, value) +#define LE_LRUM_MASK REG_GENMASK(5, 4) +#define LE_LRUM(value) REG_FIELD_PREP(LE_LRUM_MASK, value) +#define LE_TGT_CACHE_MASK REG_GENMASK(3, 2) +#define LE_TGT_CACHE(value) REG_FIELD_PREP(LE_TGT_CACHE_MASK, value) +#define LE_CACHEABILITY_MASK REG_GENMASK(1, 0) +#define LE_CACHEABILITY(value) REG_FIELD_PREP(LE_CACHEABILITY_MASK, value) + #define CCS_AUX_INV XE_REG(0x4208) #define VD0_AUX_INV XE_REG(0x4218) @@ -98,6 +119,8 @@ #define FF_MODE2_TDS_TIMER_MASK REG_GENMASK(23, 16) #define FF_MODE2_TDS_TIMER_128 REG_FIELD_PREP(FF_MODE2_TDS_TIMER_MASK, 4) +#define XEHPG_INSTDONE_GEOM_SVGUNIT XE_REG_MCR(0x666c) + #define CACHE_MODE_1 XE_REG(0x7004, XE_REG_OPTION_MASKED) #define MSAA_OPTIMIZATION_REDUC_DISABLE REG_BIT(11) @@ -115,6 +138,14 @@ #define FLSH_IGNORES_PSD REG_BIT(10) #define FD_END_COLLECT REG_BIT(5) +#define SC_INSTDONE XE_REG(0x7100) +#define SC_INSTDONE_EXTRA XE_REG(0x7104) +#define SC_INSTDONE_EXTRA2 XE_REG(0x7108) + +#define XEHPG_SC_INSTDONE XE_REG_MCR(0x7100) +#define XEHPG_SC_INSTDONE_EXTRA XE_REG_MCR(0x7104) +#define XEHPG_SC_INSTDONE_EXTRA2 XE_REG_MCR(0x7108) + #define COMMON_SLICE_CHICKEN4 XE_REG(0x7300, XE_REG_OPTION_MASKED) #define DISABLE_TDC_LOAD_BALANCING_CALC REG_BIT(6) @@ -173,8 +204,11 @@ #define MAX_MSLICES 4 #define MEML3_EN_MASK REG_GENMASK(3, 0) +#define MIRROR_FUSE1 XE_REG(0x911c) + #define XELP_EU_ENABLE XE_REG(0x9134) /* "_DISABLE" on Xe_LP */ #define XELP_EU_MASK REG_GENMASK(7, 0) +#define XELP_GT_SLICE_ENABLE XE_REG(0x9138) #define XELP_GT_GEOMETRY_DSS_ENABLE XE_REG(0x913c) #define GT_VEBOX_VDBOX_DISABLE XE_REG(0x9140) @@ -301,9 +335,24 @@ #define XEHPC_OVRLSCCC REG_BIT(0) /* L3 Cache Control */ +#define LNCFCMOCS_REG_COUNT 32 #define XELP_LNCFCMOCS(i) XE_REG(0xb020 + (i) * 4) #define XEHP_LNCFCMOCS(i) XE_REG_MCR(0xb020 + (i) * 4) -#define LNCFCMOCS_REG_COUNT 32 +#define L3_UPPER_LKUP_MASK REG_BIT(23) +#define L3_UPPER_GLBGO_MASK REG_BIT(22) +#define L3_UPPER_IDX_CACHEABILITY_MASK REG_GENMASK(21, 20) +#define L3_UPPER_IDX_SCC_MASK REG_GENMASK(19, 17) +#define L3_UPPER_IDX_ESC_MASK REG_BIT(16) +#define L3_LKUP_MASK REG_BIT(7) +#define L3_LKUP(value) REG_FIELD_PREP(L3_LKUP_MASK, value) +#define L3_GLBGO_MASK REG_BIT(6) +#define L3_GLBGO(value) REG_FIELD_PREP(L3_GLBGO_MASK, value) +#define L3_CACHEABILITY_MASK REG_GENMASK(5, 4) +#define L3_CACHEABILITY(value) REG_FIELD_PREP(L3_CACHEABILITY_MASK, value) +#define L3_SCC_MASK REG_GENMASK(3, 1) +#define L3_SCC(value) REG_FIELD_PREP(L3_SCC_MASK, value) +#define L3_ESC_MASK REG_BIT(0) +#define L3_ESC(value) REG_FIELD_PREP(L3_ESC_MASK, value) #define XEHP_L3NODEARBCFG XE_REG_MCR(0xb0b4) #define XEHP_LNESPARE REG_BIT(19) @@ -345,6 +394,9 @@ #define HALF_SLICE_CHICKEN5 XE_REG_MCR(0xe188, XE_REG_OPTION_MASKED) #define DISABLE_SAMPLE_G_PERFORMANCE REG_BIT(0) +#define SAMPLER_INSTDONE XE_REG_MCR(0xe160) +#define ROW_INSTDONE XE_REG_MCR(0xe164) + #define SAMPLER_MODE XE_REG_MCR(0xe18c, XE_REG_OPTION_MASKED) #define ENABLE_SMALLPL REG_BIT(15) #define SC_DISABLE_POWER_OPTIMIZATION_EBB REG_BIT(9) @@ -353,6 +405,7 @@ #define HALF_SLICE_CHICKEN7 XE_REG_MCR(0xe194, XE_REG_OPTION_MASKED) #define DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA REG_BIT(15) +#define CLEAR_OPTIMIZATION_DISABLE REG_BIT(6) #define CACHE_MODE_SS XE_REG_MCR(0xe420, XE_REG_OPTION_MASKED) #define DISABLE_ECC REG_BIT(5) diff --git a/drivers/gpu/drm/xe/regs/xe_lrc_layout.h b/drivers/gpu/drm/xe/regs/xe_lrc_layout.h index 1825d8f79db6..e6ca8bbda8f4 100644 --- a/drivers/gpu/drm/xe/regs/xe_lrc_layout.h +++ b/drivers/gpu/drm/xe/regs/xe_lrc_layout.h @@ -11,6 +11,7 @@ #define CTX_RING_TAIL (0x06 + 1) #define CTX_RING_START (0x08 + 1) #define CTX_RING_CTL (0x0a + 1) +#define CTX_INDIRECT_RING_STATE (0x26 + 1) #define CTX_PDP0_UDW (0x30 + 1) #define CTX_PDP0_LDW (0x32 + 1) @@ -23,4 +24,10 @@ #define CTX_INT_SRC_REPORT_REG (CTX_LRI_INT_REPORT_PTR + 3) #define CTX_INT_SRC_REPORT_PTR (CTX_LRI_INT_REPORT_PTR + 4) +#define INDIRECT_CTX_RING_HEAD (0x02 + 1) +#define INDIRECT_CTX_RING_TAIL (0x04 + 1) +#define INDIRECT_CTX_RING_START (0x06 + 1) +#define INDIRECT_CTX_RING_START_UDW (0x08 + 1) +#define INDIRECT_CTX_RING_CTL (0x0a + 1) + #endif diff --git a/drivers/gpu/drm/xe/regs/xe_sriov_regs.h b/drivers/gpu/drm/xe/regs/xe_sriov_regs.h index 617ddb84b7fa..017b4ddd1ecf 100644 --- a/drivers/gpu/drm/xe/regs/xe_sriov_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_sriov_regs.h @@ -14,6 +14,9 @@ #define LMEM_EN REG_BIT(31) #define LMTT_DIR_PTR REG_GENMASK(30, 0) /* in multiples of 64KB */ +#define VIRTUAL_CTRL_REG XE_REG(0x10108c) +#define GUEST_GTT_UPDATE_EN REG_BIT(8) + #define VF_CAP_REG XE_REG(0x1901f8, XE_REG_OPTION_VF) #define VF_CAP REG_BIT(0) diff --git a/drivers/gpu/drm/xe/tests/Makefile b/drivers/gpu/drm/xe/tests/Makefile index 8cf2367449d8..6e58931fddd4 100644 --- a/drivers/gpu/drm/xe/tests/Makefile +++ b/drivers/gpu/drm/xe/tests/Makefile @@ -11,6 +11,7 @@ xe_live_test-y = xe_live_test_mod.o \ # Normal kunit tests obj-$(CONFIG_DRM_XE_KUNIT_TEST) += xe_test.o xe_test-y = xe_test_mod.o \ + xe_args_test.o \ xe_pci_test.o \ xe_rtp_test.o \ xe_wa_test.o diff --git a/drivers/gpu/drm/xe/tests/xe_args_test.c b/drivers/gpu/drm/xe/tests/xe_args_test.c new file mode 100644 index 000000000000..f3fb23aa5d2e --- /dev/null +++ b/drivers/gpu/drm/xe/tests/xe_args_test.c @@ -0,0 +1,221 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright © 2024 Intel Corporation + */ + +#include <kunit/test.h> + +#include "xe_args.h" + +static void call_args_example(struct kunit *test) +{ +#define foo X, Y, Z, Q +#define bar COUNT_ARGS(foo) +#define buz CALL_ARGS(COUNT_ARGS, foo) + + KUNIT_EXPECT_EQ(test, bar, 1); + KUNIT_EXPECT_EQ(test, buz, 4); + +#undef foo +#undef bar +#undef buz +} + +static void drop_first_arg_example(struct kunit *test) +{ +#define foo X, Y, Z, Q +#define bar CALL_ARGS(COUNT_ARGS, DROP_FIRST_ARG(foo)) + + KUNIT_EXPECT_EQ(test, bar, 3); + +#undef foo +#undef bar +} + +static void first_arg_example(struct kunit *test) +{ + int X = 1; + +#define foo X, Y, Z, Q +#define bar FIRST_ARG(foo) + + KUNIT_EXPECT_EQ(test, bar, X); + KUNIT_EXPECT_STREQ(test, __stringify(bar), "X"); + +#undef foo +#undef bar +} + +static void last_arg_example(struct kunit *test) +{ + int Q = 1; + +#define foo X, Y, Z, Q +#define bar LAST_ARG(foo) + + KUNIT_EXPECT_EQ(test, bar, Q); + KUNIT_EXPECT_STREQ(test, __stringify(bar), "Q"); + +#undef foo +#undef bar +} + +static void pick_arg_example(struct kunit *test) +{ + int Y = 1, Z = 2; + +#define foo X, Y, Z, Q +#define bar PICK_ARG(2, foo) +#define buz PICK_ARG3(foo) + + KUNIT_EXPECT_EQ(test, bar, Y); + KUNIT_EXPECT_STREQ(test, __stringify(bar), "Y"); + KUNIT_EXPECT_EQ(test, buz, Z); + KUNIT_EXPECT_STREQ(test, __stringify(buz), "Z"); + +#undef foo +#undef bar +#undef buz +} + +static void sep_comma_example(struct kunit *test) +{ +#define foo(f) f(X) f(Y) f(Z) f(Q) +#define bar DROP_FIRST_ARG(foo(ARGS_SEP_COMMA __stringify)) +#define buz CALL_ARGS(COUNT_ARGS, DROP_FIRST_ARG(foo(ARGS_SEP_COMMA))) + + static const char * const a[] = { bar }; + + KUNIT_EXPECT_STREQ(test, a[0], "X"); + KUNIT_EXPECT_STREQ(test, a[1], "Y"); + KUNIT_EXPECT_STREQ(test, a[2], "Z"); + KUNIT_EXPECT_STREQ(test, a[3], "Q"); + + KUNIT_EXPECT_EQ(test, buz, 4); + +#undef foo +#undef bar +#undef buz +} + +#define NO_ARGS +#define FOO_ARGS X, Y, Z, Q +#define MAX_ARGS -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12 + +static void count_args_test(struct kunit *test) +{ + int count; + + /* COUNT_ARGS() counts to 12 */ + + count = COUNT_ARGS(); + KUNIT_EXPECT_EQ(test, count, 0); + + count = COUNT_ARGS(1); + KUNIT_EXPECT_EQ(test, count, 1); + + count = COUNT_ARGS(a, b, c, d, e); + KUNIT_EXPECT_EQ(test, count, 5); + + count = COUNT_ARGS(a, b, c, d, e, f, g, h, i, j, k, l); + KUNIT_EXPECT_EQ(test, count, 12); + + /* COUNT_ARGS() does not expand params */ + + count = COUNT_ARGS(NO_ARGS); + KUNIT_EXPECT_EQ(test, count, 1); + + count = COUNT_ARGS(FOO_ARGS); + KUNIT_EXPECT_EQ(test, count, 1); +} + +static void call_args_test(struct kunit *test) +{ + int count; + + count = CALL_ARGS(COUNT_ARGS, NO_ARGS); + KUNIT_EXPECT_EQ(test, count, 0); + KUNIT_EXPECT_EQ(test, CALL_ARGS(COUNT_ARGS, NO_ARGS), 0); + KUNIT_EXPECT_EQ(test, CALL_ARGS(COUNT_ARGS, FOO_ARGS), 4); + KUNIT_EXPECT_EQ(test, CALL_ARGS(COUNT_ARGS, FOO_ARGS, FOO_ARGS), 8); + KUNIT_EXPECT_EQ(test, CALL_ARGS(COUNT_ARGS, MAX_ARGS), 12); +} + +static void drop_first_arg_test(struct kunit *test) +{ + int Y = -2, Z = -3, Q = -4; + int a[] = { DROP_FIRST_ARG(FOO_ARGS) }; + + KUNIT_EXPECT_EQ(test, DROP_FIRST_ARG(0, -1), -1); + KUNIT_EXPECT_EQ(test, DROP_FIRST_ARG(DROP_FIRST_ARG(0, -1, -2)), -2); + + KUNIT_EXPECT_EQ(test, CALL_ARGS(COUNT_ARGS, DROP_FIRST_ARG(FOO_ARGS)), 3); + KUNIT_EXPECT_EQ(test, DROP_FIRST_ARG(DROP_FIRST_ARG(DROP_FIRST_ARG(FOO_ARGS))), -4); + KUNIT_EXPECT_EQ(test, a[0], -2); + KUNIT_EXPECT_EQ(test, a[1], -3); + KUNIT_EXPECT_EQ(test, a[2], -4); + +#define foo DROP_FIRST_ARG(FOO_ARGS) +#define bar DROP_FIRST_ARG(DROP_FIRST_ARG(FOO_ARGS)) +#define buz DROP_FIRST_ARG(DROP_FIRST_ARG(DROP_FIRST_ARG(FOO_ARGS))) + + KUNIT_EXPECT_EQ(test, CALL_ARGS(COUNT_ARGS, foo), 3); + KUNIT_EXPECT_EQ(test, CALL_ARGS(COUNT_ARGS, bar), 2); + KUNIT_EXPECT_EQ(test, CALL_ARGS(COUNT_ARGS, buz), 1); + KUNIT_EXPECT_STREQ(test, __stringify(buz), "Q"); + +#undef foo +#undef bar +#undef buz +} + +static void first_arg_test(struct kunit *test) +{ + int X = -1; + int a[] = { FIRST_ARG(FOO_ARGS) }; + + KUNIT_EXPECT_EQ(test, FIRST_ARG(-1, -2), -1); + + KUNIT_EXPECT_EQ(test, CALL_ARGS(COUNT_ARGS, FIRST_ARG(FOO_ARGS)), 1); + KUNIT_EXPECT_EQ(test, FIRST_ARG(FOO_ARGS), -1); + KUNIT_EXPECT_EQ(test, a[0], -1); + KUNIT_EXPECT_STREQ(test, __stringify(FIRST_ARG(FOO_ARGS)), "X"); +} + +static void last_arg_test(struct kunit *test) +{ + int Q = -4; + int a[] = { LAST_ARG(FOO_ARGS) }; + + KUNIT_EXPECT_EQ(test, LAST_ARG(-1, -2), -2); + + KUNIT_EXPECT_EQ(test, CALL_ARGS(COUNT_ARGS, LAST_ARG(FOO_ARGS)), 1); + KUNIT_EXPECT_EQ(test, LAST_ARG(FOO_ARGS), -4); + KUNIT_EXPECT_EQ(test, a[0], -4); + KUNIT_EXPECT_STREQ(test, __stringify(LAST_ARG(FOO_ARGS)), "Q"); + + KUNIT_EXPECT_EQ(test, LAST_ARG(MAX_ARGS), -12); + KUNIT_EXPECT_STREQ(test, __stringify(LAST_ARG(MAX_ARGS)), "-12"); +} + +static struct kunit_case args_tests[] = { + KUNIT_CASE(count_args_test), + KUNIT_CASE(call_args_example), + KUNIT_CASE(call_args_test), + KUNIT_CASE(drop_first_arg_example), + KUNIT_CASE(drop_first_arg_test), + KUNIT_CASE(first_arg_example), + KUNIT_CASE(first_arg_test), + KUNIT_CASE(last_arg_example), + KUNIT_CASE(last_arg_test), + KUNIT_CASE(pick_arg_example), + KUNIT_CASE(sep_comma_example), + {} +}; + +static struct kunit_suite args_test_suite = { + .name = "args", + .test_cases = args_tests, +}; + +kunit_test_suite(args_test_suite); diff --git a/drivers/gpu/drm/xe/tests/xe_gt_sriov_pf_service_test.c b/drivers/gpu/drm/xe/tests/xe_gt_sriov_pf_service_test.c new file mode 100644 index 000000000000..b683585db852 --- /dev/null +++ b/drivers/gpu/drm/xe/tests/xe_gt_sriov_pf_service_test.c @@ -0,0 +1,232 @@ +// SPDX-License-Identifier: GPL-2.0 AND MIT +/* + * Copyright © 2024 Intel Corporation + */ + +#include <kunit/test.h> + +#include "xe_device.h" +#include "xe_kunit_helpers.h" +#include "xe_pci_test.h" + +static int pf_service_test_init(struct kunit *test) +{ + struct xe_pci_fake_data fake = { + .sriov_mode = XE_SRIOV_MODE_PF, + .platform = XE_TIGERLAKE, /* some random platform */ + .subplatform = XE_SUBPLATFORM_NONE, + }; + struct xe_device *xe; + struct xe_gt *gt; + + test->priv = &fake; + xe_kunit_helper_xe_device_test_init(test); + + xe = test->priv; + KUNIT_ASSERT_EQ(test, xe_sriov_init(xe), 0); + + gt = xe_device_get_gt(xe, 0); + pf_init_versions(gt); + + /* + * sanity check: + * - all supported platforms VF/PF ABI versions must be defined + * - base version can't be newer than latest + */ + KUNIT_ASSERT_NE(test, 0, gt->sriov.pf.service.version.base.major); + KUNIT_ASSERT_NE(test, 0, gt->sriov.pf.service.version.latest.major); + KUNIT_ASSERT_LE(test, gt->sriov.pf.service.version.base.major, + gt->sriov.pf.service.version.latest.major); + if (gt->sriov.pf.service.version.base.major == gt->sriov.pf.service.version.latest.major) + KUNIT_ASSERT_LE(test, gt->sriov.pf.service.version.base.minor, + gt->sriov.pf.service.version.latest.minor); + + test->priv = gt; + return 0; +} + +static void pf_negotiate_any(struct kunit *test) +{ + struct xe_gt *gt = test->priv; + u32 major, minor; + + KUNIT_ASSERT_EQ(test, 0, + pf_negotiate_version(gt, VF2PF_HANDSHAKE_MAJOR_ANY, + VF2PF_HANDSHAKE_MINOR_ANY, + &major, &minor)); + KUNIT_ASSERT_EQ(test, major, gt->sriov.pf.service.version.latest.major); + KUNIT_ASSERT_EQ(test, minor, gt->sriov.pf.service.version.latest.minor); +} + +static void pf_negotiate_base_match(struct kunit *test) +{ + struct xe_gt *gt = test->priv; + u32 major, minor; + + KUNIT_ASSERT_EQ(test, 0, + pf_negotiate_version(gt, + gt->sriov.pf.service.version.base.major, + gt->sriov.pf.service.version.base.minor, + &major, &minor)); + KUNIT_ASSERT_EQ(test, major, gt->sriov.pf.service.version.base.major); + KUNIT_ASSERT_EQ(test, minor, gt->sriov.pf.service.version.base.minor); +} + +static void pf_negotiate_base_newer(struct kunit *test) +{ + struct xe_gt *gt = test->priv; + u32 major, minor; + + KUNIT_ASSERT_EQ(test, 0, + pf_negotiate_version(gt, + gt->sriov.pf.service.version.base.major, + gt->sriov.pf.service.version.base.minor + 1, + &major, &minor)); + KUNIT_ASSERT_EQ(test, major, gt->sriov.pf.service.version.base.major); + KUNIT_ASSERT_GE(test, minor, gt->sriov.pf.service.version.base.minor); + if (gt->sriov.pf.service.version.base.major == gt->sriov.pf.service.version.latest.major) + KUNIT_ASSERT_LE(test, minor, gt->sriov.pf.service.version.latest.minor); + else + KUNIT_FAIL(test, "FIXME: don't know how to test multi-version yet!\n"); +} + +static void pf_negotiate_base_next(struct kunit *test) +{ + struct xe_gt *gt = test->priv; + u32 major, minor; + + KUNIT_ASSERT_EQ(test, 0, + pf_negotiate_version(gt, + gt->sriov.pf.service.version.base.major + 1, 0, + &major, &minor)); + KUNIT_ASSERT_GE(test, major, gt->sriov.pf.service.version.base.major); + KUNIT_ASSERT_LE(test, major, gt->sriov.pf.service.version.latest.major); + if (major == gt->sriov.pf.service.version.latest.major) + KUNIT_ASSERT_LE(test, minor, gt->sriov.pf.service.version.latest.minor); + else + KUNIT_FAIL(test, "FIXME: don't know how to test multi-version yet!\n"); +} + +static void pf_negotiate_base_older(struct kunit *test) +{ + struct xe_gt *gt = test->priv; + u32 major, minor; + + if (!gt->sriov.pf.service.version.base.minor) + kunit_skip(test, "no older minor\n"); + + KUNIT_ASSERT_NE(test, 0, + pf_negotiate_version(gt, + gt->sriov.pf.service.version.base.major, + gt->sriov.pf.service.version.base.minor - 1, + &major, &minor)); +} + +static void pf_negotiate_base_prev(struct kunit *test) +{ + struct xe_gt *gt = test->priv; + u32 major, minor; + + KUNIT_ASSERT_NE(test, 0, + pf_negotiate_version(gt, + gt->sriov.pf.service.version.base.major - 1, 1, + &major, &minor)); +} + +static void pf_negotiate_latest_match(struct kunit *test) +{ + struct xe_gt *gt = test->priv; + u32 major, minor; + + KUNIT_ASSERT_EQ(test, 0, + pf_negotiate_version(gt, + gt->sriov.pf.service.version.latest.major, + gt->sriov.pf.service.version.latest.minor, + &major, &minor)); + KUNIT_ASSERT_EQ(test, major, gt->sriov.pf.service.version.latest.major); + KUNIT_ASSERT_EQ(test, minor, gt->sriov.pf.service.version.latest.minor); +} + +static void pf_negotiate_latest_newer(struct kunit *test) +{ + struct xe_gt *gt = test->priv; + u32 major, minor; + + KUNIT_ASSERT_EQ(test, 0, + pf_negotiate_version(gt, + gt->sriov.pf.service.version.latest.major, + gt->sriov.pf.service.version.latest.minor + 1, + &major, &minor)); + KUNIT_ASSERT_EQ(test, major, gt->sriov.pf.service.version.latest.major); + KUNIT_ASSERT_EQ(test, minor, gt->sriov.pf.service.version.latest.minor); +} + +static void pf_negotiate_latest_next(struct kunit *test) +{ + struct xe_gt *gt = test->priv; + u32 major, minor; + + KUNIT_ASSERT_EQ(test, 0, + pf_negotiate_version(gt, + gt->sriov.pf.service.version.latest.major + 1, 0, + &major, &minor)); + KUNIT_ASSERT_EQ(test, major, gt->sriov.pf.service.version.latest.major); + KUNIT_ASSERT_EQ(test, minor, gt->sriov.pf.service.version.latest.minor); +} + +static void pf_negotiate_latest_older(struct kunit *test) +{ + struct xe_gt *gt = test->priv; + u32 major, minor; + + if (!gt->sriov.pf.service.version.latest.minor) + kunit_skip(test, "no older minor\n"); + + KUNIT_ASSERT_EQ(test, 0, + pf_negotiate_version(gt, + gt->sriov.pf.service.version.latest.major, + gt->sriov.pf.service.version.latest.minor - 1, + &major, &minor)); + KUNIT_ASSERT_EQ(test, major, gt->sriov.pf.service.version.latest.major); + KUNIT_ASSERT_EQ(test, minor, gt->sriov.pf.service.version.latest.minor - 1); +} + +static void pf_negotiate_latest_prev(struct kunit *test) +{ + struct xe_gt *gt = test->priv; + u32 major, minor; + + if (gt->sriov.pf.service.version.base.major == gt->sriov.pf.service.version.latest.major) + kunit_skip(test, "no prev major"); + + KUNIT_ASSERT_EQ(test, 0, + pf_negotiate_version(gt, + gt->sriov.pf.service.version.latest.major - 1, + gt->sriov.pf.service.version.base.minor + 1, + &major, &minor)); + KUNIT_ASSERT_EQ(test, major, gt->sriov.pf.service.version.latest.major - 1); + KUNIT_ASSERT_GE(test, major, gt->sriov.pf.service.version.base.major); +} + +static struct kunit_case pf_service_test_cases[] = { + KUNIT_CASE(pf_negotiate_any), + KUNIT_CASE(pf_negotiate_base_match), + KUNIT_CASE(pf_negotiate_base_newer), + KUNIT_CASE(pf_negotiate_base_next), + KUNIT_CASE(pf_negotiate_base_older), + KUNIT_CASE(pf_negotiate_base_prev), + KUNIT_CASE(pf_negotiate_latest_match), + KUNIT_CASE(pf_negotiate_latest_newer), + KUNIT_CASE(pf_negotiate_latest_next), + KUNIT_CASE(pf_negotiate_latest_older), + KUNIT_CASE(pf_negotiate_latest_prev), + {} +}; + +static struct kunit_suite pf_service_suite = { + .name = "pf_service", + .test_cases = pf_service_test_cases, + .init = pf_service_test_init, +}; + +kunit_test_suite(pf_service_suite); diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c index 977d5f4e4490..b6e7f80c3774 100644 --- a/drivers/gpu/drm/xe/tests/xe_migrate.c +++ b/drivers/gpu/drm/xe/tests/xe_migrate.c @@ -62,36 +62,6 @@ static int run_sanity_job(struct xe_migrate *m, struct xe_device *xe, return 0; } -static void -sanity_populate_cb(struct xe_migrate_pt_update *pt_update, - struct xe_tile *tile, struct iosys_map *map, void *dst, - u32 qword_ofs, u32 num_qwords, - const struct xe_vm_pgtable_update *update) -{ - struct migrate_test_params *p = - to_migrate_test_params(xe_cur_kunit_priv(XE_TEST_LIVE_MIGRATE)); - int i; - u64 *ptr = dst; - u64 value; - - for (i = 0; i < num_qwords; i++) { - value = (qword_ofs + i - update->ofs) * 0x1111111111111111ULL; - if (map) - xe_map_wr(tile_to_xe(tile), map, (qword_ofs + i) * - sizeof(u64), u64, value); - else - ptr[i] = value; - } - - kunit_info(xe_cur_kunit(), "Used %s.\n", map ? "CPU" : "GPU"); - if (p->force_gpu && map) - KUNIT_FAIL(xe_cur_kunit(), "GPU pagetable update used CPU.\n"); -} - -static const struct xe_migrate_pt_update_ops sanity_ops = { - .populate = sanity_populate_cb, -}; - #define check(_retval, _expected, str, _test) \ do { if ((_retval) != (_expected)) { \ KUNIT_FAIL(_test, "Sanity check failed: " str \ @@ -209,57 +179,6 @@ static void test_copy_vram(struct xe_migrate *m, struct xe_bo *bo, test_copy(m, bo, test, region); } -static void test_pt_update(struct xe_migrate *m, struct xe_bo *pt, - struct kunit *test, bool force_gpu) -{ - struct xe_device *xe = tile_to_xe(m->tile); - struct dma_fence *fence; - u64 retval, expected; - ktime_t then, now; - int i; - - struct xe_vm_pgtable_update update = { - .ofs = 1, - .qwords = 0x10, - .pt_bo = pt, - }; - struct xe_migrate_pt_update pt_update = { - .ops = &sanity_ops, - }; - struct migrate_test_params p = { - .base.id = XE_TEST_LIVE_MIGRATE, - .force_gpu = force_gpu, - }; - - test->priv = &p; - /* Test xe_migrate_update_pgtables() updates the pagetable as expected */ - expected = 0xf0f0f0f0f0f0f0f0ULL; - xe_map_memset(xe, &pt->vmap, 0, (u8)expected, pt->size); - - then = ktime_get(); - fence = xe_migrate_update_pgtables(m, m->q->vm, NULL, m->q, &update, 1, - NULL, 0, &pt_update); - now = ktime_get(); - if (sanity_fence_failed(xe, fence, "Migration pagetable update", test)) - return; - - kunit_info(test, "Updating without syncing took %llu us,\n", - (unsigned long long)ktime_to_us(ktime_sub(now, then))); - - dma_fence_put(fence); - retval = xe_map_rd(xe, &pt->vmap, 0, u64); - check(retval, expected, "PTE[0] must stay untouched", test); - - for (i = 0; i < update.qwords; i++) { - retval = xe_map_rd(xe, &pt->vmap, (update.ofs + i) * 8, u64); - check(retval, i * 0x1111111111111111ULL, "PTE update", test); - } - - retval = xe_map_rd(xe, &pt->vmap, 8 * (update.ofs + update.qwords), - u64); - check(retval, expected, "PTE[0x11] must stay untouched", test); -} - static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test) { struct xe_tile *tile = m->tile; @@ -398,11 +317,6 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test) test_copy_vram(m, big, test); } - kunit_info(test, "Testing page table update using CPU if GPU idle.\n"); - test_pt_update(m, pt, test, false); - kunit_info(test, "Testing page table update using GPU\n"); - test_pt_update(m, pt, test, true); - out: xe_bb_free(bb, NULL); free_tiny: diff --git a/drivers/gpu/drm/xe/xe_args.h b/drivers/gpu/drm/xe/xe_args.h new file mode 100644 index 000000000000..4dbc7e53c624 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_args.h @@ -0,0 +1,143 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2024 Intel Corporation + */ + +#ifndef _XE_ARGS_H_ +#define _XE_ARGS_H_ + +#include <linux/args.h> + +/* + * Why don't the following macros have the XE prefix? + * + * Once we find more potential users outside of the Xe driver, we plan to move + * all of the following macros unchanged to linux/args.h. + */ + +/** + * CALL_ARGS - Invoke a macro, but allow parameters to be expanded beforehand. + * @f: name of the macro to invoke + * @args: arguments for the macro + * + * This macro allows calling macros which names might generated or we want to + * make sure it's arguments will be correctly expanded. + * + * Example: + * + * #define foo X,Y,Z,Q + * #define bar COUNT_ARGS(foo) + * #define buz CALL_ARGS(COUNT_ARGS, foo) + * + * With above definitions bar expands to 1 while buz expands to 4. + */ +#define CALL_ARGS(f, args...) __CALL_ARGS(f, args) +#define __CALL_ARGS(f, args...) f(args) + +/** + * DROP_FIRST_ARG - Returns all arguments except the first one. + * @args: arguments + * + * This helper macro allows manipulation the argument list before passing it + * to the next level macro. + * + * Example: + * + * #define foo X,Y,Z,Q + * #define bar CALL_ARGS(COUNT_ARGS, DROP_FIRST_ARG(foo)) + * + * With above definitions bar expands to 3. + */ +#define DROP_FIRST_ARG(args...) __DROP_FIRST_ARG(args) +#define __DROP_FIRST_ARG(a, b...) b + +/** + * FIRST_ARG - Returns the first argument. + * @args: arguments + * + * This helper macro allows manipulation the argument list before passing it + * to the next level macro. + * + * Example: + * + * #define foo X,Y,Z,Q + * #define bar FIRST_ARG(foo) + * + * With above definitions bar expands to X. + */ +#define FIRST_ARG(args...) __FIRST_ARG(args) +#define __FIRST_ARG(a, b...) a + +/** + * LAST_ARG - Returns the last argument. + * @args: arguments + * + * This helper macro allows manipulation the argument list before passing it + * to the next level macro. + * + * Like COUNT_ARGS() this macro works up to 12 arguments. + * + * Example: + * + * #define foo X,Y,Z,Q + * #define bar LAST_ARG(foo) + * + * With above definitions bar expands to Q. + */ +#define LAST_ARG(args...) __LAST_ARG(args) +#define __LAST_ARG(args...) PICK_ARG(COUNT_ARGS(args), args) + +/** + * PICK_ARG - Returns the n-th argument. + * @n: argument number to be returned + * @args: arguments + * + * This helper macro allows manipulation the argument list before passing it + * to the next level macro. + * + * Like COUNT_ARGS() this macro supports n up to 12. + * Specialized macros PICK_ARG1() to PICK_ARG12() are also available. + * + * Example: + * + * #define foo X,Y,Z,Q + * #define bar PICK_ARG(2, foo) + * #define buz PICK_ARG3(foo) + * + * With above definitions bar expands to Y and buz expands to Z. + */ +#define PICK_ARG(n, args...) __PICK_ARG(n, args) +#define __PICK_ARG(n, args...) CALL_ARGS(CONCATENATE(PICK_ARG, n), args) +#define PICK_ARG1(args...) FIRST_ARG(args) +#define PICK_ARG2(args...) PICK_ARG1(DROP_FIRST_ARG(args)) +#define PICK_ARG3(args...) PICK_ARG2(DROP_FIRST_ARG(args)) +#define PICK_ARG4(args...) PICK_ARG3(DROP_FIRST_ARG(args)) +#define PICK_ARG5(args...) PICK_ARG4(DROP_FIRST_ARG(args)) +#define PICK_ARG6(args...) PICK_ARG5(DROP_FIRST_ARG(args)) +#define PICK_ARG7(args...) PICK_ARG6(DROP_FIRST_ARG(args)) +#define PICK_ARG8(args...) PICK_ARG7(DROP_FIRST_ARG(args)) +#define PICK_ARG9(args...) PICK_ARG8(DROP_FIRST_ARG(args)) +#define PICK_ARG10(args...) PICK_ARG9(DROP_FIRST_ARG(args)) +#define PICK_ARG11(args...) PICK_ARG10(DROP_FIRST_ARG(args)) +#define PICK_ARG12(args...) PICK_ARG11(DROP_FIRST_ARG(args)) + +/** + * ARGS_SEP_COMMA - Definition of a comma character. + * + * This definition can be used in cases where any intermediate macro expects + * fixed number of arguments, but we want to pass more arguments which can + * be properly evaluated only by the next level macro. + * + * Example: + * + * #define foo(f) f(X) f(Y) f(Z) f(Q) + * #define bar DROP_FIRST_ARG(foo(ARGS_SEP_COMMA __stringify)) + * #define buz CALL_ARGS(COUNT_ARGS, DROP_FIRST_ARG(foo(ARGS_SEP_COMMA))) + * + * With above definitions bar expands to + * "X", "Y", "Z", "Q" + * and buz expands to 4. + */ +#define ARGS_SEP_COMMA , + +#endif diff --git a/drivers/gpu/drm/xe/xe_bb.c b/drivers/gpu/drm/xe/xe_bb.c index 541361caff3b..a13e0b3a169e 100644 --- a/drivers/gpu/drm/xe/xe_bb.c +++ b/drivers/gpu/drm/xe/xe_bb.c @@ -6,7 +6,7 @@ #include "xe_bb.h" #include "instructions/xe_mi_commands.h" -#include "regs/xe_gpu_commands.h" +#include "xe_assert.h" #include "xe_device.h" #include "xe_exec_queue_types.h" #include "xe_gt.h" diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index bc1f794e3e61..03f7fe7acf8c 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -96,6 +96,20 @@ bool xe_bo_is_stolen(struct xe_bo *bo) } /** + * xe_bo_has_single_placement - check if BO is placed only in one memory location + * @bo: The BO + * + * This function checks whether a given BO is placed in only one memory location. + * + * Returns: true if the BO is placed in a single memory location, false otherwise. + * + */ +bool xe_bo_has_single_placement(struct xe_bo *bo) +{ + return bo->placement.num_placement == 1; +} + +/** * xe_bo_is_stolen_devmem - check if BO is of stolen type accessed via PCI BAR * @bo: The BO * @@ -302,6 +316,18 @@ static int xe_tt_map_sg(struct ttm_tt *tt) return 0; } +static void xe_tt_unmap_sg(struct ttm_tt *tt) +{ + struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm); + + if (xe_tt->sg) { + dma_unmap_sgtable(xe_tt->dev, xe_tt->sg, + DMA_BIDIRECTIONAL, 0); + sg_free_table(xe_tt->sg); + xe_tt->sg = NULL; + } +} + struct sg_table *xe_bo_sg(struct xe_bo *bo) { struct ttm_tt *tt = bo->ttm.ttm; @@ -377,27 +403,15 @@ static int xe_ttm_tt_populate(struct ttm_device *ttm_dev, struct ttm_tt *tt, if (err) return err; - /* A follow up may move this xe_bo_move when BO is moved to XE_PL_TT */ - err = xe_tt_map_sg(tt); - if (err) - ttm_pool_free(&ttm_dev->pool, tt); - return err; } static void xe_ttm_tt_unpopulate(struct ttm_device *ttm_dev, struct ttm_tt *tt) { - struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm); - if (tt->page_flags & TTM_TT_FLAG_EXTERNAL) return; - if (xe_tt->sg) { - dma_unmap_sgtable(xe_tt->dev, xe_tt->sg, - DMA_BIDIRECTIONAL, 0); - sg_free_table(xe_tt->sg); - xe_tt->sg = NULL; - } + xe_tt_unmap_sg(tt); return ttm_pool_free(&ttm_dev->pool, tt); } @@ -628,17 +642,21 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, bool handle_system_ccs = (!IS_DGFX(xe) && xe_bo_needs_ccs_pages(bo) && ttm && ttm_tt_is_populated(ttm)) ? true : false; int ret = 0; + /* Bo creation path, moving to system or TT. */ if ((!old_mem && ttm) && !handle_system_ccs) { - ttm_bo_move_null(ttm_bo, new_mem); - return 0; + if (new_mem->mem_type == XE_PL_TT) + ret = xe_tt_map_sg(ttm); + if (!ret) + ttm_bo_move_null(ttm_bo, new_mem); + goto out; } if (ttm_bo->type == ttm_bo_type_sg) { ret = xe_bo_move_notify(bo, ctx); if (!ret) ret = xe_bo_move_dmabuf(ttm_bo, new_mem); - goto out; + return ret; } tt_has_data = ttm && (ttm_tt_is_populated(ttm) || @@ -650,6 +668,12 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, needs_clear = (ttm && ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC) || (!ttm && ttm_bo->type == ttm_bo_type_device); + if (new_mem->mem_type == XE_PL_TT) { + ret = xe_tt_map_sg(ttm); + if (ret) + goto out; + } + if ((move_lacks_source && !needs_clear)) { ttm_bo_move_null(ttm_bo, new_mem); goto out; @@ -786,8 +810,11 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, xe_pm_runtime_put(xe); out: - return ret; + if ((!ttm_bo->resource || ttm_bo->resource->mem_type == XE_PL_SYSTEM) && + ttm_bo->ttm) + xe_tt_unmap_sg(ttm_bo->ttm); + return ret; } /** diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h index a885b14bf595..6de894c728f5 100644 --- a/drivers/gpu/drm/xe/xe_bo.h +++ b/drivers/gpu/drm/xe/xe_bo.h @@ -206,6 +206,7 @@ bool mem_type_is_vram(u32 mem_type); bool xe_bo_is_vram(struct xe_bo *bo); bool xe_bo_is_stolen(struct xe_bo *bo); bool xe_bo_is_stolen_devmem(struct xe_bo *bo); +bool xe_bo_has_single_placement(struct xe_bo *bo); uint64_t vram_region_gpu_offset(struct ttm_resource *res); bool xe_bo_can_migrate(struct xe_bo *bo, u32 mem_type); diff --git a/drivers/gpu/drm/xe/xe_debugfs.c b/drivers/gpu/drm/xe/xe_debugfs.c index 0b7aebaae843..1011e5d281fa 100644 --- a/drivers/gpu/drm/xe/xe_debugfs.c +++ b/drivers/gpu/drm/xe/xe_debugfs.c @@ -12,7 +12,10 @@ #include "xe_bo.h" #include "xe_device.h" +#include "xe_force_wake.h" #include "xe_gt_debugfs.h" +#include "xe_gt_printk.h" +#include "xe_guc_ads.h" #include "xe_pm.h" #include "xe_sriov.h" #include "xe_step.h" @@ -118,6 +121,58 @@ static const struct file_operations forcewake_all_fops = { .release = forcewake_release, }; +static ssize_t wedged_mode_show(struct file *f, char __user *ubuf, + size_t size, loff_t *pos) +{ + struct xe_device *xe = file_inode(f)->i_private; + char buf[32]; + int len = 0; + + len = scnprintf(buf, sizeof(buf), "%d\n", xe->wedged.mode); + + return simple_read_from_buffer(ubuf, size, pos, buf, len); +} + +static ssize_t wedged_mode_set(struct file *f, const char __user *ubuf, + size_t size, loff_t *pos) +{ + struct xe_device *xe = file_inode(f)->i_private; + struct xe_gt *gt; + u32 wedged_mode; + ssize_t ret; + u8 id; + + ret = kstrtouint_from_user(ubuf, size, 0, &wedged_mode); + if (ret) + return ret; + + if (wedged_mode > 2) + return -EINVAL; + + if (xe->wedged.mode == wedged_mode) + return 0; + + xe->wedged.mode = wedged_mode; + + xe_pm_runtime_get(xe); + for_each_gt(gt, xe, id) { + ret = xe_guc_ads_scheduler_policy_toggle_reset(>->uc.guc.ads); + if (ret) { + xe_gt_err(gt, "Failed to update GuC ADS scheduler policy. GuC may still cause engine reset even with wedged_mode=2\n"); + return -EIO; + } + } + xe_pm_runtime_put(xe); + + return size; +} + +static const struct file_operations wedged_mode_fops = { + .owner = THIS_MODULE, + .read = wedged_mode_show, + .write = wedged_mode_set, +}; + void xe_debugfs_register(struct xe_device *xe) { struct ttm_device *bdev = &xe->ttm; @@ -135,6 +190,9 @@ void xe_debugfs_register(struct xe_device *xe) debugfs_create_file("forcewake_all", 0400, root, xe, &forcewake_all_fops); + debugfs_create_file("wedged_mode", 0400, root, xe, + &wedged_mode_fops); + for (mem_type = XE_PL_VRAM0; mem_type <= XE_PL_VRAM1; ++mem_type) { man = ttm_manager_type(bdev, mem_type); diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index c3267a21957b..f89c781986a5 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -17,6 +17,7 @@ #include <drm/xe_drm.h> #include "display/xe_display.h" +#include "instructions/xe_gpu_commands.h" #include "regs/xe_gt_regs.h" #include "regs/xe_regs.h" #include "xe_bo.h" @@ -27,6 +28,7 @@ #include "xe_drv.h" #include "xe_exec.h" #include "xe_exec_queue.h" +#include "xe_force_wake.h" #include "xe_ggtt.h" #include "xe_gsc_proxy.h" #include "xe_gt.h" @@ -138,6 +140,9 @@ static long xe_drm_ioctl(struct file *file, unsigned int cmd, unsigned long arg) struct xe_device *xe = to_xe_device(file_priv->minor->dev); long ret; + if (xe_device_wedged(xe)) + return -ECANCELED; + ret = xe_pm_runtime_get_ioctl(xe); if (ret >= 0) ret = drm_ioctl(file, cmd, arg); @@ -153,6 +158,9 @@ static long xe_drm_compat_ioctl(struct file *file, unsigned int cmd, unsigned lo struct xe_device *xe = to_xe_device(file_priv->minor->dev); long ret; + if (xe_device_wedged(xe)) + return -ECANCELED; + ret = xe_pm_runtime_get_ioctl(xe); if (ret >= 0) ret = drm_compat_ioctl(file, cmd, arg); @@ -269,7 +277,10 @@ struct xe_device *xe_device_create(struct pci_dev *pdev, init_waitqueue_head(&xe->ufence_wq); - drmm_mutex_init(&xe->drm, &xe->usm.lock); + err = drmm_mutex_init(&xe->drm, &xe->usm.lock); + if (err) + goto err; + xa_init_flags(&xe->usm.asid_to_vm, XA_FLAGS_ALLOC); if (IS_ENABLED(CONFIG_DRM_XE_DEBUG)) { @@ -501,6 +512,8 @@ int xe_device_probe_early(struct xe_device *xe) if (err) return err; + xe->wedged.mode = xe_modparam.wedged_mode; + return 0; } @@ -808,3 +821,34 @@ u64 xe_device_uncanonicalize_addr(struct xe_device *xe, u64 address) { return address & GENMASK_ULL(xe->info.va_bits - 1, 0); } + +/** + * xe_device_declare_wedged - Declare device wedged + * @xe: xe device instance + * + * This is a final state that can only be cleared with a mudule + * re-probe (unbind + bind). + * In this state every IOCTL will be blocked so the GT cannot be used. + * In general it will be called upon any critical error such as gt reset + * failure or guc loading failure. + * If xe.wedged module parameter is set to 2, this function will be called + * on every single execution timeout (a.k.a. GPU hang) right after devcoredump + * snapshot capture. In this mode, GT reset won't be attempted so the state of + * the issue is preserved for further debugging. + */ +void xe_device_declare_wedged(struct xe_device *xe) +{ + if (xe->wedged.mode == 0) { + drm_dbg(&xe->drm, "Wedged mode is forcibly disabled\n"); + return; + } + + if (!atomic_xchg(&xe->wedged.flag, 1)) { + xe->needs_flr_on_fini = true; + drm_err(&xe->drm, + "CRITICAL: Xe has declared device %s as wedged.\n" + "IOCTLs and executions are blocked. Only a rebind may clear the failure\n" + "Please file a _new_ bug report at https://gitlab.freedesktop.org/drm/xe/kernel/issues/new\n", + dev_name(xe->drm.dev)); + } +} diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h index 7524a71c0d84..bb07f5669dbb 100644 --- a/drivers/gpu/drm/xe/xe_device.h +++ b/drivers/gpu/drm/xe/xe_device.h @@ -6,15 +6,9 @@ #ifndef _XE_DEVICE_H_ #define _XE_DEVICE_H_ -struct xe_exec_queue; -struct xe_file; - #include <drm/drm_util.h> -#include "regs/xe_gpu_commands.h" #include "xe_device_types.h" -#include "xe_force_wake.h" -#include "xe_macros.h" static inline struct xe_device *to_xe_device(const struct drm_device *dev) { @@ -169,4 +163,11 @@ u64 xe_device_uncanonicalize_addr(struct xe_device *xe, u64 address); void xe_device_td_flush(struct xe_device *xe); +static inline bool xe_device_wedged(struct xe_device *xe) +{ + return atomic_read(&xe->wedged.flag); +} + +void xe_device_declare_wedged(struct xe_device *xe); + #endif diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 7674afbb3a30..5c5e36de452a 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -281,6 +281,10 @@ struct xe_device { u8 has_heci_gscfi:1; /** @info.skip_guc_pc: Skip GuC based PM feature init */ u8 skip_guc_pc:1; + /** @info.has_atomic_enable_pte_bit: Device has atomic enable PTE bit */ + u8 has_atomic_enable_pte_bit:1; + /** @info.has_device_atomics_on_smem: Supports device atomics on SMEM */ + u8 has_device_atomics_on_smem:1; #if IS_ENABLED(CONFIG_DRM_XE_DISPLAY) struct { @@ -459,6 +463,14 @@ struct xe_device { /** @needs_flr_on_fini: requests function-reset on fini */ bool needs_flr_on_fini; + /** @wedged: Struct to control Wedged States and mode */ + struct { + /** @wedged.flag: Xe device faced a critical error and is now blocked. */ + atomic_t flag; + /** @wedged.mode: Mode controlled by kernel parameter and debugfs */ + int mode; + } wedged; + /* private: */ #if IS_ENABLED(CONFIG_DRM_XE_DISPLAY) diff --git a/drivers/gpu/drm/xe/xe_exec_queue.h b/drivers/gpu/drm/xe/xe_exec_queue.h index 02ce8d204622..48f6da53a292 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.h +++ b/drivers/gpu/drm/xe/xe_exec_queue.h @@ -26,6 +26,15 @@ void xe_exec_queue_fini(struct xe_exec_queue *q); void xe_exec_queue_destroy(struct kref *ref); void xe_exec_queue_assign_name(struct xe_exec_queue *q, u32 instance); +static inline struct xe_exec_queue * +xe_exec_queue_get_unless_zero(struct xe_exec_queue *q) +{ + if (kref_get_unless_zero(&q->refcount)) + return q; + + return NULL; +} + struct xe_exec_queue *xe_exec_queue_lookup(struct xe_file *xef, u32 id); static inline struct xe_exec_queue *xe_exec_queue_get(struct xe_exec_queue *q) diff --git a/drivers/gpu/drm/xe/xe_execlist.c b/drivers/gpu/drm/xe/xe_execlist.c index dece2785933c..e9dee1e14fef 100644 --- a/drivers/gpu/drm/xe/xe_execlist.c +++ b/drivers/gpu/drm/xe/xe_execlist.c @@ -9,7 +9,6 @@ #include "instructions/xe_mi_commands.h" #include "regs/xe_engine_regs.h" -#include "regs/xe_gpu_commands.h" #include "regs/xe_gt_regs.h" #include "regs/xe_lrc_layout.h" #include "xe_assert.h" diff --git a/drivers/gpu/drm/xe/xe_gsc.c b/drivers/gpu/drm/xe/xe_gsc.c index 60202b903687..8cc6420a9e7f 100644 --- a/drivers/gpu/drm/xe/xe_gsc.c +++ b/drivers/gpu/drm/xe/xe_gsc.c @@ -14,6 +14,7 @@ #include "xe_bo.h" #include "xe_device.h" #include "xe_exec_queue.h" +#include "xe_force_wake.h" #include "xe_gsc_proxy.h" #include "xe_gsc_submit.h" #include "xe_gt.h" diff --git a/drivers/gpu/drm/xe/xe_gsc_proxy.c b/drivers/gpu/drm/xe/xe_gsc_proxy.c index 1b908d238bd1..6d6d1068cf23 100644 --- a/drivers/gpu/drm/xe/xe_gsc_proxy.c +++ b/drivers/gpu/drm/xe/xe_gsc_proxy.c @@ -15,6 +15,7 @@ #include "abi/gsc_proxy_commands_abi.h" #include "regs/xe_gsc_regs.h" #include "xe_bo.h" +#include "xe_force_wake.h" #include "xe_gsc.h" #include "xe_gsc_submit.h" #include "xe_gt.h" diff --git a/drivers/gpu/drm/xe/xe_gsc_submit.c b/drivers/gpu/drm/xe/xe_gsc_submit.c index d34d03248843..9ede483d37ef 100644 --- a/drivers/gpu/drm/xe/xe_gsc_submit.c +++ b/drivers/gpu/drm/xe/xe_gsc_submit.c @@ -8,6 +8,7 @@ #include <linux/poison.h> #include "abi/gsc_command_header_abi.h" +#include "xe_assert.h" #include "xe_bb.h" #include "xe_exec_queue.h" #include "xe_gt_printk.h" diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 491d0413de15..36c7b1631fa6 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -160,7 +160,7 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q) if (q->hwe->class == XE_ENGINE_CLASS_RENDER) /* Big enough to emit all of the context's 3DSTATE */ - bb = xe_bb_new(gt, xe_lrc_size(gt_to_xe(gt), q->hwe->class), false); + bb = xe_bb_new(gt, xe_gt_lrc_size(gt, q->hwe->class), false); else /* Just pick a large BB size */ bb = xe_bb_new(gt, SZ_4K, false); @@ -244,7 +244,7 @@ int xe_gt_record_default_lrcs(struct xe_gt *gt) xe_tuning_process_lrc(hwe); default_lrc = drmm_kzalloc(&xe->drm, - xe_lrc_size(xe, hwe->class), + xe_gt_lrc_size(gt, hwe->class), GFP_KERNEL); if (!default_lrc) return -ENOMEM; @@ -294,7 +294,7 @@ int xe_gt_record_default_lrcs(struct xe_gt *gt) xe_map_memcpy_from(xe, default_lrc, &q->lrc[0].bo->vmap, xe_lrc_pphwsp_offset(&q->lrc[0]), - xe_lrc_size(xe, hwe->class)); + xe_gt_lrc_size(gt, hwe->class)); gt->default_lrc[hwe->class] = default_lrc; put_nop_q: @@ -477,6 +477,9 @@ static int all_fw_domain_init(struct xe_gt *gt) if (IS_SRIOV_PF(gt_to_xe(gt)) && !xe_gt_is_media_type(gt)) xe_lmtt_init_hw(>_to_tile(gt)->sriov.pf.lmtt); + if (IS_SRIOV_PF(gt_to_xe(gt))) + xe_gt_sriov_pf_init_hw(gt); + err = xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL); XE_WARN_ON(err); @@ -613,6 +616,9 @@ static int do_gt_restart(struct xe_gt *gt) if (IS_SRIOV_PF(gt_to_xe(gt)) && !xe_gt_is_media_type(gt)) xe_lmtt_init_hw(>_to_tile(gt)->sriov.pf.lmtt); + if (IS_SRIOV_PF(gt_to_xe(gt))) + xe_gt_sriov_pf_init_hw(gt); + xe_mocs_init(gt); err = xe_uc_start(>->uc); if (err) @@ -633,6 +639,9 @@ static int gt_reset(struct xe_gt *gt) { int err; + if (xe_device_wedged(gt_to_xe(gt))) + return -ECANCELED; + /* We only support GT resets with GuC submission */ if (!xe_device_uc_enabled(gt_to_xe(gt))) return -ENODEV; @@ -655,9 +664,7 @@ static int gt_reset(struct xe_gt *gt) xe_uc_stop_prepare(>->uc); xe_gt_pagefault_reset(gt); - err = xe_uc_stop(>->uc); - if (err) - goto err_out; + xe_uc_stop(>->uc); xe_gt_tlb_invalidation_reset(gt); @@ -685,7 +692,7 @@ err_msg: err_fail: xe_gt_err(gt, "reset failed (%pe)\n", ERR_PTR(err)); - gt_to_xe(gt)->needs_flr_on_fini = true; + xe_device_declare_wedged(gt_to_xe(gt)); return err; } diff --git a/drivers/gpu/drm/xe/xe_gt.h b/drivers/gpu/drm/xe/xe_gt.h index ed6ea8057e35..8474c50b1b30 100644 --- a/drivers/gpu/drm/xe/xe_gt.h +++ b/drivers/gpu/drm/xe/xe_gt.h @@ -8,6 +8,7 @@ #include <drm/drm_util.h> +#include "xe_device.h" #include "xe_device_types.h" #include "xe_hw_engine.h" @@ -58,6 +59,12 @@ struct xe_hw_engine *xe_gt_hw_engine(struct xe_gt *gt, u16 instance, bool logical); +static inline bool xe_gt_has_indirect_ring_state(struct xe_gt *gt) +{ + return gt->info.has_indirect_ring_state && + xe_device_uc_enabled(gt_to_xe(gt)); +} + static inline bool xe_gt_is_media_type(struct xe_gt *gt) { return gt->info.type == XE_GT_TYPE_MEDIA; diff --git a/drivers/gpu/drm/xe/xe_gt_ccs_mode.c b/drivers/gpu/drm/xe/xe_gt_ccs_mode.c index 396aeb5b9924..a34c9a24dafc 100644 --- a/drivers/gpu/drm/xe/xe_gt_ccs_mode.c +++ b/drivers/gpu/drm/xe/xe_gt_ccs_mode.c @@ -68,8 +68,8 @@ static void __xe_gt_apply_ccs_mode(struct xe_gt *gt, u32 num_engines) xe_mmio_write32(gt, CCS_MODE, mode); - xe_gt_info(gt, "CCS_MODE=%x config:%08x, num_engines:%d, num_slices:%d\n", - mode, config, num_engines, num_slices); + xe_gt_dbg(gt, "CCS_MODE=%x config:%08x, num_engines:%d, num_slices:%d\n", + mode, config, num_engines, num_slices); } void xe_gt_apply_ccs_mode(struct xe_gt *gt) diff --git a/drivers/gpu/drm/xe/xe_gt_clock.c b/drivers/gpu/drm/xe/xe_gt_clock.c index c7bca20f6b65..9ff2061133df 100644 --- a/drivers/gpu/drm/xe/xe_gt_clock.c +++ b/drivers/gpu/drm/xe/xe_gt_clock.c @@ -7,6 +7,7 @@ #include "regs/xe_gt_regs.h" #include "regs/xe_regs.h" +#include "xe_assert.h" #include "xe_device.h" #include "xe_gt.h" #include "xe_macros.h" diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c index 8cf0b2625efc..c5e562e143fd 100644 --- a/drivers/gpu/drm/xe/xe_gt_debugfs.c +++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c @@ -15,14 +15,17 @@ #include "xe_ggtt.h" #include "xe_gt.h" #include "xe_gt_mcr.h" +#include "xe_gt_sriov_pf_debugfs.h" #include "xe_gt_topology.h" #include "xe_hw_engine.h" #include "xe_lrc.h" #include "xe_macros.h" +#include "xe_mocs.h" #include "xe_pat.h" #include "xe_pm.h" #include "xe_reg_sr.h" #include "xe_reg_whitelist.h" +#include "xe_sriov.h" #include "xe_uc_debugfs.h" #include "xe_wa.h" @@ -200,6 +203,15 @@ static int pat(struct xe_gt *gt, struct drm_printer *p) return 0; } +static int mocs(struct xe_gt *gt, struct drm_printer *p) +{ + xe_pm_runtime_get(gt_to_xe(gt)); + xe_mocs_dump(gt, p); + xe_pm_runtime_put(gt_to_xe(gt)); + + return 0; +} + static int rcs_default_lrc(struct xe_gt *gt, struct drm_printer *p) { xe_pm_runtime_get(gt_to_xe(gt)); @@ -255,6 +267,7 @@ static const struct drm_info_list debugfs_list[] = { {"register-save-restore", .show = xe_gt_debugfs_simple_show, .data = register_save_restore}, {"workarounds", .show = xe_gt_debugfs_simple_show, .data = workarounds}, {"pat", .show = xe_gt_debugfs_simple_show, .data = pat}, + {"mocs", .show = xe_gt_debugfs_simple_show, .data = mocs}, {"default_lrc_rcs", .show = xe_gt_debugfs_simple_show, .data = rcs_default_lrc}, {"default_lrc_ccs", .show = xe_gt_debugfs_simple_show, .data = ccs_default_lrc}, {"default_lrc_bcs", .show = xe_gt_debugfs_simple_show, .data = bcs_default_lrc}, @@ -290,4 +303,7 @@ void xe_gt_debugfs_register(struct xe_gt *gt) root, minor); xe_uc_debugfs_register(>->uc, root); + + if (IS_SRIOV_PF(xe)) + xe_gt_sriov_pf_debugfs_register(gt, root); } diff --git a/drivers/gpu/drm/xe/xe_gt_idle.c b/drivers/gpu/drm/xe/xe_gt_idle.c index 8fc0f3f6ecc5..a4f6f0a96d05 100644 --- a/drivers/gpu/drm/xe/xe_gt_idle.c +++ b/drivers/gpu/drm/xe/xe_gt_idle.c @@ -5,6 +5,7 @@ #include <drm/drm_managed.h> +#include "xe_force_wake.h" #include "xe_device.h" #include "xe_gt.h" #include "xe_gt_idle.h" diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.h b/drivers/gpu/drm/xe/xe_gt_mcr.h index a7f4ab1aa584..e7d03e001a49 100644 --- a/drivers/gpu/drm/xe/xe_gt_mcr.h +++ b/drivers/gpu/drm/xe/xe_gt_mcr.h @@ -40,4 +40,28 @@ void xe_gt_mcr_get_dss_steering(struct xe_gt *gt, unsigned int dss, u16 *group, for_each_dss((dss), (gt)) \ for_each_if((xe_gt_mcr_get_dss_steering((gt), (dss), &(group), &(instance)), true)) +/* + * Loop over each DSS available for geometry and determine the group and + * instance IDs that should be used to steer MCR accesses toward this DSS. + * @dss: DSS ID to obtain steering for + * @gt: GT structure + * @group: steering group ID, data type: u16 + * @instance: steering instance ID, data type: u16 + */ +#define for_each_geometry_dss(dss, gt, group, instance) \ + for_each_dss_steering(dss, gt, group, instance) \ + if (xe_gt_has_geometry_dss(gt, dss)) + +/* + * Loop over each DSS available for compute and determine the group and + * instance IDs that should be used to steer MCR accesses toward this DSS. + * @dss: DSS ID to obtain steering for + * @gt: GT structure + * @group: steering group ID, data type: u16 + * @instance: steering instance ID, data type: u16 + */ +#define for_each_compute_dss(dss, gt, group, instance) \ + for_each_dss_steering(dss, gt, group, instance) \ + if (xe_gt_has_compute_dss(gt, dss)) + #endif /* _XE_GT_MCR_H_ */ diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c index fa9e9853c53b..040dd142c49c 100644 --- a/drivers/gpu/drm/xe/xe_gt_pagefault.c +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c @@ -19,7 +19,6 @@ #include "xe_guc.h" #include "xe_guc_ct.h" #include "xe_migrate.h" -#include "xe_pt.h" #include "xe_trace.h" #include "xe_vm.h" @@ -204,15 +203,14 @@ retry_userptr: drm_exec_retry_on_contention(&exec); if (ret) goto unlock_dma_resv; - } - /* Bind VMA only to the GT that has faulted */ - trace_xe_vma_pf_bind(vma); - fence = __xe_pt_bind_vma(tile, vma, xe_tile_migrate_engine(tile), NULL, 0, - vma->tile_present & BIT(tile->id)); - if (IS_ERR(fence)) { - ret = PTR_ERR(fence); - goto unlock_dma_resv; + /* Bind VMA only to the GT that has faulted */ + trace_xe_vma_pf_bind(vma); + fence = xe_vma_rebind(vm, vma, BIT(tile->id)); + if (IS_ERR(fence)) { + ret = PTR_ERR(fence); + goto unlock_dma_resv; + } } /* diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf.c index 791dcdd767e2..7decf71c2b7d 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf.c @@ -5,8 +5,12 @@ #include <drm/drm_managed.h> +#include "regs/xe_sriov_regs.h" + #include "xe_gt_sriov_pf.h" #include "xe_gt_sriov_pf_helpers.h" +#include "xe_gt_sriov_pf_service.h" +#include "xe_mmio.h" /* * VF's metadata is maintained in the flexible array where: @@ -48,5 +52,33 @@ int xe_gt_sriov_pf_init_early(struct xe_gt *gt) if (err) return err; + err = xe_gt_sriov_pf_service_init(gt); + if (err) + return err; + return 0; } + +static bool pf_needs_enable_ggtt_guest_update(struct xe_device *xe) +{ + return GRAPHICS_VERx100(xe) == 1200; +} + +static void pf_enable_ggtt_guest_update(struct xe_gt *gt) +{ + xe_mmio_write32(gt, VIRTUAL_CTRL_REG, GUEST_GTT_UPDATE_EN); +} + +/** + * xe_gt_sriov_pf_init_hw - Initialize SR-IOV hardware support. + * @gt: the &xe_gt to initialize + * + * On some platforms the PF must explicitly enable VF's access to the GGTT. + */ +void xe_gt_sriov_pf_init_hw(struct xe_gt *gt) +{ + if (pf_needs_enable_ggtt_guest_update(gt_to_xe(gt))) + pf_enable_ggtt_guest_update(gt); + + xe_gt_sriov_pf_service_update(gt); +} diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf.h index 05142ffc4319..37d7d6c3df03 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf.h @@ -10,11 +10,16 @@ struct xe_gt; #ifdef CONFIG_PCI_IOV int xe_gt_sriov_pf_init_early(struct xe_gt *gt); +void xe_gt_sriov_pf_init_hw(struct xe_gt *gt); #else static inline int xe_gt_sriov_pf_init_early(struct xe_gt *gt) { return 0; } + +static inline void xe_gt_sriov_pf_init_hw(struct xe_gt *gt) +{ +} #endif #endif diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c index 79116ad58620..7eac01e04cc5 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c @@ -187,14 +187,20 @@ static int pf_push_vf_cfg_dbs(struct xe_gt *gt, unsigned int vfid, u32 begin, u3 return pf_push_vf_cfg_klvs(gt, vfid, 2, klvs, ARRAY_SIZE(klvs)); } -static int pf_push_vf_cfg_exec_quantum(struct xe_gt *gt, unsigned int vfid, u32 exec_quantum) +static int pf_push_vf_cfg_exec_quantum(struct xe_gt *gt, unsigned int vfid, u32 *exec_quantum) { - return pf_push_vf_cfg_u32(gt, vfid, GUC_KLV_VF_CFG_EXEC_QUANTUM_KEY, exec_quantum); + /* GuC will silently clamp values exceeding max */ + *exec_quantum = min_t(u32, *exec_quantum, GUC_KLV_VF_CFG_EXEC_QUANTUM_MAX_VALUE); + + return pf_push_vf_cfg_u32(gt, vfid, GUC_KLV_VF_CFG_EXEC_QUANTUM_KEY, *exec_quantum); } -static int pf_push_vf_cfg_preempt_timeout(struct xe_gt *gt, unsigned int vfid, u32 preempt_timeout) +static int pf_push_vf_cfg_preempt_timeout(struct xe_gt *gt, unsigned int vfid, u32 *preempt_timeout) { - return pf_push_vf_cfg_u32(gt, vfid, GUC_KLV_VF_CFG_PREEMPT_TIMEOUT_KEY, preempt_timeout); + /* GuC will silently clamp values exceeding max */ + *preempt_timeout = min_t(u32, *preempt_timeout, GUC_KLV_VF_CFG_PREEMPT_TIMEOUT_MAX_VALUE); + + return pf_push_vf_cfg_u32(gt, vfid, GUC_KLV_VF_CFG_PREEMPT_TIMEOUT_KEY, *preempt_timeout); } static int pf_push_vf_cfg_lmem(struct xe_gt *gt, unsigned int vfid, u64 size) @@ -1604,7 +1610,7 @@ static int pf_provision_exec_quantum(struct xe_gt *gt, unsigned int vfid, struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid); int err; - err = pf_push_vf_cfg_exec_quantum(gt, vfid, exec_quantum); + err = pf_push_vf_cfg_exec_quantum(gt, vfid, &exec_quantum); if (unlikely(err)) return err; @@ -1674,7 +1680,7 @@ static int pf_provision_preempt_timeout(struct xe_gt *gt, unsigned int vfid, struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid); int err; - err = pf_push_vf_cfg_preempt_timeout(gt, vfid, preempt_timeout); + err = pf_push_vf_cfg_preempt_timeout(gt, vfid, &preempt_timeout); if (unlikely(err)) return err; diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c new file mode 100644 index 000000000000..5102035faa7e --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c @@ -0,0 +1,348 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2023-2024 Intel Corporation + */ + +#include <linux/debugfs.h> + +#include <drm/drm_print.h> +#include <drm/drm_debugfs.h> + +#include "xe_bo.h" +#include "xe_debugfs.h" +#include "xe_device.h" +#include "xe_gt.h" +#include "xe_gt_debugfs.h" +#include "xe_gt_sriov_pf_config.h" +#include "xe_gt_sriov_pf_control.h" +#include "xe_gt_sriov_pf_debugfs.h" +#include "xe_gt_sriov_pf_helpers.h" +#include "xe_gt_sriov_pf_policy.h" +#include "xe_gt_sriov_pf_service.h" +#include "xe_pm.h" + +/* + * /sys/kernel/debug/dri/0/ + * ├── gt0 # d_inode->i_private = gt + * │ ├── pf # d_inode->i_private = gt + * │ ├── vf1 # d_inode->i_private = VFID(1) + * : : + * │ ├── vfN # d_inode->i_private = VFID(N) + */ + +static void *extract_priv(struct dentry *d) +{ + return d->d_inode->i_private; +} + +static struct xe_gt *extract_gt(struct dentry *d) +{ + return extract_priv(d->d_parent); +} + +static unsigned int extract_vfid(struct dentry *d) +{ + return extract_priv(d) == extract_gt(d) ? PFID : (uintptr_t)extract_priv(d); +} + +/* + * /sys/kernel/debug/dri/0/ + * ├── gt0 + * │ ├── pf + * │ │ ├── ggtt_available + * │ │ ├── ggtt_provisioned + * │ │ ├── contexts_provisioned + * │ │ ├── doorbells_provisioned + * │ │ ├── runtime_registers + * │ │ ├── negotiated_versions + */ + +static const struct drm_info_list pf_info[] = { + { + "ggtt_available", + .show = xe_gt_debugfs_simple_show, + .data = xe_gt_sriov_pf_config_print_available_ggtt, + }, + { + "ggtt_provisioned", + .show = xe_gt_debugfs_simple_show, + .data = xe_gt_sriov_pf_config_print_ggtt, + }, + { + "contexts_provisioned", + .show = xe_gt_debugfs_simple_show, + .data = xe_gt_sriov_pf_config_print_ctxs, + }, + { + "doorbells_provisioned", + .show = xe_gt_debugfs_simple_show, + .data = xe_gt_sriov_pf_config_print_dbs, + }, + { + "runtime_registers", + .show = xe_gt_debugfs_simple_show, + .data = xe_gt_sriov_pf_service_print_runtime, + }, + { + "negotiated_versions", + .show = xe_gt_debugfs_simple_show, + .data = xe_gt_sriov_pf_service_print_version, + }, +}; + +/* + * /sys/kernel/debug/dri/0/ + * ├── gt0 + * │ ├── pf + * │ │ ├── reset_engine + * │ │ ├── sample_period + * │ │ ├── sched_if_idle + */ + +#define DEFINE_SRIOV_GT_POLICY_DEBUGFS_ATTRIBUTE(POLICY, TYPE, FORMAT) \ + \ +static int POLICY##_set(void *data, u64 val) \ +{ \ + struct xe_gt *gt = extract_gt(data); \ + struct xe_device *xe = gt_to_xe(gt); \ + int err; \ + \ + if (val > (TYPE)~0ull) \ + return -EOVERFLOW; \ + \ + xe_pm_runtime_get(xe); \ + err = xe_gt_sriov_pf_policy_set_##POLICY(gt, val); \ + xe_pm_runtime_put(xe); \ + \ + return err; \ +} \ + \ +static int POLICY##_get(void *data, u64 *val) \ +{ \ + struct xe_gt *gt = extract_gt(data); \ + \ + *val = xe_gt_sriov_pf_policy_get_##POLICY(gt); \ + return 0; \ +} \ + \ +DEFINE_DEBUGFS_ATTRIBUTE(POLICY##_fops, POLICY##_get, POLICY##_set, FORMAT) + +DEFINE_SRIOV_GT_POLICY_DEBUGFS_ATTRIBUTE(reset_engine, bool, "%llu\n"); +DEFINE_SRIOV_GT_POLICY_DEBUGFS_ATTRIBUTE(sched_if_idle, bool, "%llu\n"); +DEFINE_SRIOV_GT_POLICY_DEBUGFS_ATTRIBUTE(sample_period, u32, "%llu\n"); + +static void pf_add_policy_attrs(struct xe_gt *gt, struct dentry *parent) +{ + xe_gt_assert(gt, gt == extract_gt(parent)); + xe_gt_assert(gt, PFID == extract_vfid(parent)); + + debugfs_create_file_unsafe("reset_engine", 0644, parent, parent, &reset_engine_fops); + debugfs_create_file_unsafe("sched_if_idle", 0644, parent, parent, &sched_if_idle_fops); + debugfs_create_file_unsafe("sample_period_ms", 0644, parent, parent, &sample_period_fops); +} + +/* + * /sys/kernel/debug/dri/0/ + * ├── gt0 + * │ ├── pf + * │ │ ├── ggtt_spare + * │ │ ├── lmem_spare + * │ │ ├── doorbells_spare + * │ │ ├── contexts_spare + * │ │ ├── exec_quantum_ms + * │ │ ├── preempt_timeout_us + * │ ├── vf1 + * │ │ ├── ggtt_quota + * │ │ ├── lmem_quota + * │ │ ├── doorbells_quota + * │ │ ├── contexts_quota + * │ │ ├── exec_quantum_ms + * │ │ ├── preempt_timeout_us + */ + +#define DEFINE_SRIOV_GT_CONFIG_DEBUGFS_ATTRIBUTE(CONFIG, TYPE, FORMAT) \ + \ +static int CONFIG##_set(void *data, u64 val) \ +{ \ + struct xe_gt *gt = extract_gt(data); \ + unsigned int vfid = extract_vfid(data); \ + struct xe_device *xe = gt_to_xe(gt); \ + int err; \ + \ + if (val > (TYPE)~0ull) \ + return -EOVERFLOW; \ + \ + xe_pm_runtime_get(xe); \ + err = xe_gt_sriov_pf_config_set_##CONFIG(gt, vfid, val); \ + xe_pm_runtime_put(xe); \ + \ + return err; \ +} \ + \ +static int CONFIG##_get(void *data, u64 *val) \ +{ \ + struct xe_gt *gt = extract_gt(data); \ + unsigned int vfid = extract_vfid(data); \ + \ + *val = xe_gt_sriov_pf_config_get_##CONFIG(gt, vfid); \ + return 0; \ +} \ + \ +DEFINE_DEBUGFS_ATTRIBUTE(CONFIG##_fops, CONFIG##_get, CONFIG##_set, FORMAT) + +DEFINE_SRIOV_GT_CONFIG_DEBUGFS_ATTRIBUTE(ggtt, u64, "%llu\n"); +DEFINE_SRIOV_GT_CONFIG_DEBUGFS_ATTRIBUTE(lmem, u64, "%llu\n"); +DEFINE_SRIOV_GT_CONFIG_DEBUGFS_ATTRIBUTE(ctxs, u32, "%llu\n"); +DEFINE_SRIOV_GT_CONFIG_DEBUGFS_ATTRIBUTE(dbs, u32, "%llu\n"); +DEFINE_SRIOV_GT_CONFIG_DEBUGFS_ATTRIBUTE(exec_quantum, u32, "%llu\n"); +DEFINE_SRIOV_GT_CONFIG_DEBUGFS_ATTRIBUTE(preempt_timeout, u32, "%llu\n"); + +static void pf_add_config_attrs(struct xe_gt *gt, struct dentry *parent, unsigned int vfid) +{ + xe_gt_assert(gt, gt == extract_gt(parent)); + xe_gt_assert(gt, vfid == extract_vfid(parent)); + + if (!xe_gt_is_media_type(gt)) { + debugfs_create_file_unsafe(vfid ? "ggtt_quota" : "ggtt_spare", + 0644, parent, parent, &ggtt_fops); + if (IS_DGFX(gt_to_xe(gt))) + debugfs_create_file_unsafe(vfid ? "lmem_quota" : "lmem_spare", + 0644, parent, parent, &lmem_fops); + } + debugfs_create_file_unsafe(vfid ? "doorbells_quota" : "doorbells_spare", + 0644, parent, parent, &dbs_fops); + debugfs_create_file_unsafe(vfid ? "contexts_quota" : "contexts_spare", + 0644, parent, parent, &ctxs_fops); + debugfs_create_file_unsafe("exec_quantum_ms", 0644, parent, parent, + &exec_quantum_fops); + debugfs_create_file_unsafe("preempt_timeout_us", 0644, parent, parent, + &preempt_timeout_fops); +} + +/* + * /sys/kernel/debug/dri/0/ + * ├── gt0 + * │ ├── vf1 + * │ │ ├── control { stop, pause, resume } + */ + +static const struct { + const char *cmd; + int (*fn)(struct xe_gt *gt, unsigned int vfid); +} control_cmds[] = { + { "stop", xe_gt_sriov_pf_control_stop_vf }, + { "pause", xe_gt_sriov_pf_control_pause_vf }, + { "resume", xe_gt_sriov_pf_control_resume_vf }, +}; + +static ssize_t control_write(struct file *file, const char __user *buf, size_t count, loff_t *pos) +{ + struct dentry *dent = file_dentry(file); + struct dentry *parent = dent->d_parent; + struct xe_gt *gt = extract_gt(parent); + struct xe_device *xe = gt_to_xe(gt); + unsigned int vfid = extract_vfid(parent); + int ret = -EINVAL; + char cmd[32]; + size_t n; + + xe_gt_assert(gt, vfid); + xe_gt_sriov_pf_assert_vfid(gt, vfid); + + if (*pos) + return -ESPIPE; + + if (count > sizeof(cmd) - 1) + return -EINVAL; + + ret = simple_write_to_buffer(cmd, sizeof(cmd) - 1, pos, buf, count); + if (ret < 0) + return ret; + cmd[ret] = '\0'; + + for (n = 0; n < ARRAY_SIZE(control_cmds); n++) { + xe_gt_assert(gt, sizeof(cmd) > strlen(control_cmds[n].cmd)); + + if (sysfs_streq(cmd, control_cmds[n].cmd)) { + xe_pm_runtime_get(xe); + ret = control_cmds[n].fn ? (*control_cmds[n].fn)(gt, vfid) : 0; + xe_pm_runtime_put(xe); + break; + } + } + + return (ret < 0) ? ret : count; +} + +static ssize_t control_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) +{ + char help[128]; + size_t n; + + help[0] = '\0'; + for (n = 0; n < ARRAY_SIZE(control_cmds); n++) { + strlcat(help, control_cmds[n].cmd, sizeof(help)); + strlcat(help, "\n", sizeof(help)); + } + + return simple_read_from_buffer(buf, count, ppos, help, strlen(help)); +} + +static const struct file_operations control_ops = { + .owner = THIS_MODULE, + .open = simple_open, + .write = control_write, + .read = control_read, + .llseek = default_llseek, +}; + +/** + * xe_gt_sriov_pf_debugfs_register - Register SR-IOV PF specific entries in GT debugfs. + * @gt: the &xe_gt to register + * @root: the &dentry that represents the GT directory + * + * Register SR-IOV PF entries that are GT related and must be shown under GT debugfs. + */ +void xe_gt_sriov_pf_debugfs_register(struct xe_gt *gt, struct dentry *root) +{ + struct xe_device *xe = gt_to_xe(gt); + struct drm_minor *minor = xe->drm.primary; + int n, totalvfs = xe_sriov_pf_get_totalvfs(xe); + struct dentry *pfdentry; + struct dentry *vfdentry; + char buf[14]; /* should be enough up to "vf%u\0" for 2^32 - 1 */ + + xe_gt_assert(gt, IS_SRIOV_PF(xe)); + xe_gt_assert(gt, root->d_inode->i_private == gt); + + /* + * /sys/kernel/debug/dri/0/ + * ├── gt0 + * │ ├── pf + */ + pfdentry = debugfs_create_dir("pf", root); + if (IS_ERR(pfdentry)) + return; + pfdentry->d_inode->i_private = gt; + + drm_debugfs_create_files(pf_info, ARRAY_SIZE(pf_info), pfdentry, minor); + pf_add_policy_attrs(gt, pfdentry); + pf_add_config_attrs(gt, pfdentry, PFID); + + for (n = 1; n <= totalvfs; n++) { + /* + * /sys/kernel/debug/dri/0/ + * ├── gt0 + * │ ├── vf1 + * │ ├── vf2 + */ + snprintf(buf, sizeof(buf), "vf%u", n); + vfdentry = debugfs_create_dir(buf, root); + if (IS_ERR(vfdentry)) + break; + vfdentry->d_inode->i_private = (void *)(uintptr_t)n; + + pf_add_config_attrs(gt, vfdentry, VFID(n)); + debugfs_create_file("control", 0600, vfdentry, NULL, &control_ops); + } +} diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.h new file mode 100644 index 000000000000..038cc8ddc244 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023-2024 Intel Corporation + */ + +#ifndef _XE_GT_SRIOV_PF_DEBUGFS_H_ +#define _XE_GT_SRIOV_PF_DEBUGFS_H_ + +struct xe_gt; +struct dentry; + +#ifdef CONFIG_PCI_IOV +void xe_gt_sriov_pf_debugfs_register(struct xe_gt *gt, struct dentry *root); +#else +static inline void xe_gt_sriov_pf_debugfs_register(struct xe_gt *gt, struct dentry *root) { } +#endif + +#endif diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c new file mode 100644 index 000000000000..0e23b7ea4f3e --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c @@ -0,0 +1,550 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2023-2024 Intel Corporation + */ + +#include <drm/drm_managed.h> + +#include "abi/guc_actions_sriov_abi.h" +#include "abi/guc_relay_actions_abi.h" + +#include "regs/xe_gt_regs.h" +#include "regs/xe_guc_regs.h" +#include "regs/xe_regs.h" + +#include "xe_mmio.h" +#include "xe_gt_sriov_printk.h" +#include "xe_gt_sriov_pf_helpers.h" +#include "xe_gt_sriov_pf_service.h" +#include "xe_gt_sriov_pf_service_types.h" +#include "xe_guc_ct.h" +#include "xe_guc_hxg_helpers.h" + +static void pf_init_versions(struct xe_gt *gt) +{ + BUILD_BUG_ON(!GUC_RELAY_VERSION_BASE_MAJOR && !GUC_RELAY_VERSION_BASE_MINOR); + BUILD_BUG_ON(GUC_RELAY_VERSION_BASE_MAJOR > GUC_RELAY_VERSION_LATEST_MAJOR); + + /* base versions may differ between platforms */ + gt->sriov.pf.service.version.base.major = GUC_RELAY_VERSION_BASE_MAJOR; + gt->sriov.pf.service.version.base.minor = GUC_RELAY_VERSION_BASE_MINOR; + + /* latest version is same for all platforms */ + gt->sriov.pf.service.version.latest.major = GUC_RELAY_VERSION_LATEST_MAJOR; + gt->sriov.pf.service.version.latest.minor = GUC_RELAY_VERSION_LATEST_MINOR; +} + +/* Return: 0 on success or a negative error code on failure. */ +static int pf_negotiate_version(struct xe_gt *gt, + u32 wanted_major, u32 wanted_minor, + u32 *major, u32 *minor) +{ + struct xe_gt_sriov_pf_service_version base = gt->sriov.pf.service.version.base; + struct xe_gt_sriov_pf_service_version latest = gt->sriov.pf.service.version.latest; + + xe_gt_assert(gt, base.major); + xe_gt_assert(gt, base.major <= latest.major); + xe_gt_assert(gt, (base.major < latest.major) || (base.minor <= latest.minor)); + + /* VF doesn't care - return our latest */ + if (wanted_major == VF2PF_HANDSHAKE_MAJOR_ANY && + wanted_minor == VF2PF_HANDSHAKE_MINOR_ANY) { + *major = latest.major; + *minor = latest.minor; + return 0; + } + + /* VF wants newer than our - return our latest */ + if (wanted_major > latest.major) { + *major = latest.major; + *minor = latest.minor; + return 0; + } + + /* VF wants older than min required - reject */ + if (wanted_major < base.major || + (wanted_major == base.major && wanted_minor < base.minor)) { + return -EPERM; + } + + /* previous major - return wanted, as we should still support it */ + if (wanted_major < latest.major) { + /* XXX: we are not prepared for multi-versions yet */ + xe_gt_assert(gt, base.major == latest.major); + return -ENOPKG; + } + + /* same major - return common minor */ + *major = wanted_major; + *minor = min_t(u32, latest.minor, wanted_minor); + return 0; +} + +static void pf_connect(struct xe_gt *gt, u32 vfid, u32 major, u32 minor) +{ + xe_gt_sriov_pf_assert_vfid(gt, vfid); + xe_gt_assert(gt, major || minor); + + gt->sriov.pf.vfs[vfid].version.major = major; + gt->sriov.pf.vfs[vfid].version.minor = minor; +} + +static void pf_disconnect(struct xe_gt *gt, u32 vfid) +{ + xe_gt_sriov_pf_assert_vfid(gt, vfid); + + gt->sriov.pf.vfs[vfid].version.major = 0; + gt->sriov.pf.vfs[vfid].version.minor = 0; +} + +static bool pf_is_negotiated(struct xe_gt *gt, u32 vfid, u32 major, u32 minor) +{ + xe_gt_sriov_pf_assert_vfid(gt, vfid); + + return major == gt->sriov.pf.vfs[vfid].version.major && + minor <= gt->sriov.pf.vfs[vfid].version.minor; +} + +static const struct xe_reg tgl_runtime_regs[] = { + RPM_CONFIG0, /* _MMIO(0x0d00) */ + MIRROR_FUSE3, /* _MMIO(0x9118) */ + XELP_EU_ENABLE, /* _MMIO(0x9134) */ + XELP_GT_SLICE_ENABLE, /* _MMIO(0x9138) */ + XELP_GT_GEOMETRY_DSS_ENABLE, /* _MMIO(0x913c) */ + GT_VEBOX_VDBOX_DISABLE, /* _MMIO(0x9140) */ + CTC_MODE, /* _MMIO(0xa26c) */ + HUC_KERNEL_LOAD_INFO, /* _MMIO(0xc1dc) */ + TIMESTAMP_OVERRIDE, /* _MMIO(0x44074) */ +}; + +static const struct xe_reg ats_m_runtime_regs[] = { + RPM_CONFIG0, /* _MMIO(0x0d00) */ + MIRROR_FUSE3, /* _MMIO(0x9118) */ + MIRROR_FUSE1, /* _MMIO(0x911c) */ + XELP_EU_ENABLE, /* _MMIO(0x9134) */ + XELP_GT_GEOMETRY_DSS_ENABLE, /* _MMIO(0x913c) */ + GT_VEBOX_VDBOX_DISABLE, /* _MMIO(0x9140) */ + XEHP_GT_COMPUTE_DSS_ENABLE, /* _MMIO(0x9144) */ + CTC_MODE, /* _MMIO(0xa26c) */ + HUC_KERNEL_LOAD_INFO, /* _MMIO(0xc1dc) */ + TIMESTAMP_OVERRIDE, /* _MMIO(0x44074) */ +}; + +static const struct xe_reg pvc_runtime_regs[] = { + RPM_CONFIG0, /* _MMIO(0x0d00) */ + MIRROR_FUSE3, /* _MMIO(0x9118) */ + XELP_EU_ENABLE, /* _MMIO(0x9134) */ + XELP_GT_GEOMETRY_DSS_ENABLE, /* _MMIO(0x913c) */ + GT_VEBOX_VDBOX_DISABLE, /* _MMIO(0x9140) */ + XEHP_GT_COMPUTE_DSS_ENABLE, /* _MMIO(0x9144) */ + XEHPC_GT_COMPUTE_DSS_ENABLE_EXT,/* _MMIO(0x9148) */ + CTC_MODE, /* _MMIO(0xA26C) */ + HUC_KERNEL_LOAD_INFO, /* _MMIO(0xc1dc) */ + TIMESTAMP_OVERRIDE, /* _MMIO(0x44074) */ +}; + +static const struct xe_reg ver_1270_runtime_regs[] = { + RPM_CONFIG0, /* _MMIO(0x0d00) */ + XEHP_FUSE4, /* _MMIO(0x9114) */ + MIRROR_FUSE3, /* _MMIO(0x9118) */ + MIRROR_FUSE1, /* _MMIO(0x911c) */ + XELP_EU_ENABLE, /* _MMIO(0x9134) */ + XELP_GT_GEOMETRY_DSS_ENABLE, /* _MMIO(0x913c) */ + GT_VEBOX_VDBOX_DISABLE, /* _MMIO(0x9140) */ + XEHP_GT_COMPUTE_DSS_ENABLE, /* _MMIO(0x9144) */ + XEHPC_GT_COMPUTE_DSS_ENABLE_EXT,/* _MMIO(0x9148) */ + CTC_MODE, /* _MMIO(0xa26c) */ + HUC_KERNEL_LOAD_INFO, /* _MMIO(0xc1dc) */ + TIMESTAMP_OVERRIDE, /* _MMIO(0x44074) */ +}; + +static const struct xe_reg ver_2000_runtime_regs[] = { + RPM_CONFIG0, /* _MMIO(0x0d00) */ + XEHP_FUSE4, /* _MMIO(0x9114) */ + MIRROR_FUSE3, /* _MMIO(0x9118) */ + MIRROR_FUSE1, /* _MMIO(0x911c) */ + XELP_EU_ENABLE, /* _MMIO(0x9134) */ + XELP_GT_GEOMETRY_DSS_ENABLE, /* _MMIO(0x913c) */ + GT_VEBOX_VDBOX_DISABLE, /* _MMIO(0x9140) */ + XEHP_GT_COMPUTE_DSS_ENABLE, /* _MMIO(0x9144) */ + XEHPC_GT_COMPUTE_DSS_ENABLE_EXT,/* _MMIO(0x9148) */ + XE2_GT_COMPUTE_DSS_2, /* _MMIO(0x914c) */ + XE2_GT_GEOMETRY_DSS_1, /* _MMIO(0x9150) */ + XE2_GT_GEOMETRY_DSS_2, /* _MMIO(0x9154) */ + CTC_MODE, /* _MMIO(0xa26c) */ + HUC_KERNEL_LOAD_INFO, /* _MMIO(0xc1dc) */ + TIMESTAMP_OVERRIDE, /* _MMIO(0x44074) */ +}; + +static const struct xe_reg *pick_runtime_regs(struct xe_device *xe, unsigned int *count) +{ + const struct xe_reg *regs; + + if (GRAPHICS_VERx100(xe) >= 2000) { + *count = ARRAY_SIZE(ver_2000_runtime_regs); + regs = ver_2000_runtime_regs; + } else if (GRAPHICS_VERx100(xe) >= 1270) { + *count = ARRAY_SIZE(ver_1270_runtime_regs); + regs = ver_1270_runtime_regs; + } else if (GRAPHICS_VERx100(xe) == 1260) { + *count = ARRAY_SIZE(pvc_runtime_regs); + regs = pvc_runtime_regs; + } else if (GRAPHICS_VERx100(xe) == 1255) { + *count = ARRAY_SIZE(ats_m_runtime_regs); + regs = ats_m_runtime_regs; + } else if (GRAPHICS_VERx100(xe) == 1200) { + *count = ARRAY_SIZE(tgl_runtime_regs); + regs = tgl_runtime_regs; + } else { + regs = ERR_PTR(-ENOPKG); + *count = 0; + } + + return regs; +} + +static int pf_alloc_runtime_info(struct xe_gt *gt) +{ + struct xe_device *xe = gt_to_xe(gt); + const struct xe_reg *regs; + unsigned int size; + u32 *values; + + xe_gt_assert(gt, IS_SRIOV_PF(xe)); + xe_gt_assert(gt, !gt->sriov.pf.service.runtime.size); + xe_gt_assert(gt, !gt->sriov.pf.service.runtime.regs); + xe_gt_assert(gt, !gt->sriov.pf.service.runtime.values); + + regs = pick_runtime_regs(xe, &size); + if (IS_ERR(regs)) + return PTR_ERR(regs); + + if (unlikely(!size)) + return 0; + + values = drmm_kcalloc(&xe->drm, size, sizeof(u32), GFP_KERNEL); + if (!values) + return -ENOMEM; + + gt->sriov.pf.service.runtime.size = size; + gt->sriov.pf.service.runtime.regs = regs; + gt->sriov.pf.service.runtime.values = values; + + return 0; +} + +static void read_many(struct xe_gt *gt, unsigned int count, + const struct xe_reg *regs, u32 *values) +{ + while (count--) + *values++ = xe_mmio_read32(gt, *regs++); +} + +static void pf_prepare_runtime_info(struct xe_gt *gt) +{ + const struct xe_reg *regs; + unsigned int size; + u32 *values; + + if (!gt->sriov.pf.service.runtime.size) + return; + + size = gt->sriov.pf.service.runtime.size; + regs = gt->sriov.pf.service.runtime.regs; + values = gt->sriov.pf.service.runtime.values; + + read_many(gt, size, regs, values); + + if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_SRIOV)) { + struct drm_printer p = xe_gt_info_printer(gt); + + xe_gt_sriov_pf_service_print_runtime(gt, &p); + } +} + +/** + * xe_gt_sriov_pf_service_init - Early initialization of the GT SR-IOV PF services. + * @gt: the &xe_gt to initialize + * + * Performs early initialization of the GT SR-IOV PF services, including preparation + * of the runtime info that will be shared with VFs. + * + * This function can only be called on PF. + */ +int xe_gt_sriov_pf_service_init(struct xe_gt *gt) +{ + int err; + + pf_init_versions(gt); + + err = pf_alloc_runtime_info(gt); + if (unlikely(err)) + goto failed; + + return 0; +failed: + xe_gt_sriov_err(gt, "Failed to initialize service (%pe)\n", ERR_PTR(err)); + return err; +} + +/** + * xe_gt_sriov_pf_service_update - Update PF SR-IOV services. + * @gt: the &xe_gt to update + * + * Updates runtime data shared with VFs. + * + * This function can be called more than once. + * This function can only be called on PF. + */ +void xe_gt_sriov_pf_service_update(struct xe_gt *gt) +{ + pf_prepare_runtime_info(gt); +} + +/** + * xe_gt_sriov_pf_service_reset - Reset a connection with the VF. + * @gt: the &xe_gt + * @vfid: the VF identifier + * + * Reset a VF driver negotiated VF/PF ABI version. + * After that point, the VF driver will have to perform new version handshake + * to continue use of the PF services again. + * + * This function can only be called on PF. + */ +void xe_gt_sriov_pf_service_reset(struct xe_gt *gt, unsigned int vfid) +{ + pf_disconnect(gt, vfid); +} + +/* Return: 0 on success or a negative error code on failure. */ +static int pf_process_handshake(struct xe_gt *gt, u32 vfid, + u32 wanted_major, u32 wanted_minor, + u32 *major, u32 *minor) +{ + int err; + + xe_gt_sriov_dbg_verbose(gt, "VF%u wants ABI version %u.%u\n", + vfid, wanted_major, wanted_minor); + + err = pf_negotiate_version(gt, wanted_major, wanted_minor, major, minor); + + if (err < 0) { + xe_gt_sriov_notice(gt, "VF%u failed to negotiate ABI %u.%u (%pe)\n", + vfid, wanted_major, wanted_minor, ERR_PTR(err)); + pf_disconnect(gt, vfid); + } else { + xe_gt_sriov_dbg(gt, "VF%u negotiated ABI version %u.%u\n", + vfid, *major, *minor); + pf_connect(gt, vfid, *major, *minor); + } + + return 0; +} + +/* Return: length of the response message or a negative error code on failure. */ +static int pf_process_handshake_msg(struct xe_gt *gt, u32 origin, + const u32 *request, u32 len, u32 *response, u32 size) +{ + u32 wanted_major, wanted_minor; + u32 major, minor; + u32 mbz; + int err; + + if (unlikely(len != VF2PF_HANDSHAKE_REQUEST_MSG_LEN)) + return -EMSGSIZE; + + mbz = FIELD_GET(VF2PF_HANDSHAKE_REQUEST_MSG_0_MBZ, request[0]); + if (unlikely(mbz)) + return -EPFNOSUPPORT; + + wanted_major = FIELD_GET(VF2PF_HANDSHAKE_REQUEST_MSG_1_MAJOR, request[1]); + wanted_minor = FIELD_GET(VF2PF_HANDSHAKE_REQUEST_MSG_1_MINOR, request[1]); + + err = pf_process_handshake(gt, origin, wanted_major, wanted_minor, &major, &minor); + if (err < 0) + return err; + + xe_gt_assert(gt, major || minor); + xe_gt_assert(gt, size >= VF2PF_HANDSHAKE_RESPONSE_MSG_LEN); + + response[0] = FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) | + FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_RESPONSE_SUCCESS) | + FIELD_PREP(GUC_HXG_RESPONSE_MSG_0_DATA0, 0); + response[1] = FIELD_PREP(VF2PF_HANDSHAKE_RESPONSE_MSG_1_MAJOR, major) | + FIELD_PREP(VF2PF_HANDSHAKE_RESPONSE_MSG_1_MINOR, minor); + + return VF2PF_HANDSHAKE_RESPONSE_MSG_LEN; +} + +struct reg_data { + u32 offset; + u32 value; +} __packed; +static_assert(hxg_sizeof(struct reg_data) == 2); + +/* Return: number of entries copied or negative error code on failure. */ +static int pf_service_runtime_query(struct xe_gt *gt, u32 start, u32 limit, + struct reg_data *data, u32 *remaining) +{ + struct xe_gt_sriov_pf_service_runtime_regs *runtime; + unsigned int count, i; + u32 addr; + + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + + runtime = >->sriov.pf.service.runtime; + + if (start > runtime->size) + return -ERANGE; + + count = min_t(u32, runtime->size - start, limit); + + for (i = 0; i < count; ++i, ++data) { + addr = runtime->regs[start + i].addr; + data->offset = xe_mmio_adjusted_addr(gt, addr); + data->value = runtime->values[start + i]; + } + + *remaining = runtime->size - start - count; + return count; +} + +/* Return: length of the response message or a negative error code on failure. */ +static int pf_process_runtime_query_msg(struct xe_gt *gt, u32 origin, + const u32 *msg, u32 msg_len, u32 *response, u32 resp_size) +{ + const u32 chunk_size = hxg_sizeof(struct reg_data); + struct reg_data *reg_data_buf; + u32 limit, start, max_chunks; + u32 remaining = 0; + int ret; + + if (!pf_is_negotiated(gt, origin, 1, 0)) + return -EACCES; + if (unlikely(msg_len > VF2PF_QUERY_RUNTIME_REQUEST_MSG_LEN)) + return -EMSGSIZE; + if (unlikely(msg_len < VF2PF_QUERY_RUNTIME_REQUEST_MSG_LEN)) + return -EPROTO; + if (unlikely(resp_size < VF2PF_QUERY_RUNTIME_RESPONSE_MSG_MIN_LEN)) + return -EINVAL; + + limit = FIELD_GET(VF2PF_QUERY_RUNTIME_REQUEST_MSG_0_LIMIT, msg[0]); + start = FIELD_GET(VF2PF_QUERY_RUNTIME_REQUEST_MSG_1_START, msg[1]); + + resp_size = min_t(u32, resp_size, VF2PF_QUERY_RUNTIME_RESPONSE_MSG_MAX_LEN); + max_chunks = (resp_size - VF2PF_QUERY_RUNTIME_RESPONSE_MSG_MIN_LEN) / chunk_size; + limit = limit == VF2PF_QUERY_RUNTIME_NO_LIMIT ? max_chunks : min_t(u32, max_chunks, limit); + reg_data_buf = (void *)(response + VF2PF_QUERY_RUNTIME_RESPONSE_MSG_MIN_LEN); + + ret = pf_service_runtime_query(gt, start, limit, reg_data_buf, &remaining); + if (ret < 0) + return ret; + + response[0] = FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) | + FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_RESPONSE_SUCCESS) | + FIELD_PREP(VF2PF_QUERY_RUNTIME_RESPONSE_MSG_0_COUNT, ret); + response[1] = FIELD_PREP(VF2PF_QUERY_RUNTIME_RESPONSE_MSG_1_REMAINING, remaining); + + return VF2PF_QUERY_RUNTIME_RESPONSE_MSG_MIN_LEN + ret * hxg_sizeof(struct reg_data); +} + +/** + * xe_gt_sriov_pf_service_process_request - Service GT level SR-IOV request message from the VF. + * @gt: the &xe_gt that provides the service + * @origin: VF number that is requesting the service + * @msg: request message + * @msg_len: length of the request message (in dwords) + * @response: placeholder for the response message + * @resp_size: length of the response message buffer (in dwords) + * + * This function processes `Relay Message`_ request from the VF. + * + * Return: length of the response message or a negative error code on failure. + */ +int xe_gt_sriov_pf_service_process_request(struct xe_gt *gt, u32 origin, + const u32 *msg, u32 msg_len, + u32 *response, u32 resp_size) +{ + u32 action, data __maybe_unused; + int ret; + + xe_gt_assert(gt, msg_len >= GUC_HXG_MSG_MIN_LEN); + xe_gt_assert(gt, FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[0]) == GUC_HXG_TYPE_REQUEST); + + action = FIELD_GET(GUC_HXG_REQUEST_MSG_0_ACTION, msg[0]); + data = FIELD_GET(GUC_HXG_REQUEST_MSG_0_DATA0, msg[0]); + xe_gt_sriov_dbg_verbose(gt, "service action %#x:%u from VF%u\n", + action, data, origin); + + switch (action) { + case GUC_RELAY_ACTION_VF2PF_HANDSHAKE: + ret = pf_process_handshake_msg(gt, origin, msg, msg_len, response, resp_size); + break; + case GUC_RELAY_ACTION_VF2PF_QUERY_RUNTIME: + ret = pf_process_runtime_query_msg(gt, origin, msg, msg_len, response, resp_size); + break; + default: + ret = -EOPNOTSUPP; + break; + } + + return ret; +} + +/** + * xe_gt_sriov_pf_service_print_runtime - Print PF runtime data shared with VFs. + * @gt: the &xe_gt + * @p: the &drm_printer + * + * This function is for PF use only. + */ +int xe_gt_sriov_pf_service_print_runtime(struct xe_gt *gt, struct drm_printer *p) +{ + const struct xe_reg *regs; + unsigned int size; + u32 *values; + + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + + size = gt->sriov.pf.service.runtime.size; + regs = gt->sriov.pf.service.runtime.regs; + values = gt->sriov.pf.service.runtime.values; + + for (; size--; regs++, values++) { + drm_printf(p, "reg[%#x] = %#x\n", + xe_mmio_adjusted_addr(gt, regs->addr), *values); + } + + return 0; +} + +/** + * xe_gt_sriov_pf_service_print_version - Print ABI versions negotiated with VFs. + * @gt: the &xe_gt + * @p: the &drm_printer + * + * This function is for PF use only. + */ +int xe_gt_sriov_pf_service_print_version(struct xe_gt *gt, struct drm_printer *p) +{ + struct xe_device *xe = gt_to_xe(gt); + unsigned int n, total_vfs = xe_sriov_pf_get_totalvfs(xe); + struct xe_gt_sriov_pf_service_version *version; + + xe_gt_assert(gt, IS_SRIOV_PF(xe)); + + for (n = 1; n <= total_vfs; n++) { + version = >->sriov.pf.vfs[n].version; + if (!version->major && !version->minor) + continue; + + drm_printf(p, "VF%u:\t%u.%u\n", n, version->major, version->minor); + } + + return 0; +} + +#if IS_BUILTIN(CONFIG_DRM_XE_KUNIT_TEST) +#include "tests/xe_gt_sriov_pf_service_test.c" +#endif diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.h new file mode 100644 index 000000000000..56aaadf0360d --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023-2024 Intel Corporation + */ + +#ifndef _XE_GT_SRIOV_PF_SERVICE_H_ +#define _XE_GT_SRIOV_PF_SERVICE_H_ + +#include <linux/errno.h> +#include <linux/types.h> + +struct drm_printer; +struct xe_gt; + +int xe_gt_sriov_pf_service_init(struct xe_gt *gt); +void xe_gt_sriov_pf_service_update(struct xe_gt *gt); +void xe_gt_sriov_pf_service_reset(struct xe_gt *gt, unsigned int vfid); + +int xe_gt_sriov_pf_service_print_version(struct xe_gt *gt, struct drm_printer *p); +int xe_gt_sriov_pf_service_print_runtime(struct xe_gt *gt, struct drm_printer *p); + +#ifdef CONFIG_PCI_IOV +int xe_gt_sriov_pf_service_process_request(struct xe_gt *gt, u32 origin, + const u32 *msg, u32 msg_len, + u32 *response, u32 resp_size); +#else +static inline int +xe_gt_sriov_pf_service_process_request(struct xe_gt *gt, u32 origin, + const u32 *msg, u32 msg_len, + u32 *response, u32 resp_size) +{ + return -EPROTO; +} +#endif + +#endif diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_service_types.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_service_types.h new file mode 100644 index 000000000000..ad6dd75f0056 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_service_types.h @@ -0,0 +1,52 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023-2024 Intel Corporation + */ + +#ifndef _XE_GT_SRIOV_PF_SERVICE_TYPES_H_ +#define _XE_GT_SRIOV_PF_SERVICE_TYPES_H_ + +#include <linux/types.h> + +struct xe_reg; + +/** + * struct xe_gt_sriov_pf_service_version - VF/PF ABI Version. + * @major: the major version of the VF/PF ABI + * @minor: the minor version of the VF/PF ABI + * + * See `GuC Relay Communication`_. + */ +struct xe_gt_sriov_pf_service_version { + u16 major; + u16 minor; +}; + +/** + * struct xe_gt_sriov_pf_service_runtime_regs - Runtime data shared with VFs. + * @regs: pointer to static array with register offsets. + * @values: pointer to array with captured register values. + * @size: size of the regs and value arrays. + */ +struct xe_gt_sriov_pf_service_runtime_regs { + const struct xe_reg *regs; + u32 *values; + u32 size; +}; + +/** + * struct xe_gt_sriov_pf_service - Data used by the PF service. + * @version: information about VF/PF ABI versions for current platform. + * @version.base: lowest VF/PF ABI version that could be negotiated with VF. + * @version.latest: latest VF/PF ABI version supported by the PF driver. + * @runtime: runtime data shared with VFs. + */ +struct xe_gt_sriov_pf_service { + struct { + struct xe_gt_sriov_pf_service_version base; + struct xe_gt_sriov_pf_service_version latest; + } version; + struct xe_gt_sriov_pf_service_runtime_regs runtime; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h index faf9ee8266ce..880754f3e215 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h @@ -10,6 +10,7 @@ #include "xe_gt_sriov_pf_config_types.h" #include "xe_gt_sriov_pf_policy_types.h" +#include "xe_gt_sriov_pf_service_types.h" /** * struct xe_gt_sriov_metadata - GT level per-VF metadata. @@ -17,15 +18,19 @@ struct xe_gt_sriov_metadata { /** @config: per-VF provisioning data. */ struct xe_gt_sriov_config config; + /** @version: negotiated VF/PF ABI version */ + struct xe_gt_sriov_pf_service_version version; }; /** * struct xe_gt_sriov_pf - GT level PF virtualization data. + * @service: service data. * @policy: policy data. * @spare: PF-only provisioning configuration. * @vfs: metadata for all VFs. */ struct xe_gt_sriov_pf { + struct xe_gt_sriov_pf_service service; struct xe_gt_sriov_pf_policy policy; struct xe_gt_sriov_spare_config spare; struct xe_gt_sriov_metadata *vfs; diff --git a/drivers/gpu/drm/xe/xe_gt_throttle_sysfs.h b/drivers/gpu/drm/xe/xe_gt_throttle_sysfs.h index 6c61e6f228a8..335c402b51a6 100644 --- a/drivers/gpu/drm/xe/xe_gt_throttle_sysfs.h +++ b/drivers/gpu/drm/xe/xe_gt_throttle_sysfs.h @@ -6,11 +6,8 @@ #ifndef _XE_GT_THROTTLE_SYSFS_H_ #define _XE_GT_THROTTLE_SYSFS_H_ -#include <drm/drm_managed.h> - struct xe_gt; int xe_gt_throttle_sysfs_init(struct xe_gt *gt); -#endif /* _XE_GT_THROTTLE_SYSFS_H_ */ - +#endif diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c index 93df2d7969b3..105797776a6c 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c @@ -7,6 +7,7 @@ #include "abi/guc_actions_abi.h" #include "xe_device.h" +#include "xe_force_wake.h" #include "xe_gt.h" #include "xe_gt_printk.h" #include "xe_guc.h" @@ -245,7 +246,7 @@ int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt) return seqno; xe_gt_tlb_invalidation_wait(gt, seqno); - } else if (xe_device_uc_enabled(xe)) { + } else if (xe_device_uc_enabled(xe) && !xe_device_wedged(xe)) { xe_gt_WARN_ON(gt, xe_force_wake_get(gt_to_fw(gt), XE_FW_GT)); if (xe->info.platform == XE_PVC || GRAPHICS_VER(xe) >= 20) { xe_mmio_write32(gt, PVC_GUC_TLB_INV_DESC1, @@ -263,11 +264,15 @@ int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt) } /** - * xe_gt_tlb_invalidation_vma - Issue a TLB invalidation on this GT for a VMA + * xe_gt_tlb_invalidation_range - Issue a TLB invalidation on this GT for an + * address range + * * @gt: graphics tile * @fence: invalidation fence which will be signal on TLB invalidation * completion, can be NULL - * @vma: VMA to invalidate + * @start: start address + * @end: end address + * @asid: address space id * * Issue a range based TLB invalidation if supported, if not fallback to a full * TLB invalidation. Completion of TLB is asynchronous and caller can either use @@ -277,17 +282,15 @@ int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt) * Return: Seqno which can be passed to xe_gt_tlb_invalidation_wait on success, * negative error code on error. */ -int xe_gt_tlb_invalidation_vma(struct xe_gt *gt, - struct xe_gt_tlb_invalidation_fence *fence, - struct xe_vma *vma) +int xe_gt_tlb_invalidation_range(struct xe_gt *gt, + struct xe_gt_tlb_invalidation_fence *fence, + u64 start, u64 end, u32 asid) { struct xe_device *xe = gt_to_xe(gt); #define MAX_TLB_INVALIDATION_LEN 7 u32 action[MAX_TLB_INVALIDATION_LEN]; int len = 0; - xe_gt_assert(gt, vma); - /* Execlists not supported */ if (gt_to_xe(gt)->info.force_execlist) { if (fence) @@ -301,9 +304,9 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt, if (!xe->info.has_range_tlb_invalidation) { action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_FULL); } else { - u64 start = xe_vma_start(vma); - u64 length = xe_vma_size(vma); - u64 align, end; + u64 orig_start = start; + u64 length = end - start; + u64 align; if (length < SZ_4K) length = SZ_4K; @@ -315,12 +318,12 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt, * address mask covering the required range. */ align = roundup_pow_of_two(length); - start = ALIGN_DOWN(xe_vma_start(vma), align); - end = ALIGN(xe_vma_end(vma), align); + start = ALIGN_DOWN(start, align); + end = ALIGN(end, align); length = align; while (start + length < end) { length <<= 1; - start = ALIGN_DOWN(xe_vma_start(vma), length); + start = ALIGN_DOWN(orig_start, length); } /* @@ -329,16 +332,17 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt, */ if (length >= SZ_2M) { length = max_t(u64, SZ_16M, length); - start = ALIGN_DOWN(xe_vma_start(vma), length); + start = ALIGN_DOWN(orig_start, length); } xe_gt_assert(gt, length >= SZ_4K); xe_gt_assert(gt, is_power_of_2(length)); - xe_gt_assert(gt, !(length & GENMASK(ilog2(SZ_16M) - 1, ilog2(SZ_2M) + 1))); + xe_gt_assert(gt, !(length & GENMASK(ilog2(SZ_16M) - 1, + ilog2(SZ_2M) + 1))); xe_gt_assert(gt, IS_ALIGNED(start, length)); action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_PAGE_SELECTIVE); - action[len++] = xe_vma_vm(vma)->usm.asid; + action[len++] = asid; action[len++] = lower_32_bits(start); action[len++] = upper_32_bits(start); action[len++] = ilog2(length) - ilog2(SZ_4K); @@ -350,6 +354,32 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt, } /** + * xe_gt_tlb_invalidation_vma - Issue a TLB invalidation on this GT for a VMA + * @gt: graphics tile + * @fence: invalidation fence which will be signal on TLB invalidation + * completion, can be NULL + * @vma: VMA to invalidate + * + * Issue a range based TLB invalidation if supported, if not fallback to a full + * TLB invalidation. Completion of TLB is asynchronous and caller can either use + * the invalidation fence or seqno + xe_gt_tlb_invalidation_wait to wait for + * completion. + * + * Return: Seqno which can be passed to xe_gt_tlb_invalidation_wait on success, + * negative error code on error. + */ +int xe_gt_tlb_invalidation_vma(struct xe_gt *gt, + struct xe_gt_tlb_invalidation_fence *fence, + struct xe_vma *vma) +{ + xe_gt_assert(gt, vma); + + return xe_gt_tlb_invalidation_range(gt, fence, xe_vma_start(vma), + xe_vma_end(vma), + xe_vma_vm(vma)->usm.asid); +} + +/** * xe_gt_tlb_invalidation_wait - Wait for TLB to complete * @gt: graphics tile * @seqno: seqno to wait which was returned from xe_gt_tlb_invalidation diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h index fbb743d80d2c..bf3bebd9f985 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h @@ -20,6 +20,9 @@ int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt); int xe_gt_tlb_invalidation_vma(struct xe_gt *gt, struct xe_gt_tlb_invalidation_fence *fence, struct xe_vma *vma); +int xe_gt_tlb_invalidation_range(struct xe_gt *gt, + struct xe_gt_tlb_invalidation_fence *fence, + u64 start, u64 end, u32 asid); int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno); int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len); diff --git a/drivers/gpu/drm/xe/xe_gt_topology.c b/drivers/gpu/drm/xe/xe_gt_topology.c index 3733e7a6860d..25ff03ab8448 100644 --- a/drivers/gpu/drm/xe/xe_gt_topology.c +++ b/drivers/gpu/drm/xe/xe_gt_topology.c @@ -108,7 +108,9 @@ gen_l3_mask_from_pattern(struct xe_device *xe, xe_l3_bank_mask_t dst, { unsigned long bit; - xe_assert(xe, fls(mask) <= patternbits); + xe_assert(xe, find_last_bit(pattern, XE_MAX_L3_BANK_MASK_BITS) < patternbits || + bitmap_empty(pattern, XE_MAX_L3_BANK_MASK_BITS)); + xe_assert(xe, !mask || patternbits * (__fls(mask) + 1) <= XE_MAX_L3_BANK_MASK_BITS); for_each_set_bit(bit, &mask, 32) { xe_l3_bank_mask_t shifted_pattern = {}; @@ -278,3 +280,13 @@ bool xe_gt_topology_has_dss_in_quadrant(struct xe_gt *gt, int quad) return quad_first < (quad + 1) * dss_per_quad; } + +bool xe_gt_has_geometry_dss(struct xe_gt *gt, unsigned int dss) +{ + return test_bit(dss, gt->fuse_topo.g_dss_mask); +} + +bool xe_gt_has_compute_dss(struct xe_gt *gt, unsigned int dss) +{ + return test_bit(dss, gt->fuse_topo.c_dss_mask); +} diff --git a/drivers/gpu/drm/xe/xe_gt_topology.h b/drivers/gpu/drm/xe/xe_gt_topology.h index b3e357777a6e..746b325bbf6e 100644 --- a/drivers/gpu/drm/xe/xe_gt_topology.h +++ b/drivers/gpu/drm/xe/xe_gt_topology.h @@ -33,4 +33,7 @@ bool xe_dss_mask_empty(const xe_dss_mask_t mask); bool xe_gt_topology_has_dss_in_quadrant(struct xe_gt *gt, int quad); +bool xe_gt_has_geometry_dss(struct xe_gt *gt, unsigned int dss); +bool xe_gt_has_compute_dss(struct xe_gt *gt, unsigned int dss); + #endif /* _XE_GT_TOPOLOGY_H_ */ diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h index cfdc761ff7f4..8dc203413a27 100644 --- a/drivers/gpu/drm/xe/xe_gt_types.h +++ b/drivers/gpu/drm/xe/xe_gt_types.h @@ -110,8 +110,6 @@ struct xe_gt { struct { /** @info.type: type of GT */ enum xe_gt_type type; - /** @info.id: Unique ID of this GT within the PCI Device */ - u8 id; /** @info.reference_clock: clock frequency */ u32 reference_clock; /** @info.engine_mask: mask of engines present on GT */ @@ -124,6 +122,10 @@ struct xe_gt { u64 __engine_mask; /** @info.gmdid: raw GMD_ID value from hardware */ u32 gmdid; + /** @info.id: Unique ID of this GT within the PCI Device */ + u8 id; + /** @info.has_indirect_ring_state: GT has indirect ring state support */ + u8 has_indirect_ring_state:1; } info; /** diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index 240e7a4bbff1..0c9938e0ab8c 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -451,7 +451,7 @@ static int guc_xfer_rsa(struct xe_guc *guc) return 0; } -static int guc_wait_ucode(struct xe_guc *guc) +static void guc_wait_ucode(struct xe_guc *guc) { struct xe_gt *gt = guc_to_gt(guc); u32 status; @@ -479,30 +479,26 @@ static int guc_wait_ucode(struct xe_guc *guc) 200000, &status, false); if (ret) { - xe_gt_info(gt, "GuC load failed: status = 0x%08X\n", status); - xe_gt_info(gt, "GuC status: Reset = %u, BootROM = %#X, UKernel = %#X, MIA = %#X, Auth = %#X\n", - REG_FIELD_GET(GS_MIA_IN_RESET, status), - REG_FIELD_GET(GS_BOOTROM_MASK, status), - REG_FIELD_GET(GS_UKERNEL_MASK, status), - REG_FIELD_GET(GS_MIA_MASK, status), - REG_FIELD_GET(GS_AUTH_STATUS_MASK, status)); - - if ((status & GS_BOOTROM_MASK) == GS_BOOTROM_RSA_FAILED) { - xe_gt_info(gt, "GuC firmware signature verification failed\n"); - ret = -ENOEXEC; - } + xe_gt_err(gt, "GuC load failed: status = 0x%08X\n", status); + xe_gt_err(gt, "GuC status: Reset = %u, BootROM = %#X, UKernel = %#X, MIA = %#X, Auth = %#X\n", + REG_FIELD_GET(GS_MIA_IN_RESET, status), + REG_FIELD_GET(GS_BOOTROM_MASK, status), + REG_FIELD_GET(GS_UKERNEL_MASK, status), + REG_FIELD_GET(GS_MIA_MASK, status), + REG_FIELD_GET(GS_AUTH_STATUS_MASK, status)); + + if ((status & GS_BOOTROM_MASK) == GS_BOOTROM_RSA_FAILED) + xe_gt_err(gt, "GuC firmware signature verification failed\n"); if (REG_FIELD_GET(GS_UKERNEL_MASK, status) == - XE_GUC_LOAD_STATUS_EXCEPTION) { - xe_gt_info(gt, "GuC firmware exception. EIP: %#x\n", - xe_mmio_read32(gt, SOFT_SCRATCH(13))); - ret = -ENXIO; - } + XE_GUC_LOAD_STATUS_EXCEPTION) + xe_gt_err(gt, "GuC firmware exception. EIP: %#x\n", + xe_mmio_read32(gt, SOFT_SCRATCH(13))); + + xe_device_declare_wedged(gt_to_xe(gt)); } else { xe_gt_dbg(gt, "GuC successfully loaded\n"); } - - return ret; } static int __xe_guc_upload(struct xe_guc *guc) @@ -532,9 +528,7 @@ static int __xe_guc_upload(struct xe_guc *guc) goto out; /* Wait for authentication */ - ret = guc_wait_ucode(guc); - if (ret) - goto out; + guc_wait_ucode(guc); xe_uc_fw_change_status(&guc->fw, XE_UC_FIRMWARE_RUNNING); return 0; @@ -891,17 +885,11 @@ void xe_guc_stop_prepare(struct xe_guc *guc) XE_WARN_ON(xe_guc_pc_stop(&guc->pc)); } -int xe_guc_stop(struct xe_guc *guc) +void xe_guc_stop(struct xe_guc *guc) { - int ret; - xe_guc_ct_stop(&guc->ct); - ret = xe_guc_submit_stop(guc); - if (ret) - return ret; - - return 0; + xe_guc_submit_stop(guc); } int xe_guc_start(struct xe_guc *guc) diff --git a/drivers/gpu/drm/xe/xe_guc.h b/drivers/gpu/drm/xe/xe_guc.h index 94f2dc5f6f90..a3c92b74a3d5 100644 --- a/drivers/gpu/drm/xe/xe_guc.h +++ b/drivers/gpu/drm/xe/xe_guc.h @@ -35,7 +35,7 @@ void xe_guc_print_info(struct xe_guc *guc, struct drm_printer *p); int xe_guc_reset_prepare(struct xe_guc *guc); void xe_guc_reset_wait(struct xe_guc *guc); void xe_guc_stop_prepare(struct xe_guc *guc); -int xe_guc_stop(struct xe_guc *guc); +void xe_guc_stop(struct xe_guc *guc); int xe_guc_start(struct xe_guc *guc); bool xe_guc_in_reset(struct xe_guc *guc); diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c index 7f5a523795c8..9c33cca4e370 100644 --- a/drivers/gpu/drm/xe/xe_guc_ads.c +++ b/drivers/gpu/drm/xe/xe_guc_ads.c @@ -9,6 +9,7 @@ #include <generated/xe_wa_oob.h> +#include "abi/guc_actions_abi.h" #include "regs/xe_engine_regs.h" #include "regs/xe_gt_regs.h" #include "regs/xe_guc_regs.h" @@ -16,6 +17,7 @@ #include "xe_gt.h" #include "xe_gt_ccs_mode.h" #include "xe_guc.h" +#include "xe_guc_ct.h" #include "xe_hw_engine.h" #include "xe_lrc.h" #include "xe_map.h" @@ -265,7 +267,6 @@ static u32 engine_enable_mask(struct xe_gt *gt, enum xe_engine_class class) static size_t calculate_golden_lrc_size(struct xe_guc_ads *ads) { - struct xe_device *xe = ads_to_xe(ads); struct xe_gt *gt = ads_to_gt(ads); size_t total_size = 0, alloc_size, real_size; int class; @@ -274,7 +275,7 @@ static size_t calculate_golden_lrc_size(struct xe_guc_ads *ads) if (!engine_enable_mask(gt, class)) continue; - real_size = xe_lrc_size(xe, class); + real_size = xe_gt_lrc_size(gt, class); alloc_size = PAGE_ALIGN(real_size); total_size += alloc_size; } @@ -440,11 +441,18 @@ int xe_guc_ads_init_post_hwconfig(struct xe_guc_ads *ads) static void guc_policies_init(struct xe_guc_ads *ads) { + struct xe_device *xe = ads_to_xe(ads); + u32 global_flags = 0; + ads_blob_write(ads, policies.dpc_promote_time, GLOBAL_POLICY_DEFAULT_DPC_PROMOTE_TIME_US); ads_blob_write(ads, policies.max_num_work_items, GLOBAL_POLICY_MAX_NUM_WI); - ads_blob_write(ads, policies.global_flags, 0); + + if (xe->wedged.mode == 2) + global_flags |= GLOBAL_POLICY_DISABLE_ENGINE_RESET; + + ads_blob_write(ads, policies.global_flags, global_flags); ads_blob_write(ads, policies.is_valid, 1); } @@ -765,7 +773,7 @@ static void guc_populate_golden_lrc(struct xe_guc_ads *ads) xe_gt_assert(gt, gt->default_lrc[class]); - real_size = xe_lrc_size(xe, class); + real_size = xe_gt_lrc_size(gt, class); alloc_size = PAGE_ALIGN(real_size); total_size += alloc_size; @@ -799,3 +807,57 @@ void xe_guc_ads_populate_post_load(struct xe_guc_ads *ads) { guc_populate_golden_lrc(ads); } + +static int guc_ads_action_update_policies(struct xe_guc_ads *ads, u32 policy_offset) +{ + struct xe_guc_ct *ct = &ads_to_guc(ads)->ct; + u32 action[] = { + XE_GUC_ACTION_GLOBAL_SCHED_POLICY_CHANGE, + policy_offset + }; + + return xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 0, 0); +} + +/** + * xe_guc_ads_scheduler_policy_toggle_reset - Toggle reset policy + * @ads: Additional data structures object + * + * This function update the GuC's engine reset policy based on wedged.mode. + * + * Return: 0 on success, and negative error code otherwise. + */ +int xe_guc_ads_scheduler_policy_toggle_reset(struct xe_guc_ads *ads) +{ + struct xe_device *xe = ads_to_xe(ads); + struct xe_gt *gt = ads_to_gt(ads); + struct xe_tile *tile = gt_to_tile(gt); + struct guc_policies *policies; + struct xe_bo *bo; + int ret = 0; + + policies = kmalloc(sizeof(*policies), GFP_KERNEL); + if (!policies) + return -ENOMEM; + + policies->dpc_promote_time = ads_blob_read(ads, policies.dpc_promote_time); + policies->max_num_work_items = ads_blob_read(ads, policies.max_num_work_items); + policies->is_valid = 1; + if (xe->wedged.mode == 2) + policies->global_flags |= GLOBAL_POLICY_DISABLE_ENGINE_RESET; + else + policies->global_flags &= ~GLOBAL_POLICY_DISABLE_ENGINE_RESET; + + bo = xe_managed_bo_create_from_data(xe, tile, policies, sizeof(struct guc_policies), + XE_BO_FLAG_VRAM_IF_DGFX(tile) | + XE_BO_FLAG_GGTT); + if (IS_ERR(bo)) { + ret = PTR_ERR(bo); + goto out; + } + + ret = guc_ads_action_update_policies(ads, xe_bo_ggtt_addr(bo)); +out: + kfree(policies); + return ret; +} diff --git a/drivers/gpu/drm/xe/xe_guc_ads.h b/drivers/gpu/drm/xe/xe_guc_ads.h index 138ef6267671..2e6674c760ff 100644 --- a/drivers/gpu/drm/xe/xe_guc_ads.h +++ b/drivers/gpu/drm/xe/xe_guc_ads.h @@ -6,12 +6,13 @@ #ifndef _XE_GUC_ADS_H_ #define _XE_GUC_ADS_H_ -#include "xe_guc_ads_types.h" +struct xe_guc_ads; int xe_guc_ads_init(struct xe_guc_ads *ads); int xe_guc_ads_init_post_hwconfig(struct xe_guc_ads *ads); void xe_guc_ads_populate(struct xe_guc_ads *ads); void xe_guc_ads_populate_minimal(struct xe_guc_ads *ads); void xe_guc_ads_populate_post_load(struct xe_guc_ads *ads); +int xe_guc_ads_scheduler_policy_toggle_reset(struct xe_guc_ads *ads); #endif diff --git a/drivers/gpu/drm/xe/xe_guc_db_mgr.c b/drivers/gpu/drm/xe/xe_guc_db_mgr.c index 8d9a0287df6b..6767e8076e6b 100644 --- a/drivers/gpu/drm/xe/xe_guc_db_mgr.c +++ b/drivers/gpu/drm/xe/xe_guc_db_mgr.c @@ -106,7 +106,8 @@ int xe_guc_db_mgr_init(struct xe_guc_db_mgr *dbm, unsigned int count) if (ret) return ret; done: - xe_gt_dbg(dbm_to_gt(dbm), "using %u doorbell(s)\n", dbm->count); + xe_gt_dbg(dbm_to_gt(dbm), "using %u doorbell%s\n", + dbm->count, str_plural(dbm->count)); return 0; } diff --git a/drivers/gpu/drm/xe/xe_guc_id_mgr.c b/drivers/gpu/drm/xe/xe_guc_id_mgr.c index 0fb7c6b78c31..cd0549d0ef89 100644 --- a/drivers/gpu/drm/xe/xe_guc_id_mgr.c +++ b/drivers/gpu/drm/xe/xe_guc_id_mgr.c @@ -97,7 +97,8 @@ int xe_guc_id_mgr_init(struct xe_guc_id_mgr *idm, unsigned int limit) if (ret) return ret; - xe_gt_info(idm_to_gt(idm), "using %u GUC ID(s)\n", idm->total); + xe_gt_info(idm_to_gt(idm), "using %u GUC ID%s\n", + idm->total, str_plural(idm->total)); return 0; } diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c index 509649d0e65e..d10aab29651e 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.c +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -15,6 +15,7 @@ #include "regs/xe_regs.h" #include "xe_bo.h" #include "xe_device.h" +#include "xe_force_wake.h" #include "xe_gt.h" #include "xe_gt_idle.h" #include "xe_gt_sysfs.h" @@ -902,6 +903,9 @@ static void xe_guc_pc_fini(struct drm_device *drm, void *arg) return; } + if (xe_device_wedged(xe)) + return; + XE_WARN_ON(xe_force_wake_get(gt_to_fw(pc_to_gt(pc)), XE_FORCEWAKE_ALL)); XE_WARN_ON(xe_guc_pc_gucrc_disable(pc)); XE_WARN_ON(xe_guc_pc_stop(pc)); diff --git a/drivers/gpu/drm/xe/xe_guc_relay.c b/drivers/gpu/drm/xe/xe_guc_relay.c index c0a2d8d5d3b3..c3bbaf474f9a 100644 --- a/drivers/gpu/drm/xe/xe_guc_relay.c +++ b/drivers/gpu/drm/xe/xe_guc_relay.c @@ -19,6 +19,7 @@ #include "xe_device.h" #include "xe_gt.h" #include "xe_gt_sriov_printk.h" +#include "xe_gt_sriov_pf_service.h" #include "xe_guc.h" #include "xe_guc_ct.h" #include "xe_guc_hxg_helpers.h" @@ -664,6 +665,7 @@ static int relay_testloop_action_handler(struct xe_guc_relay *relay, u32 origin, static int relay_action_handler(struct xe_guc_relay *relay, u32 origin, const u32 *msg, u32 len, u32 *response, u32 size) { + struct xe_gt *gt = relay_to_gt(relay); u32 type; int ret; @@ -674,8 +676,10 @@ static int relay_action_handler(struct xe_guc_relay *relay, u32 origin, type = FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[0]); - /* XXX: PF services will be added later */ - ret = -EOPNOTSUPP; + if (IS_SRIOV_PF(relay_to_xe(relay))) + ret = xe_gt_sriov_pf_service_process_request(gt, origin, msg, len, response, size); + else + ret = -EOPNOTSUPP; if (type == GUC_HXG_TYPE_EVENT) relay_assert(relay, ret <= 0); diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index c7d38469fb46..fde527d34f58 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -52,13 +52,14 @@ exec_queue_to_guc(struct xe_exec_queue *q) * engine done being processed). */ #define EXEC_QUEUE_STATE_REGISTERED (1 << 0) -#define ENGINE_STATE_ENABLED (1 << 1) -#define EXEC_QUEUE_STATE_PENDING_ENABLE (1 << 2) +#define EXEC_QUEUE_STATE_ENABLED (1 << 1) +#define EXEC_QUEUE_STATE_PENDING_ENABLE (1 << 2) #define EXEC_QUEUE_STATE_PENDING_DISABLE (1 << 3) #define EXEC_QUEUE_STATE_DESTROYED (1 << 4) -#define ENGINE_STATE_SUSPENDED (1 << 5) -#define EXEC_QUEUE_STATE_RESET (1 << 6) -#define ENGINE_STATE_KILLED (1 << 7) +#define EXEC_QUEUE_STATE_SUSPENDED (1 << 5) +#define EXEC_QUEUE_STATE_RESET (1 << 6) +#define EXEC_QUEUE_STATE_KILLED (1 << 7) +#define EXEC_QUEUE_STATE_WEDGED (1 << 8) static bool exec_queue_registered(struct xe_exec_queue *q) { @@ -77,17 +78,17 @@ static void clear_exec_queue_registered(struct xe_exec_queue *q) static bool exec_queue_enabled(struct xe_exec_queue *q) { - return atomic_read(&q->guc->state) & ENGINE_STATE_ENABLED; + return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_ENABLED; } static void set_exec_queue_enabled(struct xe_exec_queue *q) { - atomic_or(ENGINE_STATE_ENABLED, &q->guc->state); + atomic_or(EXEC_QUEUE_STATE_ENABLED, &q->guc->state); } static void clear_exec_queue_enabled(struct xe_exec_queue *q) { - atomic_and(~ENGINE_STATE_ENABLED, &q->guc->state); + atomic_and(~EXEC_QUEUE_STATE_ENABLED, &q->guc->state); } static bool exec_queue_pending_enable(struct xe_exec_queue *q) @@ -142,17 +143,17 @@ static void set_exec_queue_banned(struct xe_exec_queue *q) static bool exec_queue_suspended(struct xe_exec_queue *q) { - return atomic_read(&q->guc->state) & ENGINE_STATE_SUSPENDED; + return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_SUSPENDED; } static void set_exec_queue_suspended(struct xe_exec_queue *q) { - atomic_or(ENGINE_STATE_SUSPENDED, &q->guc->state); + atomic_or(EXEC_QUEUE_STATE_SUSPENDED, &q->guc->state); } static void clear_exec_queue_suspended(struct xe_exec_queue *q) { - atomic_and(~ENGINE_STATE_SUSPENDED, &q->guc->state); + atomic_and(~EXEC_QUEUE_STATE_SUSPENDED, &q->guc->state); } static bool exec_queue_reset(struct xe_exec_queue *q) @@ -167,17 +168,28 @@ static void set_exec_queue_reset(struct xe_exec_queue *q) static bool exec_queue_killed(struct xe_exec_queue *q) { - return atomic_read(&q->guc->state) & ENGINE_STATE_KILLED; + return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_KILLED; } static void set_exec_queue_killed(struct xe_exec_queue *q) { - atomic_or(ENGINE_STATE_KILLED, &q->guc->state); + atomic_or(EXEC_QUEUE_STATE_KILLED, &q->guc->state); } -static bool exec_queue_killed_or_banned(struct xe_exec_queue *q) +static bool exec_queue_wedged(struct xe_exec_queue *q) { - return exec_queue_killed(q) || exec_queue_banned(q); + return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_WEDGED; +} + +static void set_exec_queue_wedged(struct xe_exec_queue *q) +{ + atomic_or(EXEC_QUEUE_STATE_WEDGED, &q->guc->state); +} + +static bool exec_queue_killed_or_banned_or_wedged(struct xe_exec_queue *q) +{ + return exec_queue_banned(q) || (atomic_read(&q->guc->state) & + (EXEC_QUEUE_STATE_WEDGED | EXEC_QUEUE_STATE_KILLED)); } #ifdef CONFIG_PROVE_LOCKING @@ -240,6 +252,17 @@ static void guc_submit_fini(struct drm_device *drm, void *arg) free_submit_wq(guc); } +static void guc_submit_wedged_fini(struct drm_device *drm, void *arg) +{ + struct xe_guc *guc = arg; + struct xe_exec_queue *q; + unsigned long index; + + xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) + if (exec_queue_wedged(q)) + xe_exec_queue_put(q); +} + static const struct xe_exec_queue_ops guc_exec_queue_ops; static void primelockdep(struct xe_guc *guc) @@ -250,7 +273,6 @@ static void primelockdep(struct xe_guc *guc) fs_reclaim_acquire(GFP_KERNEL); mutex_lock(&guc->submission_state.lock); - might_lock(&guc->submission_state.suspend.lock); mutex_unlock(&guc->submission_state.lock); fs_reclaim_release(GFP_KERNEL); @@ -278,9 +300,6 @@ int xe_guc_submit_init(struct xe_guc *guc) xa_init(&guc->submission_state.exec_queue_lookup); - spin_lock_init(&guc->submission_state.suspend.lock); - guc->submission_state.suspend.context = dma_fence_context_alloc(1); - primelockdep(guc); return drmm_add_action_or_reset(&xe->drm, guc_submit_fini, guc); @@ -430,9 +449,9 @@ static void set_min_preemption_timeout(struct xe_guc *guc, struct xe_exec_queue xe_map_wr_field(xe_, &map_, 0, struct guc_submit_parallel_scratch, \ field_, val_) -static void __register_mlrc_engine(struct xe_guc *guc, - struct xe_exec_queue *q, - struct guc_ctxt_registration_info *info) +static void __register_mlrc_exec_queue(struct xe_guc *guc, + struct xe_exec_queue *q, + struct guc_ctxt_registration_info *info) { #define MAX_MLRC_REG_SIZE (13 + XE_HW_ENGINE_MAX_INSTANCE * 2) struct xe_device *xe = guc_to_xe(guc); @@ -469,8 +488,8 @@ static void __register_mlrc_engine(struct xe_guc *guc, xe_guc_ct_send(&guc->ct, action, len, 0, 0); } -static void __register_engine(struct xe_guc *guc, - struct guc_ctxt_registration_info *info) +static void __register_exec_queue(struct xe_guc *guc, + struct guc_ctxt_registration_info *info) { u32 action[] = { XE_GUC_ACTION_REGISTER_CONTEXT, @@ -490,7 +509,7 @@ static void __register_engine(struct xe_guc *guc, xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 0, 0); } -static void register_engine(struct xe_exec_queue *q) +static void register_exec_queue(struct xe_exec_queue *q) { struct xe_guc *guc = exec_queue_to_guc(q); struct xe_device *xe = guc_to_xe(guc); @@ -538,9 +557,9 @@ static void register_engine(struct xe_exec_queue *q) set_exec_queue_registered(q); trace_xe_exec_queue_register(q); if (xe_exec_queue_is_parallel(q)) - __register_mlrc_engine(guc, q, &info); + __register_mlrc_exec_queue(guc, q, &info); else - __register_engine(guc, &info); + __register_exec_queue(guc, &info); init_policies(guc, q); } @@ -658,7 +677,7 @@ static void submit_exec_queue(struct xe_exec_queue *q) if (xe_exec_queue_is_parallel(q)) wq_item_append(q); else - xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail); + xe_lrc_set_ring_tail(lrc, lrc->ring.tail); if (exec_queue_suspended(q) && !xe_exec_queue_is_parallel(q)) return; @@ -708,9 +727,9 @@ guc_exec_queue_run_job(struct drm_sched_job *drm_job) trace_xe_sched_job_run(job); - if (!exec_queue_killed_or_banned(q) && !xe_sched_job_is_error(job)) { + if (!exec_queue_killed_or_banned_or_wedged(q) && !xe_sched_job_is_error(job)) { if (!exec_queue_registered(q)) - register_engine(q); + register_exec_queue(q); if (!lr) /* LR jobs are emitted in the exec IOCTL */ q->ring_ops->emit_job(job); submit_exec_queue(q); @@ -844,6 +863,40 @@ static void xe_guc_exec_queue_trigger_cleanup(struct xe_exec_queue *q) xe_sched_tdr_queue_imm(&q->guc->sched); } +static bool guc_submit_hint_wedged(struct xe_guc *guc) +{ + struct xe_device *xe = guc_to_xe(guc); + struct xe_exec_queue *q; + unsigned long index; + int err; + + if (xe->wedged.mode != 2) + return false; + + if (xe_device_wedged(xe)) + return true; + + xe_device_declare_wedged(xe); + + xe_guc_submit_reset_prepare(guc); + xe_guc_ct_stop(&guc->ct); + + err = drmm_add_action_or_reset(&guc_to_xe(guc)->drm, + guc_submit_wedged_fini, guc); + if (err) { + drm_err(&xe->drm, "Failed to register xe_guc_submit clean-up on wedged.mode=2. Although device is wedged.\n"); + return true; /* Device is wedged anyway */ + } + + mutex_lock(&guc->submission_state.lock); + xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) + if (xe_exec_queue_get_unless_zero(q)) + set_exec_queue_wedged(q); + mutex_unlock(&guc->submission_state.lock); + + return true; +} + static void xe_guc_exec_queue_lr_cleanup(struct work_struct *w) { struct xe_guc_exec_queue *ge = @@ -852,10 +905,13 @@ static void xe_guc_exec_queue_lr_cleanup(struct work_struct *w) struct xe_guc *guc = exec_queue_to_guc(q); struct xe_device *xe = guc_to_xe(guc); struct xe_gpu_scheduler *sched = &ge->sched; + bool wedged; xe_assert(xe, xe_exec_queue_is_lr(q)); trace_xe_exec_queue_lr_cleanup(q); + wedged = guc_submit_hint_wedged(exec_queue_to_guc(q)); + /* Kill the run_job / process_msg entry points */ xe_sched_submission_stop(sched); @@ -870,7 +926,7 @@ static void xe_guc_exec_queue_lr_cleanup(struct work_struct *w) * xe_guc_deregister_done_handler() which treats it as an unexpected * state. */ - if (exec_queue_registered(q) && !exec_queue_destroyed(q)) { + if (!wedged && exec_queue_registered(q) && !exec_queue_destroyed(q)) { struct xe_guc *guc = exec_queue_to_guc(q); int ret; @@ -905,6 +961,7 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) struct xe_device *xe = guc_to_xe(exec_queue_to_guc(q)); int err = -ETIME; int i = 0; + bool wedged; /* * TDR has fired before free job worker. Common if exec queue @@ -923,11 +980,15 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) xe_gt_WARN(q->gt, q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q), "VM job timed out on non-killed execqueue\n"); - simple_error_capture(q); - xe_devcoredump(job); + if (!exec_queue_killed(q)) { + simple_error_capture(q); + xe_devcoredump(job); + } trace_xe_sched_job_timedout(job); + wedged = guc_submit_hint_wedged(exec_queue_to_guc(q)); + /* Kill the run_job entry point */ xe_sched_submission_stop(sched); @@ -935,8 +996,8 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) * Kernel jobs should never fail, nor should VM jobs if they do * somethings has gone wrong and the GT needs a reset */ - if (q->flags & EXEC_QUEUE_FLAG_KERNEL || - (q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q))) { + if (!wedged && (q->flags & EXEC_QUEUE_FLAG_KERNEL || + (q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q)))) { if (!xe_sched_invalidate_job(job, 2)) { xe_sched_add_pending_job(sched, job); xe_sched_submission_start(sched); @@ -946,7 +1007,7 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) } /* Engine state now stable, disable scheduling if needed */ - if (exec_queue_registered(q)) { + if (!wedged && exec_queue_registered(q)) { struct xe_guc *guc = exec_queue_to_guc(q); int ret; @@ -989,6 +1050,7 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) */ xe_sched_add_pending_job(sched, job); xe_sched_submission_start(sched); + xe_guc_exec_queue_trigger_cleanup(q); /* Mark all outstanding jobs as bad, thus completing them */ @@ -1028,7 +1090,7 @@ static void guc_exec_queue_fini_async(struct xe_exec_queue *q) INIT_WORK(&q->guc->fini_async, __guc_exec_queue_fini_async); /* We must block on kernel engines so slabs are empty on driver unload */ - if (q->flags & EXEC_QUEUE_FLAG_PERMANENT) + if (q->flags & EXEC_QUEUE_FLAG_PERMANENT || exec_queue_wedged(q)) __guc_exec_queue_fini_async(&q->guc->fini_async); else queue_work(system_wq, &q->guc->fini_async); @@ -1063,7 +1125,7 @@ static void __guc_exec_queue_process_msg_cleanup(struct xe_sched_msg *msg) static bool guc_exec_queue_allowed_to_change_state(struct xe_exec_queue *q) { - return !exec_queue_killed_or_banned(q) && exec_queue_registered(q); + return !exec_queue_killed_or_banned_or_wedged(q) && exec_queue_registered(q); } static void __guc_exec_queue_process_msg_set_sched_props(struct xe_sched_msg *msg) @@ -1274,7 +1336,7 @@ static void guc_exec_queue_fini(struct xe_exec_queue *q) { struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_CLEANUP; - if (!(q->flags & EXEC_QUEUE_FLAG_PERMANENT)) + if (!(q->flags & EXEC_QUEUE_FLAG_PERMANENT) && !exec_queue_wedged(q)) guc_exec_queue_add_msg(q, msg, CLEANUP); else __guc_exec_queue_fini(exec_queue_to_guc(q), q); @@ -1285,7 +1347,8 @@ static int guc_exec_queue_set_priority(struct xe_exec_queue *q, { struct xe_sched_msg *msg; - if (q->sched_props.priority == priority || exec_queue_killed_or_banned(q)) + if (q->sched_props.priority == priority || + exec_queue_killed_or_banned_or_wedged(q)) return 0; msg = kmalloc(sizeof(*msg), GFP_KERNEL); @@ -1303,7 +1366,7 @@ static int guc_exec_queue_set_timeslice(struct xe_exec_queue *q, u32 timeslice_u struct xe_sched_msg *msg; if (q->sched_props.timeslice_us == timeslice_us || - exec_queue_killed_or_banned(q)) + exec_queue_killed_or_banned_or_wedged(q)) return 0; msg = kmalloc(sizeof(*msg), GFP_KERNEL); @@ -1322,7 +1385,7 @@ static int guc_exec_queue_set_preempt_timeout(struct xe_exec_queue *q, struct xe_sched_msg *msg; if (q->sched_props.preempt_timeout_us == preempt_timeout_us || - exec_queue_killed_or_banned(q)) + exec_queue_killed_or_banned_or_wedged(q)) return 0; msg = kmalloc(sizeof(*msg), GFP_KERNEL); @@ -1339,7 +1402,7 @@ static int guc_exec_queue_suspend(struct xe_exec_queue *q) { struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_SUSPEND; - if (exec_queue_killed_or_banned(q) || q->guc->suspend_pending) + if (exec_queue_killed_or_banned_or_wedged(q) || q->guc->suspend_pending) return -EINVAL; q->guc->suspend_pending = true; @@ -1410,7 +1473,7 @@ static void guc_exec_queue_stop(struct xe_guc *guc, struct xe_exec_queue *q) set_exec_queue_suspended(q); suspend_fence_signal(q); } - atomic_and(EXEC_QUEUE_STATE_DESTROYED | ENGINE_STATE_SUSPENDED, + atomic_and(EXEC_QUEUE_STATE_DESTROYED | EXEC_QUEUE_STATE_SUSPENDED, &q->guc->state); q->guc->resume_time = 0; trace_xe_exec_queue_stop(q); @@ -1458,7 +1521,7 @@ void xe_guc_submit_reset_wait(struct xe_guc *guc) wait_event(guc->ct.wq, !guc_read_stopped(guc)); } -int xe_guc_submit_stop(struct xe_guc *guc) +void xe_guc_submit_stop(struct xe_guc *guc) { struct xe_exec_queue *q; unsigned long index; @@ -1478,14 +1541,13 @@ int xe_guc_submit_stop(struct xe_guc *guc) * creation which is protected by guc->submission_state.lock. */ - return 0; } static void guc_exec_queue_start(struct xe_exec_queue *q) { struct xe_gpu_scheduler *sched = &q->guc->sched; - if (!exec_queue_killed_or_banned(q)) { + if (!exec_queue_killed_or_banned_or_wedged(q)) { int i; trace_xe_exec_queue_resubmit(q); diff --git a/drivers/gpu/drm/xe/xe_guc_submit.h b/drivers/gpu/drm/xe/xe_guc_submit.h index fad0421ead36..4275b7da9df5 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.h +++ b/drivers/gpu/drm/xe/xe_guc_submit.h @@ -16,7 +16,7 @@ int xe_guc_submit_init(struct xe_guc *guc); int xe_guc_submit_reset_prepare(struct xe_guc *guc); void xe_guc_submit_reset_wait(struct xe_guc *guc); -int xe_guc_submit_stop(struct xe_guc *guc); +void xe_guc_submit_stop(struct xe_guc *guc); int xe_guc_submit_start(struct xe_guc *guc); int xe_guc_sched_done_handler(struct xe_guc *guc, u32 *msg, u32 len); diff --git a/drivers/gpu/drm/xe/xe_guc_types.h b/drivers/gpu/drm/xe/xe_guc_types.h index 82bd93f7867d..546ac6350a31 100644 --- a/drivers/gpu/drm/xe/xe_guc_types.h +++ b/drivers/gpu/drm/xe/xe_guc_types.h @@ -72,15 +72,6 @@ struct xe_guc { atomic_t stopped; /** @submission_state.lock: protects submission state */ struct mutex lock; - /** @submission_state.suspend: suspend fence state */ - struct { - /** @submission_state.suspend.lock: suspend fences lock */ - spinlock_t lock; - /** @submission_state.suspend.context: suspend fences context */ - u64 context; - /** @submission_state.suspend.seqno: suspend fences seqno */ - u32 seqno; - } suspend; #ifdef CONFIG_PROVE_LOCKING #define NUM_SUBMIT_WQ 256 /** @submission_state.submit_wq_pool: submission ordered workqueues pool */ diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index 455f375c1cbd..45f582a7caaa 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -18,6 +18,7 @@ #include "xe_gt.h" #include "xe_gt_ccs_mode.h" #include "xe_gt_printk.h" +#include "xe_gt_mcr.h" #include "xe_gt_topology.h" #include "xe_hw_fence.h" #include "xe_irq.h" @@ -716,6 +717,11 @@ static void check_gsc_availability(struct xe_gt *gt) */ if (!xe_uc_fw_is_available(>->uc.gsc.fw)) { gt->info.engine_mask &= ~BIT(XE_HW_ENGINE_GSCCS0); + + /* interrupts where previously enabled, so turn them off */ + xe_mmio_write32(gt, GUNIT_GSC_INTR_ENABLE, 0); + xe_mmio_write32(gt, GUNIT_GSC_INTR_MASK, ~0); + drm_info(&xe->drm, "gsccs disabled due to lack of FW\n"); } } @@ -766,6 +772,57 @@ void xe_hw_engine_handle_irq(struct xe_hw_engine *hwe, u16 intr_vec) xe_hw_fence_irq_run(hwe->fence_irq); } +static bool +is_slice_common_per_gslice(struct xe_device *xe) +{ + return GRAPHICS_VERx100(xe) >= 1255; +} + +static void +xe_hw_engine_snapshot_instdone_capture(struct xe_hw_engine *hwe, + struct xe_hw_engine_snapshot *snapshot) +{ + struct xe_gt *gt = hwe->gt; + struct xe_device *xe = gt_to_xe(gt); + unsigned int dss; + u16 group, instance; + + snapshot->reg.instdone.ring = hw_engine_mmio_read32(hwe, RING_INSTDONE(0)); + + if (snapshot->hwe->class != XE_ENGINE_CLASS_RENDER) + return; + + if (is_slice_common_per_gslice(xe) == false) { + snapshot->reg.instdone.slice_common[0] = + xe_mmio_read32(gt, SC_INSTDONE); + snapshot->reg.instdone.slice_common_extra[0] = + xe_mmio_read32(gt, SC_INSTDONE_EXTRA); + snapshot->reg.instdone.slice_common_extra2[0] = + xe_mmio_read32(gt, SC_INSTDONE_EXTRA2); + } else { + for_each_geometry_dss(dss, gt, group, instance) { + snapshot->reg.instdone.slice_common[dss] = + xe_gt_mcr_unicast_read(gt, XEHPG_SC_INSTDONE, group, instance); + snapshot->reg.instdone.slice_common_extra[dss] = + xe_gt_mcr_unicast_read(gt, XEHPG_SC_INSTDONE_EXTRA, group, instance); + snapshot->reg.instdone.slice_common_extra2[dss] = + xe_gt_mcr_unicast_read(gt, XEHPG_SC_INSTDONE_EXTRA2, group, instance); + } + } + + for_each_geometry_dss(dss, gt, group, instance) { + snapshot->reg.instdone.sampler[dss] = + xe_gt_mcr_unicast_read(gt, SAMPLER_INSTDONE, group, instance); + snapshot->reg.instdone.row[dss] = + xe_gt_mcr_unicast_read(gt, ROW_INSTDONE, group, instance); + + if (GRAPHICS_VERx100(xe) >= 1255) + snapshot->reg.instdone.geom_svg[dss] = + xe_gt_mcr_unicast_read(gt, XEHPG_INSTDONE_GEOM_SVGUNIT, + group, instance); + } +} + /** * xe_hw_engine_snapshot_capture - Take a quick snapshot of the HW Engine. * @hwe: Xe HW Engine. @@ -780,6 +837,7 @@ struct xe_hw_engine_snapshot * xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe) { struct xe_hw_engine_snapshot *snapshot; + size_t len; u64 val; if (!xe_hw_engine_is_valid(hwe)) @@ -790,8 +848,30 @@ xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe) if (!snapshot) return NULL; + /* Because XE_MAX_DSS_FUSE_BITS is defined in xe_gt_types.h and it + * includes xe_hw_engine_types.h the length of this 3 registers can't be + * set in struct xe_hw_engine_snapshot, so here doing additional + * allocations. + */ + len = (XE_MAX_DSS_FUSE_BITS * sizeof(u32)); + snapshot->reg.instdone.slice_common = kzalloc(len, GFP_ATOMIC); + snapshot->reg.instdone.slice_common_extra = kzalloc(len, GFP_ATOMIC); + snapshot->reg.instdone.slice_common_extra2 = kzalloc(len, GFP_ATOMIC); + snapshot->reg.instdone.sampler = kzalloc(len, GFP_ATOMIC); + snapshot->reg.instdone.row = kzalloc(len, GFP_ATOMIC); + snapshot->reg.instdone.geom_svg = kzalloc(len, GFP_ATOMIC); + if (!snapshot->reg.instdone.slice_common || + !snapshot->reg.instdone.slice_common_extra || + !snapshot->reg.instdone.slice_common_extra2 || + !snapshot->reg.instdone.sampler || + !snapshot->reg.instdone.row || + !snapshot->reg.instdone.geom_svg) { + xe_hw_engine_snapshot_free(snapshot); + return NULL; + } + snapshot->name = kstrdup(hwe->name, GFP_ATOMIC); - snapshot->class = hwe->class; + snapshot->hwe = hwe; snapshot->logical_instance = hwe->logical_instance; snapshot->forcewake.domain = hwe->domain; snapshot->forcewake.ref = xe_force_wake_ref(gt_to_fw(hwe->gt), @@ -828,6 +908,13 @@ xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe) snapshot->reg.ring_hwstam = hw_engine_mmio_read32(hwe, RING_HWSTAM(0)); snapshot->reg.ring_hws_pga = hw_engine_mmio_read32(hwe, RING_HWS_PGA(0)); snapshot->reg.ring_start = hw_engine_mmio_read32(hwe, RING_START(0)); + if (xe_gt_has_indirect_ring_state(hwe->gt)) { + snapshot->reg.indirect_ring_state = + hw_engine_mmio_read32(hwe, INDIRECT_RING_STATE(0)); + snapshot->reg.ring_start_udw = + hw_engine_mmio_read32(hwe, RING_START_UDW(0)); + } + snapshot->reg.ring_head = hw_engine_mmio_read32(hwe, RING_HEAD(0)) & HEAD_ADDR; snapshot->reg.ring_tail = @@ -841,13 +928,57 @@ xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe) snapshot->reg.ring_emr = hw_engine_mmio_read32(hwe, RING_EMR(0)); snapshot->reg.ring_eir = hw_engine_mmio_read32(hwe, RING_EIR(0)); snapshot->reg.ipehr = hw_engine_mmio_read32(hwe, RING_IPEHR(0)); + xe_hw_engine_snapshot_instdone_capture(hwe, snapshot); - if (snapshot->class == XE_ENGINE_CLASS_COMPUTE) + if (snapshot->hwe->class == XE_ENGINE_CLASS_COMPUTE) snapshot->reg.rcu_mode = xe_mmio_read32(hwe->gt, RCU_MODE); return snapshot; } +static void +xe_hw_engine_snapshot_instdone_print(struct xe_hw_engine_snapshot *snapshot, struct drm_printer *p) +{ + struct xe_gt *gt = snapshot->hwe->gt; + struct xe_device *xe = gt_to_xe(gt); + u16 group, instance; + unsigned int dss; + + drm_printf(p, "\tRING_INSTDONE: 0x%08x\n", snapshot->reg.instdone.ring); + + if (snapshot->hwe->class != XE_ENGINE_CLASS_RENDER) + return; + + if (is_slice_common_per_gslice(xe) == false) { + drm_printf(p, "\tSC_INSTDONE[0]: 0x%08x\n", + snapshot->reg.instdone.slice_common[0]); + drm_printf(p, "\tSC_INSTDONE_EXTRA[0]: 0x%08x\n", + snapshot->reg.instdone.slice_common_extra[0]); + drm_printf(p, "\tSC_INSTDONE_EXTRA2[0]: 0x%08x\n", + snapshot->reg.instdone.slice_common_extra2[0]); + } else { + for_each_geometry_dss(dss, gt, group, instance) { + drm_printf(p, "\tSC_INSTDONE[%u]: 0x%08x\n", dss, + snapshot->reg.instdone.slice_common[dss]); + drm_printf(p, "\tSC_INSTDONE_EXTRA[%u]: 0x%08x\n", dss, + snapshot->reg.instdone.slice_common_extra[dss]); + drm_printf(p, "\tSC_INSTDONE_EXTRA2[%u]: 0x%08x\n", dss, + snapshot->reg.instdone.slice_common_extra2[dss]); + } + } + + for_each_geometry_dss(dss, gt, group, instance) { + drm_printf(p, "\tSAMPLER_INSTDONE[%u]: 0x%08x\n", dss, + snapshot->reg.instdone.sampler[dss]); + drm_printf(p, "\tROW_INSTDONE[%u]: 0x%08x\n", dss, + snapshot->reg.instdone.row[dss]); + + if (GRAPHICS_VERx100(xe) >= 1255) + drm_printf(p, "\tINSTDONE_GEOM_SVGUNIT[%u]: 0x%08x\n", + dss, snapshot->reg.instdone.geom_svg[dss]); + } +} + /** * xe_hw_engine_snapshot_print - Print out a given Xe HW Engine snapshot. * @snapshot: Xe HW Engine snapshot object. @@ -873,6 +1004,8 @@ void xe_hw_engine_snapshot_print(struct xe_hw_engine_snapshot *snapshot, drm_printf(p, "\tRING_EXECLIST_SQ_CONTENTS: 0x%016llx\n", snapshot->reg.ring_execlist_sq_contents); drm_printf(p, "\tRING_START: 0x%08x\n", snapshot->reg.ring_start); + drm_printf(p, "\tRING_START_UDW: 0x%08x\n", + snapshot->reg.ring_start_udw); drm_printf(p, "\tRING_HEAD: 0x%08x\n", snapshot->reg.ring_head); drm_printf(p, "\tRING_TAIL: 0x%08x\n", snapshot->reg.ring_tail); drm_printf(p, "\tRING_CTL: 0x%08x\n", snapshot->reg.ring_ctl); @@ -886,10 +1019,15 @@ void xe_hw_engine_snapshot_print(struct xe_hw_engine_snapshot *snapshot, drm_printf(p, "\tACTHD: 0x%016llx\n", snapshot->reg.ring_acthd); drm_printf(p, "\tBBADDR: 0x%016llx\n", snapshot->reg.ring_bbaddr); drm_printf(p, "\tDMA_FADDR: 0x%016llx\n", snapshot->reg.ring_dma_fadd); + drm_printf(p, "\tINDIRECT_RING_STATE: 0x%08x\n", + snapshot->reg.indirect_ring_state); drm_printf(p, "\tIPEHR: 0x%08x\n", snapshot->reg.ipehr); - if (snapshot->class == XE_ENGINE_CLASS_COMPUTE) + xe_hw_engine_snapshot_instdone_print(snapshot, p); + + if (snapshot->hwe->class == XE_ENGINE_CLASS_COMPUTE) drm_printf(p, "\tRCU_MODE: 0x%08x\n", snapshot->reg.rcu_mode); + drm_puts(p, "\n"); } /** @@ -904,6 +1042,12 @@ void xe_hw_engine_snapshot_free(struct xe_hw_engine_snapshot *snapshot) if (!snapshot) return; + kfree(snapshot->reg.instdone.slice_common); + kfree(snapshot->reg.instdone.slice_common_extra); + kfree(snapshot->reg.instdone.slice_common_extra2); + kfree(snapshot->reg.instdone.sampler); + kfree(snapshot->reg.instdone.row); + kfree(snapshot->reg.instdone.geom_svg); kfree(snapshot->name); kfree(snapshot); } diff --git a/drivers/gpu/drm/xe/xe_hw_engine_types.h b/drivers/gpu/drm/xe/xe_hw_engine_types.h index d7f828c76cc5..5f4b67acba99 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine_types.h +++ b/drivers/gpu/drm/xe/xe_hw_engine_types.h @@ -158,8 +158,8 @@ struct xe_hw_engine { struct xe_hw_engine_snapshot { /** @name: name of the hw engine */ char *name; - /** @class: class of this hw engine */ - enum xe_engine_class class; + /** @hwe: hw engine */ + struct xe_hw_engine *hwe; /** @logical_instance: logical instance of this hw engine */ u16 logical_instance; /** @forcewake: Force Wake information snapshot */ @@ -189,6 +189,8 @@ struct xe_hw_engine_snapshot { u32 ring_hws_pga; /** @reg.ring_start: RING_START */ u32 ring_start; + /** @reg.ring_start_udw: RING_START_UDW */ + u32 ring_start_udw; /** @reg.ring_head: RING_HEAD */ u32 ring_head; /** @reg.ring_tail: RING_TAIL */ @@ -207,10 +209,28 @@ struct xe_hw_engine_snapshot { u32 ring_emr; /** @reg.ring_eir: RING_EIR */ u32 ring_eir; + /** @reg.indirect_ring_state: INDIRECT_RING_STATE */ + u32 indirect_ring_state; /** @reg.ipehr: IPEHR */ u32 ipehr; /** @reg.rcu_mode: RCU_MODE */ u32 rcu_mode; + struct { + /** @reg.instdone.ring: RING_INSTDONE */ + u32 ring; + /** @reg.instdone.slice_common: SC_INSTDONE */ + u32 *slice_common; + /** @reg.instdone.slice_common_extra: SC_INSTDONE_EXTRA */ + u32 *slice_common_extra; + /** @reg.instdone.slice_common_extra2: SC_INSTDONE_EXTRA2 */ + u32 *slice_common_extra2; + /** @reg.instdone.sampler: SAMPLER_INSTDONE */ + u32 *sampler; + /** @reg.instdone.row: ROW_INSTDONE */ + u32 *row; + /** @reg.instdone.geom_svg: INSTDONE_GEOM_SVGUNIT */ + u32 *geom_svg; + } instdone; } reg; }; diff --git a/drivers/gpu/drm/xe/xe_hwmon.c b/drivers/gpu/drm/xe/xe_hwmon.c index 453e601ddd5e..dca275117232 100644 --- a/drivers/gpu/drm/xe/xe_hwmon.c +++ b/drivers/gpu/drm/xe/xe_hwmon.c @@ -550,12 +550,17 @@ xe_hwmon_curr_is_visible(const struct xe_hwmon *hwmon, u32 attr, int channel) { u32 uval; + /* hwmon sysfs attribute of current available only for package */ + if (channel != CHANNEL_PKG) + return 0; + switch (attr) { case hwmon_curr_crit: - case hwmon_curr_label: - if (channel == CHANNEL_PKG) return (xe_hwmon_pcode_read_i1(hwmon->gt, &uval) || (uval & POWER_SETUP_I1_WATTS)) ? 0 : 0644; + case hwmon_curr_label: + return (xe_hwmon_pcode_read_i1(hwmon->gt, &uval) || + (uval & POWER_SETUP_I1_WATTS)) ? 0 : 0444; break; default: return 0; diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index 615bbc372ac6..9b0a4078add3 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -11,7 +11,6 @@ #include "instructions/xe_gfxpipe_commands.h" #include "instructions/xe_gfx_state_commands.h" #include "regs/xe_engine_regs.h" -#include "regs/xe_gpu_commands.h" #include "regs/xe_lrc_layout.h" #include "xe_bb.h" #include "xe_bo.h" @@ -34,12 +33,15 @@ #define LRC_ENGINE_CLASS GENMASK_ULL(63, 61) #define LRC_ENGINE_INSTANCE GENMASK_ULL(53, 48) +#define LRC_INDIRECT_RING_STATE_SIZE SZ_4K + struct xe_lrc_snapshot { struct xe_bo *lrc_bo; void *lrc_snapshot; unsigned long lrc_size, lrc_offset; u32 context_desc; + u32 indirect_context_desc; u32 head; struct { u32 internal; @@ -55,20 +57,25 @@ lrc_to_xe(struct xe_lrc *lrc) return gt_to_xe(lrc->fence_ctx.gt); } -size_t xe_lrc_size(struct xe_device *xe, enum xe_engine_class class) +size_t xe_gt_lrc_size(struct xe_gt *gt, enum xe_engine_class class) { + struct xe_device *xe = gt_to_xe(gt); + size_t size; + switch (class) { case XE_ENGINE_CLASS_RENDER: if (GRAPHICS_VER(xe) >= 20) - return 4 * SZ_4K; + size = 4 * SZ_4K; else - return 14 * SZ_4K; + size = 14 * SZ_4K; + break; case XE_ENGINE_CLASS_COMPUTE: /* 14 pages since graphics_ver == 11 */ if (GRAPHICS_VER(xe) >= 20) - return 3 * SZ_4K; + size = 3 * SZ_4K; else - return 14 * SZ_4K; + size = 14 * SZ_4K; + break; default: WARN(1, "Unknown engine class: %d", class); fallthrough; @@ -76,8 +83,14 @@ size_t xe_lrc_size(struct xe_device *xe, enum xe_engine_class class) case XE_ENGINE_CLASS_VIDEO_DECODE: case XE_ENGINE_CLASS_VIDEO_ENHANCE: case XE_ENGINE_CLASS_OTHER: - return 2 * SZ_4K; + size = 2 * SZ_4K; } + + /* Add indirect ring state page */ + if (xe_gt_has_indirect_ring_state(gt)) + size += LRC_INDIRECT_RING_STATE_SIZE; + + return size; } /* @@ -508,6 +521,32 @@ static const u8 xe2_xcs_offsets[] = { 0 }; +static const u8 xe2_indirect_ring_state_offsets[] = { + NOP(1), /* [0x00] */ + LRI(5, POSTED), /* [0x01] */ + REG(0x034), /* [0x02] RING_BUFFER_HEAD */ + REG(0x030), /* [0x04] RING_BUFFER_TAIL */ + REG(0x038), /* [0x06] RING_BUFFER_START */ + REG(0x048), /* [0x08] RING_BUFFER_START_UDW */ + REG(0x03c), /* [0x0a] RING_BUFFER_CONTROL */ + + NOP(5), /* [0x0c] */ + LRI(9, POSTED), /* [0x11] */ + REG(0x168), /* [0x12] BB_ADDR_UDW */ + REG(0x140), /* [0x14] BB_ADDR */ + REG(0x110), /* [0x16] BB_STATE */ + REG16(0x588), /* [0x18] BB_STACK_WRITE_PORT */ + REG16(0x588), /* [0x20] BB_STACK_WRITE_PORT */ + REG16(0x588), /* [0x22] BB_STACK_WRITE_PORT */ + REG16(0x588), /* [0x24] BB_STACK_WRITE_PORT */ + REG16(0x588), /* [0x26] BB_STACK_WRITE_PORT */ + REG16(0x588), /* [0x28] BB_STACK_WRITE_PORT */ + + NOP(12), /* [0x00] */ + + 0 +}; + #undef REG16 #undef REG #undef LRI @@ -546,6 +585,10 @@ static void set_context_control(u32 *regs, struct xe_hw_engine *hwe) regs[CTX_CONTEXT_CONTROL] = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH | CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT); + if (xe_gt_has_indirect_ring_state(hwe->gt)) + regs[CTX_CONTEXT_CONTROL] |= + _MASKED_BIT_ENABLE(CTX_CTRL_INDIRECT_RING_STATE_ENABLE); + /* TODO: Timestamp */ } @@ -589,6 +632,11 @@ static void reset_stop_ring(u32 *regs, struct xe_hw_engine *hwe) regs[x + 1] |= STOP_RING << 16; } +static inline bool xe_lrc_has_indirect_ring_state(struct xe_lrc *lrc) +{ + return lrc->flags & XE_LRC_FLAG_INDIRECT_RING_STATE; +} + static inline u32 __xe_lrc_ring_offset(struct xe_lrc *lrc) { return 0; @@ -643,6 +691,12 @@ static inline u32 __xe_lrc_regs_offset(struct xe_lrc *lrc) return xe_lrc_pphwsp_offset(lrc) + LRC_PPHWSP_SIZE; } +static inline u32 __xe_lrc_indirect_ring_offset(struct xe_lrc *lrc) +{ + /* Indirect ring state page is at the very end of LRC */ + return lrc->size - LRC_INDIRECT_RING_STATE_SIZE; +} + #define DECL_MAP_ADDR_HELPERS(elem) \ static inline struct iosys_map __xe_lrc_##elem##_map(struct xe_lrc *lrc) \ { \ @@ -663,6 +717,7 @@ DECL_MAP_ADDR_HELPERS(seqno) DECL_MAP_ADDR_HELPERS(regs) DECL_MAP_ADDR_HELPERS(start_seqno) DECL_MAP_ADDR_HELPERS(parallel) +DECL_MAP_ADDR_HELPERS(indirect_ring) #undef DECL_MAP_ADDR_HELPERS @@ -671,6 +726,35 @@ u32 xe_lrc_ggtt_addr(struct xe_lrc *lrc) return __xe_lrc_pphwsp_ggtt_addr(lrc); } +u32 xe_lrc_indirect_ring_ggtt_addr(struct xe_lrc *lrc) +{ + if (!xe_lrc_has_indirect_ring_state(lrc)) + return 0; + + return __xe_lrc_indirect_ring_ggtt_addr(lrc); +} + +static u32 xe_lrc_read_indirect_ctx_reg(struct xe_lrc *lrc, int reg_nr) +{ + struct xe_device *xe = lrc_to_xe(lrc); + struct iosys_map map; + + map = __xe_lrc_indirect_ring_map(lrc); + iosys_map_incr(&map, reg_nr * sizeof(u32)); + return xe_map_read32(xe, &map); +} + +static void xe_lrc_write_indirect_ctx_reg(struct xe_lrc *lrc, + int reg_nr, u32 val) +{ + struct xe_device *xe = lrc_to_xe(lrc); + struct iosys_map map; + + map = __xe_lrc_indirect_ring_map(lrc); + iosys_map_incr(&map, reg_nr * sizeof(u32)); + xe_map_write32(xe, &map, val); +} + u32 xe_lrc_read_ctx_reg(struct xe_lrc *lrc, int reg_nr) { struct xe_device *xe = lrc_to_xe(lrc); @@ -693,20 +777,25 @@ void xe_lrc_write_ctx_reg(struct xe_lrc *lrc, int reg_nr, u32 val) static void *empty_lrc_data(struct xe_hw_engine *hwe) { - struct xe_device *xe = gt_to_xe(hwe->gt); + struct xe_gt *gt = hwe->gt; void *data; u32 *regs; - data = kzalloc(xe_lrc_size(xe, hwe->class), GFP_KERNEL); + data = kzalloc(xe_gt_lrc_size(gt, hwe->class), GFP_KERNEL); if (!data) return NULL; /* 1st page: Per-Process of HW status Page */ regs = data + LRC_PPHWSP_SIZE; - set_offsets(regs, reg_offsets(xe, hwe->class), hwe); + set_offsets(regs, reg_offsets(gt_to_xe(gt), hwe->class), hwe); set_context_control(regs, hwe); set_memory_based_intr(regs, hwe); reset_stop_ring(regs, hwe); + if (xe_gt_has_indirect_ring_state(gt)) { + regs = data + xe_gt_lrc_size(gt, hwe->class) - + LRC_INDIRECT_RING_STATE_SIZE; + set_offsets(regs, xe2_indirect_ring_state_offsets, hwe); + } return data; } @@ -731,23 +820,27 @@ int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, struct iosys_map map; void *init_data = NULL; u32 arb_enable; + u32 lrc_size; int err; lrc->flags = 0; + lrc_size = ring_size + xe_gt_lrc_size(gt, hwe->class); + if (xe_gt_has_indirect_ring_state(gt)) + lrc->flags |= XE_LRC_FLAG_INDIRECT_RING_STATE; /* * FIXME: Perma-pinning LRC as we don't yet support moving GGTT address * via VM bind calls. */ - lrc->bo = xe_bo_create_pin_map(xe, tile, vm, - ring_size + xe_lrc_size(xe, hwe->class), - ttm_bo_type_kernel, - XE_BO_FLAG_VRAM_IF_DGFX(tile) | - XE_BO_FLAG_GGTT | - XE_BO_FLAG_GGTT_INVALIDATE); + lrc->bo = xe_bo_create_pin_map(xe, tile, vm, lrc_size, + ttm_bo_type_kernel, + XE_BO_FLAG_VRAM_IF_DGFX(tile) | + XE_BO_FLAG_GGTT | + XE_BO_FLAG_GGTT_INVALIDATE); if (IS_ERR(lrc->bo)) return PTR_ERR(lrc->bo); + lrc->size = lrc_size; lrc->tile = gt_to_tile(hwe->gt); lrc->ring.size = ring_size; lrc->ring.tail = 0; @@ -772,10 +865,10 @@ int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, xe_map_memset(xe, &map, 0, 0, LRC_PPHWSP_SIZE); /* PPHWSP */ xe_map_memcpy_to(xe, &map, LRC_PPHWSP_SIZE, gt->default_lrc[hwe->class] + LRC_PPHWSP_SIZE, - xe_lrc_size(xe, hwe->class) - LRC_PPHWSP_SIZE); + xe_gt_lrc_size(gt, hwe->class) - LRC_PPHWSP_SIZE); } else { xe_map_memcpy_to(xe, &map, 0, init_data, - xe_lrc_size(xe, hwe->class)); + xe_gt_lrc_size(gt, hwe->class)); kfree(init_data); } @@ -786,11 +879,25 @@ int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, xe_drm_client_add_bo(vm->xef->client, lrc->bo); } - xe_lrc_write_ctx_reg(lrc, CTX_RING_START, __xe_lrc_ring_ggtt_addr(lrc)); - xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, 0); - xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail); - xe_lrc_write_ctx_reg(lrc, CTX_RING_CTL, - RING_CTL_SIZE(lrc->ring.size) | RING_VALID); + if (xe_gt_has_indirect_ring_state(gt)) { + xe_lrc_write_ctx_reg(lrc, CTX_INDIRECT_RING_STATE, + __xe_lrc_indirect_ring_ggtt_addr(lrc)); + + xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_START, + __xe_lrc_ring_ggtt_addr(lrc)); + xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_START_UDW, 0); + xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_HEAD, 0); + xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_TAIL, lrc->ring.tail); + xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_CTL, + RING_CTL_SIZE(lrc->ring.size) | RING_VALID); + } else { + xe_lrc_write_ctx_reg(lrc, CTX_RING_START, __xe_lrc_ring_ggtt_addr(lrc)); + xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, 0); + xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail); + xe_lrc_write_ctx_reg(lrc, CTX_RING_CTL, + RING_CTL_SIZE(lrc->ring.size) | RING_VALID); + } + if (xe->info.has_asid && vm) xe_lrc_write_ctx_reg(lrc, PVC_CTX_ASID, vm->usm.asid); @@ -834,14 +941,36 @@ void xe_lrc_finish(struct xe_lrc *lrc) xe_bo_put(lrc->bo); } +void xe_lrc_set_ring_tail(struct xe_lrc *lrc, u32 tail) +{ + if (xe_lrc_has_indirect_ring_state(lrc)) + xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_TAIL, tail); + else + xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, tail); +} + +u32 xe_lrc_ring_tail(struct xe_lrc *lrc) +{ + if (xe_lrc_has_indirect_ring_state(lrc)) + return xe_lrc_read_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_TAIL) & TAIL_ADDR; + else + return xe_lrc_read_ctx_reg(lrc, CTX_RING_TAIL) & TAIL_ADDR; +} + void xe_lrc_set_ring_head(struct xe_lrc *lrc, u32 head) { - xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, head); + if (xe_lrc_has_indirect_ring_state(lrc)) + xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_HEAD, head); + else + xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, head); } u32 xe_lrc_ring_head(struct xe_lrc *lrc) { - return xe_lrc_read_ctx_reg(lrc, CTX_RING_HEAD) & HEAD_ADDR; + if (xe_lrc_has_indirect_ring_state(lrc)) + return xe_lrc_read_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_HEAD) & HEAD_ADDR; + else + return xe_lrc_read_ctx_reg(lrc, CTX_RING_HEAD) & HEAD_ADDR; } u32 xe_lrc_ring_space(struct xe_lrc *lrc) @@ -1214,7 +1343,7 @@ void xe_lrc_dump_default(struct drm_printer *p, * hardware status page. */ dw = gt->default_lrc[hwe_class] + LRC_PPHWSP_SIZE; - remaining_dw = (xe_lrc_size(gt_to_xe(gt), hwe_class) - LRC_PPHWSP_SIZE) / 4; + remaining_dw = (xe_gt_lrc_size(gt, hwe_class) - LRC_PPHWSP_SIZE) / 4; while (remaining_dw > 0) { if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_MI) { @@ -1354,10 +1483,11 @@ struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc) if (!snapshot) return NULL; - snapshot->context_desc = lower_32_bits(xe_lrc_ggtt_addr(lrc)); + snapshot->context_desc = xe_lrc_ggtt_addr(lrc); + snapshot->indirect_context_desc = xe_lrc_indirect_ring_ggtt_addr(lrc); snapshot->head = xe_lrc_ring_head(lrc); snapshot->tail.internal = lrc->ring.tail; - snapshot->tail.memory = xe_lrc_read_ctx_reg(lrc, CTX_RING_TAIL); + snapshot->tail.memory = xe_lrc_ring_tail(lrc); snapshot->start_seqno = xe_lrc_start_seqno(lrc); snapshot->seqno = xe_lrc_seqno(lrc); snapshot->lrc_bo = xe_bo_get(lrc->bo); @@ -1382,7 +1512,7 @@ void xe_lrc_snapshot_capture_delayed(struct xe_lrc_snapshot *snapshot) if (!snapshot->lrc_snapshot) goto put_bo; - dma_resv_lock(bo->ttm.base.resv, NULL); + xe_bo_lock(bo, false); if (!ttm_bo_vmap(&bo->ttm, &src)) { xe_map_memcpy_from(xe_bo_device(bo), snapshot->lrc_snapshot, &src, snapshot->lrc_offset, @@ -1392,7 +1522,7 @@ void xe_lrc_snapshot_capture_delayed(struct xe_lrc_snapshot *snapshot) kvfree(snapshot->lrc_snapshot); snapshot->lrc_snapshot = NULL; } - dma_resv_unlock(bo->ttm.base.resv); + xe_bo_unlock(bo); put_bo: xe_bo_put(bo); } @@ -1405,6 +1535,8 @@ void xe_lrc_snapshot_print(struct xe_lrc_snapshot *snapshot, struct drm_printer return; drm_printf(p, "\tHW Context Desc: 0x%08x\n", snapshot->context_desc); + drm_printf(p, "\tHW Indirect Ring State: 0x%08x\n", + snapshot->indirect_context_desc); drm_printf(p, "\tLRC Head: (memory) %u\n", snapshot->head); drm_printf(p, "\tLRC Tail: (internal) %u, (memory) %u\n", snapshot->tail.internal, snapshot->tail.memory); diff --git a/drivers/gpu/drm/xe/xe_lrc.h b/drivers/gpu/drm/xe/xe_lrc.h index d32fa31faa2c..e0e841963c23 100644 --- a/drivers/gpu/drm/xe/xe_lrc.h +++ b/drivers/gpu/drm/xe/xe_lrc.h @@ -5,14 +5,17 @@ #ifndef _XE_LRC_H_ #define _XE_LRC_H_ -#include "xe_lrc_types.h" +#include <linux/types.h> struct drm_printer; struct xe_bb; struct xe_device; struct xe_exec_queue; enum xe_engine_class; +struct xe_gt; struct xe_hw_engine; +struct xe_lrc; +struct xe_lrc_snapshot; struct xe_vm; #define LRC_PPHWSP_SCRATCH_ADDR (0x34 * 4) @@ -21,14 +24,17 @@ int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, struct xe_exec_queue *q, struct xe_vm *vm, u32 ring_size); void xe_lrc_finish(struct xe_lrc *lrc); -size_t xe_lrc_size(struct xe_device *xe, enum xe_engine_class class); +size_t xe_gt_lrc_size(struct xe_gt *gt, enum xe_engine_class class); u32 xe_lrc_pphwsp_offset(struct xe_lrc *lrc); +void xe_lrc_set_ring_tail(struct xe_lrc *lrc, u32 tail); +u32 xe_lrc_ring_tail(struct xe_lrc *lrc); void xe_lrc_set_ring_head(struct xe_lrc *lrc, u32 head); u32 xe_lrc_ring_head(struct xe_lrc *lrc); u32 xe_lrc_ring_space(struct xe_lrc *lrc); void xe_lrc_write_ring(struct xe_lrc *lrc, const void *data, size_t size); +u32 xe_lrc_indirect_ring_ggtt_addr(struct xe_lrc *lrc); u32 xe_lrc_ggtt_addr(struct xe_lrc *lrc); u32 *xe_lrc_regs(struct xe_lrc *lrc); diff --git a/drivers/gpu/drm/xe/xe_lrc_types.h b/drivers/gpu/drm/xe/xe_lrc_types.h index b716df0dfb4e..cdbf03faef15 100644 --- a/drivers/gpu/drm/xe/xe_lrc_types.h +++ b/drivers/gpu/drm/xe/xe_lrc_types.h @@ -20,10 +20,14 @@ struct xe_lrc { */ struct xe_bo *bo; + /** @size: size of lrc including any indirect ring state page */ + u32 size; + /** @tile: tile which this LRC belongs to */ struct xe_tile *tile; /** @flags: LRC flags */ +#define XE_LRC_FLAG_INDIRECT_RING_STATE 0x1 u32 flags; /** @ring: submission ring state */ diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 9f6e9b7f11c8..36db5ed1a572 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -14,8 +14,8 @@ #include <generated/xe_wa_oob.h> +#include "instructions/xe_gpu_commands.h" #include "instructions/xe_mi_commands.h" -#include "regs/xe_gpu_commands.h" #include "regs/xe_gtt_defs.h" #include "tests/xe_test.h" #include "xe_assert.h" diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index 334637511e75..05edab0e085d 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -15,6 +15,7 @@ #include "regs/xe_regs.h" #include "xe_bo.h" #include "xe_device.h" +#include "xe_force_wake.h" #include "xe_ggtt.h" #include "xe_gt.h" #include "xe_gt_mcr.h" @@ -423,41 +424,33 @@ int xe_mmio_init(struct xe_device *xe) u8 xe_mmio_read8(struct xe_gt *gt, struct xe_reg reg) { struct xe_tile *tile = gt_to_tile(gt); + u32 addr = xe_mmio_adjusted_addr(gt, reg.addr); - if (reg.addr < gt->mmio.adj_limit) - reg.addr += gt->mmio.adj_offset; - - return readb((reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + reg.addr); + return readb((reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + addr); } u16 xe_mmio_read16(struct xe_gt *gt, struct xe_reg reg) { struct xe_tile *tile = gt_to_tile(gt); + u32 addr = xe_mmio_adjusted_addr(gt, reg.addr); - if (reg.addr < gt->mmio.adj_limit) - reg.addr += gt->mmio.adj_offset; - - return readw((reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + reg.addr); + return readw((reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + addr); } void xe_mmio_write32(struct xe_gt *gt, struct xe_reg reg, u32 val) { struct xe_tile *tile = gt_to_tile(gt); + u32 addr = xe_mmio_adjusted_addr(gt, reg.addr); - if (reg.addr < gt->mmio.adj_limit) - reg.addr += gt->mmio.adj_offset; - - writel(val, (reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + reg.addr); + writel(val, (reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + addr); } u32 xe_mmio_read32(struct xe_gt *gt, struct xe_reg reg) { struct xe_tile *tile = gt_to_tile(gt); + u32 addr = xe_mmio_adjusted_addr(gt, reg.addr); - if (reg.addr < gt->mmio.adj_limit) - reg.addr += gt->mmio.adj_offset; - - return readl((reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + reg.addr); + return readl((reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + addr); } u32 xe_mmio_rmw32(struct xe_gt *gt, struct xe_reg reg, u32 clr, u32 set) @@ -486,10 +479,9 @@ bool xe_mmio_in_range(const struct xe_gt *gt, const struct xe_mmio_range *range, struct xe_reg reg) { - if (reg.addr < gt->mmio.adj_limit) - reg.addr += gt->mmio.adj_offset; + u32 addr = xe_mmio_adjusted_addr(gt, reg.addr); - return range && reg.addr >= range->start && reg.addr <= range->end; + return range && addr >= range->start && addr <= range->end; } /** @@ -519,10 +511,11 @@ u64 xe_mmio_read64_2x32(struct xe_gt *gt, struct xe_reg reg) struct xe_reg reg_udw = { .addr = reg.addr + 0x4 }; u32 ldw, udw, oldudw, retries; - if (reg.addr < gt->mmio.adj_limit) { - reg.addr += gt->mmio.adj_offset; - reg_udw.addr += gt->mmio.adj_offset; - } + reg.addr = xe_mmio_adjusted_addr(gt, reg.addr); + reg_udw.addr = xe_mmio_adjusted_addr(gt, reg_udw.addr); + + /* we shouldn't adjust just one register address */ + xe_gt_assert(gt, reg_udw.addr == reg.addr + 0x4); oldudw = xe_mmio_read32(gt, reg_udw); for (retries = 5; retries; --retries) { diff --git a/drivers/gpu/drm/xe/xe_mmio.h b/drivers/gpu/drm/xe/xe_mmio.h index a3cd7b3036c7..445ec6a0753e 100644 --- a/drivers/gpu/drm/xe/xe_mmio.h +++ b/drivers/gpu/drm/xe/xe_mmio.h @@ -36,4 +36,11 @@ u64 xe_mmio_read64_2x32(struct xe_gt *gt, struct xe_reg reg); int xe_mmio_wait32(struct xe_gt *gt, struct xe_reg reg, u32 mask, u32 val, u32 timeout_us, u32 *out_val, bool atomic); +static inline u32 xe_mmio_adjusted_addr(const struct xe_gt *gt, u32 addr) +{ + if (addr < gt->mmio.adj_limit) + addr += gt->mmio.adj_offset; + return addr; +} + #endif diff --git a/drivers/gpu/drm/xe/xe_mocs.c b/drivers/gpu/drm/xe/xe_mocs.c index 1e92f8ee07ba..f04754ad911b 100644 --- a/drivers/gpu/drm/xe/xe_mocs.c +++ b/drivers/gpu/drm/xe/xe_mocs.c @@ -9,10 +9,12 @@ #include "xe_bo.h" #include "xe_device.h" #include "xe_exec_queue.h" +#include "xe_force_wake.h" #include "xe_gt.h" #include "xe_gt_mcr.h" #include "xe_mmio.h" #include "xe_platform_types.h" +#include "xe_pm.h" #include "xe_sriov.h" #include "xe_step_types.h" @@ -36,34 +38,23 @@ struct xe_mocs_entry { u16 used; }; +struct xe_mocs_info; + +struct xe_mocs_ops { + void (*dump)(struct xe_mocs_info *mocs, unsigned int flags, + struct xe_gt *gt, struct drm_printer *p); +}; + struct xe_mocs_info { unsigned int size; unsigned int n_entries; const struct xe_mocs_entry *table; + const struct xe_mocs_ops *ops; u8 uc_index; u8 wb_index; u8 unused_entries_index; }; -/* Defines for the tables (XXX_MOCS_0 - XXX_MOCS_63) */ -#define _LE_CACHEABILITY(value) ((value) << 0) -#define _LE_TGT_CACHE(value) ((value) << 2) -#define LE_LRUM(value) ((value) << 4) -#define LE_AOM(value) ((value) << 6) -#define LE_RSC(value) ((value) << 7) -#define LE_SCC(value) ((value) << 8) -#define LE_PFM(value) ((value) << 11) -#define LE_SCF(value) ((value) << 14) -#define LE_COS(value) ((value) << 15) -#define LE_SSE(value) ((value) << 17) - -/* Defines for the tables (LNCFMOCS0 - LNCFMOCS31) - two entries per word */ -#define L3_ESC(value) ((value) << 0) -#define L3_SCC(value) ((value) << 1) -#define _L3_CACHEABILITY(value) ((value) << 4) -#define L3_GLBGO(value) ((value) << 6) -#define L3_LKUP(value) ((value) << 7) - /* Defines for the tables (GLOB_MOCS_0 - GLOB_MOCS_16) */ #define IG_PAT REG_BIT(8) #define L3_CACHE_POLICY_MASK REG_GENMASK(5, 4) @@ -80,22 +71,22 @@ struct xe_mocs_info { * Note: LE_0_PAGETABLE works only up to Gen11; for newer gens it means * the same as LE_UC */ -#define LE_0_PAGETABLE _LE_CACHEABILITY(0) -#define LE_1_UC _LE_CACHEABILITY(1) -#define LE_2_WT _LE_CACHEABILITY(2) -#define LE_3_WB _LE_CACHEABILITY(3) +#define LE_0_PAGETABLE LE_CACHEABILITY(0) +#define LE_1_UC LE_CACHEABILITY(1) +#define LE_2_WT LE_CACHEABILITY(2) +#define LE_3_WB LE_CACHEABILITY(3) /* Target cache */ -#define LE_TC_0_PAGETABLE _LE_TGT_CACHE(0) -#define LE_TC_1_LLC _LE_TGT_CACHE(1) -#define LE_TC_2_LLC_ELLC _LE_TGT_CACHE(2) -#define LE_TC_3_LLC_ELLC_ALT _LE_TGT_CACHE(3) +#define LE_TC_0_PAGETABLE LE_TGT_CACHE(0) +#define LE_TC_1_LLC LE_TGT_CACHE(1) +#define LE_TC_2_LLC_ELLC LE_TGT_CACHE(2) +#define LE_TC_3_LLC_ELLC_ALT LE_TGT_CACHE(3) /* L3 caching options */ -#define L3_0_DIRECT _L3_CACHEABILITY(0) -#define L3_1_UC _L3_CACHEABILITY(1) -#define L3_2_RESERVED _L3_CACHEABILITY(2) -#define L3_3_WB _L3_CACHEABILITY(3) +#define L3_0_DIRECT L3_CACHEABILITY(0) +#define L3_1_UC L3_CACHEABILITY(1) +#define L3_2_RESERVED L3_CACHEABILITY(2) +#define L3_3_WB L3_CACHEABILITY(3) /* L4 caching options */ #define L4_0_WB REG_FIELD_PREP(L4_CACHE_POLICY_MASK, 0) @@ -107,6 +98,8 @@ struct xe_mocs_info { #define XE2_L3_1_XD REG_FIELD_PREP(L3_CACHE_POLICY_MASK, 1) #define XE2_L3_3_UC REG_FIELD_PREP(L3_CACHE_POLICY_MASK, 3) +#define XE2_L3_CLOS_MASK REG_GENMASK(7, 6) + #define MOCS_ENTRY(__idx, __control_value, __l3cc_value) \ [__idx] = { \ .control_value = __control_value, \ @@ -255,6 +248,84 @@ static const struct xe_mocs_entry gen12_mocs_desc[] = { L3_1_UC) }; +static bool regs_are_mcr(struct xe_gt *gt) +{ + struct xe_device *xe = gt_to_xe(gt); + + if (xe_gt_is_media_type(gt)) + return MEDIA_VER(xe) >= 20; + else + return GRAPHICS_VERx100(xe) >= 1250; +} + +static void xelp_lncf_dump(struct xe_mocs_info *info, struct xe_gt *gt, struct drm_printer *p) +{ + unsigned int i, j; + u32 reg_val; + + drm_printf(p, "LNCFCMOCS[idx] = [ESC, SCC, L3CC] (value)\n\n"); + + for (i = 0, j = 0; i < (info->n_entries + 1) / 2; i++, j++) { + if (regs_are_mcr(gt)) + reg_val = xe_gt_mcr_unicast_read_any(gt, XEHP_LNCFCMOCS(i)); + else + reg_val = xe_mmio_read32(gt, XELP_LNCFCMOCS(i)); + + drm_printf(p, "LNCFCMOCS[%2d] = [%u, %u, %u] (%#8x)\n", + j++, + !!(reg_val & L3_ESC_MASK), + REG_FIELD_GET(L3_SCC_MASK, reg_val), + REG_FIELD_GET(L3_CACHEABILITY_MASK, reg_val), + reg_val); + + drm_printf(p, "LNCFCMOCS[%2d] = [%u, %u, %u] (%#8x)\n", + j, + !!(reg_val & L3_UPPER_IDX_ESC_MASK), + REG_FIELD_GET(L3_UPPER_IDX_SCC_MASK, reg_val), + REG_FIELD_GET(L3_UPPER_IDX_CACHEABILITY_MASK, reg_val), + reg_val); + } +} + +static void xelp_mocs_dump(struct xe_mocs_info *info, unsigned int flags, + struct xe_gt *gt, struct drm_printer *p) +{ + unsigned int i; + u32 reg_val; + + if (flags & HAS_GLOBAL_MOCS) { + drm_printf(p, "Global mocs table configuration:\n"); + drm_printf(p, "GLOB_MOCS[idx] = [LeCC, TC, LRUM, AOM, RSC, SCC, PFM, SCF, CoS, SSE] (value)\n\n"); + + for (i = 0; i < info->n_entries; i++) { + if (regs_are_mcr(gt)) + reg_val = xe_gt_mcr_unicast_read_any(gt, XEHP_GLOBAL_MOCS(i)); + else + reg_val = xe_mmio_read32(gt, XELP_GLOBAL_MOCS(i)); + + drm_printf(p, "GLOB_MOCS[%2d] = [%u, %u, %u, %u, %u, %u, %u, %u, %u, %u ] (%#8x)\n", + i, + REG_FIELD_GET(LE_CACHEABILITY_MASK, reg_val), + REG_FIELD_GET(LE_TGT_CACHE_MASK, reg_val), + REG_FIELD_GET(LE_LRUM_MASK, reg_val), + !!(reg_val & LE_AOM_MASK), + !!(reg_val & LE_RSC_MASK), + REG_FIELD_GET(LE_SCC_MASK, reg_val), + REG_FIELD_GET(LE_PFM_MASK, reg_val), + !!(reg_val & LE_SCF_MASK), + REG_FIELD_GET(LE_COS_MASK, reg_val), + REG_FIELD_GET(LE_SSE_MASK, reg_val), + reg_val); + } + } + + xelp_lncf_dump(info, gt, p); +} + +static const struct xe_mocs_ops xelp_mocs_ops = { + .dump = xelp_mocs_dump, +}; + static const struct xe_mocs_entry dg1_mocs_desc[] = { /* UC */ MOCS_ENTRY(1, 0, L3_1_UC), @@ -291,6 +362,40 @@ static const struct xe_mocs_entry dg2_mocs_desc[] = { MOCS_ENTRY(3, 0, L3_3_WB | L3_LKUP(1)), }; +static void xehp_lncf_dump(struct xe_mocs_info *info, unsigned int flags, + struct xe_gt *gt, struct drm_printer *p) +{ + unsigned int i, j; + u32 reg_val; + + drm_printf(p, "LNCFCMOCS[idx] = [UCL3LOOKUP, GLBGO, L3CC] (value)\n\n"); + + for (i = 0, j = 0; i < (info->n_entries + 1) / 2; i++, j++) { + if (regs_are_mcr(gt)) + reg_val = xe_gt_mcr_unicast_read_any(gt, XEHP_LNCFCMOCS(i)); + else + reg_val = xe_mmio_read32(gt, XELP_LNCFCMOCS(i)); + + drm_printf(p, "LNCFCMOCS[%2d] = [%u, %u, %u] (%#8x)\n", + j++, + !!(reg_val & L3_LKUP_MASK), + !!(reg_val & L3_GLBGO_MASK), + REG_FIELD_GET(L3_CACHEABILITY_MASK, reg_val), + reg_val); + + drm_printf(p, "LNCFCMOCS[%2d] = [%u, %u, %u] (%#8x)\n", + j, + !!(reg_val & L3_UPPER_LKUP_MASK), + !!(reg_val & L3_UPPER_GLBGO_MASK), + REG_FIELD_GET(L3_UPPER_IDX_CACHEABILITY_MASK, reg_val), + reg_val); + } +} + +static const struct xe_mocs_ops xehp_mocs_ops = { + .dump = xehp_lncf_dump, +}; + static const struct xe_mocs_entry pvc_mocs_desc[] = { /* Error */ MOCS_ENTRY(0, 0, L3_3_WB), @@ -302,6 +407,36 @@ static const struct xe_mocs_entry pvc_mocs_desc[] = { MOCS_ENTRY(2, 0, L3_3_WB), }; +static void pvc_mocs_dump(struct xe_mocs_info *info, unsigned int flags, struct xe_gt *gt, + struct drm_printer *p) +{ + unsigned int i, j; + u32 reg_val; + + drm_printf(p, "LNCFCMOCS[idx] = [ L3CC ] (value)\n\n"); + + for (i = 0, j = 0; i < (info->n_entries + 1) / 2; i++, j++) { + if (regs_are_mcr(gt)) + reg_val = xe_gt_mcr_unicast_read_any(gt, XEHP_LNCFCMOCS(i)); + else + reg_val = xe_mmio_read32(gt, XELP_LNCFCMOCS(i)); + + drm_printf(p, "LNCFCMOCS[%2d] = [ %u ] (%#8x)\n", + j++, + REG_FIELD_GET(L3_CACHEABILITY_MASK, reg_val), + reg_val); + + drm_printf(p, "LNCFCMOCS[%2d] = [ %u ] (%#8x)\n", + j, + REG_FIELD_GET(L3_UPPER_IDX_CACHEABILITY_MASK, reg_val), + reg_val); + } +} + +static const struct xe_mocs_ops pvc_mocs_ops = { + .dump = pvc_mocs_dump, +}; + static const struct xe_mocs_entry mtl_mocs_desc[] = { /* Error - Reserved for Non-Use */ MOCS_ENTRY(0, @@ -353,6 +488,36 @@ static const struct xe_mocs_entry mtl_mocs_desc[] = { L3_GLBGO(1) | L3_1_UC), }; +static void mtl_mocs_dump(struct xe_mocs_info *info, unsigned int flags, + struct xe_gt *gt, struct drm_printer *p) +{ + unsigned int i; + u32 reg_val; + + drm_printf(p, "Global mocs table configuration:\n"); + drm_printf(p, "GLOB_MOCS[idx] = [IG_PAT, L4_CACHE_POLICY] (value)\n\n"); + + for (i = 0; i < info->n_entries; i++) { + if (regs_are_mcr(gt)) + reg_val = xe_gt_mcr_unicast_read_any(gt, XEHP_GLOBAL_MOCS(i)); + else + reg_val = xe_mmio_read32(gt, XELP_GLOBAL_MOCS(i)); + + drm_printf(p, "GLOB_MOCS[%2d] = [%u, %u] (%#8x)\n", + i, + !!(reg_val & IG_PAT), + REG_FIELD_GET(L4_CACHE_POLICY_MASK, reg_val), + reg_val); + } + + /* MTL lncf mocs table pattern is similar to that of xehp */ + xehp_lncf_dump(info, flags, gt, p); +} + +static const struct xe_mocs_ops mtl_mocs_ops = { + .dump = mtl_mocs_dump, +}; + static const struct xe_mocs_entry xe2_mocs_table[] = { /* Defer to PAT */ MOCS_ENTRY(0, XE2_L3_0_WB | L4_3_UC, 0), @@ -366,6 +531,34 @@ static const struct xe_mocs_entry xe2_mocs_table[] = { MOCS_ENTRY(4, IG_PAT | XE2_L3_0_WB | L4_0_WB, 0), }; +static void xe2_mocs_dump(struct xe_mocs_info *info, unsigned int flags, + struct xe_gt *gt, struct drm_printer *p) +{ + unsigned int i; + u32 reg_val; + + drm_printf(p, "Global mocs table configuration:\n"); + drm_printf(p, "GLOB_MOCS[idx] = [IG_PAT, L3_CLOS, L3_CACHE_POLICY, L4_CACHE_POLICY] (value)\n\n"); + + for (i = 0; i < info->n_entries; i++) { + if (regs_are_mcr(gt)) + reg_val = xe_gt_mcr_unicast_read_any(gt, XEHP_GLOBAL_MOCS(i)); + else + reg_val = xe_mmio_read32(gt, XELP_GLOBAL_MOCS(i)); + + drm_printf(p, "GLOB_MOCS[%2d] = [%u, %u, %u] (%#8x)\n", + i, + !!(reg_val & IG_PAT), + REG_FIELD_GET(XE2_L3_CLOS_MASK, reg_val), + REG_FIELD_GET(L4_CACHE_POLICY_MASK, reg_val), + reg_val); + } +} + +static const struct xe_mocs_ops xe2_mocs_ops = { + .dump = xe2_mocs_dump, +}; + static unsigned int get_mocs_settings(struct xe_device *xe, struct xe_mocs_info *info) { @@ -376,6 +569,7 @@ static unsigned int get_mocs_settings(struct xe_device *xe, switch (xe->info.platform) { case XE_LUNARLAKE: case XE_BATTLEMAGE: + info->ops = &xe2_mocs_ops; info->size = ARRAY_SIZE(xe2_mocs_table); info->table = xe2_mocs_table; info->n_entries = XE2_NUM_MOCS_ENTRIES; @@ -384,6 +578,7 @@ static unsigned int get_mocs_settings(struct xe_device *xe, info->unused_entries_index = 4; break; case XE_PVC: + info->ops = &pvc_mocs_ops; info->size = ARRAY_SIZE(pvc_mocs_desc); info->table = pvc_mocs_desc; info->n_entries = PVC_NUM_MOCS_ENTRIES; @@ -392,6 +587,7 @@ static unsigned int get_mocs_settings(struct xe_device *xe, info->unused_entries_index = 2; break; case XE_METEORLAKE: + info->ops = &mtl_mocs_ops; info->size = ARRAY_SIZE(mtl_mocs_desc); info->table = mtl_mocs_desc; info->n_entries = MTL_NUM_MOCS_ENTRIES; @@ -399,6 +595,7 @@ static unsigned int get_mocs_settings(struct xe_device *xe, info->unused_entries_index = 1; break; case XE_DG2: + info->ops = &xehp_mocs_ops; info->size = ARRAY_SIZE(dg2_mocs_desc); info->table = dg2_mocs_desc; info->uc_index = 1; @@ -410,6 +607,7 @@ static unsigned int get_mocs_settings(struct xe_device *xe, info->unused_entries_index = 3; break; case XE_DG1: + info->ops = &xelp_mocs_ops; info->size = ARRAY_SIZE(dg1_mocs_desc); info->table = dg1_mocs_desc; info->uc_index = 1; @@ -421,6 +619,7 @@ static unsigned int get_mocs_settings(struct xe_device *xe, case XE_ALDERLAKE_S: case XE_ALDERLAKE_P: case XE_ALDERLAKE_N: + info->ops = &xelp_mocs_ops; info->size = ARRAY_SIZE(gen12_mocs_desc); info->table = gen12_mocs_desc; info->n_entries = XELP_NUM_MOCS_ENTRIES; @@ -442,6 +641,8 @@ static unsigned int get_mocs_settings(struct xe_device *xe, */ xe_assert(xe, info->unused_entries_index != 0); + xe_assert(xe, !info->ops || info->ops->dump); + if (XE_WARN_ON(info->size > info->n_entries)) { info->table = NULL; return 0; @@ -467,16 +668,6 @@ static u32 get_entry_control(const struct xe_mocs_info *info, return info->table[info->unused_entries_index].control_value; } -static bool regs_are_mcr(struct xe_gt *gt) -{ - struct xe_device *xe = gt_to_xe(gt); - - if (xe_gt_is_media_type(gt)) - return MEDIA_VER(xe) >= 20; - else - return GRAPHICS_VERx100(xe) >= 1250; -} - static void __init_mocs_table(struct xe_gt *gt, const struct xe_mocs_info *info) { @@ -578,6 +769,33 @@ void xe_mocs_init(struct xe_gt *gt) init_l3cc_table(gt, &table); } +void xe_mocs_dump(struct xe_gt *gt, struct drm_printer *p) +{ + struct xe_mocs_info table; + unsigned int flags; + u32 ret; + struct xe_device *xe = gt_to_xe(gt); + + flags = get_mocs_settings(xe, &table); + + if (!table.ops->dump) + return; + + xe_pm_runtime_get_noresume(xe); + ret = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + + if (ret) + goto err_fw; + + table.ops->dump(&table, flags, gt, p); + + xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); + +err_fw: + xe_assert(xe, !ret); + xe_pm_runtime_put(xe); +} + #if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST) #include "tests/xe_mocs.c" #endif diff --git a/drivers/gpu/drm/xe/xe_mocs.h b/drivers/gpu/drm/xe/xe_mocs.h index 053754c5a94e..dc972ffd4d07 100644 --- a/drivers/gpu/drm/xe/xe_mocs.h +++ b/drivers/gpu/drm/xe/xe_mocs.h @@ -6,12 +6,17 @@ #ifndef _XE_MOCS_H_ #define _XE_MOCS_H_ -#include <linux/types.h> - -struct xe_exec_queue; +struct drm_printer; struct xe_gt; void xe_mocs_init_early(struct xe_gt *gt); void xe_mocs_init(struct xe_gt *gt); +/** + * xe_mocs_dump - Dump mocs table + * @gt: GT structure + * @p: Printer to dump info to + */ +void xe_mocs_dump(struct xe_gt *gt, struct drm_printer *p); + #endif diff --git a/drivers/gpu/drm/xe/xe_module.c b/drivers/gpu/drm/xe/xe_module.c index ceb8345cbca6..3edeb30d5ccb 100644 --- a/drivers/gpu/drm/xe/xe_module.c +++ b/drivers/gpu/drm/xe/xe_module.c @@ -17,6 +17,7 @@ struct xe_modparam xe_modparam = { .enable_display = true, .guc_log_level = 5, .force_probe = CONFIG_DRM_XE_FORCE_PROBE, + .wedged_mode = 1, /* the rest are 0 by default */ }; @@ -55,6 +56,10 @@ MODULE_PARM_DESC(max_vfs, "(0 = no VFs [default]; N = allow up to N VFs)"); #endif +module_param_named_unsafe(wedged_mode, xe_modparam.wedged_mode, int, 0600); +MODULE_PARM_DESC(wedged_mode, + "Module's default policy for the wedged mode - 0=never, 1=upon-critical-errors[default], 2=upon-any-hang"); + struct init_funcs { int (*init)(void); void (*exit)(void); diff --git a/drivers/gpu/drm/xe/xe_module.h b/drivers/gpu/drm/xe/xe_module.h index b369984f08ec..61a0d28a28c8 100644 --- a/drivers/gpu/drm/xe/xe_module.h +++ b/drivers/gpu/drm/xe/xe_module.h @@ -21,6 +21,7 @@ struct xe_modparam { #ifdef CONFIG_PCI_IOV unsigned int max_vfs; #endif + int wedged_mode; }; extern struct xe_modparam xe_modparam; diff --git a/drivers/gpu/drm/xe/xe_pat.c b/drivers/gpu/drm/xe/xe_pat.c index d5b516f115ad..4ee32ee1cc88 100644 --- a/drivers/gpu/drm/xe/xe_pat.c +++ b/drivers/gpu/drm/xe/xe_pat.c @@ -10,6 +10,7 @@ #include "regs/xe_reg_defs.h" #include "xe_assert.h" #include "xe_device.h" +#include "xe_force_wake.h" #include "xe_gt.h" #include "xe_gt_mcr.h" #include "xe_mmio.h" diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index d6859108cc69..8dc82ee7ce6a 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -146,6 +146,7 @@ static const struct xe_graphics_desc graphics_xehpc = { .vram_flags = XE_VRAM_FLAGS_NEED64K, .has_asid = 1, + .has_atomic_enable_pte_bit = 1, .has_flat_ccs = 0, .has_usm = 1, }; @@ -163,7 +164,9 @@ static const struct xe_graphics_desc graphics_xelpg = { #define XE2_GFX_FEATURES \ .dma_mask_size = 46, \ .has_asid = 1, \ + .has_atomic_enable_pte_bit = 1, \ .has_flat_ccs = 1, \ + .has_indirect_ring_state = 1, \ .has_range_tlb_invalidation = 1, \ .has_usm = 1, \ .va_bits = 48, \ @@ -211,7 +214,8 @@ static const struct xe_media_desc media_xe2 = { .name = "Xe2_LPM / Xe2_HPM", .hw_engine_mask = GENMASK(XE_HW_ENGINE_VCS7, XE_HW_ENGINE_VCS0) | - GENMASK(XE_HW_ENGINE_VECS3, XE_HW_ENGINE_VECS0), /* TODO: GSC0 */ + GENMASK(XE_HW_ENGINE_VECS3, XE_HW_ENGINE_VECS0) | + BIT(XE_HW_ENGINE_GSCCS0) }; static const struct xe_device_desc tgl_desc = { @@ -629,6 +633,9 @@ static int xe_info_init(struct xe_device *xe, xe->info.va_bits = graphics_desc->va_bits; xe->info.vm_max_level = graphics_desc->vm_max_level; xe->info.has_asid = graphics_desc->has_asid; + xe->info.has_atomic_enable_pte_bit = graphics_desc->has_atomic_enable_pte_bit; + if (xe->info.platform != XE_PVC) + xe->info.has_device_atomics_on_smem = 1; xe->info.has_flat_ccs = graphics_desc->has_flat_ccs; xe->info.has_range_tlb_invalidation = graphics_desc->has_range_tlb_invalidation; xe->info.has_usm = graphics_desc->has_usm; @@ -656,6 +663,7 @@ static int xe_info_init(struct xe_device *xe, gt = tile->primary_gt; gt->info.id = xe->info.gt_count++; gt->info.type = XE_GT_TYPE_MAIN; + gt->info.has_indirect_ring_state = graphics_desc->has_indirect_ring_state; gt->info.__engine_mask = graphics_desc->hw_engine_mask; if (MEDIA_VER(xe) < 13 && media_desc) gt->info.__engine_mask |= media_desc->hw_engine_mask; @@ -673,6 +681,7 @@ static int xe_info_init(struct xe_device *xe, gt = tile->media_gt; gt->info.type = XE_GT_TYPE_MEDIA; + gt->info.has_indirect_ring_state = media_desc->has_indirect_ring_state; gt->info.__engine_mask = media_desc->hw_engine_mask; gt->mmio.adj_offset = MEDIA_GT_GSI_OFFSET; gt->mmio.adj_limit = MEDIA_GT_GSI_LENGTH; diff --git a/drivers/gpu/drm/xe/xe_pci_types.h b/drivers/gpu/drm/xe/xe_pci_types.h index b1ad12fa22d6..79b0f80376a4 100644 --- a/drivers/gpu/drm/xe/xe_pci_types.h +++ b/drivers/gpu/drm/xe/xe_pci_types.h @@ -25,7 +25,9 @@ struct xe_graphics_desc { u8 max_remote_tiles:2; u8 has_asid:1; + u8 has_atomic_enable_pte_bit:1; u8 has_flat_ccs:1; + u8 has_indirect_ring_state:1; u8 has_range_tlb_invalidation:1; u8 has_usm:1; }; @@ -36,6 +38,8 @@ struct xe_media_desc { u8 rel; u64 hw_engine_mask; /* hardware engines provided by media IP */ + + u8 has_indirect_ring_state:1; }; struct gmdid_map { diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c index 37fbeda12d3b..c1831106ea4b 100644 --- a/drivers/gpu/drm/xe/xe_pm.c +++ b/drivers/gpu/drm/xe/xe_pm.c @@ -69,7 +69,7 @@ */ #ifdef CONFIG_LOCKDEP -struct lockdep_map xe_pm_runtime_lockdep_map = { +static struct lockdep_map xe_pm_runtime_lockdep_map = { .name = "xe_pm_runtime_lockdep_map" }; #endif diff --git a/drivers/gpu/drm/xe/xe_preempt_fence.c b/drivers/gpu/drm/xe/xe_preempt_fence.c index 7d50c6e89d8e..5b243b7feb59 100644 --- a/drivers/gpu/drm/xe/xe_preempt_fence.c +++ b/drivers/gpu/drm/xe/xe_preempt_fence.c @@ -23,11 +23,19 @@ static void preempt_fence_work_func(struct work_struct *w) q->ops->suspend_wait(q); dma_fence_signal(&pfence->base); - dma_fence_end_signalling(cookie); - + /* + * Opt for keep everything in the fence critical section. This looks really strange since we + * have just signalled the fence, however the preempt fences are all signalled via single + * global ordered-wq, therefore anything that happens in this callback can easily block + * progress on the entire wq, which itself may prevent other published preempt fences from + * ever signalling. Therefore try to keep everything here in the callback in the fence + * critical section. For example if something below grabs a scary lock like vm->lock, + * lockdep should complain since we also hold that lock whilst waiting on preempt fences to + * complete. + */ xe_vm_queue_rebind_worker(q->vm); - xe_exec_queue_put(q); + dma_fence_end_signalling(cookie); } static const char * diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index 5b7930f46cf3..87975e45622a 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -619,9 +619,40 @@ xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma, struct xe_pt *pt = xe_vma_vm(vma)->pt_root[tile->id]; int ret; - if ((vma->gpuva.flags & XE_VMA_ATOMIC_PTE_BIT) && - (is_devmem || !IS_DGFX(xe))) - xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE; + /** + * Default atomic expectations for different allocation scenarios are as follows: + * + * 1. Traditional API: When the VM is not in LR mode: + * - Device atomics are expected to function with all allocations. + * + * 2. Compute/SVM API: When the VM is in LR mode: + * - Device atomics are the default behavior when the bo is placed in a single region. + * - In all other cases device atomics will be disabled with AE=0 until an application + * request differently using a ioctl like madvise. + */ + if (vma->gpuva.flags & XE_VMA_ATOMIC_PTE_BIT) { + if (xe_vm_in_lr_mode(xe_vma_vm(vma))) { + if (bo && xe_bo_has_single_placement(bo)) + xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE; + /** + * If a SMEM+LMEM allocation is backed by SMEM, a device + * atomics will cause a gpu page fault and which then + * gets migrated to LMEM, bind such allocations with + * device atomics enabled. + */ + else if (is_devmem && !xe_bo_has_single_placement(bo)) + xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE; + } else { + xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE; + } + + /** + * Unset AE if the platform(PVC) doesn't support it on an + * allocation + */ + if (!xe->info.has_device_atomics_on_smem && !is_devmem) + xe_walk.default_pte &= ~XE_USM_PPGTT_PTE_AE; + } if (is_devmem) { xe_walk.default_pte |= XE_PPGTT_PTE_DM; @@ -1075,10 +1106,12 @@ static const struct xe_migrate_pt_update_ops userptr_bind_ops = { struct invalidation_fence { struct xe_gt_tlb_invalidation_fence base; struct xe_gt *gt; - struct xe_vma *vma; struct dma_fence *fence; struct dma_fence_cb cb; struct work_struct work; + u64 start; + u64 end; + u32 asid; }; static const char * @@ -1121,13 +1154,14 @@ static void invalidation_fence_work_func(struct work_struct *w) container_of(w, struct invalidation_fence, work); trace_xe_gt_tlb_invalidation_fence_work_func(&ifence->base); - xe_gt_tlb_invalidation_vma(ifence->gt, &ifence->base, ifence->vma); + xe_gt_tlb_invalidation_range(ifence->gt, &ifence->base, ifence->start, + ifence->end, ifence->asid); } static int invalidation_fence_init(struct xe_gt *gt, struct invalidation_fence *ifence, struct dma_fence *fence, - struct xe_vma *vma) + u64 start, u64 end, u32 asid) { int ret; @@ -1144,7 +1178,9 @@ static int invalidation_fence_init(struct xe_gt *gt, dma_fence_get(&ifence->base.base); /* Ref for caller */ ifence->fence = fence; ifence->gt = gt; - ifence->vma = vma; + ifence->start = start; + ifence->end = end; + ifence->asid = asid; INIT_WORK(&ifence->work, invalidation_fence_work_func); ret = dma_fence_add_callback(fence, &ifence->cb, invalidation_fence_cb); @@ -1295,8 +1331,11 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue /* TLB invalidation must be done before signaling rebind */ if (ifence) { - int err = invalidation_fence_init(tile->primary_gt, ifence, fence, - vma); + int err = invalidation_fence_init(tile->primary_gt, + ifence, fence, + xe_vma_start(vma), + xe_vma_end(vma), + xe_vma_vm(vma)->usm.asid); if (err) { dma_fence_put(fence); kfree(ifence); @@ -1641,7 +1680,10 @@ __xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queu dma_fence_wait(fence, false); /* TLB invalidation must be done before signaling unbind */ - err = invalidation_fence_init(tile->primary_gt, ifence, fence, vma); + err = invalidation_fence_init(tile->primary_gt, ifence, fence, + xe_vma_start(vma), + xe_vma_end(vma), + xe_vma_vm(vma)->usm.asid); if (err) { dma_fence_put(fence); kfree(ifence); diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c index df407d73e5f5..29f847debb5c 100644 --- a/drivers/gpu/drm/xe/xe_query.c +++ b/drivers/gpu/drm/xe/xe_query.c @@ -16,6 +16,7 @@ #include "xe_bo.h" #include "xe_device.h" #include "xe_exec_queue.h" +#include "xe_force_wake.h" #include "xe_ggtt.h" #include "xe_gt.h" #include "xe_guc_hwconfig.h" diff --git a/drivers/gpu/drm/xe/xe_reg_sr.h b/drivers/gpu/drm/xe/xe_reg_sr.h index e3197c33afe2..51fbba423e27 100644 --- a/drivers/gpu/drm/xe/xe_reg_sr.h +++ b/drivers/gpu/drm/xe/xe_reg_sr.h @@ -6,8 +6,6 @@ #ifndef _XE_REG_SR_ #define _XE_REG_SR_ -#include "xe_reg_sr_types.h" - /* * Reg save/restore bookkeeping */ @@ -15,6 +13,8 @@ struct xe_device; struct xe_gt; struct xe_hw_engine; +struct xe_reg_sr; +struct xe_reg_sr_entry; struct drm_printer; int xe_reg_sr_init(struct xe_reg_sr *sr, const char *name, struct xe_device *xe); diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c index d42b3f33bd7a..a3ca718456f6 100644 --- a/drivers/gpu/drm/xe/xe_ring_ops.c +++ b/drivers/gpu/drm/xe/xe_ring_ops.c @@ -7,9 +7,9 @@ #include <generated/xe_wa_oob.h> +#include "instructions/xe_gpu_commands.h" #include "instructions/xe_mi_commands.h" #include "regs/xe_engine_regs.h" -#include "regs/xe_gpu_commands.h" #include "regs/xe_gt_regs.h" #include "regs/xe_lrc_layout.h" #include "xe_exec_queue_types.h" diff --git a/drivers/gpu/drm/xe/xe_rtp.h b/drivers/gpu/drm/xe/xe_rtp.h index c56fedd126e6..337b1ef1959c 100644 --- a/drivers/gpu/drm/xe/xe_rtp.h +++ b/drivers/gpu/drm/xe/xe_rtp.h @@ -341,7 +341,7 @@ struct xe_reg_sr; * }; */ #define XE_RTP_RULES(...) \ - .n_rules = _XE_COUNT_ARGS(__VA_ARGS__), \ + .n_rules = COUNT_ARGS(__VA_ARGS__), \ .rules = (const struct xe_rtp_rule[]) { \ XE_RTP_PASTE_FOREACH(RULE_, COMMA, (__VA_ARGS__)) \ } @@ -366,7 +366,7 @@ struct xe_reg_sr; * }; */ #define XE_RTP_ACTIONS(...) \ - .n_actions = _XE_COUNT_ARGS(__VA_ARGS__), \ + .n_actions = COUNT_ARGS(__VA_ARGS__), \ .actions = (const struct xe_rtp_action[]) { \ XE_RTP_PASTE_FOREACH(ACTION_, COMMA, (__VA_ARGS__)) \ } diff --git a/drivers/gpu/drm/xe/xe_rtp_helpers.h b/drivers/gpu/drm/xe/xe_rtp_helpers.h index 181b6290fac3..7735f217ba71 100644 --- a/drivers/gpu/drm/xe/xe_rtp_helpers.h +++ b/drivers/gpu/drm/xe/xe_rtp_helpers.h @@ -10,22 +10,16 @@ #error "This header is supposed to be included by xe_rtp.h only" #endif +#include "xe_args.h" + /* * Helper macros - not to be used outside this header. */ #define _XE_ESC(...) __VA_ARGS__ -#define _XE_COUNT_ARGS(...) _XE_ESC(__XE_COUNT_ARGS(__VA_ARGS__, 5, 4, 3, 2, 1,)) -#define __XE_COUNT_ARGS(_, _5, _4, _3, _2, X_, ...) X_ - -#define _XE_FIRST(...) _XE_ESC(__XE_FIRST(__VA_ARGS__,)) -#define __XE_FIRST(x_, ...) x_ -#define _XE_TUPLE_TAIL(...) _XE_ESC(__XE_TUPLE_TAIL(__VA_ARGS__)) -#define __XE_TUPLE_TAIL(x_, ...) (__VA_ARGS__) -#define _XE_DROP_FIRST(x_, ...) __VA_ARGS__ +#define _XE_TUPLE_TAIL(...) (DROP_FIRST_ARG(__VA_ARGS__)) -#define _XE_RTP_CONCAT(a, b) __XE_RTP_CONCAT(a, b) -#define __XE_RTP_CONCAT(a, b) XE_RTP_ ## a ## b +#define _XE_RTP_CONCAT(a, b) CONCATENATE(XE_RTP_, CONCATENATE(a, b)) #define __XE_RTP_PASTE_SEP_COMMA , #define __XE_RTP_PASTE_SEP_BITWISE_OR | @@ -59,11 +53,11 @@ * * XE_RTP_TEST_FOO BANANA XE_RTP_TEST_BAR */ -#define XE_RTP_PASTE_FOREACH(prefix_, sep_, args_) _XE_ESC(_XE_RTP_CONCAT(PASTE_, _XE_COUNT_ARGS args_)(prefix_, sep_, args_)) -#define XE_RTP_PASTE_1(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, _XE_FIRST args_) -#define XE_RTP_PASTE_2(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, _XE_FIRST args_) __XE_RTP_PASTE_SEP_ ## sep_ XE_RTP_PASTE_1(prefix_, sep_, _XE_TUPLE_TAIL args_) -#define XE_RTP_PASTE_3(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, _XE_FIRST args_) __XE_RTP_PASTE_SEP_ ## sep_ XE_RTP_PASTE_2(prefix_, sep_, _XE_TUPLE_TAIL args_) -#define XE_RTP_PASTE_4(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, _XE_FIRST args_) __XE_RTP_PASTE_SEP_ ## sep_ XE_RTP_PASTE_3(prefix_, sep_, _XE_TUPLE_TAIL args_) +#define XE_RTP_PASTE_FOREACH(prefix_, sep_, args_) _XE_RTP_CONCAT(PASTE_, COUNT_ARGS args_)(prefix_, sep_, args_) +#define XE_RTP_PASTE_1(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, FIRST_ARG args_) +#define XE_RTP_PASTE_2(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, FIRST_ARG args_) __XE_RTP_PASTE_SEP_ ## sep_ XE_RTP_PASTE_1(prefix_, sep_, _XE_TUPLE_TAIL args_) +#define XE_RTP_PASTE_3(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, FIRST_ARG args_) __XE_RTP_PASTE_SEP_ ## sep_ XE_RTP_PASTE_2(prefix_, sep_, _XE_TUPLE_TAIL args_) +#define XE_RTP_PASTE_4(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, FIRST_ARG args_) __XE_RTP_PASTE_SEP_ ## sep_ XE_RTP_PASTE_3(prefix_, sep_, _XE_TUPLE_TAIL args_) /* * XE_RTP_DROP_CAST - Drop cast to convert a compound statement to a initializer @@ -76,6 +70,6 @@ * * { .a = 10 } */ -#define XE_RTP_DROP_CAST(...) _XE_ESC(_XE_DROP_FIRST _XE_ESC __VA_ARGS__) +#define XE_RTP_DROP_CAST(...) _XE_ESC(DROP_FIRST_ARG _XE_ESC __VA_ARGS__) #endif diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c index 65f1f1628235..2883d9aca404 100644 --- a/drivers/gpu/drm/xe/xe_sync.c +++ b/drivers/gpu/drm/xe/xe_sync.c @@ -339,6 +339,21 @@ err_out: } /** + * __xe_sync_ufence_get() - Get user fence from user fence + * @ufence: input user fence + * + * Get a user fence reference from user fence + * + * Return: xe_user_fence pointer with reference + */ +struct xe_user_fence *__xe_sync_ufence_get(struct xe_user_fence *ufence) +{ + user_fence_get(ufence); + + return ufence; +} + +/** * xe_sync_ufence_get() - Get user fence from sync * @sync: input sync * diff --git a/drivers/gpu/drm/xe/xe_sync.h b/drivers/gpu/drm/xe/xe_sync.h index 3e03396af2c6..006dbf780793 100644 --- a/drivers/gpu/drm/xe/xe_sync.h +++ b/drivers/gpu/drm/xe/xe_sync.h @@ -37,6 +37,7 @@ static inline bool xe_sync_is_ufence(struct xe_sync_entry *sync) return !!sync->ufence; } +struct xe_user_fence *__xe_sync_ufence_get(struct xe_user_fence *ufence); struct xe_user_fence *xe_sync_ufence_get(struct xe_sync_entry *sync); void xe_sync_ufence_put(struct xe_user_fence *ufence); int xe_sync_ufence_get_status(struct xe_user_fence *ufence); diff --git a/drivers/gpu/drm/xe/xe_uc.c b/drivers/gpu/drm/xe/xe_uc.c index 4feb35c95a1c..45035e38388b 100644 --- a/drivers/gpu/drm/xe/xe_uc.c +++ b/drivers/gpu/drm/xe/xe_uc.c @@ -5,6 +5,7 @@ #include "xe_uc.h" +#include "xe_assert.h" #include "xe_device.h" #include "xe_gsc.h" #include "xe_gsc_proxy.h" @@ -215,13 +216,13 @@ void xe_uc_stop_prepare(struct xe_uc *uc) xe_guc_stop_prepare(&uc->guc); } -int xe_uc_stop(struct xe_uc *uc) +void xe_uc_stop(struct xe_uc *uc) { /* GuC submission not enabled, nothing to do */ if (!xe_device_uc_enabled(uc_to_xe(uc))) - return 0; + return; - return xe_guc_stop(&uc->guc); + xe_guc_stop(&uc->guc); } int xe_uc_start(struct xe_uc *uc) @@ -247,17 +248,13 @@ again: int xe_uc_suspend(struct xe_uc *uc) { - int ret; - /* GuC submission not enabled, nothing to do */ if (!xe_device_uc_enabled(uc_to_xe(uc))) return 0; uc_reset_wait(uc); - ret = xe_uc_stop(uc); - if (ret) - return ret; + xe_uc_stop(uc); return xe_guc_suspend(&uc->guc); } diff --git a/drivers/gpu/drm/xe/xe_uc.h b/drivers/gpu/drm/xe/xe_uc.h index e4d4e3c99f0e..5dfa7725483d 100644 --- a/drivers/gpu/drm/xe/xe_uc.h +++ b/drivers/gpu/drm/xe/xe_uc.h @@ -16,7 +16,7 @@ int xe_uc_fini_hw(struct xe_uc *uc); void xe_uc_gucrc_disable(struct xe_uc *uc); int xe_uc_reset_prepare(struct xe_uc *uc); void xe_uc_stop_prepare(struct xe_uc *uc); -int xe_uc_stop(struct xe_uc *uc); +void xe_uc_stop(struct xe_uc *uc); int xe_uc_start(struct xe_uc *uc); int xe_uc_suspend(struct xe_uc *uc); int xe_uc_sanitize_reset(struct xe_uc *uc); diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 4aa3943e6f29..c5b1694b292f 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -315,19 +315,23 @@ int __xe_vm_userptr_needs_repin(struct xe_vm *vm) #define XE_VM_REBIND_RETRY_TIMEOUT_MS 1000 -static void xe_vm_kill(struct xe_vm *vm) +static void xe_vm_kill(struct xe_vm *vm, bool unlocked) { struct xe_exec_queue *q; lockdep_assert_held(&vm->lock); - xe_vm_lock(vm, false); + if (unlocked) + xe_vm_lock(vm, false); + vm->flags |= XE_VM_FLAG_BANNED; trace_xe_vm_kill(vm); list_for_each_entry(q, &vm->preempt.exec_queues, compute.link) q->ops->kill(q); - xe_vm_unlock(vm); + + if (unlocked) + xe_vm_unlock(vm); /* TODO: Inform user the VM is banned */ } @@ -557,7 +561,7 @@ out_unlock_outer: if (err) { drm_warn(&vm->xe->drm, "VM worker error: %d\n", err); - xe_vm_kill(vm); + xe_vm_kill(vm, true); } up_write(&vm->lock); @@ -708,37 +712,116 @@ int xe_vm_userptr_check_repin(struct xe_vm *vm) list_empty_careful(&vm->userptr.invalidated)) ? 0 : -EAGAIN; } -static struct dma_fence * -xe_vm_bind_vma(struct xe_vma *vma, struct xe_exec_queue *q, - struct xe_sync_entry *syncs, u32 num_syncs, - bool first_op, bool last_op); +static void xe_vm_populate_rebind(struct xe_vma_op *op, struct xe_vma *vma, + u8 tile_mask) +{ + INIT_LIST_HEAD(&op->link); + op->tile_mask = tile_mask; + op->base.op = DRM_GPUVA_OP_MAP; + op->base.map.va.addr = vma->gpuva.va.addr; + op->base.map.va.range = vma->gpuva.va.range; + op->base.map.gem.obj = vma->gpuva.gem.obj; + op->base.map.gem.offset = vma->gpuva.gem.offset; + op->map.vma = vma; + op->map.immediate = true; + op->map.dumpable = vma->gpuva.flags & XE_VMA_DUMPABLE; + op->map.is_null = xe_vma_is_null(vma); +} + +static int xe_vm_ops_add_rebind(struct xe_vma_ops *vops, struct xe_vma *vma, + u8 tile_mask) +{ + struct xe_vma_op *op; + + op = kzalloc(sizeof(*op), GFP_KERNEL); + if (!op) + return -ENOMEM; + + xe_vm_populate_rebind(op, vma, tile_mask); + list_add_tail(&op->link, &vops->list); + + return 0; +} + +static struct dma_fence *ops_execute(struct xe_vm *vm, + struct xe_vma_ops *vops); +static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm, + struct xe_exec_queue *q, + struct xe_sync_entry *syncs, u32 num_syncs); int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker) { struct dma_fence *fence; struct xe_vma *vma, *next; + struct xe_vma_ops vops; + struct xe_vma_op *op, *next_op; + int err; lockdep_assert_held(&vm->lock); - if (xe_vm_in_lr_mode(vm) && !rebind_worker) + if ((xe_vm_in_lr_mode(vm) && !rebind_worker) || + list_empty(&vm->rebind_list)) return 0; + xe_vma_ops_init(&vops, vm, NULL, NULL, 0); + xe_vm_assert_held(vm); - list_for_each_entry_safe(vma, next, &vm->rebind_list, - combined_links.rebind) { + list_for_each_entry(vma, &vm->rebind_list, combined_links.rebind) { xe_assert(vm->xe, vma->tile_present); - list_del_init(&vma->combined_links.rebind); if (rebind_worker) trace_xe_vma_rebind_worker(vma); else trace_xe_vma_rebind_exec(vma); - fence = xe_vm_bind_vma(vma, NULL, NULL, 0, false, false); - if (IS_ERR(fence)) - return PTR_ERR(fence); + + err = xe_vm_ops_add_rebind(&vops, vma, + vma->tile_present); + if (err) + goto free_ops; + } + + fence = ops_execute(vm, &vops); + if (IS_ERR(fence)) { + err = PTR_ERR(fence); + } else { dma_fence_put(fence); + list_for_each_entry_safe(vma, next, &vm->rebind_list, + combined_links.rebind) + list_del_init(&vma->combined_links.rebind); + } +free_ops: + list_for_each_entry_safe(op, next_op, &vops.list, link) { + list_del(&op->link); + kfree(op); } - return 0; + return err; +} + +struct dma_fence *xe_vma_rebind(struct xe_vm *vm, struct xe_vma *vma, u8 tile_mask) +{ + struct dma_fence *fence = NULL; + struct xe_vma_ops vops; + struct xe_vma_op *op, *next_op; + int err; + + lockdep_assert_held(&vm->lock); + xe_vm_assert_held(vm); + xe_assert(vm->xe, xe_vm_in_fault_mode(vm)); + + xe_vma_ops_init(&vops, vm, NULL, NULL, 0); + + err = xe_vm_ops_add_rebind(&vops, vma, tile_mask); + if (err) + return ERR_PTR(err); + + fence = ops_execute(vm, &vops); + + list_for_each_entry_safe(op, next_op, &vops.list, link) { + list_del(&op->link); + kfree(op); + } + + return fence; } static void xe_vma_free(struct xe_vma *vma) @@ -805,7 +888,7 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm, for_each_tile(tile, vm->xe, id) vma->tile_mask |= 0x1 << id; - if (GRAPHICS_VER(vm->xe) >= 20 || vm->xe->info.platform == XE_PVC) + if (vm->xe->info.has_atomic_enable_pte_bit) vma->gpuva.flags |= XE_VMA_ATOMIC_PTE_BIT; vma->pat_index = pat_index; @@ -1173,6 +1256,8 @@ static const struct xe_pt_ops xelp_pt_ops = { .pde_encode_bo = xelp_pde_encode_bo, }; +static void vm_destroy_work_func(struct work_struct *w); + /** * xe_vm_create_scratch() - Setup a scratch memory pagetable tree for the * given tile and vm. @@ -1252,6 +1337,8 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) init_rwsem(&vm->userptr.notifier_lock); spin_lock_init(&vm->userptr.invalidated_lock); + INIT_WORK(&vm->destroy_work, vm_destroy_work_func); + INIT_LIST_HEAD(&vm->preempt.exec_queues); vm->preempt.min_run_period_ms = 10; /* FIXME: Wire up to uAPI */ @@ -1274,7 +1361,7 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) drm_gem_object_put(vm_resv_obj); - err = dma_resv_lock_interruptible(xe_vm_resv(vm), NULL); + err = xe_vm_lock(vm, true); if (err) goto err_close; @@ -1318,7 +1405,7 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) xe_pt_populate_empty(tile, vm, vm->pt_root[id]); } - dma_resv_unlock(xe_vm_resv(vm)); + xe_vm_unlock(vm); /* Kernel migration VM shouldn't have a circular loop.. */ if (!(flags & XE_VM_FLAG_MIGRATION)) { @@ -1360,7 +1447,7 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) return vm; err_unlock_close: - dma_resv_unlock(xe_vm_resv(vm)); + xe_vm_unlock(vm); err_close: xe_vm_close_and_put(vm); return ERR_PTR(err); @@ -1489,9 +1576,10 @@ void xe_vm_close_and_put(struct xe_vm *vm) xe_vm_put(vm); } -static void xe_vm_free(struct drm_gpuvm *gpuvm) +static void vm_destroy_work_func(struct work_struct *w) { - struct xe_vm *vm = container_of(gpuvm, struct xe_vm, gpuvm); + struct xe_vm *vm = + container_of(w, struct xe_vm, destroy_work); struct xe_device *xe = vm->xe; struct xe_tile *tile; u8 id; @@ -1514,6 +1602,14 @@ static void xe_vm_free(struct drm_gpuvm *gpuvm) kfree(vm); } +static void xe_vm_free(struct drm_gpuvm *gpuvm) +{ + struct xe_vm *vm = container_of(gpuvm, struct xe_vm, gpuvm); + + /* To destroy the VM we need to be able to sleep */ + queue_work(system_unbound_wq, &vm->destroy_work); +} + struct xe_vm *xe_vm_lookup(struct xe_file *xef, u32 id) { struct xe_vm *vm; @@ -1550,23 +1646,13 @@ xe_vm_unbind_vma(struct xe_vma *vma, struct xe_exec_queue *q, struct dma_fence *fence = NULL; struct dma_fence **fences = NULL; struct dma_fence_array *cf = NULL; - int cur_fence = 0, i; + int cur_fence = 0; int number_tiles = hweight8(vma->tile_present); int err; u8 id; trace_xe_vma_unbind(vma); - if (vma->ufence) { - struct xe_user_fence * const f = vma->ufence; - - if (!xe_sync_ufence_get_status(f)) - return ERR_PTR(-EBUSY); - - vma->ufence = NULL; - xe_sync_ufence_put(f); - } - if (number_tiles > 1) { fences = kmalloc_array(number_tiles, sizeof(*fences), GFP_KERNEL); @@ -1608,10 +1694,6 @@ next: fence = cf ? &cf->base : !fence ? xe_exec_queue_last_fence_get(wait_exec_queue, vm) : fence; - if (last_op) { - for (i = 0; i < num_syncs; i++) - xe_sync_entry_signal(&syncs[i], fence); - } return fence; @@ -1628,15 +1710,15 @@ err_fences: static struct dma_fence * xe_vm_bind_vma(struct xe_vma *vma, struct xe_exec_queue *q, struct xe_sync_entry *syncs, u32 num_syncs, - bool first_op, bool last_op) + u8 tile_mask, bool first_op, bool last_op) { struct xe_tile *tile; struct dma_fence *fence; struct dma_fence **fences = NULL; struct dma_fence_array *cf = NULL; struct xe_vm *vm = xe_vma_vm(vma); - int cur_fence = 0, i; - int number_tiles = hweight8(vma->tile_mask); + int cur_fence = 0; + int number_tiles = hweight8(tile_mask); int err; u8 id; @@ -1650,7 +1732,7 @@ xe_vm_bind_vma(struct xe_vma *vma, struct xe_exec_queue *q, } for_each_tile(tile, vm->xe, id) { - if (!(vma->tile_mask & BIT(id))) + if (!(tile_mask & BIT(id))) goto next; fence = __xe_pt_bind_vma(tile, vma, q ? q : vm->q[id], @@ -1682,12 +1764,6 @@ next: } } - if (last_op) { - for (i = 0; i < num_syncs; i++) - xe_sync_entry_signal(&syncs[i], - cf ? &cf->base : fence); - } - return cf ? &cf->base : fence; err_fences: @@ -1715,87 +1791,46 @@ find_ufence_get(struct xe_sync_entry *syncs, u32 num_syncs) return NULL; } -static int __xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, - struct xe_exec_queue *q, struct xe_sync_entry *syncs, - u32 num_syncs, bool immediate, bool first_op, - bool last_op) +static struct dma_fence * +xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, struct xe_exec_queue *q, + struct xe_bo *bo, struct xe_sync_entry *syncs, u32 num_syncs, + u8 tile_mask, bool immediate, bool first_op, bool last_op) { struct dma_fence *fence; struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, q); - struct xe_user_fence *ufence; xe_vm_assert_held(vm); - - ufence = find_ufence_get(syncs, num_syncs); - if (vma->ufence && ufence) - xe_sync_ufence_put(vma->ufence); - - vma->ufence = ufence ?: vma->ufence; + xe_bo_assert_held(bo); if (immediate) { - fence = xe_vm_bind_vma(vma, q, syncs, num_syncs, first_op, - last_op); + fence = xe_vm_bind_vma(vma, q, syncs, num_syncs, tile_mask, + first_op, last_op); if (IS_ERR(fence)) - return PTR_ERR(fence); + return fence; } else { - int i; - xe_assert(vm->xe, xe_vm_in_fault_mode(vm)); fence = xe_exec_queue_last_fence_get(wait_exec_queue, vm); - if (last_op) { - for (i = 0; i < num_syncs; i++) - xe_sync_entry_signal(&syncs[i], fence); - } - } - - if (last_op) - xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence); - dma_fence_put(fence); - - return 0; -} - -static int xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, struct xe_exec_queue *q, - struct xe_bo *bo, struct xe_sync_entry *syncs, - u32 num_syncs, bool immediate, bool first_op, - bool last_op) -{ - int err; - - xe_vm_assert_held(vm); - xe_bo_assert_held(bo); - - if (bo && immediate) { - err = xe_bo_validate(bo, vm, true); - if (err) - return err; } - return __xe_vm_bind(vm, vma, q, syncs, num_syncs, immediate, first_op, - last_op); + return fence; } -static int xe_vm_unbind(struct xe_vm *vm, struct xe_vma *vma, - struct xe_exec_queue *q, struct xe_sync_entry *syncs, - u32 num_syncs, bool first_op, bool last_op) +static struct dma_fence * +xe_vm_unbind(struct xe_vm *vm, struct xe_vma *vma, + struct xe_exec_queue *q, struct xe_sync_entry *syncs, + u32 num_syncs, bool first_op, bool last_op) { struct dma_fence *fence; - struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, q); xe_vm_assert_held(vm); xe_bo_assert_held(xe_vma_bo(vma)); fence = xe_vm_unbind_vma(vma, q, syncs, num_syncs, first_op, last_op); if (IS_ERR(fence)) - return PTR_ERR(fence); - - xe_vma_destroy(vma, fence); - if (last_op) - xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence); - dma_fence_put(fence); + return fence; - return 0; + return fence; } #define ALL_DRM_XE_VM_CREATE_FLAGS (DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE | \ @@ -1938,40 +1973,18 @@ static const u32 region_to_mem_type[] = { XE_PL_VRAM1, }; -static int xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma, - struct xe_exec_queue *q, u32 region, - struct xe_sync_entry *syncs, u32 num_syncs, - bool first_op, bool last_op) +static struct dma_fence * +xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma, + struct xe_exec_queue *q, struct xe_sync_entry *syncs, + u32 num_syncs, bool first_op, bool last_op) { struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, q); - int err; - - xe_assert(vm->xe, region < ARRAY_SIZE(region_to_mem_type)); - - if (!xe_vma_has_no_bo(vma)) { - err = xe_bo_migrate(xe_vma_bo(vma), region_to_mem_type[region]); - if (err) - return err; - } if (vma->tile_mask != (vma->tile_present & ~vma->tile_invalidated)) { return xe_vm_bind(vm, vma, q, xe_vma_bo(vma), syncs, num_syncs, - true, first_op, last_op); + vma->tile_mask, true, first_op, last_op); } else { - int i; - - /* Nothing to do, signal fences now */ - if (last_op) { - for (i = 0; i < num_syncs; i++) { - struct dma_fence *fence = - xe_exec_queue_last_fence_get(wait_exec_queue, vm); - - xe_sync_entry_signal(&syncs[i], fence); - dma_fence_put(fence); - } - } - - return 0; + return xe_exec_queue_last_fence_get(wait_exec_queue, vm); } } @@ -2267,23 +2280,28 @@ static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op) static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q, struct drm_gpuva_ops *ops, struct xe_sync_entry *syncs, u32 num_syncs, - struct list_head *ops_list, bool last) + struct xe_vma_ops *vops, bool last) { struct xe_device *xe = vm->xe; struct xe_vma_op *last_op = NULL; struct drm_gpuva_op *__op; + struct xe_tile *tile; + u8 id, tile_mask = 0; int err = 0; lockdep_assert_held_write(&vm->lock); + for_each_tile(tile, vm->xe, id) + tile_mask |= 0x1 << id; + drm_gpuva_for_each_op(__op, ops) { struct xe_vma_op *op = gpuva_op_to_vma_op(__op); struct xe_vma *vma; - bool first = list_empty(ops_list); + bool first = list_empty(&vops->list); unsigned int flags = 0; INIT_LIST_HEAD(&op->link); - list_add_tail(&op->link, ops_list); + list_add_tail(&op->link, &vops->list); if (first) { op->flags |= XE_VMA_OP_FIRST; @@ -2292,6 +2310,7 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q, } op->q = q; + op->tile_mask = tile_mask; switch (op->base.op) { case DRM_GPUVA_OP_MAP: @@ -2409,12 +2428,11 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q, } /* FIXME: Unhandled corner case */ - XE_WARN_ON(!last_op && last && !list_empty(ops_list)); + XE_WARN_ON(!last_op && last && !list_empty(&vops->list)); if (!last_op) return 0; - last_op->ops = ops; if (last) { last_op->flags |= XE_VMA_OP_LAST; last_op->num_syncs = num_syncs; @@ -2424,27 +2442,24 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q, return 0; } -static int op_execute(struct drm_exec *exec, struct xe_vm *vm, - struct xe_vma *vma, struct xe_vma_op *op) +static struct dma_fence *op_execute(struct xe_vm *vm, struct xe_vma *vma, + struct xe_vma_op *op) { - int err; - - lockdep_assert_held_write(&vm->lock); + struct dma_fence *fence = NULL; - err = xe_vm_lock_vma(exec, vma); - if (err) - return err; + lockdep_assert_held(&vm->lock); xe_vm_assert_held(vm); xe_bo_assert_held(xe_vma_bo(vma)); switch (op->base.op) { case DRM_GPUVA_OP_MAP: - err = xe_vm_bind(vm, vma, op->q, xe_vma_bo(vma), - op->syncs, op->num_syncs, - op->map.immediate || !xe_vm_in_fault_mode(vm), - op->flags & XE_VMA_OP_FIRST, - op->flags & XE_VMA_OP_LAST); + fence = xe_vm_bind(vm, vma, op->q, xe_vma_bo(vma), + op->syncs, op->num_syncs, + op->tile_mask, + op->map.immediate || !xe_vm_in_fault_mode(vm), + op->flags & XE_VMA_OP_FIRST, + op->flags & XE_VMA_OP_LAST); break; case DRM_GPUVA_OP_REMAP: { @@ -2454,37 +2469,41 @@ static int op_execute(struct drm_exec *exec, struct xe_vm *vm, if (!op->remap.unmap_done) { if (prev || next) vma->gpuva.flags |= XE_VMA_FIRST_REBIND; - err = xe_vm_unbind(vm, vma, op->q, op->syncs, - op->num_syncs, - op->flags & XE_VMA_OP_FIRST, - op->flags & XE_VMA_OP_LAST && - !prev && !next); - if (err) + fence = xe_vm_unbind(vm, vma, op->q, op->syncs, + op->num_syncs, + op->flags & XE_VMA_OP_FIRST, + op->flags & XE_VMA_OP_LAST && + !prev && !next); + if (IS_ERR(fence)) break; op->remap.unmap_done = true; } if (prev) { op->remap.prev->gpuva.flags |= XE_VMA_LAST_REBIND; - err = xe_vm_bind(vm, op->remap.prev, op->q, - xe_vma_bo(op->remap.prev), op->syncs, - op->num_syncs, true, false, - op->flags & XE_VMA_OP_LAST && !next); + dma_fence_put(fence); + fence = xe_vm_bind(vm, op->remap.prev, op->q, + xe_vma_bo(op->remap.prev), op->syncs, + op->num_syncs, + op->remap.prev->tile_mask, true, + false, + op->flags & XE_VMA_OP_LAST && !next); op->remap.prev->gpuva.flags &= ~XE_VMA_LAST_REBIND; - if (err) + if (IS_ERR(fence)) break; op->remap.prev = NULL; } if (next) { op->remap.next->gpuva.flags |= XE_VMA_LAST_REBIND; - err = xe_vm_bind(vm, op->remap.next, op->q, - xe_vma_bo(op->remap.next), - op->syncs, op->num_syncs, - true, false, - op->flags & XE_VMA_OP_LAST); + dma_fence_put(fence); + fence = xe_vm_bind(vm, op->remap.next, op->q, + xe_vma_bo(op->remap.next), + op->syncs, op->num_syncs, + op->remap.next->tile_mask, true, + false, op->flags & XE_VMA_OP_LAST); op->remap.next->gpuva.flags &= ~XE_VMA_LAST_REBIND; - if (err) + if (IS_ERR(fence)) break; op->remap.next = NULL; } @@ -2492,43 +2511,35 @@ static int op_execute(struct drm_exec *exec, struct xe_vm *vm, break; } case DRM_GPUVA_OP_UNMAP: - err = xe_vm_unbind(vm, vma, op->q, op->syncs, - op->num_syncs, op->flags & XE_VMA_OP_FIRST, - op->flags & XE_VMA_OP_LAST); + fence = xe_vm_unbind(vm, vma, op->q, op->syncs, + op->num_syncs, op->flags & XE_VMA_OP_FIRST, + op->flags & XE_VMA_OP_LAST); break; case DRM_GPUVA_OP_PREFETCH: - err = xe_vm_prefetch(vm, vma, op->q, op->prefetch.region, - op->syncs, op->num_syncs, - op->flags & XE_VMA_OP_FIRST, - op->flags & XE_VMA_OP_LAST); + fence = xe_vm_prefetch(vm, vma, op->q, op->syncs, op->num_syncs, + op->flags & XE_VMA_OP_FIRST, + op->flags & XE_VMA_OP_LAST); break; default: drm_warn(&vm->xe->drm, "NOT POSSIBLE"); } - if (err) + if (IS_ERR(fence)) trace_xe_vma_fail(vma); - return err; + return fence; } -static int __xe_vma_op_execute(struct xe_vm *vm, struct xe_vma *vma, - struct xe_vma_op *op) +static struct dma_fence * +__xe_vma_op_execute(struct xe_vm *vm, struct xe_vma *vma, + struct xe_vma_op *op) { - struct drm_exec exec; + struct dma_fence *fence; int err; retry_userptr: - drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0); - drm_exec_until_all_locked(&exec) { - err = op_execute(&exec, vm, vma, op); - drm_exec_retry_on_contention(&exec); - if (err) - break; - } - drm_exec_fini(&exec); - - if (err == -EAGAIN) { + fence = op_execute(vm, vma, op); + if (IS_ERR(fence) && PTR_ERR(fence) == -EAGAIN) { lockdep_assert_held_write(&vm->lock); if (op->base.op == DRM_GPUVA_OP_REMAP) { @@ -2545,22 +2556,24 @@ retry_userptr: if (!err) goto retry_userptr; + fence = ERR_PTR(err); trace_xe_vma_fail(vma); } } - return err; + return fence; } -static int xe_vma_op_execute(struct xe_vm *vm, struct xe_vma_op *op) +static struct dma_fence * +xe_vma_op_execute(struct xe_vm *vm, struct xe_vma_op *op) { - int ret = 0; + struct dma_fence *fence = ERR_PTR(-ENOMEM); - lockdep_assert_held_write(&vm->lock); + lockdep_assert_held(&vm->lock); switch (op->base.op) { case DRM_GPUVA_OP_MAP: - ret = __xe_vma_op_execute(vm, op->map.vma, op); + fence = __xe_vma_op_execute(vm, op->map.vma, op); break; case DRM_GPUVA_OP_REMAP: { @@ -2573,42 +2586,23 @@ static int xe_vma_op_execute(struct xe_vm *vm, struct xe_vma_op *op) else vma = op->remap.next; - ret = __xe_vma_op_execute(vm, vma, op); + fence = __xe_vma_op_execute(vm, vma, op); break; } case DRM_GPUVA_OP_UNMAP: - ret = __xe_vma_op_execute(vm, gpuva_to_vma(op->base.unmap.va), - op); + fence = __xe_vma_op_execute(vm, gpuva_to_vma(op->base.unmap.va), + op); break; case DRM_GPUVA_OP_PREFETCH: - ret = __xe_vma_op_execute(vm, - gpuva_to_vma(op->base.prefetch.va), - op); + fence = __xe_vma_op_execute(vm, + gpuva_to_vma(op->base.prefetch.va), + op); break; default: drm_warn(&vm->xe->drm, "NOT POSSIBLE"); } - return ret; -} - -static void xe_vma_op_cleanup(struct xe_vm *vm, struct xe_vma_op *op) -{ - bool last = op->flags & XE_VMA_OP_LAST; - - if (last) { - while (op->num_syncs--) - xe_sync_entry_cleanup(&op->syncs[op->num_syncs]); - kfree(op->syncs); - if (op->q) - xe_exec_queue_put(op->q); - } - if (!list_empty(&op->link)) - list_del(&op->link); - if (op->ops) - drm_gpuva_ops_free(&vm->gpuvm, op->ops); - if (last) - xe_vm_put(vm); + return fence; } static void xe_vma_op_unwind(struct xe_vm *vm, struct xe_vma_op *op, @@ -2687,34 +2681,223 @@ static void vm_bind_ioctl_ops_unwind(struct xe_vm *vm, op->flags & XE_VMA_OP_PREV_COMMITTED, op->flags & XE_VMA_OP_NEXT_COMMITTED); } + } +} + +static int vma_lock_and_validate(struct drm_exec *exec, struct xe_vma *vma, + bool validate) +{ + struct xe_bo *bo = xe_vma_bo(vma); + int err = 0; - drm_gpuva_ops_free(&vm->gpuvm, __ops); + if (bo) { + if (!bo->vm) + err = drm_exec_lock_obj(exec, &bo->ttm.base); + if (!err && validate) + err = xe_bo_validate(bo, xe_vma_vm(vma), true); } + + return err; } -static int vm_bind_ioctl_ops_execute(struct xe_vm *vm, - struct list_head *ops_list) +static int check_ufence(struct xe_vma *vma) +{ + if (vma->ufence) { + struct xe_user_fence * const f = vma->ufence; + + if (!xe_sync_ufence_get_status(f)) + return -EBUSY; + + vma->ufence = NULL; + xe_sync_ufence_put(f); + } + + return 0; +} + +static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm, + struct xe_vma_op *op) +{ + int err = 0; + + switch (op->base.op) { + case DRM_GPUVA_OP_MAP: + err = vma_lock_and_validate(exec, op->map.vma, + !xe_vm_in_fault_mode(vm) || + op->map.immediate); + break; + case DRM_GPUVA_OP_REMAP: + err = check_ufence(gpuva_to_vma(op->base.remap.unmap->va)); + if (err) + break; + + err = vma_lock_and_validate(exec, + gpuva_to_vma(op->base.remap.unmap->va), + false); + if (!err && op->remap.prev) + err = vma_lock_and_validate(exec, op->remap.prev, true); + if (!err && op->remap.next) + err = vma_lock_and_validate(exec, op->remap.next, true); + break; + case DRM_GPUVA_OP_UNMAP: + err = check_ufence(gpuva_to_vma(op->base.unmap.va)); + if (err) + break; + + err = vma_lock_and_validate(exec, + gpuva_to_vma(op->base.unmap.va), + false); + break; + case DRM_GPUVA_OP_PREFETCH: + { + struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va); + u32 region = op->prefetch.region; + + xe_assert(vm->xe, region <= ARRAY_SIZE(region_to_mem_type)); + + err = vma_lock_and_validate(exec, + gpuva_to_vma(op->base.prefetch.va), + false); + if (!err && !xe_vma_has_no_bo(vma)) + err = xe_bo_migrate(xe_vma_bo(vma), + region_to_mem_type[region]); + break; + } + default: + drm_warn(&vm->xe->drm, "NOT POSSIBLE"); + } + + return err; +} + +static int vm_bind_ioctl_ops_lock_and_prep(struct drm_exec *exec, + struct xe_vm *vm, + struct xe_vma_ops *vops) +{ + struct xe_vma_op *op; + int err; + + err = drm_exec_lock_obj(exec, xe_vm_obj(vm)); + if (err) + return err; + + list_for_each_entry(op, &vops->list, link) { + err = op_lock_and_prep(exec, vm, op); + if (err) + return err; + } + + return 0; +} + +static struct dma_fence *ops_execute(struct xe_vm *vm, + struct xe_vma_ops *vops) { struct xe_vma_op *op, *next; + struct dma_fence *fence = NULL; + + list_for_each_entry_safe(op, next, &vops->list, link) { + dma_fence_put(fence); + fence = xe_vma_op_execute(vm, op); + if (IS_ERR(fence)) { + drm_warn(&vm->xe->drm, "VM op(%d) failed with %ld", + op->base.op, PTR_ERR(fence)); + fence = ERR_PTR(-ENOSPC); + break; + } + } + + return fence; +} + +static void vma_add_ufence(struct xe_vma *vma, struct xe_user_fence *ufence) +{ + if (vma->ufence) + xe_sync_ufence_put(vma->ufence); + vma->ufence = __xe_sync_ufence_get(ufence); +} + +static void op_add_ufence(struct xe_vm *vm, struct xe_vma_op *op, + struct xe_user_fence *ufence) +{ + switch (op->base.op) { + case DRM_GPUVA_OP_MAP: + vma_add_ufence(op->map.vma, ufence); + break; + case DRM_GPUVA_OP_REMAP: + if (op->remap.prev) + vma_add_ufence(op->remap.prev, ufence); + if (op->remap.next) + vma_add_ufence(op->remap.next, ufence); + break; + case DRM_GPUVA_OP_UNMAP: + break; + case DRM_GPUVA_OP_PREFETCH: + vma_add_ufence(gpuva_to_vma(op->base.prefetch.va), ufence); + break; + default: + drm_warn(&vm->xe->drm, "NOT POSSIBLE"); + } +} + +static void vm_bind_ioctl_ops_fini(struct xe_vm *vm, struct xe_vma_ops *vops, + struct dma_fence *fence) +{ + struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, vops->q); + struct xe_user_fence *ufence; + struct xe_vma_op *op; + int i; + + ufence = find_ufence_get(vops->syncs, vops->num_syncs); + list_for_each_entry(op, &vops->list, link) { + if (ufence) + op_add_ufence(vm, op, ufence); + + if (op->base.op == DRM_GPUVA_OP_UNMAP) + xe_vma_destroy(gpuva_to_vma(op->base.unmap.va), fence); + else if (op->base.op == DRM_GPUVA_OP_REMAP) + xe_vma_destroy(gpuva_to_vma(op->base.remap.unmap->va), + fence); + } + if (ufence) + xe_sync_ufence_put(ufence); + for (i = 0; i < vops->num_syncs; i++) + xe_sync_entry_signal(vops->syncs + i, fence); + xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence); + dma_fence_put(fence); +} + +static int vm_bind_ioctl_ops_execute(struct xe_vm *vm, + struct xe_vma_ops *vops) +{ + struct drm_exec exec; + struct dma_fence *fence; int err; lockdep_assert_held_write(&vm->lock); - list_for_each_entry_safe(op, next, ops_list, link) { - err = xe_vma_op_execute(vm, op); - if (err) { - drm_warn(&vm->xe->drm, "VM op(%d) failed with %d", - op->base.op, err); - /* - * FIXME: Killing VM rather than proper error handling - */ - xe_vm_kill(vm); - return -ENOSPC; + drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT | + DRM_EXEC_IGNORE_DUPLICATES, 0); + drm_exec_until_all_locked(&exec) { + err = vm_bind_ioctl_ops_lock_and_prep(&exec, vm, vops); + drm_exec_retry_on_contention(&exec); + if (err) + goto unlock; + + fence = ops_execute(vm, vops); + if (IS_ERR(fence)) { + err = PTR_ERR(fence); + /* FIXME: Killing VM rather than proper error handling */ + xe_vm_kill(vm, false); + goto unlock; + } else { + vm_bind_ioctl_ops_fini(vm, vops, fence); } - xe_vma_op_cleanup(vm, op); } - return 0; +unlock: + drm_exec_fini(&exec); + return err; } #define SUPPORTED_FLAGS \ @@ -2862,6 +3045,58 @@ static int vm_bind_ioctl_signal_fences(struct xe_vm *vm, return err; } +static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm, + struct xe_exec_queue *q, + struct xe_sync_entry *syncs, u32 num_syncs) +{ + memset(vops, 0, sizeof(*vops)); + INIT_LIST_HEAD(&vops->list); + vops->vm = vm; + vops->q = q; + vops->syncs = syncs; + vops->num_syncs = num_syncs; +} + +static int xe_vm_bind_ioctl_validate_bo(struct xe_device *xe, struct xe_bo *bo, + u64 addr, u64 range, u64 obj_offset, + u16 pat_index) +{ + u16 coh_mode; + + if (XE_IOCTL_DBG(xe, range > bo->size) || + XE_IOCTL_DBG(xe, obj_offset > + bo->size - range)) { + return -EINVAL; + } + + if (bo->flags & XE_BO_FLAG_INTERNAL_64K) { + if (XE_IOCTL_DBG(xe, obj_offset & + XE_64K_PAGE_MASK) || + XE_IOCTL_DBG(xe, addr & XE_64K_PAGE_MASK) || + XE_IOCTL_DBG(xe, range & XE_64K_PAGE_MASK)) { + return -EINVAL; + } + } + + coh_mode = xe_pat_index_get_coh_mode(xe, pat_index); + if (bo->cpu_caching) { + if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE && + bo->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB)) { + return -EINVAL; + } + } else if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE)) { + /* + * Imported dma-buf from a different device should + * require 1way or 2way coherency since we don't know + * how it was mapped on the CPU. Just assume is it + * potentially cached on CPU side. + */ + return -EINVAL; + } + + return 0; +} + int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { struct xe_device *xe = to_xe_device(dev); @@ -2875,7 +3110,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) u32 num_syncs, num_ufence = 0; struct xe_sync_entry *syncs = NULL; struct drm_xe_vm_bind_op *bind_ops; - LIST_HEAD(ops_list); + struct xe_vma_ops vops; int err; int i; @@ -2945,7 +3180,6 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) u32 obj = bind_ops[i].obj; u64 obj_offset = bind_ops[i].obj_offset; u16 pat_index = bind_ops[i].pat_index; - u16 coh_mode; if (!obj) continue; @@ -2957,40 +3191,10 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) } bos[i] = gem_to_xe_bo(gem_obj); - if (XE_IOCTL_DBG(xe, range > bos[i]->size) || - XE_IOCTL_DBG(xe, obj_offset > - bos[i]->size - range)) { - err = -EINVAL; - goto put_obj; - } - - if (bos[i]->flags & XE_BO_FLAG_INTERNAL_64K) { - if (XE_IOCTL_DBG(xe, obj_offset & - XE_64K_PAGE_MASK) || - XE_IOCTL_DBG(xe, addr & XE_64K_PAGE_MASK) || - XE_IOCTL_DBG(xe, range & XE_64K_PAGE_MASK)) { - err = -EINVAL; - goto put_obj; - } - } - - coh_mode = xe_pat_index_get_coh_mode(xe, pat_index); - if (bos[i]->cpu_caching) { - if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE && - bos[i]->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB)) { - err = -EINVAL; - goto put_obj; - } - } else if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE)) { - /* - * Imported dma-buf from a different device should - * require 1way or 2way coherency since we don't know - * how it was mapped on the CPU. Just assume is it - * potentially cached on CPU side. - */ - err = -EINVAL; + err = xe_vm_bind_ioctl_validate_bo(xe, bos[i], addr, range, + obj_offset, pat_index); + if (err) goto put_obj; - } } if (args->num_syncs) { @@ -3026,6 +3230,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) goto free_syncs; } + xe_vma_ops_init(&vops, vm, q, syncs, num_syncs); for (i = 0; i < args->num_binds; ++i) { u64 range = bind_ops[i].range; u64 addr = bind_ops[i].addr; @@ -3045,42 +3250,25 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) } err = vm_bind_ioctl_ops_parse(vm, q, ops[i], syncs, num_syncs, - &ops_list, - i == args->num_binds - 1); + &vops, i == args->num_binds - 1); if (err) goto unwind_ops; } /* Nothing to do */ - if (list_empty(&ops_list)) { + if (list_empty(&vops.list)) { err = -ENODATA; goto unwind_ops; } - xe_vm_get(vm); - if (q) - xe_exec_queue_get(q); - - err = vm_bind_ioctl_ops_execute(vm, &ops_list); - - up_write(&vm->lock); - - if (q) - xe_exec_queue_put(q); - xe_vm_put(vm); - - for (i = 0; bos && i < args->num_binds; ++i) - xe_bo_put(bos[i]); - - kvfree(bos); - kvfree(ops); - if (args->num_binds > 1) - kvfree(bind_ops); - - return err; + err = vm_bind_ioctl_ops_execute(vm, &vops); unwind_ops: - vm_bind_ioctl_ops_unwind(vm, ops, args->num_binds); + if (err && err != -ENODATA) + vm_bind_ioctl_ops_unwind(vm, ops, args->num_binds); + for (i = args->num_binds - 1; i >= 0; --i) + if (ops[i]) + drm_gpuva_ops_free(&vm->gpuvm, ops[i]); free_syncs: if (err == -ENODATA) err = vm_bind_ioctl_signal_fences(vm, q, syncs, num_syncs); @@ -3339,7 +3527,7 @@ void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap) } if (bo) { - dma_resv_lock(bo->ttm.base.resv, NULL); + xe_bo_lock(bo, false); err = ttm_bo_vmap(&bo->ttm, &src); if (!err) { xe_map_memcpy_from(xe_bo_device(bo), @@ -3348,7 +3536,7 @@ void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap) snap->snap[i].len); ttm_bo_vunmap(&bo->ttm, &src); } - dma_resv_unlock(bo->ttm.base.resv); + xe_bo_unlock(bo); } else { void __user *userptr = (void __user *)(size_t)snap->snap[i].bo_ofs; diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h index 306cd0934a19..3ac9021f970e 100644 --- a/drivers/gpu/drm/xe/xe_vm.h +++ b/drivers/gpu/drm/xe/xe_vm.h @@ -6,6 +6,7 @@ #ifndef _XE_VM_H_ #define _XE_VM_H_ +#include "xe_assert.h" #include "xe_bo_types.h" #include "xe_macros.h" #include "xe_map.h" @@ -208,6 +209,8 @@ int __xe_vm_userptr_needs_repin(struct xe_vm *vm); int xe_vm_userptr_check_repin(struct xe_vm *vm); int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker); +struct dma_fence *xe_vma_rebind(struct xe_vm *vm, struct xe_vma *vma, + u8 tile_mask); int xe_vm_invalidate_vma(struct xe_vma *vma); diff --git a/drivers/gpu/drm/xe/xe_vm_doc.h b/drivers/gpu/drm/xe/xe_vm_doc.h index bdc6659891a5..4d33f310b653 100644 --- a/drivers/gpu/drm/xe/xe_vm_doc.h +++ b/drivers/gpu/drm/xe/xe_vm_doc.h @@ -25,7 +25,7 @@ * VM bind (create GPU mapping for a BO or userptr) * ================================================ * - * Creates GPU mapings for a BO or userptr within a VM. VM binds uses the same + * Creates GPU mappings for a BO or userptr within a VM. VM binds uses the same * in / out fence interface (struct drm_xe_sync) as execs which allows users to * think of binds and execs as more or less the same operation. * @@ -190,8 +190,8 @@ * Deferred binds in fault mode * ---------------------------- * - * In a VM is in fault mode (TODO: link to fault mode), new bind operations that - * create mappings are by default are deferred to the page fault handler (first + * If a VM is in fault mode (TODO: link to fault mode), new bind operations that + * create mappings are by default deferred to the page fault handler (first * use). This behavior can be overriden by setting the flag * DRM_XE_VM_BIND_FLAG_IMMEDIATE which indicates to creating the mapping * immediately. @@ -225,7 +225,7 @@ * * A VM in compute mode enables long running workloads and ultra low latency * submission (ULLS). ULLS is implemented via a continuously running batch + - * semaphores. This enables to the user to insert jump to new batch commands + * semaphores. This enables the user to insert jump to new batch commands * into the continuously running batch. In both cases these batches exceed the * time a dma fence is allowed to exist for before signaling, as such dma fences * are not used when a VM is in compute mode. User fences (TODO: link user fence @@ -244,7 +244,7 @@ * Once all preempt fences are signaled for a VM the kernel can safely move the * memory and kick the rebind worker which resumes all the engines execution. * - * A preempt fence, for every engine using the VM, is installed the VM's + * A preempt fence, for every engine using the VM, is installed into the VM's * dma-resv DMA_RESV_USAGE_PREEMPT_FENCE slot. The same preempt fence, for every * engine using the VM, is also installed into the same dma-resv slot of every * external BO mapped in the VM. @@ -314,7 +314,7 @@ * signaling, and memory allocation is usually required to resolve a page * fault, but memory allocation is not allowed to gate dma fence signaling. As * such, dma fences are not allowed when VM is in fault mode. Because dma-fences - * are not allowed, long running workloads and ULLS are enabled on a faulting + * are not allowed, only long running workloads and ULLS are enabled on a faulting * VM. * * Defered VM binds @@ -399,14 +399,14 @@ * Notice no rebind is issued in the access counter handler as the rebind will * be issued on next page fault. * - * Cavets with eviction / user pointer invalidation - * ------------------------------------------------ + * Caveats with eviction / user pointer invalidation + * ------------------------------------------------- * * In the case of eviction and user pointer invalidation on a faulting VM, there * is no need to issue a rebind rather we just need to blow away the page tables * for the VMAs and the page fault handler will rebind the VMAs when they fault. - * The cavet is to update / read the page table structure the VM global lock is - * neeeed. In both the case of eviction and user pointer invalidation locks are + * The caveat is to update / read the page table structure the VM global lock is + * needed. In both the case of eviction and user pointer invalidation locks are * held which make acquiring the VM global lock impossible. To work around this * every VMA maintains a list of leaf page table entries which should be written * to zero to blow away the VMA's page tables. After writing zero to these @@ -427,9 +427,9 @@ * VM global lock (vm->lock) - rw semaphore lock. Outer most lock which protects * the list of userptrs mapped in the VM, the list of engines using this VM, and * the array of external BOs mapped in the VM. When adding or removing any of the - * aforemented state from the VM should acquire this lock in write mode. The VM + * aforementioned state from the VM should acquire this lock in write mode. The VM * bind path also acquires this lock in write while the exec / compute mode - * rebind worker acquire this lock in read mode. + * rebind worker acquires this lock in read mode. * * VM dma-resv lock (vm->ttm.base.resv->lock) - WW lock. Protects VM dma-resv * slots which is shared with any private BO in the VM. Expected to be acquired diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index 72a100671e5d..ce1a63a5e3e7 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -178,6 +178,13 @@ struct xe_vm { struct list_head rebind_list; /** + * @destroy_work: worker to destroy VM, needed as a dma_fence signaling + * from an irq context can be last put and the destroy needs to be able + * to sleep. + */ + struct work_struct destroy_work; + + /** * @rftree: range fence tree to track updates to page table structure. * Used to implement conflict tracking between independent bind engines. */ @@ -323,11 +330,6 @@ enum xe_vma_op_flags { struct xe_vma_op { /** @base: GPUVA base operation */ struct drm_gpuva_op base; - /** - * @ops: GPUVA ops, when set call drm_gpuva_ops_free after this - * operations is processed - */ - struct drm_gpuva_ops *ops; /** @q: exec queue for this operation */ struct xe_exec_queue *q; /** @@ -341,6 +343,8 @@ struct xe_vma_op { struct list_head link; /** @flags: operation flags */ enum xe_vma_op_flags flags; + /** @tile_mask: Tile mask for operation */ + u8 tile_mask; union { /** @map: VMA map operation specific data */ @@ -351,4 +355,19 @@ struct xe_vma_op { struct xe_vma_op_prefetch prefetch; }; }; + +/** struct xe_vma_ops - VMA operations */ +struct xe_vma_ops { + /** @list: list of VMA operations */ + struct list_head list; + /** @vm: VM */ + struct xe_vm *vm; + /** @q: exec queue these operations */ + struct xe_exec_queue *q; + /** @syncs: syncs these operation */ + struct xe_sync_entry *syncs; + /** @num_syncs: number of syncs */ + u32 num_syncs; +}; + #endif diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index dd214d95e4b6..05db53c1448c 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -533,6 +533,10 @@ static const struct xe_rtp_entry_sr engine_was[] = { FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0, WR_REQ_CHAINING_DIS)) }, + { XE_RTP_NAME("14021402888"), + XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(SET(HALF_SLICE_CHICKEN7, CLEAR_OPTIMIZATION_DISABLE)) + }, /* Xe2_HPM */ @@ -668,6 +672,11 @@ static const struct xe_rtp_entry_sr lrc_was[] = { XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2004), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(COMMON_SLICE_CHICKEN1, DISABLE_BOTTOM_CLIP_RECTANGLE_TEST)) }, + { XE_RTP_NAME("14021567978"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED), + ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(SET(CHICKEN_RASTER_2, TBIMR_FAST_CLIP)) + }, /* Xe2_HPG */ { XE_RTP_NAME("15010599737"), @@ -682,6 +691,15 @@ static const struct xe_rtp_entry_sr lrc_was[] = { XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(WM_CHICKEN3, HIZ_PLANE_COMPRESSION_DIS)) }, + { XE_RTP_NAME("14021490052"), + XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(SET(FF_MODE, + DIS_MESH_PARTIAL_AUTOSTRIP | + DIS_MESH_AUTOSTRIP), + SET(VFLSKPD, + DIS_PARTIAL_AUTOSTRIP | + DIS_AUTOSTRIP)) + }, {} }; |