summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/xe
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/xe')
-rw-r--r--drivers/gpu/drm/xe/Kconfig4
-rw-r--r--drivers/gpu/drm/xe/Kconfig.debug12
-rw-r--r--drivers/gpu/drm/xe/Makefile9
-rw-r--r--drivers/gpu/drm/xe/abi/guc_actions_abi.h8
-rw-r--r--drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h61
-rw-r--r--drivers/gpu/drm/xe/abi/guc_capture_abi.h186
-rw-r--r--drivers/gpu/drm/xe/abi/guc_communication_ctb_abi.h1
-rw-r--r--drivers/gpu/drm/xe/abi/guc_klvs_abi.h1
-rw-r--r--drivers/gpu/drm/xe/abi/guc_log_abi.h75
-rw-r--r--drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_lmem.h1
-rw-r--r--drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_mman.h17
-rw-r--r--drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_object.h64
-rw-r--r--drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_object_frontbuffer.h12
-rw-r--r--drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_object_types.h11
-rw-r--r--drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_stolen.h2
-rw-r--r--drivers/gpu/drm/xe/compat-i915-headers/i915_debugfs.h14
-rw-r--r--drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h8
-rw-r--r--drivers/gpu/drm/xe/compat-i915-headers/i915_gpu_error.h17
-rw-r--r--drivers/gpu/drm/xe/compat-i915-headers/intel_runtime_pm.h17
-rw-r--r--drivers/gpu/drm/xe/compat-i915-headers/intel_uncore.h60
-rw-r--r--drivers/gpu/drm/xe/compat-i915-headers/intel_wakeref.h4
-rw-r--r--drivers/gpu/drm/xe/compat-i915-headers/pxp/intel_pxp.h10
-rw-r--r--drivers/gpu/drm/xe/compat-i915-headers/soc/intel_rom.h6
-rw-r--r--drivers/gpu/drm/xe/display/ext/i915_irq.c33
-rw-r--r--drivers/gpu/drm/xe/display/intel_bo.c84
-rw-r--r--drivers/gpu/drm/xe/display/intel_fb_bo.c19
-rw-r--r--drivers/gpu/drm/xe/display/intel_fb_bo.h24
-rw-r--r--drivers/gpu/drm/xe/display/intel_fbdev_fb.c12
-rw-r--r--drivers/gpu/drm/xe/display/xe_display.c71
-rw-r--r--drivers/gpu/drm/xe/display/xe_display.h4
-rw-r--r--drivers/gpu/drm/xe/display/xe_dsb_buffer.c9
-rw-r--r--drivers/gpu/drm/xe/display/xe_fb_pin.c69
-rw-r--r--drivers/gpu/drm/xe/display/xe_hdcp_gsc.c52
-rw-r--r--drivers/gpu/drm/xe/display/xe_plane_initial.c4
-rw-r--r--drivers/gpu/drm/xe/regs/xe_engine_regs.h1
-rw-r--r--drivers/gpu/drm/xe/regs/xe_gt_regs.h69
-rw-r--r--drivers/gpu/drm/xe/regs/xe_guc_regs.h2
-rw-r--r--drivers/gpu/drm/xe/regs/xe_irq_regs.h82
-rw-r--r--drivers/gpu/drm/xe/regs/xe_reg_defs.h2
-rw-r--r--drivers/gpu/drm/xe/regs/xe_regs.h14
-rw-r--r--drivers/gpu/drm/xe/tests/xe_bo.c240
-rw-r--r--drivers/gpu/drm/xe/tests/xe_mocs.c22
-rw-r--r--drivers/gpu/drm/xe/xe_assert.h2
-rw-r--r--drivers/gpu/drm/xe/xe_bo.c67
-rw-r--r--drivers/gpu/drm/xe/xe_bo.h10
-rw-r--r--drivers/gpu/drm/xe/xe_bo_types.h8
-rw-r--r--drivers/gpu/drm/xe/xe_debugfs.c27
-rw-r--r--drivers/gpu/drm/xe/xe_devcoredump.c177
-rw-r--r--drivers/gpu/drm/xe/xe_devcoredump.h6
-rw-r--r--drivers/gpu/drm/xe/xe_devcoredump_types.h21
-rw-r--r--drivers/gpu/drm/xe/xe_device.c134
-rw-r--r--drivers/gpu/drm/xe/xe_device.h15
-rw-r--r--drivers/gpu/drm/xe/xe_device_types.h62
-rw-r--r--drivers/gpu/drm/xe/xe_drm_client.c8
-rw-r--r--drivers/gpu/drm/xe/xe_exec_queue_types.h2
-rw-r--r--drivers/gpu/drm/xe/xe_execlist.c21
-rw-r--r--drivers/gpu/drm/xe/xe_force_wake.c126
-rw-r--r--drivers/gpu/drm/xe/xe_force_wake.h23
-rw-r--r--drivers/gpu/drm/xe/xe_force_wake_types.h6
-rw-r--r--drivers/gpu/drm/xe/xe_ggtt.c16
-rw-r--r--drivers/gpu/drm/xe/xe_gsc.c49
-rw-r--r--drivers/gpu/drm/xe/xe_gsc_proxy.c13
-rw-r--r--drivers/gpu/drm/xe/xe_gt.c145
-rw-r--r--drivers/gpu/drm/xe/xe_gt.h2
-rw-r--r--drivers/gpu/drm/xe/xe_gt_ccs_mode.c2
-rw-r--r--drivers/gpu/drm/xe/xe_gt_clock.c6
-rw-r--r--drivers/gpu/drm/xe/xe_gt_debugfs.c26
-rw-r--r--drivers/gpu/drm/xe/xe_gt_freq.c2
-rw-r--r--drivers/gpu/drm/xe/xe_gt_idle.c137
-rw-r--r--drivers/gpu/drm/xe/xe_gt_idle.h2
-rw-r--r--drivers/gpu/drm/xe/xe_gt_idle_types.h2
-rw-r--r--drivers/gpu/drm/xe/xe_gt_mcr.c68
-rw-r--r--drivers/gpu/drm/xe/xe_gt_mcr.h1
-rw-r--r--drivers/gpu/drm/xe/xe_gt_printk.h2
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf.c56
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf.h1
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c239
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h5
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c44
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf_control_types.h2
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c132
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c419
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.h24
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf_migration_types.h40
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c6
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h6
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_vf.c4
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_vf_debugfs.c2
-rw-r--r--drivers/gpu/drm/xe/xe_gt_throttle.c4
-rw-r--r--drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c13
-rw-r--r--drivers/gpu/drm/xe/xe_gt_topology.c22
-rw-r--r--drivers/gpu/drm/xe/xe_gt_types.h22
-rw-r--r--drivers/gpu/drm/xe/xe_guc.c88
-rw-r--r--drivers/gpu/drm/xe/xe_guc.h5
-rw-r--r--drivers/gpu/drm/xe/xe_guc_ads.c167
-rw-r--r--drivers/gpu/drm/xe/xe_guc_ads_types.h2
-rw-r--r--drivers/gpu/drm/xe/xe_guc_capture.c1975
-rw-r--r--drivers/gpu/drm/xe/xe_guc_capture.h61
-rw-r--r--drivers/gpu/drm/xe/xe_guc_capture_types.h68
-rw-r--r--drivers/gpu/drm/xe/xe_guc_ct.c474
-rw-r--r--drivers/gpu/drm/xe/xe_guc_ct.h9
-rw-r--r--drivers/gpu/drm/xe/xe_guc_ct_types.h29
-rw-r--r--drivers/gpu/drm/xe/xe_guc_debugfs.c14
-rw-r--r--drivers/gpu/drm/xe/xe_guc_fwif.h27
-rw-r--r--drivers/gpu/drm/xe/xe_guc_klv_thresholds_set.h7
-rw-r--r--drivers/gpu/drm/xe/xe_guc_log.c313
-rw-r--r--drivers/gpu/drm/xe/xe_guc_log.h15
-rw-r--r--drivers/gpu/drm/xe/xe_guc_log_types.h34
-rw-r--r--drivers/gpu/drm/xe/xe_guc_pc.c84
-rw-r--r--drivers/gpu/drm/xe/xe_guc_relay.c2
-rw-r--r--drivers/gpu/drm/xe/xe_guc_submit.c126
-rw-r--r--drivers/gpu/drm/xe/xe_guc_submit.h2
-rw-r--r--drivers/gpu/drm/xe/xe_guc_types.h2
-rw-r--r--drivers/gpu/drm/xe/xe_huc.c14
-rw-r--r--drivers/gpu/drm/xe/xe_hw_engine.c307
-rw-r--r--drivers/gpu/drm/xe/xe_hw_engine.h6
-rw-r--r--drivers/gpu/drm/xe/xe_hw_engine_types.h68
-rw-r--r--drivers/gpu/drm/xe/xe_hwmon.c16
-rw-r--r--drivers/gpu/drm/xe/xe_irq.c78
-rw-r--r--drivers/gpu/drm/xe/xe_lmtt.c2
-rw-r--r--drivers/gpu/drm/xe/xe_lrc.c26
-rw-r--r--drivers/gpu/drm/xe/xe_lrc.h19
-rw-r--r--drivers/gpu/drm/xe/xe_memirq.c203
-rw-r--r--drivers/gpu/drm/xe/xe_memirq.h6
-rw-r--r--drivers/gpu/drm/xe/xe_memirq_types.h4
-rw-r--r--drivers/gpu/drm/xe/xe_migrate.c6
-rw-r--r--drivers/gpu/drm/xe/xe_mmio.c139
-rw-r--r--drivers/gpu/drm/xe/xe_mmio.h39
-rw-r--r--drivers/gpu/drm/xe/xe_mocs.c31
-rw-r--r--drivers/gpu/drm/xe/xe_oa.c726
-rw-r--r--drivers/gpu/drm/xe/xe_oa_types.h12
-rw-r--r--drivers/gpu/drm/xe/xe_pat.c88
-rw-r--r--drivers/gpu/drm/xe/xe_pci.c106
-rw-r--r--drivers/gpu/drm/xe/xe_pcode.c4
-rw-r--r--drivers/gpu/drm/xe/xe_platform_types.h1
-rw-r--r--drivers/gpu/drm/xe/xe_pm.c2
-rw-r--r--drivers/gpu/drm/xe/xe_query.c93
-rw-r--r--drivers/gpu/drm/xe/xe_reg_sr.c33
-rw-r--r--drivers/gpu/drm/xe/xe_rtp.c2
-rw-r--r--drivers/gpu/drm/xe/xe_sa.c2
-rw-r--r--drivers/gpu/drm/xe/xe_sched_job.c2
-rw-r--r--drivers/gpu/drm/xe/xe_sched_job_types.h3
-rw-r--r--drivers/gpu/drm/xe/xe_sriov.c5
-rw-r--r--drivers/gpu/drm/xe/xe_sync.c8
-rw-r--r--drivers/gpu/drm/xe/xe_tile.c3
-rw-r--r--drivers/gpu/drm/xe/xe_trace.h7
-rw-r--r--drivers/gpu/drm/xe/xe_trace_bo.h2
-rw-r--r--drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c8
-rw-r--r--drivers/gpu/drm/xe/xe_tuning.c10
-rw-r--r--drivers/gpu/drm/xe/xe_uc_fw.c19
-rw-r--r--drivers/gpu/drm/xe/xe_vram.c19
-rw-r--r--drivers/gpu/drm/xe/xe_wa.c58
-rw-r--r--drivers/gpu/drm/xe/xe_wa_oob.rules4
-rw-r--r--drivers/gpu/drm/xe/xe_wopcm.c15
154 files changed, 7339 insertions, 2003 deletions
diff --git a/drivers/gpu/drm/xe/Kconfig b/drivers/gpu/drm/xe/Kconfig
index 7bbe46a98ff1..b51a2bde73e2 100644
--- a/drivers/gpu/drm/xe/Kconfig
+++ b/drivers/gpu/drm/xe/Kconfig
@@ -8,12 +8,14 @@ config DRM_XE
select SHMEM
select TMPFS
select DRM_BUDDY
+ select DRM_CLIENT_SELECTION
select DRM_EXEC
select DRM_KMS_HELPER
select DRM_KUNIT_TEST_HELPERS if DRM_XE_KUNIT_TEST != n
select DRM_PANEL
select DRM_SUBALLOC_HELPER
select DRM_DISPLAY_DP_HELPER
+ select DRM_DISPLAY_DSC_HELPER
select DRM_DISPLAY_HDCP_HELPER
select DRM_DISPLAY_HDMI_HELPER
select DRM_DISPLAY_HELPER
@@ -49,7 +51,7 @@ config DRM_XE
config DRM_XE_DISPLAY
bool "Enable display support"
- depends on DRM_XE && DRM_XE=m
+ depends on DRM_XE && DRM_XE=m && HAS_IOPORT
select FB_IOMEM_HELPERS
select I2C
select I2C_ALGOBIT
diff --git a/drivers/gpu/drm/xe/Kconfig.debug b/drivers/gpu/drm/xe/Kconfig.debug
index bc177368af6c..2de0de41b8dd 100644
--- a/drivers/gpu/drm/xe/Kconfig.debug
+++ b/drivers/gpu/drm/xe/Kconfig.debug
@@ -40,9 +40,21 @@ config DRM_XE_DEBUG_VM
If in doubt, say "N".
+config DRM_XE_DEBUG_MEMIRQ
+ bool "Enable extra memirq debugging"
+ default n
+ help
+ Choose this option to enable additional debugging info for
+ memory based interrupts.
+
+ Recommended for driver developers only.
+
+ If in doubt, say "N".
+
config DRM_XE_DEBUG_SRIOV
bool "Enable extra SR-IOV debugging"
default n
+ select DRM_XE_DEBUG_MEMIRQ
help
Enable extra SR-IOV debugging info.
diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
index edfd812e0f41..bc7a04ce69fd 100644
--- a/drivers/gpu/drm/xe/Makefile
+++ b/drivers/gpu/drm/xe/Makefile
@@ -56,6 +56,7 @@ xe-y += xe_bb.o \
xe_gt_topology.o \
xe_guc.o \
xe_guc_ads.o \
+ xe_guc_capture.o \
xe_guc_ct.o \
xe_guc_db_mgr.o \
xe_guc_hwconfig.o \
@@ -129,6 +130,7 @@ xe-$(CONFIG_PCI_IOV) += \
xe_gt_sriov_pf.o \
xe_gt_sriov_pf_config.o \
xe_gt_sriov_pf_control.o \
+ xe_gt_sriov_pf_migration.o \
xe_gt_sriov_pf_monitor.o \
xe_gt_sriov_pf_policy.o \
xe_gt_sriov_pf_service.o \
@@ -148,7 +150,6 @@ subdir-ccflags-$(CONFIG_DRM_XE_DISPLAY) += \
-I$(src)/display/ext \
-I$(src)/compat-i915-headers \
-I$(srctree)/drivers/gpu/drm/i915/display/ \
- -Ddrm_i915_gem_object=xe_bo \
-Ddrm_i915_private=xe_device
# Rule to build SOC code shared with i915
@@ -165,6 +166,7 @@ $(obj)/i915-display/%.o: $(srctree)/drivers/gpu/drm/i915/display/%.c FORCE
xe-$(CONFIG_DRM_XE_DISPLAY) += \
display/ext/i915_irq.o \
display/ext/i915_utils.o \
+ display/intel_bo.o \
display/intel_fb_bo.o \
display/intel_fbdev_fb.o \
display/xe_display.o \
@@ -180,7 +182,8 @@ xe-$(CONFIG_DRM_XE_DISPLAY) += \
# SOC code shared with i915
xe-$(CONFIG_DRM_XE_DISPLAY) += \
i915-soc/intel_dram.o \
- i915-soc/intel_pch.o
+ i915-soc/intel_pch.o \
+ i915-soc/intel_rom.o
# Display code shared with i915
xe-$(CONFIG_DRM_XE_DISPLAY) += \
@@ -220,6 +223,7 @@ xe-$(CONFIG_DRM_XE_DISPLAY) += \
i915-display/intel_dp_hdcp.o \
i915-display/intel_dp_link_training.o \
i915-display/intel_dp_mst.o \
+ i915-display/intel_dp_test.o \
i915-display/intel_dpll.o \
i915-display/intel_dpll_mgr.o \
i915-display/intel_dpt_common.o \
@@ -248,6 +252,7 @@ xe-$(CONFIG_DRM_XE_DISPLAY) += \
i915-display/intel_modeset_setup.o \
i915-display/intel_modeset_verify.o \
i915-display/intel_panel.o \
+ i915-display/intel_pfit.o \
i915-display/intel_pmdemand.o \
i915-display/intel_pps.o \
i915-display/intel_psr.o \
diff --git a/drivers/gpu/drm/xe/abi/guc_actions_abi.h b/drivers/gpu/drm/xe/abi/guc_actions_abi.h
index 43ad4652c2b2..b54fe40fc5a9 100644
--- a/drivers/gpu/drm/xe/abi/guc_actions_abi.h
+++ b/drivers/gpu/drm/xe/abi/guc_actions_abi.h
@@ -176,6 +176,14 @@ enum xe_guc_sleep_state_status {
#define GUC_LOG_CONTROL_VERBOSITY_MASK (0xF << GUC_LOG_CONTROL_VERBOSITY_SHIFT)
#define GUC_LOG_CONTROL_DEFAULT_LOGGING (1 << 8)
+enum xe_guc_state_capture_event_status {
+ XE_GUC_STATE_CAPTURE_EVENT_STATUS_SUCCESS = 0x0,
+ XE_GUC_STATE_CAPTURE_EVENT_STATUS_NOSPACE = 0x1,
+};
+
+#define XE_GUC_STATE_CAPTURE_EVENT_STATUS_MASK 0x000000FF
+#define XE_GUC_ACTION_STATE_CAPTURE_NOTIFICATION_DATA_LEN 1
+
#define XE_GUC_TLB_INVAL_TYPE_SHIFT 0
#define XE_GUC_TLB_INVAL_MODE_SHIFT 8
/* Flush PPC or SMRO caches along with TLB invalidation request */
diff --git a/drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h b/drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h
index 181180f5945c..b6a1852749dd 100644
--- a/drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h
+++ b/drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h
@@ -557,4 +557,65 @@
#define VF2GUC_QUERY_SINGLE_KLV_RESPONSE_MSG_2_VALUE64 GUC_HXG_REQUEST_MSG_n_DATAn
#define VF2GUC_QUERY_SINGLE_KLV_RESPONSE_MSG_3_VALUE96 GUC_HXG_REQUEST_MSG_n_DATAn
+/**
+ * DOC: PF2GUC_SAVE_RESTORE_VF
+ *
+ * This message is used by the PF to migrate VF info state maintained by the GuC.
+ *
+ * This message must be sent as `CTB HXG Message`_.
+ *
+ * Available since GuC version 70.25.0
+ *
+ * +---+-------+--------------------------------------------------------------+
+ * | | Bits | Description |
+ * +===+=======+==============================================================+
+ * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_HOST_ |
+ * | +-------+--------------------------------------------------------------+
+ * | | 30:28 | TYPE = GUC_HXG_TYPE_REQUEST_ |
+ * | +-------+--------------------------------------------------------------+
+ * | | 27:16 | DATA0 = **OPCODE** - operation to take: |
+ * | | | |
+ * | | | - _`GUC_PF_OPCODE_VF_SAVE` = 0 |
+ * | | | - _`GUC_PF_OPCODE_VF_RESTORE` = 1 |
+ * | +-------+--------------------------------------------------------------+
+ * | | 15:0 | ACTION = _`GUC_ACTION_PF2GUC_SAVE_RESTORE_VF` = 0x550B |
+ * +---+-------+--------------------------------------------------------------+
+ * | 1 | 31:0 | **VFID** - VF identifier |
+ * +---+-------+--------------------------------------------------------------+
+ * | 2 | 31:0 | **ADDR_LO** - lower 32-bits of GGTT offset to the buffer |
+ * | | | where the VF info will be save to or restored from. |
+ * +---+-------+--------------------------------------------------------------+
+ * | 3 | 31:0 | **ADDR_HI** - upper 32-bits of GGTT offset to the buffer |
+ * | | | where the VF info will be save to or restored from. |
+ * +---+-------+--------------------------------------------------------------+
+ * | 4 | 27:0 | **SIZE** - size of the buffer (in dwords) |
+ * | +-------+--------------------------------------------------------------+
+ * | | 31:28 | MBZ |
+ * +---+-------+--------------------------------------------------------------+
+ *
+ * +---+-------+--------------------------------------------------------------+
+ * | | Bits | Description |
+ * +===+=======+==============================================================+
+ * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_GUC_ |
+ * | +-------+--------------------------------------------------------------+
+ * | | 30:28 | TYPE = GUC_HXG_TYPE_RESPONSE_SUCCESS_ |
+ * | +-------+--------------------------------------------------------------+
+ * | | 27:0 | DATA0 = **USED** - size of used buffer space (in dwords) |
+ * +---+-------+--------------------------------------------------------------+
+ */
+#define GUC_ACTION_PF2GUC_SAVE_RESTORE_VF 0x550Bu
+
+#define PF2GUC_SAVE_RESTORE_VF_REQUEST_MSG_LEN (GUC_HXG_EVENT_MSG_MIN_LEN + 4u)
+#define PF2GUC_SAVE_RESTORE_VF_REQUEST_MSG_0_OPCODE GUC_HXG_EVENT_MSG_0_DATA0
+#define GUC_PF_OPCODE_VF_SAVE 0u
+#define GUC_PF_OPCODE_VF_RESTORE 1u
+#define PF2GUC_SAVE_RESTORE_VF_REQUEST_MSG_1_VFID GUC_HXG_EVENT_MSG_n_DATAn
+#define PF2GUC_SAVE_RESTORE_VF_REQUEST_MSG_2_ADDR_LO GUC_HXG_EVENT_MSG_n_DATAn
+#define PF2GUC_SAVE_RESTORE_VF_REQUEST_MSG_3_ADDR_HI GUC_HXG_EVENT_MSG_n_DATAn
+#define PF2GUC_SAVE_RESTORE_VF_REQUEST_MSG_4_SIZE (0xfffffffu << 0)
+#define PF2GUC_SAVE_RESTORE_VF_REQUEST_MSG_4_MBZ (0xfu << 28)
+
+#define PF2GUC_SAVE_RESTORE_VF_RESPONSE_MSG_LEN GUC_HXG_RESPONSE_MSG_MIN_LEN
+#define PF2GUC_SAVE_RESTORE_VF_RESPONSE_MSG_0_USED GUC_HXG_RESPONSE_MSG_0_DATA0
+
#endif
diff --git a/drivers/gpu/drm/xe/abi/guc_capture_abi.h b/drivers/gpu/drm/xe/abi/guc_capture_abi.h
new file mode 100644
index 000000000000..e7898edc6236
--- /dev/null
+++ b/drivers/gpu/drm/xe/abi/guc_capture_abi.h
@@ -0,0 +1,186 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2024 Intel Corporation
+ */
+
+#ifndef _ABI_GUC_CAPTURE_ABI_H
+#define _ABI_GUC_CAPTURE_ABI_H
+
+#include <linux/types.h>
+
+/* Capture List Index */
+enum guc_capture_list_index_type {
+ GUC_CAPTURE_LIST_INDEX_PF = 0,
+ GUC_CAPTURE_LIST_INDEX_VF = 1,
+};
+
+#define GUC_CAPTURE_LIST_INDEX_MAX (GUC_CAPTURE_LIST_INDEX_VF + 1)
+
+/* Register-types of GuC capture register lists */
+enum guc_state_capture_type {
+ GUC_STATE_CAPTURE_TYPE_GLOBAL = 0,
+ GUC_STATE_CAPTURE_TYPE_ENGINE_CLASS,
+ GUC_STATE_CAPTURE_TYPE_ENGINE_INSTANCE
+};
+
+#define GUC_STATE_CAPTURE_TYPE_MAX (GUC_STATE_CAPTURE_TYPE_ENGINE_INSTANCE + 1)
+
+/* Class indecies for capture_class and capture_instance arrays */
+enum guc_capture_list_class_type {
+ GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE = 0,
+ GUC_CAPTURE_LIST_CLASS_VIDEO = 1,
+ GUC_CAPTURE_LIST_CLASS_VIDEOENHANCE = 2,
+ GUC_CAPTURE_LIST_CLASS_BLITTER = 3,
+ GUC_CAPTURE_LIST_CLASS_GSC_OTHER = 4,
+};
+
+#define GUC_CAPTURE_LIST_CLASS_MAX (GUC_CAPTURE_LIST_CLASS_GSC_OTHER + 1)
+
+/**
+ * struct guc_mmio_reg - GuC MMIO reg state struct
+ *
+ * GuC MMIO reg state struct
+ */
+struct guc_mmio_reg {
+ /** @offset: MMIO Offset - filled in by Host */
+ u32 offset;
+ /** @value: MMIO Value - Used by Firmware to store value */
+ u32 value;
+ /** @flags: Flags for accessing the MMIO */
+ u32 flags;
+ /** @mask: Value of a mask to apply if mask with value is set */
+ u32 mask;
+#define GUC_REGSET_MASKED BIT(0)
+#define GUC_REGSET_STEERING_NEEDED BIT(1)
+#define GUC_REGSET_MASKED_WITH_VALUE BIT(2)
+#define GUC_REGSET_RESTORE_ONLY BIT(3)
+#define GUC_REGSET_STEERING_GROUP GENMASK(16, 12)
+#define GUC_REGSET_STEERING_INSTANCE GENMASK(23, 20)
+} __packed;
+
+/**
+ * struct guc_mmio_reg_set - GuC register sets
+ *
+ * GuC register sets
+ */
+struct guc_mmio_reg_set {
+ /** @address: register address */
+ u32 address;
+ /** @count: register count */
+ u16 count;
+ /** @reserved: reserved */
+ u16 reserved;
+} __packed;
+
+/**
+ * struct guc_debug_capture_list_header - Debug capture list header.
+ *
+ * Debug capture list header.
+ */
+struct guc_debug_capture_list_header {
+ /** @info: contains number of MMIO descriptors in the capture list. */
+ u32 info;
+#define GUC_CAPTURELISTHDR_NUMDESCR GENMASK(15, 0)
+} __packed;
+
+/**
+ * struct guc_debug_capture_list - Debug capture list
+ *
+ * As part of ADS registration, these header structures (followed by
+ * an array of 'struct guc_mmio_reg' entries) are used to register with
+ * GuC microkernel the list of registers we want it to dump out prior
+ * to a engine reset.
+ */
+struct guc_debug_capture_list {
+ /** @header: Debug capture list header. */
+ struct guc_debug_capture_list_header header;
+ /** @regs: MMIO descriptors in the capture list. */
+ struct guc_mmio_reg regs[];
+} __packed;
+
+/**
+ * struct guc_state_capture_header_t - State capture header.
+ *
+ * Prior to resetting engines that have hung or faulted, GuC microkernel
+ * reports the engine error-state (register values that was read) by
+ * logging them into the shared GuC log buffer using these hierarchy
+ * of structures.
+ */
+struct guc_state_capture_header_t {
+ /**
+ * @owner: VFID
+ * BR[ 7: 0] MBZ when SRIOV is disabled. When SRIOV is enabled
+ * VFID is an integer in range [0, 63] where 0 means the state capture
+ * is corresponding to the PF and an integer N in range [1, 63] means
+ * the state capture is for VF N.
+ */
+ u32 owner;
+#define GUC_STATE_CAPTURE_HEADER_VFID GENMASK(7, 0)
+ /** @info: Engine class/instance and capture type info */
+ u32 info;
+#define GUC_STATE_CAPTURE_HEADER_CAPTURE_TYPE GENMASK(3, 0) /* see guc_state_capture_type */
+#define GUC_STATE_CAPTURE_HEADER_ENGINE_CLASS GENMASK(7, 4) /* see guc_capture_list_class_type */
+#define GUC_STATE_CAPTURE_HEADER_ENGINE_INSTANCE GENMASK(11, 8)
+ /**
+ * @lrca: logical ring context address.
+ * if type-instance, LRCA (address) that hung, else set to ~0
+ */
+ u32 lrca;
+ /**
+ * @guc_id: context_index.
+ * if type-instance, context index of hung context, else set to ~0
+ */
+ u32 guc_id;
+ /** @num_mmio_entries: Number of captured MMIO entries. */
+ u32 num_mmio_entries;
+#define GUC_STATE_CAPTURE_HEADER_NUM_MMIO_ENTRIES GENMASK(9, 0)
+} __packed;
+
+/**
+ * struct guc_state_capture_t - State capture.
+ *
+ * State capture
+ */
+struct guc_state_capture_t {
+ /** @header: State capture header. */
+ struct guc_state_capture_header_t header;
+ /** @mmio_entries: Array of captured guc_mmio_reg entries. */
+ struct guc_mmio_reg mmio_entries[];
+} __packed;
+
+/* State Capture Group Type */
+enum guc_state_capture_group_type {
+ GUC_STATE_CAPTURE_GROUP_TYPE_FULL = 0,
+ GUC_STATE_CAPTURE_GROUP_TYPE_PARTIAL
+};
+
+#define GUC_STATE_CAPTURE_GROUP_TYPE_MAX (GUC_STATE_CAPTURE_GROUP_TYPE_PARTIAL + 1)
+
+/**
+ * struct guc_state_capture_group_header_t - State capture group header
+ *
+ * State capture group header.
+ */
+struct guc_state_capture_group_header_t {
+ /** @owner: VFID */
+ u32 owner;
+#define GUC_STATE_CAPTURE_GROUP_HEADER_VFID GENMASK(7, 0)
+ /** @info: Engine class/instance and capture type info */
+ u32 info;
+#define GUC_STATE_CAPTURE_GROUP_HEADER_NUM_CAPTURES GENMASK(7, 0)
+#define GUC_STATE_CAPTURE_GROUP_HEADER_CAPTURE_GROUP_TYPE GENMASK(15, 8)
+} __packed;
+
+/**
+ * struct guc_state_capture_group_t - State capture group.
+ *
+ * this is the top level structure where an error-capture dump starts
+ */
+struct guc_state_capture_group_t {
+ /** @grp_header: State capture group header. */
+ struct guc_state_capture_group_header_t grp_header;
+ /** @capture_entries: Array of state captures */
+ struct guc_state_capture_t capture_entries[];
+} __packed;
+
+#endif
diff --git a/drivers/gpu/drm/xe/abi/guc_communication_ctb_abi.h b/drivers/gpu/drm/xe/abi/guc_communication_ctb_abi.h
index 8f86a16dc577..f58198cf2cf6 100644
--- a/drivers/gpu/drm/xe/abi/guc_communication_ctb_abi.h
+++ b/drivers/gpu/drm/xe/abi/guc_communication_ctb_abi.h
@@ -52,6 +52,7 @@ struct guc_ct_buffer_desc {
#define GUC_CTB_STATUS_OVERFLOW (1 << 0)
#define GUC_CTB_STATUS_UNDERFLOW (1 << 1)
#define GUC_CTB_STATUS_MISMATCH (1 << 2)
+#define GUC_CTB_STATUS_DISABLED (1 << 3)
u32 reserved[13];
} __packed;
static_assert(sizeof(struct guc_ct_buffer_desc) == 64);
diff --git a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h
index 6b30743a2f6c..37606cf8cc5e 100644
--- a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h
+++ b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h
@@ -352,6 +352,7 @@ enum xe_guc_klv_ids {
GUC_WORKAROUND_KLV_ID_DISABLE_MTP_DURING_ASYNC_COMPUTE = 0x9007,
GUC_WA_KLV_NP_RD_WRITE_TO_CLEAR_RCSM_AT_CGP_LATE_RESTORE = 0x9008,
GUC_WORKAROUND_KLV_ID_BACK_TO_BACK_RCS_ENGINE_RESET = 0x9009,
+ GUC_WA_KLV_WAKE_POWER_DOMAINS_FOR_OUTBOUND_MMIO = 0x900a,
};
#endif
diff --git a/drivers/gpu/drm/xe/abi/guc_log_abi.h b/drivers/gpu/drm/xe/abi/guc_log_abi.h
new file mode 100644
index 000000000000..554630b7ccd9
--- /dev/null
+++ b/drivers/gpu/drm/xe/abi/guc_log_abi.h
@@ -0,0 +1,75 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2024 Intel Corporation
+ */
+
+#ifndef _ABI_GUC_LOG_ABI_H
+#define _ABI_GUC_LOG_ABI_H
+
+#include <linux/types.h>
+
+/* GuC logging buffer types */
+enum guc_log_buffer_type {
+ GUC_LOG_BUFFER_CRASH_DUMP,
+ GUC_LOG_BUFFER_DEBUG,
+ GUC_LOG_BUFFER_CAPTURE,
+};
+
+#define GUC_LOG_BUFFER_TYPE_MAX 3
+
+/**
+ * struct guc_log_buffer_state - GuC log buffer state
+ *
+ * Below state structure is used for coordination of retrieval of GuC firmware
+ * logs. Separate state is maintained for each log buffer type.
+ * read_ptr points to the location where Xe read last in log buffer and
+ * is read only for GuC firmware. write_ptr is incremented by GuC with number
+ * of bytes written for each log entry and is read only for Xe.
+ * When any type of log buffer becomes half full, GuC sends a flush interrupt.
+ * GuC firmware expects that while it is writing to 2nd half of the buffer,
+ * first half would get consumed by Host and then get a flush completed
+ * acknowledgment from Host, so that it does not end up doing any overwrite
+ * causing loss of logs. So when buffer gets half filled & Xe has requested
+ * for interrupt, GuC will set flush_to_file field, set the sampled_write_ptr
+ * to the value of write_ptr and raise the interrupt.
+ * On receiving the interrupt Xe should read the buffer, clear flush_to_file
+ * field and also update read_ptr with the value of sample_write_ptr, before
+ * sending an acknowledgment to GuC. marker & version fields are for internal
+ * usage of GuC and opaque to Xe. buffer_full_cnt field is incremented every
+ * time GuC detects the log buffer overflow.
+ */
+struct guc_log_buffer_state {
+ /** @marker: buffer state start marker */
+ u32 marker[2];
+ /** @read_ptr: the last byte offset that was read by KMD previously */
+ u32 read_ptr;
+ /**
+ * @write_ptr: the next byte offset location that will be written by
+ * GuC
+ */
+ u32 write_ptr;
+ /** @size: Log buffer size */
+ u32 size;
+ /**
+ * @sampled_write_ptr: Log buffer write pointer
+ * This is written by GuC to the byte offset of the next free entry in
+ * the buffer on log buffer half full or state capture notification
+ */
+ u32 sampled_write_ptr;
+ /**
+ * @wrap_offset: wraparound offset
+ * This is the byte offset of location 1 byte after last valid guc log
+ * event entry written by Guc firmware before there was a wraparound.
+ * This field is updated by guc firmware and should be used by Host
+ * when copying buffer contents to file.
+ */
+ u32 wrap_offset;
+ /** @flags: Flush to file flag and buffer full count */
+ u32 flags;
+#define GUC_LOG_BUFFER_STATE_FLUSH_TO_FILE GENMASK(0, 0)
+#define GUC_LOG_BUFFER_STATE_BUFFER_FULL_CNT GENMASK(4, 1)
+ /** @version: The Guc-Log-Entry format version */
+ u32 version;
+} __packed;
+
+#endif
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_lmem.h b/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_lmem.h
deleted file mode 100644
index 710cecca972d..000000000000
--- a/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_lmem.h
+++ /dev/null
@@ -1 +0,0 @@
-/* Empty */
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_mman.h b/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_mman.h
deleted file mode 100644
index 650ea2803a97..000000000000
--- a/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_mman.h
+++ /dev/null
@@ -1,17 +0,0 @@
-/* SPDX-License-Identifier: MIT */
-/*
- * Copyright © 2023 Intel Corporation
- */
-
-#ifndef _I915_GEM_MMAN_H_
-#define _I915_GEM_MMAN_H_
-
-#include "xe_bo_types.h"
-#include <drm/drm_prime.h>
-
-static inline int i915_gem_fb_mmap(struct xe_bo *bo, struct vm_area_struct *vma)
-{
- return drm_gem_prime_mmap(&bo->ttm.base, vma);
-}
-
-#endif
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_object.h b/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_object.h
deleted file mode 100644
index 777c20ceabab..000000000000
--- a/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_object.h
+++ /dev/null
@@ -1,64 +0,0 @@
-/* SPDX-License-Identifier: MIT */
-/*
- * Copyright © 2022 Intel Corporation
- */
-
-#ifndef _I915_GEM_OBJECT_H_
-#define _I915_GEM_OBJECT_H_
-
-#include <linux/types.h>
-
-#include "xe_bo.h"
-
-#define i915_gem_object_is_shmem(obj) (0) /* We don't use shmem */
-
-static inline dma_addr_t i915_gem_object_get_dma_address(const struct xe_bo *bo, pgoff_t n)
-{
- /* Should never be called */
- WARN_ON(1);
- return n;
-}
-
-static inline bool i915_gem_object_is_tiled(const struct xe_bo *bo)
-{
- /* legacy tiling is unused */
- return false;
-}
-
-static inline bool i915_gem_object_is_userptr(const struct xe_bo *bo)
-{
- /* legacy tiling is unused */
- return false;
-}
-
-static inline int i915_gem_object_read_from_page(struct xe_bo *bo,
- u32 ofs, u64 *ptr, u32 size)
-{
- struct ttm_bo_kmap_obj map;
- void *src;
- bool is_iomem;
- int ret;
-
- ret = xe_bo_lock(bo, true);
- if (ret)
- return ret;
-
- ret = ttm_bo_kmap(&bo->ttm, ofs >> PAGE_SHIFT, 1, &map);
- if (ret)
- goto out_unlock;
-
- ofs &= ~PAGE_MASK;
- src = ttm_kmap_obj_virtual(&map, &is_iomem);
- src += ofs;
- if (is_iomem)
- memcpy_fromio(ptr, (void __iomem *)src, size);
- else
- memcpy(ptr, src, size);
-
- ttm_bo_kunmap(&map);
-out_unlock:
- xe_bo_unlock(bo);
- return ret;
-}
-
-#endif
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_object_frontbuffer.h b/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_object_frontbuffer.h
deleted file mode 100644
index 2a3f12d2978c..000000000000
--- a/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_object_frontbuffer.h
+++ /dev/null
@@ -1,12 +0,0 @@
-/* SPDX-License-Identifier: MIT */
-/*
- * Copyright © 2022 Intel Corporation
- */
-
-#ifndef _I915_GEM_OBJECT_FRONTBUFFER_H_
-#define _I915_GEM_OBJECT_FRONTBUFFER_H_
-
-#define i915_gem_object_get_frontbuffer(obj) NULL
-#define i915_gem_object_set_frontbuffer(obj, front) (front)
-
-#endif
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_object_types.h b/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_object_types.h
deleted file mode 100644
index 7d6bb1abab73..000000000000
--- a/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_object_types.h
+++ /dev/null
@@ -1,11 +0,0 @@
-/* SPDX-License-Identifier: MIT */
-/* Copyright © 2024 Intel Corporation */
-
-#ifndef __I915_GEM_OBJECT_TYPES_H__
-#define __I915_GEM_OBJECT_TYPES_H__
-
-#include "xe_bo.h"
-
-#define to_intel_bo(x) gem_to_xe_bo((x))
-
-#endif
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_stolen.h b/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_stolen.h
index cb6c7598824b..9c4cf050059a 100644
--- a/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_stolen.h
+++ b/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_stolen.h
@@ -29,7 +29,7 @@ static inline int i915_gem_stolen_insert_node_in_range(struct xe_device *xe,
bo = xe_bo_create_locked_range(xe, xe_device_get_root_tile(xe),
NULL, size, start, end,
- ttm_bo_type_kernel, flags);
+ ttm_bo_type_kernel, flags, 0);
if (IS_ERR(bo)) {
err = PTR_ERR(bo);
bo = NULL;
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_debugfs.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_debugfs.h
deleted file mode 100644
index b4c47617b64b..000000000000
--- a/drivers/gpu/drm/xe/compat-i915-headers/i915_debugfs.h
+++ /dev/null
@@ -1,14 +0,0 @@
-/* SPDX-License-Identifier: MIT */
-/*
- * Copyright © 2023 Intel Corporation
- */
-
-#ifndef __I915_DEBUGFS_H__
-#define __I915_DEBUGFS_H__
-
-struct drm_i915_gem_object;
-struct seq_file;
-
-static inline void i915_debugfs_describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) {}
-
-#endif /* __I915_DEBUGFS_H__ */
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h
index f27a2c75b56d..84b0991b35b3 100644
--- a/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h
+++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h
@@ -14,6 +14,7 @@
#include "i915_utils.h"
#include "intel_runtime_pm.h"
+#include "xe_device.h" /* for xe_device_has_flat_ccs() */
#include "xe_device_types.h"
static inline struct drm_i915_private *to_i915(const struct drm_device *dev)
@@ -66,19 +67,14 @@ static inline struct drm_i915_private *to_i915(const struct drm_device *dev)
#define IS_METEORLAKE(dev_priv) IS_PLATFORM(dev_priv, XE_METEORLAKE)
#define IS_LUNARLAKE(dev_priv) IS_PLATFORM(dev_priv, XE_LUNARLAKE)
#define IS_BATTLEMAGE(dev_priv) IS_PLATFORM(dev_priv, XE_BATTLEMAGE)
+#define IS_PANTHERLAKE(dev_priv) IS_PLATFORM(dev_priv, XE_PANTHERLAKE)
#define IS_HASWELL_ULT(dev_priv) (dev_priv && 0)
#define IS_BROADWELL_ULT(dev_priv) (dev_priv && 0)
#define IS_BROADWELL_ULX(dev_priv) (dev_priv && 0)
-#define IP_VER(ver, rel) ((ver) << 8 | (rel))
-
#define IS_MOBILE(xe) (xe && 0)
-#define IS_LP(xe) ((xe) && 0)
-#define IS_GEN9_LP(xe) ((xe) && 0)
-#define IS_GEN9_BC(xe) ((xe) && 0)
-
#define IS_TIGERLAKE_UY(xe) (xe && 0)
#define IS_COMETLAKE_ULX(xe) (xe && 0)
#define IS_COFFEELAKE_ULX(xe) (xe && 0)
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_gpu_error.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_gpu_error.h
deleted file mode 100644
index 98e9dd78f670..000000000000
--- a/drivers/gpu/drm/xe/compat-i915-headers/i915_gpu_error.h
+++ /dev/null
@@ -1,17 +0,0 @@
-/* SPDX-License-Identifier: MIT */
-/*
- * Copyright © 2023 Intel Corporation
- */
-
-#ifndef _I915_GPU_ERROR_H_
-#define _I915_GPU_ERROR_H_
-
-struct drm_i915_error_state_buf;
-
-__printf(2, 3)
-static inline void
-i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...)
-{
-}
-
-#endif
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/intel_runtime_pm.h b/drivers/gpu/drm/xe/compat-i915-headers/intel_runtime_pm.h
index 8c7b315aa8ac..274042bff1be 100644
--- a/drivers/gpu/drm/xe/compat-i915-headers/intel_runtime_pm.h
+++ b/drivers/gpu/drm/xe/compat-i915-headers/intel_runtime_pm.h
@@ -20,18 +20,26 @@ static inline void enable_rpm_wakeref_asserts(void *rpm)
{
}
+static inline bool
+intel_runtime_pm_suspended(struct xe_runtime_pm *pm)
+{
+ struct xe_device *xe = container_of(pm, struct xe_device, runtime_pm);
+
+ return pm_runtime_suspended(xe->drm.dev);
+}
+
static inline intel_wakeref_t intel_runtime_pm_get(struct xe_runtime_pm *pm)
{
struct xe_device *xe = container_of(pm, struct xe_device, runtime_pm);
- return xe_pm_runtime_resume_and_get(xe);
+ return xe_pm_runtime_resume_and_get(xe) ? INTEL_WAKEREF_DEF : NULL;
}
static inline intel_wakeref_t intel_runtime_pm_get_if_in_use(struct xe_runtime_pm *pm)
{
struct xe_device *xe = container_of(pm, struct xe_device, runtime_pm);
- return xe_pm_runtime_get_if_in_use(xe);
+ return xe_pm_runtime_get_if_in_use(xe) ? INTEL_WAKEREF_DEF : NULL;
}
static inline intel_wakeref_t intel_runtime_pm_get_noresume(struct xe_runtime_pm *pm)
@@ -39,7 +47,8 @@ static inline intel_wakeref_t intel_runtime_pm_get_noresume(struct xe_runtime_pm
struct xe_device *xe = container_of(pm, struct xe_device, runtime_pm);
xe_pm_runtime_get_noresume(xe);
- return true;
+
+ return INTEL_WAKEREF_DEF;
}
static inline void intel_runtime_pm_put_unchecked(struct xe_runtime_pm *pm)
@@ -62,6 +71,6 @@ static inline void intel_runtime_pm_put(struct xe_runtime_pm *pm, intel_wakeref_
#define with_intel_runtime_pm(rpm, wf) \
for ((wf) = intel_runtime_pm_get(rpm); (wf); \
- intel_runtime_pm_put((rpm), (wf)), (wf) = 0)
+ intel_runtime_pm_put((rpm), (wf)), (wf) = NULL)
#endif
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/intel_uncore.h b/drivers/gpu/drm/xe/compat-i915-headers/intel_uncore.h
index eb5b5f0e4bd9..0382beb4035b 100644
--- a/drivers/gpu/drm/xe/compat-i915-headers/intel_uncore.h
+++ b/drivers/gpu/drm/xe/compat-i915-headers/intel_uncore.h
@@ -10,11 +10,11 @@
#include "xe_device_types.h"
#include "xe_mmio.h"
-static inline struct xe_gt *__compat_uncore_to_gt(struct intel_uncore *uncore)
+static inline struct xe_mmio *__compat_uncore_to_mmio(struct intel_uncore *uncore)
{
struct xe_device *xe = container_of(uncore, struct xe_device, uncore);
- return xe_root_mmio_gt(xe);
+ return xe_root_tile_mmio(xe);
}
static inline struct xe_tile *__compat_uncore_to_tile(struct intel_uncore *uncore)
@@ -29,7 +29,7 @@ static inline u32 intel_uncore_read(struct intel_uncore *uncore,
{
struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg));
- return xe_mmio_read32(__compat_uncore_to_gt(uncore), reg);
+ return xe_mmio_read32(__compat_uncore_to_mmio(uncore), reg);
}
static inline u8 intel_uncore_read8(struct intel_uncore *uncore,
@@ -37,7 +37,7 @@ static inline u8 intel_uncore_read8(struct intel_uncore *uncore,
{
struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg));
- return xe_mmio_read8(__compat_uncore_to_gt(uncore), reg);
+ return xe_mmio_read8(__compat_uncore_to_mmio(uncore), reg);
}
static inline u16 intel_uncore_read16(struct intel_uncore *uncore,
@@ -45,7 +45,7 @@ static inline u16 intel_uncore_read16(struct intel_uncore *uncore,
{
struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg));
- return xe_mmio_read16(__compat_uncore_to_gt(uncore), reg);
+ return xe_mmio_read16(__compat_uncore_to_mmio(uncore), reg);
}
static inline u64
@@ -57,11 +57,11 @@ intel_uncore_read64_2x32(struct intel_uncore *uncore,
u32 upper, lower, old_upper;
int loop = 0;
- upper = xe_mmio_read32(__compat_uncore_to_gt(uncore), upper_reg);
+ upper = xe_mmio_read32(__compat_uncore_to_mmio(uncore), upper_reg);
do {
old_upper = upper;
- lower = xe_mmio_read32(__compat_uncore_to_gt(uncore), lower_reg);
- upper = xe_mmio_read32(__compat_uncore_to_gt(uncore), upper_reg);
+ lower = xe_mmio_read32(__compat_uncore_to_mmio(uncore), lower_reg);
+ upper = xe_mmio_read32(__compat_uncore_to_mmio(uncore), upper_reg);
} while (upper != old_upper && loop++ < 2);
return (u64)upper << 32 | lower;
@@ -72,7 +72,7 @@ static inline void intel_uncore_posting_read(struct intel_uncore *uncore,
{
struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg));
- xe_mmio_read32(__compat_uncore_to_gt(uncore), reg);
+ xe_mmio_read32(__compat_uncore_to_mmio(uncore), reg);
}
static inline void intel_uncore_write(struct intel_uncore *uncore,
@@ -80,7 +80,7 @@ static inline void intel_uncore_write(struct intel_uncore *uncore,
{
struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg));
- xe_mmio_write32(__compat_uncore_to_gt(uncore), reg, val);
+ xe_mmio_write32(__compat_uncore_to_mmio(uncore), reg, val);
}
static inline u32 intel_uncore_rmw(struct intel_uncore *uncore,
@@ -88,7 +88,7 @@ static inline u32 intel_uncore_rmw(struct intel_uncore *uncore,
{
struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg));
- return xe_mmio_rmw32(__compat_uncore_to_gt(uncore), reg, clear, set);
+ return xe_mmio_rmw32(__compat_uncore_to_mmio(uncore), reg, clear, set);
}
static inline int intel_wait_for_register(struct intel_uncore *uncore,
@@ -97,7 +97,7 @@ static inline int intel_wait_for_register(struct intel_uncore *uncore,
{
struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg));
- return xe_mmio_wait32(__compat_uncore_to_gt(uncore), reg, mask, value,
+ return xe_mmio_wait32(__compat_uncore_to_mmio(uncore), reg, mask, value,
timeout * USEC_PER_MSEC, NULL, false);
}
@@ -107,7 +107,7 @@ static inline int intel_wait_for_register_fw(struct intel_uncore *uncore,
{
struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg));
- return xe_mmio_wait32(__compat_uncore_to_gt(uncore), reg, mask, value,
+ return xe_mmio_wait32(__compat_uncore_to_mmio(uncore), reg, mask, value,
timeout * USEC_PER_MSEC, NULL, false);
}
@@ -118,7 +118,7 @@ __intel_wait_for_register(struct intel_uncore *uncore, i915_reg_t i915_reg,
{
struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg));
- return xe_mmio_wait32(__compat_uncore_to_gt(uncore), reg, mask, value,
+ return xe_mmio_wait32(__compat_uncore_to_mmio(uncore), reg, mask, value,
fast_timeout_us + 1000 * slow_timeout_ms,
out_value, false);
}
@@ -128,7 +128,7 @@ static inline u32 intel_uncore_read_fw(struct intel_uncore *uncore,
{
struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg));
- return xe_mmio_read32(__compat_uncore_to_gt(uncore), reg);
+ return xe_mmio_read32(__compat_uncore_to_mmio(uncore), reg);
}
static inline void intel_uncore_write_fw(struct intel_uncore *uncore,
@@ -136,7 +136,7 @@ static inline void intel_uncore_write_fw(struct intel_uncore *uncore,
{
struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg));
- xe_mmio_write32(__compat_uncore_to_gt(uncore), reg, val);
+ xe_mmio_write32(__compat_uncore_to_mmio(uncore), reg, val);
}
static inline u32 intel_uncore_read_notrace(struct intel_uncore *uncore,
@@ -144,7 +144,7 @@ static inline u32 intel_uncore_read_notrace(struct intel_uncore *uncore,
{
struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg));
- return xe_mmio_read32(__compat_uncore_to_gt(uncore), reg);
+ return xe_mmio_read32(__compat_uncore_to_mmio(uncore), reg);
}
static inline void intel_uncore_write_notrace(struct intel_uncore *uncore,
@@ -152,33 +152,9 @@ static inline void intel_uncore_write_notrace(struct intel_uncore *uncore,
{
struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg));
- xe_mmio_write32(__compat_uncore_to_gt(uncore), reg, val);
+ xe_mmio_write32(__compat_uncore_to_mmio(uncore), reg, val);
}
-static inline void __iomem *intel_uncore_regs(struct intel_uncore *uncore)
-{
- struct xe_device *xe = container_of(uncore, struct xe_device, uncore);
-
- return xe_device_get_root_tile(xe)->mmio.regs;
-}
-
-/*
- * The raw_reg_{read,write} macros are intended as a micro-optimization for
- * interrupt handlers so that the pointer indirection on uncore->regs can
- * be computed once (and presumably cached in a register) instead of generating
- * extra load instructions for each MMIO access.
- *
- * Given that these macros are only intended for non-GSI interrupt registers
- * (and the goal is to avoid extra instructions generated by the compiler),
- * these macros do not account for uncore->gsi_offset. Any caller that needs
- * to use these macros on a GSI register is responsible for adding the
- * appropriate GSI offset to the 'base' parameter.
- */
-#define raw_reg_read(base, reg) \
- readl(base + i915_mmio_reg_offset(reg))
-#define raw_reg_write(base, reg, value) \
- writel(value, base + i915_mmio_reg_offset(reg))
-
#define intel_uncore_forcewake_get(x, y) do { } while (0)
#define intel_uncore_forcewake_put(x, y) do { } while (0)
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/intel_wakeref.h b/drivers/gpu/drm/xe/compat-i915-headers/intel_wakeref.h
index ecb1c0707706..2a32faea9db5 100644
--- a/drivers/gpu/drm/xe/compat-i915-headers/intel_wakeref.h
+++ b/drivers/gpu/drm/xe/compat-i915-headers/intel_wakeref.h
@@ -5,4 +5,6 @@
#include <linux/types.h>
-typedef unsigned long intel_wakeref_t;
+typedef struct ref_tracker *intel_wakeref_t;
+
+#define INTEL_WAKEREF_DEF ERR_PTR(-ENOENT)
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/pxp/intel_pxp.h b/drivers/gpu/drm/xe/compat-i915-headers/pxp/intel_pxp.h
index c2c30ece8f77..5dfc587c8237 100644
--- a/drivers/gpu/drm/xe/compat-i915-headers/pxp/intel_pxp.h
+++ b/drivers/gpu/drm/xe/compat-i915-headers/pxp/intel_pxp.h
@@ -9,20 +9,14 @@
#include <linux/errno.h>
#include <linux/types.h>
-struct drm_i915_gem_object;
+struct drm_gem_object;
struct intel_pxp;
static inline int intel_pxp_key_check(struct intel_pxp *pxp,
- struct drm_i915_gem_object *obj,
+ struct drm_gem_object *obj,
bool assign)
{
return -ENODEV;
}
-static inline bool
-i915_gem_object_is_protected(const struct drm_i915_gem_object *obj)
-{
- return false;
-}
-
#endif
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/soc/intel_rom.h b/drivers/gpu/drm/xe/compat-i915-headers/soc/intel_rom.h
new file mode 100644
index 000000000000..05cbfb697b2b
--- /dev/null
+++ b/drivers/gpu/drm/xe/compat-i915-headers/soc/intel_rom.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2024 Intel Corporation
+ */
+
+#include "../../../i915/soc/intel_rom.h"
diff --git a/drivers/gpu/drm/xe/display/ext/i915_irq.c b/drivers/gpu/drm/xe/display/ext/i915_irq.c
index eb40f1cb44f6..a7dbc6554d69 100644
--- a/drivers/gpu/drm/xe/display/ext/i915_irq.c
+++ b/drivers/gpu/drm/xe/display/ext/i915_irq.c
@@ -7,25 +7,24 @@
#include "i915_reg.h"
#include "intel_uncore.h"
-void gen3_irq_reset(struct intel_uncore *uncore, i915_reg_t imr,
- i915_reg_t iir, i915_reg_t ier)
+void gen2_irq_reset(struct intel_uncore *uncore, struct i915_irq_regs regs)
{
- intel_uncore_write(uncore, imr, 0xffffffff);
- intel_uncore_posting_read(uncore, imr);
+ intel_uncore_write(uncore, regs.imr, 0xffffffff);
+ intel_uncore_posting_read(uncore, regs.imr);
- intel_uncore_write(uncore, ier, 0);
+ intel_uncore_write(uncore, regs.ier, 0);
/* IIR can theoretically queue up two events. Be paranoid. */
- intel_uncore_write(uncore, iir, 0xffffffff);
- intel_uncore_posting_read(uncore, iir);
- intel_uncore_write(uncore, iir, 0xffffffff);
- intel_uncore_posting_read(uncore, iir);
+ intel_uncore_write(uncore, regs.iir, 0xffffffff);
+ intel_uncore_posting_read(uncore, regs.iir);
+ intel_uncore_write(uncore, regs.iir, 0xffffffff);
+ intel_uncore_posting_read(uncore, regs.iir);
}
/*
* We should clear IMR at preinstall/uninstall, and just check at postinstall.
*/
-void gen3_assert_iir_is_zero(struct intel_uncore *uncore, i915_reg_t reg)
+void gen2_assert_iir_is_zero(struct intel_uncore *uncore, i915_reg_t reg)
{
struct xe_device *xe = container_of(uncore, struct xe_device, uncore);
u32 val = intel_uncore_read(uncore, reg);
@@ -42,16 +41,14 @@ void gen3_assert_iir_is_zero(struct intel_uncore *uncore, i915_reg_t reg)
intel_uncore_posting_read(uncore, reg);
}
-void gen3_irq_init(struct intel_uncore *uncore,
- i915_reg_t imr, u32 imr_val,
- i915_reg_t ier, u32 ier_val,
- i915_reg_t iir)
+void gen2_irq_init(struct intel_uncore *uncore, struct i915_irq_regs regs,
+ u32 imr_val, u32 ier_val)
{
- gen3_assert_iir_is_zero(uncore, iir);
+ gen2_assert_iir_is_zero(uncore, regs.iir);
- intel_uncore_write(uncore, ier, ier_val);
- intel_uncore_write(uncore, imr, imr_val);
- intel_uncore_posting_read(uncore, imr);
+ intel_uncore_write(uncore, regs.ier, ier_val);
+ intel_uncore_write(uncore, regs.imr, imr_val);
+ intel_uncore_posting_read(uncore, regs.imr);
}
bool intel_irqs_enabled(struct xe_device *xe)
diff --git a/drivers/gpu/drm/xe/display/intel_bo.c b/drivers/gpu/drm/xe/display/intel_bo.c
new file mode 100644
index 000000000000..9f54fad0f1c0
--- /dev/null
+++ b/drivers/gpu/drm/xe/display/intel_bo.c
@@ -0,0 +1,84 @@
+// SPDX-License-Identifier: MIT
+/* Copyright © 2024 Intel Corporation */
+
+#include <drm/drm_gem.h>
+
+#include "xe_bo.h"
+#include "intel_bo.h"
+
+bool intel_bo_is_tiled(struct drm_gem_object *obj)
+{
+ /* legacy tiling is unused */
+ return false;
+}
+
+bool intel_bo_is_userptr(struct drm_gem_object *obj)
+{
+ /* xe does not have userptr bos */
+ return false;
+}
+
+bool intel_bo_is_shmem(struct drm_gem_object *obj)
+{
+ return false;
+}
+
+bool intel_bo_is_protected(struct drm_gem_object *obj)
+{
+ return false;
+}
+
+void intel_bo_flush_if_display(struct drm_gem_object *obj)
+{
+}
+
+int intel_bo_fb_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)
+{
+ return drm_gem_prime_mmap(obj, vma);
+}
+
+int intel_bo_read_from_page(struct drm_gem_object *obj, u64 offset, void *dst, int size)
+{
+ struct xe_bo *bo = gem_to_xe_bo(obj);
+ struct ttm_bo_kmap_obj map;
+ void *src;
+ bool is_iomem;
+ int ret;
+
+ ret = xe_bo_lock(bo, true);
+ if (ret)
+ return ret;
+
+ ret = ttm_bo_kmap(&bo->ttm, offset >> PAGE_SHIFT, 1, &map);
+ if (ret)
+ goto out_unlock;
+
+ offset &= ~PAGE_MASK;
+ src = ttm_kmap_obj_virtual(&map, &is_iomem);
+ src += offset;
+ if (is_iomem)
+ memcpy_fromio(dst, (void __iomem *)src, size);
+ else
+ memcpy(dst, src, size);
+
+ ttm_bo_kunmap(&map);
+out_unlock:
+ xe_bo_unlock(bo);
+ return ret;
+}
+
+struct intel_frontbuffer *intel_bo_get_frontbuffer(struct drm_gem_object *obj)
+{
+ return NULL;
+}
+
+struct intel_frontbuffer *intel_bo_set_frontbuffer(struct drm_gem_object *obj,
+ struct intel_frontbuffer *front)
+{
+ return front;
+}
+
+void intel_bo_describe(struct seq_file *m, struct drm_gem_object *obj)
+{
+ /* FIXME */
+}
diff --git a/drivers/gpu/drm/xe/display/intel_fb_bo.c b/drivers/gpu/drm/xe/display/intel_fb_bo.c
index 63ce97cc4cfe..4d209ebc26c2 100644
--- a/drivers/gpu/drm/xe/display/intel_fb_bo.c
+++ b/drivers/gpu/drm/xe/display/intel_fb_bo.c
@@ -11,8 +11,10 @@
#include "intel_fb_bo.h"
#include "xe_bo.h"
-void intel_fb_bo_framebuffer_fini(struct xe_bo *bo)
+void intel_fb_bo_framebuffer_fini(struct drm_gem_object *obj)
{
+ struct xe_bo *bo = gem_to_xe_bo(obj);
+
if (bo->flags & XE_BO_FLAG_PINNED) {
/* Unpin our kernel fb first */
xe_bo_lock(bo, false);
@@ -23,9 +25,10 @@ void intel_fb_bo_framebuffer_fini(struct xe_bo *bo)
}
int intel_fb_bo_framebuffer_init(struct intel_framebuffer *intel_fb,
- struct xe_bo *bo,
+ struct drm_gem_object *obj,
struct drm_mode_fb_cmd2 *mode_cmd)
{
+ struct xe_bo *bo = gem_to_xe_bo(obj);
struct xe_device *xe = to_xe_device(bo->ttm.base.dev);
int ret;
@@ -65,11 +68,11 @@ err:
return ret;
}
-struct xe_bo *intel_fb_bo_lookup_valid_bo(struct drm_i915_private *i915,
- struct drm_file *filp,
- const struct drm_mode_fb_cmd2 *mode_cmd)
+struct drm_gem_object *intel_fb_bo_lookup_valid_bo(struct drm_i915_private *i915,
+ struct drm_file *filp,
+ const struct drm_mode_fb_cmd2 *mode_cmd)
{
- struct drm_i915_gem_object *bo;
+ struct xe_bo *bo;
struct drm_gem_object *gem = drm_gem_object_lookup(filp, mode_cmd->handles[0]);
if (!gem)
@@ -78,11 +81,11 @@ struct xe_bo *intel_fb_bo_lookup_valid_bo(struct drm_i915_private *i915,
bo = gem_to_xe_bo(gem);
/* Require vram placement or dma-buf import */
if (IS_DGFX(i915) &&
- !xe_bo_can_migrate(gem_to_xe_bo(gem), XE_PL_VRAM0) &&
+ !xe_bo_can_migrate(bo, XE_PL_VRAM0) &&
bo->ttm.type != ttm_bo_type_sg) {
drm_gem_object_put(gem);
return ERR_PTR(-EREMOTE);
}
- return bo;
+ return gem;
}
diff --git a/drivers/gpu/drm/xe/display/intel_fb_bo.h b/drivers/gpu/drm/xe/display/intel_fb_bo.h
deleted file mode 100644
index 5d365b925b7a..000000000000
--- a/drivers/gpu/drm/xe/display/intel_fb_bo.h
+++ /dev/null
@@ -1,24 +0,0 @@
-/* SPDX-License-Identifier: MIT */
-/*
- * Copyright © 2021 Intel Corporation
- */
-
-#ifndef __INTEL_FB_BO_H__
-#define __INTEL_FB_BO_H__
-
-struct drm_file;
-struct drm_mode_fb_cmd2;
-struct drm_i915_private;
-struct intel_framebuffer;
-struct xe_bo;
-
-void intel_fb_bo_framebuffer_fini(struct xe_bo *bo);
-int intel_fb_bo_framebuffer_init(struct intel_framebuffer *intel_fb,
- struct xe_bo *bo,
- struct drm_mode_fb_cmd2 *mode_cmd);
-
-struct xe_bo *intel_fb_bo_lookup_valid_bo(struct drm_i915_private *i915,
- struct drm_file *filp,
- const struct drm_mode_fb_cmd2 *mode_cmd);
-
-#endif
diff --git a/drivers/gpu/drm/xe/display/intel_fbdev_fb.c b/drivers/gpu/drm/xe/display/intel_fbdev_fb.c
index 99499d6c0256..ca95fcd098ec 100644
--- a/drivers/gpu/drm/xe/display/intel_fbdev_fb.c
+++ b/drivers/gpu/drm/xe/display/intel_fbdev_fb.c
@@ -6,6 +6,7 @@
#include <drm/drm_fb_helper.h>
#include "intel_display_types.h"
+#include "intel_fb.h"
#include "intel_fbdev_fb.h"
#include "xe_bo.h"
#include "xe_ttm_stolen_mgr.h"
@@ -20,7 +21,7 @@ struct intel_framebuffer *intel_fbdev_fb_alloc(struct drm_fb_helper *helper,
struct drm_device *dev = helper->dev;
struct xe_device *xe = to_xe_device(dev);
struct drm_mode_fb_cmd2 mode_cmd = {};
- struct drm_i915_gem_object *obj;
+ struct xe_bo *obj;
int size;
/* we don't do packed 24bpp */
@@ -64,13 +65,13 @@ struct intel_framebuffer *intel_fbdev_fb_alloc(struct drm_fb_helper *helper,
goto err;
}
- fb = intel_framebuffer_create(obj, &mode_cmd);
+ fb = intel_framebuffer_create(&obj->ttm.base, &mode_cmd);
if (IS_ERR(fb)) {
xe_bo_unpin_map_no_vm(obj);
goto err;
}
- drm_gem_object_put(intel_bo_to_drm_bo(obj));
+ drm_gem_object_put(&obj->ttm.base);
return to_intel_framebuffer(fb);
@@ -79,8 +80,9 @@ err:
}
int intel_fbdev_fb_fill_info(struct drm_i915_private *i915, struct fb_info *info,
- struct drm_i915_gem_object *obj, struct i915_vma *vma)
+ struct drm_gem_object *_obj, struct i915_vma *vma)
{
+ struct xe_bo *obj = gem_to_xe_bo(_obj);
struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
if (!(obj->flags & XE_BO_FLAG_SYSTEM)) {
@@ -100,7 +102,7 @@ int intel_fbdev_fb_fill_info(struct drm_i915_private *i915, struct fb_info *info
XE_WARN_ON(iosys_map_is_null(&obj->vmap));
info->screen_base = obj->vmap.vaddr_iomem;
- info->screen_size = intel_bo_to_drm_bo(obj)->size;
+ info->screen_size = obj->ttm.base.size;
return 0;
}
diff --git a/drivers/gpu/drm/xe/display/xe_display.c b/drivers/gpu/drm/xe/display/xe_display.c
index c6e0c8d77a70..b5502f335f53 100644
--- a/drivers/gpu/drm/xe/display/xe_display.c
+++ b/drivers/gpu/drm/xe/display/xe_display.c
@@ -4,16 +4,16 @@
*/
#include "xe_display.h"
-#include "regs/xe_regs.h"
+#include "regs/xe_irq_regs.h"
#include <linux/fb.h>
#include <drm/drm_drv.h>
#include <drm/drm_managed.h>
+#include <drm/drm_probe_helper.h>
#include <uapi/drm/xe_drm.h>
#include "soc/intel_dram.h"
-#include "i915_drv.h" /* FIXME: HAS_DISPLAY() depends on this */
#include "intel_acpi.h"
#include "intel_audio.h"
#include "intel_bw.h"
@@ -34,7 +34,7 @@
static bool has_display(struct xe_device *xe)
{
- return HAS_DISPLAY(xe);
+ return HAS_DISPLAY(&xe->display);
}
/**
@@ -202,12 +202,14 @@ int xe_display_init(struct xe_device *xe)
void xe_display_fini(struct xe_device *xe)
{
+ struct intel_display *display = &xe->display;
+
if (!xe->info.probe_display)
return;
intel_hpd_poll_fini(xe);
- intel_hdcp_component_fini(xe);
+ intel_hdcp_component_fini(display);
intel_audio_deinit(xe);
}
@@ -321,7 +323,9 @@ static void __xe_display_pm_suspend(struct xe_device *xe, bool runtime)
* properly.
*/
intel_power_domains_disable(xe);
- intel_fbdev_set_suspend(&xe->drm, FBINFO_STATE_SUSPENDED, true);
+ if (!runtime)
+ intel_fbdev_set_suspend(&xe->drm, FBINFO_STATE_SUSPENDED, true);
+
if (!runtime && has_display(xe)) {
drm_kms_helper_poll_disable(&xe->drm);
intel_display_driver_disable_user_access(xe);
@@ -330,7 +334,8 @@ static void __xe_display_pm_suspend(struct xe_device *xe, bool runtime)
xe_display_flush_cleanup_work(xe);
- intel_dp_mst_suspend(xe);
+ if (!runtime)
+ intel_dp_mst_suspend(xe);
intel_hpd_cancel_work(xe);
@@ -341,7 +346,7 @@ static void __xe_display_pm_suspend(struct xe_device *xe, bool runtime)
intel_opregion_suspend(display, s2idle ? PCI_D1 : PCI_D3cold);
- intel_dmc_suspend(xe);
+ intel_dmc_suspend(display);
if (runtime && has_display(xe))
intel_hpd_poll_enable(xe);
@@ -352,6 +357,36 @@ void xe_display_pm_suspend(struct xe_device *xe)
__xe_display_pm_suspend(xe, false);
}
+void xe_display_pm_shutdown(struct xe_device *xe)
+{
+ struct intel_display *display = &xe->display;
+
+ if (!xe->info.probe_display)
+ return;
+
+ intel_power_domains_disable(xe);
+ intel_fbdev_set_suspend(&xe->drm, FBINFO_STATE_SUSPENDED, true);
+ if (has_display(xe)) {
+ drm_kms_helper_poll_disable(&xe->drm);
+ intel_display_driver_disable_user_access(xe);
+ intel_display_driver_suspend(xe);
+ }
+
+ xe_display_flush_cleanup_work(xe);
+ intel_dp_mst_suspend(xe);
+ intel_hpd_cancel_work(xe);
+
+ if (has_display(xe))
+ intel_display_driver_suspend_access(xe);
+
+ intel_encoder_suspend_all(display);
+ intel_encoder_shutdown_all(display);
+
+ intel_opregion_suspend(display, PCI_D3cold);
+
+ intel_dmc_suspend(display);
+}
+
void xe_display_pm_runtime_suspend(struct xe_device *xe)
{
if (!xe->info.probe_display)
@@ -376,6 +411,19 @@ void xe_display_pm_suspend_late(struct xe_device *xe)
intel_display_power_suspend_late(xe);
}
+void xe_display_pm_shutdown_late(struct xe_device *xe)
+{
+ if (!xe->info.probe_display)
+ return;
+
+ /*
+ * The only requirement is to reboot with display DC states disabled,
+ * for now leaving all display power wells in the INIT power domain
+ * enabled.
+ */
+ intel_power_domains_driver_remove(xe);
+}
+
void xe_display_pm_resume_early(struct xe_device *xe)
{
if (!xe->info.probe_display)
@@ -393,7 +441,7 @@ static void __xe_display_pm_resume(struct xe_device *xe, bool runtime)
if (!xe->info.probe_display)
return;
- intel_dmc_resume(xe);
+ intel_dmc_resume(display);
if (has_display(xe))
drm_mode_config_reset(&xe->drm);
@@ -405,7 +453,9 @@ static void __xe_display_pm_resume(struct xe_device *xe, bool runtime)
intel_display_driver_resume_access(xe);
/* MST sideband requires HPD interrupts enabled */
- intel_dp_mst_resume(xe);
+ if (!runtime)
+ intel_dp_mst_resume(xe);
+
if (!runtime && has_display(xe)) {
intel_display_driver_resume(xe);
drm_kms_helper_poll_enable(&xe->drm);
@@ -417,7 +467,8 @@ static void __xe_display_pm_resume(struct xe_device *xe, bool runtime)
intel_opregion_resume(display);
- intel_fbdev_set_suspend(&xe->drm, FBINFO_STATE_RUNNING, false);
+ if (!runtime)
+ intel_fbdev_set_suspend(&xe->drm, FBINFO_STATE_RUNNING, false);
intel_power_domains_enable(xe);
}
diff --git a/drivers/gpu/drm/xe/display/xe_display.h b/drivers/gpu/drm/xe/display/xe_display.h
index bed55fd26f30..17afa537aee5 100644
--- a/drivers/gpu/drm/xe/display/xe_display.h
+++ b/drivers/gpu/drm/xe/display/xe_display.h
@@ -35,7 +35,9 @@ void xe_display_irq_reset(struct xe_device *xe);
void xe_display_irq_postinstall(struct xe_device *xe, struct xe_gt *gt);
void xe_display_pm_suspend(struct xe_device *xe);
+void xe_display_pm_shutdown(struct xe_device *xe);
void xe_display_pm_suspend_late(struct xe_device *xe);
+void xe_display_pm_shutdown_late(struct xe_device *xe);
void xe_display_pm_resume_early(struct xe_device *xe);
void xe_display_pm_resume(struct xe_device *xe);
void xe_display_pm_runtime_suspend(struct xe_device *xe);
@@ -66,7 +68,9 @@ static inline void xe_display_irq_reset(struct xe_device *xe) {}
static inline void xe_display_irq_postinstall(struct xe_device *xe, struct xe_gt *gt) {}
static inline void xe_display_pm_suspend(struct xe_device *xe) {}
+static inline void xe_display_pm_shutdown(struct xe_device *xe) {}
static inline void xe_display_pm_suspend_late(struct xe_device *xe) {}
+static inline void xe_display_pm_shutdown_late(struct xe_device *xe) {}
static inline void xe_display_pm_resume_early(struct xe_device *xe) {}
static inline void xe_display_pm_resume(struct xe_device *xe) {}
static inline void xe_display_pm_runtime_suspend(struct xe_device *xe) {}
diff --git a/drivers/gpu/drm/xe/display/xe_dsb_buffer.c b/drivers/gpu/drm/xe/display/xe_dsb_buffer.c
index f99d901a3214..f95375451e2f 100644
--- a/drivers/gpu/drm/xe/display/xe_dsb_buffer.c
+++ b/drivers/gpu/drm/xe/display/xe_dsb_buffer.c
@@ -48,11 +48,12 @@ bool intel_dsb_buffer_create(struct intel_crtc *crtc, struct intel_dsb_buffer *d
if (!vma)
return false;
+ /* Set scanout flag for WC mapping */
obj = xe_bo_create_pin_map(xe, xe_device_get_root_tile(xe),
NULL, PAGE_ALIGN(size),
ttm_bo_type_kernel,
XE_BO_FLAG_VRAM_IF_DGFX(xe_device_get_root_tile(xe)) |
- XE_BO_FLAG_GGTT);
+ XE_BO_FLAG_SCANOUT | XE_BO_FLAG_GGTT);
if (IS_ERR(obj)) {
kfree(vma);
return false;
@@ -73,5 +74,9 @@ void intel_dsb_buffer_cleanup(struct intel_dsb_buffer *dsb_buf)
void intel_dsb_buffer_flush_map(struct intel_dsb_buffer *dsb_buf)
{
- /* TODO: add xe specific flush_map() for dsb buffer object. */
+ /*
+ * The memory barrier here is to ensure coherency of DSB vs MMIO,
+ * both for weak ordering archs and discrete cards.
+ */
+ xe_device_wmb(dsb_buf->vma->bo->tile->xe);
}
diff --git a/drivers/gpu/drm/xe/display/xe_fb_pin.c b/drivers/gpu/drm/xe/display/xe_fb_pin.c
index b58fc4ba2aac..761510ae0690 100644
--- a/drivers/gpu/drm/xe/display/xe_fb_pin.c
+++ b/drivers/gpu/drm/xe/display/xe_fb_pin.c
@@ -79,12 +79,14 @@ write_dpt_remapped(struct xe_bo *bo, struct iosys_map *map, u32 *dpt_ofs,
static int __xe_pin_fb_vma_dpt(const struct intel_framebuffer *fb,
const struct i915_gtt_view *view,
- struct i915_vma *vma)
+ struct i915_vma *vma,
+ u64 physical_alignment)
{
struct xe_device *xe = to_xe_device(fb->base.dev);
struct xe_tile *tile0 = xe_device_get_root_tile(xe);
struct xe_ggtt *ggtt = tile0->mem.ggtt;
- struct xe_bo *bo = intel_fb_obj(&fb->base), *dpt;
+ struct drm_gem_object *obj = intel_fb_bo(&fb->base);
+ struct xe_bo *bo = gem_to_xe_bo(obj), *dpt;
u32 dpt_size, size = bo->ttm.base.size;
if (view->type == I915_GTT_VIEW_NORMAL)
@@ -98,23 +100,29 @@ static int __xe_pin_fb_vma_dpt(const struct intel_framebuffer *fb,
XE_PAGE_SIZE);
if (IS_DGFX(xe))
- dpt = xe_bo_create_pin_map(xe, tile0, NULL, dpt_size,
- ttm_bo_type_kernel,
- XE_BO_FLAG_VRAM0 |
- XE_BO_FLAG_GGTT |
- XE_BO_FLAG_PAGETABLE);
+ dpt = xe_bo_create_pin_map_at_aligned(xe, tile0, NULL,
+ dpt_size, ~0ull,
+ ttm_bo_type_kernel,
+ XE_BO_FLAG_VRAM0 |
+ XE_BO_FLAG_GGTT |
+ XE_BO_FLAG_PAGETABLE,
+ physical_alignment);
else
- dpt = xe_bo_create_pin_map(xe, tile0, NULL, dpt_size,
- ttm_bo_type_kernel,
- XE_BO_FLAG_STOLEN |
- XE_BO_FLAG_GGTT |
- XE_BO_FLAG_PAGETABLE);
+ dpt = xe_bo_create_pin_map_at_aligned(xe, tile0, NULL,
+ dpt_size, ~0ull,
+ ttm_bo_type_kernel,
+ XE_BO_FLAG_STOLEN |
+ XE_BO_FLAG_GGTT |
+ XE_BO_FLAG_PAGETABLE,
+ physical_alignment);
if (IS_ERR(dpt))
- dpt = xe_bo_create_pin_map(xe, tile0, NULL, dpt_size,
- ttm_bo_type_kernel,
- XE_BO_FLAG_SYSTEM |
- XE_BO_FLAG_GGTT |
- XE_BO_FLAG_PAGETABLE);
+ dpt = xe_bo_create_pin_map_at_aligned(xe, tile0, NULL,
+ dpt_size, ~0ull,
+ ttm_bo_type_kernel,
+ XE_BO_FLAG_SYSTEM |
+ XE_BO_FLAG_GGTT |
+ XE_BO_FLAG_PAGETABLE,
+ physical_alignment);
if (IS_ERR(dpt))
return PTR_ERR(dpt);
@@ -183,9 +191,11 @@ write_ggtt_rotated(struct xe_bo *bo, struct xe_ggtt *ggtt, u32 *ggtt_ofs, u32 bo
static int __xe_pin_fb_vma_ggtt(const struct intel_framebuffer *fb,
const struct i915_gtt_view *view,
- struct i915_vma *vma)
+ struct i915_vma *vma,
+ u64 physical_alignment)
{
- struct xe_bo *bo = intel_fb_obj(&fb->base);
+ struct drm_gem_object *obj = intel_fb_bo(&fb->base);
+ struct xe_bo *bo = gem_to_xe_bo(obj);
struct xe_device *xe = to_xe_device(fb->base.dev);
struct xe_ggtt *ggtt = xe_device_get_root_tile(xe)->mem.ggtt;
u32 align;
@@ -264,12 +274,14 @@ out:
}
static struct i915_vma *__xe_pin_fb_vma(const struct intel_framebuffer *fb,
- const struct i915_gtt_view *view)
+ const struct i915_gtt_view *view,
+ u64 physical_alignment)
{
struct drm_device *dev = fb->base.dev;
struct xe_device *xe = to_xe_device(dev);
struct i915_vma *vma = kzalloc(sizeof(*vma), GFP_KERNEL);
- struct xe_bo *bo = intel_fb_obj(&fb->base);
+ struct drm_gem_object *obj = intel_fb_bo(&fb->base);
+ struct xe_bo *bo = gem_to_xe_bo(obj);
int ret;
if (!vma)
@@ -312,9 +324,9 @@ static struct i915_vma *__xe_pin_fb_vma(const struct intel_framebuffer *fb,
vma->bo = bo;
if (intel_fb_uses_dpt(&fb->base))
- ret = __xe_pin_fb_vma_dpt(fb, view, vma);
+ ret = __xe_pin_fb_vma_dpt(fb, view, vma, physical_alignment);
else
- ret = __xe_pin_fb_vma_ggtt(fb, view, vma);
+ ret = __xe_pin_fb_vma_ggtt(fb, view, vma, physical_alignment);
if (ret)
goto err_unpin;
@@ -355,7 +367,7 @@ intel_fb_pin_to_ggtt(const struct drm_framebuffer *fb,
{
*out_flags = 0;
- return __xe_pin_fb_vma(to_intel_framebuffer(fb), view);
+ return __xe_pin_fb_vma(to_intel_framebuffer(fb), view, phys_alignment);
}
void intel_fb_unpin_vma(struct i915_vma *vma, unsigned long flags)
@@ -366,13 +378,18 @@ void intel_fb_unpin_vma(struct i915_vma *vma, unsigned long flags)
int intel_plane_pin_fb(struct intel_plane_state *plane_state)
{
struct drm_framebuffer *fb = plane_state->hw.fb;
- struct xe_bo *bo = intel_fb_obj(fb);
+ struct drm_gem_object *obj = intel_fb_bo(fb);
+ struct xe_bo *bo = gem_to_xe_bo(obj);
struct i915_vma *vma;
+ struct intel_framebuffer *intel_fb = to_intel_framebuffer(fb);
+ struct intel_plane *plane = to_intel_plane(plane_state->uapi.plane);
+ u64 phys_alignment = plane->min_alignment(plane, fb, 0);
/* We reject creating !SCANOUT fb's, so this is weird.. */
drm_WARN_ON(bo->ttm.base.dev, !(bo->flags & XE_BO_FLAG_SCANOUT));
- vma = __xe_pin_fb_vma(to_intel_framebuffer(fb), &plane_state->view.gtt);
+ vma = __xe_pin_fb_vma(intel_fb, &plane_state->view.gtt, phys_alignment);
+
if (IS_ERR(vma))
return PTR_ERR(vma);
diff --git a/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c b/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c
index 6619a40aed15..7c02323e9531 100644
--- a/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c
+++ b/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c
@@ -30,26 +30,29 @@ struct intel_hdcp_gsc_message {
#define HDCP_GSC_HEADER_SIZE sizeof(struct intel_gsc_mtl_header)
-bool intel_hdcp_gsc_cs_required(struct xe_device *xe)
+bool intel_hdcp_gsc_cs_required(struct intel_display *display)
{
- return DISPLAY_VER(xe) >= 14;
+ return DISPLAY_VER(display) >= 14;
}
-bool intel_hdcp_gsc_check_status(struct xe_device *xe)
+bool intel_hdcp_gsc_check_status(struct intel_display *display)
{
+ struct xe_device *xe = to_xe_device(display->drm);
struct xe_tile *tile = xe_device_get_root_tile(xe);
struct xe_gt *gt = tile->media_gt;
struct xe_gsc *gsc = &gt->uc.gsc;
bool ret = true;
+ unsigned int fw_ref;
- if (!gsc && !xe_uc_fw_is_enabled(&gsc->fw)) {
+ if (!gsc || !xe_uc_fw_is_enabled(&gsc->fw)) {
drm_dbg_kms(&xe->drm,
"GSC Components not ready for HDCP2.x\n");
return false;
}
xe_pm_runtime_get(xe);
- if (xe_force_wake_get(gt_to_fw(gt), XE_FW_GSC)) {
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GSC);
+ if (!fw_ref) {
drm_dbg_kms(&xe->drm,
"failed to get forcewake to check proxy status\n");
ret = false;
@@ -59,16 +62,17 @@ bool intel_hdcp_gsc_check_status(struct xe_device *xe)
if (!xe_gsc_proxy_init_done(gsc))
ret = false;
- xe_force_wake_put(gt_to_fw(gt), XE_FW_GSC);
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
out:
xe_pm_runtime_put(xe);
return ret;
}
/*This function helps allocate memory for the command that we will send to gsc cs */
-static int intel_hdcp_gsc_initialize_message(struct xe_device *xe,
+static int intel_hdcp_gsc_initialize_message(struct intel_display *display,
struct intel_hdcp_gsc_message *hdcp_message)
{
+ struct xe_device *xe = to_xe_device(display->drm);
struct xe_bo *bo = NULL;
u64 cmd_in, cmd_out;
int ret = 0;
@@ -80,7 +84,7 @@ static int intel_hdcp_gsc_initialize_message(struct xe_device *xe,
XE_BO_FLAG_GGTT);
if (IS_ERR(bo)) {
- drm_err(&xe->drm, "Failed to allocate bo for HDCP streaming command!\n");
+ drm_err(display->drm, "Failed to allocate bo for HDCP streaming command!\n");
ret = PTR_ERR(bo);
goto out;
}
@@ -96,7 +100,7 @@ out:
return ret;
}
-static int intel_hdcp_gsc_hdcp2_init(struct xe_device *xe)
+static int intel_hdcp_gsc_hdcp2_init(struct intel_display *display)
{
struct intel_hdcp_gsc_message *hdcp_message;
int ret;
@@ -110,14 +114,14 @@ static int intel_hdcp_gsc_hdcp2_init(struct xe_device *xe)
* NOTE: No need to lock the comp mutex here as it is already
* going to be taken before this function called
*/
- ret = intel_hdcp_gsc_initialize_message(xe, hdcp_message);
+ ret = intel_hdcp_gsc_initialize_message(display, hdcp_message);
if (ret) {
- drm_err(&xe->drm, "Could not initialize hdcp_message\n");
+ drm_err(display->drm, "Could not initialize hdcp_message\n");
kfree(hdcp_message);
return ret;
}
- xe->display.hdcp.hdcp_message = hdcp_message;
+ display->hdcp.hdcp_message = hdcp_message;
return ret;
}
@@ -137,7 +141,7 @@ static const struct i915_hdcp_ops gsc_hdcp_ops = {
.close_hdcp_session = intel_hdcp_gsc_close_session,
};
-int intel_hdcp_gsc_init(struct xe_device *xe)
+int intel_hdcp_gsc_init(struct intel_display *display)
{
struct i915_hdcp_arbiter *data;
int ret;
@@ -146,33 +150,33 @@ int intel_hdcp_gsc_init(struct xe_device *xe)
if (!data)
return -ENOMEM;
- mutex_lock(&xe->display.hdcp.hdcp_mutex);
- xe->display.hdcp.arbiter = data;
- xe->display.hdcp.arbiter->hdcp_dev = xe->drm.dev;
- xe->display.hdcp.arbiter->ops = &gsc_hdcp_ops;
- ret = intel_hdcp_gsc_hdcp2_init(xe);
+ mutex_lock(&display->hdcp.hdcp_mutex);
+ display->hdcp.arbiter = data;
+ display->hdcp.arbiter->hdcp_dev = display->drm->dev;
+ display->hdcp.arbiter->ops = &gsc_hdcp_ops;
+ ret = intel_hdcp_gsc_hdcp2_init(display);
if (ret)
kfree(data);
- mutex_unlock(&xe->display.hdcp.hdcp_mutex);
+ mutex_unlock(&display->hdcp.hdcp_mutex);
return ret;
}
-void intel_hdcp_gsc_fini(struct xe_device *xe)
+void intel_hdcp_gsc_fini(struct intel_display *display)
{
struct intel_hdcp_gsc_message *hdcp_message =
- xe->display.hdcp.hdcp_message;
- struct i915_hdcp_arbiter *arb = xe->display.hdcp.arbiter;
+ display->hdcp.hdcp_message;
+ struct i915_hdcp_arbiter *arb = display->hdcp.arbiter;
if (hdcp_message) {
xe_bo_unpin_map_no_vm(hdcp_message->hdcp_bo);
kfree(hdcp_message);
- xe->display.hdcp.hdcp_message = NULL;
+ display->hdcp.hdcp_message = NULL;
}
kfree(arb);
- xe->display.hdcp.arbiter = NULL;
+ display->hdcp.arbiter = NULL;
}
static int xe_gsc_send_sync(struct xe_device *xe,
diff --git a/drivers/gpu/drm/xe/display/xe_plane_initial.c b/drivers/gpu/drm/xe/display/xe_plane_initial.c
index a50ab9eae40a..8c113463a3d5 100644
--- a/drivers/gpu/drm/xe/display/xe_plane_initial.c
+++ b/drivers/gpu/drm/xe/display/xe_plane_initial.c
@@ -170,7 +170,7 @@ intel_alloc_initial_plane_obj(struct intel_crtc *crtc,
return false;
if (intel_framebuffer_init(to_intel_framebuffer(fb),
- bo, &mode_cmd)) {
+ &bo->ttm.base, &mode_cmd)) {
drm_dbg_kms(&xe->drm, "intel fb init failed\n");
goto err_bo;
}
@@ -248,7 +248,7 @@ intel_find_initial_plane_obj(struct intel_crtc *crtc,
* the lookup of sysmem scratch pages.
*/
plane->check_plane(crtc_state, plane_state);
- plane->async_flip(plane, crtc_state, plane_state, true);
+ plane->async_flip(NULL, plane, crtc_state, plane_state, true);
return;
nofb:
diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
index 81b71903675e..7c78496e6213 100644
--- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
@@ -186,6 +186,7 @@
#define VDBOX_CGCTL3F10(base) XE_REG((base) + 0x3f10)
#define IECPUNIT_CLKGATE_DIS REG_BIT(22)
+#define RAMDFTUNIT_CLKGATE_DIS REG_BIT(9)
#define VDBOX_CGCTL3F18(base) XE_REG((base) + 0x3f18)
#define ALNUNIT_CLKGATE_DIS REG_BIT(13)
diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
index bd604b9f08e4..0c9e4b2fafab 100644
--- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
@@ -286,6 +286,9 @@
#define GAMTLBVEBOX0_CLKGATE_DIS REG_BIT(16)
#define LTCDD_CLKGATE_DIS REG_BIT(10)
+#define UNSLCGCTL9454 XE_REG(0x9454)
+#define LSCFE_CLKGATE_DIS REG_BIT(4)
+
#define XEHP_SLICE_UNIT_LEVEL_CLKGATE XE_REG_MCR(0x94d4)
#define L3_CR2X_CLKGATE_DIS REG_BIT(17)
#define L3_CLKGATE_DIS REG_BIT(16)
@@ -344,6 +347,14 @@
#define CTC_SOURCE_DIVIDE_LOGIC REG_BIT(0)
#define FORCEWAKE_RENDER XE_REG(0xa278)
+
+#define POWERGATE_DOMAIN_STATUS XE_REG(0xa2a0)
+#define MEDIA_SLICE3_AWAKE_STATUS REG_BIT(4)
+#define MEDIA_SLICE2_AWAKE_STATUS REG_BIT(3)
+#define MEDIA_SLICE1_AWAKE_STATUS REG_BIT(2)
+#define RENDER_AWAKE_STATUS REG_BIT(1)
+#define MEDIA_SLICE0_AWAKE_STATUS REG_BIT(0)
+
#define FORCEWAKE_MEDIA_VDBOX(n) XE_REG(0xa540 + (n) * 4)
#define FORCEWAKE_MEDIA_VEBOX(n) XE_REG(0xa560 + (n) * 4)
#define FORCEWAKE_GSC XE_REG(0xa618)
@@ -556,62 +567,6 @@
#define GT_PERF_STATUS XE_REG(0x1381b4)
#define VOLTAGE_MASK REG_GENMASK(10, 0)
-/*
- * Note: Interrupt registers 1900xx are VF accessible only until version 12.50.
- * On newer platforms, VFs are using memory-based interrupts instead.
- * However, for simplicity we keep this XE_REG_OPTION_VF tag intact.
- */
-
-#define GT_INTR_DW(x) XE_REG(0x190018 + ((x) * 4), XE_REG_OPTION_VF)
-#define INTR_GSC REG_BIT(31)
-#define INTR_GUC REG_BIT(25)
-#define INTR_MGUC REG_BIT(24)
-#define INTR_BCS8 REG_BIT(23)
-#define INTR_BCS(x) REG_BIT(15 - (x))
-#define INTR_CCS(x) REG_BIT(4 + (x))
-#define INTR_RCS0 REG_BIT(0)
-#define INTR_VECS(x) REG_BIT(31 - (x))
-#define INTR_VCS(x) REG_BIT(x)
-
-#define RENDER_COPY_INTR_ENABLE XE_REG(0x190030, XE_REG_OPTION_VF)
-#define VCS_VECS_INTR_ENABLE XE_REG(0x190034, XE_REG_OPTION_VF)
-#define GUC_SG_INTR_ENABLE XE_REG(0x190038, XE_REG_OPTION_VF)
-#define ENGINE1_MASK REG_GENMASK(31, 16)
-#define ENGINE0_MASK REG_GENMASK(15, 0)
-#define GPM_WGBOXPERF_INTR_ENABLE XE_REG(0x19003c, XE_REG_OPTION_VF)
-#define GUNIT_GSC_INTR_ENABLE XE_REG(0x190044, XE_REG_OPTION_VF)
-#define CCS_RSVD_INTR_ENABLE XE_REG(0x190048, XE_REG_OPTION_VF)
-
-#define INTR_IDENTITY_REG(x) XE_REG(0x190060 + ((x) * 4), XE_REG_OPTION_VF)
-#define INTR_DATA_VALID REG_BIT(31)
-#define INTR_ENGINE_INSTANCE(x) REG_FIELD_GET(GENMASK(25, 20), x)
-#define INTR_ENGINE_CLASS(x) REG_FIELD_GET(GENMASK(18, 16), x)
-#define INTR_ENGINE_INTR(x) REG_FIELD_GET(GENMASK(15, 0), x)
-#define OTHER_GUC_INSTANCE 0
-#define OTHER_GSC_HECI2_INSTANCE 3
-#define OTHER_GSC_INSTANCE 6
-
-#define IIR_REG_SELECTOR(x) XE_REG(0x190070 + ((x) * 4), XE_REG_OPTION_VF)
-#define RCS0_RSVD_INTR_MASK XE_REG(0x190090, XE_REG_OPTION_VF)
-#define BCS_RSVD_INTR_MASK XE_REG(0x1900a0, XE_REG_OPTION_VF)
-#define VCS0_VCS1_INTR_MASK XE_REG(0x1900a8, XE_REG_OPTION_VF)
-#define VCS2_VCS3_INTR_MASK XE_REG(0x1900ac, XE_REG_OPTION_VF)
-#define VECS0_VECS1_INTR_MASK XE_REG(0x1900d0, XE_REG_OPTION_VF)
-#define HECI2_RSVD_INTR_MASK XE_REG(0x1900e4)
-#define GUC_SG_INTR_MASK XE_REG(0x1900e8, XE_REG_OPTION_VF)
-#define GPM_WGBOXPERF_INTR_MASK XE_REG(0x1900ec, XE_REG_OPTION_VF)
-#define GUNIT_GSC_INTR_MASK XE_REG(0x1900f4, XE_REG_OPTION_VF)
-#define CCS0_CCS1_INTR_MASK XE_REG(0x190100)
-#define CCS2_CCS3_INTR_MASK XE_REG(0x190104)
-#define XEHPC_BCS1_BCS2_INTR_MASK XE_REG(0x190110)
-#define XEHPC_BCS3_BCS4_INTR_MASK XE_REG(0x190114)
-#define XEHPC_BCS5_BCS6_INTR_MASK XE_REG(0x190118)
-#define XEHPC_BCS7_BCS8_INTR_MASK XE_REG(0x19011c)
-#define GT_WAIT_SEMAPHORE_INTERRUPT REG_BIT(11)
-#define GT_CONTEXT_SWITCH_INTERRUPT REG_BIT(8)
-#define GSC_ER_COMPLETE REG_BIT(5)
-#define GT_RENDER_PIPECTL_NOTIFY_INTERRUPT REG_BIT(4)
-#define GT_CS_MASTER_ERROR_INTERRUPT REG_BIT(3)
-#define GT_RENDER_USER_INTERRUPT REG_BIT(0)
+#define SFC_DONE(n) XE_REG(0x1cc000 + (n) * 0x1000)
#endif
diff --git a/drivers/gpu/drm/xe/regs/xe_guc_regs.h b/drivers/gpu/drm/xe/regs/xe_guc_regs.h
index a5fd14307f94..2118f7dec287 100644
--- a/drivers/gpu/drm/xe/regs/xe_guc_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_guc_regs.h
@@ -84,6 +84,8 @@
#define HUC_LOADING_AGENT_GUC REG_BIT(1)
#define GUC_WOPCM_OFFSET_VALID REG_BIT(0)
#define GUC_MAX_IDLE_COUNT XE_REG(0xc3e4)
+#define GUC_PMTIMESTAMP_LO XE_REG(0xc3e8)
+#define GUC_PMTIMESTAMP_HI XE_REG(0xc3ec)
#define GUC_SEND_INTERRUPT XE_REG(0xc4c8)
#define GUC_SEND_TRIGGER REG_BIT(0)
diff --git a/drivers/gpu/drm/xe/regs/xe_irq_regs.h b/drivers/gpu/drm/xe/regs/xe_irq_regs.h
new file mode 100644
index 000000000000..1776b3f78ccb
--- /dev/null
+++ b/drivers/gpu/drm/xe/regs/xe_irq_regs.h
@@ -0,0 +1,82 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2024 Intel Corporation
+ */
+#ifndef _XE_IRQ_REGS_H_
+#define _XE_IRQ_REGS_H_
+
+#include "regs/xe_reg_defs.h"
+
+#define PCU_IRQ_OFFSET 0x444e0
+#define GU_MISC_IRQ_OFFSET 0x444f0
+#define GU_MISC_GSE REG_BIT(27)
+
+#define DG1_MSTR_TILE_INTR XE_REG(0x190008)
+#define DG1_MSTR_IRQ REG_BIT(31)
+#define DG1_MSTR_TILE(t) REG_BIT(t)
+
+#define GFX_MSTR_IRQ XE_REG(0x190010, XE_REG_OPTION_VF)
+#define MASTER_IRQ REG_BIT(31)
+#define GU_MISC_IRQ REG_BIT(29)
+#define DISPLAY_IRQ REG_BIT(16)
+#define GT_DW_IRQ(x) REG_BIT(x)
+
+/*
+ * Note: Interrupt registers 1900xx are VF accessible only until version 12.50.
+ * On newer platforms, VFs are using memory-based interrupts instead.
+ * However, for simplicity we keep this XE_REG_OPTION_VF tag intact.
+ */
+
+#define GT_INTR_DW(x) XE_REG(0x190018 + ((x) * 4), XE_REG_OPTION_VF)
+#define INTR_GSC REG_BIT(31)
+#define INTR_GUC REG_BIT(25)
+#define INTR_MGUC REG_BIT(24)
+#define INTR_BCS8 REG_BIT(23)
+#define INTR_BCS(x) REG_BIT(15 - (x))
+#define INTR_CCS(x) REG_BIT(4 + (x))
+#define INTR_RCS0 REG_BIT(0)
+#define INTR_VECS(x) REG_BIT(31 - (x))
+#define INTR_VCS(x) REG_BIT(x)
+
+#define RENDER_COPY_INTR_ENABLE XE_REG(0x190030, XE_REG_OPTION_VF)
+#define VCS_VECS_INTR_ENABLE XE_REG(0x190034, XE_REG_OPTION_VF)
+#define GUC_SG_INTR_ENABLE XE_REG(0x190038, XE_REG_OPTION_VF)
+#define ENGINE1_MASK REG_GENMASK(31, 16)
+#define ENGINE0_MASK REG_GENMASK(15, 0)
+#define GPM_WGBOXPERF_INTR_ENABLE XE_REG(0x19003c, XE_REG_OPTION_VF)
+#define GUNIT_GSC_INTR_ENABLE XE_REG(0x190044, XE_REG_OPTION_VF)
+#define CCS_RSVD_INTR_ENABLE XE_REG(0x190048, XE_REG_OPTION_VF)
+
+#define INTR_IDENTITY_REG(x) XE_REG(0x190060 + ((x) * 4), XE_REG_OPTION_VF)
+#define INTR_DATA_VALID REG_BIT(31)
+#define INTR_ENGINE_INSTANCE(x) REG_FIELD_GET(GENMASK(25, 20), x)
+#define INTR_ENGINE_CLASS(x) REG_FIELD_GET(GENMASK(18, 16), x)
+#define INTR_ENGINE_INTR(x) REG_FIELD_GET(GENMASK(15, 0), x)
+#define OTHER_GUC_INSTANCE 0
+#define OTHER_GSC_HECI2_INSTANCE 3
+#define OTHER_GSC_INSTANCE 6
+
+#define IIR_REG_SELECTOR(x) XE_REG(0x190070 + ((x) * 4), XE_REG_OPTION_VF)
+#define RCS0_RSVD_INTR_MASK XE_REG(0x190090, XE_REG_OPTION_VF)
+#define BCS_RSVD_INTR_MASK XE_REG(0x1900a0, XE_REG_OPTION_VF)
+#define VCS0_VCS1_INTR_MASK XE_REG(0x1900a8, XE_REG_OPTION_VF)
+#define VCS2_VCS3_INTR_MASK XE_REG(0x1900ac, XE_REG_OPTION_VF)
+#define VECS0_VECS1_INTR_MASK XE_REG(0x1900d0, XE_REG_OPTION_VF)
+#define HECI2_RSVD_INTR_MASK XE_REG(0x1900e4)
+#define GUC_SG_INTR_MASK XE_REG(0x1900e8, XE_REG_OPTION_VF)
+#define GPM_WGBOXPERF_INTR_MASK XE_REG(0x1900ec, XE_REG_OPTION_VF)
+#define GUNIT_GSC_INTR_MASK XE_REG(0x1900f4, XE_REG_OPTION_VF)
+#define CCS0_CCS1_INTR_MASK XE_REG(0x190100)
+#define CCS2_CCS3_INTR_MASK XE_REG(0x190104)
+#define XEHPC_BCS1_BCS2_INTR_MASK XE_REG(0x190110)
+#define XEHPC_BCS3_BCS4_INTR_MASK XE_REG(0x190114)
+#define XEHPC_BCS5_BCS6_INTR_MASK XE_REG(0x190118)
+#define XEHPC_BCS7_BCS8_INTR_MASK XE_REG(0x19011c)
+#define GT_WAIT_SEMAPHORE_INTERRUPT REG_BIT(11)
+#define GT_CONTEXT_SWITCH_INTERRUPT REG_BIT(8)
+#define GSC_ER_COMPLETE REG_BIT(5)
+#define GT_RENDER_PIPECTL_NOTIFY_INTERRUPT REG_BIT(4)
+#define GT_CS_MASTER_ERROR_INTERRUPT REG_BIT(3)
+#define GT_RENDER_USER_INTERRUPT REG_BIT(0)
+
+#endif
diff --git a/drivers/gpu/drm/xe/regs/xe_reg_defs.h b/drivers/gpu/drm/xe/regs/xe_reg_defs.h
index 23f7dc5bbe99..51fd40ffafcb 100644
--- a/drivers/gpu/drm/xe/regs/xe_reg_defs.h
+++ b/drivers/gpu/drm/xe/regs/xe_reg_defs.h
@@ -128,7 +128,7 @@ struct xe_reg_mcr {
* options.
*/
#define XE_REG_MCR(r_, ...) ((const struct xe_reg_mcr){ \
- .__reg = XE_REG_INITIALIZER(r_, ##__VA_ARGS__, .mcr = 1) \
+ .__reg = XE_REG_INITIALIZER(r_, ##__VA_ARGS__, .mcr = 1) \
})
static inline bool xe_reg_is_valid(struct xe_reg r)
diff --git a/drivers/gpu/drm/xe/regs/xe_regs.h b/drivers/gpu/drm/xe/regs/xe_regs.h
index dfa869f0dddd..3293172b0128 100644
--- a/drivers/gpu/drm/xe/regs/xe_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_regs.h
@@ -11,10 +11,6 @@
#define TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_MASK REG_GENMASK(15, 12)
#define TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_MASK REG_GENMASK(9, 0)
-#define PCU_IRQ_OFFSET 0x444e0
-#define GU_MISC_IRQ_OFFSET 0x444f0
-#define GU_MISC_GSE REG_BIT(27)
-
#define GU_CNTL_PROTECTED XE_REG(0x10100C)
#define DRIVERINT_FLR_DIS REG_BIT(31)
@@ -57,16 +53,6 @@
#define MTL_MPE_FREQUENCY XE_REG(0x13802c)
#define MTL_RPE_MASK REG_GENMASK(8, 0)
-#define DG1_MSTR_TILE_INTR XE_REG(0x190008)
-#define DG1_MSTR_IRQ REG_BIT(31)
-#define DG1_MSTR_TILE(t) REG_BIT(t)
-
-#define GFX_MSTR_IRQ XE_REG(0x190010, XE_REG_OPTION_VF)
-#define MASTER_IRQ REG_BIT(31)
-#define GU_MISC_IRQ REG_BIT(29)
-#define DISPLAY_IRQ REG_BIT(16)
-#define GT_DW_IRQ(x) REG_BIT(x)
-
#define VF_CAP_REG XE_REG(0x1901f8, XE_REG_OPTION_VF)
#define VF_CAP REG_BIT(0)
diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c
index 8dac069483e8..3e0ae40ebbd2 100644
--- a/drivers/gpu/drm/xe/tests/xe_bo.c
+++ b/drivers/gpu/drm/xe/tests/xe_bo.c
@@ -6,6 +6,13 @@
#include <kunit/test.h>
#include <kunit/visibility.h>
+#include <linux/iosys-map.h>
+#include <linux/math64.h>
+#include <linux/prandom.h>
+#include <linux/swap.h>
+
+#include <uapi/linux/sysinfo.h>
+
#include "tests/xe_kunit_helpers.h"
#include "tests/xe_pci_test.h"
#include "tests/xe_test.h"
@@ -358,9 +365,242 @@ static void xe_bo_evict_kunit(struct kunit *test)
evict_test_run_device(xe);
}
+struct xe_bo_link {
+ struct list_head link;
+ struct xe_bo *bo;
+ u32 val;
+};
+
+#define XE_BO_SHRINK_SIZE ((unsigned long)SZ_64M)
+
+static int shrink_test_fill_random(struct xe_bo *bo, struct rnd_state *state,
+ struct xe_bo_link *link)
+{
+ struct iosys_map map;
+ int ret = ttm_bo_vmap(&bo->ttm, &map);
+ size_t __maybe_unused i;
+
+ if (ret)
+ return ret;
+
+ for (i = 0; i < bo->ttm.base.size; i += sizeof(u32)) {
+ u32 val = prandom_u32_state(state);
+
+ iosys_map_wr(&map, i, u32, val);
+ if (i == 0)
+ link->val = val;
+ }
+
+ ttm_bo_vunmap(&bo->ttm, &map);
+ return 0;
+}
+
+static bool shrink_test_verify(struct kunit *test, struct xe_bo *bo,
+ unsigned int bo_nr, struct rnd_state *state,
+ struct xe_bo_link *link)
+{
+ struct iosys_map map;
+ int ret = ttm_bo_vmap(&bo->ttm, &map);
+ size_t i;
+ bool failed = false;
+
+ if (ret) {
+ KUNIT_FAIL(test, "Error mapping bo %u for content check.\n", bo_nr);
+ return true;
+ }
+
+ for (i = 0; i < bo->ttm.base.size; i += sizeof(u32)) {
+ u32 val = prandom_u32_state(state);
+
+ if (iosys_map_rd(&map, i, u32) != val) {
+ KUNIT_FAIL(test, "Content not preserved, bo %u offset 0x%016llx",
+ bo_nr, (unsigned long long)i);
+ kunit_info(test, "Failed value is 0x%08x, recorded 0x%08x\n",
+ (unsigned int)iosys_map_rd(&map, i, u32), val);
+ if (i == 0 && val != link->val)
+ kunit_info(test, "Looks like PRNG is out of sync.\n");
+ failed = true;
+ break;
+ }
+ }
+
+ ttm_bo_vunmap(&bo->ttm, &map);
+
+ return failed;
+}
+
+/*
+ * Try to create system bos corresponding to twice the amount
+ * of available system memory to test shrinker functionality.
+ * If no swap space is available to accommodate the
+ * memory overcommit, mark bos purgeable.
+ */
+static int shrink_test_run_device(struct xe_device *xe)
+{
+ struct kunit *test = kunit_get_current_test();
+ LIST_HEAD(bos);
+ struct xe_bo_link *link, *next;
+ struct sysinfo si;
+ u64 ram, ram_and_swap, purgeable = 0, alloced, to_alloc, limit;
+ unsigned int interrupted = 0, successful = 0, count = 0;
+ struct rnd_state prng;
+ u64 rand_seed;
+ bool failed = false;
+
+ rand_seed = get_random_u64();
+ prandom_seed_state(&prng, rand_seed);
+ kunit_info(test, "Random seed is 0x%016llx.\n",
+ (unsigned long long)rand_seed);
+
+ /* Skip if execution time is expected to be too long. */
+
+ limit = SZ_32G;
+ /* IGFX with flat CCS needs to copy when swapping / shrinking */
+ if (!IS_DGFX(xe) && xe_device_has_flat_ccs(xe))
+ limit = SZ_16G;
+
+ si_meminfo(&si);
+ ram = (size_t)si.freeram * si.mem_unit;
+ if (ram > limit) {
+ kunit_skip(test, "Too long expected execution time.\n");
+ return 0;
+ }
+ to_alloc = ram * 2;
+
+ ram_and_swap = ram + get_nr_swap_pages() * PAGE_SIZE;
+ if (to_alloc > ram_and_swap)
+ purgeable = to_alloc - ram_and_swap;
+ purgeable += div64_u64(purgeable, 5);
+
+ kunit_info(test, "Free ram is %lu bytes. Will allocate twice of that.\n",
+ (unsigned long)ram);
+ for (alloced = 0; alloced < to_alloc; alloced += XE_BO_SHRINK_SIZE) {
+ struct xe_bo *bo;
+ unsigned int mem_type;
+ struct xe_ttm_tt *xe_tt;
+
+ link = kzalloc(sizeof(*link), GFP_KERNEL);
+ if (!link) {
+ KUNIT_FAIL(test, "Unexpected link allocation failure\n");
+ failed = true;
+ break;
+ }
+
+ INIT_LIST_HEAD(&link->link);
+
+ /* We can create bos using WC caching here. But it is slower. */
+ bo = xe_bo_create_user(xe, NULL, NULL, XE_BO_SHRINK_SIZE,
+ DRM_XE_GEM_CPU_CACHING_WB,
+ XE_BO_FLAG_SYSTEM);
+ if (IS_ERR(bo)) {
+ if (bo != ERR_PTR(-ENOMEM) && bo != ERR_PTR(-ENOSPC) &&
+ bo != ERR_PTR(-EINTR) && bo != ERR_PTR(-ERESTARTSYS))
+ KUNIT_FAIL(test, "Error creating bo: %pe\n", bo);
+ kfree(link);
+ failed = true;
+ break;
+ }
+ xe_bo_lock(bo, false);
+ xe_tt = container_of(bo->ttm.ttm, typeof(*xe_tt), ttm);
+
+ /*
+ * Allocate purgeable bos first, because if we do it the
+ * other way around, they may not be subject to swapping...
+ */
+ if (alloced < purgeable) {
+ xe_tt->purgeable = true;
+ bo->ttm.priority = 0;
+ } else {
+ int ret = shrink_test_fill_random(bo, &prng, link);
+
+ if (ret) {
+ xe_bo_unlock(bo);
+ xe_bo_put(bo);
+ KUNIT_FAIL(test, "Error filling bo with random data: %pe\n",
+ ERR_PTR(ret));
+ kfree(link);
+ failed = true;
+ break;
+ }
+ }
+
+ mem_type = bo->ttm.resource->mem_type;
+ xe_bo_unlock(bo);
+ link->bo = bo;
+ list_add_tail(&link->link, &bos);
+
+ if (mem_type != XE_PL_TT) {
+ KUNIT_FAIL(test, "Bo in incorrect memory type: %u\n",
+ bo->ttm.resource->mem_type);
+ failed = true;
+ }
+ cond_resched();
+ if (signal_pending(current))
+ break;
+ }
+
+ /*
+ * Read back and destroy bos. Reset the pseudo-random seed to get an
+ * identical pseudo-random number sequence for readback.
+ */
+ prandom_seed_state(&prng, rand_seed);
+ list_for_each_entry_safe(link, next, &bos, link) {
+ static struct ttm_operation_ctx ctx = {.interruptible = true};
+ struct xe_bo *bo = link->bo;
+ struct xe_ttm_tt *xe_tt;
+ int ret;
+
+ count++;
+ if (!signal_pending(current) && !failed) {
+ bool purgeable, intr = false;
+
+ xe_bo_lock(bo, NULL);
+
+ /* xe_tt->purgeable is cleared on validate. */
+ xe_tt = container_of(bo->ttm.ttm, typeof(*xe_tt), ttm);
+ purgeable = xe_tt->purgeable;
+ do {
+ ret = ttm_bo_validate(&bo->ttm, &tt_placement, &ctx);
+ if (ret == -EINTR)
+ intr = true;
+ } while (ret == -EINTR && !signal_pending(current));
+
+ if (!ret && !purgeable)
+ failed = shrink_test_verify(test, bo, count, &prng, link);
+
+ xe_bo_unlock(bo);
+ if (ret) {
+ KUNIT_FAIL(test, "Validation failed: %pe\n",
+ ERR_PTR(ret));
+ failed = true;
+ } else if (intr) {
+ interrupted++;
+ } else {
+ successful++;
+ }
+ }
+ xe_bo_put(link->bo);
+ list_del(&link->link);
+ kfree(link);
+ }
+ kunit_info(test, "Readbacks interrupted: %u successful: %u\n",
+ interrupted, successful);
+
+ return 0;
+}
+
+static void xe_bo_shrink_kunit(struct kunit *test)
+{
+ struct xe_device *xe = test->priv;
+
+ shrink_test_run_device(xe);
+}
+
static struct kunit_case xe_bo_tests[] = {
KUNIT_CASE_PARAM(xe_ccs_migrate_kunit, xe_pci_live_device_gen_param),
KUNIT_CASE_PARAM(xe_bo_evict_kunit, xe_pci_live_device_gen_param),
+ KUNIT_CASE_PARAM_ATTR(xe_bo_shrink_kunit, xe_pci_live_device_gen_param,
+ {.speed = KUNIT_SPEED_SLOW}),
{}
};
diff --git a/drivers/gpu/drm/xe/tests/xe_mocs.c b/drivers/gpu/drm/xe/tests/xe_mocs.c
index 79be73b4a02b..6f9b7a266b41 100644
--- a/drivers/gpu/drm/xe/tests/xe_mocs.c
+++ b/drivers/gpu/drm/xe/tests/xe_mocs.c
@@ -43,19 +43,18 @@ static void read_l3cc_table(struct xe_gt *gt,
{
struct kunit *test = kunit_get_current_test();
u32 l3cc, l3cc_expected;
- unsigned int i;
+ unsigned int fw_ref, i;
u32 reg_val;
- u32 ret;
- ret = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
- KUNIT_ASSERT_EQ_MSG(test, ret, 0, "Forcewake Failed.\n");
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+ KUNIT_ASSERT_NE_MSG(test, fw_ref, 0, "Forcewake Failed.\n");
for (i = 0; i < info->num_mocs_regs; i++) {
if (!(i & 1)) {
if (regs_are_mcr(gt))
reg_val = xe_gt_mcr_unicast_read_any(gt, XEHP_LNCFCMOCS(i >> 1));
else
- reg_val = xe_mmio_read32(gt, XELP_LNCFCMOCS(i >> 1));
+ reg_val = xe_mmio_read32(&gt->mmio, XELP_LNCFCMOCS(i >> 1));
mocs_dbg(gt, "reg_val=0x%x\n", reg_val);
} else {
@@ -72,7 +71,7 @@ static void read_l3cc_table(struct xe_gt *gt,
KUNIT_EXPECT_EQ_MSG(test, l3cc_expected, l3cc,
"l3cc idx=%u has incorrect val.\n", i);
}
- xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
}
static void read_mocs_table(struct xe_gt *gt,
@@ -80,21 +79,20 @@ static void read_mocs_table(struct xe_gt *gt,
{
struct kunit *test = kunit_get_current_test();
u32 mocs, mocs_expected;
- unsigned int i;
+ unsigned int fw_ref, i;
u32 reg_val;
- u32 ret;
KUNIT_EXPECT_TRUE_MSG(test, info->unused_entries_index,
"Unused entries index should have been defined\n");
- ret = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
- KUNIT_ASSERT_EQ_MSG(test, ret, 0, "Forcewake Failed.\n");
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+ KUNIT_ASSERT_NE_MSG(test, fw_ref, 0, "Forcewake Failed.\n");
for (i = 0; i < info->num_mocs_regs; i++) {
if (regs_are_mcr(gt))
reg_val = xe_gt_mcr_unicast_read_any(gt, XEHP_GLOBAL_MOCS(i));
else
- reg_val = xe_mmio_read32(gt, XELP_GLOBAL_MOCS(i));
+ reg_val = xe_mmio_read32(&gt->mmio, XELP_GLOBAL_MOCS(i));
mocs_expected = get_entry_control(info, i);
mocs = reg_val;
@@ -106,7 +104,7 @@ static void read_mocs_table(struct xe_gt *gt,
"mocs reg 0x%x has incorrect val.\n", i);
}
- xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
}
static int mocs_kernel_test_run_device(struct xe_device *xe)
diff --git a/drivers/gpu/drm/xe/xe_assert.h b/drivers/gpu/drm/xe/xe_assert.h
index e22bbf57fca7..04d6b95c6d87 100644
--- a/drivers/gpu/drm/xe/xe_assert.h
+++ b/drivers/gpu/drm/xe/xe_assert.h
@@ -10,7 +10,7 @@
#include <drm/drm_print.h>
-#include "xe_device_types.h"
+#include "xe_gt_types.h"
#include "xe_step.h"
/**
diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index 2a093540354e..ae6b337cdc54 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -283,6 +283,8 @@ struct xe_ttm_tt {
struct device *dev;
struct sg_table sgt;
struct sg_table *sg;
+ /** @purgeable: Whether the content of the pages of @ttm is purgeable. */
+ bool purgeable;
};
static int xe_tt_map_sg(struct ttm_tt *tt)
@@ -468,7 +470,7 @@ static int xe_ttm_io_mem_reserve(struct ttm_device *bdev,
mem->bus.offset += vram->io_start;
mem->bus.is_iomem = true;
-#if !defined(CONFIG_X86)
+#if !IS_ENABLED(CONFIG_X86)
mem->bus.caching = ttm_write_combined;
#endif
return 0;
@@ -761,7 +763,7 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
if (xe_rpm_reclaim_safe(xe)) {
/*
* We might be called through swapout in the validation path of
- * another TTM device, so unconditionally acquire rpm here.
+ * another TTM device, so acquire rpm here.
*/
xe_pm_runtime_get(xe);
} else {
@@ -901,7 +903,7 @@ int xe_bo_evict_pinned(struct xe_bo *bo)
}
}
- ret = ttm_tt_populate(bo->ttm.bdev, bo->ttm.ttm, &ctx);
+ ret = ttm_bo_populate(&bo->ttm, &ctx);
if (ret)
goto err_res_free;
@@ -961,7 +963,7 @@ int xe_bo_restore_pinned(struct xe_bo *bo)
if (ret)
return ret;
- ret = ttm_tt_populate(bo->ttm.bdev, bo->ttm.ttm, &ctx);
+ ret = ttm_bo_populate(&bo->ttm, &ctx);
if (ret)
goto err_res_free;
@@ -1089,6 +1091,33 @@ static void xe_ttm_bo_delete_mem_notify(struct ttm_buffer_object *ttm_bo)
}
}
+static void xe_ttm_bo_purge(struct ttm_buffer_object *ttm_bo, struct ttm_operation_ctx *ctx)
+{
+ struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
+
+ if (ttm_bo->ttm) {
+ struct ttm_placement place = {};
+ int ret = ttm_bo_validate(ttm_bo, &place, ctx);
+
+ drm_WARN_ON(&xe->drm, ret);
+ }
+}
+
+static void xe_ttm_bo_swap_notify(struct ttm_buffer_object *ttm_bo)
+{
+ struct ttm_operation_ctx ctx = {
+ .interruptible = false
+ };
+
+ if (ttm_bo->ttm) {
+ struct xe_ttm_tt *xe_tt =
+ container_of(ttm_bo->ttm, struct xe_ttm_tt, ttm);
+
+ if (xe_tt->purgeable)
+ xe_ttm_bo_purge(ttm_bo, &ctx);
+ }
+}
+
const struct ttm_device_funcs xe_ttm_funcs = {
.ttm_tt_create = xe_ttm_tt_create,
.ttm_tt_populate = xe_ttm_tt_populate,
@@ -1101,6 +1130,7 @@ const struct ttm_device_funcs xe_ttm_funcs = {
.release_notify = xe_ttm_bo_release_notify,
.eviction_valuable = ttm_bo_eviction_valuable,
.delete_mem_notify = xe_ttm_bo_delete_mem_notify,
+ .swap_notify = xe_ttm_bo_swap_notify,
};
static void xe_ttm_bo_destroy(struct ttm_buffer_object *ttm_bo)
@@ -1431,7 +1461,8 @@ static struct xe_bo *
__xe_bo_create_locked(struct xe_device *xe,
struct xe_tile *tile, struct xe_vm *vm,
size_t size, u64 start, u64 end,
- u16 cpu_caching, enum ttm_bo_type type, u32 flags)
+ u16 cpu_caching, enum ttm_bo_type type, u32 flags,
+ u64 alignment)
{
struct xe_bo *bo = NULL;
int err;
@@ -1460,6 +1491,8 @@ __xe_bo_create_locked(struct xe_device *xe,
if (IS_ERR(bo))
return bo;
+ bo->min_align = alignment;
+
/*
* Note that instead of taking a reference no the drm_gpuvm_resv_bo(),
* to ensure the shared resv doesn't disappear under the bo, the bo
@@ -1500,16 +1533,18 @@ struct xe_bo *
xe_bo_create_locked_range(struct xe_device *xe,
struct xe_tile *tile, struct xe_vm *vm,
size_t size, u64 start, u64 end,
- enum ttm_bo_type type, u32 flags)
+ enum ttm_bo_type type, u32 flags, u64 alignment)
{
- return __xe_bo_create_locked(xe, tile, vm, size, start, end, 0, type, flags);
+ return __xe_bo_create_locked(xe, tile, vm, size, start, end, 0, type,
+ flags, alignment);
}
struct xe_bo *xe_bo_create_locked(struct xe_device *xe, struct xe_tile *tile,
struct xe_vm *vm, size_t size,
enum ttm_bo_type type, u32 flags)
{
- return __xe_bo_create_locked(xe, tile, vm, size, 0, ~0ULL, 0, type, flags);
+ return __xe_bo_create_locked(xe, tile, vm, size, 0, ~0ULL, 0, type,
+ flags, 0);
}
struct xe_bo *xe_bo_create_user(struct xe_device *xe, struct xe_tile *tile,
@@ -1519,7 +1554,7 @@ struct xe_bo *xe_bo_create_user(struct xe_device *xe, struct xe_tile *tile,
{
struct xe_bo *bo = __xe_bo_create_locked(xe, tile, vm, size, 0, ~0ULL,
cpu_caching, ttm_bo_type_device,
- flags | XE_BO_FLAG_USER);
+ flags | XE_BO_FLAG_USER, 0);
if (!IS_ERR(bo))
xe_bo_unlock_vm_held(bo);
@@ -1543,6 +1578,17 @@ struct xe_bo *xe_bo_create_pin_map_at(struct xe_device *xe, struct xe_tile *tile
size_t size, u64 offset,
enum ttm_bo_type type, u32 flags)
{
+ return xe_bo_create_pin_map_at_aligned(xe, tile, vm, size, offset,
+ type, flags, 0);
+}
+
+struct xe_bo *xe_bo_create_pin_map_at_aligned(struct xe_device *xe,
+ struct xe_tile *tile,
+ struct xe_vm *vm,
+ size_t size, u64 offset,
+ enum ttm_bo_type type, u32 flags,
+ u64 alignment)
+{
struct xe_bo *bo;
int err;
u64 start = offset == ~0ull ? 0 : offset;
@@ -1553,7 +1599,8 @@ struct xe_bo *xe_bo_create_pin_map_at(struct xe_device *xe, struct xe_tile *tile
flags |= XE_BO_FLAG_GGTT;
bo = xe_bo_create_locked_range(xe, tile, vm, size, start, end, type,
- flags | XE_BO_FLAG_NEEDS_CPU_ACCESS);
+ flags | XE_BO_FLAG_NEEDS_CPU_ACCESS,
+ alignment);
if (IS_ERR(bo))
return bo;
diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h
index 6e4be52306df..7fa44a0138b0 100644
--- a/drivers/gpu/drm/xe/xe_bo.h
+++ b/drivers/gpu/drm/xe/xe_bo.h
@@ -77,7 +77,7 @@ struct xe_bo *
xe_bo_create_locked_range(struct xe_device *xe,
struct xe_tile *tile, struct xe_vm *vm,
size_t size, u64 start, u64 end,
- enum ttm_bo_type type, u32 flags);
+ enum ttm_bo_type type, u32 flags, u64 alignment);
struct xe_bo *xe_bo_create_locked(struct xe_device *xe, struct xe_tile *tile,
struct xe_vm *vm, size_t size,
enum ttm_bo_type type, u32 flags);
@@ -94,6 +94,12 @@ struct xe_bo *xe_bo_create_pin_map(struct xe_device *xe, struct xe_tile *tile,
struct xe_bo *xe_bo_create_pin_map_at(struct xe_device *xe, struct xe_tile *tile,
struct xe_vm *vm, size_t size, u64 offset,
enum ttm_bo_type type, u32 flags);
+struct xe_bo *xe_bo_create_pin_map_at_aligned(struct xe_device *xe,
+ struct xe_tile *tile,
+ struct xe_vm *vm,
+ size_t size, u64 offset,
+ enum ttm_bo_type type, u32 flags,
+ u64 alignment);
struct xe_bo *xe_bo_create_from_data(struct xe_device *xe, struct xe_tile *tile,
const void *data, size_t size,
enum ttm_bo_type type, u32 flags);
@@ -312,8 +318,6 @@ static inline unsigned int xe_sg_segment_size(struct device *dev)
return round_down(max / 2, PAGE_SIZE);
}
-#define i915_gem_object_flush_if_display(obj) ((void)(obj))
-
#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
/**
* xe_bo_is_mem_type - Whether the bo currently resides in the given
diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h
index 2ed558ac2264..13c6d8a69e91 100644
--- a/drivers/gpu/drm/xe/xe_bo_types.h
+++ b/drivers/gpu/drm/xe/xe_bo_types.h
@@ -76,9 +76,11 @@ struct xe_bo {
/** @vram_userfault_link: Link into @mem_access.vram_userfault.list */
struct list_head vram_userfault_link;
-};
-#define intel_bo_to_drm_bo(bo) (&(bo)->ttm.base)
-#define intel_bo_to_i915(bo) to_i915(intel_bo_to_drm_bo(bo)->dev)
+ /** @min_align: minimum alignment needed for this BO if different
+ * from default
+ */
+ u64 min_align;
+};
#endif
diff --git a/drivers/gpu/drm/xe/xe_debugfs.c b/drivers/gpu/drm/xe/xe_debugfs.c
index fe4319eb13fd..492b4877433f 100644
--- a/drivers/gpu/drm/xe/xe_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_debugfs.c
@@ -90,13 +90,32 @@ static int forcewake_open(struct inode *inode, struct file *file)
{
struct xe_device *xe = inode->i_private;
struct xe_gt *gt;
- u8 id;
+ u8 id, last_gt;
+ unsigned int fw_ref;
xe_pm_runtime_get(xe);
- for_each_gt(gt, xe, id)
- XE_WARN_ON(xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+ for_each_gt(gt, xe, id) {
+ last_gt = id;
+
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+ if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL))
+ goto err_fw_get;
+ }
return 0;
+
+err_fw_get:
+ for_each_gt(gt, xe, id) {
+ if (id < last_gt)
+ xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+ else if (id == last_gt)
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
+ else
+ break;
+ }
+
+ xe_pm_runtime_put(xe);
+ return -ETIMEDOUT;
}
static int forcewake_release(struct inode *inode, struct file *file)
@@ -106,7 +125,7 @@ static int forcewake_release(struct inode *inode, struct file *file)
u8 id;
for_each_gt(gt, xe, id)
- XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+ xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL);
xe_pm_runtime_put(xe);
return 0;
diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c
index bdb76e834e4c..0b0cd6aa1d9f 100644
--- a/drivers/gpu/drm/xe/xe_devcoredump.c
+++ b/drivers/gpu/drm/xe/xe_devcoredump.c
@@ -6,6 +6,7 @@
#include "xe_devcoredump.h"
#include "xe_devcoredump_types.h"
+#include <linux/ascii85.h>
#include <linux/devcoredump.h>
#include <generated/utsrelease.h>
@@ -16,9 +17,13 @@
#include "xe_force_wake.h"
#include "xe_gt.h"
#include "xe_gt_printk.h"
+#include "xe_guc_capture.h"
#include "xe_guc_ct.h"
+#include "xe_guc_log.h"
#include "xe_guc_submit.h"
#include "xe_hw_engine.h"
+#include "xe_module.h"
+#include "xe_pm.h"
#include "xe_sched_job.h"
#include "xe_vm.h"
@@ -85,9 +90,9 @@ static ssize_t __xe_devcoredump_read(char *buffer, size_t count,
p = drm_coredump_printer(&iter);
- drm_printf(&p, "**** Xe Device Coredump ****\n");
- drm_printf(&p, "kernel: " UTS_RELEASE "\n");
- drm_printf(&p, "module: " KBUILD_MODNAME "\n");
+ drm_puts(&p, "**** Xe Device Coredump ****\n");
+ drm_puts(&p, "kernel: " UTS_RELEASE "\n");
+ drm_puts(&p, "module: " KBUILD_MODNAME "\n");
ts = ktime_to_timespec64(ss->snapshot_time);
drm_printf(&p, "Snapshot time: %lld.%09ld\n", ts.tv_sec, ts.tv_nsec);
@@ -96,20 +101,27 @@ static ssize_t __xe_devcoredump_read(char *buffer, size_t count,
drm_printf(&p, "Process: %s\n", ss->process_name);
xe_device_snapshot_print(xe, &p);
- drm_printf(&p, "\n**** GuC CT ****\n");
- xe_guc_ct_snapshot_print(coredump->snapshot.ct, &p);
- xe_guc_exec_queue_snapshot_print(coredump->snapshot.ge, &p);
+ drm_printf(&p, "\n**** GT #%d ****\n", ss->gt->info.id);
+ drm_printf(&p, "\tTile: %d\n", ss->gt->tile->id);
- drm_printf(&p, "\n**** Job ****\n");
- xe_sched_job_snapshot_print(coredump->snapshot.job, &p);
+ drm_puts(&p, "\n**** GuC Log ****\n");
+ xe_guc_log_snapshot_print(ss->guc.log, &p);
+ drm_puts(&p, "\n**** GuC CT ****\n");
+ xe_guc_ct_snapshot_print(ss->guc.ct, &p);
- drm_printf(&p, "\n**** HW Engines ****\n");
+ drm_puts(&p, "\n**** Contexts ****\n");
+ xe_guc_exec_queue_snapshot_print(ss->ge, &p);
+
+ drm_puts(&p, "\n**** Job ****\n");
+ xe_sched_job_snapshot_print(ss->job, &p);
+
+ drm_puts(&p, "\n**** HW Engines ****\n");
for (i = 0; i < XE_NUM_HW_ENGINES; i++)
- if (coredump->snapshot.hwe[i])
- xe_hw_engine_snapshot_print(coredump->snapshot.hwe[i],
- &p);
- drm_printf(&p, "\n**** VM state ****\n");
- xe_vm_snapshot_print(coredump->snapshot.vm, &p);
+ if (ss->hwe[i])
+ xe_engine_snapshot_print(ss->hwe[i], &p);
+
+ drm_puts(&p, "\n**** VM state ****\n");
+ xe_vm_snapshot_print(ss->vm, &p);
return count - iter.remain;
}
@@ -118,8 +130,14 @@ static void xe_devcoredump_snapshot_free(struct xe_devcoredump_snapshot *ss)
{
int i;
- xe_guc_ct_snapshot_free(ss->ct);
- ss->ct = NULL;
+ xe_guc_log_snapshot_free(ss->guc.log);
+ ss->guc.log = NULL;
+
+ xe_guc_ct_snapshot_free(ss->guc.ct);
+ ss->guc.ct = NULL;
+
+ xe_guc_capture_put_matched_nodes(&ss->gt->uc.guc);
+ ss->matched_node = NULL;
xe_guc_exec_queue_snapshot_free(ss->ge);
ss->ge = NULL;
@@ -141,13 +159,20 @@ static void xe_devcoredump_deferred_snap_work(struct work_struct *work)
{
struct xe_devcoredump_snapshot *ss = container_of(work, typeof(*ss), work);
struct xe_devcoredump *coredump = container_of(ss, typeof(*coredump), snapshot);
+ struct xe_device *xe = coredump_to_xe(coredump);
+ unsigned int fw_ref;
+
+ xe_pm_runtime_get(xe);
/* keep going if fw fails as we still want to save the memory and SW data */
- if (xe_force_wake_get(gt_to_fw(ss->gt), XE_FORCEWAKE_ALL))
+ fw_ref = xe_force_wake_get(gt_to_fw(ss->gt), XE_FORCEWAKE_ALL);
+ if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL))
xe_gt_info(ss->gt, "failed to get forcewake for coredump capture\n");
xe_vm_snapshot_capture_delayed(ss->vm);
xe_guc_exec_queue_snapshot_capture_delayed(ss->ge);
- xe_force_wake_put(gt_to_fw(ss->gt), XE_FORCEWAKE_ALL);
+ xe_force_wake_put(gt_to_fw(ss->gt), fw_ref);
+
+ xe_pm_runtime_put(xe);
/* Calculate devcoredump size */
ss->read.size = __xe_devcoredump_read(NULL, INT_MAX, coredump);
@@ -204,6 +229,7 @@ static void xe_devcoredump_free(void *data)
/* To prevent stale data on next snapshot, clear everything */
memset(&coredump->snapshot, 0, sizeof(coredump->snapshot));
coredump->captured = false;
+ coredump->job = NULL;
drm_info(&coredump_to_xe(coredump)->drm,
"Xe device coredump has been deleted.\n");
}
@@ -214,14 +240,13 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump,
struct xe_devcoredump_snapshot *ss = &coredump->snapshot;
struct xe_exec_queue *q = job->q;
struct xe_guc *guc = exec_queue_to_guc(q);
- struct xe_hw_engine *hwe;
- enum xe_hw_engine_id id;
u32 adj_logical_mask = q->logical_mask;
u32 width_mask = (0x1 << q->width) - 1;
const char *process_name = "no process";
- int i;
+ unsigned int fw_ref;
bool cookie;
+ int i;
ss->snapshot_time = ktime_get_real();
ss->boot_time = ktime_get_boottime();
@@ -231,6 +256,7 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump,
strscpy(ss->process_name, process_name);
ss->gt = q->gt;
+ coredump->job = job;
INIT_WORK(&ss->work, xe_devcoredump_deferred_snap_work);
cookie = dma_fence_begin_signalling();
@@ -244,26 +270,19 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump,
}
/* keep going if fw fails as we still want to save the memory and SW data */
- if (xe_force_wake_get(gt_to_fw(q->gt), XE_FORCEWAKE_ALL))
- xe_gt_info(ss->gt, "failed to get forcewake for coredump capture\n");
+ fw_ref = xe_force_wake_get(gt_to_fw(q->gt), XE_FORCEWAKE_ALL);
- coredump->snapshot.ct = xe_guc_ct_snapshot_capture(&guc->ct, true);
- coredump->snapshot.ge = xe_guc_exec_queue_snapshot_capture(q);
- coredump->snapshot.job = xe_sched_job_snapshot_capture(job);
- coredump->snapshot.vm = xe_vm_snapshot_capture(q->vm);
+ ss->guc.log = xe_guc_log_snapshot_capture(&guc->log, true);
+ ss->guc.ct = xe_guc_ct_snapshot_capture(&guc->ct);
+ ss->ge = xe_guc_exec_queue_snapshot_capture(q);
+ ss->job = xe_sched_job_snapshot_capture(job);
+ ss->vm = xe_vm_snapshot_capture(q->vm);
- for_each_hw_engine(hwe, q->gt, id) {
- if (hwe->class != q->hwe->class ||
- !(BIT(hwe->logical_instance) & adj_logical_mask)) {
- coredump->snapshot.hwe[id] = NULL;
- continue;
- }
- coredump->snapshot.hwe[id] = xe_hw_engine_snapshot_capture(hwe);
- }
+ xe_engine_snapshot_capture_for_job(job);
queue_work(system_unbound_wq, &ss->work);
- xe_force_wake_put(gt_to_fw(q->gt), XE_FORCEWAKE_ALL);
+ xe_force_wake_put(gt_to_fw(q->gt), fw_ref);
dma_fence_end_signalling(cookie);
}
@@ -310,3 +329,89 @@ int xe_devcoredump_init(struct xe_device *xe)
}
#endif
+
+/**
+ * xe_print_blob_ascii85 - print a BLOB to some useful location in ASCII85
+ *
+ * The output is split to multiple lines because some print targets, e.g. dmesg
+ * cannot handle arbitrarily long lines. Note also that printing to dmesg in
+ * piece-meal fashion is not possible, each separate call to drm_puts() has a
+ * line-feed automatically added! Therefore, the entire output line must be
+ * constructed in a local buffer first, then printed in one atomic output call.
+ *
+ * There is also a scheduler yield call to prevent the 'task has been stuck for
+ * 120s' kernel hang check feature from firing when printing to a slow target
+ * such as dmesg over a serial port.
+ *
+ * TODO: Add compression prior to the ASCII85 encoding to shrink huge buffers down.
+ *
+ * @p: the printer object to output to
+ * @prefix: optional prefix to add to output string
+ * @blob: the Binary Large OBject to dump out
+ * @offset: offset in bytes to skip from the front of the BLOB, must be a multiple of sizeof(u32)
+ * @size: the size in bytes of the BLOB, must be a multiple of sizeof(u32)
+ */
+void xe_print_blob_ascii85(struct drm_printer *p, const char *prefix,
+ const void *blob, size_t offset, size_t size)
+{
+ const u32 *blob32 = (const u32 *)blob;
+ char buff[ASCII85_BUFSZ], *line_buff;
+ size_t line_pos = 0;
+
+#define DMESG_MAX_LINE_LEN 800
+#define MIN_SPACE (ASCII85_BUFSZ + 2) /* 85 + "\n\0" */
+
+ if (size & 3)
+ drm_printf(p, "Size not word aligned: %zu", size);
+ if (offset & 3)
+ drm_printf(p, "Offset not word aligned: %zu", size);
+
+ line_buff = kzalloc(DMESG_MAX_LINE_LEN, GFP_KERNEL);
+ if (IS_ERR_OR_NULL(line_buff)) {
+ drm_printf(p, "Failed to allocate line buffer: %pe", line_buff);
+ return;
+ }
+
+ blob32 += offset / sizeof(*blob32);
+ size /= sizeof(*blob32);
+
+ if (prefix) {
+ strscpy(line_buff, prefix, DMESG_MAX_LINE_LEN - MIN_SPACE - 2);
+ line_pos = strlen(line_buff);
+
+ line_buff[line_pos++] = ':';
+ line_buff[line_pos++] = ' ';
+ }
+
+ while (size--) {
+ u32 val = *(blob32++);
+
+ strscpy(line_buff + line_pos, ascii85_encode(val, buff),
+ DMESG_MAX_LINE_LEN - line_pos);
+ line_pos += strlen(line_buff + line_pos);
+
+ if ((line_pos + MIN_SPACE) >= DMESG_MAX_LINE_LEN) {
+ line_buff[line_pos++] = '\n';
+ line_buff[line_pos++] = 0;
+
+ drm_puts(p, line_buff);
+
+ line_pos = 0;
+
+ /* Prevent 'stuck thread' time out errors */
+ cond_resched();
+ }
+ }
+
+ if (line_pos) {
+ line_buff[line_pos++] = '\n';
+ line_buff[line_pos++] = 0;
+
+ drm_puts(p, line_buff);
+ }
+
+ kfree(line_buff);
+
+#undef MIN_SPACE
+#undef DMESG_MAX_LINE_LEN
+}
diff --git a/drivers/gpu/drm/xe/xe_devcoredump.h b/drivers/gpu/drm/xe/xe_devcoredump.h
index e2fa65ce0932..a4eebc285fc8 100644
--- a/drivers/gpu/drm/xe/xe_devcoredump.h
+++ b/drivers/gpu/drm/xe/xe_devcoredump.h
@@ -6,6 +6,9 @@
#ifndef _XE_DEVCOREDUMP_H_
#define _XE_DEVCOREDUMP_H_
+#include <linux/types.h>
+
+struct drm_printer;
struct xe_device;
struct xe_sched_job;
@@ -23,4 +26,7 @@ static inline int xe_devcoredump_init(struct xe_device *xe)
}
#endif
+void xe_print_blob_ascii85(struct drm_printer *p, const char *prefix,
+ const void *blob, size_t offset, size_t size);
+
#endif
diff --git a/drivers/gpu/drm/xe/xe_devcoredump_types.h b/drivers/gpu/drm/xe/xe_devcoredump_types.h
index 440d05d77a5a..3703ddea1252 100644
--- a/drivers/gpu/drm/xe/xe_devcoredump_types.h
+++ b/drivers/gpu/drm/xe/xe_devcoredump_types.h
@@ -34,16 +34,27 @@ struct xe_devcoredump_snapshot {
/** @work: Workqueue for deferred capture outside of signaling context */
struct work_struct work;
- /* GuC snapshots */
- /** @ct: GuC CT snapshot */
- struct xe_guc_ct_snapshot *ct;
- /** @ge: Guc Engine snapshot */
+ /** @guc: GuC snapshots */
+ struct {
+ /** @guc.ct: GuC CT snapshot */
+ struct xe_guc_ct_snapshot *ct;
+ /** @guc.log: GuC log snapshot */
+ struct xe_guc_log_snapshot *log;
+ } guc;
+
+ /** @ge: GuC Submission Engine snapshot */
struct xe_guc_submit_exec_queue_snapshot *ge;
/** @hwe: HW Engine snapshot array */
struct xe_hw_engine_snapshot *hwe[XE_NUM_HW_ENGINES];
/** @job: Snapshot of job state */
struct xe_sched_job_snapshot *job;
+ /**
+ * @matched_node: The matched capture node for timedout job
+ * this single-node tracker works because devcoredump will always only
+ * produce one hw-engine capture per devcoredump event
+ */
+ struct __guc_capture_parsed_output *matched_node;
/** @vm: Snapshot of VM state */
struct xe_vm_snapshot *vm;
@@ -69,6 +80,8 @@ struct xe_devcoredump {
bool captured;
/** @snapshot: Snapshot is captured at time of the first crash */
struct xe_devcoredump_snapshot snapshot;
+ /** @job: Point to the faulting job */
+ struct xe_sched_job *job;
};
#endif
diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index a1987b554a8d..06d6db8b50f9 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -5,10 +5,11 @@
#include "xe_device.h"
+#include <linux/aperture.h>
#include <linux/delay.h>
+#include <linux/fault-inject.h>
#include <linux/units.h>
-#include <drm/drm_aperture.h>
#include <drm/drm_atomic_helper.h>
#include <drm/drm_client.h>
#include <drm/drm_gem_ttm_helper.h>
@@ -301,7 +302,7 @@ struct xe_device *xe_device_create(struct pci_dev *pdev,
xe_display_driver_set_hooks(&driver);
- err = drm_aperture_remove_conflicting_pci_framebuffers(pdev, &driver);
+ err = aperture_remove_conflicting_pci_devices(pdev, driver.name);
if (err)
return ERR_PTR(err);
@@ -349,7 +350,8 @@ struct xe_device *xe_device_create(struct pci_dev *pdev,
INIT_LIST_HEAD(&xe->pinned.external_vram);
INIT_LIST_HEAD(&xe->pinned.evicted);
- xe->preempt_fence_wq = alloc_ordered_workqueue("xe-preempt-fence-wq", 0);
+ xe->preempt_fence_wq = alloc_ordered_workqueue("xe-preempt-fence-wq",
+ WQ_MEM_RECLAIM);
xe->ordered_wq = alloc_ordered_workqueue("xe-ordered-wq", 0);
xe->unordered_wq = alloc_workqueue("xe-unordered-wq", 0, 0);
xe->destroy_wq = alloc_workqueue("xe-destroy-wq", 0, 0);
@@ -373,6 +375,12 @@ struct xe_device *xe_device_create(struct pci_dev *pdev,
err:
return ERR_PTR(err);
}
+ALLOW_ERROR_INJECTION(xe_device_create, ERRNO); /* See xe_pci_probe() */
+
+static bool xe_driver_flr_disabled(struct xe_device *xe)
+{
+ return xe_mmio_read32(xe_root_tile_mmio(xe), GU_CNTL_PROTECTED) & DRIVERINT_FLR_DIS;
+}
/*
* The driver-initiated FLR is the highest level of reset that we can trigger
@@ -387,17 +395,12 @@ err:
* if/when a new instance of i915 is bound to the device it will do a full
* re-init anyway.
*/
-static void xe_driver_flr(struct xe_device *xe)
+static void __xe_driver_flr(struct xe_device *xe)
{
const unsigned int flr_timeout = 3 * MICRO; /* specs recommend a 3s wait */
- struct xe_gt *gt = xe_root_mmio_gt(xe);
+ struct xe_mmio *mmio = xe_root_tile_mmio(xe);
int ret;
- if (xe_mmio_read32(gt, GU_CNTL_PROTECTED) & DRIVERINT_FLR_DIS) {
- drm_info_once(&xe->drm, "BIOS Disabled Driver-FLR\n");
- return;
- }
-
drm_dbg(&xe->drm, "Triggering Driver-FLR\n");
/*
@@ -409,25 +412,25 @@ static void xe_driver_flr(struct xe_device *xe)
* is still pending (unless the HW is totally dead), but better to be
* safe in case something unexpected happens
*/
- ret = xe_mmio_wait32(gt, GU_CNTL, DRIVERFLR, 0, flr_timeout, NULL, false);
+ ret = xe_mmio_wait32(mmio, GU_CNTL, DRIVERFLR, 0, flr_timeout, NULL, false);
if (ret) {
drm_err(&xe->drm, "Driver-FLR-prepare wait for ready failed! %d\n", ret);
return;
}
- xe_mmio_write32(gt, GU_DEBUG, DRIVERFLR_STATUS);
+ xe_mmio_write32(mmio, GU_DEBUG, DRIVERFLR_STATUS);
/* Trigger the actual Driver-FLR */
- xe_mmio_rmw32(gt, GU_CNTL, 0, DRIVERFLR);
+ xe_mmio_rmw32(mmio, GU_CNTL, 0, DRIVERFLR);
/* Wait for hardware teardown to complete */
- ret = xe_mmio_wait32(gt, GU_CNTL, DRIVERFLR, 0, flr_timeout, NULL, false);
+ ret = xe_mmio_wait32(mmio, GU_CNTL, DRIVERFLR, 0, flr_timeout, NULL, false);
if (ret) {
drm_err(&xe->drm, "Driver-FLR-teardown wait completion failed! %d\n", ret);
return;
}
/* Wait for hardware/firmware re-init to complete */
- ret = xe_mmio_wait32(gt, GU_DEBUG, DRIVERFLR_STATUS, DRIVERFLR_STATUS,
+ ret = xe_mmio_wait32(mmio, GU_DEBUG, DRIVERFLR_STATUS, DRIVERFLR_STATUS,
flr_timeout, NULL, false);
if (ret) {
drm_err(&xe->drm, "Driver-FLR-reinit wait completion failed! %d\n", ret);
@@ -435,7 +438,17 @@ static void xe_driver_flr(struct xe_device *xe)
}
/* Clear sticky completion status */
- xe_mmio_write32(gt, GU_DEBUG, DRIVERFLR_STATUS);
+ xe_mmio_write32(mmio, GU_DEBUG, DRIVERFLR_STATUS);
+}
+
+static void xe_driver_flr(struct xe_device *xe)
+{
+ if (xe_driver_flr_disabled(xe)) {
+ drm_info_once(&xe->drm, "BIOS Disabled Driver-FLR\n");
+ return;
+ }
+
+ __xe_driver_flr(xe);
}
static void xe_driver_flr_fini(void *arg)
@@ -478,16 +491,15 @@ mask_err:
return err;
}
-static bool verify_lmem_ready(struct xe_gt *gt)
+static bool verify_lmem_ready(struct xe_device *xe)
{
- u32 val = xe_mmio_read32(gt, GU_CNTL) & LMEM_INIT;
+ u32 val = xe_mmio_read32(xe_root_tile_mmio(xe), GU_CNTL) & LMEM_INIT;
return !!val;
}
static int wait_for_lmem_ready(struct xe_device *xe)
{
- struct xe_gt *gt = xe_root_mmio_gt(xe);
unsigned long timeout, start;
if (!IS_DGFX(xe))
@@ -496,7 +508,7 @@ static int wait_for_lmem_ready(struct xe_device *xe)
if (IS_SRIOV_VF(xe))
return 0;
- if (verify_lmem_ready(gt))
+ if (verify_lmem_ready(xe))
return 0;
drm_dbg(&xe->drm, "Waiting for lmem initialization\n");
@@ -525,13 +537,14 @@ static int wait_for_lmem_ready(struct xe_device *xe)
msleep(20);
- } while (!verify_lmem_ready(gt));
+ } while (!verify_lmem_ready(xe));
drm_dbg(&xe->drm, "lmem ready after %ums",
jiffies_to_msecs(jiffies - start));
return 0;
}
+ALLOW_ERROR_INJECTION(wait_for_lmem_ready, ERRNO); /* See xe_pci_probe() */
static void update_device_info(struct xe_device *xe)
{
@@ -579,19 +592,21 @@ int xe_device_probe_early(struct xe_device *xe)
return 0;
}
-static int xe_device_set_has_flat_ccs(struct xe_device *xe)
+static int probe_has_flat_ccs(struct xe_device *xe)
{
+ struct xe_gt *gt;
+ unsigned int fw_ref;
u32 reg;
- int err;
+ /* Always enabled/disabled, no runtime check to do */
if (GRAPHICS_VER(xe) < 20 || !xe->info.has_flat_ccs)
return 0;
- struct xe_gt *gt = xe_root_mmio_gt(xe);
+ gt = xe_root_mmio_gt(xe);
- err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
- if (err)
- return err;
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+ if (!fw_ref)
+ return -ETIMEDOUT;
reg = xe_gt_mcr_unicast_read_any(gt, XE2_FLAT_CCS_BASE_RANGE_LOWER);
xe->info.has_flat_ccs = (reg & XE2_FLAT_CCS_ENABLE);
@@ -600,7 +615,8 @@ static int xe_device_set_has_flat_ccs(struct xe_device *xe)
drm_dbg(&xe->drm,
"Flat CCS has been disabled in bios, May lead to performance impact");
- return xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
+ return 0;
}
int xe_device_probe(struct xe_device *xe)
@@ -636,6 +652,13 @@ int xe_device_probe(struct xe_device *xe)
err = xe_gt_init_early(gt);
if (err)
return err;
+
+ /*
+ * Only after this point can GT-specific MMIO operations
+ * (including things like communication with the GuC)
+ * be performed.
+ */
+ xe_gt_mmio_init(gt);
}
for_each_tile(tile, xe, id) {
@@ -651,11 +674,9 @@ int xe_device_probe(struct xe_device *xe)
err = xe_ggtt_init_early(tile->mem.ggtt);
if (err)
return err;
- if (IS_SRIOV_VF(xe)) {
- err = xe_memirq_init(&tile->sriov.vf.memirq);
- if (err)
- return err;
- }
+ err = xe_memirq_init(&tile->memirq);
+ if (err)
+ return err;
}
for_each_gt(gt, xe, id) {
@@ -679,7 +700,7 @@ int xe_device_probe(struct xe_device *xe)
if (err)
goto err;
- err = xe_device_set_has_flat_ccs(xe);
+ err = probe_has_flat_ccs(xe);
if (err)
goto err;
@@ -789,6 +810,24 @@ void xe_device_remove(struct xe_device *xe)
void xe_device_shutdown(struct xe_device *xe)
{
+ struct xe_gt *gt;
+ u8 id;
+
+ drm_dbg(&xe->drm, "Shutting down device\n");
+
+ if (xe_driver_flr_disabled(xe)) {
+ xe_display_pm_shutdown(xe);
+
+ xe_irq_suspend(xe);
+
+ for_each_gt(gt, xe, id)
+ xe_gt_shutdown(gt);
+
+ xe_display_pm_shutdown_late(xe);
+ } else {
+ /* BOOM! */
+ __xe_driver_flr(xe);
+ }
}
/**
@@ -802,11 +841,9 @@ void xe_device_shutdown(struct xe_device *xe)
*/
void xe_device_wmb(struct xe_device *xe)
{
- struct xe_gt *gt = xe_root_mmio_gt(xe);
-
wmb();
if (IS_DGFX(xe))
- xe_mmio_write32(gt, VF_CAP_REG, 0);
+ xe_mmio_write32(xe_root_tile_mmio(xe), VF_CAP_REG, 0);
}
/**
@@ -830,6 +867,7 @@ void xe_device_wmb(struct xe_device *xe)
void xe_device_td_flush(struct xe_device *xe)
{
struct xe_gt *gt;
+ unsigned int fw_ref;
u8 id;
if (!IS_DGFX(xe) || GRAPHICS_VER(xe) < 20)
@@ -844,10 +882,11 @@ void xe_device_td_flush(struct xe_device *xe)
if (xe_gt_is_media_type(gt))
continue;
- if (xe_force_wake_get(gt_to_fw(gt), XE_FW_GT))
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+ if (!fw_ref)
return;
- xe_mmio_write32(gt, XE2_TDF_CTRL, TRANSIENT_FLUSH_REQUEST);
+ xe_mmio_write32(&gt->mmio, XE2_TDF_CTRL, TRANSIENT_FLUSH_REQUEST);
/*
* FIXME: We can likely do better here with our choice of
* timeout. Currently we just assume the worst case, i.e. 150us,
@@ -855,36 +894,36 @@ void xe_device_td_flush(struct xe_device *xe)
* scenario on current platforms if all cache entries are
* transient and need to be flushed..
*/
- if (xe_mmio_wait32(gt, XE2_TDF_CTRL, TRANSIENT_FLUSH_REQUEST, 0,
+ if (xe_mmio_wait32(&gt->mmio, XE2_TDF_CTRL, TRANSIENT_FLUSH_REQUEST, 0,
150, NULL, false))
xe_gt_err_once(gt, "TD flush timeout\n");
- xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
}
}
void xe_device_l2_flush(struct xe_device *xe)
{
struct xe_gt *gt;
- int err;
+ unsigned int fw_ref;
gt = xe_root_mmio_gt(xe);
if (!XE_WA(gt, 16023588340))
return;
- err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
- if (err)
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+ if (!fw_ref)
return;
spin_lock(&gt->global_invl_lock);
- xe_mmio_write32(gt, XE2_GLOBAL_INVAL, 0x1);
+ xe_mmio_write32(&gt->mmio, XE2_GLOBAL_INVAL, 0x1);
- if (xe_mmio_wait32(gt, XE2_GLOBAL_INVAL, 0x1, 0x0, 500, NULL, true))
+ if (xe_mmio_wait32(&gt->mmio, XE2_GLOBAL_INVAL, 0x1, 0x0, 500, NULL, true))
xe_gt_err_once(gt, "Global invalidation timeout\n");
spin_unlock(&gt->global_invl_lock);
- xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
}
u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size)
@@ -919,6 +958,7 @@ void xe_device_snapshot_print(struct xe_device *xe, struct drm_printer *p)
for_each_gt(gt, xe, id) {
drm_printf(p, "GT id: %u\n", id);
+ drm_printf(p, "\tTile: %u\n", gt->tile->id);
drm_printf(p, "\tType: %s\n",
gt->info.type == XE_GT_TYPE_MAIN ? "main" : "media");
drm_printf(p, "\tIP ver: %u.%u.%u\n",
diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h
index 34620ef855c0..f1fbfe916867 100644
--- a/drivers/gpu/drm/xe/xe_device.h
+++ b/drivers/gpu/drm/xe/xe_device.h
@@ -9,6 +9,8 @@
#include <drm/drm_util.h>
#include "xe_device_types.h"
+#include "xe_gt_types.h"
+#include "xe_sriov.h"
static inline struct xe_device *to_xe_device(const struct drm_device *dev)
{
@@ -138,7 +140,7 @@ static inline bool xe_device_uc_enabled(struct xe_device *xe)
static inline struct xe_force_wake *gt_to_fw(struct xe_gt *gt)
{
- return &gt->mmio.fw;
+ return &gt->pm.fw;
}
void xe_device_assert_mem_access(struct xe_device *xe);
@@ -153,11 +155,22 @@ static inline bool xe_device_has_sriov(struct xe_device *xe)
return xe->info.has_sriov;
}
+static inline bool xe_device_has_msix(struct xe_device *xe)
+{
+ /* TODO: change this when MSI-X support is fully integrated */
+ return false;
+}
+
static inline bool xe_device_has_memirq(struct xe_device *xe)
{
return GRAPHICS_VERx100(xe) >= 1250;
}
+static inline bool xe_device_uses_memirq(struct xe_device *xe)
+{
+ return xe_device_has_memirq(xe) && (IS_SRIOV_VF(xe) || xe_device_has_msix(xe));
+}
+
u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size);
void xe_device_snapshot_print(struct xe_device *xe, struct drm_printer *p);
diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index 687f3a9039bb..b9ea455d6f59 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -14,7 +14,6 @@
#include "xe_devcoredump_types.h"
#include "xe_heci_gsc.h"
-#include "xe_gt_types.h"
#include "xe_lmtt_types.h"
#include "xe_memirq_types.h"
#include "xe_oa.h"
@@ -108,6 +107,45 @@ struct xe_mem_region {
};
/**
+ * struct xe_mmio - register mmio structure
+ *
+ * Represents an MMIO region that the CPU may use to access registers. A
+ * region may share its IO map with other regions (e.g., all GTs within a
+ * tile share the same map with their parent tile, but represent different
+ * subregions of the overall IO space).
+ */
+struct xe_mmio {
+ /** @tile: Backpointer to tile, used for tracing */
+ struct xe_tile *tile;
+
+ /** @regs: Map used to access registers. */
+ void __iomem *regs;
+
+ /**
+ * @sriov_vf_gt: Backpointer to GT.
+ *
+ * This pointer is only set for GT MMIO regions and only when running
+ * as an SRIOV VF structure
+ */
+ struct xe_gt *sriov_vf_gt;
+
+ /**
+ * @regs_size: Length of the register region within the map.
+ *
+ * The size of the iomap set in *regs is generally larger than the
+ * register mmio space since it includes unused regions and/or
+ * non-register regions such as the GGTT PTEs.
+ */
+ size_t regs_size;
+
+ /** @adj_limit: adjust MMIO address if address is below this value */
+ u32 adj_limit;
+
+ /** @adj_offset: offset to add to MMIO address when adjusting */
+ u32 adj_offset;
+};
+
+/**
* struct xe_tile - hardware tile structure
*
* From a driver perspective, a "tile" is effectively a complete GPU, containing
@@ -148,26 +186,14 @@ struct xe_tile {
* * 4MB-8MB: reserved
* * 8MB-16MB: global GTT
*/
- struct {
- /** @mmio.size: size of tile's MMIO space */
- size_t size;
-
- /** @mmio.regs: pointer to tile's MMIO space (starting with registers) */
- void __iomem *regs;
- } mmio;
+ struct xe_mmio mmio;
/**
* @mmio_ext: MMIO-extension info for a tile.
*
* Each tile has its own additional 256MB (28-bit) MMIO-extension space.
*/
- struct {
- /** @mmio_ext.size: size of tile's additional MMIO-extension space */
- size_t size;
-
- /** @mmio_ext.regs: pointer to tile's additional MMIO-extension space */
- void __iomem *regs;
- } mmio_ext;
+ struct xe_mmio mmio_ext;
/** @mem: memory management info for tile */
struct {
@@ -200,14 +226,14 @@ struct xe_tile {
struct xe_lmtt lmtt;
} pf;
struct {
- /** @sriov.vf.memirq: Memory Based Interrupts. */
- struct xe_memirq memirq;
-
/** @sriov.vf.ggtt_balloon: GGTT regions excluded from use. */
struct xe_ggtt_node *ggtt_balloon[2];
} vf;
} sriov;
+ /** @memirq: Memory Based Interrupts. */
+ struct xe_memirq memirq;
+
/** @pcode: tile's PCODE */
struct {
/** @pcode.lock: protecting tile's PCODE mailbox data */
diff --git a/drivers/gpu/drm/xe/xe_drm_client.c b/drivers/gpu/drm/xe/xe_drm_client.c
index fb52a23e28f8..22f0f1a6dfd5 100644
--- a/drivers/gpu/drm/xe/xe_drm_client.c
+++ b/drivers/gpu/drm/xe/xe_drm_client.c
@@ -278,6 +278,7 @@ static void show_run_ticks(struct drm_printer *p, struct drm_file *file)
struct xe_hw_engine *hwe;
struct xe_exec_queue *q;
u64 gpu_timestamp;
+ unsigned int fw_ref;
xe_pm_runtime_get(xe);
@@ -303,13 +304,16 @@ static void show_run_ticks(struct drm_printer *p, struct drm_file *file)
continue;
fw = xe_hw_engine_to_fw_domain(hwe);
- if (xe_force_wake_get(gt_to_fw(gt), fw)) {
+
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), fw);
+ if (!xe_force_wake_ref_has_domain(fw_ref, fw)) {
hwe = NULL;
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
break;
}
gpu_timestamp = xe_hw_engine_read_timestamp(hwe);
- XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), fw));
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
break;
}
diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h
index 7deb480e26af..1158b6062a6c 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue_types.h
+++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h
@@ -143,7 +143,7 @@ struct xe_exec_queue {
/** @hw_engine_group_link: link into exec queues in the same hw engine group */
struct list_head hw_engine_group_link;
/** @lrc: logical ring context for this exec queue */
- struct xe_lrc *lrc[];
+ struct xe_lrc *lrc[] __counted_by(width);
};
/**
diff --git a/drivers/gpu/drm/xe/xe_execlist.c b/drivers/gpu/drm/xe/xe_execlist.c
index 6a59165b9569..a8c416a48812 100644
--- a/drivers/gpu/drm/xe/xe_execlist.c
+++ b/drivers/gpu/drm/xe/xe_execlist.c
@@ -44,6 +44,7 @@ static void __start_lrc(struct xe_hw_engine *hwe, struct xe_lrc *lrc,
u32 ctx_id)
{
struct xe_gt *gt = hwe->gt;
+ struct xe_mmio *mmio = &gt->mmio;
struct xe_device *xe = gt_to_xe(gt);
u64 lrc_desc;
@@ -58,7 +59,7 @@ static void __start_lrc(struct xe_hw_engine *hwe, struct xe_lrc *lrc,
}
if (hwe->class == XE_ENGINE_CLASS_COMPUTE)
- xe_mmio_write32(hwe->gt, RCU_MODE,
+ xe_mmio_write32(mmio, RCU_MODE,
_MASKED_BIT_ENABLE(RCU_MODE_CCS_ENABLE));
xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail);
@@ -76,17 +77,17 @@ static void __start_lrc(struct xe_hw_engine *hwe, struct xe_lrc *lrc,
*/
wmb();
- xe_mmio_write32(gt, RING_HWS_PGA(hwe->mmio_base),
+ xe_mmio_write32(mmio, RING_HWS_PGA(hwe->mmio_base),
xe_bo_ggtt_addr(hwe->hwsp));
- xe_mmio_read32(gt, RING_HWS_PGA(hwe->mmio_base));
- xe_mmio_write32(gt, RING_MODE(hwe->mmio_base),
+ xe_mmio_read32(mmio, RING_HWS_PGA(hwe->mmio_base));
+ xe_mmio_write32(mmio, RING_MODE(hwe->mmio_base),
_MASKED_BIT_ENABLE(GFX_DISABLE_LEGACY_MODE));
- xe_mmio_write32(gt, RING_EXECLIST_SQ_CONTENTS_LO(hwe->mmio_base),
+ xe_mmio_write32(mmio, RING_EXECLIST_SQ_CONTENTS_LO(hwe->mmio_base),
lower_32_bits(lrc_desc));
- xe_mmio_write32(gt, RING_EXECLIST_SQ_CONTENTS_HI(hwe->mmio_base),
+ xe_mmio_write32(mmio, RING_EXECLIST_SQ_CONTENTS_HI(hwe->mmio_base),
upper_32_bits(lrc_desc));
- xe_mmio_write32(gt, RING_EXECLIST_CONTROL(hwe->mmio_base),
+ xe_mmio_write32(mmio, RING_EXECLIST_CONTROL(hwe->mmio_base),
EL_CTRL_LOAD);
}
@@ -168,8 +169,8 @@ static u64 read_execlist_status(struct xe_hw_engine *hwe)
struct xe_gt *gt = hwe->gt;
u32 hi, lo;
- lo = xe_mmio_read32(gt, RING_EXECLIST_STATUS_LO(hwe->mmio_base));
- hi = xe_mmio_read32(gt, RING_EXECLIST_STATUS_HI(hwe->mmio_base));
+ lo = xe_mmio_read32(&gt->mmio, RING_EXECLIST_STATUS_LO(hwe->mmio_base));
+ hi = xe_mmio_read32(&gt->mmio, RING_EXECLIST_STATUS_HI(hwe->mmio_base));
return lo | (u64)hi << 32;
}
@@ -312,7 +313,7 @@ execlist_run_job(struct drm_sched_job *drm_job)
q->ring_ops->emit_job(job);
xe_execlist_make_active(exl);
- return dma_fence_get(job->fence);
+ return job->fence;
}
static void execlist_job_free(struct drm_sched_job *drm_job)
diff --git a/drivers/gpu/drm/xe/xe_force_wake.c b/drivers/gpu/drm/xe/xe_force_wake.c
index 7d9fc489dcb8..4f6784e5abf8 100644
--- a/drivers/gpu/drm/xe/xe_force_wake.c
+++ b/drivers/gpu/drm/xe/xe_force_wake.c
@@ -21,15 +21,25 @@ static const char *str_wake_sleep(bool wake)
return wake ? "wake" : "sleep";
}
-static void domain_init(struct xe_force_wake_domain *domain,
+static void mark_domain_initialized(struct xe_force_wake *fw,
+ enum xe_force_wake_domain_id id)
+{
+ fw->initialized_domains |= BIT(id);
+}
+
+static void init_domain(struct xe_force_wake *fw,
enum xe_force_wake_domain_id id,
struct xe_reg reg, struct xe_reg ack)
{
+ struct xe_force_wake_domain *domain = &fw->domains[id];
+
domain->id = id;
domain->reg_ctl = reg;
domain->reg_ack = ack;
domain->val = FORCEWAKE_MT(FORCEWAKE_KERNEL);
domain->mask = FORCEWAKE_MT_MASK(FORCEWAKE_KERNEL);
+
+ mark_domain_initialized(fw, id);
}
void xe_force_wake_init_gt(struct xe_gt *gt, struct xe_force_wake *fw)
@@ -43,13 +53,11 @@ void xe_force_wake_init_gt(struct xe_gt *gt, struct xe_force_wake *fw)
xe_gt_assert(gt, GRAPHICS_VER(gt_to_xe(gt)) >= 11);
if (xe->info.graphics_verx100 >= 1270) {
- domain_init(&fw->domains[XE_FW_DOMAIN_ID_GT],
- XE_FW_DOMAIN_ID_GT,
+ init_domain(fw, XE_FW_DOMAIN_ID_GT,
FORCEWAKE_GT,
FORCEWAKE_ACK_GT_MTL);
} else {
- domain_init(&fw->domains[XE_FW_DOMAIN_ID_GT],
- XE_FW_DOMAIN_ID_GT,
+ init_domain(fw, XE_FW_DOMAIN_ID_GT,
FORCEWAKE_GT,
FORCEWAKE_ACK_GT);
}
@@ -63,8 +71,7 @@ void xe_force_wake_init_engines(struct xe_gt *gt, struct xe_force_wake *fw)
xe_gt_assert(gt, GRAPHICS_VER(gt_to_xe(gt)) >= 11);
if (!xe_gt_is_media_type(gt))
- domain_init(&fw->domains[XE_FW_DOMAIN_ID_RENDER],
- XE_FW_DOMAIN_ID_RENDER,
+ init_domain(fw, XE_FW_DOMAIN_ID_RENDER,
FORCEWAKE_RENDER,
FORCEWAKE_ACK_RENDER);
@@ -72,8 +79,7 @@ void xe_force_wake_init_engines(struct xe_gt *gt, struct xe_force_wake *fw)
if (!(gt->info.engine_mask & BIT(i)))
continue;
- domain_init(&fw->domains[XE_FW_DOMAIN_ID_MEDIA_VDBOX0 + j],
- XE_FW_DOMAIN_ID_MEDIA_VDBOX0 + j,
+ init_domain(fw, XE_FW_DOMAIN_ID_MEDIA_VDBOX0 + j,
FORCEWAKE_MEDIA_VDBOX(j),
FORCEWAKE_ACK_MEDIA_VDBOX(j));
}
@@ -82,15 +88,13 @@ void xe_force_wake_init_engines(struct xe_gt *gt, struct xe_force_wake *fw)
if (!(gt->info.engine_mask & BIT(i)))
continue;
- domain_init(&fw->domains[XE_FW_DOMAIN_ID_MEDIA_VEBOX0 + j],
- XE_FW_DOMAIN_ID_MEDIA_VEBOX0 + j,
+ init_domain(fw, XE_FW_DOMAIN_ID_MEDIA_VEBOX0 + j,
FORCEWAKE_MEDIA_VEBOX(j),
FORCEWAKE_ACK_MEDIA_VEBOX(j));
}
if (gt->info.engine_mask & BIT(XE_HW_ENGINE_GSCCS0))
- domain_init(&fw->domains[XE_FW_DOMAIN_ID_GSC],
- XE_FW_DOMAIN_ID_GSC,
+ init_domain(fw, XE_FW_DOMAIN_ID_GSC,
FORCEWAKE_GSC,
FORCEWAKE_ACK_GSC);
}
@@ -100,7 +104,7 @@ static void __domain_ctl(struct xe_gt *gt, struct xe_force_wake_domain *domain,
if (IS_SRIOV_VF(gt_to_xe(gt)))
return;
- xe_mmio_write32(gt, domain->reg_ctl, domain->mask | (wake ? domain->val : 0));
+ xe_mmio_write32(&gt->mmio, domain->reg_ctl, domain->mask | (wake ? domain->val : 0));
}
static int __domain_wait(struct xe_gt *gt, struct xe_force_wake_domain *domain, bool wake)
@@ -111,7 +115,7 @@ static int __domain_wait(struct xe_gt *gt, struct xe_force_wake_domain *domain,
if (IS_SRIOV_VF(gt_to_xe(gt)))
return 0;
- ret = xe_mmio_wait32(gt, domain->reg_ack, domain->val, wake ? domain->val : 0,
+ ret = xe_mmio_wait32(&gt->mmio, domain->reg_ack, domain->val, wake ? domain->val : 0,
XE_FORCE_WAKE_ACK_TIMEOUT_MS * USEC_PER_MSEC,
&value, true);
if (ret)
@@ -156,52 +160,108 @@ static int domain_sleep_wait(struct xe_gt *gt,
(ffs(tmp__) - 1))) && \
domain__->reg_ctl.addr)
-int xe_force_wake_get(struct xe_force_wake *fw,
- enum xe_force_wake_domains domains)
+/**
+ * xe_force_wake_get() : Increase the domain refcount
+ * @fw: struct xe_force_wake
+ * @domains: forcewake domains to get refcount on
+ *
+ * This function wakes up @domains if they are asleep and takes references.
+ * If requested domain is XE_FORCEWAKE_ALL then only applicable/initialized
+ * domains will be considered for refcount and it is a caller responsibility
+ * to check returned ref if it includes any specific domain by using
+ * xe_force_wake_ref_has_domain() function. Caller must call
+ * xe_force_wake_put() function to decrease incremented refcounts.
+ *
+ * Return: opaque reference to woken domains or zero if none of requested
+ * domains were awake.
+ */
+unsigned int __must_check xe_force_wake_get(struct xe_force_wake *fw,
+ enum xe_force_wake_domains domains)
{
struct xe_gt *gt = fw->gt;
struct xe_force_wake_domain *domain;
- enum xe_force_wake_domains tmp, woken = 0;
+ unsigned int ref_incr = 0, awake_rqst = 0, awake_failed = 0;
+ unsigned int tmp, ref_rqst;
unsigned long flags;
- int ret = 0;
+ xe_gt_assert(gt, is_power_of_2(domains));
+ xe_gt_assert(gt, domains <= XE_FORCEWAKE_ALL);
+ xe_gt_assert(gt, domains == XE_FORCEWAKE_ALL || fw->initialized_domains & domains);
+
+ ref_rqst = (domains == XE_FORCEWAKE_ALL) ? fw->initialized_domains : domains;
spin_lock_irqsave(&fw->lock, flags);
- for_each_fw_domain_masked(domain, domains, fw, tmp) {
+ for_each_fw_domain_masked(domain, ref_rqst, fw, tmp) {
if (!domain->ref++) {
- woken |= BIT(domain->id);
+ awake_rqst |= BIT(domain->id);
domain_wake(gt, domain);
}
+ ref_incr |= BIT(domain->id);
}
- for_each_fw_domain_masked(domain, woken, fw, tmp) {
- ret |= domain_wake_wait(gt, domain);
+ for_each_fw_domain_masked(domain, awake_rqst, fw, tmp) {
+ if (domain_wake_wait(gt, domain) == 0) {
+ fw->awake_domains |= BIT(domain->id);
+ } else {
+ awake_failed |= BIT(domain->id);
+ --domain->ref;
+ }
}
- fw->awake_domains |= woken;
+ ref_incr &= ~awake_failed;
spin_unlock_irqrestore(&fw->lock, flags);
- return ret;
+ xe_gt_WARN(gt, awake_failed, "Forcewake domain%s %#x failed to acknowledge awake request\n",
+ str_plural(hweight_long(awake_failed)), awake_failed);
+
+ if (domains == XE_FORCEWAKE_ALL && ref_incr == fw->initialized_domains)
+ ref_incr |= XE_FORCEWAKE_ALL;
+
+ return ref_incr;
}
-int xe_force_wake_put(struct xe_force_wake *fw,
- enum xe_force_wake_domains domains)
+/**
+ * xe_force_wake_put - Decrement the refcount and put domain to sleep if refcount becomes 0
+ * @fw: Pointer to the force wake structure
+ * @fw_ref: return of xe_force_wake_get()
+ *
+ * This function reduces the reference counts for domains in fw_ref. If
+ * refcount for any of the specified domain reaches 0, it puts the domain to sleep
+ * and waits for acknowledgment for domain to sleep within 50 milisec timeout.
+ * Warns in case of timeout of ack from domain.
+ */
+void xe_force_wake_put(struct xe_force_wake *fw, unsigned int fw_ref)
{
struct xe_gt *gt = fw->gt;
struct xe_force_wake_domain *domain;
- enum xe_force_wake_domains tmp, sleep = 0;
+ unsigned int tmp, sleep = 0;
unsigned long flags;
- int ret = 0;
+ int ack_fail = 0;
+
+ /*
+ * Avoid unnecessary lock and unlock when the function is called
+ * in error path of individual domains.
+ */
+ if (!fw_ref)
+ return;
+
+ if (xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL))
+ fw_ref = fw->initialized_domains;
spin_lock_irqsave(&fw->lock, flags);
- for_each_fw_domain_masked(domain, domains, fw, tmp) {
+ for_each_fw_domain_masked(domain, fw_ref, fw, tmp) {
+ xe_gt_assert(gt, domain->ref);
+
if (!--domain->ref) {
sleep |= BIT(domain->id);
domain_sleep(gt, domain);
}
}
for_each_fw_domain_masked(domain, sleep, fw, tmp) {
- ret |= domain_sleep_wait(gt, domain);
+ if (domain_sleep_wait(gt, domain) == 0)
+ fw->awake_domains &= ~BIT(domain->id);
+ else
+ ack_fail |= BIT(domain->id);
}
- fw->awake_domains &= ~sleep;
spin_unlock_irqrestore(&fw->lock, flags);
- return ret;
+ xe_gt_WARN(gt, ack_fail, "Forcewake domain%s %#x failed to acknowledge sleep request\n",
+ str_plural(hweight_long(ack_fail)), ack_fail);
}
diff --git a/drivers/gpu/drm/xe/xe_force_wake.h b/drivers/gpu/drm/xe/xe_force_wake.h
index a2577672f4e3..0e3e84bfa51c 100644
--- a/drivers/gpu/drm/xe/xe_force_wake.h
+++ b/drivers/gpu/drm/xe/xe_force_wake.h
@@ -15,10 +15,9 @@ void xe_force_wake_init_gt(struct xe_gt *gt,
struct xe_force_wake *fw);
void xe_force_wake_init_engines(struct xe_gt *gt,
struct xe_force_wake *fw);
-int xe_force_wake_get(struct xe_force_wake *fw,
- enum xe_force_wake_domains domains);
-int xe_force_wake_put(struct xe_force_wake *fw,
- enum xe_force_wake_domains domains);
+unsigned int __must_check xe_force_wake_get(struct xe_force_wake *fw,
+ enum xe_force_wake_domains domains);
+void xe_force_wake_put(struct xe_force_wake *fw, unsigned int fw_ref);
static inline int
xe_force_wake_ref(struct xe_force_wake *fw,
@@ -46,4 +45,20 @@ xe_force_wake_assert_held(struct xe_force_wake *fw,
xe_gt_assert(fw->gt, fw->awake_domains & domain);
}
+/**
+ * xe_force_wake_ref_has_domain - verifies if the domains are in fw_ref
+ * @fw_ref : the force_wake reference
+ * @domain : forcewake domain to verify
+ *
+ * This function confirms whether the @fw_ref includes a reference to the
+ * specified @domain.
+ *
+ * Return: true if domain is refcounted.
+ */
+static inline bool
+xe_force_wake_ref_has_domain(unsigned int fw_ref, enum xe_force_wake_domains domain)
+{
+ return fw_ref & domain;
+}
+
#endif
diff --git a/drivers/gpu/drm/xe/xe_force_wake_types.h b/drivers/gpu/drm/xe/xe_force_wake_types.h
index ed0edc2cdf9f..899fbbcb3ea9 100644
--- a/drivers/gpu/drm/xe/xe_force_wake_types.h
+++ b/drivers/gpu/drm/xe/xe_force_wake_types.h
@@ -48,7 +48,7 @@ enum xe_force_wake_domains {
XE_FW_MEDIA_VEBOX2 = BIT(XE_FW_DOMAIN_ID_MEDIA_VEBOX2),
XE_FW_MEDIA_VEBOX3 = BIT(XE_FW_DOMAIN_ID_MEDIA_VEBOX3),
XE_FW_GSC = BIT(XE_FW_DOMAIN_ID_GSC),
- XE_FORCEWAKE_ALL = BIT(XE_FW_DOMAIN_ID_COUNT) - 1
+ XE_FORCEWAKE_ALL = BIT(XE_FW_DOMAIN_ID_COUNT)
};
/**
@@ -78,7 +78,9 @@ struct xe_force_wake {
/** @lock: protects everything force wake struct */
spinlock_t lock;
/** @awake_domains: mask of all domains awake */
- enum xe_force_wake_domains awake_domains;
+ unsigned int awake_domains;
+ /** @initialized_domains: mask of all initialized domains */
+ unsigned int initialized_domains;
/** @domains: force wake domains */
struct xe_force_wake_domain domains[XE_FW_DOMAIN_ID_COUNT];
};
diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c
index ff19eca5d358..558fac8bb6fb 100644
--- a/drivers/gpu/drm/xe/xe_ggtt.c
+++ b/drivers/gpu/drm/xe/xe_ggtt.c
@@ -5,6 +5,7 @@
#include "xe_ggtt.h"
+#include <linux/fault-inject.h>
#include <linux/io-64-nonatomic-lo-hi.h>
#include <linux/sizes.h>
@@ -107,8 +108,10 @@ static unsigned int probe_gsm_size(struct pci_dev *pdev)
static void ggtt_update_access_counter(struct xe_ggtt *ggtt)
{
- struct xe_gt *gt = XE_WA(ggtt->tile->primary_gt, 22019338487) ? ggtt->tile->primary_gt :
- ggtt->tile->media_gt;
+ struct xe_tile *tile = ggtt->tile;
+ struct xe_gt *affected_gt = XE_WA(tile->primary_gt, 22019338487) ?
+ tile->primary_gt : tile->media_gt;
+ struct xe_mmio *mmio = &affected_gt->mmio;
u32 max_gtt_writes = XE_WA(ggtt->tile->primary_gt, 22019338487) ? 1100 : 63;
/*
* Wa_22019338487: GMD_ID is a RO register, a dummy write forces gunit
@@ -118,7 +121,7 @@ static void ggtt_update_access_counter(struct xe_ggtt *ggtt)
lockdep_assert_held(&ggtt->lock);
if ((++ggtt->access_count % max_gtt_writes) == 0) {
- xe_mmio_write32(gt, GMD_ID, 0x0);
+ xe_mmio_write32(mmio, GMD_ID, 0x0);
ggtt->access_count = 0;
}
}
@@ -243,7 +246,7 @@ int xe_ggtt_init_early(struct xe_ggtt *ggtt)
else
ggtt->pt_ops = &xelp_pt_ops;
- ggtt->wq = alloc_workqueue("xe-ggtt-wq", 0, 0);
+ ggtt->wq = alloc_workqueue("xe-ggtt-wq", 0, WQ_MEM_RECLAIM);
drm_mm_init(&ggtt->mm, xe_wopcm_size(xe),
ggtt->size - xe_wopcm_size(xe));
@@ -262,6 +265,7 @@ int xe_ggtt_init_early(struct xe_ggtt *ggtt)
return 0;
}
+ALLOW_ERROR_INJECTION(xe_ggtt_init_early, ERRNO); /* See xe_pci_probe() */
static void xe_ggtt_invalidate(struct xe_ggtt *ggtt);
@@ -405,7 +409,7 @@ static void xe_ggtt_invalidate(struct xe_ggtt *ggtt)
* vs. correct GGTT page. Not particularly a hot code path so blindly
* do a mmio read here which results in GuC reading correct GGTT page.
*/
- xe_mmio_read32(xe_root_mmio_gt(xe), VF_CAP_REG);
+ xe_mmio_read32(xe_root_tile_mmio(xe), VF_CAP_REG);
/* Each GT in a tile has its own TLB to cache GGTT lookups */
ggtt_invalidate_gt_tlb(ggtt->tile->primary_gt);
@@ -609,7 +613,7 @@ static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo,
u64 start, u64 end)
{
int err;
- u64 alignment = XE_PAGE_SIZE;
+ u64 alignment = bo->min_align > 0 ? bo->min_align : XE_PAGE_SIZE;
if (xe_bo_is_vram(bo) && ggtt->flags & XE_GGTT_FLAGS_64K)
alignment = SZ_64K;
diff --git a/drivers/gpu/drm/xe/xe_gsc.c b/drivers/gpu/drm/xe/xe_gsc.c
index 6fbea70d3d36..1eb791ddc375 100644
--- a/drivers/gpu/drm/xe/xe_gsc.c
+++ b/drivers/gpu/drm/xe/xe_gsc.c
@@ -34,6 +34,7 @@
#include "instructions/xe_gsc_commands.h"
#include "regs/xe_gsc_regs.h"
#include "regs/xe_gt_regs.h"
+#include "regs/xe_irq_regs.h"
static struct xe_gt *
gsc_to_gt(struct xe_gsc *gsc)
@@ -179,7 +180,7 @@ out_bo:
static int gsc_fw_is_loaded(struct xe_gt *gt)
{
- return xe_mmio_read32(gt, HECI_FWSTS1(MTL_GSC_HECI1_BASE)) &
+ return xe_mmio_read32(&gt->mmio, HECI_FWSTS1(MTL_GSC_HECI1_BASE)) &
HECI1_FWSTS1_INIT_COMPLETE;
}
@@ -190,7 +191,7 @@ static int gsc_fw_wait(struct xe_gt *gt)
* executed by the GSCCS. To account for possible submission delays or
* other issues, we use a 500ms timeout in the wait here.
*/
- return xe_mmio_wait32(gt, HECI_FWSTS1(MTL_GSC_HECI1_BASE),
+ return xe_mmio_wait32(&gt->mmio, HECI_FWSTS1(MTL_GSC_HECI1_BASE),
HECI1_FWSTS1_INIT_COMPLETE,
HECI1_FWSTS1_INIT_COMPLETE,
500 * USEC_PER_MSEC, NULL, false);
@@ -260,19 +261,17 @@ static int gsc_upload_and_init(struct xe_gsc *gsc)
{
struct xe_gt *gt = gsc_to_gt(gsc);
struct xe_tile *tile = gt_to_tile(gt);
+ unsigned int fw_ref;
int ret;
if (XE_WA(tile->primary_gt, 14018094691)) {
- ret = xe_force_wake_get(gt_to_fw(tile->primary_gt), XE_FORCEWAKE_ALL);
+ fw_ref = xe_force_wake_get(gt_to_fw(tile->primary_gt), XE_FORCEWAKE_ALL);
/*
* If the forcewake fails we want to keep going, because the worst
* case outcome in failing to apply the WA is that PXP won't work,
- * which is not fatal. We still throw a warning so the issue is
- * seen if it happens.
+ * which is not fatal. Forcewake get warns implicitly in case of failure
*/
- xe_gt_WARN_ON(tile->primary_gt, ret);
-
xe_gt_mcr_multicast_write(tile->primary_gt,
EU_SYSTOLIC_LIC_THROTTLE_CTL_WITH_LOCK,
EU_SYSTOLIC_LIC_THROTTLE_CTL_LOCK_BIT);
@@ -281,7 +280,7 @@ static int gsc_upload_and_init(struct xe_gsc *gsc)
ret = gsc_upload(gsc);
if (XE_WA(tile->primary_gt, 14018094691))
- xe_force_wake_put(gt_to_fw(tile->primary_gt), XE_FORCEWAKE_ALL);
+ xe_force_wake_put(gt_to_fw(tile->primary_gt), fw_ref);
if (ret)
return ret;
@@ -330,7 +329,7 @@ static int gsc_er_complete(struct xe_gt *gt)
* so in that scenario we're always guaranteed to find the correct
* value.
*/
- er_status = xe_mmio_read32(gt, GSCI_TIMER_STATUS) & GSCI_TIMER_STATUS_VALUE;
+ er_status = xe_mmio_read32(&gt->mmio, GSCI_TIMER_STATUS) & GSCI_TIMER_STATUS_VALUE;
if (er_status == GSCI_TIMER_STATUS_TIMER_EXPIRED) {
/*
@@ -351,6 +350,7 @@ static void gsc_work(struct work_struct *work)
struct xe_gsc *gsc = container_of(work, typeof(*gsc), work);
struct xe_gt *gt = gsc_to_gt(gsc);
struct xe_device *xe = gt_to_xe(gt);
+ unsigned int fw_ref;
u32 actions;
int ret;
@@ -360,7 +360,7 @@ static void gsc_work(struct work_struct *work)
spin_unlock_irq(&gsc->lock);
xe_pm_runtime_get(xe);
- xe_gt_WARN_ON(gt, xe_force_wake_get(gt_to_fw(gt), XE_FW_GSC));
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GSC);
if (actions & GSC_ACTION_ER_COMPLETE) {
ret = gsc_er_complete(gt);
@@ -380,7 +380,7 @@ static void gsc_work(struct work_struct *work)
xe_gsc_proxy_request_handler(gsc);
out:
- xe_force_wake_put(gt_to_fw(gt), XE_FW_GSC);
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
xe_pm_runtime_put(xe);
}
@@ -581,11 +581,11 @@ void xe_gsc_wa_14015076503(struct xe_gt *gt, bool prep)
if (!XE_WA(gt, 14015076503) || !gsc_fw_is_loaded(gt))
return;
- xe_mmio_rmw32(gt, HECI_H_GS1(MTL_GSC_HECI2_BASE), gs1_clr, gs1_set);
+ xe_mmio_rmw32(&gt->mmio, HECI_H_GS1(MTL_GSC_HECI2_BASE), gs1_clr, gs1_set);
if (prep) {
/* make sure the reset bit is clear when writing the CSR reg */
- xe_mmio_rmw32(gt, HECI_H_CSR(MTL_GSC_HECI2_BASE),
+ xe_mmio_rmw32(&gt->mmio, HECI_H_CSR(MTL_GSC_HECI2_BASE),
HECI_H_CSR_RST, HECI_H_CSR_IG);
msleep(200);
}
@@ -599,7 +599,8 @@ void xe_gsc_wa_14015076503(struct xe_gt *gt, bool prep)
void xe_gsc_print_info(struct xe_gsc *gsc, struct drm_printer *p)
{
struct xe_gt *gt = gsc_to_gt(gsc);
- int err;
+ struct xe_mmio *mmio = &gt->mmio;
+ unsigned int fw_ref;
xe_uc_fw_print(&gsc->fw, p);
@@ -608,17 +609,17 @@ void xe_gsc_print_info(struct xe_gsc *gsc, struct drm_printer *p)
if (!xe_uc_fw_is_enabled(&gsc->fw))
return;
- err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GSC);
- if (err)
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GSC);
+ if (!fw_ref)
return;
drm_printf(p, "\nHECI1 FWSTS: 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x\n",
- xe_mmio_read32(gt, HECI_FWSTS1(MTL_GSC_HECI1_BASE)),
- xe_mmio_read32(gt, HECI_FWSTS2(MTL_GSC_HECI1_BASE)),
- xe_mmio_read32(gt, HECI_FWSTS3(MTL_GSC_HECI1_BASE)),
- xe_mmio_read32(gt, HECI_FWSTS4(MTL_GSC_HECI1_BASE)),
- xe_mmio_read32(gt, HECI_FWSTS5(MTL_GSC_HECI1_BASE)),
- xe_mmio_read32(gt, HECI_FWSTS6(MTL_GSC_HECI1_BASE)));
-
- xe_force_wake_put(gt_to_fw(gt), XE_FW_GSC);
+ xe_mmio_read32(mmio, HECI_FWSTS1(MTL_GSC_HECI1_BASE)),
+ xe_mmio_read32(mmio, HECI_FWSTS2(MTL_GSC_HECI1_BASE)),
+ xe_mmio_read32(mmio, HECI_FWSTS3(MTL_GSC_HECI1_BASE)),
+ xe_mmio_read32(mmio, HECI_FWSTS4(MTL_GSC_HECI1_BASE)),
+ xe_mmio_read32(mmio, HECI_FWSTS5(MTL_GSC_HECI1_BASE)),
+ xe_mmio_read32(mmio, HECI_FWSTS6(MTL_GSC_HECI1_BASE)));
+
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
}
diff --git a/drivers/gpu/drm/xe/xe_gsc_proxy.c b/drivers/gpu/drm/xe/xe_gsc_proxy.c
index 2d6ea8c01445..fc64b45d324b 100644
--- a/drivers/gpu/drm/xe/xe_gsc_proxy.c
+++ b/drivers/gpu/drm/xe/xe_gsc_proxy.c
@@ -65,7 +65,7 @@ gsc_to_gt(struct xe_gsc *gsc)
bool xe_gsc_proxy_init_done(struct xe_gsc *gsc)
{
struct xe_gt *gt = gsc_to_gt(gsc);
- u32 fwsts1 = xe_mmio_read32(gt, HECI_FWSTS1(MTL_GSC_HECI1_BASE));
+ u32 fwsts1 = xe_mmio_read32(&gt->mmio, HECI_FWSTS1(MTL_GSC_HECI1_BASE));
return REG_FIELD_GET(HECI1_FWSTS1_CURRENT_STATE, fwsts1) ==
HECI1_FWSTS1_PROXY_STATE_NORMAL;
@@ -78,7 +78,7 @@ static void __gsc_proxy_irq_rmw(struct xe_gsc *gsc, u32 clr, u32 set)
/* make sure we never accidentally write the RST bit */
clr |= HECI_H_CSR_RST;
- xe_mmio_rmw32(gt, HECI_H_CSR(MTL_GSC_HECI2_BASE), clr, set);
+ xe_mmio_rmw32(&gt->mmio, HECI_H_CSR(MTL_GSC_HECI2_BASE), clr, set);
}
static void gsc_proxy_irq_clear(struct xe_gsc *gsc)
@@ -450,22 +450,21 @@ void xe_gsc_proxy_remove(struct xe_gsc *gsc)
{
struct xe_gt *gt = gsc_to_gt(gsc);
struct xe_device *xe = gt_to_xe(gt);
- int err = 0;
+ unsigned int fw_ref = 0;
if (!gsc->proxy.component_added)
return;
/* disable HECI2 IRQs */
xe_pm_runtime_get(xe);
- err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GSC);
- if (err)
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GSC);
+ if (!fw_ref)
xe_gt_err(gt, "failed to get forcewake to disable GSC interrupts\n");
/* try do disable irq even if forcewake failed */
gsc_proxy_irq_toggle(gsc, false);
- if (!err)
- xe_force_wake_put(gt_to_fw(gt), XE_FW_GSC);
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
xe_pm_runtime_put(xe);
xe_gsc_wait_for_worker_completion(gsc);
diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index d5fd6a089b7c..d6744be01a68 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -77,7 +77,8 @@ struct xe_gt *xe_gt_alloc(struct xe_tile *tile)
return ERR_PTR(-ENOMEM);
gt->tile = tile;
- gt->ordered_wq = alloc_ordered_workqueue("gt-ordered-wq", 0);
+ gt->ordered_wq = alloc_ordered_workqueue("gt-ordered-wq",
+ WQ_MEM_RECLAIM);
err = drmm_add_action_or_reset(&gt_to_xe(gt)->drm, gt_fini, gt);
if (err)
@@ -97,14 +98,14 @@ void xe_gt_sanitize(struct xe_gt *gt)
static void xe_gt_enable_host_l2_vram(struct xe_gt *gt)
{
+ unsigned int fw_ref;
u32 reg;
- int err;
if (!XE_WA(gt, 16023588340))
return;
- err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
- if (WARN_ON(err))
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+ if (!fw_ref)
return;
if (!xe_gt_is_media_type(gt)) {
@@ -114,13 +115,13 @@ static void xe_gt_enable_host_l2_vram(struct xe_gt *gt)
}
xe_gt_mcr_multicast_write(gt, XEHPC_L3CLOS_MASK(3), 0x3);
- xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
}
static void xe_gt_disable_host_l2_vram(struct xe_gt *gt)
{
+ unsigned int fw_ref;
u32 reg;
- int err;
if (!XE_WA(gt, 16023588340))
return;
@@ -128,15 +129,15 @@ static void xe_gt_disable_host_l2_vram(struct xe_gt *gt)
if (xe_gt_is_media_type(gt))
return;
- err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
- if (WARN_ON(err))
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+ if (!fw_ref)
return;
reg = xe_gt_mcr_unicast_read_any(gt, XE2_GAMREQSTRM_CTRL);
reg &= ~CG_DIS_CNTLBUS;
xe_gt_mcr_multicast_write(gt, XE2_GAMREQSTRM_CTRL, reg);
- xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
}
/**
@@ -244,7 +245,7 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q)
else if (entry->clr_bits + 1)
val = (reg.mcr ?
xe_gt_mcr_unicast_read_any(gt, reg_mcr) :
- xe_mmio_read32(gt, reg)) & (~entry->clr_bits);
+ xe_mmio_read32(&gt->mmio, reg)) & (~entry->clr_bits);
else
val = 0;
@@ -402,11 +403,14 @@ static void dump_pat_on_error(struct xe_gt *gt)
static int gt_fw_domain_init(struct xe_gt *gt)
{
+ unsigned int fw_ref;
int err, i;
- err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
- if (err)
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+ if (!fw_ref) {
+ err = -ETIMEDOUT;
goto err_hw_fence_irq;
+ }
if (!xe_gt_is_media_type(gt)) {
err = xe_ggtt_init(gt_to_tile(gt)->mem.ggtt);
@@ -439,16 +443,14 @@ static int gt_fw_domain_init(struct xe_gt *gt)
* Stash hardware-reported version. Since this register does not exist
* on pre-MTL platforms, reading it there will (correctly) return 0.
*/
- gt->info.gmdid = xe_mmio_read32(gt, GMD_ID);
-
- err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
- XE_WARN_ON(err);
+ gt->info.gmdid = xe_mmio_read32(&gt->mmio, GMD_ID);
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
return 0;
err_force_wake:
dump_pat_on_error(gt);
- xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
err_hw_fence_irq:
for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i)
xe_hw_fence_irq_finish(&gt->fence_irq[i]);
@@ -458,11 +460,14 @@ err_hw_fence_irq:
static int all_fw_domain_init(struct xe_gt *gt)
{
+ unsigned int fw_ref;
int err, i;
- err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
- if (err)
- goto err_hw_fence_irq;
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+ if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) {
+ err = -ETIMEDOUT;
+ goto err_force_wake;
+ }
xe_gt_mcr_set_implicit_defaults(gt);
xe_reg_sr_apply_mmio(&gt->reg_sr, gt);
@@ -526,14 +531,12 @@ static int all_fw_domain_init(struct xe_gt *gt)
if (IS_SRIOV_PF(gt_to_xe(gt)))
xe_gt_sriov_pf_init_hw(gt);
- err = xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL);
- XE_WARN_ON(err);
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
return 0;
err_force_wake:
- xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL);
-err_hw_fence_irq:
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i)
xe_hw_fence_irq_finish(&gt->fence_irq[i]);
@@ -546,11 +549,12 @@ err_hw_fence_irq:
*/
int xe_gt_init_hwconfig(struct xe_gt *gt)
{
+ unsigned int fw_ref;
int err;
- err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
- if (err)
- goto out;
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+ if (!fw_ref)
+ return -ETIMEDOUT;
xe_gt_mcr_init_early(gt);
xe_pat_init(gt);
@@ -568,8 +572,7 @@ int xe_gt_init_hwconfig(struct xe_gt *gt)
xe_gt_enable_host_l2_vram(gt);
out_fw:
- xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
-out:
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
return err;
}
@@ -622,6 +625,30 @@ int xe_gt_init(struct xe_gt *gt)
return 0;
}
+/**
+ * xe_gt_mmio_init() - Initialize GT's MMIO access
+ * @gt: the GT object
+ *
+ * Initialize GT's MMIO accessor, which will be used to access registers inside
+ * this GT.
+ */
+void xe_gt_mmio_init(struct xe_gt *gt)
+{
+ struct xe_tile *tile = gt_to_tile(gt);
+
+ gt->mmio.regs = tile->mmio.regs;
+ gt->mmio.regs_size = tile->mmio.regs_size;
+ gt->mmio.tile = tile;
+
+ if (gt->info.type == XE_GT_TYPE_MEDIA) {
+ gt->mmio.adj_offset = MEDIA_GT_GSI_OFFSET;
+ gt->mmio.adj_limit = MEDIA_GT_GSI_LENGTH;
+ }
+
+ if (IS_SRIOV_VF(gt_to_xe(gt)))
+ gt->mmio.sriov_vf_gt = gt;
+}
+
void xe_gt_record_user_engines(struct xe_gt *gt)
{
struct xe_hw_engine *hwe;
@@ -649,8 +676,8 @@ static int do_gt_reset(struct xe_gt *gt)
xe_gsc_wa_14015076503(gt, true);
- xe_mmio_write32(gt, GDRST, GRDOM_FULL);
- err = xe_mmio_wait32(gt, GDRST, GRDOM_FULL, 0, 5000, NULL, false);
+ xe_mmio_write32(&gt->mmio, GDRST, GRDOM_FULL);
+ err = xe_mmio_wait32(&gt->mmio, GDRST, GRDOM_FULL, 0, 5000, NULL, false);
if (err)
xe_gt_err(gt, "failed to clear GRDOM_FULL (%pe)\n",
ERR_PTR(err));
@@ -740,6 +767,7 @@ static int do_gt_restart(struct xe_gt *gt)
static int gt_reset(struct xe_gt *gt)
{
+ unsigned int fw_ref;
int err;
if (xe_device_wedged(gt_to_xe(gt)))
@@ -760,9 +788,11 @@ static int gt_reset(struct xe_gt *gt)
xe_gt_sanitize(gt);
- err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
- if (err)
- goto err_msg;
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+ if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) {
+ err = -ETIMEDOUT;
+ goto err_out;
+ }
xe_uc_gucrc_disable(&gt->uc);
xe_uc_stop_prepare(&gt->uc);
@@ -780,8 +810,7 @@ static int gt_reset(struct xe_gt *gt)
if (err)
goto err_out;
- err = xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL);
- XE_WARN_ON(err);
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
xe_pm_runtime_put(gt_to_xe(gt));
xe_gt_info(gt, "reset done\n");
@@ -789,8 +818,7 @@ static int gt_reset(struct xe_gt *gt)
return 0;
err_out:
- XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
-err_msg:
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
XE_WARN_ON(xe_uc_start(&gt->uc));
err_fail:
xe_gt_err(gt, "reset failed (%pe)\n", ERR_PTR(err));
@@ -810,7 +838,7 @@ static void gt_reset_worker(struct work_struct *w)
void xe_gt_reset_async(struct xe_gt *gt)
{
- xe_gt_info(gt, "trying reset\n");
+ xe_gt_info(gt, "trying reset from %ps\n", __builtin_return_address(0));
/* Don't do a reset while one is already in flight */
if (!xe_fault_inject_gt_reset() && xe_uc_reset_prepare(&gt->uc))
@@ -822,22 +850,25 @@ void xe_gt_reset_async(struct xe_gt *gt)
void xe_gt_suspend_prepare(struct xe_gt *gt)
{
- XE_WARN_ON(xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+ unsigned int fw_ref;
+
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
xe_uc_stop_prepare(&gt->uc);
- XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
}
int xe_gt_suspend(struct xe_gt *gt)
{
+ unsigned int fw_ref;
int err;
xe_gt_dbg(gt, "suspending\n");
xe_gt_sanitize(gt);
- err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
- if (err)
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+ if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL))
goto err_msg;
err = xe_uc_suspend(&gt->uc);
@@ -848,19 +879,29 @@ int xe_gt_suspend(struct xe_gt *gt)
xe_gt_disable_host_l2_vram(gt);
- XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
xe_gt_dbg(gt, "suspended\n");
return 0;
-err_force_wake:
- XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
err_msg:
+ err = -ETIMEDOUT;
+err_force_wake:
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
xe_gt_err(gt, "suspend failed (%pe)\n", ERR_PTR(err));
return err;
}
+void xe_gt_shutdown(struct xe_gt *gt)
+{
+ unsigned int fw_ref;
+
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+ do_gt_reset(gt);
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
+}
+
/**
* xe_gt_sanitize_freq() - Restore saved frequencies if necessary.
* @gt: the GT object
@@ -883,11 +924,12 @@ int xe_gt_sanitize_freq(struct xe_gt *gt)
int xe_gt_resume(struct xe_gt *gt)
{
+ unsigned int fw_ref;
int err;
xe_gt_dbg(gt, "resuming\n");
- err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
- if (err)
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+ if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL))
goto err_msg;
err = do_gt_restart(gt);
@@ -896,14 +938,15 @@ int xe_gt_resume(struct xe_gt *gt)
xe_gt_idle_enable_pg(gt);
- XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
xe_gt_dbg(gt, "resumed\n");
return 0;
-err_force_wake:
- XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
err_msg:
+ err = -ETIMEDOUT;
+err_force_wake:
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
xe_gt_err(gt, "resume failed (%pe)\n", ERR_PTR(err));
return err;
diff --git a/drivers/gpu/drm/xe/xe_gt.h b/drivers/gpu/drm/xe/xe_gt.h
index ee138e9768a2..82b9b7f82fca 100644
--- a/drivers/gpu/drm/xe/xe_gt.h
+++ b/drivers/gpu/drm/xe/xe_gt.h
@@ -31,6 +31,7 @@ struct xe_gt *xe_gt_alloc(struct xe_tile *tile);
int xe_gt_init_hwconfig(struct xe_gt *gt);
int xe_gt_init_early(struct xe_gt *gt);
int xe_gt_init(struct xe_gt *gt);
+void xe_gt_mmio_init(struct xe_gt *gt);
void xe_gt_declare_wedged(struct xe_gt *gt);
int xe_gt_record_default_lrcs(struct xe_gt *gt);
@@ -48,6 +49,7 @@ void xe_gt_record_user_engines(struct xe_gt *gt);
void xe_gt_suspend_prepare(struct xe_gt *gt);
int xe_gt_suspend(struct xe_gt *gt);
+void xe_gt_shutdown(struct xe_gt *gt);
int xe_gt_resume(struct xe_gt *gt);
void xe_gt_reset_async(struct xe_gt *gt);
void xe_gt_sanitize(struct xe_gt *gt);
diff --git a/drivers/gpu/drm/xe/xe_gt_ccs_mode.c b/drivers/gpu/drm/xe/xe_gt_ccs_mode.c
index ffcbd05671fc..b6adfb9f2030 100644
--- a/drivers/gpu/drm/xe/xe_gt_ccs_mode.c
+++ b/drivers/gpu/drm/xe/xe_gt_ccs_mode.c
@@ -74,7 +74,7 @@ static void __xe_gt_apply_ccs_mode(struct xe_gt *gt, u32 num_engines)
* platforms as these bits are unused there.
*/
mode |= CCS_MODE_CSLICE_0_3_MASK << 16;
- xe_mmio_write32(gt, CCS_MODE, mode);
+ xe_mmio_write32(&gt->mmio, CCS_MODE, mode);
xe_gt_dbg(gt, "CCS_MODE=%x config:%08x, num_engines:%d, num_slices:%d\n",
mode, config, num_engines, num_slices);
diff --git a/drivers/gpu/drm/xe/xe_gt_clock.c b/drivers/gpu/drm/xe/xe_gt_clock.c
index 86c2d62b4bdc..cc2ae159298e 100644
--- a/drivers/gpu/drm/xe/xe_gt_clock.c
+++ b/drivers/gpu/drm/xe/xe_gt_clock.c
@@ -17,7 +17,7 @@
static u32 read_reference_ts_freq(struct xe_gt *gt)
{
- u32 ts_override = xe_mmio_read32(gt, TIMESTAMP_OVERRIDE);
+ u32 ts_override = xe_mmio_read32(&gt->mmio, TIMESTAMP_OVERRIDE);
u32 base_freq, frac_freq;
base_freq = REG_FIELD_GET(TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_MASK,
@@ -57,7 +57,7 @@ static u32 get_crystal_clock_freq(u32 rpm_config_reg)
int xe_gt_clock_init(struct xe_gt *gt)
{
- u32 ctc_reg = xe_mmio_read32(gt, CTC_MODE);
+ u32 ctc_reg = xe_mmio_read32(&gt->mmio, CTC_MODE);
u32 freq = 0;
/* Assuming gen11+ so assert this assumption is correct */
@@ -66,7 +66,7 @@ int xe_gt_clock_init(struct xe_gt *gt)
if (ctc_reg & CTC_SOURCE_DIVIDE_LOGIC) {
freq = read_reference_ts_freq(gt);
} else {
- u32 c0 = xe_mmio_read32(gt, RPM_CONFIG0);
+ u32 c0 = xe_mmio_read32(&gt->mmio, RPM_CONFIG0);
freq = get_crystal_clock_freq(c0);
diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c
index 8f95d3a5949b..3e8c351a0eab 100644
--- a/drivers/gpu/drm/xe/xe_gt_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c
@@ -15,6 +15,7 @@
#include "xe_ggtt.h"
#include "xe_gt.h"
#include "xe_gt_mcr.h"
+#include "xe_gt_idle.h"
#include "xe_gt_sriov_pf_debugfs.h"
#include "xe_gt_sriov_vf_debugfs.h"
#include "xe_gt_stats.h"
@@ -89,26 +90,36 @@ static int hw_engines(struct xe_gt *gt, struct drm_printer *p)
struct xe_device *xe = gt_to_xe(gt);
struct xe_hw_engine *hwe;
enum xe_hw_engine_id id;
- int err;
+ unsigned int fw_ref;
xe_pm_runtime_get(xe);
- err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
- if (err) {
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+ if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) {
xe_pm_runtime_put(xe);
- return err;
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
+ return -ETIMEDOUT;
}
for_each_hw_engine(hwe, gt, id)
xe_hw_engine_print(hwe, p);
- err = xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
xe_pm_runtime_put(xe);
- if (err)
- return err;
return 0;
}
+static int powergate_info(struct xe_gt *gt, struct drm_printer *p)
+{
+ int ret;
+
+ xe_pm_runtime_get(gt_to_xe(gt));
+ ret = xe_gt_idle_pg_print(gt, p);
+ xe_pm_runtime_put(gt_to_xe(gt));
+
+ return ret;
+}
+
static int force_reset(struct xe_gt *gt, struct drm_printer *p)
{
xe_pm_runtime_get(gt_to_xe(gt));
@@ -288,6 +299,7 @@ static const struct drm_info_list debugfs_list[] = {
{"topology", .show = xe_gt_debugfs_simple_show, .data = topology},
{"steering", .show = xe_gt_debugfs_simple_show, .data = steering},
{"ggtt", .show = xe_gt_debugfs_simple_show, .data = ggtt},
+ {"powergate_info", .show = xe_gt_debugfs_simple_show, .data = powergate_info},
{"register-save-restore", .show = xe_gt_debugfs_simple_show, .data = register_save_restore},
{"workarounds", .show = xe_gt_debugfs_simple_show, .data = workarounds},
{"pat", .show = xe_gt_debugfs_simple_show, .data = pat},
diff --git a/drivers/gpu/drm/xe/xe_gt_freq.c b/drivers/gpu/drm/xe/xe_gt_freq.c
index ab76973f3e1e..6bd39b2c5003 100644
--- a/drivers/gpu/drm/xe/xe_gt_freq.c
+++ b/drivers/gpu/drm/xe/xe_gt_freq.c
@@ -11,9 +11,9 @@
#include <drm/drm_managed.h>
#include <drm/drm_print.h>
-#include "xe_device_types.h"
#include "xe_gt_sysfs.h"
#include "xe_gt_throttle.h"
+#include "xe_gt_types.h"
#include "xe_guc_pc.h"
#include "xe_pm.h"
diff --git a/drivers/gpu/drm/xe/xe_gt_idle.c b/drivers/gpu/drm/xe/xe_gt_idle.c
index 67aba4140510..fd80afeef56a 100644
--- a/drivers/gpu/drm/xe/xe_gt_idle.c
+++ b/drivers/gpu/drm/xe/xe_gt_idle.c
@@ -98,7 +98,10 @@ static u64 get_residency_ms(struct xe_gt_idle *gtidle, u64 cur_residency)
void xe_gt_idle_enable_pg(struct xe_gt *gt)
{
struct xe_device *xe = gt_to_xe(gt);
- u32 pg_enable;
+ struct xe_gt_idle *gtidle = &gt->gtidle;
+ struct xe_mmio *mmio = &gt->mmio;
+ u32 vcs_mask, vecs_mask;
+ unsigned int fw_ref;
int i, j;
if (IS_SRIOV_VF(xe))
@@ -110,39 +113,136 @@ void xe_gt_idle_enable_pg(struct xe_gt *gt)
xe_device_assert_mem_access(gt_to_xe(gt));
- pg_enable = RENDER_POWERGATE_ENABLE | MEDIA_POWERGATE_ENABLE;
+ vcs_mask = xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_VIDEO_DECODE);
+ vecs_mask = xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_VIDEO_ENHANCE);
+
+ if (vcs_mask || vecs_mask)
+ gtidle->powergate_enable = MEDIA_POWERGATE_ENABLE;
+
+ if (!xe_gt_is_media_type(gt))
+ gtidle->powergate_enable |= RENDER_POWERGATE_ENABLE;
for (i = XE_HW_ENGINE_VCS0, j = 0; i <= XE_HW_ENGINE_VCS7; ++i, ++j) {
if ((gt->info.engine_mask & BIT(i)))
- pg_enable |= (VDN_HCP_POWERGATE_ENABLE(j) |
- VDN_MFXVDENC_POWERGATE_ENABLE(j));
+ gtidle->powergate_enable |= (VDN_HCP_POWERGATE_ENABLE(j) |
+ VDN_MFXVDENC_POWERGATE_ENABLE(j));
}
- XE_WARN_ON(xe_force_wake_get(gt_to_fw(gt), XE_FW_GT));
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
if (xe->info.skip_guc_pc) {
/*
* GuC sets the hysteresis value when GuC PC is enabled
* else set it to 25 (25 * 1.28us)
*/
- xe_mmio_write32(gt, MEDIA_POWERGATE_IDLE_HYSTERESIS, 25);
- xe_mmio_write32(gt, RENDER_POWERGATE_IDLE_HYSTERESIS, 25);
+ xe_mmio_write32(mmio, MEDIA_POWERGATE_IDLE_HYSTERESIS, 25);
+ xe_mmio_write32(mmio, RENDER_POWERGATE_IDLE_HYSTERESIS, 25);
}
- xe_mmio_write32(gt, POWERGATE_ENABLE, pg_enable);
- XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FW_GT));
+ xe_mmio_write32(mmio, POWERGATE_ENABLE, gtidle->powergate_enable);
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
}
void xe_gt_idle_disable_pg(struct xe_gt *gt)
{
+ struct xe_gt_idle *gtidle = &gt->gtidle;
+ unsigned int fw_ref;
+
if (IS_SRIOV_VF(gt_to_xe(gt)))
return;
xe_device_assert_mem_access(gt_to_xe(gt));
- XE_WARN_ON(xe_force_wake_get(gt_to_fw(gt), XE_FW_GT));
+ gtidle->powergate_enable = 0;
+
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+ xe_mmio_write32(&gt->mmio, POWERGATE_ENABLE, gtidle->powergate_enable);
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
+}
+
+/**
+ * xe_gt_idle_pg_print - Xe powergating info
+ * @gt: GT object
+ * @p: drm_printer.
+ *
+ * This function prints the powergating information
+ *
+ * Return: 0 on success, negative error code otherwise
+ */
+int xe_gt_idle_pg_print(struct xe_gt *gt, struct drm_printer *p)
+{
+ struct xe_gt_idle *gtidle = &gt->gtidle;
+ struct xe_device *xe = gt_to_xe(gt);
+ enum xe_gt_idle_state state;
+ u32 pg_enabled, pg_status = 0;
+ u32 vcs_mask, vecs_mask;
+ unsigned int fw_ref;
+ int n;
+ /*
+ * Media Slices
+ *
+ * Slice 0: VCS0, VCS1, VECS0
+ * Slice 1: VCS2, VCS3, VECS1
+ * Slice 2: VCS4, VCS5, VECS2
+ * Slice 3: VCS6, VCS7, VECS3
+ */
+ static const struct {
+ u64 engines;
+ u32 status_bit;
+ } media_slices[] = {
+ {(BIT(XE_HW_ENGINE_VCS0) | BIT(XE_HW_ENGINE_VCS1) |
+ BIT(XE_HW_ENGINE_VECS0)), MEDIA_SLICE0_AWAKE_STATUS},
+
+ {(BIT(XE_HW_ENGINE_VCS2) | BIT(XE_HW_ENGINE_VCS3) |
+ BIT(XE_HW_ENGINE_VECS1)), MEDIA_SLICE1_AWAKE_STATUS},
- xe_mmio_write32(gt, POWERGATE_ENABLE, 0);
+ {(BIT(XE_HW_ENGINE_VCS4) | BIT(XE_HW_ENGINE_VCS5) |
+ BIT(XE_HW_ENGINE_VECS2)), MEDIA_SLICE2_AWAKE_STATUS},
- XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FW_GT));
+ {(BIT(XE_HW_ENGINE_VCS6) | BIT(XE_HW_ENGINE_VCS7) |
+ BIT(XE_HW_ENGINE_VECS3)), MEDIA_SLICE3_AWAKE_STATUS},
+ };
+
+ if (xe->info.platform == XE_PVC) {
+ drm_printf(p, "Power Gating not supported\n");
+ return 0;
+ }
+
+ state = gtidle->idle_status(gtidle_to_pc(gtidle));
+ pg_enabled = gtidle->powergate_enable;
+
+ /* Do not wake the GT to read powergating status */
+ if (state != GT_IDLE_C6) {
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+ if (!fw_ref)
+ return -ETIMEDOUT;
+
+ pg_enabled = xe_mmio_read32(&gt->mmio, POWERGATE_ENABLE);
+ pg_status = xe_mmio_read32(&gt->mmio, POWERGATE_DOMAIN_STATUS);
+
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
+ }
+
+ if (gt->info.engine_mask & XE_HW_ENGINE_RCS_MASK) {
+ drm_printf(p, "Render Power Gating Enabled: %s\n",
+ str_yes_no(pg_enabled & RENDER_POWERGATE_ENABLE));
+
+ drm_printf(p, "Render Power Gate Status: %s\n",
+ str_up_down(pg_status & RENDER_AWAKE_STATUS));
+ }
+
+ vcs_mask = xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_VIDEO_DECODE);
+ vecs_mask = xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_VIDEO_ENHANCE);
+
+ /* Print media CPG status only if media is present */
+ if (vcs_mask || vecs_mask) {
+ drm_printf(p, "Media Power Gating Enabled: %s\n",
+ str_yes_no(pg_enabled & MEDIA_POWERGATE_ENABLE));
+
+ for (n = 0; n < ARRAY_SIZE(media_slices); n++)
+ if (gt->info.engine_mask & media_slices[n].engines)
+ drm_printf(p, "Media Slice%d Power Gate Status: %s\n", n,
+ str_up_down(pg_status & media_slices[n].status_bit));
+ }
+ return 0;
}
static ssize_t name_show(struct device *dev,
@@ -201,13 +301,14 @@ static void gt_idle_fini(void *arg)
{
struct kobject *kobj = arg;
struct xe_gt *gt = kobj_to_gt(kobj->parent);
+ unsigned int fw_ref;
xe_gt_idle_disable_pg(gt);
if (gt_to_xe(gt)->info.skip_guc_pc) {
- XE_WARN_ON(xe_force_wake_get(gt_to_fw(gt), XE_FW_GT));
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
xe_gt_idle_disable_c6(gt);
- xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
}
sysfs_remove_files(kobj, gt_idle_attrs);
@@ -260,9 +361,9 @@ void xe_gt_idle_enable_c6(struct xe_gt *gt)
return;
/* Units of 1280 ns for a total of 5s */
- xe_mmio_write32(gt, RC_IDLE_HYSTERSIS, 0x3B9ACA);
+ xe_mmio_write32(&gt->mmio, RC_IDLE_HYSTERSIS, 0x3B9ACA);
/* Enable RC6 */
- xe_mmio_write32(gt, RC_CONTROL,
+ xe_mmio_write32(&gt->mmio, RC_CONTROL,
RC_CTL_HW_ENABLE | RC_CTL_TO_MODE | RC_CTL_RC6_ENABLE);
}
@@ -274,6 +375,6 @@ void xe_gt_idle_disable_c6(struct xe_gt *gt)
if (IS_SRIOV_VF(gt_to_xe(gt)))
return;
- xe_mmio_write32(gt, RC_CONTROL, 0);
- xe_mmio_write32(gt, RC_STATE, 0);
+ xe_mmio_write32(&gt->mmio, RC_CONTROL, 0);
+ xe_mmio_write32(&gt->mmio, RC_STATE, 0);
}
diff --git a/drivers/gpu/drm/xe/xe_gt_idle.h b/drivers/gpu/drm/xe/xe_gt_idle.h
index 554447b5d46d..4455a6501cb0 100644
--- a/drivers/gpu/drm/xe/xe_gt_idle.h
+++ b/drivers/gpu/drm/xe/xe_gt_idle.h
@@ -8,6 +8,7 @@
#include "xe_gt_idle_types.h"
+struct drm_printer;
struct xe_gt;
int xe_gt_idle_init(struct xe_gt_idle *gtidle);
@@ -15,5 +16,6 @@ void xe_gt_idle_enable_c6(struct xe_gt *gt);
void xe_gt_idle_disable_c6(struct xe_gt *gt);
void xe_gt_idle_enable_pg(struct xe_gt *gt);
void xe_gt_idle_disable_pg(struct xe_gt *gt);
+int xe_gt_idle_pg_print(struct xe_gt *gt, struct drm_printer *p);
#endif /* _XE_GT_IDLE_H_ */
diff --git a/drivers/gpu/drm/xe/xe_gt_idle_types.h b/drivers/gpu/drm/xe/xe_gt_idle_types.h
index f99b447534f3..b8b297a3f884 100644
--- a/drivers/gpu/drm/xe/xe_gt_idle_types.h
+++ b/drivers/gpu/drm/xe/xe_gt_idle_types.h
@@ -23,6 +23,8 @@ enum xe_gt_idle_state {
struct xe_gt_idle {
/** @name: name */
char name[16];
+ /** @powergate_enable: copy of powergate enable bits */
+ u32 powergate_enable;
/** @residency_multiplier: residency multiplier in ns */
u32 residency_multiplier;
/** @cur_residency: raw driver copy of idle residency */
diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c
index c834f64b0178..5013d674e17d 100644
--- a/drivers/gpu/drm/xe/xe_gt_mcr.c
+++ b/drivers/gpu/drm/xe/xe_gt_mcr.c
@@ -237,13 +237,26 @@ static const struct xe_mmio_range xe2lpm_instance0_steering_table[] = {
{},
};
+static const struct xe_mmio_range xe3lpm_instance0_steering_table[] = {
+ { 0x384000, 0x3847DF }, /* GAM, rsvd, GAM */
+ { 0x384900, 0x384AFF }, /* GAM */
+ { 0x389560, 0x3895FF }, /* MEDIAINF */
+ { 0x38B600, 0x38B8FF }, /* L3BANK */
+ { 0x38C800, 0x38D07F }, /* GAM, MEDIAINF */
+ { 0x38D0D0, 0x38F0FF }, /* MEDIAINF, GAM */
+ { 0x393C00, 0x393C7F }, /* MEDIAINF */
+ {},
+};
+
static void init_steering_l3bank(struct xe_gt *gt)
{
+ struct xe_mmio *mmio = &gt->mmio;
+
if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1270) {
u32 mslice_mask = REG_FIELD_GET(MEML3_EN_MASK,
- xe_mmio_read32(gt, MIRROR_FUSE3));
+ xe_mmio_read32(mmio, MIRROR_FUSE3));
u32 bank_mask = REG_FIELD_GET(GT_L3_EXC_MASK,
- xe_mmio_read32(gt, XEHP_FUSE4));
+ xe_mmio_read32(mmio, XEHP_FUSE4));
/*
* Group selects mslice, instance selects bank within mslice.
@@ -254,7 +267,7 @@ static void init_steering_l3bank(struct xe_gt *gt)
bank_mask & BIT(0) ? 0 : 2;
} else if (gt_to_xe(gt)->info.platform == XE_DG2) {
u32 mslice_mask = REG_FIELD_GET(MEML3_EN_MASK,
- xe_mmio_read32(gt, MIRROR_FUSE3));
+ xe_mmio_read32(mmio, MIRROR_FUSE3));
u32 bank = __ffs(mslice_mask) * 8;
/*
@@ -266,7 +279,7 @@ static void init_steering_l3bank(struct xe_gt *gt)
gt->steering[L3BANK].instance_target = bank & 0x3;
} else {
u32 fuse = REG_FIELD_GET(L3BANK_MASK,
- ~xe_mmio_read32(gt, MIRROR_FUSE3));
+ ~xe_mmio_read32(mmio, MIRROR_FUSE3));
gt->steering[L3BANK].group_target = 0; /* unused */
gt->steering[L3BANK].instance_target = __ffs(fuse);
@@ -276,7 +289,7 @@ static void init_steering_l3bank(struct xe_gt *gt)
static void init_steering_mslice(struct xe_gt *gt)
{
u32 mask = REG_FIELD_GET(MEML3_EN_MASK,
- xe_mmio_read32(gt, MIRROR_FUSE3));
+ xe_mmio_read32(&gt->mmio, MIRROR_FUSE3));
/*
* mslice registers are valid (not terminated) if either the meml3
@@ -352,6 +365,19 @@ void xe_gt_mcr_get_dss_steering(struct xe_gt *gt, unsigned int dss, u16 *group,
*instance = dss % gt->steering_dss_per_grp;
}
+/**
+ * xe_gt_mcr_steering_info_to_dss_id - Get DSS ID from group/instance steering
+ * @gt: GT structure
+ * @group: steering group ID
+ * @instance: steering instance ID
+ *
+ * Return: the coverted DSS id.
+ */
+u32 xe_gt_mcr_steering_info_to_dss_id(struct xe_gt *gt, u16 group, u16 instance)
+{
+ return group * dss_per_group(gt) + instance;
+}
+
static void init_steering_dss(struct xe_gt *gt)
{
gt->steering_dss_per_grp = dss_per_group(gt);
@@ -380,7 +406,7 @@ static void init_steering_oaddrm(struct xe_gt *gt)
static void init_steering_sqidi_psmi(struct xe_gt *gt)
{
u32 mask = REG_FIELD_GET(XE2_NODE_ENABLE_MASK,
- xe_mmio_read32(gt, MIRROR_FUSE3));
+ xe_mmio_read32(&gt->mmio, MIRROR_FUSE3));
u32 select = __ffs(mask);
gt->steering[SQIDI_PSMI].group_target = select >> 1;
@@ -439,7 +465,10 @@ void xe_gt_mcr_init(struct xe_gt *gt)
if (gt->info.type == XE_GT_TYPE_MEDIA) {
drm_WARN_ON(&xe->drm, MEDIA_VER(xe) < 13);
- if (MEDIA_VERx100(xe) >= 1301) {
+ if (MEDIA_VER(xe) >= 30) {
+ gt->steering[OADDRM].ranges = xe2lpm_gpmxmt_steering_table;
+ gt->steering[INSTANCE0].ranges = xe3lpm_instance0_steering_table;
+ } else if (MEDIA_VERx100(xe) >= 1301) {
gt->steering[OADDRM].ranges = xe2lpm_gpmxmt_steering_table;
gt->steering[INSTANCE0].ranges = xe2lpm_instance0_steering_table;
} else {
@@ -494,8 +523,8 @@ void xe_gt_mcr_set_implicit_defaults(struct xe_gt *gt)
u32 steer_val = REG_FIELD_PREP(MCR_SLICE_MASK, 0) |
REG_FIELD_PREP(MCR_SUBSLICE_MASK, 2);
- xe_mmio_write32(gt, MCFG_MCR_SELECTOR, steer_val);
- xe_mmio_write32(gt, SF_MCR_SELECTOR, steer_val);
+ xe_mmio_write32(&gt->mmio, MCFG_MCR_SELECTOR, steer_val);
+ xe_mmio_write32(&gt->mmio, SF_MCR_SELECTOR, steer_val);
/*
* For GAM registers, all reads should be directed to instance 1
* (unicast reads against other instances are not allowed),
@@ -533,7 +562,7 @@ static bool xe_gt_mcr_get_nonterminated_steering(struct xe_gt *gt,
continue;
for (int i = 0; gt->steering[type].ranges[i].end > 0; i++) {
- if (xe_mmio_in_range(gt, &gt->steering[type].ranges[i], reg)) {
+ if (xe_mmio_in_range(&gt->mmio, &gt->steering[type].ranges[i], reg)) {
*group = gt->steering[type].group_target;
*instance = gt->steering[type].instance_target;
return true;
@@ -544,7 +573,7 @@ static bool xe_gt_mcr_get_nonterminated_steering(struct xe_gt *gt,
implicit_ranges = gt->steering[IMPLICIT_STEERING].ranges;
if (implicit_ranges)
for (int i = 0; implicit_ranges[i].end > 0; i++)
- if (xe_mmio_in_range(gt, &implicit_ranges[i], reg))
+ if (xe_mmio_in_range(&gt->mmio, &implicit_ranges[i], reg))
return false;
/*
@@ -579,7 +608,7 @@ static void mcr_lock(struct xe_gt *gt) __acquires(&gt->mcr_lock)
* when a read to the relevant register returns 1.
*/
if (GRAPHICS_VERx100(xe) >= 1270)
- ret = xe_mmio_wait32(gt, STEER_SEMAPHORE, 0x1, 0x1, 10, NULL,
+ ret = xe_mmio_wait32(&gt->mmio, STEER_SEMAPHORE, 0x1, 0x1, 10, NULL,
true);
drm_WARN_ON_ONCE(&xe->drm, ret == -ETIMEDOUT);
@@ -589,7 +618,7 @@ static void mcr_unlock(struct xe_gt *gt) __releases(&gt->mcr_lock)
{
/* Release hardware semaphore - this is done by writing 1 to the register */
if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1270)
- xe_mmio_write32(gt, STEER_SEMAPHORE, 0x1);
+ xe_mmio_write32(&gt->mmio, STEER_SEMAPHORE, 0x1);
spin_unlock(&gt->mcr_lock);
}
@@ -603,6 +632,7 @@ static u32 rw_with_mcr_steering(struct xe_gt *gt, struct xe_reg_mcr reg_mcr,
u8 rw_flag, int group, int instance, u32 value)
{
const struct xe_reg reg = to_xe_reg(reg_mcr);
+ struct xe_mmio *mmio = &gt->mmio;
struct xe_reg steer_reg;
u32 steer_val, val = 0;
@@ -635,12 +665,12 @@ static u32 rw_with_mcr_steering(struct xe_gt *gt, struct xe_reg_mcr reg_mcr,
if (rw_flag == MCR_OP_READ)
steer_val |= MCR_MULTICAST;
- xe_mmio_write32(gt, steer_reg, steer_val);
+ xe_mmio_write32(mmio, steer_reg, steer_val);
if (rw_flag == MCR_OP_READ)
- val = xe_mmio_read32(gt, reg);
+ val = xe_mmio_read32(mmio, reg);
else
- xe_mmio_write32(gt, reg, value);
+ xe_mmio_write32(mmio, reg, value);
/*
* If we turned off the multicast bit (during a write) we're required
@@ -649,7 +679,7 @@ static u32 rw_with_mcr_steering(struct xe_gt *gt, struct xe_reg_mcr reg_mcr,
* operation.
*/
if (rw_flag == MCR_OP_WRITE)
- xe_mmio_write32(gt, steer_reg, MCR_MULTICAST);
+ xe_mmio_write32(mmio, steer_reg, MCR_MULTICAST);
return val;
}
@@ -684,7 +714,7 @@ u32 xe_gt_mcr_unicast_read_any(struct xe_gt *gt, struct xe_reg_mcr reg_mcr)
group, instance, 0);
mcr_unlock(gt);
} else {
- val = xe_mmio_read32(gt, reg);
+ val = xe_mmio_read32(&gt->mmio, reg);
}
return val;
@@ -757,7 +787,7 @@ void xe_gt_mcr_multicast_write(struct xe_gt *gt, struct xe_reg_mcr reg_mcr,
* to touch the steering register.
*/
mcr_lock(gt);
- xe_mmio_write32(gt, reg, value);
+ xe_mmio_write32(&gt->mmio, reg, value);
mcr_unlock(gt);
}
diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.h b/drivers/gpu/drm/xe/xe_gt_mcr.h
index 8d119a0d5493..c0cd36021c24 100644
--- a/drivers/gpu/drm/xe/xe_gt_mcr.h
+++ b/drivers/gpu/drm/xe/xe_gt_mcr.h
@@ -28,6 +28,7 @@ void xe_gt_mcr_multicast_write(struct xe_gt *gt, struct xe_reg_mcr mcr_reg,
void xe_gt_mcr_steering_dump(struct xe_gt *gt, struct drm_printer *p);
void xe_gt_mcr_get_dss_steering(struct xe_gt *gt, unsigned int dss, u16 *group, u16 *instance);
+u32 xe_gt_mcr_steering_info_to_dss_id(struct xe_gt *gt, u16 group, u16 instance);
/*
* Loop over each DSS and determine the group and instance IDs that
diff --git a/drivers/gpu/drm/xe/xe_gt_printk.h b/drivers/gpu/drm/xe/xe_gt_printk.h
index d6228baaff1e..5dc71394372d 100644
--- a/drivers/gpu/drm/xe/xe_gt_printk.h
+++ b/drivers/gpu/drm/xe/xe_gt_printk.h
@@ -8,7 +8,7 @@
#include <drm/drm_print.h>
-#include "xe_device_types.h"
+#include "xe_gt_types.h"
#define xe_gt_printk(_gt, _level, _fmt, ...) \
drm_##_level(&gt_to_xe(_gt)->drm, "GT%u: " _fmt, (_gt)->info.id, ##__VA_ARGS__)
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf.c
index 905f409db74b..e71fc3d2bda2 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf.c
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf.c
@@ -5,12 +5,15 @@
#include <drm/drm_managed.h>
+#include "regs/xe_guc_regs.h"
#include "regs/xe_regs.h"
+#include "xe_gt.h"
#include "xe_gt_sriov_pf.h"
#include "xe_gt_sriov_pf_config.h"
#include "xe_gt_sriov_pf_control.h"
#include "xe_gt_sriov_pf_helpers.h"
+#include "xe_gt_sriov_pf_migration.h"
#include "xe_gt_sriov_pf_service.h"
#include "xe_mmio.h"
@@ -72,7 +75,7 @@ static bool pf_needs_enable_ggtt_guest_update(struct xe_device *xe)
static void pf_enable_ggtt_guest_update(struct xe_gt *gt)
{
- xe_mmio_write32(gt, VIRTUAL_CTRL_REG, GUEST_GTT_UPDATE_EN);
+ xe_mmio_write32(&gt->mmio, VIRTUAL_CTRL_REG, GUEST_GTT_UPDATE_EN);
}
/**
@@ -87,6 +90,57 @@ void xe_gt_sriov_pf_init_hw(struct xe_gt *gt)
pf_enable_ggtt_guest_update(gt);
xe_gt_sriov_pf_service_update(gt);
+ xe_gt_sriov_pf_migration_init(gt);
+}
+
+static u32 pf_get_vf_regs_stride(struct xe_device *xe)
+{
+ return GRAPHICS_VERx100(xe) > 1200 ? 0x400 : 0x1000;
+}
+
+static struct xe_reg xe_reg_vf_to_pf(struct xe_reg vf_reg, unsigned int vfid, u32 stride)
+{
+ struct xe_reg pf_reg = vf_reg;
+
+ pf_reg.vf = 0;
+ pf_reg.addr += stride * vfid;
+
+ return pf_reg;
+}
+
+static void pf_clear_vf_scratch_regs(struct xe_gt *gt, unsigned int vfid)
+{
+ u32 stride = pf_get_vf_regs_stride(gt_to_xe(gt));
+ struct xe_reg scratch;
+ int n, count;
+
+ if (xe_gt_is_media_type(gt)) {
+ count = MED_VF_SW_FLAG_COUNT;
+ for (n = 0; n < count; n++) {
+ scratch = xe_reg_vf_to_pf(MED_VF_SW_FLAG(n), vfid, stride);
+ xe_mmio_write32(&gt->mmio, scratch, 0);
+ }
+ } else {
+ count = VF_SW_FLAG_COUNT;
+ for (n = 0; n < count; n++) {
+ scratch = xe_reg_vf_to_pf(VF_SW_FLAG(n), vfid, stride);
+ xe_mmio_write32(&gt->mmio, scratch, 0);
+ }
+ }
+}
+
+/**
+ * xe_gt_sriov_pf_sanitize_hw() - Reset hardware state related to a VF.
+ * @gt: the &xe_gt
+ * @vfid: the VF identifier
+ *
+ * This function can only be called on PF.
+ */
+void xe_gt_sriov_pf_sanitize_hw(struct xe_gt *gt, unsigned int vfid)
+{
+ xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+
+ pf_clear_vf_scratch_regs(gt, vfid);
}
/**
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf.h
index f0cb726a6919..96fab779a906 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf.h
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf.h
@@ -11,6 +11,7 @@ struct xe_gt;
#ifdef CONFIG_PCI_IOV
int xe_gt_sriov_pf_init_early(struct xe_gt *gt);
void xe_gt_sriov_pf_init_hw(struct xe_gt *gt);
+void xe_gt_sriov_pf_sanitize_hw(struct xe_gt *gt, unsigned int vfid);
void xe_gt_sriov_pf_restart(struct xe_gt *gt);
#else
static inline int xe_gt_sriov_pf_init_early(struct xe_gt *gt)
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c
index afdb477ecf83..192643d63d22 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c
@@ -34,6 +34,8 @@
#include "xe_ttm_vram_mgr.h"
#include "xe_wopcm.h"
+#define make_u64_from_u32(hi, lo) ((u64)((u64)(u32)(hi) << 32 | (u32)(lo)))
+
/*
* Return: number of KLVs that were successfully parsed and saved,
* negative error code on failure.
@@ -229,14 +231,16 @@ static struct xe_gt_sriov_config *pf_pick_vf_config(struct xe_gt *gt, unsigned i
}
/* Return: number of configuration dwords written */
-static u32 encode_config_ggtt(u32 *cfg, const struct xe_gt_sriov_config *config)
+static u32 encode_config_ggtt(u32 *cfg, const struct xe_gt_sriov_config *config, bool details)
{
u32 n = 0;
if (xe_ggtt_node_allocated(config->ggtt_region)) {
- cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_GGTT_START);
- cfg[n++] = lower_32_bits(config->ggtt_region->base.start);
- cfg[n++] = upper_32_bits(config->ggtt_region->base.start);
+ if (details) {
+ cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_GGTT_START);
+ cfg[n++] = lower_32_bits(config->ggtt_region->base.start);
+ cfg[n++] = upper_32_bits(config->ggtt_region->base.start);
+ }
cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_GGTT_SIZE);
cfg[n++] = lower_32_bits(config->ggtt_region->base.size);
@@ -247,20 +251,24 @@ static u32 encode_config_ggtt(u32 *cfg, const struct xe_gt_sriov_config *config)
}
/* Return: number of configuration dwords written */
-static u32 encode_config(u32 *cfg, const struct xe_gt_sriov_config *config)
+static u32 encode_config(u32 *cfg, const struct xe_gt_sriov_config *config, bool details)
{
u32 n = 0;
- n += encode_config_ggtt(cfg, config);
+ n += encode_config_ggtt(cfg, config, details);
- cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_BEGIN_CONTEXT_ID);
- cfg[n++] = config->begin_ctx;
+ if (details) {
+ cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_BEGIN_CONTEXT_ID);
+ cfg[n++] = config->begin_ctx;
+ }
cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_NUM_CONTEXTS);
cfg[n++] = config->num_ctxs;
- cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_BEGIN_DOORBELL_ID);
- cfg[n++] = config->begin_db;
+ if (details) {
+ cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_BEGIN_DOORBELL_ID);
+ cfg[n++] = config->begin_db;
+ }
cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_NUM_DOORBELLS);
cfg[n++] = config->num_dbs;
@@ -301,7 +309,7 @@ static int pf_push_full_vf_config(struct xe_gt *gt, unsigned int vfid)
if (!cfg)
return -ENOMEM;
- num_dwords = encode_config(cfg, config);
+ num_dwords = encode_config(cfg, config, true);
xe_gt_assert(gt, num_dwords <= max_cfg_dwords);
if (xe_gt_is_media_type(gt)) {
@@ -309,10 +317,10 @@ static int pf_push_full_vf_config(struct xe_gt *gt, unsigned int vfid)
struct xe_gt_sriov_config *other = pf_pick_vf_config(primary, vfid);
/* media-GT will never include a GGTT config */
- xe_gt_assert(gt, !encode_config_ggtt(cfg + num_dwords, config));
+ xe_gt_assert(gt, !encode_config_ggtt(cfg + num_dwords, config, true));
/* the GGTT config must be taken from the primary-GT instead */
- num_dwords += encode_config_ggtt(cfg + num_dwords, other);
+ num_dwords += encode_config_ggtt(cfg + num_dwords, other, true);
}
xe_gt_assert(gt, num_dwords <= max_cfg_dwords);
@@ -2044,7 +2052,7 @@ static int pf_validate_vf_config(struct xe_gt *gt, unsigned int vfid)
valid_all = valid_all && valid_lmem;
}
- return valid_all ? 1 : valid_any ? -ENOKEY : -ENODATA;
+ return valid_all ? 0 : valid_any ? -ENOKEY : -ENODATA;
}
/**
@@ -2071,6 +2079,174 @@ bool xe_gt_sriov_pf_config_is_empty(struct xe_gt *gt, unsigned int vfid)
}
/**
+ * xe_gt_sriov_pf_config_save - Save a VF provisioning config as binary blob.
+ * @gt: the &xe_gt
+ * @vfid: the VF identifier (can't be PF)
+ * @buf: the buffer to save a config to (or NULL if query the buf size)
+ * @size: the size of the buffer (or 0 if query the buf size)
+ *
+ * This function can only be called on PF.
+ *
+ * Return: mininum size of the buffer or the number of bytes saved,
+ * or a negative error code on failure.
+ */
+ssize_t xe_gt_sriov_pf_config_save(struct xe_gt *gt, unsigned int vfid, void *buf, size_t size)
+{
+ struct xe_gt_sriov_config *config;
+ ssize_t ret;
+
+ xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+ xe_gt_assert(gt, vfid);
+ xe_gt_assert(gt, !(!buf ^ !size));
+
+ mutex_lock(xe_gt_sriov_pf_master_mutex(gt));
+ ret = pf_validate_vf_config(gt, vfid);
+ if (!size) {
+ ret = ret ? 0 : SZ_4K;
+ } else if (!ret) {
+ if (size < SZ_4K) {
+ ret = -ENOBUFS;
+ } else {
+ config = pf_pick_vf_config(gt, vfid);
+ ret = encode_config(buf, config, false) * sizeof(u32);
+ }
+ }
+ mutex_unlock(xe_gt_sriov_pf_master_mutex(gt));
+
+ return ret;
+}
+
+static int pf_restore_vf_config_klv(struct xe_gt *gt, unsigned int vfid,
+ u32 key, u32 len, const u32 *value)
+{
+ switch (key) {
+ case GUC_KLV_VF_CFG_NUM_CONTEXTS_KEY:
+ if (len != GUC_KLV_VF_CFG_NUM_CONTEXTS_LEN)
+ return -EBADMSG;
+ return pf_provision_vf_ctxs(gt, vfid, value[0]);
+
+ case GUC_KLV_VF_CFG_NUM_DOORBELLS_KEY:
+ if (len != GUC_KLV_VF_CFG_NUM_DOORBELLS_LEN)
+ return -EBADMSG;
+ return pf_provision_vf_dbs(gt, vfid, value[0]);
+
+ case GUC_KLV_VF_CFG_EXEC_QUANTUM_KEY:
+ if (len != GUC_KLV_VF_CFG_EXEC_QUANTUM_LEN)
+ return -EBADMSG;
+ return pf_provision_exec_quantum(gt, vfid, value[0]);
+
+ case GUC_KLV_VF_CFG_PREEMPT_TIMEOUT_KEY:
+ if (len != GUC_KLV_VF_CFG_PREEMPT_TIMEOUT_LEN)
+ return -EBADMSG;
+ return pf_provision_preempt_timeout(gt, vfid, value[0]);
+
+ /* auto-generate case statements */
+#define define_threshold_key_to_provision_case(TAG, ...) \
+ case MAKE_GUC_KLV_VF_CFG_THRESHOLD_KEY(TAG): \
+ BUILD_BUG_ON(MAKE_GUC_KLV_VF_CFG_THRESHOLD_LEN(TAG) != 1u); \
+ if (len != MAKE_GUC_KLV_VF_CFG_THRESHOLD_LEN(TAG)) \
+ return -EBADMSG; \
+ return pf_provision_threshold(gt, vfid, \
+ MAKE_XE_GUC_KLV_THRESHOLD_INDEX(TAG), \
+ value[0]);
+
+ MAKE_XE_GUC_KLV_THRESHOLDS_SET(define_threshold_key_to_provision_case)
+#undef define_threshold_key_to_provision_case
+ }
+
+ if (xe_gt_is_media_type(gt))
+ return -EKEYREJECTED;
+
+ switch (key) {
+ case GUC_KLV_VF_CFG_GGTT_SIZE_KEY:
+ if (len != GUC_KLV_VF_CFG_GGTT_SIZE_LEN)
+ return -EBADMSG;
+ return pf_provision_vf_ggtt(gt, vfid, make_u64_from_u32(value[1], value[0]));
+
+ case GUC_KLV_VF_CFG_LMEM_SIZE_KEY:
+ if (!IS_DGFX(gt_to_xe(gt)))
+ return -EKEYREJECTED;
+ if (len != GUC_KLV_VF_CFG_LMEM_SIZE_LEN)
+ return -EBADMSG;
+ return pf_provision_vf_lmem(gt, vfid, make_u64_from_u32(value[1], value[0]));
+ }
+
+ return -EKEYREJECTED;
+}
+
+static int pf_restore_vf_config(struct xe_gt *gt, unsigned int vfid,
+ const u32 *klvs, size_t num_dwords)
+{
+ int err;
+
+ while (num_dwords >= GUC_KLV_LEN_MIN) {
+ u32 key = FIELD_GET(GUC_KLV_0_KEY, klvs[0]);
+ u32 len = FIELD_GET(GUC_KLV_0_LEN, klvs[0]);
+
+ klvs += GUC_KLV_LEN_MIN;
+ num_dwords -= GUC_KLV_LEN_MIN;
+
+ if (num_dwords < len)
+ err = -EBADMSG;
+ else
+ err = pf_restore_vf_config_klv(gt, vfid, key, len, klvs);
+
+ if (err) {
+ xe_gt_sriov_dbg(gt, "restore failed on key %#x (%pe)\n", key, ERR_PTR(err));
+ return err;
+ }
+
+ klvs += len;
+ num_dwords -= len;
+ }
+
+ return pf_validate_vf_config(gt, vfid);
+}
+
+/**
+ * xe_gt_sriov_pf_config_restore - Restore a VF provisioning config from binary blob.
+ * @gt: the &xe_gt
+ * @vfid: the VF identifier (can't be PF)
+ * @buf: the buffer with config data
+ * @size: the size of the config data
+ *
+ * This function can only be called on PF.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_gt_sriov_pf_config_restore(struct xe_gt *gt, unsigned int vfid,
+ const void *buf, size_t size)
+{
+ int err;
+
+ xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+ xe_gt_assert(gt, vfid);
+
+ if (!size)
+ return -ENODATA;
+
+ if (size % sizeof(u32))
+ return -EINVAL;
+
+ if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_SRIOV)) {
+ struct drm_printer p = xe_gt_info_printer(gt);
+
+ drm_printf(&p, "restoring VF%u config:\n", vfid);
+ xe_guc_klv_print(buf, size / sizeof(u32), &p);
+ }
+
+ mutex_lock(xe_gt_sriov_pf_master_mutex(gt));
+ err = pf_send_vf_cfg_reset(gt, vfid);
+ if (!err) {
+ pf_release_vf_config(gt, vfid);
+ err = pf_restore_vf_config(gt, vfid, buf, size / sizeof(u32));
+ }
+ mutex_unlock(xe_gt_sriov_pf_master_mutex(gt));
+
+ return err;
+}
+
+/**
* xe_gt_sriov_pf_config_restart - Restart SR-IOV configurations after a GT reset.
* @gt: the &xe_gt
*
@@ -2203,6 +2379,41 @@ int xe_gt_sriov_pf_config_print_dbs(struct xe_gt *gt, struct drm_printer *p)
}
/**
+ * xe_gt_sriov_pf_config_print_lmem - Print LMEM configurations.
+ * @gt: the &xe_gt
+ * @p: the &drm_printer
+ *
+ * Print LMEM allocations across all VFs.
+ * VFs without LMEM allocation are skipped.
+ *
+ * This function can only be called on PF.
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_gt_sriov_pf_config_print_lmem(struct xe_gt *gt, struct drm_printer *p)
+{
+ unsigned int n, total_vfs = xe_sriov_pf_get_totalvfs(gt_to_xe(gt));
+ const struct xe_gt_sriov_config *config;
+ char buf[10];
+
+ xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+ mutex_lock(xe_gt_sriov_pf_master_mutex(gt));
+
+ for (n = 1; n <= total_vfs; n++) {
+ config = &gt->sriov.pf.vfs[n].config;
+ if (!config->lmem_obj)
+ continue;
+
+ string_get_size(config->lmem_obj->size, 1, STRING_UNITS_2,
+ buf, sizeof(buf));
+ drm_printf(p, "VF%u:\t%zu\t(%s)\n",
+ n, config->lmem_obj->size, buf);
+ }
+
+ mutex_unlock(xe_gt_sriov_pf_master_mutex(gt));
+ return 0;
+}
+
+/**
* xe_gt_sriov_pf_config_print_available_ggtt - Print available GGTT ranges.
* @gt: the &xe_gt
* @p: the &drm_printer
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h
index 42e64769f666..0c55aa40a1a7 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h
@@ -54,6 +54,10 @@ int xe_gt_sriov_pf_config_sanitize(struct xe_gt *gt, unsigned int vfid, long tim
int xe_gt_sriov_pf_config_release(struct xe_gt *gt, unsigned int vfid, bool force);
int xe_gt_sriov_pf_config_push(struct xe_gt *gt, unsigned int vfid, bool refresh);
+ssize_t xe_gt_sriov_pf_config_save(struct xe_gt *gt, unsigned int vfid, void *buf, size_t size);
+int xe_gt_sriov_pf_config_restore(struct xe_gt *gt, unsigned int vfid,
+ const void *buf, size_t size);
+
bool xe_gt_sriov_pf_config_is_empty(struct xe_gt *gt, unsigned int vfid);
void xe_gt_sriov_pf_config_restart(struct xe_gt *gt);
@@ -61,6 +65,7 @@ void xe_gt_sriov_pf_config_restart(struct xe_gt *gt);
int xe_gt_sriov_pf_config_print_ggtt(struct xe_gt *gt, struct drm_printer *p);
int xe_gt_sriov_pf_config_print_ctxs(struct xe_gt *gt, struct drm_printer *p);
int xe_gt_sriov_pf_config_print_dbs(struct xe_gt *gt, struct drm_printer *p);
+int xe_gt_sriov_pf_config_print_lmem(struct xe_gt *gt, struct drm_printer *p);
int xe_gt_sriov_pf_config_print_available_ggtt(struct xe_gt *gt, struct drm_printer *p);
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c
index 02f7328bd6ce..1f50aec3a059 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c
@@ -9,9 +9,11 @@
#include "xe_device.h"
#include "xe_gt.h"
+#include "xe_gt_sriov_pf.h"
#include "xe_gt_sriov_pf_config.h"
#include "xe_gt_sriov_pf_control.h"
#include "xe_gt_sriov_pf_helpers.h"
+#include "xe_gt_sriov_pf_migration.h"
#include "xe_gt_sriov_pf_monitor.h"
#include "xe_gt_sriov_pf_service.h"
#include "xe_gt_sriov_printk.h"
@@ -176,6 +178,7 @@ static const char *control_bit_to_string(enum xe_gt_sriov_control_bits bit)
CASE2STR(PAUSE_SEND_PAUSE);
CASE2STR(PAUSE_WAIT_GUC);
CASE2STR(PAUSE_GUC_DONE);
+ CASE2STR(PAUSE_SAVE_GUC);
CASE2STR(PAUSE_FAILED);
CASE2STR(PAUSED);
CASE2STR(RESUME_WIP);
@@ -415,6 +418,10 @@ static void pf_enter_vf_ready(struct xe_gt *gt, unsigned int vfid)
* : | : /
* : v : /
* : PAUSE_GUC_DONE o-----restart
+ * : | :
+ * : | o---<--busy :
+ * : v / / :
+ * : PAUSE_SAVE_GUC :
* : / :
* : / :
* :....o..............o...............o...........:
@@ -434,6 +441,7 @@ static void pf_exit_vf_pause_wip(struct xe_gt *gt, unsigned int vfid)
pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_SEND_PAUSE);
pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_WAIT_GUC);
pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_GUC_DONE);
+ pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_SAVE_GUC);
}
}
@@ -464,12 +472,41 @@ static void pf_enter_vf_pause_rejected(struct xe_gt *gt, unsigned int vfid)
pf_enter_vf_pause_failed(gt, vfid);
}
+static void pf_enter_vf_pause_save_guc(struct xe_gt *gt, unsigned int vfid)
+{
+ if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_SAVE_GUC))
+ pf_enter_vf_state_machine_bug(gt, vfid);
+}
+
+static bool pf_exit_vf_pause_save_guc(struct xe_gt *gt, unsigned int vfid)
+{
+ int err;
+
+ if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_SAVE_GUC))
+ return false;
+
+ err = xe_gt_sriov_pf_migration_save_guc_state(gt, vfid);
+ if (err) {
+ /* retry if busy */
+ if (err == -EBUSY) {
+ pf_enter_vf_pause_save_guc(gt, vfid);
+ return true;
+ }
+ /* give up on error */
+ if (err == -EIO)
+ pf_enter_vf_mismatch(gt, vfid);
+ }
+
+ pf_enter_vf_pause_completed(gt, vfid);
+ return true;
+}
+
static bool pf_exit_vf_pause_guc_done(struct xe_gt *gt, unsigned int vfid)
{
if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_GUC_DONE))
return false;
- pf_enter_vf_pause_completed(gt, vfid);
+ pf_enter_vf_pause_save_guc(gt, vfid);
return true;
}
@@ -1008,7 +1045,7 @@ static bool pf_exit_vf_flr_reset_mmio(struct xe_gt *gt, unsigned int vfid)
if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_MMIO))
return false;
- /* XXX: placeholder */
+ xe_gt_sriov_pf_sanitize_hw(gt, vfid);
pf_enter_vf_flr_send_finish(gt, vfid);
return true;
@@ -1338,6 +1375,9 @@ static bool pf_process_vf_state_machine(struct xe_gt *gt, unsigned int vfid)
if (pf_exit_vf_pause_guc_done(gt, vfid))
return true;
+ if (pf_exit_vf_pause_save_guc(gt, vfid))
+ return true;
+
if (pf_exit_vf_resume_send_resume(gt, vfid))
return true;
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control_types.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control_types.h
index 11830aafea45..f02f941b4ad2 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control_types.h
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control_types.h
@@ -27,6 +27,7 @@
* @XE_GT_SRIOV_STATE_PAUSE_SEND_PAUSE: indicates that the PF is about to send a PAUSE command.
* @XE_GT_SRIOV_STATE_PAUSE_WAIT_GUC: indicates that the PF awaits for a response from the GuC.
* @XE_GT_SRIOV_STATE_PAUSE_GUC_DONE: indicates that the PF has received a response from the GuC.
+ * @XE_GT_SRIOV_STATE_PAUSE_SAVE_GUC: indicates that the PF needs to save the VF GuC state.
* @XE_GT_SRIOV_STATE_PAUSE_FAILED: indicates that a VF pause operation has failed.
* @XE_GT_SRIOV_STATE_PAUSED: indicates that the VF is paused.
* @XE_GT_SRIOV_STATE_RESUME_WIP: indicates the a VF resume operation is in progress.
@@ -56,6 +57,7 @@ enum xe_gt_sriov_control_bits {
XE_GT_SRIOV_STATE_PAUSE_SEND_PAUSE,
XE_GT_SRIOV_STATE_PAUSE_WAIT_GUC,
XE_GT_SRIOV_STATE_PAUSE_GUC_DONE,
+ XE_GT_SRIOV_STATE_PAUSE_SAVE_GUC,
XE_GT_SRIOV_STATE_PAUSE_FAILED,
XE_GT_SRIOV_STATE_PAUSED,
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c
index 2290ddaf9594..05df4ab3514b 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c
@@ -17,6 +17,7 @@
#include "xe_gt_sriov_pf_control.h"
#include "xe_gt_sriov_pf_debugfs.h"
#include "xe_gt_sriov_pf_helpers.h"
+#include "xe_gt_sriov_pf_migration.h"
#include "xe_gt_sriov_pf_monitor.h"
#include "xe_gt_sriov_pf_policy.h"
#include "xe_gt_sriov_pf_service.h"
@@ -81,6 +82,11 @@ static const struct drm_info_list pf_info[] = {
.data = xe_gt_sriov_pf_config_print_dbs,
},
{
+ "lmem_provisioned",
+ .show = xe_gt_debugfs_simple_show,
+ .data = xe_gt_sriov_pf_config_print_lmem,
+ },
+ {
"runtime_registers",
.show = xe_gt_debugfs_simple_show,
.data = xe_gt_sriov_pf_service_print_runtime,
@@ -312,6 +318,9 @@ static const struct {
{ "stop", xe_gt_sriov_pf_control_stop_vf },
{ "pause", xe_gt_sriov_pf_control_pause_vf },
{ "resume", xe_gt_sriov_pf_control_resume_vf },
+#ifdef CONFIG_DRM_XE_DEBUG_SRIOV
+ { "restore!", xe_gt_sriov_pf_migration_restore_guc_state },
+#endif
};
static ssize_t control_write(struct file *file, const char __user *buf, size_t count, loff_t *pos)
@@ -375,6 +384,119 @@ static const struct file_operations control_ops = {
.llseek = default_llseek,
};
+/*
+ * /sys/kernel/debug/dri/0/
+ * ├── gt0
+ * │   ├── vf1
+ * │   │   ├── guc_state
+ */
+static ssize_t guc_state_read(struct file *file, char __user *buf,
+ size_t count, loff_t *pos)
+{
+ struct dentry *dent = file_dentry(file);
+ struct dentry *parent = dent->d_parent;
+ struct xe_gt *gt = extract_gt(parent);
+ unsigned int vfid = extract_vfid(parent);
+
+ return xe_gt_sriov_pf_migration_read_guc_state(gt, vfid, buf, count, pos);
+}
+
+static ssize_t guc_state_write(struct file *file, const char __user *buf,
+ size_t count, loff_t *pos)
+{
+ struct dentry *dent = file_dentry(file);
+ struct dentry *parent = dent->d_parent;
+ struct xe_gt *gt = extract_gt(parent);
+ unsigned int vfid = extract_vfid(parent);
+
+ if (*pos)
+ return -EINVAL;
+
+ return xe_gt_sriov_pf_migration_write_guc_state(gt, vfid, buf, count);
+}
+
+static const struct file_operations guc_state_ops = {
+ .owner = THIS_MODULE,
+ .read = guc_state_read,
+ .write = guc_state_write,
+ .llseek = default_llseek,
+};
+
+/*
+ * /sys/kernel/debug/dri/0/
+ * ├── gt0
+ * │   ├── vf1
+ * │   │   ├── config_blob
+ */
+static ssize_t config_blob_read(struct file *file, char __user *buf,
+ size_t count, loff_t *pos)
+{
+ struct dentry *dent = file_dentry(file);
+ struct dentry *parent = dent->d_parent;
+ struct xe_gt *gt = extract_gt(parent);
+ unsigned int vfid = extract_vfid(parent);
+ ssize_t ret;
+ void *tmp;
+
+ ret = xe_gt_sriov_pf_config_save(gt, vfid, NULL, 0);
+ if (!ret)
+ return -ENODATA;
+ if (ret < 0)
+ return ret;
+
+ tmp = kzalloc(ret, GFP_KERNEL);
+ if (!tmp)
+ return -ENOMEM;
+
+ ret = xe_gt_sriov_pf_config_save(gt, vfid, tmp, ret);
+ if (ret > 0)
+ ret = simple_read_from_buffer(buf, count, pos, tmp, ret);
+
+ kfree(tmp);
+ return ret;
+}
+
+static ssize_t config_blob_write(struct file *file, const char __user *buf,
+ size_t count, loff_t *pos)
+{
+ struct dentry *dent = file_dentry(file);
+ struct dentry *parent = dent->d_parent;
+ struct xe_gt *gt = extract_gt(parent);
+ unsigned int vfid = extract_vfid(parent);
+ ssize_t ret;
+ void *tmp;
+
+ if (*pos)
+ return -EINVAL;
+
+ if (!count)
+ return -ENODATA;
+
+ if (count > SZ_4K)
+ return -EINVAL;
+
+ tmp = kzalloc(count, GFP_KERNEL);
+ if (!tmp)
+ return -ENOMEM;
+
+ if (copy_from_user(tmp, buf, count)) {
+ ret = -EFAULT;
+ } else {
+ ret = xe_gt_sriov_pf_config_restore(gt, vfid, tmp, count);
+ if (!ret)
+ ret = count;
+ }
+ kfree(tmp);
+ return ret;
+}
+
+static const struct file_operations config_blob_ops = {
+ .owner = THIS_MODULE,
+ .read = config_blob_read,
+ .write = config_blob_write,
+ .llseek = default_llseek,
+};
+
/**
* xe_gt_sriov_pf_debugfs_register - Register SR-IOV PF specific entries in GT debugfs.
* @gt: the &xe_gt to register
@@ -423,5 +545,15 @@ void xe_gt_sriov_pf_debugfs_register(struct xe_gt *gt, struct dentry *root)
pf_add_config_attrs(gt, vfdentry, VFID(n));
debugfs_create_file("control", 0600, vfdentry, NULL, &control_ops);
+
+ /* for testing/debugging purposes only! */
+ if (IS_ENABLED(CONFIG_DRM_XE_DEBUG)) {
+ debugfs_create_file("guc_state",
+ IS_ENABLED(CONFIG_DRM_XE_DEBUG_SRIOV) ? 0600 : 0400,
+ vfdentry, NULL, &guc_state_ops);
+ debugfs_create_file("config_blob",
+ IS_ENABLED(CONFIG_DRM_XE_DEBUG_SRIOV) ? 0600 : 0400,
+ vfdentry, NULL, &config_blob_ops);
+ }
}
}
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c
new file mode 100644
index 000000000000..c712111aa30d
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c
@@ -0,0 +1,419 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2024 Intel Corporation
+ */
+
+#include <drm/drm_managed.h>
+
+#include "abi/guc_actions_sriov_abi.h"
+#include "xe_bo.h"
+#include "xe_gt_sriov_pf_helpers.h"
+#include "xe_gt_sriov_pf_migration.h"
+#include "xe_gt_sriov_printk.h"
+#include "xe_guc.h"
+#include "xe_guc_ct.h"
+#include "xe_sriov.h"
+
+/* Return: number of dwords saved/restored/required or a negative error code on failure */
+static int guc_action_vf_save_restore(struct xe_guc *guc, u32 vfid, u32 opcode,
+ u64 addr, u32 ndwords)
+{
+ u32 request[PF2GUC_SAVE_RESTORE_VF_REQUEST_MSG_LEN] = {
+ FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
+ FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) |
+ FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, GUC_ACTION_PF2GUC_SAVE_RESTORE_VF) |
+ FIELD_PREP(PF2GUC_SAVE_RESTORE_VF_REQUEST_MSG_0_OPCODE, opcode),
+ FIELD_PREP(PF2GUC_SAVE_RESTORE_VF_REQUEST_MSG_1_VFID, vfid),
+ FIELD_PREP(PF2GUC_SAVE_RESTORE_VF_REQUEST_MSG_2_ADDR_LO, lower_32_bits(addr)),
+ FIELD_PREP(PF2GUC_SAVE_RESTORE_VF_REQUEST_MSG_3_ADDR_HI, upper_32_bits(addr)),
+ FIELD_PREP(PF2GUC_SAVE_RESTORE_VF_REQUEST_MSG_4_SIZE, ndwords),
+ };
+
+ return xe_guc_ct_send_block(&guc->ct, request, ARRAY_SIZE(request));
+}
+
+/* Return: size of the state in dwords or a negative error code on failure */
+static int pf_send_guc_query_vf_state_size(struct xe_gt *gt, unsigned int vfid)
+{
+ int ret;
+
+ ret = guc_action_vf_save_restore(&gt->uc.guc, vfid, GUC_PF_OPCODE_VF_SAVE, 0, 0);
+ return ret ?: -ENODATA;
+}
+
+/* Return: number of state dwords saved or a negative error code on failure */
+static int pf_send_guc_save_vf_state(struct xe_gt *gt, unsigned int vfid,
+ void *buff, size_t size)
+{
+ const int ndwords = size / sizeof(u32);
+ struct xe_tile *tile = gt_to_tile(gt);
+ struct xe_device *xe = tile_to_xe(tile);
+ struct xe_guc *guc = &gt->uc.guc;
+ struct xe_bo *bo;
+ int ret;
+
+ xe_gt_assert(gt, size % sizeof(u32) == 0);
+ xe_gt_assert(gt, size == ndwords * sizeof(u32));
+
+ bo = xe_bo_create_pin_map(xe, tile, NULL,
+ ALIGN(size, PAGE_SIZE),
+ ttm_bo_type_kernel,
+ XE_BO_FLAG_SYSTEM |
+ XE_BO_FLAG_GGTT |
+ XE_BO_FLAG_GGTT_INVALIDATE);
+ if (IS_ERR(bo))
+ return PTR_ERR(bo);
+
+ ret = guc_action_vf_save_restore(guc, vfid, GUC_PF_OPCODE_VF_SAVE,
+ xe_bo_ggtt_addr(bo), ndwords);
+ if (!ret)
+ ret = -ENODATA;
+ else if (ret > ndwords)
+ ret = -EPROTO;
+ else if (ret > 0)
+ xe_map_memcpy_from(xe, buff, &bo->vmap, 0, ret * sizeof(u32));
+
+ xe_bo_unpin_map_no_vm(bo);
+ return ret;
+}
+
+/* Return: number of state dwords restored or a negative error code on failure */
+static int pf_send_guc_restore_vf_state(struct xe_gt *gt, unsigned int vfid,
+ const void *buff, size_t size)
+{
+ const int ndwords = size / sizeof(u32);
+ struct xe_tile *tile = gt_to_tile(gt);
+ struct xe_device *xe = tile_to_xe(tile);
+ struct xe_guc *guc = &gt->uc.guc;
+ struct xe_bo *bo;
+ int ret;
+
+ xe_gt_assert(gt, size % sizeof(u32) == 0);
+ xe_gt_assert(gt, size == ndwords * sizeof(u32));
+
+ bo = xe_bo_create_pin_map(xe, tile, NULL,
+ ALIGN(size, PAGE_SIZE),
+ ttm_bo_type_kernel,
+ XE_BO_FLAG_SYSTEM |
+ XE_BO_FLAG_GGTT |
+ XE_BO_FLAG_GGTT_INVALIDATE);
+ if (IS_ERR(bo))
+ return PTR_ERR(bo);
+
+ xe_map_memcpy_to(xe, &bo->vmap, 0, buff, size);
+
+ ret = guc_action_vf_save_restore(guc, vfid, GUC_PF_OPCODE_VF_RESTORE,
+ xe_bo_ggtt_addr(bo), ndwords);
+ if (!ret)
+ ret = -ENODATA;
+ else if (ret > ndwords)
+ ret = -EPROTO;
+
+ xe_bo_unpin_map_no_vm(bo);
+ return ret;
+}
+
+static bool pf_migration_supported(struct xe_gt *gt)
+{
+ xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+ return gt->sriov.pf.migration.supported;
+}
+
+static struct mutex *pf_migration_mutex(struct xe_gt *gt)
+{
+ xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+ return &gt->sriov.pf.migration.snapshot_lock;
+}
+
+static struct xe_gt_sriov_state_snapshot *pf_pick_vf_snapshot(struct xe_gt *gt,
+ unsigned int vfid)
+{
+ xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+ xe_gt_assert(gt, vfid <= xe_sriov_pf_get_totalvfs(gt_to_xe(gt)));
+ lockdep_assert_held(pf_migration_mutex(gt));
+
+ return &gt->sriov.pf.vfs[vfid].snapshot;
+}
+
+static unsigned int pf_snapshot_index(struct xe_gt *gt, struct xe_gt_sriov_state_snapshot *snapshot)
+{
+ return container_of(snapshot, struct xe_gt_sriov_metadata, snapshot) - gt->sriov.pf.vfs;
+}
+
+static void pf_free_guc_state(struct xe_gt *gt, struct xe_gt_sriov_state_snapshot *snapshot)
+{
+ struct xe_device *xe = gt_to_xe(gt);
+
+ drmm_kfree(&xe->drm, snapshot->guc.buff);
+ snapshot->guc.buff = NULL;
+ snapshot->guc.size = 0;
+}
+
+static int pf_alloc_guc_state(struct xe_gt *gt,
+ struct xe_gt_sriov_state_snapshot *snapshot,
+ size_t size)
+{
+ struct xe_device *xe = gt_to_xe(gt);
+ void *p;
+
+ pf_free_guc_state(gt, snapshot);
+
+ if (!size)
+ return -ENODATA;
+
+ if (size % sizeof(u32))
+ return -EINVAL;
+
+ if (size > SZ_2M)
+ return -EFBIG;
+
+ p = drmm_kzalloc(&xe->drm, size, GFP_KERNEL);
+ if (!p)
+ return -ENOMEM;
+
+ snapshot->guc.buff = p;
+ snapshot->guc.size = size;
+ return 0;
+}
+
+static void pf_dump_guc_state(struct xe_gt *gt, struct xe_gt_sriov_state_snapshot *snapshot)
+{
+ if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_SRIOV)) {
+ unsigned int vfid __maybe_unused = pf_snapshot_index(gt, snapshot);
+
+ xe_gt_sriov_dbg_verbose(gt, "VF%u GuC state is %zu dwords:\n",
+ vfid, snapshot->guc.size / sizeof(u32));
+ print_hex_dump_bytes("state: ", DUMP_PREFIX_OFFSET,
+ snapshot->guc.buff, min(SZ_64, snapshot->guc.size));
+ }
+}
+
+static int pf_save_vf_guc_state(struct xe_gt *gt, unsigned int vfid)
+{
+ struct xe_gt_sriov_state_snapshot *snapshot = pf_pick_vf_snapshot(gt, vfid);
+ size_t size;
+ int ret;
+
+ ret = pf_send_guc_query_vf_state_size(gt, vfid);
+ if (ret < 0)
+ goto fail;
+ size = ret * sizeof(u32);
+ xe_gt_sriov_dbg_verbose(gt, "VF%u state size is %d dwords (%zu bytes)\n", vfid, ret, size);
+
+ ret = pf_alloc_guc_state(gt, snapshot, size);
+ if (ret < 0)
+ goto fail;
+
+ ret = pf_send_guc_save_vf_state(gt, vfid, snapshot->guc.buff, size);
+ if (ret < 0)
+ goto fail;
+ size = ret * sizeof(u32);
+ xe_gt_assert(gt, size);
+ xe_gt_assert(gt, size <= snapshot->guc.size);
+ snapshot->guc.size = size;
+
+ pf_dump_guc_state(gt, snapshot);
+ return 0;
+
+fail:
+ xe_gt_sriov_dbg(gt, "Unable to save VF%u state (%pe)\n", vfid, ERR_PTR(ret));
+ pf_free_guc_state(gt, snapshot);
+ return ret;
+}
+
+/**
+ * xe_gt_sriov_pf_migration_save_guc_state() - Take a GuC VF state snapshot.
+ * @gt: the &xe_gt
+ * @vfid: the VF identifier
+ *
+ * This function is for PF only.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_gt_sriov_pf_migration_save_guc_state(struct xe_gt *gt, unsigned int vfid)
+{
+ int err;
+
+ xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+ xe_gt_assert(gt, vfid != PFID);
+ xe_gt_assert(gt, vfid <= xe_sriov_pf_get_totalvfs(gt_to_xe(gt)));
+
+ if (!pf_migration_supported(gt))
+ return -ENOPKG;
+
+ mutex_lock(pf_migration_mutex(gt));
+ err = pf_save_vf_guc_state(gt, vfid);
+ mutex_unlock(pf_migration_mutex(gt));
+
+ return err;
+}
+
+static int pf_restore_vf_guc_state(struct xe_gt *gt, unsigned int vfid)
+{
+ struct xe_gt_sriov_state_snapshot *snapshot = pf_pick_vf_snapshot(gt, vfid);
+ int ret;
+
+ if (!snapshot->guc.size)
+ return -ENODATA;
+
+ xe_gt_sriov_dbg_verbose(gt, "restoring %zu dwords of VF%u GuC state\n",
+ snapshot->guc.size / sizeof(u32), vfid);
+ ret = pf_send_guc_restore_vf_state(gt, vfid, snapshot->guc.buff, snapshot->guc.size);
+ if (ret < 0)
+ goto fail;
+
+ xe_gt_sriov_dbg_verbose(gt, "restored %d dwords of VF%u GuC state\n", ret, vfid);
+ return 0;
+
+fail:
+ xe_gt_sriov_dbg(gt, "Failed to restore VF%u GuC state (%pe)\n", vfid, ERR_PTR(ret));
+ return ret;
+}
+
+/**
+ * xe_gt_sriov_pf_migration_restore_guc_state() - Restore a GuC VF state.
+ * @gt: the &xe_gt
+ * @vfid: the VF identifier
+ *
+ * This function is for PF only.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_gt_sriov_pf_migration_restore_guc_state(struct xe_gt *gt, unsigned int vfid)
+{
+ int ret;
+
+ xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+ xe_gt_assert(gt, vfid != PFID);
+ xe_gt_assert(gt, vfid <= xe_sriov_pf_get_totalvfs(gt_to_xe(gt)));
+
+ if (!pf_migration_supported(gt))
+ return -ENOPKG;
+
+ mutex_lock(pf_migration_mutex(gt));
+ ret = pf_restore_vf_guc_state(gt, vfid);
+ mutex_unlock(pf_migration_mutex(gt));
+
+ return ret;
+}
+
+#ifdef CONFIG_DEBUG_FS
+/**
+ * xe_gt_sriov_pf_migration_read_guc_state() - Read a GuC VF state.
+ * @gt: the &xe_gt
+ * @vfid: the VF identifier
+ * @buf: the user space buffer to read to
+ * @count: the maximum number of bytes to read
+ * @pos: the current position in the buffer
+ *
+ * This function is for PF only.
+ *
+ * This function reads up to @count bytes from the saved VF GuC state buffer
+ * at offset @pos into the user space address starting at @buf.
+ *
+ * Return: the number of bytes read or a negative error code on failure.
+ */
+ssize_t xe_gt_sriov_pf_migration_read_guc_state(struct xe_gt *gt, unsigned int vfid,
+ char __user *buf, size_t count, loff_t *pos)
+{
+ struct xe_gt_sriov_state_snapshot *snapshot;
+ ssize_t ret;
+
+ xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+ xe_gt_assert(gt, vfid != PFID);
+ xe_gt_assert(gt, vfid <= xe_sriov_pf_get_totalvfs(gt_to_xe(gt)));
+
+ if (!pf_migration_supported(gt))
+ return -ENOPKG;
+
+ mutex_lock(pf_migration_mutex(gt));
+ snapshot = pf_pick_vf_snapshot(gt, vfid);
+ if (snapshot->guc.size)
+ ret = simple_read_from_buffer(buf, count, pos, snapshot->guc.buff,
+ snapshot->guc.size);
+ else
+ ret = -ENODATA;
+ mutex_unlock(pf_migration_mutex(gt));
+
+ return ret;
+}
+
+/**
+ * xe_gt_sriov_pf_migration_write_guc_state() - Write a GuC VF state.
+ * @gt: the &xe_gt
+ * @vfid: the VF identifier
+ * @buf: the user space buffer with GuC VF state
+ * @size: the size of GuC VF state (in bytes)
+ *
+ * This function is for PF only.
+ *
+ * This function reads @size bytes of the VF GuC state stored at user space
+ * address @buf and writes it into a internal VF state buffer.
+ *
+ * Return: the number of bytes used or a negative error code on failure.
+ */
+ssize_t xe_gt_sriov_pf_migration_write_guc_state(struct xe_gt *gt, unsigned int vfid,
+ const char __user *buf, size_t size)
+{
+ struct xe_gt_sriov_state_snapshot *snapshot;
+ loff_t pos = 0;
+ ssize_t ret;
+
+ xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+ xe_gt_assert(gt, vfid != PFID);
+ xe_gt_assert(gt, vfid <= xe_sriov_pf_get_totalvfs(gt_to_xe(gt)));
+
+ if (!pf_migration_supported(gt))
+ return -ENOPKG;
+
+ mutex_lock(pf_migration_mutex(gt));
+ snapshot = pf_pick_vf_snapshot(gt, vfid);
+ ret = pf_alloc_guc_state(gt, snapshot, size);
+ if (!ret) {
+ ret = simple_write_to_buffer(snapshot->guc.buff, size, &pos, buf, size);
+ if (ret < 0)
+ pf_free_guc_state(gt, snapshot);
+ else
+ pf_dump_guc_state(gt, snapshot);
+ }
+ mutex_unlock(pf_migration_mutex(gt));
+
+ return ret;
+}
+#endif /* CONFIG_DEBUG_FS */
+
+static bool pf_check_migration_support(struct xe_gt *gt)
+{
+ /* GuC 70.25 with save/restore v2 is required */
+ xe_gt_assert(gt, GUC_FIRMWARE_VER(&gt->uc.guc) >= MAKE_GUC_VER(70, 25, 0));
+
+ /* XXX: for now this is for feature enabling only */
+ return IS_ENABLED(CONFIG_DRM_XE_DEBUG);
+}
+
+/**
+ * xe_gt_sriov_pf_migration_init() - Initialize support for VF migration.
+ * @gt: the &xe_gt
+ *
+ * This function is for PF only.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_gt_sriov_pf_migration_init(struct xe_gt *gt)
+{
+ struct xe_device *xe = gt_to_xe(gt);
+ int err;
+
+ xe_gt_assert(gt, IS_SRIOV_PF(xe));
+
+ gt->sriov.pf.migration.supported = pf_check_migration_support(gt);
+
+ if (!pf_migration_supported(gt))
+ return 0;
+
+ err = drmm_mutex_init(&xe->drm, &gt->sriov.pf.migration.snapshot_lock);
+ if (err)
+ return err;
+
+ return 0;
+}
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.h
new file mode 100644
index 000000000000..09faeae00ddb
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2024 Intel Corporation
+ */
+
+#ifndef _XE_GT_SRIOV_PF_MIGRATION_H_
+#define _XE_GT_SRIOV_PF_MIGRATION_H_
+
+#include <linux/types.h>
+
+struct xe_gt;
+
+int xe_gt_sriov_pf_migration_init(struct xe_gt *gt);
+int xe_gt_sriov_pf_migration_save_guc_state(struct xe_gt *gt, unsigned int vfid);
+int xe_gt_sriov_pf_migration_restore_guc_state(struct xe_gt *gt, unsigned int vfid);
+
+#ifdef CONFIG_DEBUG_FS
+ssize_t xe_gt_sriov_pf_migration_read_guc_state(struct xe_gt *gt, unsigned int vfid,
+ char __user *buf, size_t count, loff_t *pos);
+ssize_t xe_gt_sriov_pf_migration_write_guc_state(struct xe_gt *gt, unsigned int vfid,
+ const char __user *buf, size_t count);
+#endif
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration_types.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration_types.h
new file mode 100644
index 000000000000..1f3110b6d44f
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration_types.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2024 Intel Corporation
+ */
+
+#ifndef _XE_GT_SRIOV_PF_MIGRATION_TYPES_H_
+#define _XE_GT_SRIOV_PF_MIGRATION_TYPES_H_
+
+#include <linux/mutex.h>
+#include <linux/types.h>
+
+/**
+ * struct xe_gt_sriov_state_snapshot - GT-level per-VF state snapshot data.
+ *
+ * Used by the PF driver to maintain per-VF migration data.
+ */
+struct xe_gt_sriov_state_snapshot {
+ /** @guc: GuC VF state snapshot */
+ struct {
+ /** @guc.buff: buffer with the VF state */
+ u32 *buff;
+ /** @guc.size: size of the buffer (must be dwords aligned) */
+ u32 size;
+ } guc;
+};
+
+/**
+ * struct xe_gt_sriov_pf_migration - GT-level data.
+ *
+ * Used by the PF driver to maintain non-VF specific per-GT data.
+ */
+struct xe_gt_sriov_pf_migration {
+ /** @supported: indicates whether the feature is supported */
+ bool supported;
+
+ /** @snapshot_lock: protects all VFs snapshots */
+ struct mutex snapshot_lock;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c
index 0e23b7ea4f3e..924e75b94aec 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c
@@ -237,7 +237,7 @@ static void read_many(struct xe_gt *gt, unsigned int count,
const struct xe_reg *regs, u32 *values)
{
while (count--)
- *values++ = xe_mmio_read32(gt, *regs++);
+ *values++ = xe_mmio_read32(&gt->mmio, *regs++);
}
static void pf_prepare_runtime_info(struct xe_gt *gt)
@@ -402,7 +402,7 @@ static int pf_service_runtime_query(struct xe_gt *gt, u32 start, u32 limit,
for (i = 0; i < count; ++i, ++data) {
addr = runtime->regs[start + i].addr;
- data->offset = xe_mmio_adjusted_addr(gt, addr);
+ data->offset = xe_mmio_adjusted_addr(&gt->mmio, addr);
data->value = runtime->values[start + i];
}
@@ -513,7 +513,7 @@ int xe_gt_sriov_pf_service_print_runtime(struct xe_gt *gt, struct drm_printer *p
for (; size--; regs++, values++) {
drm_printf(p, "reg[%#x] = %#x\n",
- xe_mmio_adjusted_addr(gt, regs->addr), *values);
+ xe_mmio_adjusted_addr(&gt->mmio, regs->addr), *values);
}
return 0;
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h
index 28e1b130bf87..0426b1a77069 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h
@@ -10,6 +10,7 @@
#include "xe_gt_sriov_pf_config_types.h"
#include "xe_gt_sriov_pf_control_types.h"
+#include "xe_gt_sriov_pf_migration_types.h"
#include "xe_gt_sriov_pf_monitor_types.h"
#include "xe_gt_sriov_pf_policy_types.h"
#include "xe_gt_sriov_pf_service_types.h"
@@ -29,6 +30,9 @@ struct xe_gt_sriov_metadata {
/** @version: negotiated VF/PF ABI version */
struct xe_gt_sriov_pf_service_version version;
+
+ /** @snapshot: snapshot of the VF state data */
+ struct xe_gt_sriov_state_snapshot snapshot;
};
/**
@@ -36,6 +40,7 @@ struct xe_gt_sriov_metadata {
* @service: service data.
* @control: control data.
* @policy: policy data.
+ * @migration: migration data.
* @spare: PF-only provisioning configuration.
* @vfs: metadata for all VFs.
*/
@@ -43,6 +48,7 @@ struct xe_gt_sriov_pf {
struct xe_gt_sriov_pf_service service;
struct xe_gt_sriov_pf_control control;
struct xe_gt_sriov_pf_policy policy;
+ struct xe_gt_sriov_pf_migration migration;
struct xe_gt_sriov_spare_config spare;
struct xe_gt_sriov_metadata *vfs;
};
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c
index 4ebc82e607af..d3baba50f085 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c
@@ -881,7 +881,7 @@ static struct vf_runtime_reg *vf_lookup_reg(struct xe_gt *gt, u32 addr)
*/
u32 xe_gt_sriov_vf_read32(struct xe_gt *gt, struct xe_reg reg)
{
- u32 addr = xe_mmio_adjusted_addr(gt, reg.addr);
+ u32 addr = xe_mmio_adjusted_addr(&gt->mmio, reg.addr);
struct vf_runtime_reg *rr;
xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt)));
@@ -917,7 +917,7 @@ u32 xe_gt_sriov_vf_read32(struct xe_gt *gt, struct xe_reg reg)
*/
void xe_gt_sriov_vf_write32(struct xe_gt *gt, struct xe_reg reg, u32 val)
{
- u32 addr = xe_mmio_adjusted_addr(gt, reg.addr);
+ u32 addr = xe_mmio_adjusted_addr(&gt->mmio, reg.addr);
xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt)));
xe_gt_assert(gt, !reg.vf);
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf_debugfs.c b/drivers/gpu/drm/xe/xe_gt_sriov_vf_debugfs.c
index f3ddcbefc6bc..2ed5b6780d30 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_vf_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf_debugfs.c
@@ -33,7 +33,7 @@ static const struct drm_info_list vf_info[] = {
.show = xe_gt_debugfs_simple_show,
.data = xe_gt_sriov_vf_print_version,
},
-#if defined(CONFIG_DRM_XE_DEBUG) || defined(CONFIG_DRM_XE_DEBUG_SRIOV)
+#if IS_ENABLED(CONFIG_DRM_XE_DEBUG) || IS_ENABLED(CONFIG_DRM_XE_DEBUG_SRIOV)
{
"runtime_regs",
.show = xe_gt_debugfs_simple_show,
diff --git a/drivers/gpu/drm/xe/xe_gt_throttle.c b/drivers/gpu/drm/xe/xe_gt_throttle.c
index 25963e33a383..03b225364101 100644
--- a/drivers/gpu/drm/xe/xe_gt_throttle.c
+++ b/drivers/gpu/drm/xe/xe_gt_throttle.c
@@ -41,9 +41,9 @@ u32 xe_gt_throttle_get_limit_reasons(struct xe_gt *gt)
xe_pm_runtime_get(gt_to_xe(gt));
if (xe_gt_is_media_type(gt))
- reg = xe_mmio_read32(gt, MTL_MEDIA_PERF_LIMIT_REASONS);
+ reg = xe_mmio_read32(&gt->mmio, MTL_MEDIA_PERF_LIMIT_REASONS);
else
- reg = xe_mmio_read32(gt, GT0_PERF_LIMIT_REASONS);
+ reg = xe_mmio_read32(&gt->mmio, GT0_PERF_LIMIT_REASONS);
xe_pm_runtime_put(gt_to_xe(gt));
return reg;
diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
index 9d82ea30f4df..3cb228c773cd 100644
--- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
+++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
@@ -270,6 +270,7 @@ static int xe_gt_tlb_invalidation_guc(struct xe_gt *gt,
int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt)
{
struct xe_device *xe = gt_to_xe(gt);
+ unsigned int fw_ref;
if (xe_guc_ct_enabled(&gt->uc.guc.ct) &&
gt->uc.guc.submission_state.enabled) {
@@ -283,20 +284,22 @@ int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt)
xe_gt_tlb_invalidation_fence_wait(&fence);
} else if (xe_device_uc_enabled(xe) && !xe_device_wedged(xe)) {
+ struct xe_mmio *mmio = &gt->mmio;
+
if (IS_SRIOV_VF(xe))
return 0;
- xe_gt_WARN_ON(gt, xe_force_wake_get(gt_to_fw(gt), XE_FW_GT));
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
if (xe->info.platform == XE_PVC || GRAPHICS_VER(xe) >= 20) {
- xe_mmio_write32(gt, PVC_GUC_TLB_INV_DESC1,
+ xe_mmio_write32(mmio, PVC_GUC_TLB_INV_DESC1,
PVC_GUC_TLB_INV_DESC1_INVALIDATE);
- xe_mmio_write32(gt, PVC_GUC_TLB_INV_DESC0,
+ xe_mmio_write32(mmio, PVC_GUC_TLB_INV_DESC0,
PVC_GUC_TLB_INV_DESC0_VALID);
} else {
- xe_mmio_write32(gt, GUC_TLB_INV_CR,
+ xe_mmio_write32(mmio, GUC_TLB_INV_CR,
GUC_TLB_INV_CR_INVALIDATE);
}
- xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
}
return 0;
diff --git a/drivers/gpu/drm/xe/xe_gt_topology.c b/drivers/gpu/drm/xe/xe_gt_topology.c
index 0662f71c6ede..df2042db7ee6 100644
--- a/drivers/gpu/drm/xe/xe_gt_topology.c
+++ b/drivers/gpu/drm/xe/xe_gt_topology.c
@@ -5,6 +5,7 @@
#include "xe_gt_topology.h"
+#include <generated/xe_wa_oob.h>
#include <linux/bitmap.h>
#include <linux/compiler.h>
@@ -12,6 +13,7 @@
#include "xe_assert.h"
#include "xe_gt.h"
#include "xe_mmio.h"
+#include "xe_wa.h"
static void
load_dss_mask(struct xe_gt *gt, xe_dss_mask_t mask, int numregs, ...)
@@ -25,7 +27,7 @@ load_dss_mask(struct xe_gt *gt, xe_dss_mask_t mask, int numregs, ...)
va_start(argp, numregs);
for (i = 0; i < numregs; i++)
- fuse_val[i] = xe_mmio_read32(gt, va_arg(argp, struct xe_reg));
+ fuse_val[i] = xe_mmio_read32(&gt->mmio, va_arg(argp, struct xe_reg));
va_end(argp);
bitmap_from_arr32(mask, fuse_val, numregs * 32);
@@ -35,7 +37,7 @@ static void
load_eu_mask(struct xe_gt *gt, xe_eu_mask_t mask, enum xe_gt_eu_type *eu_type)
{
struct xe_device *xe = gt_to_xe(gt);
- u32 reg_val = xe_mmio_read32(gt, XELP_EU_ENABLE);
+ u32 reg_val = xe_mmio_read32(&gt->mmio, XELP_EU_ENABLE);
u32 val = 0;
int i;
@@ -127,7 +129,19 @@ static void
load_l3_bank_mask(struct xe_gt *gt, xe_l3_bank_mask_t l3_bank_mask)
{
struct xe_device *xe = gt_to_xe(gt);
- u32 fuse3 = xe_mmio_read32(gt, MIRROR_FUSE3);
+ u32 fuse3 = xe_mmio_read32(&gt->mmio, MIRROR_FUSE3);
+
+ /*
+ * PTL platforms with media version 30.00 do not provide proper values
+ * for the media GT's L3 bank registers. Skip the readout since we
+ * don't have any way to obtain real values.
+ *
+ * This may get re-described as an official workaround in the future,
+ * but there's no tracking number assigned yet so we use a custom
+ * OOB workaround descriptor.
+ */
+ if (XE_WA(gt, no_media_l3))
+ return;
if (GRAPHICS_VER(xe) >= 20) {
xe_l3_bank_mask_t per_node = {};
@@ -141,7 +155,7 @@ load_l3_bank_mask(struct xe_gt *gt, xe_l3_bank_mask_t l3_bank_mask)
xe_l3_bank_mask_t per_node = {};
xe_l3_bank_mask_t per_mask_bit = {};
u32 meml3_en = REG_FIELD_GET(MEML3_EN_MASK, fuse3);
- u32 fuse4 = xe_mmio_read32(gt, XEHP_FUSE4);
+ u32 fuse4 = xe_mmio_read32(&gt->mmio, XEHP_FUSE4);
u32 bank_val = REG_FIELD_GET(GT_L3_EXC_MASK, fuse4);
bitmap_set_value8(per_mask_bit, 0x3, 0);
diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h
index 3d1c51de0268..a287b98ee70b 100644
--- a/drivers/gpu/drm/xe/xe_gt_types.h
+++ b/drivers/gpu/drm/xe/xe_gt_types.h
@@ -6,6 +6,7 @@
#ifndef _XE_GT_TYPES_H_
#define _XE_GT_TYPES_H_
+#include "xe_device_types.h"
#include "xe_force_wake_types.h"
#include "xe_gt_idle_types.h"
#include "xe_gt_sriov_pf_types.h"
@@ -145,19 +146,20 @@ struct xe_gt {
/**
* @mmio: mmio info for GT. All GTs within a tile share the same
* register space, but have their own copy of GSI registers at a
- * specific offset, as well as their own forcewake handling.
+ * specific offset.
+ */
+ struct xe_mmio mmio;
+
+ /**
+ * @pm: power management info for GT. The driver uses the GT's
+ * "force wake" interface to wake up specific parts of the GT hardware
+ * from C6 sleep states and ensure the hardware remains awake while it
+ * is being actively used.
*/
struct {
- /** @mmio.fw: force wake for GT */
+ /** @pm.fw: force wake for GT */
struct xe_force_wake fw;
- /**
- * @mmio.adj_limit: adjust MMIO address if address is below this
- * value
- */
- u32 adj_limit;
- /** @mmio.adj_offset: offect to add to MMIO address when adjusting */
- u32 adj_offset;
- } mmio;
+ } pm;
/** @sriov: virtualization data related to GT */
union {
diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c
index 52df28032a6f..7f704346a8f4 100644
--- a/drivers/gpu/drm/xe/xe_guc.c
+++ b/drivers/gpu/drm/xe/xe_guc.c
@@ -14,6 +14,7 @@
#include "regs/xe_gt_regs.h"
#include "regs/xe_gtt_defs.h"
#include "regs/xe_guc_regs.h"
+#include "regs/xe_irq_regs.h"
#include "xe_bo.h"
#include "xe_device.h"
#include "xe_force_wake.h"
@@ -22,6 +23,7 @@
#include "xe_gt_sriov_vf.h"
#include "xe_gt_throttle.h"
#include "xe_guc_ads.h"
+#include "xe_guc_capture.h"
#include "xe_guc_ct.h"
#include "xe_guc_db_mgr.h"
#include "xe_guc_hwconfig.h"
@@ -68,7 +70,7 @@ static u32 guc_ctl_debug_flags(struct xe_guc *guc)
static u32 guc_ctl_feature_flags(struct xe_guc *guc)
{
- u32 flags = 0;
+ u32 flags = GUC_CTL_ENABLE_LITE_RESTORE;
if (!guc_to_xe(guc)->info.skip_guc_pc)
flags |= GUC_CTL_ENABLE_SLPC;
@@ -236,20 +238,21 @@ static void guc_write_params(struct xe_guc *guc)
xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);
- xe_mmio_write32(gt, SOFT_SCRATCH(0), 0);
+ xe_mmio_write32(&gt->mmio, SOFT_SCRATCH(0), 0);
for (i = 0; i < GUC_CTL_MAX_DWORDS; i++)
- xe_mmio_write32(gt, SOFT_SCRATCH(1 + i), guc->params[i]);
+ xe_mmio_write32(&gt->mmio, SOFT_SCRATCH(1 + i), guc->params[i]);
}
static void guc_fini_hw(void *arg)
{
struct xe_guc *guc = arg;
struct xe_gt *gt = guc_to_gt(guc);
+ unsigned int fw_ref;
- xe_gt_WARN_ON(gt, xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
xe_uc_fini_hw(&guc_to_gt(guc)->uc);
- xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
}
/**
@@ -338,6 +341,10 @@ int xe_guc_init(struct xe_guc *guc)
if (ret)
goto out;
+ ret = xe_guc_capture_init(guc);
+ if (ret)
+ goto out;
+
ret = xe_guc_ads_init(&guc->ads);
if (ret)
goto out;
@@ -425,6 +432,7 @@ int xe_guc_post_load_init(struct xe_guc *guc)
int xe_guc_reset(struct xe_guc *guc)
{
struct xe_gt *gt = guc_to_gt(guc);
+ struct xe_mmio *mmio = &gt->mmio;
u32 guc_status, gdrst;
int ret;
@@ -433,15 +441,15 @@ int xe_guc_reset(struct xe_guc *guc)
if (IS_SRIOV_VF(gt_to_xe(gt)))
return xe_gt_sriov_vf_bootstrap(gt);
- xe_mmio_write32(gt, GDRST, GRDOM_GUC);
+ xe_mmio_write32(mmio, GDRST, GRDOM_GUC);
- ret = xe_mmio_wait32(gt, GDRST, GRDOM_GUC, 0, 5000, &gdrst, false);
+ ret = xe_mmio_wait32(mmio, GDRST, GRDOM_GUC, 0, 5000, &gdrst, false);
if (ret) {
xe_gt_err(gt, "GuC reset timed out, GDRST=%#x\n", gdrst);
goto err_out;
}
- guc_status = xe_mmio_read32(gt, GUC_STATUS);
+ guc_status = xe_mmio_read32(mmio, GUC_STATUS);
if (!(guc_status & GS_MIA_IN_RESET)) {
xe_gt_err(gt, "GuC status: %#x, MIA core expected to be in reset\n",
guc_status);
@@ -459,6 +467,7 @@ err_out:
static void guc_prepare_xfer(struct xe_guc *guc)
{
struct xe_gt *gt = guc_to_gt(guc);
+ struct xe_mmio *mmio = &gt->mmio;
struct xe_device *xe = guc_to_xe(guc);
u32 shim_flags = GUC_ENABLE_READ_CACHE_LOGIC |
GUC_ENABLE_READ_CACHE_FOR_SRAM_DATA |
@@ -473,12 +482,12 @@ static void guc_prepare_xfer(struct xe_guc *guc)
shim_flags |= REG_FIELD_PREP(GUC_MOCS_INDEX_MASK, gt->mocs.uc_index);
/* Must program this register before loading the ucode with DMA */
- xe_mmio_write32(gt, GUC_SHIM_CONTROL, shim_flags);
+ xe_mmio_write32(mmio, GUC_SHIM_CONTROL, shim_flags);
- xe_mmio_write32(gt, GT_PM_CONFIG, GT_DOORBELL_ENABLE);
+ xe_mmio_write32(mmio, GT_PM_CONFIG, GT_DOORBELL_ENABLE);
/* Make sure GuC receives ARAT interrupts */
- xe_mmio_rmw32(gt, PMINTRMSK, ARAT_EXPIRED_INTRMSK, 0);
+ xe_mmio_rmw32(mmio, PMINTRMSK, ARAT_EXPIRED_INTRMSK, 0);
}
/*
@@ -494,7 +503,7 @@ static int guc_xfer_rsa(struct xe_guc *guc)
if (guc->fw.rsa_size > 256) {
u32 rsa_ggtt_addr = xe_bo_ggtt_addr(guc->fw.bo) +
xe_uc_fw_rsa_offset(&guc->fw);
- xe_mmio_write32(gt, UOS_RSA_SCRATCH(0), rsa_ggtt_addr);
+ xe_mmio_write32(&gt->mmio, UOS_RSA_SCRATCH(0), rsa_ggtt_addr);
return 0;
}
@@ -503,7 +512,7 @@ static int guc_xfer_rsa(struct xe_guc *guc)
return -ENOMEM;
for (i = 0; i < UOS_RSA_SCRATCH_COUNT; i++)
- xe_mmio_write32(gt, UOS_RSA_SCRATCH(i), rsa[i]);
+ xe_mmio_write32(&gt->mmio, UOS_RSA_SCRATCH(i), rsa[i]);
return 0;
}
@@ -583,7 +592,7 @@ static s32 guc_pc_get_cur_freq(struct xe_guc_pc *guc_pc)
* extreme thermal throttling. And a system that is that hot during boot is probably
* dead anyway!
*/
-#if defined(CONFIG_DRM_XE_DEBUG)
+#if IS_ENABLED(CONFIG_DRM_XE_DEBUG)
#define GUC_LOAD_RETRY_LIMIT 20
#else
#define GUC_LOAD_RETRY_LIMIT 3
@@ -593,6 +602,7 @@ static s32 guc_pc_get_cur_freq(struct xe_guc_pc *guc_pc)
static void guc_wait_ucode(struct xe_guc *guc)
{
struct xe_gt *gt = guc_to_gt(guc);
+ struct xe_mmio *mmio = &gt->mmio;
struct xe_guc_pc *guc_pc = &gt->uc.guc.pc;
ktime_t before, after, delta;
int load_done;
@@ -619,7 +629,7 @@ static void guc_wait_ucode(struct xe_guc *guc)
* timeouts rather than allowing a huge timeout each time. So basically, need
* to treat a timeout no different to a value change.
*/
- ret = xe_mmio_wait32_not(gt, GUC_STATUS, GS_UKERNEL_MASK | GS_BOOTROM_MASK,
+ ret = xe_mmio_wait32_not(mmio, GUC_STATUS, GS_UKERNEL_MASK | GS_BOOTROM_MASK,
last_status, 1000 * 1000, &status, false);
if (ret < 0)
count++;
@@ -657,7 +667,7 @@ static void guc_wait_ucode(struct xe_guc *guc)
switch (bootrom) {
case XE_BOOTROM_STATUS_NO_KEY_FOUND:
xe_gt_err(gt, "invalid key requested, header = 0x%08X\n",
- xe_mmio_read32(gt, GUC_HEADER_INFO));
+ xe_mmio_read32(mmio, GUC_HEADER_INFO));
break;
case XE_BOOTROM_STATUS_RSA_FAILED:
@@ -672,7 +682,7 @@ static void guc_wait_ucode(struct xe_guc *guc)
switch (ukernel) {
case XE_GUC_LOAD_STATUS_EXCEPTION:
xe_gt_err(gt, "firmware exception. EIP: %#x\n",
- xe_mmio_read32(gt, SOFT_SCRATCH(13)));
+ xe_mmio_read32(mmio, SOFT_SCRATCH(13)));
break;
case XE_GUC_LOAD_STATUS_INIT_MMIO_SAVE_RESTORE_INVALID:
@@ -824,10 +834,10 @@ static void guc_handle_mmio_msg(struct xe_guc *guc)
xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);
- msg = xe_mmio_read32(gt, SOFT_SCRATCH(15));
+ msg = xe_mmio_read32(&gt->mmio, SOFT_SCRATCH(15));
msg &= XE_GUC_RECV_MSG_EXCEPTION |
XE_GUC_RECV_MSG_CRASH_DUMP_POSTED;
- xe_mmio_write32(gt, SOFT_SCRATCH(15), 0);
+ xe_mmio_write32(&gt->mmio, SOFT_SCRATCH(15), 0);
if (msg & XE_GUC_RECV_MSG_CRASH_DUMP_POSTED)
xe_gt_err(gt, "Received early GuC crash dump notification!\n");
@@ -844,14 +854,14 @@ static void guc_enable_irq(struct xe_guc *guc)
REG_FIELD_PREP(ENGINE1_MASK, GUC_INTR_GUC2HOST);
/* Primary GuC and media GuC share a single enable bit */
- xe_mmio_write32(gt, GUC_SG_INTR_ENABLE,
+ xe_mmio_write32(&gt->mmio, GUC_SG_INTR_ENABLE,
REG_FIELD_PREP(ENGINE1_MASK, GUC_INTR_GUC2HOST));
/*
* There are separate mask bits for primary and media GuCs, so use
* a RMW operation to avoid clobbering the other GuC's setting.
*/
- xe_mmio_rmw32(gt, GUC_SG_INTR_MASK, events, 0);
+ xe_mmio_rmw32(&gt->mmio, GUC_SG_INTR_MASK, events, 0);
}
int xe_guc_enable_communication(struct xe_guc *guc)
@@ -863,7 +873,7 @@ int xe_guc_enable_communication(struct xe_guc *guc)
struct xe_gt *gt = guc_to_gt(guc);
struct xe_tile *tile = gt_to_tile(gt);
- err = xe_memirq_init_guc(&tile->sriov.vf.memirq, guc);
+ err = xe_memirq_init_guc(&tile->memirq, guc);
if (err)
return err;
} else {
@@ -907,7 +917,7 @@ void xe_guc_notify(struct xe_guc *guc)
* additional payload data to the GuC but this capability is not
* used by the firmware yet. Use default value in the meantime.
*/
- xe_mmio_write32(gt, guc->notify_reg, default_notify_data);
+ xe_mmio_write32(&gt->mmio, guc->notify_reg, default_notify_data);
}
int xe_guc_auth_huc(struct xe_guc *guc, u32 rsa_addr)
@@ -925,6 +935,7 @@ int xe_guc_mmio_send_recv(struct xe_guc *guc, const u32 *request,
{
struct xe_device *xe = guc_to_xe(guc);
struct xe_gt *gt = guc_to_gt(guc);
+ struct xe_mmio *mmio = &gt->mmio;
u32 header, reply;
struct xe_reg reply_reg = xe_gt_is_media_type(gt) ?
MED_VF_SW_FLAG(0) : VF_SW_FLAG(0);
@@ -947,19 +958,19 @@ retry:
/* Not in critical data-path, just do if else for GT type */
if (xe_gt_is_media_type(gt)) {
for (i = 0; i < len; ++i)
- xe_mmio_write32(gt, MED_VF_SW_FLAG(i),
+ xe_mmio_write32(mmio, MED_VF_SW_FLAG(i),
request[i]);
- xe_mmio_read32(gt, MED_VF_SW_FLAG(LAST_INDEX));
+ xe_mmio_read32(mmio, MED_VF_SW_FLAG(LAST_INDEX));
} else {
for (i = 0; i < len; ++i)
- xe_mmio_write32(gt, VF_SW_FLAG(i),
+ xe_mmio_write32(mmio, VF_SW_FLAG(i),
request[i]);
- xe_mmio_read32(gt, VF_SW_FLAG(LAST_INDEX));
+ xe_mmio_read32(mmio, VF_SW_FLAG(LAST_INDEX));
}
xe_guc_notify(guc);
- ret = xe_mmio_wait32(gt, reply_reg, GUC_HXG_MSG_0_ORIGIN,
+ ret = xe_mmio_wait32(mmio, reply_reg, GUC_HXG_MSG_0_ORIGIN,
FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_GUC),
50000, &reply, false);
if (ret) {
@@ -969,7 +980,7 @@ timeout:
return ret;
}
- header = xe_mmio_read32(gt, reply_reg);
+ header = xe_mmio_read32(mmio, reply_reg);
if (FIELD_GET(GUC_HXG_MSG_0_TYPE, header) ==
GUC_HXG_TYPE_NO_RESPONSE_BUSY) {
/*
@@ -985,7 +996,7 @@ timeout:
BUILD_BUG_ON(FIELD_MAX(GUC_HXG_MSG_0_TYPE) != GUC_HXG_TYPE_RESPONSE_SUCCESS);
BUILD_BUG_ON((GUC_HXG_TYPE_RESPONSE_SUCCESS ^ GUC_HXG_TYPE_RESPONSE_FAILURE) != 1);
- ret = xe_mmio_wait32(gt, reply_reg, resp_mask, resp_mask,
+ ret = xe_mmio_wait32(mmio, reply_reg, resp_mask, resp_mask,
1000000, &header, false);
if (unlikely(FIELD_GET(GUC_HXG_MSG_0_ORIGIN, header) !=
@@ -1032,7 +1043,7 @@ proto:
for (i = 1; i < VF_SW_FLAG_COUNT; i++) {
reply_reg.addr += sizeof(u32);
- response_buf[i] = xe_mmio_read32(gt, reply_reg);
+ response_buf[i] = xe_mmio_read32(mmio, reply_reg);
}
}
@@ -1145,17 +1156,17 @@ int xe_guc_start(struct xe_guc *guc)
void xe_guc_print_info(struct xe_guc *guc, struct drm_printer *p)
{
struct xe_gt *gt = guc_to_gt(guc);
+ unsigned int fw_ref;
u32 status;
- int err;
int i;
xe_uc_fw_print(&guc->fw, p);
- err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
- if (err)
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+ if (!fw_ref)
return;
- status = xe_mmio_read32(gt, GUC_STATUS);
+ status = xe_mmio_read32(&gt->mmio, GUC_STATUS);
drm_printf(p, "\nGuC status 0x%08x:\n", status);
drm_printf(p, "\tBootrom status = 0x%x\n",
@@ -1170,12 +1181,15 @@ void xe_guc_print_info(struct xe_guc *guc, struct drm_printer *p)
drm_puts(p, "\nScratch registers:\n");
for (i = 0; i < SOFT_SCRATCH_COUNT; i++) {
drm_printf(p, "\t%2d: \t0x%x\n",
- i, xe_mmio_read32(gt, SOFT_SCRATCH(i)));
+ i, xe_mmio_read32(&gt->mmio, SOFT_SCRATCH(i)));
}
- xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
+ drm_puts(p, "\n");
xe_guc_ct_print(&guc->ct, p, false);
+
+ drm_puts(p, "\n");
xe_guc_submit_print(guc, p);
}
diff --git a/drivers/gpu/drm/xe/xe_guc.h b/drivers/gpu/drm/xe/xe_guc.h
index 42116b167c98..58338be44558 100644
--- a/drivers/gpu/drm/xe/xe_guc.h
+++ b/drivers/gpu/drm/xe/xe_guc.h
@@ -82,4 +82,9 @@ static inline struct xe_device *guc_to_xe(struct xe_guc *guc)
return gt_to_xe(guc_to_gt(guc));
}
+static inline struct drm_device *guc_to_drm(struct xe_guc *guc)
+{
+ return &guc_to_xe(guc)->drm;
+}
+
#endif
diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c
index d1902a8581ca..4e746ae98888 100644
--- a/drivers/gpu/drm/xe/xe_guc_ads.c
+++ b/drivers/gpu/drm/xe/xe_guc_ads.c
@@ -5,6 +5,8 @@
#include "xe_guc_ads.h"
+#include <linux/fault-inject.h>
+
#include <drm/drm_managed.h>
#include <generated/xe_wa_oob.h>
@@ -18,6 +20,7 @@
#include "xe_gt_ccs_mode.h"
#include "xe_gt_printk.h"
#include "xe_guc.h"
+#include "xe_guc_capture.h"
#include "xe_guc_ct.h"
#include "xe_hw_engine.h"
#include "xe_lrc.h"
@@ -149,8 +152,7 @@ static u32 guc_ads_waklv_size(struct xe_guc_ads *ads)
static size_t guc_ads_capture_size(struct xe_guc_ads *ads)
{
- /* FIXME: Allocate a proper capture list */
- return PAGE_ALIGN(PAGE_SIZE);
+ return PAGE_ALIGN(ads->capture_size);
}
static size_t guc_ads_um_queues_size(struct xe_guc_ads *ads)
@@ -357,6 +359,11 @@ static void guc_waklv_init(struct xe_guc_ads *ads)
GUC_WORKAROUND_KLV_ID_DISABLE_MTP_DURING_ASYNC_COMPUTE,
&offset, &remain);
+ if (XE_WA(gt, 14022866841))
+ guc_waklv_enable_simple(ads,
+ GUC_WA_KLV_WAKE_POWER_DOMAINS_FOR_OUTBOUND_MMIO,
+ &offset, &remain);
+
/*
* On RC6 exit, GuC will write register 0xB04 with the default value provided. As of now,
* the default value for this register is determined to be 0xC40. This could change in the
@@ -404,6 +411,7 @@ int xe_guc_ads_init(struct xe_guc_ads *ads)
struct xe_bo *bo;
ads->golden_lrc_size = calculate_golden_lrc_size(ads);
+ ads->capture_size = xe_guc_capture_ads_input_worst_size(ads_to_guc(ads));
ads->regset_size = calculate_regset_size(gt);
ads->ads_waklv_size = calculate_waklv_size(ads);
@@ -418,14 +426,15 @@ int xe_guc_ads_init(struct xe_guc_ads *ads)
return 0;
}
+ALLOW_ERROR_INJECTION(xe_guc_ads_init, ERRNO); /* See xe_pci_probe() */
/**
* xe_guc_ads_init_post_hwconfig - initialize ADS post hwconfig load
* @ads: Additional data structures object
*
- * Recalcuate golden_lrc_size & regset_size as the number hardware engines may
- * have changed after the hwconfig was loaded. Also verify the new sizes fit in
- * the already allocated ADS buffer object.
+ * Recalculate golden_lrc_size, capture_size and regset_size as the number
+ * hardware engines may have changed after the hwconfig was loaded. Also verify
+ * the new sizes fit in the already allocated ADS buffer object.
*
* Return: 0 on success, negative error code on error.
*/
@@ -437,6 +446,8 @@ int xe_guc_ads_init_post_hwconfig(struct xe_guc_ads *ads)
xe_gt_assert(gt, ads->bo);
ads->golden_lrc_size = calculate_golden_lrc_size(ads);
+ /* Calculate Capture size with worst size */
+ ads->capture_size = xe_guc_capture_ads_input_worst_size(ads_to_guc(ads));
ads->regset_size = calculate_regset_size(gt);
xe_gt_assert(gt, ads->golden_lrc_size +
@@ -536,20 +547,148 @@ static void guc_mapping_table_init(struct xe_gt *gt,
}
}
-static void guc_capture_list_init(struct xe_guc_ads *ads)
+static u32 guc_get_capture_engine_mask(struct xe_gt *gt, struct iosys_map *info_map,
+ enum guc_capture_list_class_type capture_class)
+{
+ struct xe_device *xe = gt_to_xe(gt);
+ u32 mask;
+
+ switch (capture_class) {
+ case GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE:
+ mask = info_map_read(xe, info_map, engine_enabled_masks[GUC_RENDER_CLASS]);
+ mask |= info_map_read(xe, info_map, engine_enabled_masks[GUC_COMPUTE_CLASS]);
+ break;
+ case GUC_CAPTURE_LIST_CLASS_VIDEO:
+ mask = info_map_read(xe, info_map, engine_enabled_masks[GUC_VIDEO_CLASS]);
+ break;
+ case GUC_CAPTURE_LIST_CLASS_VIDEOENHANCE:
+ mask = info_map_read(xe, info_map, engine_enabled_masks[GUC_VIDEOENHANCE_CLASS]);
+ break;
+ case GUC_CAPTURE_LIST_CLASS_BLITTER:
+ mask = info_map_read(xe, info_map, engine_enabled_masks[GUC_BLITTER_CLASS]);
+ break;
+ case GUC_CAPTURE_LIST_CLASS_GSC_OTHER:
+ mask = info_map_read(xe, info_map, engine_enabled_masks[GUC_GSC_OTHER_CLASS]);
+ break;
+ default:
+ mask = 0;
+ }
+
+ return mask;
+}
+
+static inline bool get_capture_list(struct xe_guc_ads *ads, struct xe_guc *guc, struct xe_gt *gt,
+ int owner, int type, int class, u32 *total_size, size_t *size,
+ void **pptr)
+{
+ *size = 0;
+
+ if (!xe_guc_capture_getlistsize(guc, owner, type, class, size)) {
+ if (*total_size + *size > ads->capture_size)
+ xe_gt_dbg(gt, "Capture size overflow :%zu vs %d\n",
+ *total_size + *size, ads->capture_size);
+ else if (!xe_guc_capture_getlist(guc, owner, type, class, pptr))
+ return false;
+ }
+
+ return true;
+}
+
+static int guc_capture_prep_lists(struct xe_guc_ads *ads)
{
+ struct xe_guc *guc = ads_to_guc(ads);
+ struct xe_gt *gt = ads_to_gt(ads);
+ u32 ads_ggtt, capture_offset, null_ggtt, total_size = 0;
+ struct iosys_map info_map;
+ size_t size = 0;
+ void *ptr;
int i, j;
- u32 addr = xe_bo_ggtt_addr(ads->bo) + guc_ads_capture_offset(ads);
- /* FIXME: Populate a proper capture list */
+ /*
+ * GuC Capture's steered reg-list needs to be allocated and initialized
+ * after the GuC-hwconfig is available which guaranteed from here.
+ */
+ xe_guc_capture_steered_list_init(ads_to_guc(ads));
+
+ capture_offset = guc_ads_capture_offset(ads);
+ ads_ggtt = xe_bo_ggtt_addr(ads->bo);
+ info_map = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads),
+ offsetof(struct __guc_ads_blob, system_info));
+
+ /* first, set aside the first page for a capture_list with zero descriptors */
+ total_size = PAGE_SIZE;
+ if (!xe_guc_capture_getnullheader(guc, &ptr, &size))
+ xe_map_memcpy_to(ads_to_xe(ads), ads_to_map(ads), capture_offset, ptr, size);
+
+ null_ggtt = ads_ggtt + capture_offset;
+ capture_offset += PAGE_SIZE;
+
+ /*
+ * Populate capture list : at this point adps is already allocated and
+ * mapped to worst case size
+ */
for (i = 0; i < GUC_CAPTURE_LIST_INDEX_MAX; i++) {
- for (j = 0; j < GUC_MAX_ENGINE_CLASSES; j++) {
- ads_blob_write(ads, ads.capture_instance[i][j], addr);
- ads_blob_write(ads, ads.capture_class[i][j], addr);
+ bool write_empty_list;
+
+ for (j = 0; j < GUC_CAPTURE_LIST_CLASS_MAX; j++) {
+ u32 engine_mask = guc_get_capture_engine_mask(gt, &info_map, j);
+ /* null list if we dont have said engine or list */
+ if (!engine_mask) {
+ ads_blob_write(ads, ads.capture_class[i][j], null_ggtt);
+ ads_blob_write(ads, ads.capture_instance[i][j], null_ggtt);
+ continue;
+ }
+
+ /* engine exists: start with engine-class registers */
+ write_empty_list = get_capture_list(ads, guc, gt, i,
+ GUC_STATE_CAPTURE_TYPE_ENGINE_CLASS,
+ j, &total_size, &size, &ptr);
+ if (!write_empty_list) {
+ ads_blob_write(ads, ads.capture_class[i][j],
+ ads_ggtt + capture_offset);
+ xe_map_memcpy_to(ads_to_xe(ads), ads_to_map(ads), capture_offset,
+ ptr, size);
+ total_size += size;
+ capture_offset += size;
+ } else {
+ ads_blob_write(ads, ads.capture_class[i][j], null_ggtt);
+ }
+
+ /* engine exists: next, engine-instance registers */
+ write_empty_list = get_capture_list(ads, guc, gt, i,
+ GUC_STATE_CAPTURE_TYPE_ENGINE_INSTANCE,
+ j, &total_size, &size, &ptr);
+ if (!write_empty_list) {
+ ads_blob_write(ads, ads.capture_instance[i][j],
+ ads_ggtt + capture_offset);
+ xe_map_memcpy_to(ads_to_xe(ads), ads_to_map(ads), capture_offset,
+ ptr, size);
+ total_size += size;
+ capture_offset += size;
+ } else {
+ ads_blob_write(ads, ads.capture_instance[i][j], null_ggtt);
+ }
}
- ads_blob_write(ads, ads.capture_global[i], addr);
+ /* global registers is last in our PF/VF loops */
+ write_empty_list = get_capture_list(ads, guc, gt, i,
+ GUC_STATE_CAPTURE_TYPE_GLOBAL,
+ 0, &total_size, &size, &ptr);
+ if (!write_empty_list) {
+ ads_blob_write(ads, ads.capture_global[i], ads_ggtt + capture_offset);
+ xe_map_memcpy_to(ads_to_xe(ads), ads_to_map(ads), capture_offset, ptr,
+ size);
+ total_size += size;
+ capture_offset += size;
+ } else {
+ ads_blob_write(ads, ads.capture_global[i], null_ggtt);
+ }
}
+
+ if (ads->capture_size != PAGE_ALIGN(total_size))
+ xe_gt_dbg(gt, "ADS capture alloc size changed from %d to %d\n",
+ ads->capture_size, PAGE_ALIGN(total_size));
+ return PAGE_ALIGN(total_size);
}
static void guc_mmio_regset_write_one(struct xe_guc_ads *ads,
@@ -684,7 +823,7 @@ static void guc_doorbell_init(struct xe_guc_ads *ads)
if (GRAPHICS_VER(xe) >= 12 && !IS_DGFX(xe)) {
u32 distdbreg =
- xe_mmio_read32(gt, DIST_DBS_POPULATED);
+ xe_mmio_read32(&gt->mmio, DIST_DBS_POPULATED);
ads_blob_write(ads,
system_info.generic_gt_sysinfo[GUC_GENERIC_GT_SYSINFO_DOORBELL_COUNT_PER_SQIDI],
@@ -738,7 +877,7 @@ void xe_guc_ads_populate(struct xe_guc_ads *ads)
guc_mmio_reg_state_init(ads);
guc_prep_golden_lrc_null(ads);
guc_mapping_table_init(gt, &info_map);
- guc_capture_list_init(ads);
+ guc_capture_prep_lists(ads);
guc_doorbell_init(ads);
guc_waklv_init(ads);
diff --git a/drivers/gpu/drm/xe/xe_guc_ads_types.h b/drivers/gpu/drm/xe/xe_guc_ads_types.h
index 2de5decfe0fd..70c132458ac3 100644
--- a/drivers/gpu/drm/xe/xe_guc_ads_types.h
+++ b/drivers/gpu/drm/xe/xe_guc_ads_types.h
@@ -22,6 +22,8 @@ struct xe_guc_ads {
u32 regset_size;
/** @ads_waklv_size: total waklv size supported by platform */
u32 ads_waklv_size;
+ /** @capture_size: size of register set passed to GuC for capture */
+ u32 capture_size;
};
#endif
diff --git a/drivers/gpu/drm/xe/xe_guc_capture.c b/drivers/gpu/drm/xe/xe_guc_capture.c
new file mode 100644
index 000000000000..cc72446a5de1
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_capture.c
@@ -0,0 +1,1975 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021-2024 Intel Corporation
+ */
+
+#include <linux/types.h>
+
+#include <drm/drm_managed.h>
+#include <drm/drm_print.h>
+
+#include "abi/guc_actions_abi.h"
+#include "abi/guc_capture_abi.h"
+#include "abi/guc_log_abi.h"
+#include "regs/xe_engine_regs.h"
+#include "regs/xe_gt_regs.h"
+#include "regs/xe_guc_regs.h"
+#include "regs/xe_regs.h"
+
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_exec_queue_types.h"
+#include "xe_gt.h"
+#include "xe_gt_mcr.h"
+#include "xe_gt_printk.h"
+#include "xe_guc.h"
+#include "xe_guc_ads.h"
+#include "xe_guc_capture.h"
+#include "xe_guc_capture_types.h"
+#include "xe_guc_ct.h"
+#include "xe_guc_exec_queue_types.h"
+#include "xe_guc_log.h"
+#include "xe_guc_submit_types.h"
+#include "xe_guc_submit.h"
+#include "xe_hw_engine_types.h"
+#include "xe_hw_engine.h"
+#include "xe_lrc.h"
+#include "xe_macros.h"
+#include "xe_map.h"
+#include "xe_mmio.h"
+#include "xe_sched_job.h"
+
+/*
+ * struct __guc_capture_bufstate
+ *
+ * Book-keeping structure used to track read and write pointers
+ * as we extract error capture data from the GuC-log-buffer's
+ * error-capture region as a stream of dwords.
+ */
+struct __guc_capture_bufstate {
+ u32 size;
+ u32 data_offset;
+ u32 rd;
+ u32 wr;
+};
+
+/*
+ * struct __guc_capture_parsed_output - extracted error capture node
+ *
+ * A single unit of extracted error-capture output data grouped together
+ * at an engine-instance level. We keep these nodes in a linked list.
+ * See cachelist and outlist below.
+ */
+struct __guc_capture_parsed_output {
+ /*
+ * A single set of 3 capture lists: a global-list
+ * an engine-class-list and an engine-instance list.
+ * outlist in __guc_capture_parsed_output will keep
+ * a linked list of these nodes that will eventually
+ * be detached from outlist and attached into to
+ * xe_codedump in response to a context reset
+ */
+ struct list_head link;
+ bool is_partial;
+ u32 eng_class;
+ u32 eng_inst;
+ u32 guc_id;
+ u32 lrca;
+ u32 type;
+ bool locked;
+ enum xe_hw_engine_snapshot_source_id source;
+ struct gcap_reg_list_info {
+ u32 vfid;
+ u32 num_regs;
+ struct guc_mmio_reg *regs;
+ } reginfo[GUC_STATE_CAPTURE_TYPE_MAX];
+#define GCAP_PARSED_REGLIST_INDEX_GLOBAL BIT(GUC_STATE_CAPTURE_TYPE_GLOBAL)
+#define GCAP_PARSED_REGLIST_INDEX_ENGCLASS BIT(GUC_STATE_CAPTURE_TYPE_ENGINE_CLASS)
+};
+
+/*
+ * Define all device tables of GuC error capture register lists
+ * NOTE:
+ * For engine-registers, GuC only needs the register offsets
+ * from the engine-mmio-base
+ *
+ * 64 bit registers need 2 entries for low 32 bit register and high 32 bit
+ * register, for example:
+ * Register data_type flags mask Register name
+ * { XXX_REG_LO(0), REG_64BIT_LOW_DW, 0, 0, NULL},
+ * { XXX_REG_HI(0), REG_64BIT_HI_DW,, 0, 0, "XXX_REG"},
+ * 1. data_type: Indicate is hi/low 32 bit for a 64 bit register
+ * A 64 bit register define requires 2 consecutive entries,
+ * with low dword first and hi dword the second.
+ * 2. Register name: null for incompleted define
+ */
+#define COMMON_XELP_BASE_GLOBAL \
+ { FORCEWAKE_GT, REG_32BIT, 0, 0, "FORCEWAKE_GT"}
+
+#define COMMON_BASE_ENGINE_INSTANCE \
+ { RING_HWSTAM(0), REG_32BIT, 0, 0, "HWSTAM"}, \
+ { RING_HWS_PGA(0), REG_32BIT, 0, 0, "RING_HWS_PGA"}, \
+ { RING_HEAD(0), REG_32BIT, 0, 0, "RING_HEAD"}, \
+ { RING_TAIL(0), REG_32BIT, 0, 0, "RING_TAIL"}, \
+ { RING_CTL(0), REG_32BIT, 0, 0, "RING_CTL"}, \
+ { RING_MI_MODE(0), REG_32BIT, 0, 0, "RING_MI_MODE"}, \
+ { RING_MODE(0), REG_32BIT, 0, 0, "RING_MODE"}, \
+ { RING_ESR(0), REG_32BIT, 0, 0, "RING_ESR"}, \
+ { RING_EMR(0), REG_32BIT, 0, 0, "RING_EMR"}, \
+ { RING_EIR(0), REG_32BIT, 0, 0, "RING_EIR"}, \
+ { RING_IMR(0), REG_32BIT, 0, 0, "RING_IMR"}, \
+ { RING_IPEHR(0), REG_32BIT, 0, 0, "IPEHR"}, \
+ { RING_INSTDONE(0), REG_32BIT, 0, 0, "RING_INSTDONE"}, \
+ { INDIRECT_RING_STATE(0), REG_32BIT, 0, 0, "INDIRECT_RING_STATE"}, \
+ { RING_ACTHD(0), REG_64BIT_LOW_DW, 0, 0, NULL}, \
+ { RING_ACTHD_UDW(0), REG_64BIT_HI_DW, 0, 0, "ACTHD"}, \
+ { RING_BBADDR(0), REG_64BIT_LOW_DW, 0, 0, NULL}, \
+ { RING_BBADDR_UDW(0), REG_64BIT_HI_DW, 0, 0, "RING_BBADDR"}, \
+ { RING_START(0), REG_64BIT_LOW_DW, 0, 0, NULL}, \
+ { RING_START_UDW(0), REG_64BIT_HI_DW, 0, 0, "RING_START"}, \
+ { RING_DMA_FADD(0), REG_64BIT_LOW_DW, 0, 0, NULL}, \
+ { RING_DMA_FADD_UDW(0), REG_64BIT_HI_DW, 0, 0, "RING_DMA_FADD"}, \
+ { RING_EXECLIST_STATUS_LO(0), REG_64BIT_LOW_DW, 0, 0, NULL}, \
+ { RING_EXECLIST_STATUS_HI(0), REG_64BIT_HI_DW, 0, 0, "RING_EXECLIST_STATUS"}, \
+ { RING_EXECLIST_SQ_CONTENTS_LO(0), REG_64BIT_LOW_DW, 0, 0, NULL}, \
+ { RING_EXECLIST_SQ_CONTENTS_HI(0), REG_64BIT_HI_DW, 0, 0, "RING_EXECLIST_SQ_CONTENTS"}
+
+#define COMMON_XELP_RC_CLASS \
+ { RCU_MODE, REG_32BIT, 0, 0, "RCU_MODE"}
+
+#define COMMON_XELP_RC_CLASS_INSTDONE \
+ { SC_INSTDONE, REG_32BIT, 0, 0, "SC_INSTDONE"}, \
+ { SC_INSTDONE_EXTRA, REG_32BIT, 0, 0, "SC_INSTDONE_EXTRA"}, \
+ { SC_INSTDONE_EXTRA2, REG_32BIT, 0, 0, "SC_INSTDONE_EXTRA2"}
+
+#define XELP_VEC_CLASS_REGS \
+ { SFC_DONE(0), 0, 0, 0, "SFC_DONE[0]"}, \
+ { SFC_DONE(1), 0, 0, 0, "SFC_DONE[1]"}, \
+ { SFC_DONE(2), 0, 0, 0, "SFC_DONE[2]"}, \
+ { SFC_DONE(3), 0, 0, 0, "SFC_DONE[3]"}
+
+/* XE_LP Global */
+static const struct __guc_mmio_reg_descr xe_lp_global_regs[] = {
+ COMMON_XELP_BASE_GLOBAL,
+};
+
+/* Render / Compute Per-Engine-Instance */
+static const struct __guc_mmio_reg_descr xe_rc_inst_regs[] = {
+ COMMON_BASE_ENGINE_INSTANCE,
+};
+
+/* Render / Compute Engine-Class */
+static const struct __guc_mmio_reg_descr xe_rc_class_regs[] = {
+ COMMON_XELP_RC_CLASS,
+ COMMON_XELP_RC_CLASS_INSTDONE,
+};
+
+/* Render / Compute Engine-Class for xehpg */
+static const struct __guc_mmio_reg_descr xe_hpg_rc_class_regs[] = {
+ COMMON_XELP_RC_CLASS,
+};
+
+/* Media Decode/Encode Per-Engine-Instance */
+static const struct __guc_mmio_reg_descr xe_vd_inst_regs[] = {
+ COMMON_BASE_ENGINE_INSTANCE,
+};
+
+/* Video Enhancement Engine-Class */
+static const struct __guc_mmio_reg_descr xe_vec_class_regs[] = {
+ XELP_VEC_CLASS_REGS,
+};
+
+/* Video Enhancement Per-Engine-Instance */
+static const struct __guc_mmio_reg_descr xe_vec_inst_regs[] = {
+ COMMON_BASE_ENGINE_INSTANCE,
+};
+
+/* Blitter Per-Engine-Instance */
+static const struct __guc_mmio_reg_descr xe_blt_inst_regs[] = {
+ COMMON_BASE_ENGINE_INSTANCE,
+};
+
+/* XE_LP - GSC Per-Engine-Instance */
+static const struct __guc_mmio_reg_descr xe_lp_gsc_inst_regs[] = {
+ COMMON_BASE_ENGINE_INSTANCE,
+};
+
+/*
+ * Empty list to prevent warnings about unknown class/instance types
+ * as not all class/instance types have entries on all platforms.
+ */
+static const struct __guc_mmio_reg_descr empty_regs_list[] = {
+};
+
+#define TO_GCAP_DEF_OWNER(x) (GUC_CAPTURE_LIST_INDEX_##x)
+#define TO_GCAP_DEF_TYPE(x) (GUC_STATE_CAPTURE_TYPE_##x)
+#define MAKE_REGLIST(regslist, regsowner, regstype, class) \
+ { \
+ regslist, \
+ ARRAY_SIZE(regslist), \
+ TO_GCAP_DEF_OWNER(regsowner), \
+ TO_GCAP_DEF_TYPE(regstype), \
+ class \
+ }
+
+/* List of lists for legacy graphic product version < 1255 */
+static const struct __guc_mmio_reg_descr_group xe_lp_lists[] = {
+ MAKE_REGLIST(xe_lp_global_regs, PF, GLOBAL, 0),
+ MAKE_REGLIST(xe_rc_class_regs, PF, ENGINE_CLASS, GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE),
+ MAKE_REGLIST(xe_rc_inst_regs, PF, ENGINE_INSTANCE, GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE),
+ MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_CAPTURE_LIST_CLASS_VIDEO),
+ MAKE_REGLIST(xe_vd_inst_regs, PF, ENGINE_INSTANCE, GUC_CAPTURE_LIST_CLASS_VIDEO),
+ MAKE_REGLIST(xe_vec_class_regs, PF, ENGINE_CLASS, GUC_CAPTURE_LIST_CLASS_VIDEOENHANCE),
+ MAKE_REGLIST(xe_vec_inst_regs, PF, ENGINE_INSTANCE, GUC_CAPTURE_LIST_CLASS_VIDEOENHANCE),
+ MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_CAPTURE_LIST_CLASS_BLITTER),
+ MAKE_REGLIST(xe_blt_inst_regs, PF, ENGINE_INSTANCE, GUC_CAPTURE_LIST_CLASS_BLITTER),
+ MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_CAPTURE_LIST_CLASS_GSC_OTHER),
+ MAKE_REGLIST(xe_lp_gsc_inst_regs, PF, ENGINE_INSTANCE, GUC_CAPTURE_LIST_CLASS_GSC_OTHER),
+ {}
+};
+
+ /* List of lists for graphic product version >= 1255 */
+static const struct __guc_mmio_reg_descr_group xe_hpg_lists[] = {
+ MAKE_REGLIST(xe_lp_global_regs, PF, GLOBAL, 0),
+ MAKE_REGLIST(xe_hpg_rc_class_regs, PF, ENGINE_CLASS, GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE),
+ MAKE_REGLIST(xe_rc_inst_regs, PF, ENGINE_INSTANCE, GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE),
+ MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_CAPTURE_LIST_CLASS_VIDEO),
+ MAKE_REGLIST(xe_vd_inst_regs, PF, ENGINE_INSTANCE, GUC_CAPTURE_LIST_CLASS_VIDEO),
+ MAKE_REGLIST(xe_vec_class_regs, PF, ENGINE_CLASS, GUC_CAPTURE_LIST_CLASS_VIDEOENHANCE),
+ MAKE_REGLIST(xe_vec_inst_regs, PF, ENGINE_INSTANCE, GUC_CAPTURE_LIST_CLASS_VIDEOENHANCE),
+ MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_CAPTURE_LIST_CLASS_BLITTER),
+ MAKE_REGLIST(xe_blt_inst_regs, PF, ENGINE_INSTANCE, GUC_CAPTURE_LIST_CLASS_BLITTER),
+ MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_CAPTURE_LIST_CLASS_GSC_OTHER),
+ MAKE_REGLIST(xe_lp_gsc_inst_regs, PF, ENGINE_INSTANCE, GUC_CAPTURE_LIST_CLASS_GSC_OTHER),
+ {}
+};
+
+static const char * const capture_list_type_names[] = {
+ "Global",
+ "Class",
+ "Instance",
+};
+
+static const char * const capture_engine_class_names[] = {
+ "Render/Compute",
+ "Video",
+ "VideoEnhance",
+ "Blitter",
+ "GSC-Other",
+};
+
+struct __guc_capture_ads_cache {
+ bool is_valid;
+ void *ptr;
+ size_t size;
+ int status;
+};
+
+struct xe_guc_state_capture {
+ const struct __guc_mmio_reg_descr_group *reglists;
+ /**
+ * NOTE: steered registers have multiple instances depending on the HW configuration
+ * (slices or dual-sub-slices) and thus depends on HW fuses discovered
+ */
+ struct __guc_mmio_reg_descr_group *extlists;
+ struct __guc_capture_ads_cache ads_cache[GUC_CAPTURE_LIST_INDEX_MAX]
+ [GUC_STATE_CAPTURE_TYPE_MAX]
+ [GUC_CAPTURE_LIST_CLASS_MAX];
+ void *ads_null_cache;
+ struct list_head cachelist;
+#define PREALLOC_NODES_MAX_COUNT (3 * GUC_MAX_ENGINE_CLASSES * GUC_MAX_INSTANCES_PER_CLASS)
+#define PREALLOC_NODES_DEFAULT_NUMREGS 64
+
+ int max_mmio_per_node;
+ struct list_head outlist;
+};
+
+static void
+guc_capture_remove_stale_matches_from_list(struct xe_guc_state_capture *gc,
+ struct __guc_capture_parsed_output *node);
+
+static const struct __guc_mmio_reg_descr_group *
+guc_capture_get_device_reglist(struct xe_device *xe)
+{
+ if (GRAPHICS_VERx100(xe) >= 1255)
+ return xe_hpg_lists;
+ else
+ return xe_lp_lists;
+}
+
+static const struct __guc_mmio_reg_descr_group *
+guc_capture_get_one_list(const struct __guc_mmio_reg_descr_group *reglists,
+ u32 owner, u32 type, enum guc_capture_list_class_type capture_class)
+{
+ int i;
+
+ if (!reglists)
+ return NULL;
+
+ for (i = 0; reglists[i].list; ++i) {
+ if (reglists[i].owner == owner && reglists[i].type == type &&
+ (reglists[i].engine == capture_class ||
+ reglists[i].type == GUC_STATE_CAPTURE_TYPE_GLOBAL))
+ return &reglists[i];
+ }
+
+ return NULL;
+}
+
+const struct __guc_mmio_reg_descr_group *
+xe_guc_capture_get_reg_desc_list(struct xe_gt *gt, u32 owner, u32 type,
+ enum guc_capture_list_class_type capture_class, bool is_ext)
+{
+ const struct __guc_mmio_reg_descr_group *reglists;
+
+ if (is_ext) {
+ struct xe_guc *guc = &gt->uc.guc;
+
+ reglists = guc->capture->extlists;
+ } else {
+ reglists = guc_capture_get_device_reglist(gt_to_xe(gt));
+ }
+ return guc_capture_get_one_list(reglists, owner, type, capture_class);
+}
+
+struct __ext_steer_reg {
+ const char *name;
+ struct xe_reg_mcr reg;
+};
+
+static const struct __ext_steer_reg xe_extregs[] = {
+ {"SAMPLER_INSTDONE", SAMPLER_INSTDONE},
+ {"ROW_INSTDONE", ROW_INSTDONE}
+};
+
+static const struct __ext_steer_reg xehpg_extregs[] = {
+ {"SC_INSTDONE", XEHPG_SC_INSTDONE},
+ {"SC_INSTDONE_EXTRA", XEHPG_SC_INSTDONE_EXTRA},
+ {"SC_INSTDONE_EXTRA2", XEHPG_SC_INSTDONE_EXTRA2},
+ {"INSTDONE_GEOM_SVGUNIT", XEHPG_INSTDONE_GEOM_SVGUNIT}
+};
+
+static void __fill_ext_reg(struct __guc_mmio_reg_descr *ext,
+ const struct __ext_steer_reg *extlist,
+ int slice_id, int subslice_id)
+{
+ if (!ext || !extlist)
+ return;
+
+ ext->reg = XE_REG(extlist->reg.__reg.addr);
+ ext->flags = FIELD_PREP(GUC_REGSET_STEERING_NEEDED, 1);
+ ext->flags = FIELD_PREP(GUC_REGSET_STEERING_GROUP, slice_id);
+ ext->flags |= FIELD_PREP(GUC_REGSET_STEERING_INSTANCE, subslice_id);
+ ext->regname = extlist->name;
+}
+
+static int
+__alloc_ext_regs(struct drm_device *drm, struct __guc_mmio_reg_descr_group *newlist,
+ const struct __guc_mmio_reg_descr_group *rootlist, int num_regs)
+{
+ struct __guc_mmio_reg_descr *list;
+
+ list = drmm_kzalloc(drm, num_regs * sizeof(struct __guc_mmio_reg_descr), GFP_KERNEL);
+ if (!list)
+ return -ENOMEM;
+
+ newlist->list = list;
+ newlist->num_regs = num_regs;
+ newlist->owner = rootlist->owner;
+ newlist->engine = rootlist->engine;
+ newlist->type = rootlist->type;
+
+ return 0;
+}
+
+static int guc_capture_get_steer_reg_num(struct xe_device *xe)
+{
+ int num = ARRAY_SIZE(xe_extregs);
+
+ if (GRAPHICS_VERx100(xe) >= 1255)
+ num += ARRAY_SIZE(xehpg_extregs);
+
+ return num;
+}
+
+static void guc_capture_alloc_steered_lists(struct xe_guc *guc)
+{
+ struct xe_gt *gt = guc_to_gt(guc);
+ u16 slice, subslice;
+ int iter, i, total = 0;
+ const struct __guc_mmio_reg_descr_group *lists = guc->capture->reglists;
+ const struct __guc_mmio_reg_descr_group *list;
+ struct __guc_mmio_reg_descr_group *extlists;
+ struct __guc_mmio_reg_descr *extarray;
+ bool has_xehpg_extregs = GRAPHICS_VERx100(gt_to_xe(gt)) >= 1255;
+ struct drm_device *drm = &gt_to_xe(gt)->drm;
+ bool has_rcs_ccs = false;
+ struct xe_hw_engine *hwe;
+ enum xe_hw_engine_id id;
+
+ /*
+ * If GT has no rcs/ccs, no need to alloc steered list.
+ * Currently, only rcs/ccs has steering register, if in the future,
+ * other engine types has steering register, this condition check need
+ * to be extended
+ */
+ for_each_hw_engine(hwe, gt, id) {
+ if (xe_engine_class_to_guc_capture_class(hwe->class) ==
+ GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE) {
+ has_rcs_ccs = true;
+ break;
+ }
+ }
+
+ if (!has_rcs_ccs)
+ return;
+
+ /* steered registers currently only exist for the render-class */
+ list = guc_capture_get_one_list(lists, GUC_CAPTURE_LIST_INDEX_PF,
+ GUC_STATE_CAPTURE_TYPE_ENGINE_CLASS,
+ GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE);
+ /*
+ * Skip if this platform has no engine class registers or if extlists
+ * was previously allocated
+ */
+ if (!list || guc->capture->extlists)
+ return;
+
+ total = bitmap_weight(gt->fuse_topo.g_dss_mask, sizeof(gt->fuse_topo.g_dss_mask) * 8) *
+ guc_capture_get_steer_reg_num(guc_to_xe(guc));
+
+ if (!total)
+ return;
+
+ /* allocate an extra for an end marker */
+ extlists = drmm_kzalloc(drm, 2 * sizeof(struct __guc_mmio_reg_descr_group), GFP_KERNEL);
+ if (!extlists)
+ return;
+
+ if (__alloc_ext_regs(drm, &extlists[0], list, total)) {
+ drmm_kfree(drm, extlists);
+ return;
+ }
+
+ /* For steering registers, the list is generated at run-time */
+ extarray = (struct __guc_mmio_reg_descr *)extlists[0].list;
+ for_each_dss_steering(iter, gt, slice, subslice) {
+ for (i = 0; i < ARRAY_SIZE(xe_extregs); ++i) {
+ __fill_ext_reg(extarray, &xe_extregs[i], slice, subslice);
+ ++extarray;
+ }
+
+ if (has_xehpg_extregs)
+ for (i = 0; i < ARRAY_SIZE(xehpg_extregs); ++i) {
+ __fill_ext_reg(extarray, &xehpg_extregs[i], slice, subslice);
+ ++extarray;
+ }
+ }
+
+ extlists[0].num_regs = total;
+
+ xe_gt_dbg(guc_to_gt(guc), "capture found %d ext-regs.\n", total);
+ guc->capture->extlists = extlists;
+}
+
+static int
+guc_capture_list_init(struct xe_guc *guc, u32 owner, u32 type,
+ enum guc_capture_list_class_type capture_class, struct guc_mmio_reg *ptr,
+ u16 num_entries)
+{
+ u32 ptr_idx = 0, list_idx = 0;
+ const struct __guc_mmio_reg_descr_group *reglists = guc->capture->reglists;
+ struct __guc_mmio_reg_descr_group *extlists = guc->capture->extlists;
+ const struct __guc_mmio_reg_descr_group *match;
+ u32 list_num;
+
+ if (!reglists)
+ return -ENODEV;
+
+ match = guc_capture_get_one_list(reglists, owner, type, capture_class);
+ if (!match)
+ return -ENODATA;
+
+ list_num = match->num_regs;
+ for (list_idx = 0; ptr_idx < num_entries && list_idx < list_num; ++list_idx, ++ptr_idx) {
+ ptr[ptr_idx].offset = match->list[list_idx].reg.addr;
+ ptr[ptr_idx].value = 0xDEADF00D;
+ ptr[ptr_idx].flags = match->list[list_idx].flags;
+ ptr[ptr_idx].mask = match->list[list_idx].mask;
+ }
+
+ match = guc_capture_get_one_list(extlists, owner, type, capture_class);
+ if (match)
+ for (ptr_idx = list_num, list_idx = 0;
+ ptr_idx < num_entries && list_idx < match->num_regs;
+ ++ptr_idx, ++list_idx) {
+ ptr[ptr_idx].offset = match->list[list_idx].reg.addr;
+ ptr[ptr_idx].value = 0xDEADF00D;
+ ptr[ptr_idx].flags = match->list[list_idx].flags;
+ ptr[ptr_idx].mask = match->list[list_idx].mask;
+ }
+
+ if (ptr_idx < num_entries)
+ xe_gt_dbg(guc_to_gt(guc), "Got short capture reglist init: %d out-of %d.\n",
+ ptr_idx, num_entries);
+
+ return 0;
+}
+
+static int
+guc_cap_list_num_regs(struct xe_guc *guc, u32 owner, u32 type,
+ enum guc_capture_list_class_type capture_class)
+{
+ const struct __guc_mmio_reg_descr_group *match;
+ int num_regs = 0;
+
+ match = guc_capture_get_one_list(guc->capture->reglists, owner, type, capture_class);
+ if (match)
+ num_regs = match->num_regs;
+
+ match = guc_capture_get_one_list(guc->capture->extlists, owner, type, capture_class);
+ if (match)
+ num_regs += match->num_regs;
+ else
+ /*
+ * If a caller wants the full register dump size but we have
+ * not yet got the hw-config, which is before max_mmio_per_node
+ * is initialized, then provide a worst-case number for
+ * extlists based on max dss fuse bits, but only ever for
+ * render/compute
+ */
+ if (owner == GUC_CAPTURE_LIST_INDEX_PF &&
+ type == GUC_STATE_CAPTURE_TYPE_ENGINE_CLASS &&
+ capture_class == GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE &&
+ !guc->capture->max_mmio_per_node)
+ num_regs += guc_capture_get_steer_reg_num(guc_to_xe(guc)) *
+ XE_MAX_DSS_FUSE_BITS;
+
+ return num_regs;
+}
+
+static int
+guc_capture_getlistsize(struct xe_guc *guc, u32 owner, u32 type,
+ enum guc_capture_list_class_type capture_class,
+ size_t *size, bool is_purpose_est)
+{
+ struct xe_guc_state_capture *gc = guc->capture;
+ struct xe_gt *gt = guc_to_gt(guc);
+ struct __guc_capture_ads_cache *cache;
+ int num_regs;
+
+ xe_gt_assert(gt, type < GUC_STATE_CAPTURE_TYPE_MAX);
+ xe_gt_assert(gt, capture_class < GUC_CAPTURE_LIST_CLASS_MAX);
+
+ cache = &gc->ads_cache[owner][type][capture_class];
+ if (!gc->reglists) {
+ xe_gt_warn(gt, "No capture reglist for this device\n");
+ return -ENODEV;
+ }
+
+ if (cache->is_valid) {
+ *size = cache->size;
+ return cache->status;
+ }
+
+ if (!is_purpose_est && owner == GUC_CAPTURE_LIST_INDEX_PF &&
+ !guc_capture_get_one_list(gc->reglists, owner, type, capture_class)) {
+ if (type == GUC_STATE_CAPTURE_TYPE_GLOBAL)
+ xe_gt_warn(gt, "Missing capture reglist: global!\n");
+ else
+ xe_gt_warn(gt, "Missing capture reglist: %s(%u):%s(%u)!\n",
+ capture_list_type_names[type], type,
+ capture_engine_class_names[capture_class], capture_class);
+ return -ENODEV;
+ }
+
+ num_regs = guc_cap_list_num_regs(guc, owner, type, capture_class);
+ /* intentional empty lists can exist depending on hw config */
+ if (!num_regs)
+ return -ENODATA;
+
+ if (size)
+ *size = PAGE_ALIGN((sizeof(struct guc_debug_capture_list)) +
+ (num_regs * sizeof(struct guc_mmio_reg)));
+
+ return 0;
+}
+
+/**
+ * xe_guc_capture_getlistsize - Get list size for owner/type/class combination
+ * @guc: The GuC object
+ * @owner: PF/VF owner
+ * @type: GuC capture register type
+ * @capture_class: GuC capture engine class id
+ * @size: Point to the size
+ *
+ * This function will get the list for the owner/type/class combination, and
+ * return the page aligned list size.
+ *
+ * Returns: 0 on success or a negative error code on failure.
+ */
+int
+xe_guc_capture_getlistsize(struct xe_guc *guc, u32 owner, u32 type,
+ enum guc_capture_list_class_type capture_class, size_t *size)
+{
+ return guc_capture_getlistsize(guc, owner, type, capture_class, size, false);
+}
+
+/**
+ * xe_guc_capture_getlist - Get register capture list for owner/type/class
+ * combination
+ * @guc: The GuC object
+ * @owner: PF/VF owner
+ * @type: GuC capture register type
+ * @capture_class: GuC capture engine class id
+ * @outptr: Point to cached register capture list
+ *
+ * This function will get the register capture list for the owner/type/class
+ * combination.
+ *
+ * Returns: 0 on success or a negative error code on failure.
+ */
+int
+xe_guc_capture_getlist(struct xe_guc *guc, u32 owner, u32 type,
+ enum guc_capture_list_class_type capture_class, void **outptr)
+{
+ struct xe_guc_state_capture *gc = guc->capture;
+ struct __guc_capture_ads_cache *cache = &gc->ads_cache[owner][type][capture_class];
+ struct guc_debug_capture_list *listnode;
+ int ret, num_regs;
+ u8 *caplist, *tmp;
+ size_t size = 0;
+
+ if (!gc->reglists)
+ return -ENODEV;
+
+ if (cache->is_valid) {
+ *outptr = cache->ptr;
+ return cache->status;
+ }
+
+ ret = xe_guc_capture_getlistsize(guc, owner, type, capture_class, &size);
+ if (ret) {
+ cache->is_valid = true;
+ cache->ptr = NULL;
+ cache->size = 0;
+ cache->status = ret;
+ return ret;
+ }
+
+ caplist = drmm_kzalloc(guc_to_drm(guc), size, GFP_KERNEL);
+ if (!caplist)
+ return -ENOMEM;
+
+ /* populate capture list header */
+ tmp = caplist;
+ num_regs = guc_cap_list_num_regs(guc, owner, type, capture_class);
+ listnode = (struct guc_debug_capture_list *)tmp;
+ listnode->header.info = FIELD_PREP(GUC_CAPTURELISTHDR_NUMDESCR, (u32)num_regs);
+
+ /* populate list of register descriptor */
+ tmp += sizeof(struct guc_debug_capture_list);
+ guc_capture_list_init(guc, owner, type, capture_class,
+ (struct guc_mmio_reg *)tmp, num_regs);
+
+ /* cache this list */
+ cache->is_valid = true;
+ cache->ptr = caplist;
+ cache->size = size;
+ cache->status = 0;
+
+ *outptr = caplist;
+
+ return 0;
+}
+
+/**
+ * xe_guc_capture_getnullheader - Get a null list for register capture
+ * @guc: The GuC object
+ * @outptr: Point to cached register capture list
+ * @size: Point to the size
+ *
+ * This function will alloc for a null list for register capture.
+ *
+ * Returns: 0 on success or a negative error code on failure.
+ */
+int
+xe_guc_capture_getnullheader(struct xe_guc *guc, void **outptr, size_t *size)
+{
+ struct xe_guc_state_capture *gc = guc->capture;
+ int tmp = sizeof(u32) * 4;
+ void *null_header;
+
+ if (gc->ads_null_cache) {
+ *outptr = gc->ads_null_cache;
+ *size = tmp;
+ return 0;
+ }
+
+ null_header = drmm_kzalloc(guc_to_drm(guc), tmp, GFP_KERNEL);
+ if (!null_header)
+ return -ENOMEM;
+
+ gc->ads_null_cache = null_header;
+ *outptr = null_header;
+ *size = tmp;
+
+ return 0;
+}
+
+/**
+ * xe_guc_capture_ads_input_worst_size - Calculate the worst size for GuC register capture
+ * @guc: point to xe_guc structure
+ *
+ * Calculate the worst size for GuC register capture by including all possible engines classes.
+ *
+ * Returns: Calculated size
+ */
+size_t xe_guc_capture_ads_input_worst_size(struct xe_guc *guc)
+{
+ size_t total_size, class_size, instance_size, global_size;
+ int i, j;
+
+ /*
+ * This function calculates the worst case register lists size by
+ * including all possible engines classes. It is called during the
+ * first of a two-phase GuC (and ADS-population) initialization
+ * sequence, that is, during the pre-hwconfig phase before we have
+ * the exact engine fusing info.
+ */
+ total_size = PAGE_SIZE; /* Pad a page in front for empty lists */
+ for (i = 0; i < GUC_CAPTURE_LIST_INDEX_MAX; i++) {
+ for (j = 0; j < GUC_CAPTURE_LIST_CLASS_MAX; j++) {
+ if (xe_guc_capture_getlistsize(guc, i,
+ GUC_STATE_CAPTURE_TYPE_ENGINE_CLASS,
+ j, &class_size) < 0)
+ class_size = 0;
+ if (xe_guc_capture_getlistsize(guc, i,
+ GUC_STATE_CAPTURE_TYPE_ENGINE_INSTANCE,
+ j, &instance_size) < 0)
+ instance_size = 0;
+ total_size += class_size + instance_size;
+ }
+ if (xe_guc_capture_getlistsize(guc, i,
+ GUC_STATE_CAPTURE_TYPE_GLOBAL,
+ 0, &global_size) < 0)
+ global_size = 0;
+ total_size += global_size;
+ }
+
+ return PAGE_ALIGN(total_size);
+}
+
+static int guc_capture_output_size_est(struct xe_guc *guc)
+{
+ struct xe_gt *gt = guc_to_gt(guc);
+ struct xe_hw_engine *hwe;
+ enum xe_hw_engine_id id;
+
+ int capture_size = 0;
+ size_t tmp = 0;
+
+ if (!guc->capture)
+ return -ENODEV;
+
+ /*
+ * If every single engine-instance suffered a failure in quick succession but
+ * were all unrelated, then a burst of multiple error-capture events would dump
+ * registers for every one engine instance, one at a time. In this case, GuC
+ * would even dump the global-registers repeatedly.
+ *
+ * For each engine instance, there would be 1 x guc_state_capture_group_t output
+ * followed by 3 x guc_state_capture_t lists. The latter is how the register
+ * dumps are split across different register types (where the '3' are global vs class
+ * vs instance).
+ */
+ for_each_hw_engine(hwe, gt, id) {
+ enum guc_capture_list_class_type capture_class;
+
+ capture_class = xe_engine_class_to_guc_capture_class(hwe->class);
+ capture_size += sizeof(struct guc_state_capture_group_header_t) +
+ (3 * sizeof(struct guc_state_capture_header_t));
+
+ if (!guc_capture_getlistsize(guc, 0, GUC_STATE_CAPTURE_TYPE_GLOBAL,
+ 0, &tmp, true))
+ capture_size += tmp;
+ if (!guc_capture_getlistsize(guc, 0, GUC_STATE_CAPTURE_TYPE_ENGINE_CLASS,
+ capture_class, &tmp, true))
+ capture_size += tmp;
+ if (!guc_capture_getlistsize(guc, 0, GUC_STATE_CAPTURE_TYPE_ENGINE_INSTANCE,
+ capture_class, &tmp, true))
+ capture_size += tmp;
+ }
+
+ return capture_size;
+}
+
+/*
+ * Add on a 3x multiplier to allow for multiple back-to-back captures occurring
+ * before the Xe can read the data out and process it
+ */
+#define GUC_CAPTURE_OVERBUFFER_MULTIPLIER 3
+
+static void check_guc_capture_size(struct xe_guc *guc)
+{
+ int capture_size = guc_capture_output_size_est(guc);
+ int spare_size = capture_size * GUC_CAPTURE_OVERBUFFER_MULTIPLIER;
+ u32 buffer_size = xe_guc_log_section_size_capture(&guc->log);
+
+ /*
+ * NOTE: capture_size is much smaller than the capture region
+ * allocation (DG2: <80K vs 1MB).
+ * Additionally, its based on space needed to fit all engines getting
+ * reset at once within the same G2H handler task slot. This is very
+ * unlikely. However, if GuC really does run out of space for whatever
+ * reason, we will see an separate warning message when processing the
+ * G2H event capture-notification, search for:
+ * xe_guc_STATE_CAPTURE_EVENT_STATUS_NOSPACE.
+ */
+ if (capture_size < 0)
+ xe_gt_dbg(guc_to_gt(guc),
+ "Failed to calculate error state capture buffer minimum size: %d!\n",
+ capture_size);
+ if (capture_size > buffer_size)
+ xe_gt_dbg(guc_to_gt(guc), "Error state capture buffer maybe small: %d < %d\n",
+ buffer_size, capture_size);
+ else if (spare_size > buffer_size)
+ xe_gt_dbg(guc_to_gt(guc),
+ "Error state capture buffer lacks spare size: %d < %d (min = %d)\n",
+ buffer_size, spare_size, capture_size);
+}
+
+static void
+guc_capture_add_node_to_list(struct __guc_capture_parsed_output *node,
+ struct list_head *list)
+{
+ list_add(&node->link, list);
+}
+
+static void
+guc_capture_add_node_to_outlist(struct xe_guc_state_capture *gc,
+ struct __guc_capture_parsed_output *node)
+{
+ guc_capture_remove_stale_matches_from_list(gc, node);
+ guc_capture_add_node_to_list(node, &gc->outlist);
+}
+
+static void
+guc_capture_add_node_to_cachelist(struct xe_guc_state_capture *gc,
+ struct __guc_capture_parsed_output *node)
+{
+ guc_capture_add_node_to_list(node, &gc->cachelist);
+}
+
+static void
+guc_capture_free_outlist_node(struct xe_guc_state_capture *gc,
+ struct __guc_capture_parsed_output *n)
+{
+ if (n) {
+ n->locked = 0;
+ list_del(&n->link);
+ /* put node back to cache list */
+ guc_capture_add_node_to_cachelist(gc, n);
+ }
+}
+
+static void
+guc_capture_remove_stale_matches_from_list(struct xe_guc_state_capture *gc,
+ struct __guc_capture_parsed_output *node)
+{
+ struct __guc_capture_parsed_output *n, *ntmp;
+ int guc_id = node->guc_id;
+
+ list_for_each_entry_safe(n, ntmp, &gc->outlist, link) {
+ if (n != node && !n->locked && n->guc_id == guc_id)
+ guc_capture_free_outlist_node(gc, n);
+ }
+}
+
+static void
+guc_capture_init_node(struct xe_guc *guc, struct __guc_capture_parsed_output *node)
+{
+ struct guc_mmio_reg *tmp[GUC_STATE_CAPTURE_TYPE_MAX];
+ int i;
+
+ for (i = 0; i < GUC_STATE_CAPTURE_TYPE_MAX; ++i) {
+ tmp[i] = node->reginfo[i].regs;
+ memset(tmp[i], 0, sizeof(struct guc_mmio_reg) *
+ guc->capture->max_mmio_per_node);
+ }
+ memset(node, 0, sizeof(*node));
+ for (i = 0; i < GUC_STATE_CAPTURE_TYPE_MAX; ++i)
+ node->reginfo[i].regs = tmp[i];
+
+ INIT_LIST_HEAD(&node->link);
+}
+
+/**
+ * DOC: Init, G2H-event and reporting flows for GuC-error-capture
+ *
+ * KMD Init time flows:
+ * --------------------
+ * --> alloc A: GuC input capture regs lists (registered to GuC via ADS).
+ * xe_guc_ads acquires the register lists by calling
+ * xe_guc_capture_getlistsize and xe_guc_capture_getlist 'n' times,
+ * where n = 1 for global-reg-list +
+ * num_engine_classes for class-reg-list +
+ * num_engine_classes for instance-reg-list
+ * (since all instances of the same engine-class type
+ * have an identical engine-instance register-list).
+ * ADS module also calls separately for PF vs VF.
+ *
+ * --> alloc B: GuC output capture buf (registered via guc_init_params(log_param))
+ * Size = #define CAPTURE_BUFFER_SIZE (warns if on too-small)
+ * Note2: 'x 3' to hold multiple capture groups
+ *
+ * GUC Runtime notify capture:
+ * --------------------------
+ * --> G2H STATE_CAPTURE_NOTIFICATION
+ * L--> xe_guc_capture_process
+ * L--> Loop through B (head..tail) and for each engine instance's
+ * err-state-captured register-list we find, we alloc 'C':
+ * --> alloc C: A capture-output-node structure that includes misc capture info along
+ * with 3 register list dumps (global, engine-class and engine-instance)
+ * This node is created from a pre-allocated list of blank nodes in
+ * guc->capture->cachelist and populated with the error-capture
+ * data from GuC and then it's added into guc->capture->outlist linked
+ * list. This list is used for matchup and printout by xe_devcoredump_read
+ * and xe_engine_snapshot_print, (when user invokes the devcoredump sysfs).
+ *
+ * GUC --> notify context reset:
+ * -----------------------------
+ * --> guc_exec_queue_timedout_job
+ * L--> xe_devcoredump
+ * L--> devcoredump_snapshot
+ * --> xe_hw_engine_snapshot_capture
+ * --> xe_engine_manual_capture(For manual capture)
+ *
+ * User Sysfs / Debugfs
+ * --------------------
+ * --> xe_devcoredump_read->
+ * L--> xxx_snapshot_print
+ * L--> xe_engine_snapshot_print
+ * Print register lists values saved at
+ * guc->capture->outlist
+ *
+ */
+
+static int guc_capture_buf_cnt(struct __guc_capture_bufstate *buf)
+{
+ if (buf->wr >= buf->rd)
+ return (buf->wr - buf->rd);
+ return (buf->size - buf->rd) + buf->wr;
+}
+
+static int guc_capture_buf_cnt_to_end(struct __guc_capture_bufstate *buf)
+{
+ if (buf->rd > buf->wr)
+ return (buf->size - buf->rd);
+ return (buf->wr - buf->rd);
+}
+
+/*
+ * GuC's error-capture output is a ring buffer populated in a byte-stream fashion:
+ *
+ * The GuC Log buffer region for error-capture is managed like a ring buffer.
+ * The GuC firmware dumps error capture logs into this ring in a byte-stream flow.
+ * Additionally, as per the current and foreseeable future, all packed error-
+ * capture output structures are dword aligned.
+ *
+ * That said, if the GuC firmware is in the midst of writing a structure that is larger
+ * than one dword but the tail end of the err-capture buffer-region has lesser space left,
+ * we would need to extract that structure one dword at a time straddled across the end,
+ * onto the start of the ring.
+ *
+ * Below function, guc_capture_log_remove_bytes is a helper for that. All callers of this
+ * function would typically do a straight-up memcpy from the ring contents and will only
+ * call this helper if their structure-extraction is straddling across the end of the
+ * ring. GuC firmware does not add any padding. The reason for the no-padding is to ease
+ * scalability for future expansion of output data types without requiring a redesign
+ * of the flow controls.
+ */
+static int
+guc_capture_log_remove_bytes(struct xe_guc *guc, struct __guc_capture_bufstate *buf,
+ void *out, int bytes_needed)
+{
+#define GUC_CAPTURE_LOG_BUF_COPY_RETRY_MAX 3
+
+ int fill_size = 0, tries = GUC_CAPTURE_LOG_BUF_COPY_RETRY_MAX;
+ int copy_size, avail;
+
+ xe_assert(guc_to_xe(guc), bytes_needed % sizeof(u32) == 0);
+
+ if (bytes_needed > guc_capture_buf_cnt(buf))
+ return -1;
+
+ while (bytes_needed > 0 && tries--) {
+ int misaligned;
+
+ avail = guc_capture_buf_cnt_to_end(buf);
+ misaligned = avail % sizeof(u32);
+ /* wrap if at end */
+ if (!avail) {
+ /* output stream clipped */
+ if (!buf->rd)
+ return fill_size;
+ buf->rd = 0;
+ continue;
+ }
+
+ /* Only copy to u32 aligned data */
+ copy_size = avail < bytes_needed ? avail - misaligned : bytes_needed;
+ xe_map_memcpy_from(guc_to_xe(guc), out + fill_size, &guc->log.bo->vmap,
+ buf->data_offset + buf->rd, copy_size);
+ buf->rd += copy_size;
+ fill_size += copy_size;
+ bytes_needed -= copy_size;
+
+ if (misaligned)
+ xe_gt_warn(guc_to_gt(guc),
+ "Bytes extraction not dword aligned, clipping.\n");
+ }
+
+ return fill_size;
+}
+
+static int
+guc_capture_log_get_group_hdr(struct xe_guc *guc, struct __guc_capture_bufstate *buf,
+ struct guc_state_capture_group_header_t *ghdr)
+{
+ int fullsize = sizeof(struct guc_state_capture_group_header_t);
+
+ if (guc_capture_log_remove_bytes(guc, buf, ghdr, fullsize) != fullsize)
+ return -1;
+ return 0;
+}
+
+static int
+guc_capture_log_get_data_hdr(struct xe_guc *guc, struct __guc_capture_bufstate *buf,
+ struct guc_state_capture_header_t *hdr)
+{
+ int fullsize = sizeof(struct guc_state_capture_header_t);
+
+ if (guc_capture_log_remove_bytes(guc, buf, hdr, fullsize) != fullsize)
+ return -1;
+ return 0;
+}
+
+static int
+guc_capture_log_get_register(struct xe_guc *guc, struct __guc_capture_bufstate *buf,
+ struct guc_mmio_reg *reg)
+{
+ int fullsize = sizeof(struct guc_mmio_reg);
+
+ if (guc_capture_log_remove_bytes(guc, buf, reg, fullsize) != fullsize)
+ return -1;
+ return 0;
+}
+
+static struct __guc_capture_parsed_output *
+guc_capture_get_prealloc_node(struct xe_guc *guc)
+{
+ struct __guc_capture_parsed_output *found = NULL;
+
+ if (!list_empty(&guc->capture->cachelist)) {
+ struct __guc_capture_parsed_output *n, *ntmp;
+
+ /* get first avail node from the cache list */
+ list_for_each_entry_safe(n, ntmp, &guc->capture->cachelist, link) {
+ found = n;
+ break;
+ }
+ } else {
+ struct __guc_capture_parsed_output *n, *ntmp;
+
+ /*
+ * traverse reversed and steal back the oldest node already
+ * allocated
+ */
+ list_for_each_entry_safe_reverse(n, ntmp, &guc->capture->outlist, link) {
+ if (!n->locked)
+ found = n;
+ }
+ }
+ if (found) {
+ list_del(&found->link);
+ guc_capture_init_node(guc, found);
+ }
+
+ return found;
+}
+
+static struct __guc_capture_parsed_output *
+guc_capture_clone_node(struct xe_guc *guc, struct __guc_capture_parsed_output *original,
+ u32 keep_reglist_mask)
+{
+ struct __guc_capture_parsed_output *new;
+ int i;
+
+ new = guc_capture_get_prealloc_node(guc);
+ if (!new)
+ return NULL;
+ if (!original)
+ return new;
+
+ new->is_partial = original->is_partial;
+
+ /* copy reg-lists that we want to clone */
+ for (i = 0; i < GUC_STATE_CAPTURE_TYPE_MAX; ++i) {
+ if (keep_reglist_mask & BIT(i)) {
+ XE_WARN_ON(original->reginfo[i].num_regs >
+ guc->capture->max_mmio_per_node);
+
+ memcpy(new->reginfo[i].regs, original->reginfo[i].regs,
+ original->reginfo[i].num_regs * sizeof(struct guc_mmio_reg));
+
+ new->reginfo[i].num_regs = original->reginfo[i].num_regs;
+ new->reginfo[i].vfid = original->reginfo[i].vfid;
+
+ if (i == GUC_STATE_CAPTURE_TYPE_ENGINE_CLASS) {
+ new->eng_class = original->eng_class;
+ } else if (i == GUC_STATE_CAPTURE_TYPE_ENGINE_INSTANCE) {
+ new->eng_inst = original->eng_inst;
+ new->guc_id = original->guc_id;
+ new->lrca = original->lrca;
+ }
+ }
+ }
+
+ return new;
+}
+
+static int
+guc_capture_extract_reglists(struct xe_guc *guc, struct __guc_capture_bufstate *buf)
+{
+ struct xe_gt *gt = guc_to_gt(guc);
+ struct guc_state_capture_group_header_t ghdr = {0};
+ struct guc_state_capture_header_t hdr = {0};
+ struct __guc_capture_parsed_output *node = NULL;
+ struct guc_mmio_reg *regs = NULL;
+ int i, numlists, numregs, ret = 0;
+ enum guc_state_capture_type datatype;
+ struct guc_mmio_reg tmp;
+ bool is_partial = false;
+
+ i = guc_capture_buf_cnt(buf);
+ if (!i)
+ return -ENODATA;
+
+ if (i % sizeof(u32)) {
+ xe_gt_warn(gt, "Got mis-aligned register capture entries\n");
+ ret = -EIO;
+ goto bailout;
+ }
+
+ /* first get the capture group header */
+ if (guc_capture_log_get_group_hdr(guc, buf, &ghdr)) {
+ ret = -EIO;
+ goto bailout;
+ }
+ /*
+ * we would typically expect a layout as below where n would be expected to be
+ * anywhere between 3 to n where n > 3 if we are seeing multiple dependent engine
+ * instances being reset together.
+ * ____________________________________________
+ * | Capture Group |
+ * | ________________________________________ |
+ * | | Capture Group Header: | |
+ * | | - num_captures = 5 | |
+ * | |______________________________________| |
+ * | ________________________________________ |
+ * | | Capture1: | |
+ * | | Hdr: GLOBAL, numregs=a | |
+ * | | ____________________________________ | |
+ * | | | Reglist | | |
+ * | | | - reg1, reg2, ... rega | | |
+ * | | |__________________________________| | |
+ * | |______________________________________| |
+ * | ________________________________________ |
+ * | | Capture2: | |
+ * | | Hdr: CLASS=RENDER/COMPUTE, numregs=b| |
+ * | | ____________________________________ | |
+ * | | | Reglist | | |
+ * | | | - reg1, reg2, ... regb | | |
+ * | | |__________________________________| | |
+ * | |______________________________________| |
+ * | ________________________________________ |
+ * | | Capture3: | |
+ * | | Hdr: INSTANCE=RCS, numregs=c | |
+ * | | ____________________________________ | |
+ * | | | Reglist | | |
+ * | | | - reg1, reg2, ... regc | | |
+ * | | |__________________________________| | |
+ * | |______________________________________| |
+ * | ________________________________________ |
+ * | | Capture4: | |
+ * | | Hdr: CLASS=RENDER/COMPUTE, numregs=d| |
+ * | | ____________________________________ | |
+ * | | | Reglist | | |
+ * | | | - reg1, reg2, ... regd | | |
+ * | | |__________________________________| | |
+ * | |______________________________________| |
+ * | ________________________________________ |
+ * | | Capture5: | |
+ * | | Hdr: INSTANCE=CCS0, numregs=e | |
+ * | | ____________________________________ | |
+ * | | | Reglist | | |
+ * | | | - reg1, reg2, ... rege | | |
+ * | | |__________________________________| | |
+ * | |______________________________________| |
+ * |__________________________________________|
+ */
+ is_partial = FIELD_GET(GUC_STATE_CAPTURE_GROUP_HEADER_CAPTURE_GROUP_TYPE, ghdr.info);
+ numlists = FIELD_GET(GUC_STATE_CAPTURE_GROUP_HEADER_NUM_CAPTURES, ghdr.info);
+
+ while (numlists--) {
+ if (guc_capture_log_get_data_hdr(guc, buf, &hdr)) {
+ ret = -EIO;
+ break;
+ }
+
+ datatype = FIELD_GET(GUC_STATE_CAPTURE_HEADER_CAPTURE_TYPE, hdr.info);
+ if (datatype > GUC_STATE_CAPTURE_TYPE_ENGINE_INSTANCE) {
+ /* unknown capture type - skip over to next capture set */
+ numregs = FIELD_GET(GUC_STATE_CAPTURE_HEADER_NUM_MMIO_ENTRIES,
+ hdr.num_mmio_entries);
+ while (numregs--) {
+ if (guc_capture_log_get_register(guc, buf, &tmp)) {
+ ret = -EIO;
+ break;
+ }
+ }
+ continue;
+ } else if (node) {
+ /*
+ * Based on the current capture type and what we have so far,
+ * decide if we should add the current node into the internal
+ * linked list for match-up when xe_devcoredump calls later
+ * (and alloc a blank node for the next set of reglists)
+ * or continue with the same node or clone the current node
+ * but only retain the global or class registers (such as the
+ * case of dependent engine resets).
+ */
+ if (datatype == GUC_STATE_CAPTURE_TYPE_GLOBAL) {
+ guc_capture_add_node_to_outlist(guc->capture, node);
+ node = NULL;
+ } else if (datatype == GUC_STATE_CAPTURE_TYPE_ENGINE_CLASS &&
+ node->reginfo[GUC_STATE_CAPTURE_TYPE_ENGINE_CLASS].num_regs) {
+ /* Add to list, clone node and duplicate global list */
+ guc_capture_add_node_to_outlist(guc->capture, node);
+ node = guc_capture_clone_node(guc, node,
+ GCAP_PARSED_REGLIST_INDEX_GLOBAL);
+ } else if (datatype == GUC_STATE_CAPTURE_TYPE_ENGINE_INSTANCE &&
+ node->reginfo[GUC_STATE_CAPTURE_TYPE_ENGINE_INSTANCE].num_regs) {
+ /* Add to list, clone node and duplicate global + class lists */
+ guc_capture_add_node_to_outlist(guc->capture, node);
+ node = guc_capture_clone_node(guc, node,
+ (GCAP_PARSED_REGLIST_INDEX_GLOBAL |
+ GCAP_PARSED_REGLIST_INDEX_ENGCLASS));
+ }
+ }
+
+ if (!node) {
+ node = guc_capture_get_prealloc_node(guc);
+ if (!node) {
+ ret = -ENOMEM;
+ break;
+ }
+ if (datatype != GUC_STATE_CAPTURE_TYPE_GLOBAL)
+ xe_gt_dbg(gt, "Register capture missing global dump: %08x!\n",
+ datatype);
+ }
+ node->is_partial = is_partial;
+ node->reginfo[datatype].vfid = FIELD_GET(GUC_STATE_CAPTURE_HEADER_VFID, hdr.owner);
+ node->source = XE_ENGINE_CAPTURE_SOURCE_GUC;
+ node->type = datatype;
+
+ switch (datatype) {
+ case GUC_STATE_CAPTURE_TYPE_ENGINE_INSTANCE:
+ node->eng_class = FIELD_GET(GUC_STATE_CAPTURE_HEADER_ENGINE_CLASS,
+ hdr.info);
+ node->eng_inst = FIELD_GET(GUC_STATE_CAPTURE_HEADER_ENGINE_INSTANCE,
+ hdr.info);
+ node->lrca = hdr.lrca;
+ node->guc_id = hdr.guc_id;
+ break;
+ case GUC_STATE_CAPTURE_TYPE_ENGINE_CLASS:
+ node->eng_class = FIELD_GET(GUC_STATE_CAPTURE_HEADER_ENGINE_CLASS,
+ hdr.info);
+ break;
+ default:
+ break;
+ }
+
+ numregs = FIELD_GET(GUC_STATE_CAPTURE_HEADER_NUM_MMIO_ENTRIES,
+ hdr.num_mmio_entries);
+ if (numregs > guc->capture->max_mmio_per_node) {
+ xe_gt_dbg(gt, "Register capture list extraction clipped by prealloc!\n");
+ numregs = guc->capture->max_mmio_per_node;
+ }
+ node->reginfo[datatype].num_regs = numregs;
+ regs = node->reginfo[datatype].regs;
+ i = 0;
+ while (numregs--) {
+ if (guc_capture_log_get_register(guc, buf, &regs[i++])) {
+ ret = -EIO;
+ break;
+ }
+ }
+ }
+
+bailout:
+ if (node) {
+ /* If we have data, add to linked list for match-up when xe_devcoredump calls */
+ for (i = GUC_STATE_CAPTURE_TYPE_GLOBAL; i < GUC_STATE_CAPTURE_TYPE_MAX; ++i) {
+ if (node->reginfo[i].regs) {
+ guc_capture_add_node_to_outlist(guc->capture, node);
+ node = NULL;
+ break;
+ }
+ }
+ if (node) /* else return it back to cache list */
+ guc_capture_add_node_to_cachelist(guc->capture, node);
+ }
+ return ret;
+}
+
+static int __guc_capture_flushlog_complete(struct xe_guc *guc)
+{
+ u32 action[] = {
+ XE_GUC_ACTION_LOG_BUFFER_FILE_FLUSH_COMPLETE,
+ GUC_LOG_BUFFER_CAPTURE
+ };
+
+ return xe_guc_ct_send_g2h_handler(&guc->ct, action, ARRAY_SIZE(action));
+}
+
+static void __guc_capture_process_output(struct xe_guc *guc)
+{
+ unsigned int buffer_size, read_offset, write_offset, full_count;
+ struct xe_uc *uc = container_of(guc, typeof(*uc), guc);
+ struct guc_log_buffer_state log_buf_state_local;
+ struct __guc_capture_bufstate buf;
+ bool new_overflow;
+ int ret, tmp;
+ u32 log_buf_state_offset;
+ u32 src_data_offset;
+
+ log_buf_state_offset = sizeof(struct guc_log_buffer_state) * GUC_LOG_BUFFER_CAPTURE;
+ src_data_offset = xe_guc_get_log_buffer_offset(&guc->log, GUC_LOG_BUFFER_CAPTURE);
+
+ /*
+ * Make a copy of the state structure, inside GuC log buffer
+ * (which is uncached mapped), on the stack to avoid reading
+ * from it multiple times.
+ */
+ xe_map_memcpy_from(guc_to_xe(guc), &log_buf_state_local, &guc->log.bo->vmap,
+ log_buf_state_offset, sizeof(struct guc_log_buffer_state));
+
+ buffer_size = xe_guc_get_log_buffer_size(&guc->log, GUC_LOG_BUFFER_CAPTURE);
+ read_offset = log_buf_state_local.read_ptr;
+ write_offset = log_buf_state_local.sampled_write_ptr;
+ full_count = FIELD_GET(GUC_LOG_BUFFER_STATE_BUFFER_FULL_CNT, log_buf_state_local.flags);
+
+ /* Bookkeeping stuff */
+ tmp = FIELD_GET(GUC_LOG_BUFFER_STATE_FLUSH_TO_FILE, log_buf_state_local.flags);
+ guc->log.stats[GUC_LOG_BUFFER_CAPTURE].flush += tmp;
+ new_overflow = xe_guc_check_log_buf_overflow(&guc->log, GUC_LOG_BUFFER_CAPTURE,
+ full_count);
+
+ /* Now copy the actual logs. */
+ if (unlikely(new_overflow)) {
+ /* copy the whole buffer in case of overflow */
+ read_offset = 0;
+ write_offset = buffer_size;
+ } else if (unlikely((read_offset > buffer_size) ||
+ (write_offset > buffer_size))) {
+ xe_gt_err(guc_to_gt(guc),
+ "Register capture buffer in invalid state: read = 0x%X, size = 0x%X!\n",
+ read_offset, buffer_size);
+ /* copy whole buffer as offsets are unreliable */
+ read_offset = 0;
+ write_offset = buffer_size;
+ }
+
+ buf.size = buffer_size;
+ buf.rd = read_offset;
+ buf.wr = write_offset;
+ buf.data_offset = src_data_offset;
+
+ if (!xe_guc_read_stopped(guc)) {
+ do {
+ ret = guc_capture_extract_reglists(guc, &buf);
+ if (ret && ret != -ENODATA)
+ xe_gt_dbg(guc_to_gt(guc), "Capture extraction failed:%d\n", ret);
+ } while (ret >= 0);
+ }
+
+ /* Update the state of log buffer err-cap state */
+ xe_map_wr(guc_to_xe(guc), &guc->log.bo->vmap,
+ log_buf_state_offset + offsetof(struct guc_log_buffer_state, read_ptr), u32,
+ write_offset);
+
+ /*
+ * Clear the flush_to_file from local first, the local was loaded by above
+ * xe_map_memcpy_from, then write out the "updated local" through
+ * xe_map_wr()
+ */
+ log_buf_state_local.flags &= ~GUC_LOG_BUFFER_STATE_FLUSH_TO_FILE;
+ xe_map_wr(guc_to_xe(guc), &guc->log.bo->vmap,
+ log_buf_state_offset + offsetof(struct guc_log_buffer_state, flags), u32,
+ log_buf_state_local.flags);
+ __guc_capture_flushlog_complete(guc);
+}
+
+/*
+ * xe_guc_capture_process - Process GuC register captured data
+ * @guc: The GuC object
+ *
+ * When GuC captured data is ready, GuC will send message
+ * XE_GUC_ACTION_STATE_CAPTURE_NOTIFICATION to host, this function will be
+ * called to process the data comes with the message.
+ *
+ * Returns: None
+ */
+void xe_guc_capture_process(struct xe_guc *guc)
+{
+ if (guc->capture)
+ __guc_capture_process_output(guc);
+}
+
+static struct __guc_capture_parsed_output *
+guc_capture_alloc_one_node(struct xe_guc *guc)
+{
+ struct drm_device *drm = guc_to_drm(guc);
+ struct __guc_capture_parsed_output *new;
+ int i;
+
+ new = drmm_kzalloc(drm, sizeof(*new), GFP_KERNEL);
+ if (!new)
+ return NULL;
+
+ for (i = 0; i < GUC_STATE_CAPTURE_TYPE_MAX; ++i) {
+ new->reginfo[i].regs = drmm_kzalloc(drm, guc->capture->max_mmio_per_node *
+ sizeof(struct guc_mmio_reg), GFP_KERNEL);
+ if (!new->reginfo[i].regs) {
+ while (i)
+ drmm_kfree(drm, new->reginfo[--i].regs);
+ drmm_kfree(drm, new);
+ return NULL;
+ }
+ }
+ guc_capture_init_node(guc, new);
+
+ return new;
+}
+
+static void
+__guc_capture_create_prealloc_nodes(struct xe_guc *guc)
+{
+ struct __guc_capture_parsed_output *node = NULL;
+ int i;
+
+ for (i = 0; i < PREALLOC_NODES_MAX_COUNT; ++i) {
+ node = guc_capture_alloc_one_node(guc);
+ if (!node) {
+ xe_gt_warn(guc_to_gt(guc), "Register capture pre-alloc-cache failure\n");
+ /* dont free the priors, use what we got and cleanup at shutdown */
+ return;
+ }
+ guc_capture_add_node_to_cachelist(guc->capture, node);
+ }
+}
+
+static int
+guc_get_max_reglist_count(struct xe_guc *guc)
+{
+ int i, j, k, tmp, maxregcount = 0;
+
+ for (i = 0; i < GUC_CAPTURE_LIST_INDEX_MAX; ++i) {
+ for (j = 0; j < GUC_STATE_CAPTURE_TYPE_MAX; ++j) {
+ for (k = 0; k < GUC_CAPTURE_LIST_CLASS_MAX; ++k) {
+ const struct __guc_mmio_reg_descr_group *match;
+
+ if (j == GUC_STATE_CAPTURE_TYPE_GLOBAL && k > 0)
+ continue;
+
+ tmp = 0;
+ match = guc_capture_get_one_list(guc->capture->reglists, i, j, k);
+ if (match)
+ tmp = match->num_regs;
+
+ match = guc_capture_get_one_list(guc->capture->extlists, i, j, k);
+ if (match)
+ tmp += match->num_regs;
+
+ if (tmp > maxregcount)
+ maxregcount = tmp;
+ }
+ }
+ }
+ if (!maxregcount)
+ maxregcount = PREALLOC_NODES_DEFAULT_NUMREGS;
+
+ return maxregcount;
+}
+
+static void
+guc_capture_create_prealloc_nodes(struct xe_guc *guc)
+{
+ /* skip if we've already done the pre-alloc */
+ if (guc->capture->max_mmio_per_node)
+ return;
+
+ guc->capture->max_mmio_per_node = guc_get_max_reglist_count(guc);
+ __guc_capture_create_prealloc_nodes(guc);
+}
+
+static void
+read_reg_to_node(struct xe_hw_engine *hwe, const struct __guc_mmio_reg_descr_group *list,
+ struct guc_mmio_reg *regs)
+{
+ int i;
+
+ if (!list || !list->list || list->num_regs == 0)
+ return;
+
+ if (!regs)
+ return;
+
+ for (i = 0; i < list->num_regs; i++) {
+ struct __guc_mmio_reg_descr desc = list->list[i];
+ u32 value;
+
+ if (list->type == GUC_STATE_CAPTURE_TYPE_ENGINE_INSTANCE) {
+ value = xe_hw_engine_mmio_read32(hwe, desc.reg);
+ } else {
+ if (list->type == GUC_STATE_CAPTURE_TYPE_ENGINE_CLASS &&
+ FIELD_GET(GUC_REGSET_STEERING_NEEDED, desc.flags)) {
+ int group, instance;
+
+ group = FIELD_GET(GUC_REGSET_STEERING_GROUP, desc.flags);
+ instance = FIELD_GET(GUC_REGSET_STEERING_INSTANCE, desc.flags);
+ value = xe_gt_mcr_unicast_read(hwe->gt, XE_REG_MCR(desc.reg.addr),
+ group, instance);
+ } else {
+ value = xe_mmio_read32(&hwe->gt->mmio, desc.reg);
+ }
+ }
+
+ regs[i].value = value;
+ regs[i].offset = desc.reg.addr;
+ regs[i].flags = desc.flags;
+ regs[i].mask = desc.mask;
+ }
+}
+
+/**
+ * xe_engine_manual_capture - Take a manual engine snapshot from engine.
+ * @hwe: Xe HW Engine.
+ * @snapshot: The engine snapshot
+ *
+ * Take engine snapshot from engine read.
+ *
+ * Returns: None
+ */
+void
+xe_engine_manual_capture(struct xe_hw_engine *hwe, struct xe_hw_engine_snapshot *snapshot)
+{
+ struct xe_gt *gt = hwe->gt;
+ struct xe_device *xe = gt_to_xe(gt);
+ struct xe_guc *guc = &gt->uc.guc;
+ struct xe_devcoredump *devcoredump = &xe->devcoredump;
+ enum guc_capture_list_class_type capture_class;
+ const struct __guc_mmio_reg_descr_group *list;
+ struct __guc_capture_parsed_output *new;
+ enum guc_state_capture_type type;
+ u16 guc_id = 0;
+ u32 lrca = 0;
+
+ if (IS_SRIOV_VF(xe))
+ return;
+
+ new = guc_capture_get_prealloc_node(guc);
+ if (!new)
+ return;
+
+ capture_class = xe_engine_class_to_guc_capture_class(hwe->class);
+ for (type = GUC_STATE_CAPTURE_TYPE_GLOBAL; type < GUC_STATE_CAPTURE_TYPE_MAX; type++) {
+ struct gcap_reg_list_info *reginfo = &new->reginfo[type];
+ /*
+ * regsinfo->regs is allocated based on guc->capture->max_mmio_per_node
+ * which is based on the descriptor list driving the population so
+ * should not overflow
+ */
+
+ /* Get register list for the type/class */
+ list = xe_guc_capture_get_reg_desc_list(gt, GUC_CAPTURE_LIST_INDEX_PF, type,
+ capture_class, false);
+ if (!list) {
+ xe_gt_dbg(gt, "Empty GuC capture register descriptor for %s",
+ hwe->name);
+ continue;
+ }
+
+ read_reg_to_node(hwe, list, reginfo->regs);
+ reginfo->num_regs = list->num_regs;
+
+ /* Capture steering registers for rcs/ccs */
+ if (capture_class == GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE) {
+ list = xe_guc_capture_get_reg_desc_list(gt, GUC_CAPTURE_LIST_INDEX_PF,
+ type, capture_class, true);
+ if (list) {
+ read_reg_to_node(hwe, list, &reginfo->regs[reginfo->num_regs]);
+ reginfo->num_regs += list->num_regs;
+ }
+ }
+ }
+
+ if (devcoredump && devcoredump->captured) {
+ struct xe_guc_submit_exec_queue_snapshot *ge = devcoredump->snapshot.ge;
+
+ if (ge) {
+ guc_id = ge->guc.id;
+ if (ge->lrc[0])
+ lrca = ge->lrc[0]->context_desc;
+ }
+ }
+
+ new->eng_class = xe_engine_class_to_guc_class(hwe->class);
+ new->eng_inst = hwe->instance;
+ new->guc_id = guc_id;
+ new->lrca = lrca;
+ new->is_partial = 0;
+ new->locked = 1;
+ new->source = XE_ENGINE_CAPTURE_SOURCE_MANUAL;
+
+ guc_capture_add_node_to_outlist(guc->capture, new);
+ devcoredump->snapshot.matched_node = new;
+}
+
+static struct guc_mmio_reg *
+guc_capture_find_reg(struct gcap_reg_list_info *reginfo, u32 addr, u32 flags)
+{
+ int i;
+
+ if (reginfo && reginfo->num_regs > 0) {
+ struct guc_mmio_reg *regs = reginfo->regs;
+
+ if (regs)
+ for (i = 0; i < reginfo->num_regs; i++)
+ if (regs[i].offset == addr && regs[i].flags == flags)
+ return &regs[i];
+ }
+
+ return NULL;
+}
+
+static void
+snapshot_print_by_list_order(struct xe_hw_engine_snapshot *snapshot, struct drm_printer *p,
+ u32 type, const struct __guc_mmio_reg_descr_group *list)
+{
+ struct xe_gt *gt = snapshot->hwe->gt;
+ struct xe_device *xe = gt_to_xe(gt);
+ struct xe_guc *guc = &gt->uc.guc;
+ struct xe_devcoredump *devcoredump = &xe->devcoredump;
+ struct xe_devcoredump_snapshot *devcore_snapshot = &devcoredump->snapshot;
+ struct gcap_reg_list_info *reginfo = NULL;
+ u32 last_value, i;
+ bool is_ext;
+
+ if (!list || list->num_regs == 0)
+ return;
+ XE_WARN_ON(!devcore_snapshot->matched_node);
+
+ is_ext = list == guc->capture->extlists;
+ reginfo = &devcore_snapshot->matched_node->reginfo[type];
+
+ /*
+ * loop through descriptor first and find the register in the node
+ * this is more scalable for developer maintenance as it will ensure
+ * the printout matched the ordering of the static descriptor
+ * table-of-lists
+ */
+ for (i = 0; i < list->num_regs; i++) {
+ const struct __guc_mmio_reg_descr *reg_desc = &list->list[i];
+ struct guc_mmio_reg *reg;
+ u32 value;
+
+ reg = guc_capture_find_reg(reginfo, reg_desc->reg.addr, reg_desc->flags);
+ if (!reg)
+ continue;
+
+ value = reg->value;
+ if (reg_desc->data_type == REG_64BIT_LOW_DW) {
+ last_value = value;
+ /* Low 32 bit dword saved, continue for high 32 bit */
+ continue;
+ } else if (reg_desc->data_type == REG_64BIT_HI_DW) {
+ u64 value_qw = ((u64)value << 32) | last_value;
+
+ drm_printf(p, "\t%s: 0x%016llx\n", reg_desc->regname, value_qw);
+ continue;
+ }
+
+ if (is_ext) {
+ int dss, group, instance;
+
+ group = FIELD_GET(GUC_REGSET_STEERING_GROUP, reg_desc->flags);
+ instance = FIELD_GET(GUC_REGSET_STEERING_INSTANCE, reg_desc->flags);
+ dss = xe_gt_mcr_steering_info_to_dss_id(gt, group, instance);
+
+ drm_printf(p, "\t%s[%u]: 0x%08x\n", reg_desc->regname, dss, value);
+ } else {
+ drm_printf(p, "\t%s: 0x%08x\n", reg_desc->regname, value);
+ }
+ }
+}
+
+/**
+ * xe_engine_snapshot_print - Print out a given Xe HW Engine snapshot.
+ * @snapshot: Xe HW Engine snapshot object.
+ * @p: drm_printer where it will be printed out.
+ *
+ * This function prints out a given Xe HW Engine snapshot object.
+ */
+void xe_engine_snapshot_print(struct xe_hw_engine_snapshot *snapshot, struct drm_printer *p)
+{
+ const char *grptype[GUC_STATE_CAPTURE_GROUP_TYPE_MAX] = {
+ "full-capture",
+ "partial-capture"
+ };
+ int type;
+ const struct __guc_mmio_reg_descr_group *list;
+ enum guc_capture_list_class_type capture_class;
+
+ struct xe_gt *gt;
+ struct xe_device *xe;
+ struct xe_devcoredump *devcoredump;
+ struct xe_devcoredump_snapshot *devcore_snapshot;
+
+ if (!snapshot)
+ return;
+
+ gt = snapshot->hwe->gt;
+ xe = gt_to_xe(gt);
+ devcoredump = &xe->devcoredump;
+ devcore_snapshot = &devcoredump->snapshot;
+
+ if (!devcore_snapshot->matched_node)
+ return;
+
+ xe_gt_assert(gt, snapshot->source <= XE_ENGINE_CAPTURE_SOURCE_GUC);
+ xe_gt_assert(gt, snapshot->hwe);
+
+ capture_class = xe_engine_class_to_guc_capture_class(snapshot->hwe->class);
+
+ drm_printf(p, "%s (physical), logical instance=%d\n",
+ snapshot->name ? snapshot->name : "",
+ snapshot->logical_instance);
+ drm_printf(p, "\tCapture_source: %s\n",
+ snapshot->source == XE_ENGINE_CAPTURE_SOURCE_GUC ? "GuC" : "Manual");
+ drm_printf(p, "\tCoverage: %s\n", grptype[devcore_snapshot->matched_node->is_partial]);
+ drm_printf(p, "\tForcewake: domain 0x%x, ref %d\n",
+ snapshot->forcewake.domain, snapshot->forcewake.ref);
+ drm_printf(p, "\tReserved: %s\n",
+ str_yes_no(snapshot->kernel_reserved));
+
+ for (type = GUC_STATE_CAPTURE_TYPE_GLOBAL; type < GUC_STATE_CAPTURE_TYPE_MAX; type++) {
+ list = xe_guc_capture_get_reg_desc_list(gt, GUC_CAPTURE_LIST_INDEX_PF, type,
+ capture_class, false);
+ snapshot_print_by_list_order(snapshot, p, type, list);
+ }
+
+ if (capture_class == GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE) {
+ list = xe_guc_capture_get_reg_desc_list(gt, GUC_CAPTURE_LIST_INDEX_PF,
+ GUC_STATE_CAPTURE_TYPE_ENGINE_CLASS,
+ capture_class, true);
+ snapshot_print_by_list_order(snapshot, p, GUC_STATE_CAPTURE_TYPE_ENGINE_CLASS,
+ list);
+ }
+
+ drm_puts(p, "\n");
+}
+
+/**
+ * xe_guc_capture_get_matching_and_lock - Matching GuC capture for the job.
+ * @job: The job object.
+ *
+ * Search within the capture outlist for the job, could be used for check if
+ * GuC capture is ready for the job.
+ * If found, the locked boolean of the node will be flagged.
+ *
+ * Returns: found guc-capture node ptr else NULL
+ */
+struct __guc_capture_parsed_output *
+xe_guc_capture_get_matching_and_lock(struct xe_sched_job *job)
+{
+ struct xe_hw_engine *hwe;
+ enum xe_hw_engine_id id;
+ struct xe_exec_queue *q;
+ struct xe_device *xe;
+ u16 guc_class = GUC_LAST_ENGINE_CLASS + 1;
+ struct xe_devcoredump_snapshot *ss;
+
+ if (!job)
+ return NULL;
+
+ q = job->q;
+ if (!q || !q->gt)
+ return NULL;
+
+ xe = gt_to_xe(q->gt);
+ if (xe->wedged.mode >= 2 || !xe_device_uc_enabled(xe) || IS_SRIOV_VF(xe))
+ return NULL;
+
+ ss = &xe->devcoredump.snapshot;
+ if (ss->matched_node && ss->matched_node->source == XE_ENGINE_CAPTURE_SOURCE_GUC)
+ return ss->matched_node;
+
+ /* Find hwe for the job */
+ for_each_hw_engine(hwe, q->gt, id) {
+ if (hwe != q->hwe)
+ continue;
+ guc_class = xe_engine_class_to_guc_class(hwe->class);
+ break;
+ }
+
+ if (guc_class <= GUC_LAST_ENGINE_CLASS) {
+ struct __guc_capture_parsed_output *n, *ntmp;
+ struct xe_guc *guc = &q->gt->uc.guc;
+ u16 guc_id = q->guc->id;
+ u32 lrca = xe_lrc_ggtt_addr(q->lrc[0]);
+
+ /*
+ * Look for a matching GuC reported error capture node from
+ * the internal output link-list based on engine, guc id and
+ * lrca info.
+ */
+ list_for_each_entry_safe(n, ntmp, &guc->capture->outlist, link) {
+ if (n->eng_class == guc_class && n->eng_inst == hwe->instance &&
+ n->guc_id == guc_id && n->lrca == lrca &&
+ n->source == XE_ENGINE_CAPTURE_SOURCE_GUC) {
+ n->locked = 1;
+ return n;
+ }
+ }
+ }
+ return NULL;
+}
+
+/**
+ * xe_engine_snapshot_capture_for_job - Take snapshot of associated engine
+ * @job: The job object
+ *
+ * Take snapshot of associated HW Engine
+ *
+ * Returns: None.
+ */
+void
+xe_engine_snapshot_capture_for_job(struct xe_sched_job *job)
+{
+ struct xe_exec_queue *q = job->q;
+ struct xe_device *xe = gt_to_xe(q->gt);
+ struct xe_devcoredump *coredump = &xe->devcoredump;
+ struct xe_hw_engine *hwe;
+ enum xe_hw_engine_id id;
+ u32 adj_logical_mask = q->logical_mask;
+
+ if (IS_SRIOV_VF(xe))
+ return;
+
+ for_each_hw_engine(hwe, q->gt, id) {
+ if (hwe->class != q->hwe->class ||
+ !(BIT(hwe->logical_instance) & adj_logical_mask)) {
+ coredump->snapshot.hwe[id] = NULL;
+ continue;
+ }
+
+ if (!coredump->snapshot.hwe[id]) {
+ coredump->snapshot.hwe[id] = xe_hw_engine_snapshot_capture(hwe, job);
+ } else {
+ struct __guc_capture_parsed_output *new;
+
+ new = xe_guc_capture_get_matching_and_lock(job);
+ if (new) {
+ struct xe_guc *guc = &q->gt->uc.guc;
+
+ /*
+ * If we are in here, it means we found a fresh
+ * GuC-err-capture node for this engine after
+ * previously failing to find a match in the
+ * early part of guc_exec_queue_timedout_job.
+ * Thus we must free the manually captured node
+ */
+ guc_capture_free_outlist_node(guc->capture,
+ coredump->snapshot.matched_node);
+ coredump->snapshot.matched_node = new;
+ }
+ }
+
+ break;
+ }
+}
+
+/*
+ * xe_guc_capture_put_matched_nodes - Cleanup macthed nodes
+ * @guc: The GuC object
+ *
+ * Free matched node and all nodes with the equal guc_id from
+ * GuC captured outlist
+ */
+void xe_guc_capture_put_matched_nodes(struct xe_guc *guc)
+{
+ struct xe_device *xe = guc_to_xe(guc);
+ struct xe_devcoredump *devcoredump = &xe->devcoredump;
+ struct __guc_capture_parsed_output *n = devcoredump->snapshot.matched_node;
+
+ if (n) {
+ guc_capture_remove_stale_matches_from_list(guc->capture, n);
+ guc_capture_free_outlist_node(guc->capture, n);
+ devcoredump->snapshot.matched_node = NULL;
+ }
+}
+
+/*
+ * xe_guc_capture_steered_list_init - Init steering register list
+ * @guc: The GuC object
+ *
+ * Init steering register list for GuC register capture, create pre-alloc node
+ */
+void xe_guc_capture_steered_list_init(struct xe_guc *guc)
+{
+ /*
+ * For certain engine classes, there are slice and subslice
+ * level registers requiring steering. We allocate and populate
+ * these based on hw config and add it as an extension list at
+ * the end of the pre-populated render list.
+ */
+ guc_capture_alloc_steered_lists(guc);
+ check_guc_capture_size(guc);
+ guc_capture_create_prealloc_nodes(guc);
+}
+
+/*
+ * xe_guc_capture_init - Init for GuC register capture
+ * @guc: The GuC object
+ *
+ * Init for GuC register capture, alloc memory for capture data structure.
+ *
+ * Returns: 0 if success.
+ * -ENOMEM if out of memory
+ */
+int xe_guc_capture_init(struct xe_guc *guc)
+{
+ guc->capture = drmm_kzalloc(guc_to_drm(guc), sizeof(*guc->capture), GFP_KERNEL);
+ if (!guc->capture)
+ return -ENOMEM;
+
+ guc->capture->reglists = guc_capture_get_device_reglist(guc_to_xe(guc));
+
+ INIT_LIST_HEAD(&guc->capture->outlist);
+ INIT_LIST_HEAD(&guc->capture->cachelist);
+
+ return 0;
+}
diff --git a/drivers/gpu/drm/xe/xe_guc_capture.h b/drivers/gpu/drm/xe/xe_guc_capture.h
new file mode 100644
index 000000000000..97a795d13dd1
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_capture.h
@@ -0,0 +1,61 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021-2024 Intel Corporation
+ */
+
+#ifndef _XE_GUC_CAPTURE_H
+#define _XE_GUC_CAPTURE_H
+
+#include <linux/types.h>
+#include "abi/guc_capture_abi.h"
+#include "xe_guc.h"
+#include "xe_guc_fwif.h"
+
+struct xe_guc;
+struct xe_hw_engine;
+struct xe_hw_engine_snapshot;
+struct xe_sched_job;
+
+static inline enum guc_capture_list_class_type xe_guc_class_to_capture_class(u16 class)
+{
+ switch (class) {
+ case GUC_RENDER_CLASS:
+ case GUC_COMPUTE_CLASS:
+ return GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE;
+ case GUC_GSC_OTHER_CLASS:
+ return GUC_CAPTURE_LIST_CLASS_GSC_OTHER;
+ case GUC_VIDEO_CLASS:
+ case GUC_VIDEOENHANCE_CLASS:
+ case GUC_BLITTER_CLASS:
+ return class;
+ default:
+ XE_WARN_ON(class);
+ return GUC_CAPTURE_LIST_CLASS_MAX;
+ }
+}
+
+static inline enum guc_capture_list_class_type
+xe_engine_class_to_guc_capture_class(enum xe_engine_class class)
+{
+ return xe_guc_class_to_capture_class(xe_engine_class_to_guc_class(class));
+}
+
+void xe_guc_capture_process(struct xe_guc *guc);
+int xe_guc_capture_getlist(struct xe_guc *guc, u32 owner, u32 type,
+ enum guc_capture_list_class_type capture_class, void **outptr);
+int xe_guc_capture_getlistsize(struct xe_guc *guc, u32 owner, u32 type,
+ enum guc_capture_list_class_type capture_class, size_t *size);
+int xe_guc_capture_getnullheader(struct xe_guc *guc, void **outptr, size_t *size);
+size_t xe_guc_capture_ads_input_worst_size(struct xe_guc *guc);
+const struct __guc_mmio_reg_descr_group *
+xe_guc_capture_get_reg_desc_list(struct xe_gt *gt, u32 owner, u32 type,
+ enum guc_capture_list_class_type capture_class, bool is_ext);
+struct __guc_capture_parsed_output *xe_guc_capture_get_matching_and_lock(struct xe_sched_job *job);
+void xe_engine_manual_capture(struct xe_hw_engine *hwe, struct xe_hw_engine_snapshot *snapshot);
+void xe_engine_snapshot_print(struct xe_hw_engine_snapshot *snapshot, struct drm_printer *p);
+void xe_engine_snapshot_capture_for_job(struct xe_sched_job *job);
+void xe_guc_capture_steered_list_init(struct xe_guc *guc);
+void xe_guc_capture_put_matched_nodes(struct xe_guc *guc);
+int xe_guc_capture_init(struct xe_guc *guc);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_guc_capture_types.h b/drivers/gpu/drm/xe/xe_guc_capture_types.h
new file mode 100644
index 000000000000..2057125b1bfa
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_capture_types.h
@@ -0,0 +1,68 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021-2024 Intel Corporation
+ */
+
+#ifndef _XE_GUC_CAPTURE_TYPES_H
+#define _XE_GUC_CAPTURE_TYPES_H
+
+#include <linux/types.h>
+#include "regs/xe_reg_defs.h"
+
+struct xe_guc;
+
+/* data type of the register in register list */
+enum capture_register_data_type {
+ REG_32BIT = 0,
+ REG_64BIT_LOW_DW,
+ REG_64BIT_HI_DW,
+};
+
+/**
+ * struct __guc_mmio_reg_descr - GuC mmio register descriptor
+ *
+ * xe_guc_capture module uses these structures to define a register
+ * (offsets, names, flags,...) that are used at the ADS regisration
+ * time as well as during runtime processing and reporting of error-
+ * capture states generated by GuC just prior to engine reset events.
+ */
+struct __guc_mmio_reg_descr {
+ /** @reg: the register */
+ struct xe_reg reg;
+ /**
+ * @data_type: data type of the register
+ * Could be 32 bit, low or hi dword of a 64 bit, see enum
+ * register_data_type
+ */
+ enum capture_register_data_type data_type;
+ /** @flags: Flags for the register */
+ u32 flags;
+ /** @mask: The mask to apply */
+ u32 mask;
+ /** @regname: Name of the register */
+ const char *regname;
+};
+
+/**
+ * struct __guc_mmio_reg_descr_group - The group of register descriptor
+ *
+ * xe_guc_capture module uses these structures to maintain static
+ * tables (per unique platform) that consists of lists of registers
+ * (offsets, names, flags,...) that are used at the ADS regisration
+ * time as well as during runtime processing and reporting of error-
+ * capture states generated by GuC just prior to engine reset events.
+ */
+struct __guc_mmio_reg_descr_group {
+ /** @list: The register list */
+ const struct __guc_mmio_reg_descr *list;
+ /** @num_regs: Count of registers in the list */
+ u32 num_regs;
+ /** @owner: PF/VF owner, see enum guc_capture_list_index_type */
+ u32 owner;
+ /** @type: Capture register type, see enum guc_state_capture_type */
+ u32 type;
+ /** @engine: The engine class, see enum guc_capture_list_class_type */
+ u32 engine;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c
index 9c505d3517cd..8aeb1789805c 100644
--- a/drivers/gpu/drm/xe/xe_guc_ct.c
+++ b/drivers/gpu/drm/xe/xe_guc_ct.c
@@ -8,6 +8,7 @@
#include <linux/bitfield.h>
#include <linux/circ_buf.h>
#include <linux/delay.h>
+#include <linux/fault-inject.h>
#include <kunit/static_stub.h>
@@ -17,6 +18,7 @@
#include "abi/guc_actions_sriov_abi.h"
#include "abi/guc_klvs_abi.h"
#include "xe_bo.h"
+#include "xe_devcoredump.h"
#include "xe_device.h"
#include "xe_gt.h"
#include "xe_gt_pagefault.h"
@@ -25,12 +27,48 @@
#include "xe_gt_sriov_pf_monitor.h"
#include "xe_gt_tlb_invalidation.h"
#include "xe_guc.h"
+#include "xe_guc_log.h"
#include "xe_guc_relay.h"
#include "xe_guc_submit.h"
#include "xe_map.h"
#include "xe_pm.h"
#include "xe_trace_guc.h"
+#if IS_ENABLED(CONFIG_DRM_XE_DEBUG)
+enum {
+ /* Internal states, not error conditions */
+ CT_DEAD_STATE_REARM, /* 0x0001 */
+ CT_DEAD_STATE_CAPTURE, /* 0x0002 */
+
+ /* Error conditions */
+ CT_DEAD_SETUP, /* 0x0004 */
+ CT_DEAD_H2G_WRITE, /* 0x0008 */
+ CT_DEAD_H2G_HAS_ROOM, /* 0x0010 */
+ CT_DEAD_G2H_READ, /* 0x0020 */
+ CT_DEAD_G2H_RECV, /* 0x0040 */
+ CT_DEAD_G2H_RELEASE, /* 0x0080 */
+ CT_DEAD_DEADLOCK, /* 0x0100 */
+ CT_DEAD_PROCESS_FAILED, /* 0x0200 */
+ CT_DEAD_FAST_G2H, /* 0x0400 */
+ CT_DEAD_PARSE_G2H_RESPONSE, /* 0x0800 */
+ CT_DEAD_PARSE_G2H_UNKNOWN, /* 0x1000 */
+ CT_DEAD_PARSE_G2H_ORIGIN, /* 0x2000 */
+ CT_DEAD_PARSE_G2H_TYPE, /* 0x4000 */
+};
+
+static void ct_dead_worker_func(struct work_struct *w);
+static void ct_dead_capture(struct xe_guc_ct *ct, struct guc_ctb *ctb, u32 reason_code);
+
+#define CT_DEAD(ct, ctb, reason_code) ct_dead_capture((ct), (ctb), CT_DEAD_##reason_code)
+#else
+#define CT_DEAD(ct, ctb, reason) \
+ do { \
+ struct guc_ctb *_ctb = (ctb); \
+ if (_ctb) \
+ _ctb->info.broken = true; \
+ } while (0)
+#endif
+
/* Used when a CT send wants to block and / or receive data */
struct g2h_fence {
u32 *response_buffer;
@@ -175,14 +213,18 @@ int xe_guc_ct_init(struct xe_guc_ct *ct)
xe_gt_assert(gt, !(guc_ct_size() % PAGE_SIZE));
- ct->g2h_wq = alloc_ordered_workqueue("xe-g2h-wq", 0);
+ ct->g2h_wq = alloc_ordered_workqueue("xe-g2h-wq", WQ_MEM_RECLAIM);
if (!ct->g2h_wq)
return -ENOMEM;
spin_lock_init(&ct->fast_lock);
xa_init(&ct->fence_lookup);
INIT_WORK(&ct->g2h_worker, g2h_worker_func);
- INIT_DELAYED_WORK(&ct->safe_mode_worker, safe_mode_worker_func);
+ INIT_DELAYED_WORK(&ct->safe_mode_worker, safe_mode_worker_func);
+#if IS_ENABLED(CONFIG_DRM_XE_DEBUG)
+ spin_lock_init(&ct->dead.lock);
+ INIT_WORK(&ct->dead.worker, ct_dead_worker_func);
+#endif
init_waitqueue_head(&ct->wq);
init_waitqueue_head(&ct->g2h_fence_wq);
@@ -209,6 +251,7 @@ int xe_guc_ct_init(struct xe_guc_ct *ct)
ct->state = XE_GUC_CT_STATE_DISABLED;
return 0;
}
+ALLOW_ERROR_INJECTION(xe_guc_ct_init, ERRNO); /* See xe_pci_probe() */
#define desc_read(xe_, guc_ctb__, field_) \
xe_map_rd_field(xe_, &guc_ctb__->desc, 0, \
@@ -395,6 +438,7 @@ int xe_guc_ct_enable(struct xe_guc_ct *ct)
xe_gt_assert(gt, !xe_guc_ct_enabled(ct));
+ xe_map_memset(xe, &ct->bo->vmap, 0, 0, ct->bo->size);
guc_ct_ctb_h2g_init(xe, &ct->ctbs.h2g, &ct->bo->vmap);
guc_ct_ctb_g2h_init(xe, &ct->ctbs.g2h, &ct->bo->vmap);
@@ -419,10 +463,22 @@ int xe_guc_ct_enable(struct xe_guc_ct *ct)
if (ct_needs_safe_mode(ct))
ct_enter_safe_mode(ct);
+#if IS_ENABLED(CONFIG_DRM_XE_DEBUG)
+ /*
+ * The CT has now been reset so the dumper can be re-armed
+ * after any existing dead state has been dumped.
+ */
+ spin_lock_irq(&ct->dead.lock);
+ if (ct->dead.reason)
+ ct->dead.reason |= (1 << CT_DEAD_STATE_REARM);
+ spin_unlock_irq(&ct->dead.lock);
+#endif
+
return 0;
err_out:
xe_gt_err(gt, "Failed to enable GuC CT (%pe)\n", ERR_PTR(err));
+ CT_DEAD(ct, NULL, SETUP);
return err;
}
@@ -466,6 +522,19 @@ static bool h2g_has_room(struct xe_guc_ct *ct, u32 cmd_len)
if (cmd_len > h2g->info.space) {
h2g->info.head = desc_read(ct_to_xe(ct), h2g, head);
+
+ if (h2g->info.head > h2g->info.size) {
+ struct xe_device *xe = ct_to_xe(ct);
+ u32 desc_status = desc_read(xe, h2g, status);
+
+ desc_write(xe, h2g, status, desc_status | GUC_CTB_STATUS_OVERFLOW);
+
+ xe_gt_err(ct_to_gt(ct), "CT: invalid head offset %u >= %u)\n",
+ h2g->info.head, h2g->info.size);
+ CT_DEAD(ct, h2g, H2G_HAS_ROOM);
+ return false;
+ }
+
h2g->info.space = CIRC_SPACE(h2g->info.tail, h2g->info.head,
h2g->info.size) -
h2g->info.resv_space;
@@ -521,10 +590,24 @@ static void __g2h_reserve_space(struct xe_guc_ct *ct, u32 g2h_len, u32 num_g2h)
static void __g2h_release_space(struct xe_guc_ct *ct, u32 g2h_len)
{
+ bool bad = false;
+
lockdep_assert_held(&ct->fast_lock);
- xe_gt_assert(ct_to_gt(ct), ct->ctbs.g2h.info.space + g2h_len <=
- ct->ctbs.g2h.info.size - ct->ctbs.g2h.info.resv_space);
- xe_gt_assert(ct_to_gt(ct), ct->g2h_outstanding);
+
+ bad = ct->ctbs.g2h.info.space + g2h_len >
+ ct->ctbs.g2h.info.size - ct->ctbs.g2h.info.resv_space;
+ bad |= !ct->g2h_outstanding;
+
+ if (bad) {
+ xe_gt_err(ct_to_gt(ct), "Invalid G2H release: %d + %d vs %d - %d -> %d vs %d, outstanding = %d!\n",
+ ct->ctbs.g2h.info.space, g2h_len,
+ ct->ctbs.g2h.info.size, ct->ctbs.g2h.info.resv_space,
+ ct->ctbs.g2h.info.space + g2h_len,
+ ct->ctbs.g2h.info.size - ct->ctbs.g2h.info.resv_space,
+ ct->g2h_outstanding);
+ CT_DEAD(ct, &ct->ctbs.g2h, G2H_RELEASE);
+ return;
+ }
ct->ctbs.g2h.info.space += g2h_len;
if (!--ct->g2h_outstanding)
@@ -551,12 +634,43 @@ static int h2g_write(struct xe_guc_ct *ct, const u32 *action, u32 len,
u32 full_len;
struct iosys_map map = IOSYS_MAP_INIT_OFFSET(&h2g->cmds,
tail * sizeof(u32));
+ u32 desc_status;
full_len = len + GUC_CTB_HDR_LEN;
lockdep_assert_held(&ct->lock);
xe_gt_assert(gt, full_len <= GUC_CTB_MSG_MAX_LEN);
- xe_gt_assert(gt, tail <= h2g->info.size);
+
+ desc_status = desc_read(xe, h2g, status);
+ if (desc_status) {
+ xe_gt_err(gt, "CT write: non-zero status: %u\n", desc_status);
+ goto corrupted;
+ }
+
+ if (IS_ENABLED(CONFIG_DRM_XE_DEBUG)) {
+ u32 desc_tail = desc_read(xe, h2g, tail);
+ u32 desc_head = desc_read(xe, h2g, head);
+
+ if (tail != desc_tail) {
+ desc_write(xe, h2g, status, desc_status | GUC_CTB_STATUS_MISMATCH);
+ xe_gt_err(gt, "CT write: tail was modified %u != %u\n", desc_tail, tail);
+ goto corrupted;
+ }
+
+ if (tail > h2g->info.size) {
+ desc_write(xe, h2g, status, desc_status | GUC_CTB_STATUS_OVERFLOW);
+ xe_gt_err(gt, "CT write: tail out of range: %u vs %u\n",
+ tail, h2g->info.size);
+ goto corrupted;
+ }
+
+ if (desc_head >= h2g->info.size) {
+ desc_write(xe, h2g, status, desc_status | GUC_CTB_STATUS_OVERFLOW);
+ xe_gt_err(gt, "CT write: invalid head offset %u >= %u)\n",
+ desc_head, h2g->info.size);
+ goto corrupted;
+ }
+ }
/* Command will wrap, zero fill (NOPs), return and check credits again */
if (tail + full_len > h2g->info.size) {
@@ -609,6 +723,10 @@ static int h2g_write(struct xe_guc_ct *ct, const u32 *action, u32 len,
desc_read(xe, h2g, head), h2g->info.tail);
return 0;
+
+corrupted:
+ CT_DEAD(ct, &ct->ctbs.h2g, H2G_WRITE);
+ return -EPIPE;
}
/*
@@ -716,7 +834,6 @@ static int guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action, u32 len,
{
struct xe_device *xe = ct_to_xe(ct);
struct xe_gt *gt = ct_to_gt(ct);
- struct drm_printer p = xe_gt_info_printer(gt);
unsigned int sleep_period_ms = 1;
int ret;
@@ -769,8 +886,13 @@ try_again:
goto broken;
#undef g2h_avail
- if (dequeue_one_g2h(ct) < 0)
+ ret = dequeue_one_g2h(ct);
+ if (ret < 0) {
+ if (ret != -ECANCELED)
+ xe_gt_err(ct_to_gt(ct), "CTB receive failed (%pe)",
+ ERR_PTR(ret));
goto broken;
+ }
goto try_again;
}
@@ -779,8 +901,7 @@ try_again:
broken:
xe_gt_err(gt, "No forward process on H2G, reset required\n");
- xe_guc_ct_print(ct, &p, true);
- ct->ctbs.h2g.info.broken = true;
+ CT_DEAD(ct, &ct->ctbs.h2g, DEADLOCK);
return -EDEADLK;
}
@@ -848,7 +969,7 @@ static bool retry_failure(struct xe_guc_ct *ct, int ret)
#define ct_alive(ct) \
(xe_guc_ct_enabled(ct) && !ct->ctbs.h2g.info.broken && \
!ct->ctbs.g2h.info.broken)
- if (!wait_event_interruptible_timeout(ct->wq, ct_alive(ct), HZ * 5))
+ if (!wait_event_interruptible_timeout(ct->wq, ct_alive(ct), HZ * 5))
return false;
#undef ct_alive
@@ -890,7 +1011,7 @@ retry_same_fence:
goto retry_same_fence;
if (!g2h_fence_needs_alloc(&g2h_fence))
- xa_erase_irq(&ct->fence_lookup, g2h_fence.seqno);
+ xa_erase(&ct->fence_lookup, g2h_fence.seqno);
return ret;
}
@@ -916,7 +1037,7 @@ retry_same_fence:
if (!ret) {
xe_gt_err(gt, "Timed out wait for G2H, fence %u, action %04x, done %s",
g2h_fence.seqno, action[0], str_yes_no(g2h_fence.done));
- xa_erase_irq(&ct->fence_lookup, g2h_fence.seqno);
+ xa_erase(&ct->fence_lookup, g2h_fence.seqno);
mutex_unlock(&ct->lock);
return -ETIME;
}
@@ -1028,6 +1149,7 @@ static int parse_g2h_response(struct xe_guc_ct *ct, u32 *msg, u32 len)
else
xe_gt_err(gt, "unexpected response %u for FAST_REQ H2G fence 0x%x!\n",
type, fence);
+ CT_DEAD(ct, NULL, PARSE_G2H_RESPONSE);
return -EPROTO;
}
@@ -1035,6 +1157,7 @@ static int parse_g2h_response(struct xe_guc_ct *ct, u32 *msg, u32 len)
g2h_fence = xa_erase(&ct->fence_lookup, fence);
if (unlikely(!g2h_fence)) {
/* Don't tear down channel, as send could've timed out */
+ /* CT_DEAD(ct, NULL, PARSE_G2H_UNKNOWN); */
xe_gt_warn(gt, "G2H fence (%u) not found!\n", fence);
g2h_release_space(ct, GUC_CTB_HXG_MSG_MAX_LEN);
return 0;
@@ -1079,7 +1202,7 @@ static int parse_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len)
if (unlikely(origin != GUC_HXG_ORIGIN_GUC)) {
xe_gt_err(gt, "G2H channel broken on read, origin=%u, reset required\n",
origin);
- ct->ctbs.g2h.info.broken = true;
+ CT_DEAD(ct, &ct->ctbs.g2h, PARSE_G2H_ORIGIN);
return -EPROTO;
}
@@ -1097,7 +1220,7 @@ static int parse_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len)
default:
xe_gt_err(gt, "G2H channel broken on read, type=%u, reset required\n",
type);
- ct->ctbs.g2h.info.broken = true;
+ CT_DEAD(ct, &ct->ctbs.g2h, PARSE_G2H_TYPE);
ret = -EOPNOTSUPP;
}
@@ -1140,6 +1263,8 @@ static int process_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len)
/* Selftest only at the moment */
break;
case XE_GUC_ACTION_STATE_CAPTURE_NOTIFICATION:
+ ret = xe_guc_error_capture_handler(guc, payload, adj_len);
+ break;
case XE_GUC_ACTION_NOTIFY_FLUSH_LOG_BUFFER_TO_FILE:
/* FIXME: Handle this */
break;
@@ -1174,9 +1299,11 @@ static int process_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len)
xe_gt_err(gt, "unexpected G2H action 0x%04x\n", action);
}
- if (ret)
+ if (ret) {
xe_gt_err(gt, "G2H action 0x%04x failed (%pe)\n",
action, ERR_PTR(ret));
+ CT_DEAD(ct, NULL, PROCESS_FAILED);
+ }
return 0;
}
@@ -1186,7 +1313,7 @@ static int g2h_read(struct xe_guc_ct *ct, u32 *msg, bool fast_path)
struct xe_device *xe = ct_to_xe(ct);
struct xe_gt *gt = ct_to_gt(ct);
struct guc_ctb *g2h = &ct->ctbs.g2h;
- u32 tail, head, len;
+ u32 tail, head, len, desc_status;
s32 avail;
u32 action;
u32 *hxg;
@@ -1205,6 +1332,63 @@ static int g2h_read(struct xe_guc_ct *ct, u32 *msg, bool fast_path)
xe_gt_assert(gt, xe_guc_ct_enabled(ct));
+ desc_status = desc_read(xe, g2h, status);
+ if (desc_status) {
+ if (desc_status & GUC_CTB_STATUS_DISABLED) {
+ /*
+ * Potentially valid if a CLIENT_RESET request resulted in
+ * contexts/engines being reset. But should never happen as
+ * no contexts should be active when CLIENT_RESET is sent.
+ */
+ xe_gt_err(gt, "CT read: unexpected G2H after GuC has stopped!\n");
+ desc_status &= ~GUC_CTB_STATUS_DISABLED;
+ }
+
+ if (desc_status) {
+ xe_gt_err(gt, "CT read: non-zero status: %u\n", desc_status);
+ goto corrupted;
+ }
+ }
+
+ if (IS_ENABLED(CONFIG_DRM_XE_DEBUG)) {
+ u32 desc_tail = desc_read(xe, g2h, tail);
+ /*
+ u32 desc_head = desc_read(xe, g2h, head);
+
+ * info.head and desc_head are updated back-to-back at the end of
+ * this function and nowhere else. Hence, they cannot be different
+ * unless two g2h_read calls are running concurrently. Which is not
+ * possible because it is guarded by ct->fast_lock. And yet, some
+ * discrete platforms are reguarly hitting this error :(.
+ *
+ * desc_head rolling backwards shouldn't cause any noticeable
+ * problems - just a delay in GuC being allowed to proceed past that
+ * point in the queue. So for now, just disable the error until it
+ * can be root caused.
+ *
+ if (g2h->info.head != desc_head) {
+ desc_write(xe, g2h, status, desc_status | GUC_CTB_STATUS_MISMATCH);
+ xe_gt_err(gt, "CT read: head was modified %u != %u\n",
+ desc_head, g2h->info.head);
+ goto corrupted;
+ }
+ */
+
+ if (g2h->info.head > g2h->info.size) {
+ desc_write(xe, g2h, status, desc_status | GUC_CTB_STATUS_OVERFLOW);
+ xe_gt_err(gt, "CT read: head out of range: %u vs %u\n",
+ g2h->info.head, g2h->info.size);
+ goto corrupted;
+ }
+
+ if (desc_tail >= g2h->info.size) {
+ desc_write(xe, g2h, status, desc_status | GUC_CTB_STATUS_OVERFLOW);
+ xe_gt_err(gt, "CT read: invalid tail offset %u >= %u)\n",
+ desc_tail, g2h->info.size);
+ goto corrupted;
+ }
+ }
+
/* Calculate DW available to read */
tail = desc_read(xe, g2h, tail);
avail = tail - g2h->info.head;
@@ -1221,9 +1405,7 @@ static int g2h_read(struct xe_guc_ct *ct, u32 *msg, bool fast_path)
if (len > avail) {
xe_gt_err(gt, "G2H channel broken on read, avail=%d, len=%d, reset required\n",
avail, len);
- g2h->info.broken = true;
-
- return -EPROTO;
+ goto corrupted;
}
head = (g2h->info.head + 1) % g2h->info.size;
@@ -1269,6 +1451,10 @@ static int g2h_read(struct xe_guc_ct *ct, u32 *msg, bool fast_path)
action, len, g2h->info.head, tail);
return len;
+
+corrupted:
+ CT_DEAD(ct, &ct->ctbs.g2h, G2H_READ);
+ return -EPROTO;
}
static void g2h_fast_path(struct xe_guc_ct *ct, u32 *msg, u32 len)
@@ -1295,9 +1481,11 @@ static void g2h_fast_path(struct xe_guc_ct *ct, u32 *msg, u32 len)
xe_gt_warn(gt, "NOT_POSSIBLE");
}
- if (ret)
+ if (ret) {
xe_gt_err(gt, "G2H action 0x%04x failed (%pe)\n",
action, ERR_PTR(ret));
+ CT_DEAD(ct, NULL, FAST_G2H);
+ }
}
/**
@@ -1357,7 +1545,6 @@ static int dequeue_one_g2h(struct xe_guc_ct *ct)
static void receive_g2h(struct xe_guc_ct *ct)
{
- struct xe_gt *gt = ct_to_gt(ct);
bool ongoing;
int ret;
@@ -1394,9 +1581,8 @@ static void receive_g2h(struct xe_guc_ct *ct)
mutex_unlock(&ct->lock);
if (unlikely(ret == -EPROTO || ret == -EOPNOTSUPP)) {
- struct drm_printer p = xe_gt_info_printer(gt);
-
- xe_guc_ct_print(ct, &p, false);
+ xe_gt_err(ct_to_gt(ct), "CT dequeue failed: %d", ret);
+ CT_DEAD(ct, NULL, G2H_RECV);
kick_reset(ct);
}
} while (ret == 1);
@@ -1412,49 +1598,34 @@ static void g2h_worker_func(struct work_struct *w)
receive_g2h(ct);
}
-static void guc_ctb_snapshot_capture(struct xe_device *xe, struct guc_ctb *ctb,
- struct guc_ctb_snapshot *snapshot,
- bool atomic)
+static struct xe_guc_ct_snapshot *guc_ct_snapshot_alloc(struct xe_guc_ct *ct, bool atomic,
+ bool want_ctb)
{
- u32 head, tail;
-
- xe_map_memcpy_from(xe, &snapshot->desc, &ctb->desc, 0,
- sizeof(struct guc_ct_buffer_desc));
- memcpy(&snapshot->info, &ctb->info, sizeof(struct guc_ctb_info));
+ struct xe_guc_ct_snapshot *snapshot;
- snapshot->cmds = kmalloc_array(ctb->info.size, sizeof(u32),
- atomic ? GFP_ATOMIC : GFP_KERNEL);
+ snapshot = kzalloc(sizeof(*snapshot), atomic ? GFP_ATOMIC : GFP_KERNEL);
+ if (!snapshot)
+ return NULL;
- if (!snapshot->cmds) {
- drm_err(&xe->drm, "Skipping CTB commands snapshot. Only CTB info will be available.\n");
- return;
+ if (ct->bo && want_ctb) {
+ snapshot->ctb_size = ct->bo->size;
+ snapshot->ctb = kmalloc(snapshot->ctb_size, atomic ? GFP_ATOMIC : GFP_KERNEL);
}
- head = snapshot->desc.head;
- tail = snapshot->desc.tail;
-
- if (head != tail) {
- struct iosys_map map =
- IOSYS_MAP_INIT_OFFSET(&ctb->cmds, head * sizeof(u32));
-
- while (head != tail) {
- snapshot->cmds[head] = xe_map_rd(xe, &map, 0, u32);
- ++head;
- if (head == ctb->info.size) {
- head = 0;
- map = ctb->cmds;
- } else {
- iosys_map_incr(&map, sizeof(u32));
- }
- }
- }
+ return snapshot;
+}
+
+static void guc_ctb_snapshot_capture(struct xe_device *xe, struct guc_ctb *ctb,
+ struct guc_ctb_snapshot *snapshot)
+{
+ xe_map_memcpy_from(xe, &snapshot->desc, &ctb->desc, 0,
+ sizeof(struct guc_ct_buffer_desc));
+ memcpy(&snapshot->info, &ctb->info, sizeof(struct guc_ctb_info));
}
static void guc_ctb_snapshot_print(struct guc_ctb_snapshot *snapshot,
struct drm_printer *p)
{
- u32 head, tail;
-
drm_printf(p, "\tsize: %d\n", snapshot->info.size);
drm_printf(p, "\tresv_space: %d\n", snapshot->info.resv_space);
drm_printf(p, "\thead: %d\n", snapshot->info.head);
@@ -1464,63 +1635,46 @@ static void guc_ctb_snapshot_print(struct guc_ctb_snapshot *snapshot,
drm_printf(p, "\thead (memory): %d\n", snapshot->desc.head);
drm_printf(p, "\ttail (memory): %d\n", snapshot->desc.tail);
drm_printf(p, "\tstatus (memory): 0x%x\n", snapshot->desc.status);
+}
- if (!snapshot->cmds)
- return;
+static struct xe_guc_ct_snapshot *guc_ct_snapshot_capture(struct xe_guc_ct *ct, bool atomic,
+ bool want_ctb)
+{
+ struct xe_device *xe = ct_to_xe(ct);
+ struct xe_guc_ct_snapshot *snapshot;
- head = snapshot->desc.head;
- tail = snapshot->desc.tail;
+ snapshot = guc_ct_snapshot_alloc(ct, atomic, want_ctb);
+ if (!snapshot) {
+ xe_gt_err(ct_to_gt(ct), "Skipping CTB snapshot entirely.\n");
+ return NULL;
+ }
- while (head != tail) {
- drm_printf(p, "\tcmd[%d]: 0x%08x\n", head,
- snapshot->cmds[head]);
- ++head;
- if (head == snapshot->info.size)
- head = 0;
+ if (xe_guc_ct_enabled(ct) || ct->state == XE_GUC_CT_STATE_STOPPED) {
+ snapshot->ct_enabled = true;
+ snapshot->g2h_outstanding = READ_ONCE(ct->g2h_outstanding);
+ guc_ctb_snapshot_capture(xe, &ct->ctbs.h2g, &snapshot->h2g);
+ guc_ctb_snapshot_capture(xe, &ct->ctbs.g2h, &snapshot->g2h);
}
-}
-static void guc_ctb_snapshot_free(struct guc_ctb_snapshot *snapshot)
-{
- kfree(snapshot->cmds);
+ if (ct->bo && snapshot->ctb)
+ xe_map_memcpy_from(xe, snapshot->ctb, &ct->bo->vmap, 0, snapshot->ctb_size);
+
+ return snapshot;
}
/**
* xe_guc_ct_snapshot_capture - Take a quick snapshot of the CT state.
* @ct: GuC CT object.
- * @atomic: Boolean to indicate if this is called from atomic context like
- * reset or CTB handler or from some regular path like debugfs.
*
* This can be printed out in a later stage like during dev_coredump
- * analysis.
+ * analysis. This is safe to be called during atomic context.
*
* Returns: a GuC CT snapshot object that must be freed by the caller
* by using `xe_guc_ct_snapshot_free`.
*/
-struct xe_guc_ct_snapshot *xe_guc_ct_snapshot_capture(struct xe_guc_ct *ct,
- bool atomic)
+struct xe_guc_ct_snapshot *xe_guc_ct_snapshot_capture(struct xe_guc_ct *ct)
{
- struct xe_device *xe = ct_to_xe(ct);
- struct xe_guc_ct_snapshot *snapshot;
-
- snapshot = kzalloc(sizeof(*snapshot),
- atomic ? GFP_ATOMIC : GFP_KERNEL);
-
- if (!snapshot) {
- drm_err(&xe->drm, "Skipping CTB snapshot entirely.\n");
- return NULL;
- }
-
- if (xe_guc_ct_enabled(ct) || ct->state == XE_GUC_CT_STATE_STOPPED) {
- snapshot->ct_enabled = true;
- snapshot->g2h_outstanding = READ_ONCE(ct->g2h_outstanding);
- guc_ctb_snapshot_capture(xe, &ct->ctbs.h2g,
- &snapshot->h2g, atomic);
- guc_ctb_snapshot_capture(xe, &ct->ctbs.g2h,
- &snapshot->g2h, atomic);
- }
-
- return snapshot;
+ return guc_ct_snapshot_capture(ct, true, true);
}
/**
@@ -1540,11 +1694,13 @@ void xe_guc_ct_snapshot_print(struct xe_guc_ct_snapshot *snapshot,
drm_puts(p, "H2G CTB (all sizes in DW):\n");
guc_ctb_snapshot_print(&snapshot->h2g, p);
- drm_puts(p, "\nG2H CTB (all sizes in DW):\n");
+ drm_puts(p, "G2H CTB (all sizes in DW):\n");
guc_ctb_snapshot_print(&snapshot->g2h, p);
-
drm_printf(p, "\tg2h outstanding: %d\n",
snapshot->g2h_outstanding);
+
+ if (snapshot->ctb)
+ xe_print_blob_ascii85(p, "CTB data", snapshot->ctb, 0, snapshot->ctb_size);
} else {
drm_puts(p, "CT disabled\n");
}
@@ -1562,8 +1718,7 @@ void xe_guc_ct_snapshot_free(struct xe_guc_ct_snapshot *snapshot)
if (!snapshot)
return;
- guc_ctb_snapshot_free(&snapshot->h2g);
- guc_ctb_snapshot_free(&snapshot->g2h);
+ kfree(snapshot->ctb);
kfree(snapshot);
}
@@ -1571,16 +1726,121 @@ void xe_guc_ct_snapshot_free(struct xe_guc_ct_snapshot *snapshot)
* xe_guc_ct_print - GuC CT Print.
* @ct: GuC CT.
* @p: drm_printer where it will be printed out.
- * @atomic: Boolean to indicate if this is called from atomic context like
- * reset or CTB handler or from some regular path like debugfs.
+ * @want_ctb: Should the full CTB content be dumped (vs just the headers)
*
- * This function quickly capture a snapshot and immediately print it out.
+ * This function will quickly capture a snapshot of the CT state
+ * and immediately print it out.
*/
-void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p, bool atomic)
+void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p, bool want_ctb)
{
struct xe_guc_ct_snapshot *snapshot;
- snapshot = xe_guc_ct_snapshot_capture(ct, atomic);
+ snapshot = guc_ct_snapshot_capture(ct, false, want_ctb);
xe_guc_ct_snapshot_print(snapshot, p);
xe_guc_ct_snapshot_free(snapshot);
}
+
+#if IS_ENABLED(CONFIG_DRM_XE_DEBUG)
+static void ct_dead_capture(struct xe_guc_ct *ct, struct guc_ctb *ctb, u32 reason_code)
+{
+ struct xe_guc_log_snapshot *snapshot_log;
+ struct xe_guc_ct_snapshot *snapshot_ct;
+ struct xe_guc *guc = ct_to_guc(ct);
+ unsigned long flags;
+ bool have_capture;
+
+ if (ctb)
+ ctb->info.broken = true;
+
+ /* Ignore further errors after the first dump until a reset */
+ if (ct->dead.reported)
+ return;
+
+ spin_lock_irqsave(&ct->dead.lock, flags);
+
+ /* And only capture one dump at a time */
+ have_capture = ct->dead.reason & (1 << CT_DEAD_STATE_CAPTURE);
+ ct->dead.reason |= (1 << reason_code) |
+ (1 << CT_DEAD_STATE_CAPTURE);
+
+ spin_unlock_irqrestore(&ct->dead.lock, flags);
+
+ if (have_capture)
+ return;
+
+ snapshot_log = xe_guc_log_snapshot_capture(&guc->log, true);
+ snapshot_ct = xe_guc_ct_snapshot_capture((ct));
+
+ spin_lock_irqsave(&ct->dead.lock, flags);
+
+ if (ct->dead.snapshot_log || ct->dead.snapshot_ct) {
+ xe_gt_err(ct_to_gt(ct), "Got unexpected dead CT capture!\n");
+ xe_guc_log_snapshot_free(snapshot_log);
+ xe_guc_ct_snapshot_free(snapshot_ct);
+ } else {
+ ct->dead.snapshot_log = snapshot_log;
+ ct->dead.snapshot_ct = snapshot_ct;
+ }
+
+ spin_unlock_irqrestore(&ct->dead.lock, flags);
+
+ queue_work(system_unbound_wq, &(ct)->dead.worker);
+}
+
+static void ct_dead_print(struct xe_dead_ct *dead)
+{
+ struct xe_guc_ct *ct = container_of(dead, struct xe_guc_ct, dead);
+ struct xe_device *xe = ct_to_xe(ct);
+ struct xe_gt *gt = ct_to_gt(ct);
+ static int g_count;
+ struct drm_printer ip = xe_gt_info_printer(gt);
+ struct drm_printer lp = drm_line_printer(&ip, "Capture", ++g_count);
+
+ if (!dead->reason) {
+ xe_gt_err(gt, "CTB is dead for no reason!?\n");
+ return;
+ }
+
+ drm_printf(&lp, "CTB is dead - reason=0x%X\n", dead->reason);
+
+ /* Can't generate a genuine core dump at this point, so just do the good bits */
+ drm_puts(&lp, "**** Xe Device Coredump ****\n");
+ xe_device_snapshot_print(xe, &lp);
+
+ drm_printf(&lp, "**** GT #%d ****\n", gt->info.id);
+ drm_printf(&lp, "\tTile: %d\n", gt->tile->id);
+
+ drm_puts(&lp, "**** GuC Log ****\n");
+ xe_guc_log_snapshot_print(dead->snapshot_log, &lp);
+
+ drm_puts(&lp, "**** GuC CT ****\n");
+ xe_guc_ct_snapshot_print(dead->snapshot_ct, &lp);
+
+ drm_puts(&lp, "Done.\n");
+}
+
+static void ct_dead_worker_func(struct work_struct *w)
+{
+ struct xe_guc_ct *ct = container_of(w, struct xe_guc_ct, dead.worker);
+
+ if (!ct->dead.reported) {
+ ct->dead.reported = true;
+ ct_dead_print(&ct->dead);
+ }
+
+ spin_lock_irq(&ct->dead.lock);
+
+ xe_guc_log_snapshot_free(ct->dead.snapshot_log);
+ ct->dead.snapshot_log = NULL;
+ xe_guc_ct_snapshot_free(ct->dead.snapshot_ct);
+ ct->dead.snapshot_ct = NULL;
+
+ if (ct->dead.reason & (1 << CT_DEAD_STATE_REARM)) {
+ /* A reset has occurred so re-arm the error reporting */
+ ct->dead.reason = 0;
+ ct->dead.reported = false;
+ }
+
+ spin_unlock_irq(&ct->dead.lock);
+}
+#endif
diff --git a/drivers/gpu/drm/xe/xe_guc_ct.h b/drivers/gpu/drm/xe/xe_guc_ct.h
index 190202fce2d0..82c4ae458dda 100644
--- a/drivers/gpu/drm/xe/xe_guc_ct.h
+++ b/drivers/gpu/drm/xe/xe_guc_ct.h
@@ -9,6 +9,7 @@
#include "xe_guc_ct_types.h"
struct drm_printer;
+struct xe_device;
int xe_guc_ct_init(struct xe_guc_ct *ct);
int xe_guc_ct_enable(struct xe_guc_ct *ct);
@@ -16,12 +17,10 @@ void xe_guc_ct_disable(struct xe_guc_ct *ct);
void xe_guc_ct_stop(struct xe_guc_ct *ct);
void xe_guc_ct_fast_path(struct xe_guc_ct *ct);
-struct xe_guc_ct_snapshot *
-xe_guc_ct_snapshot_capture(struct xe_guc_ct *ct, bool atomic);
-void xe_guc_ct_snapshot_print(struct xe_guc_ct_snapshot *snapshot,
- struct drm_printer *p);
+struct xe_guc_ct_snapshot *xe_guc_ct_snapshot_capture(struct xe_guc_ct *ct);
+void xe_guc_ct_snapshot_print(struct xe_guc_ct_snapshot *snapshot, struct drm_printer *p);
void xe_guc_ct_snapshot_free(struct xe_guc_ct_snapshot *snapshot);
-void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p, bool atomic);
+void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p, bool want_ctb);
static inline bool xe_guc_ct_enabled(struct xe_guc_ct *ct)
{
diff --git a/drivers/gpu/drm/xe/xe_guc_ct_types.h b/drivers/gpu/drm/xe/xe_guc_ct_types.h
index 761cb9031298..8e1b9d981d61 100644
--- a/drivers/gpu/drm/xe/xe_guc_ct_types.h
+++ b/drivers/gpu/drm/xe/xe_guc_ct_types.h
@@ -52,8 +52,6 @@ struct guc_ctb {
struct guc_ctb_snapshot {
/** @desc: snapshot of the CTB descriptor */
struct guc_ct_buffer_desc desc;
- /** @cmds: snapshot of the CTB commands */
- u32 *cmds;
/** @info: snapshot of the CTB info */
struct guc_ctb_info info;
};
@@ -70,6 +68,10 @@ struct xe_guc_ct_snapshot {
struct guc_ctb_snapshot g2h;
/** @h2g: H2G CTB snapshot */
struct guc_ctb_snapshot h2g;
+ /** @ctb_size: size of the snapshot of the CTB */
+ size_t ctb_size;
+ /** @ctb: snapshot of the entire CTB */
+ u32 *ctb;
};
/**
@@ -86,6 +88,24 @@ enum xe_guc_ct_state {
XE_GUC_CT_STATE_ENABLED,
};
+#if IS_ENABLED(CONFIG_DRM_XE_DEBUG)
+/** struct xe_dead_ct - Information for debugging a dead CT */
+struct xe_dead_ct {
+ /** @lock: protects memory allocation/free operations, and @reason updates */
+ spinlock_t lock;
+ /** @reason: bit mask of CT_DEAD_* reason codes */
+ unsigned int reason;
+ /** @reported: for preventing multiple dumps per error sequence */
+ bool reported;
+ /** @worker: worker thread to get out of interrupt context before dumping */
+ struct work_struct worker;
+ /** snapshot_ct: copy of CT state and CTB content at point of error */
+ struct xe_guc_ct_snapshot *snapshot_ct;
+ /** snapshot_log: copy of GuC log at point of error */
+ struct xe_guc_log_snapshot *snapshot_log;
+};
+#endif
+
/**
* struct xe_guc_ct - GuC command transport (CT) layer
*
@@ -128,6 +148,11 @@ struct xe_guc_ct {
u32 msg[GUC_CTB_MSG_MAX_LEN];
/** @fast_msg: Message buffer */
u32 fast_msg[GUC_CTB_MSG_MAX_LEN];
+
+#if IS_ENABLED(CONFIG_DRM_XE_DEBUG)
+ /** @dead: information for debugging dead CTs */
+ struct xe_dead_ct dead;
+#endif
};
#endif
diff --git a/drivers/gpu/drm/xe/xe_guc_debugfs.c b/drivers/gpu/drm/xe/xe_guc_debugfs.c
index d3822cbea273..995b306aced7 100644
--- a/drivers/gpu/drm/xe/xe_guc_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_guc_debugfs.c
@@ -47,9 +47,23 @@ static int guc_log(struct seq_file *m, void *data)
return 0;
}
+static int guc_ctb(struct seq_file *m, void *data)
+{
+ struct xe_guc *guc = node_to_guc(m->private);
+ struct xe_device *xe = guc_to_xe(guc);
+ struct drm_printer p = drm_seq_file_printer(m);
+
+ xe_pm_runtime_get(xe);
+ xe_guc_ct_print(&guc->ct, &p, true);
+ xe_pm_runtime_put(xe);
+
+ return 0;
+}
+
static const struct drm_info_list debugfs_list[] = {
{"guc_info", guc_info, 0},
{"guc_log", guc_log, 0},
+ {"guc_ctb", guc_ctb, 0},
};
void xe_guc_debugfs_register(struct xe_guc *guc, struct dentry *parent)
diff --git a/drivers/gpu/drm/xe/xe_guc_fwif.h b/drivers/gpu/drm/xe/xe_guc_fwif.h
index 19ee71aeaf17..08ffe59f22fa 100644
--- a/drivers/gpu/drm/xe/xe_guc_fwif.h
+++ b/drivers/gpu/drm/xe/xe_guc_fwif.h
@@ -8,7 +8,9 @@
#include <linux/bits.h>
+#include "abi/guc_capture_abi.h"
#include "abi/guc_klvs_abi.h"
+#include "xe_hw_engine_types.h"
#define G2H_LEN_DW_SCHED_CONTEXT_MODE_SET 4
#define G2H_LEN_DW_DEREGISTER_CONTEXT 3
@@ -103,6 +105,7 @@ struct guc_update_exec_queue_policy {
#define GUC_CTL_FEATURE 2
#define GUC_CTL_ENABLE_SLPC BIT(2)
+#define GUC_CTL_ENABLE_LITE_RESTORE BIT(4)
#define GUC_CTL_DISABLE_SCHEDULER BIT(14)
#define GUC_CTL_DEBUG 3
@@ -157,24 +160,6 @@ struct guc_policies {
u32 reserved[4];
} __packed;
-/* GuC MMIO reg state struct */
-struct guc_mmio_reg {
- u32 offset;
- u32 value;
- u32 flags;
- u32 mask;
-#define GUC_REGSET_MASKED BIT(0)
-#define GUC_REGSET_MASKED_WITH_VALUE BIT(2)
-#define GUC_REGSET_RESTORE_ONLY BIT(3)
-} __packed;
-
-/* GuC register sets */
-struct guc_mmio_reg_set {
- u32 address;
- u16 count;
- u16 reserved;
-} __packed;
-
/* Generic GT SysInfo data types */
#define GUC_GENERIC_GT_SYSINFO_SLICE_ENABLED 0
#define GUC_GENERIC_GT_SYSINFO_VDBOX_SFC_SUPPORT_MASK 1
@@ -188,12 +173,6 @@ struct guc_gt_system_info {
u32 generic_gt_sysinfo[GUC_GENERIC_GT_SYSINFO_MAX];
} __packed;
-enum {
- GUC_CAPTURE_LIST_INDEX_PF = 0,
- GUC_CAPTURE_LIST_INDEX_VF = 1,
- GUC_CAPTURE_LIST_INDEX_MAX = 2,
-};
-
/* GuC Additional Data Struct */
struct guc_ads {
struct guc_mmio_reg_set reg_state_list[GUC_MAX_ENGINE_CLASSES][GUC_MAX_INSTANCES_PER_CLASS];
diff --git a/drivers/gpu/drm/xe/xe_guc_klv_thresholds_set.h b/drivers/gpu/drm/xe/xe_guc_klv_thresholds_set.h
index da0fedbbdbaf..da10cf0389cb 100644
--- a/drivers/gpu/drm/xe/xe_guc_klv_thresholds_set.h
+++ b/drivers/gpu/drm/xe/xe_guc_klv_thresholds_set.h
@@ -18,6 +18,13 @@
MAKE_GUC_KLV_KEY(CONCATENATE(VF_CFG_THRESHOLD_, TAG))
/**
+ * MAKE_GUC_KLV_VF_CFG_THRESHOLD_LEN - Prepare the name of the KLV length constant.
+ * @TAG: unique tag of the GuC threshold KLV key.
+ */
+#define MAKE_GUC_KLV_VF_CFG_THRESHOLD_LEN(TAG) \
+ MAKE_GUC_KLV_LEN(CONCATENATE(VF_CFG_THRESHOLD_, TAG))
+
+/**
* xe_guc_klv_threshold_key_to_index - Find index of the tracked GuC threshold.
* @key: GuC threshold KLV key.
*
diff --git a/drivers/gpu/drm/xe/xe_guc_log.c b/drivers/gpu/drm/xe/xe_guc_log.c
index a37ee3419428..df4cfb698cdb 100644
--- a/drivers/gpu/drm/xe/xe_guc_log.c
+++ b/drivers/gpu/drm/xe/xe_guc_log.c
@@ -5,13 +5,26 @@
#include "xe_guc_log.h"
+#include <linux/fault-inject.h>
+
#include <drm/drm_managed.h>
+#include "regs/xe_guc_regs.h"
#include "xe_bo.h"
+#include "xe_devcoredump.h"
+#include "xe_force_wake.h"
#include "xe_gt.h"
+#include "xe_gt_printk.h"
#include "xe_map.h"
+#include "xe_mmio.h"
#include "xe_module.h"
+static struct xe_guc *
+log_to_guc(struct xe_guc_log *log)
+{
+ return container_of(log, struct xe_guc, log);
+}
+
static struct xe_gt *
log_to_gt(struct xe_guc_log *log)
{
@@ -49,32 +62,194 @@ static size_t guc_log_size(void)
CAPTURE_BUFFER_SIZE;
}
-void xe_guc_log_print(struct xe_guc_log *log, struct drm_printer *p)
+#define GUC_LOG_CHUNK_SIZE SZ_2M
+
+static struct xe_guc_log_snapshot *xe_guc_log_snapshot_alloc(struct xe_guc_log *log, bool atomic)
+{
+ struct xe_guc_log_snapshot *snapshot;
+ size_t remain;
+ int i;
+
+ snapshot = kzalloc(sizeof(*snapshot), atomic ? GFP_ATOMIC : GFP_KERNEL);
+ if (!snapshot)
+ return NULL;
+
+ /*
+ * NB: kmalloc has a hard limit well below the maximum GuC log buffer size.
+ * Also, can't use vmalloc as might be called from atomic context. So need
+ * to break the buffer up into smaller chunks that can be allocated.
+ */
+ snapshot->size = log->bo->size;
+ snapshot->num_chunks = DIV_ROUND_UP(snapshot->size, GUC_LOG_CHUNK_SIZE);
+
+ snapshot->copy = kcalloc(snapshot->num_chunks, sizeof(*snapshot->copy),
+ atomic ? GFP_ATOMIC : GFP_KERNEL);
+ if (!snapshot->copy)
+ goto fail_snap;
+
+ remain = snapshot->size;
+ for (i = 0; i < snapshot->num_chunks; i++) {
+ size_t size = min(GUC_LOG_CHUNK_SIZE, remain);
+
+ snapshot->copy[i] = kmalloc(size, atomic ? GFP_ATOMIC : GFP_KERNEL);
+ if (!snapshot->copy[i])
+ goto fail_copy;
+ remain -= size;
+ }
+
+ return snapshot;
+
+fail_copy:
+ for (i = 0; i < snapshot->num_chunks; i++)
+ kfree(snapshot->copy[i]);
+ kfree(snapshot->copy);
+fail_snap:
+ kfree(snapshot);
+ return NULL;
+}
+
+/**
+ * xe_guc_log_snapshot_free - free a previously captured GuC log snapshot
+ * @snapshot: GuC log snapshot structure
+ *
+ * Return: pointer to a newly allocated snapshot object or null if out of memory. Caller is
+ * responsible for calling xe_guc_log_snapshot_free when done with the snapshot.
+ */
+void xe_guc_log_snapshot_free(struct xe_guc_log_snapshot *snapshot)
+{
+ int i;
+
+ if (!snapshot)
+ return;
+
+ if (snapshot->copy) {
+ for (i = 0; i < snapshot->num_chunks; i++)
+ kfree(snapshot->copy[i]);
+ kfree(snapshot->copy);
+ }
+
+ kfree(snapshot);
+}
+
+/**
+ * xe_guc_log_snapshot_capture - create a new snapshot copy the GuC log for later dumping
+ * @log: GuC log structure
+ * @atomic: is the call inside an atomic section of some kind?
+ *
+ * Return: pointer to a newly allocated snapshot object or null if out of memory. Caller is
+ * responsible for calling xe_guc_log_snapshot_free when done with the snapshot.
+ */
+struct xe_guc_log_snapshot *xe_guc_log_snapshot_capture(struct xe_guc_log *log, bool atomic)
{
+ struct xe_guc_log_snapshot *snapshot;
struct xe_device *xe = log_to_xe(log);
- size_t size;
- int i, j;
+ struct xe_guc *guc = log_to_guc(log);
+ struct xe_gt *gt = log_to_gt(log);
+ unsigned int fw_ref;
+ size_t remain;
+ int i;
- xe_assert(xe, log->bo);
+ if (!log->bo) {
+ xe_gt_err(gt, "GuC log buffer not allocated\n");
+ return NULL;
+ }
- size = log->bo->size;
+ snapshot = xe_guc_log_snapshot_alloc(log, atomic);
+ if (!snapshot) {
+ xe_gt_err(gt, "GuC log snapshot not allocated\n");
+ return NULL;
+ }
-#define DW_PER_READ 128
- xe_assert(xe, !(size % (DW_PER_READ * sizeof(u32))));
- for (i = 0; i < size / sizeof(u32); i += DW_PER_READ) {
- u32 read[DW_PER_READ];
+ remain = snapshot->size;
+ for (i = 0; i < snapshot->num_chunks; i++) {
+ size_t size = min(GUC_LOG_CHUNK_SIZE, remain);
- xe_map_memcpy_from(xe, read, &log->bo->vmap, i * sizeof(u32),
- DW_PER_READ * sizeof(u32));
-#define DW_PER_PRINT 4
- for (j = 0; j < DW_PER_READ / DW_PER_PRINT; ++j) {
- u32 *print = read + j * DW_PER_PRINT;
+ xe_map_memcpy_from(xe, snapshot->copy[i], &log->bo->vmap,
+ i * GUC_LOG_CHUNK_SIZE, size);
+ remain -= size;
+ }
- drm_printf(p, "0x%08x 0x%08x 0x%08x 0x%08x\n",
- *(print + 0), *(print + 1),
- *(print + 2), *(print + 3));
- }
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+ if (!fw_ref) {
+ snapshot->stamp = ~0ULL;
+ } else {
+ snapshot->stamp = xe_mmio_read64_2x32(&gt->mmio, GUC_PMTIMESTAMP_LO);
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
}
+ snapshot->ktime = ktime_get_boottime_ns();
+ snapshot->level = log->level;
+ snapshot->ver_found = guc->fw.versions.found[XE_UC_FW_VER_RELEASE];
+ snapshot->ver_want = guc->fw.versions.wanted;
+ snapshot->path = guc->fw.path;
+
+ return snapshot;
+}
+
+/**
+ * xe_guc_log_snapshot_print - dump a previously saved copy of the GuC log to some useful location
+ * @snapshot: a snapshot of the GuC log
+ * @p: the printer object to output to
+ */
+void xe_guc_log_snapshot_print(struct xe_guc_log_snapshot *snapshot, struct drm_printer *p)
+{
+ size_t remain;
+ int i;
+
+ if (!snapshot) {
+ drm_printf(p, "GuC log snapshot not allocated!\n");
+ return;
+ }
+
+ drm_printf(p, "GuC firmware: %s\n", snapshot->path);
+ drm_printf(p, "GuC version: %u.%u.%u (wanted %u.%u.%u)\n",
+ snapshot->ver_found.major, snapshot->ver_found.minor, snapshot->ver_found.patch,
+ snapshot->ver_want.major, snapshot->ver_want.minor, snapshot->ver_want.patch);
+ drm_printf(p, "Kernel timestamp: 0x%08llX [%llu]\n", snapshot->ktime, snapshot->ktime);
+ drm_printf(p, "GuC timestamp: 0x%08llX [%llu]\n", snapshot->stamp, snapshot->stamp);
+ drm_printf(p, "Log level: %u\n", snapshot->level);
+
+ remain = snapshot->size;
+ for (i = 0; i < snapshot->num_chunks; i++) {
+ size_t size = min(GUC_LOG_CHUNK_SIZE, remain);
+
+ xe_print_blob_ascii85(p, i ? NULL : "Log data", snapshot->copy[i], 0, size);
+ remain -= size;
+ }
+}
+
+/**
+ * xe_guc_log_print_dmesg - dump a copy of the GuC log to dmesg
+ * @log: GuC log structure
+ */
+void xe_guc_log_print_dmesg(struct xe_guc_log *log)
+{
+ struct xe_gt *gt = log_to_gt(log);
+ static int g_count;
+ struct drm_printer ip = xe_gt_info_printer(gt);
+ struct drm_printer lp = drm_line_printer(&ip, "Capture", ++g_count);
+
+ drm_printf(&lp, "Dumping GuC log for %ps...\n", __builtin_return_address(0));
+
+ xe_guc_log_print(log, &lp);
+
+ drm_printf(&lp, "Done.\n");
+}
+
+/**
+ * xe_guc_log_print - dump a copy of the GuC log to some useful location
+ * @log: GuC log structure
+ * @p: the printer object to output to
+ */
+void xe_guc_log_print(struct xe_guc_log *log, struct drm_printer *p)
+{
+ struct xe_guc_log_snapshot *snapshot;
+
+ drm_printf(p, "**** GuC Log ****\n");
+
+ snapshot = xe_guc_log_snapshot_capture(log, false);
+ drm_printf(p, "CS reference clock: %u\n", log_to_gt(log)->info.reference_clock);
+ xe_guc_log_snapshot_print(snapshot, p);
+ xe_guc_log_snapshot_free(snapshot);
}
int xe_guc_log_init(struct xe_guc_log *log)
@@ -96,3 +271,105 @@ int xe_guc_log_init(struct xe_guc_log *log)
return 0;
}
+
+ALLOW_ERROR_INJECTION(xe_guc_log_init, ERRNO); /* See xe_pci_probe() */
+
+static u32 xe_guc_log_section_size_crash(struct xe_guc_log *log)
+{
+ return CRASH_BUFFER_SIZE;
+}
+
+static u32 xe_guc_log_section_size_debug(struct xe_guc_log *log)
+{
+ return DEBUG_BUFFER_SIZE;
+}
+
+/**
+ * xe_guc_log_section_size_capture - Get capture buffer size within log sections.
+ * @log: The log object.
+ *
+ * This function will return the capture buffer size within log sections.
+ *
+ * Return: capture buffer size.
+ */
+u32 xe_guc_log_section_size_capture(struct xe_guc_log *log)
+{
+ return CAPTURE_BUFFER_SIZE;
+}
+
+/**
+ * xe_guc_get_log_buffer_size - Get log buffer size for a type.
+ * @log: The log object.
+ * @type: The log buffer type
+ *
+ * Return: buffer size.
+ */
+u32 xe_guc_get_log_buffer_size(struct xe_guc_log *log, enum guc_log_buffer_type type)
+{
+ switch (type) {
+ case GUC_LOG_BUFFER_CRASH_DUMP:
+ return xe_guc_log_section_size_crash(log);
+ case GUC_LOG_BUFFER_DEBUG:
+ return xe_guc_log_section_size_debug(log);
+ case GUC_LOG_BUFFER_CAPTURE:
+ return xe_guc_log_section_size_capture(log);
+ }
+ return 0;
+}
+
+/**
+ * xe_guc_get_log_buffer_offset - Get offset in log buffer for a type.
+ * @log: The log object.
+ * @type: The log buffer type
+ *
+ * This function will return the offset in the log buffer for a type.
+ * Return: buffer offset.
+ */
+u32 xe_guc_get_log_buffer_offset(struct xe_guc_log *log, enum guc_log_buffer_type type)
+{
+ enum guc_log_buffer_type i;
+ u32 offset = PAGE_SIZE;/* for the log_buffer_states */
+
+ for (i = GUC_LOG_BUFFER_CRASH_DUMP; i < GUC_LOG_BUFFER_TYPE_MAX; ++i) {
+ if (i == type)
+ break;
+ offset += xe_guc_get_log_buffer_size(log, i);
+ }
+
+ return offset;
+}
+
+/**
+ * xe_guc_check_log_buf_overflow - Check if log buffer overflowed
+ * @log: The log object.
+ * @type: The log buffer type
+ * @full_cnt: The count of buffer full
+ *
+ * This function will check count of buffer full against previous, mismatch
+ * indicate overflowed.
+ * Update the sampled_overflow counter, if the 4 bit counter overflowed, add
+ * up 16 to correct the value.
+ *
+ * Return: True if overflowed.
+ */
+bool xe_guc_check_log_buf_overflow(struct xe_guc_log *log, enum guc_log_buffer_type type,
+ unsigned int full_cnt)
+{
+ unsigned int prev_full_cnt = log->stats[type].sampled_overflow;
+ bool overflow = false;
+
+ if (full_cnt != prev_full_cnt) {
+ overflow = true;
+
+ log->stats[type].overflow = full_cnt;
+ log->stats[type].sampled_overflow += full_cnt - prev_full_cnt;
+
+ if (full_cnt < prev_full_cnt) {
+ /* buffer_full_cnt is a 4 bit counter */
+ log->stats[type].sampled_overflow += 16;
+ }
+ xe_gt_notice(log_to_gt(log), "log buffer overflow\n");
+ }
+
+ return overflow;
+}
diff --git a/drivers/gpu/drm/xe/xe_guc_log.h b/drivers/gpu/drm/xe/xe_guc_log.h
index 2d25ab28b4b3..5b896f5fafaf 100644
--- a/drivers/gpu/drm/xe/xe_guc_log.h
+++ b/drivers/gpu/drm/xe/xe_guc_log.h
@@ -7,8 +7,10 @@
#define _XE_GUC_LOG_H_
#include "xe_guc_log_types.h"
+#include "abi/guc_log_abi.h"
struct drm_printer;
+struct xe_device;
#if IS_ENABLED(CONFIG_DRM_XE_LARGE_GUC_BUFFER)
#define CRASH_BUFFER_SIZE SZ_1M
@@ -17,7 +19,7 @@ struct drm_printer;
#else
#define CRASH_BUFFER_SIZE SZ_8K
#define DEBUG_BUFFER_SIZE SZ_64K
-#define CAPTURE_BUFFER_SIZE SZ_16K
+#define CAPTURE_BUFFER_SIZE SZ_1M
#endif
/*
* While we're using plain log level in i915, GuC controls are much more...
@@ -38,6 +40,10 @@ struct drm_printer;
int xe_guc_log_init(struct xe_guc_log *log);
void xe_guc_log_print(struct xe_guc_log *log, struct drm_printer *p);
+void xe_guc_log_print_dmesg(struct xe_guc_log *log);
+struct xe_guc_log_snapshot *xe_guc_log_snapshot_capture(struct xe_guc_log *log, bool atomic);
+void xe_guc_log_snapshot_print(struct xe_guc_log_snapshot *snapshot, struct drm_printer *p);
+void xe_guc_log_snapshot_free(struct xe_guc_log_snapshot *snapshot);
static inline u32
xe_guc_log_get_level(struct xe_guc_log *log)
@@ -45,4 +51,11 @@ xe_guc_log_get_level(struct xe_guc_log *log)
return log->level;
}
+u32 xe_guc_log_section_size_capture(struct xe_guc_log *log);
+u32 xe_guc_get_log_buffer_size(struct xe_guc_log *log, enum guc_log_buffer_type type);
+u32 xe_guc_get_log_buffer_offset(struct xe_guc_log *log, enum guc_log_buffer_type type);
+bool xe_guc_check_log_buf_overflow(struct xe_guc_log *log,
+ enum guc_log_buffer_type type,
+ unsigned int full_cnt);
+
#endif
diff --git a/drivers/gpu/drm/xe/xe_guc_log_types.h b/drivers/gpu/drm/xe/xe_guc_log_types.h
index 125080d138a7..b3d5c72ac752 100644
--- a/drivers/gpu/drm/xe/xe_guc_log_types.h
+++ b/drivers/gpu/drm/xe/xe_guc_log_types.h
@@ -7,10 +7,38 @@
#define _XE_GUC_LOG_TYPES_H_
#include <linux/types.h>
+#include "abi/guc_log_abi.h"
+
+#include "xe_uc_fw_types.h"
struct xe_bo;
/**
+ * struct xe_guc_log_snapshot:
+ * Capture of the GuC log plus various state useful for decoding the log
+ */
+struct xe_guc_log_snapshot {
+ /** @size: Size in bytes of the @copy allocation */
+ size_t size;
+ /** @copy: Host memory copy of the log buffer for later dumping, split into chunks */
+ void **copy;
+ /** @num_chunks: Number of chunks within @copy */
+ int num_chunks;
+ /** @ktime: Kernel time the snapshot was taken */
+ u64 ktime;
+ /** @stamp: GuC timestamp at which the snapshot was taken */
+ u64 stamp;
+ /** @level: GuC log verbosity level */
+ u32 level;
+ /** @ver_found: GuC firmware version */
+ struct xe_uc_fw_version ver_found;
+ /** @ver_want: GuC firmware version that driver expected */
+ struct xe_uc_fw_version ver_want;
+ /** @path: Path of GuC firmware blob */
+ const char *path;
+};
+
+/**
* struct xe_guc_log - GuC log
*/
struct xe_guc_log {
@@ -18,6 +46,12 @@ struct xe_guc_log {
u32 level;
/** @bo: XE BO for GuC log */
struct xe_bo *bo;
+ /** @stats: logging related stats */
+ struct {
+ u32 sampled_overflow;
+ u32 overflow;
+ u32 flush;
+ } stats[GUC_LOG_BUFFER_TYPE_MAX];
};
#endif
diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c
index 034b29984d5e..e8b9faeaef64 100644
--- a/drivers/gpu/drm/xe/xe_guc_pc.c
+++ b/drivers/gpu/drm/xe/xe_guc_pc.c
@@ -262,7 +262,7 @@ static void pc_set_manual_rp_ctrl(struct xe_guc_pc *pc, bool enable)
u32 state = enable ? RPSWCTL_ENABLE : RPSWCTL_DISABLE;
/* Allow/Disallow punit to process software freq requests */
- xe_mmio_write32(gt, RP_CONTROL, state);
+ xe_mmio_write32(&gt->mmio, RP_CONTROL, state);
}
static void pc_set_cur_freq(struct xe_guc_pc *pc, u32 freq)
@@ -274,7 +274,7 @@ static void pc_set_cur_freq(struct xe_guc_pc *pc, u32 freq)
/* Req freq is in units of 16.66 Mhz */
rpnswreq = REG_FIELD_PREP(REQ_RATIO_MASK, encode_freq(freq));
- xe_mmio_write32(gt, RPNSWREQ, rpnswreq);
+ xe_mmio_write32(&gt->mmio, RPNSWREQ, rpnswreq);
/* Sleep for a small time to allow pcode to respond */
usleep_range(100, 300);
@@ -334,9 +334,9 @@ static void mtl_update_rpe_value(struct xe_guc_pc *pc)
u32 reg;
if (xe_gt_is_media_type(gt))
- reg = xe_mmio_read32(gt, MTL_MPE_FREQUENCY);
+ reg = xe_mmio_read32(&gt->mmio, MTL_MPE_FREQUENCY);
else
- reg = xe_mmio_read32(gt, MTL_GT_RPE_FREQUENCY);
+ reg = xe_mmio_read32(&gt->mmio, MTL_GT_RPE_FREQUENCY);
pc->rpe_freq = decode_freq(REG_FIELD_GET(MTL_RPE_MASK, reg));
}
@@ -353,9 +353,9 @@ static void tgl_update_rpe_value(struct xe_guc_pc *pc)
* PCODE at a different register
*/
if (xe->info.platform == XE_PVC)
- reg = xe_mmio_read32(gt, PVC_RP_STATE_CAP);
+ reg = xe_mmio_read32(&gt->mmio, PVC_RP_STATE_CAP);
else
- reg = xe_mmio_read32(gt, FREQ_INFO_REC);
+ reg = xe_mmio_read32(&gt->mmio, FREQ_INFO_REC);
pc->rpe_freq = REG_FIELD_GET(RPE_MASK, reg) * GT_FREQUENCY_MULTIPLIER;
}
@@ -392,10 +392,10 @@ u32 xe_guc_pc_get_act_freq(struct xe_guc_pc *pc)
/* When in RC6, actual frequency reported will be 0. */
if (GRAPHICS_VERx100(xe) >= 1270) {
- freq = xe_mmio_read32(gt, MTL_MIRROR_TARGET_WP1);
+ freq = xe_mmio_read32(&gt->mmio, MTL_MIRROR_TARGET_WP1);
freq = REG_FIELD_GET(MTL_CAGF_MASK, freq);
} else {
- freq = xe_mmio_read32(gt, GT_PERF_STATUS);
+ freq = xe_mmio_read32(&gt->mmio, GT_PERF_STATUS);
freq = REG_FIELD_GET(CAGF_MASK, freq);
}
@@ -415,22 +415,24 @@ u32 xe_guc_pc_get_act_freq(struct xe_guc_pc *pc)
int xe_guc_pc_get_cur_freq(struct xe_guc_pc *pc, u32 *freq)
{
struct xe_gt *gt = pc_to_gt(pc);
- int ret;
+ unsigned int fw_ref;
/*
* GuC SLPC plays with cur freq request when GuCRC is enabled
* Block RC6 for a more reliable read.
*/
- ret = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
- if (ret)
- return ret;
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+ if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) {
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
+ return -ETIMEDOUT;
+ }
- *freq = xe_mmio_read32(gt, RPNSWREQ);
+ *freq = xe_mmio_read32(&gt->mmio, RPNSWREQ);
*freq = REG_FIELD_GET(REQ_RATIO_MASK, *freq);
*freq = decode_freq(*freq);
- XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
return 0;
}
@@ -480,6 +482,7 @@ u32 xe_guc_pc_get_rpn_freq(struct xe_guc_pc *pc)
int xe_guc_pc_get_min_freq(struct xe_guc_pc *pc, u32 *freq)
{
struct xe_gt *gt = pc_to_gt(pc);
+ unsigned int fw_ref;
int ret;
mutex_lock(&pc->freq_lock);
@@ -493,9 +496,11 @@ int xe_guc_pc_get_min_freq(struct xe_guc_pc *pc, u32 *freq)
* GuC SLPC plays with min freq request when GuCRC is enabled
* Block RC6 for a more reliable read.
*/
- ret = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
- if (ret)
- goto out;
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+ if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) {
+ ret = -ETIMEDOUT;
+ goto fw;
+ }
ret = pc_action_query_task_state(pc);
if (ret)
@@ -504,7 +509,7 @@ int xe_guc_pc_get_min_freq(struct xe_guc_pc *pc, u32 *freq)
*freq = pc_get_min_freq(pc);
fw:
- XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
out:
mutex_unlock(&pc->freq_lock);
return ret;
@@ -612,10 +617,10 @@ enum xe_gt_idle_state xe_guc_pc_c_status(struct xe_guc_pc *pc)
u32 reg, gt_c_state;
if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1270) {
- reg = xe_mmio_read32(gt, MTL_MIRROR_TARGET_WP1);
+ reg = xe_mmio_read32(&gt->mmio, MTL_MIRROR_TARGET_WP1);
gt_c_state = REG_FIELD_GET(MTL_CC_MASK, reg);
} else {
- reg = xe_mmio_read32(gt, GT_CORE_STATUS);
+ reg = xe_mmio_read32(&gt->mmio, GT_CORE_STATUS);
gt_c_state = REG_FIELD_GET(RCN_MASK, reg);
}
@@ -638,7 +643,7 @@ u64 xe_guc_pc_rc6_residency(struct xe_guc_pc *pc)
struct xe_gt *gt = pc_to_gt(pc);
u32 reg;
- reg = xe_mmio_read32(gt, GT_GFX_RC6);
+ reg = xe_mmio_read32(&gt->mmio, GT_GFX_RC6);
return reg;
}
@@ -652,7 +657,7 @@ u64 xe_guc_pc_mc6_residency(struct xe_guc_pc *pc)
struct xe_gt *gt = pc_to_gt(pc);
u64 reg;
- reg = xe_mmio_read32(gt, MTL_MEDIA_MC6);
+ reg = xe_mmio_read32(&gt->mmio, MTL_MEDIA_MC6);
return reg;
}
@@ -665,9 +670,9 @@ static void mtl_init_fused_rp_values(struct xe_guc_pc *pc)
xe_device_assert_mem_access(pc_to_xe(pc));
if (xe_gt_is_media_type(gt))
- reg = xe_mmio_read32(gt, MTL_MEDIAP_STATE_CAP);
+ reg = xe_mmio_read32(&gt->mmio, MTL_MEDIAP_STATE_CAP);
else
- reg = xe_mmio_read32(gt, MTL_RP_STATE_CAP);
+ reg = xe_mmio_read32(&gt->mmio, MTL_RP_STATE_CAP);
pc->rp0_freq = decode_freq(REG_FIELD_GET(MTL_RP0_CAP_MASK, reg));
@@ -683,9 +688,9 @@ static void tgl_init_fused_rp_values(struct xe_guc_pc *pc)
xe_device_assert_mem_access(pc_to_xe(pc));
if (xe->info.platform == XE_PVC)
- reg = xe_mmio_read32(gt, PVC_RP_STATE_CAP);
+ reg = xe_mmio_read32(&gt->mmio, PVC_RP_STATE_CAP);
else
- reg = xe_mmio_read32(gt, RP_STATE_CAP);
+ reg = xe_mmio_read32(&gt->mmio, RP_STATE_CAP);
pc->rp0_freq = REG_FIELD_GET(RP0_MASK, reg) * GT_FREQUENCY_MULTIPLIER;
pc->rpn_freq = REG_FIELD_GET(RPN_MASK, reg) * GT_FREQUENCY_MULTIPLIER;
}
@@ -855,6 +860,7 @@ int xe_guc_pc_gucrc_disable(struct xe_guc_pc *pc)
{
struct xe_device *xe = pc_to_xe(pc);
struct xe_gt *gt = pc_to_gt(pc);
+ unsigned int fw_ref;
int ret = 0;
if (xe->info.skip_guc_pc)
@@ -864,13 +870,15 @@ int xe_guc_pc_gucrc_disable(struct xe_guc_pc *pc)
if (ret)
return ret;
- ret = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
- if (ret)
- return ret;
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+ if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) {
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
+ return -ETIMEDOUT;
+ }
xe_gt_idle_disable_c6(gt);
- XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
return 0;
}
@@ -956,13 +964,16 @@ int xe_guc_pc_start(struct xe_guc_pc *pc)
struct xe_device *xe = pc_to_xe(pc);
struct xe_gt *gt = pc_to_gt(pc);
u32 size = PAGE_ALIGN(sizeof(struct slpc_shared_data));
+ unsigned int fw_ref;
int ret;
xe_gt_assert(gt, xe_device_uc_enabled(xe));
- ret = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
- if (ret)
- return ret;
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+ if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) {
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
+ return -ETIMEDOUT;
+ }
if (xe->info.skip_guc_pc) {
if (xe->info.platform != XE_PVC)
@@ -1005,7 +1016,7 @@ int xe_guc_pc_start(struct xe_guc_pc *pc)
ret = pc_action_setup_gucrc(pc, GUCRC_FIRMWARE_CONTROL);
out:
- XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
return ret;
}
@@ -1037,18 +1048,19 @@ static void xe_guc_pc_fini_hw(void *arg)
{
struct xe_guc_pc *pc = arg;
struct xe_device *xe = pc_to_xe(pc);
+ unsigned int fw_ref;
if (xe_device_wedged(xe))
return;
- XE_WARN_ON(xe_force_wake_get(gt_to_fw(pc_to_gt(pc)), XE_FORCEWAKE_ALL));
+ fw_ref = xe_force_wake_get(gt_to_fw(pc_to_gt(pc)), XE_FORCEWAKE_ALL);
xe_guc_pc_gucrc_disable(pc);
XE_WARN_ON(xe_guc_pc_stop(pc));
/* Bind requested freq to mert_freq_cap before unload */
pc_set_cur_freq(pc, min(pc_max_freq_cap(pc), pc->rpe_freq));
- xe_force_wake_put(gt_to_fw(pc_to_gt(pc)), XE_FORCEWAKE_ALL);
+ xe_force_wake_put(gt_to_fw(pc_to_gt(pc)), fw_ref);
}
/**
diff --git a/drivers/gpu/drm/xe/xe_guc_relay.c b/drivers/gpu/drm/xe/xe_guc_relay.c
index ade6162dc259..8f62de026724 100644
--- a/drivers/gpu/drm/xe/xe_guc_relay.c
+++ b/drivers/gpu/drm/xe/xe_guc_relay.c
@@ -5,6 +5,7 @@
#include <linux/bitfield.h>
#include <linux/delay.h>
+#include <linux/fault-inject.h>
#include <drm/drm_managed.h>
@@ -355,6 +356,7 @@ int xe_guc_relay_init(struct xe_guc_relay *relay)
return drmm_add_action_or_reset(&xe->drm, __fini_relay, relay);
}
+ALLOW_ERROR_INJECTION(xe_guc_relay_init, ERRNO); /* See xe_pci_probe() */
static u32 to_relay_error(int err)
{
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
index 4f5d00aea716..6f4a9812b4f4 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -27,6 +27,7 @@
#include "xe_gt_clock.h"
#include "xe_gt_printk.h"
#include "xe_guc.h"
+#include "xe_guc_capture.h"
#include "xe_guc_ct.h"
#include "xe_guc_exec_queue_types.h"
#include "xe_guc_id_mgr.h"
@@ -716,6 +717,7 @@ guc_exec_queue_run_job(struct drm_sched_job *drm_job)
struct xe_exec_queue *q = job->q;
struct xe_guc *guc = exec_queue_to_guc(q);
struct xe_device *xe = guc_to_xe(guc);
+ struct dma_fence *fence = NULL;
bool lr = xe_exec_queue_is_lr(q);
xe_assert(xe, !(exec_queue_destroyed(q) || exec_queue_pending_disable(q)) ||
@@ -733,12 +735,12 @@ guc_exec_queue_run_job(struct drm_sched_job *drm_job)
if (lr) {
xe_sched_job_set_error(job, -EOPNOTSUPP);
- return NULL;
- } else if (test_and_set_bit(JOB_FLAG_SUBMIT, &job->fence->flags)) {
- return job->fence;
+ dma_fence_put(job->fence); /* Drop ref from xe_sched_job_arm */
} else {
- return dma_fence_get(job->fence);
+ fence = job->fence;
}
+
+ return fence;
}
static void guc_exec_queue_free_job(struct drm_sched_job *drm_job)
@@ -749,7 +751,7 @@ static void guc_exec_queue_free_job(struct drm_sched_job *drm_job)
xe_sched_job_put(job);
}
-static int guc_read_stopped(struct xe_guc *guc)
+int xe_guc_read_stopped(struct xe_guc *guc)
{
return atomic_read(&guc->submission_state.stopped);
}
@@ -765,17 +767,19 @@ static void disable_scheduling_deregister(struct xe_guc *guc,
struct xe_exec_queue *q)
{
MAKE_SCHED_CONTEXT_ACTION(q, DISABLE);
- struct xe_device *xe = guc_to_xe(guc);
int ret;
set_min_preemption_timeout(guc, q);
smp_rmb();
- ret = wait_event_timeout(guc->ct.wq, !exec_queue_pending_enable(q) ||
- guc_read_stopped(guc), HZ * 5);
+ ret = wait_event_timeout(guc->ct.wq,
+ (!exec_queue_pending_enable(q) &&
+ !exec_queue_pending_disable(q)) ||
+ xe_guc_read_stopped(guc),
+ HZ * 5);
if (!ret) {
struct xe_gpu_scheduler *sched = &q->guc->sched;
- drm_warn(&xe->drm, "Pending enable failed to respond");
+ xe_gt_warn(q->gt, "Pending enable/disable failed to respond\n");
xe_sched_submission_start(sched);
xe_gt_reset_async(q->gt);
xe_sched_tdr_queue_imm(sched);
@@ -897,7 +901,7 @@ static void xe_guc_exec_queue_lr_cleanup(struct work_struct *w)
*/
ret = wait_event_timeout(guc->ct.wq,
!exec_queue_pending_disable(q) ||
- guc_read_stopped(guc), HZ * 5);
+ xe_guc_read_stopped(guc), HZ * 5);
if (!ret) {
drm_warn(&xe->drm, "Schedule disable failed to respond");
xe_sched_submission_start(sched);
@@ -975,8 +979,8 @@ static void enable_scheduling(struct xe_exec_queue *q)
ret = wait_event_timeout(guc->ct.wq,
!exec_queue_pending_enable(q) ||
- guc_read_stopped(guc), HZ * 5);
- if (!ret || guc_read_stopped(guc)) {
+ xe_guc_read_stopped(guc), HZ * 5);
+ if (!ret || xe_guc_read_stopped(guc)) {
xe_gt_warn(guc_to_gt(guc), "Schedule enable failed to respond");
set_exec_queue_banned(q);
xe_gt_reset_async(q->gt);
@@ -1031,6 +1035,8 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job)
struct xe_gpu_scheduler *sched = &q->guc->sched;
struct xe_guc *guc = exec_queue_to_guc(q);
const char *process_name = "no process";
+ struct xe_device *xe = guc_to_xe(guc);
+ unsigned int fw_ref;
int err = -ETIME;
pid_t pid = -1;
int i = 0;
@@ -1058,6 +1064,22 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job)
exec_queue_destroyed(q);
/*
+ * If devcoredump not captured and GuC capture for the job is not ready
+ * do manual capture first and decide later if we need to use it
+ */
+ if (!exec_queue_killed(q) && !xe->devcoredump.captured &&
+ !xe_guc_capture_get_matching_and_lock(job)) {
+ /* take force wake before engine register manual capture */
+ fw_ref = xe_force_wake_get(gt_to_fw(q->gt), XE_FORCEWAKE_ALL);
+ if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL))
+ xe_gt_info(q->gt, "failed to get forcewake for coredump capture\n");
+
+ xe_engine_snapshot_capture_for_job(job);
+
+ xe_force_wake_put(gt_to_fw(q->gt), fw_ref);
+ }
+
+ /*
* XXX: Sampling timeout doesn't work in wedged mode as we have to
* modify scheduling state to read timestamp. We could read the
* timestamp from a register to accumulate current running time but this
@@ -1079,9 +1101,10 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job)
* modifying state
*/
ret = wait_event_timeout(guc->ct.wq,
- !exec_queue_pending_enable(q) ||
- guc_read_stopped(guc), HZ * 5);
- if (!ret || guc_read_stopped(guc))
+ (!exec_queue_pending_enable(q) &&
+ !exec_queue_pending_disable(q)) ||
+ xe_guc_read_stopped(guc), HZ * 5);
+ if (!ret || xe_guc_read_stopped(guc))
goto trigger_reset;
/*
@@ -1105,8 +1128,8 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job)
smp_rmb();
ret = wait_event_timeout(guc->ct.wq,
!exec_queue_pending_disable(q) ||
- guc_read_stopped(guc), HZ * 5);
- if (!ret || guc_read_stopped(guc)) {
+ xe_guc_read_stopped(guc), HZ * 5);
+ if (!ret || xe_guc_read_stopped(guc)) {
trigger_reset:
if (!ret)
xe_gt_warn(guc_to_gt(guc), "Schedule disable failed to respond");
@@ -1295,7 +1318,7 @@ static void suspend_fence_signal(struct xe_exec_queue *q)
struct xe_device *xe = guc_to_xe(guc);
xe_assert(xe, exec_queue_suspended(q) || exec_queue_killed(q) ||
- guc_read_stopped(guc));
+ xe_guc_read_stopped(guc));
xe_assert(xe, q->guc->suspend_pending);
__suspend_fence_signal(q);
@@ -1308,10 +1331,10 @@ static void __guc_exec_queue_process_msg_suspend(struct xe_sched_msg *msg)
if (guc_exec_queue_allowed_to_change_state(q) && !exec_queue_suspended(q) &&
exec_queue_enabled(q)) {
- wait_event(guc->ct.wq, q->guc->resume_time != RESUME_PENDING ||
- guc_read_stopped(guc));
+ wait_event(guc->ct.wq, (q->guc->resume_time != RESUME_PENDING ||
+ xe_guc_read_stopped(guc)) && !exec_queue_pending_disable(q));
- if (!guc_read_stopped(guc)) {
+ if (!xe_guc_read_stopped(guc)) {
s64 since_resume_ms =
ktime_ms_delta(ktime_get(),
q->guc->resume_time);
@@ -1435,7 +1458,7 @@ static int guc_exec_queue_init(struct xe_exec_queue *q)
q->entity = &ge->entity;
- if (guc_read_stopped(guc))
+ if (xe_guc_read_stopped(guc))
xe_sched_stop(sched);
mutex_unlock(&guc->submission_state.lock);
@@ -1591,7 +1614,7 @@ static int guc_exec_queue_suspend_wait(struct xe_exec_queue *q)
ret = wait_event_interruptible_timeout(q->guc->suspend_wait,
!READ_ONCE(q->guc->suspend_pending) ||
exec_queue_killed(q) ||
- guc_read_stopped(guc),
+ xe_guc_read_stopped(guc),
HZ * 5);
if (!ret) {
@@ -1717,7 +1740,7 @@ int xe_guc_submit_reset_prepare(struct xe_guc *guc)
void xe_guc_submit_reset_wait(struct xe_guc *guc)
{
wait_event(guc->ct.wq, xe_device_wedged(guc_to_xe(guc)) ||
- !guc_read_stopped(guc));
+ !xe_guc_read_stopped(guc));
}
void xe_guc_submit_stop(struct xe_guc *guc)
@@ -1726,7 +1749,7 @@ void xe_guc_submit_stop(struct xe_guc *guc)
unsigned long index;
struct xe_device *xe = guc_to_xe(guc);
- xe_assert(xe, guc_read_stopped(guc) == 1);
+ xe_assert(xe, xe_guc_read_stopped(guc) == 1);
mutex_lock(&guc->submission_state.lock);
@@ -1770,7 +1793,7 @@ int xe_guc_submit_start(struct xe_guc *guc)
unsigned long index;
struct xe_device *xe = guc_to_xe(guc);
- xe_assert(xe, guc_read_stopped(guc) == 1);
+ xe_assert(xe, xe_guc_read_stopped(guc) == 1);
mutex_lock(&guc->submission_state.lock);
atomic_dec(&guc->submission_state.stopped);
@@ -1846,16 +1869,29 @@ static void handle_sched_done(struct xe_guc *guc, struct xe_exec_queue *q,
xe_gt_assert(guc_to_gt(guc), runnable_state == 0);
xe_gt_assert(guc_to_gt(guc), exec_queue_pending_disable(q));
- clear_exec_queue_pending_disable(q);
if (q->guc->suspend_pending) {
suspend_fence_signal(q);
+ clear_exec_queue_pending_disable(q);
} else {
if (exec_queue_banned(q) || check_timeout) {
smp_wmb();
wake_up_all(&guc->ct.wq);
}
- if (!check_timeout)
+ if (!check_timeout && exec_queue_destroyed(q)) {
+ /*
+ * Make sure to clear the pending_disable only
+ * after sampling the destroyed state. We want
+ * to ensure we don't trigger the unregister too
+ * early with something intending to only
+ * disable scheduling. The caller doing the
+ * destroy must wait for an ongoing
+ * pending_disable before marking as destroyed.
+ */
+ clear_exec_queue_pending_disable(q);
deregister_exec_queue(guc, q);
+ } else {
+ clear_exec_queue_pending_disable(q);
+ }
}
}
}
@@ -1949,8 +1985,6 @@ int xe_guc_exec_queue_reset_handler(struct xe_guc *guc, u32 *msg, u32 len)
xe_gt_info(gt, "Engine reset: engine_class=%s, logical_mask: 0x%x, guc_id=%d",
xe_hw_engine_class_to_str(q->class), q->logical_mask, guc_id);
- /* FIXME: Do error capture, most likely async */
-
trace_xe_exec_queue_reset(q);
/*
@@ -1966,6 +2000,36 @@ int xe_guc_exec_queue_reset_handler(struct xe_guc *guc, u32 *msg, u32 len)
return 0;
}
+/*
+ * xe_guc_error_capture_handler - Handler of GuC captured message
+ * @guc: The GuC object
+ * @msg: Point to the message
+ * @len: The message length
+ *
+ * When GuC captured data is ready, GuC will send message
+ * XE_GUC_ACTION_STATE_CAPTURE_NOTIFICATION to host, this function will be
+ * called 1st to check status before process the data comes with the message.
+ *
+ * Returns: error code. 0 if success
+ */
+int xe_guc_error_capture_handler(struct xe_guc *guc, u32 *msg, u32 len)
+{
+ u32 status;
+
+ if (unlikely(len != XE_GUC_ACTION_STATE_CAPTURE_NOTIFICATION_DATA_LEN)) {
+ xe_gt_dbg(guc_to_gt(guc), "Invalid length %u", len);
+ return -EPROTO;
+ }
+
+ status = msg[0] & XE_GUC_STATE_CAPTURE_EVENT_STATUS_MASK;
+ if (status == XE_GUC_STATE_CAPTURE_EVENT_STATUS_NOSPACE)
+ xe_gt_warn(guc_to_gt(guc), "G2H-Error capture no space");
+
+ xe_guc_capture_process(guc);
+
+ return 0;
+}
+
int xe_guc_exec_queue_memory_cat_error_handler(struct xe_guc *guc, u32 *msg,
u32 len)
{
@@ -2180,7 +2244,7 @@ xe_guc_exec_queue_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snaps
if (!snapshot)
return;
- drm_printf(p, "\nGuC ID: %d\n", snapshot->guc.id);
+ drm_printf(p, "GuC ID: %d\n", snapshot->guc.id);
drm_printf(p, "\tName: %s\n", snapshot->name);
drm_printf(p, "\tClass: %d\n", snapshot->class);
drm_printf(p, "\tLogical mask: 0x%x\n", snapshot->logical_mask);
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.h b/drivers/gpu/drm/xe/xe_guc_submit.h
index bdf8c9f3d24a..9b71a986c6ca 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.h
+++ b/drivers/gpu/drm/xe/xe_guc_submit.h
@@ -20,12 +20,14 @@ void xe_guc_submit_stop(struct xe_guc *guc);
int xe_guc_submit_start(struct xe_guc *guc);
void xe_guc_submit_wedge(struct xe_guc *guc);
+int xe_guc_read_stopped(struct xe_guc *guc);
int xe_guc_sched_done_handler(struct xe_guc *guc, u32 *msg, u32 len);
int xe_guc_deregister_done_handler(struct xe_guc *guc, u32 *msg, u32 len);
int xe_guc_exec_queue_reset_handler(struct xe_guc *guc, u32 *msg, u32 len);
int xe_guc_exec_queue_memory_cat_error_handler(struct xe_guc *guc, u32 *msg,
u32 len);
int xe_guc_exec_queue_reset_failure_handler(struct xe_guc *guc, u32 *msg, u32 len);
+int xe_guc_error_capture_handler(struct xe_guc *guc, u32 *msg, u32 len);
struct xe_guc_submit_exec_queue_snapshot *
xe_guc_exec_queue_snapshot_capture(struct xe_exec_queue *q);
diff --git a/drivers/gpu/drm/xe/xe_guc_types.h b/drivers/gpu/drm/xe/xe_guc_types.h
index ed150fc09ad0..fa75f57bf5da 100644
--- a/drivers/gpu/drm/xe/xe_guc_types.h
+++ b/drivers/gpu/drm/xe/xe_guc_types.h
@@ -58,6 +58,8 @@ struct xe_guc {
struct xe_guc_ads ads;
/** @ct: GuC ct */
struct xe_guc_ct ct;
+ /** @capture: the error-state-capture module's data and objects */
+ struct xe_guc_state_capture *capture;
/** @pc: GuC Power Conservation */
struct xe_guc_pc pc;
/** @dbm: GuC Doorbell Manager */
diff --git a/drivers/gpu/drm/xe/xe_huc.c b/drivers/gpu/drm/xe/xe_huc.c
index f5459f97af23..6a846e4cb221 100644
--- a/drivers/gpu/drm/xe/xe_huc.c
+++ b/drivers/gpu/drm/xe/xe_huc.c
@@ -229,7 +229,7 @@ bool xe_huc_is_authenticated(struct xe_huc *huc, enum xe_huc_auth_types type)
{
struct xe_gt *gt = huc_to_gt(huc);
- return xe_mmio_read32(gt, huc_auth_modes[type].reg) & huc_auth_modes[type].val;
+ return xe_mmio_read32(&gt->mmio, huc_auth_modes[type].reg) & huc_auth_modes[type].val;
}
int xe_huc_auth(struct xe_huc *huc, enum xe_huc_auth_types type)
@@ -268,7 +268,7 @@ int xe_huc_auth(struct xe_huc *huc, enum xe_huc_auth_types type)
goto fail;
}
- ret = xe_mmio_wait32(gt, huc_auth_modes[type].reg, huc_auth_modes[type].val,
+ ret = xe_mmio_wait32(&gt->mmio, huc_auth_modes[type].reg, huc_auth_modes[type].val,
huc_auth_modes[type].val, 100000, NULL, false);
if (ret) {
xe_gt_err(gt, "HuC: firmware not verified: %pe\n", ERR_PTR(ret));
@@ -296,19 +296,19 @@ void xe_huc_sanitize(struct xe_huc *huc)
void xe_huc_print_info(struct xe_huc *huc, struct drm_printer *p)
{
struct xe_gt *gt = huc_to_gt(huc);
- int err;
+ unsigned int fw_ref;
xe_uc_fw_print(&huc->fw, p);
if (!xe_uc_fw_is_enabled(&huc->fw))
return;
- err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
- if (err)
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+ if (!fw_ref)
return;
drm_printf(p, "\nHuC status: 0x%08x\n",
- xe_mmio_read32(gt, HUC_KERNEL_LOAD_INFO));
+ xe_mmio_read32(&gt->mmio, HUC_KERNEL_LOAD_INFO));
- xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
}
diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c
index c9c3beb3ce8d..1557acee3523 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine.c
+++ b/drivers/gpu/drm/xe/xe_hw_engine.c
@@ -12,6 +12,7 @@
#include "regs/xe_engine_regs.h"
#include "regs/xe_gt_regs.h"
+#include "regs/xe_irq_regs.h"
#include "xe_assert.h"
#include "xe_bo.h"
#include "xe_device.h"
@@ -23,6 +24,7 @@
#include "xe_gt_printk.h"
#include "xe_gt_mcr.h"
#include "xe_gt_topology.h"
+#include "xe_guc_capture.h"
#include "xe_hw_engine_group.h"
#include "xe_hw_fence.h"
#include "xe_irq.h"
@@ -295,7 +297,7 @@ void xe_hw_engine_mmio_write32(struct xe_hw_engine *hwe,
reg.addr += hwe->mmio_base;
- xe_mmio_write32(hwe->gt, reg, val);
+ xe_mmio_write32(&hwe->gt->mmio, reg, val);
}
/**
@@ -315,7 +317,7 @@ u32 xe_hw_engine_mmio_read32(struct xe_hw_engine *hwe, struct xe_reg reg)
reg.addr += hwe->mmio_base;
- return xe_mmio_read32(hwe->gt, reg);
+ return xe_mmio_read32(&hwe->gt->mmio, reg);
}
void xe_hw_engine_enable_ring(struct xe_hw_engine *hwe)
@@ -324,7 +326,7 @@ void xe_hw_engine_enable_ring(struct xe_hw_engine *hwe)
xe_hw_engine_mask_per_class(hwe->gt, XE_ENGINE_CLASS_COMPUTE);
if (hwe->class == XE_ENGINE_CLASS_COMPUTE && ccs_mask)
- xe_mmio_write32(hwe->gt, RCU_MODE,
+ xe_mmio_write32(&hwe->gt->mmio, RCU_MODE,
_MASKED_BIT_ENABLE(RCU_MODE_CCS_ENABLE));
xe_hw_engine_mmio_write32(hwe, RING_HWSTAM(0), ~0x0);
@@ -354,7 +356,7 @@ static bool xe_rtp_cfeg_wmtp_disabled(const struct xe_gt *gt,
hwe->class != XE_ENGINE_CLASS_RENDER)
return false;
- return xe_mmio_read32(hwe->gt, XEHP_FUSE4) & CFEG_WMTP_DISABLE;
+ return xe_mmio_read32(&hwe->gt->mmio, XEHP_FUSE4) & CFEG_WMTP_DISABLE;
}
void
@@ -460,6 +462,30 @@ hw_engine_setup_default_state(struct xe_hw_engine *hwe)
xe_rtp_process_to_sr(&ctx, engine_entries, &hwe->reg_sr);
}
+static const struct engine_info *find_engine_info(enum xe_engine_class class, int instance)
+{
+ const struct engine_info *info;
+ enum xe_hw_engine_id id;
+
+ for (id = 0; id < XE_NUM_HW_ENGINES; ++id) {
+ info = &engine_infos[id];
+ if (info->class == class && info->instance == instance)
+ return info;
+ }
+
+ return NULL;
+}
+
+static u16 get_msix_irq_offset(struct xe_gt *gt, enum xe_engine_class class)
+{
+ /* For MSI-X, hw engines report to offset of engine instance zero */
+ const struct engine_info *info = find_engine_info(class, 0);
+
+ xe_gt_assert(gt, info);
+
+ return info ? info->irq_offset : 0;
+}
+
static void hw_engine_init_early(struct xe_gt *gt, struct xe_hw_engine *hwe,
enum xe_hw_engine_id id)
{
@@ -479,7 +505,9 @@ static void hw_engine_init_early(struct xe_gt *gt, struct xe_hw_engine *hwe,
hwe->class = info->class;
hwe->instance = info->instance;
hwe->mmio_base = info->mmio_base;
- hwe->irq_offset = info->irq_offset;
+ hwe->irq_offset = xe_device_has_msix(gt_to_xe(gt)) ?
+ get_msix_irq_offset(gt, info->class) :
+ info->irq_offset;
hwe->domain = info->domain;
hwe->name = info->name;
hwe->fence_irq = &gt->fence_irq[info->class];
@@ -612,7 +640,7 @@ static void read_media_fuses(struct xe_gt *gt)
xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);
- media_fuse = xe_mmio_read32(gt, GT_VEBOX_VDBOX_DISABLE);
+ media_fuse = xe_mmio_read32(&gt->mmio, GT_VEBOX_VDBOX_DISABLE);
/*
* Pre-Xe_HP platforms had register bits representing absent engines,
@@ -657,7 +685,7 @@ static void read_copy_fuses(struct xe_gt *gt)
xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);
- bcs_mask = xe_mmio_read32(gt, MIRROR_FUSE3);
+ bcs_mask = xe_mmio_read32(&gt->mmio, MIRROR_FUSE3);
bcs_mask = REG_FIELD_GET(MEML3_EN_MASK, bcs_mask);
/* BCS0 is always present; only BCS1-BCS8 may be fused off */
@@ -704,7 +732,7 @@ static void read_compute_fuses_from_reg(struct xe_gt *gt)
struct xe_device *xe = gt_to_xe(gt);
u32 ccs_mask;
- ccs_mask = xe_mmio_read32(gt, XEHP_FUSE4);
+ ccs_mask = xe_mmio_read32(&gt->mmio, XEHP_FUSE4);
ccs_mask = REG_FIELD_GET(CCS_EN_MASK, ccs_mask);
for (int i = XE_HW_ENGINE_CCS0, j = 0; i <= XE_HW_ENGINE_CCS3; ++i, ++j) {
@@ -742,8 +770,8 @@ static void check_gsc_availability(struct xe_gt *gt)
gt->info.engine_mask &= ~BIT(XE_HW_ENGINE_GSCCS0);
/* interrupts where previously enabled, so turn them off */
- xe_mmio_write32(gt, GUNIT_GSC_INTR_ENABLE, 0);
- xe_mmio_write32(gt, GUNIT_GSC_INTR_MASK, ~0);
+ xe_mmio_write32(&gt->mmio, GUNIT_GSC_INTR_ENABLE, 0);
+ xe_mmio_write32(&gt->mmio, GUNIT_GSC_INTR_MASK, ~0);
drm_info(&xe->drm, "gsccs disabled due to lack of FW\n");
}
@@ -798,60 +826,10 @@ void xe_hw_engine_handle_irq(struct xe_hw_engine *hwe, u16 intr_vec)
xe_hw_fence_irq_run(hwe->fence_irq);
}
-static bool
-is_slice_common_per_gslice(struct xe_device *xe)
-{
- return GRAPHICS_VERx100(xe) >= 1255;
-}
-
-static void
-xe_hw_engine_snapshot_instdone_capture(struct xe_hw_engine *hwe,
- struct xe_hw_engine_snapshot *snapshot)
-{
- struct xe_gt *gt = hwe->gt;
- struct xe_device *xe = gt_to_xe(gt);
- unsigned int dss;
- u16 group, instance;
-
- snapshot->reg.instdone.ring = xe_hw_engine_mmio_read32(hwe, RING_INSTDONE(0));
-
- if (snapshot->hwe->class != XE_ENGINE_CLASS_RENDER)
- return;
-
- if (is_slice_common_per_gslice(xe) == false) {
- snapshot->reg.instdone.slice_common[0] =
- xe_mmio_read32(gt, SC_INSTDONE);
- snapshot->reg.instdone.slice_common_extra[0] =
- xe_mmio_read32(gt, SC_INSTDONE_EXTRA);
- snapshot->reg.instdone.slice_common_extra2[0] =
- xe_mmio_read32(gt, SC_INSTDONE_EXTRA2);
- } else {
- for_each_geometry_dss(dss, gt, group, instance) {
- snapshot->reg.instdone.slice_common[dss] =
- xe_gt_mcr_unicast_read(gt, XEHPG_SC_INSTDONE, group, instance);
- snapshot->reg.instdone.slice_common_extra[dss] =
- xe_gt_mcr_unicast_read(gt, XEHPG_SC_INSTDONE_EXTRA, group, instance);
- snapshot->reg.instdone.slice_common_extra2[dss] =
- xe_gt_mcr_unicast_read(gt, XEHPG_SC_INSTDONE_EXTRA2, group, instance);
- }
- }
-
- for_each_geometry_dss(dss, gt, group, instance) {
- snapshot->reg.instdone.sampler[dss] =
- xe_gt_mcr_unicast_read(gt, SAMPLER_INSTDONE, group, instance);
- snapshot->reg.instdone.row[dss] =
- xe_gt_mcr_unicast_read(gt, ROW_INSTDONE, group, instance);
-
- if (GRAPHICS_VERx100(xe) >= 1255)
- snapshot->reg.instdone.geom_svg[dss] =
- xe_gt_mcr_unicast_read(gt, XEHPG_INSTDONE_GEOM_SVGUNIT,
- group, instance);
- }
-}
-
/**
* xe_hw_engine_snapshot_capture - Take a quick snapshot of the HW Engine.
* @hwe: Xe HW Engine.
+ * @job: The job object.
*
* This can be printed out in a later stage like during dev_coredump
* analysis.
@@ -860,11 +838,10 @@ xe_hw_engine_snapshot_instdone_capture(struct xe_hw_engine *hwe,
* caller, using `xe_hw_engine_snapshot_free`.
*/
struct xe_hw_engine_snapshot *
-xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe)
+xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe, struct xe_sched_job *job)
{
struct xe_hw_engine_snapshot *snapshot;
- size_t len;
- u64 val;
+ struct __guc_capture_parsed_output *node;
if (!xe_hw_engine_is_valid(hwe))
return NULL;
@@ -874,28 +851,6 @@ xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe)
if (!snapshot)
return NULL;
- /* Because XE_MAX_DSS_FUSE_BITS is defined in xe_gt_types.h and it
- * includes xe_hw_engine_types.h the length of this 3 registers can't be
- * set in struct xe_hw_engine_snapshot, so here doing additional
- * allocations.
- */
- len = (XE_MAX_DSS_FUSE_BITS * sizeof(u32));
- snapshot->reg.instdone.slice_common = kzalloc(len, GFP_ATOMIC);
- snapshot->reg.instdone.slice_common_extra = kzalloc(len, GFP_ATOMIC);
- snapshot->reg.instdone.slice_common_extra2 = kzalloc(len, GFP_ATOMIC);
- snapshot->reg.instdone.sampler = kzalloc(len, GFP_ATOMIC);
- snapshot->reg.instdone.row = kzalloc(len, GFP_ATOMIC);
- snapshot->reg.instdone.geom_svg = kzalloc(len, GFP_ATOMIC);
- if (!snapshot->reg.instdone.slice_common ||
- !snapshot->reg.instdone.slice_common_extra ||
- !snapshot->reg.instdone.slice_common_extra2 ||
- !snapshot->reg.instdone.sampler ||
- !snapshot->reg.instdone.row ||
- !snapshot->reg.instdone.geom_svg) {
- xe_hw_engine_snapshot_free(snapshot);
- return NULL;
- }
-
snapshot->name = kstrdup(hwe->name, GFP_ATOMIC);
snapshot->hwe = hwe;
snapshot->logical_instance = hwe->logical_instance;
@@ -903,157 +858,32 @@ xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe)
snapshot->forcewake.ref = xe_force_wake_ref(gt_to_fw(hwe->gt),
hwe->domain);
snapshot->mmio_base = hwe->mmio_base;
+ snapshot->kernel_reserved = xe_hw_engine_is_reserved(hwe);
/* no more VF accessible data below this point */
if (IS_SRIOV_VF(gt_to_xe(hwe->gt)))
return snapshot;
- snapshot->reg.ring_execlist_status =
- xe_hw_engine_mmio_read32(hwe, RING_EXECLIST_STATUS_LO(0));
- val = xe_hw_engine_mmio_read32(hwe, RING_EXECLIST_STATUS_HI(0));
- snapshot->reg.ring_execlist_status |= val << 32;
-
- snapshot->reg.ring_execlist_sq_contents =
- xe_hw_engine_mmio_read32(hwe, RING_EXECLIST_SQ_CONTENTS_LO(0));
- val = xe_hw_engine_mmio_read32(hwe, RING_EXECLIST_SQ_CONTENTS_HI(0));
- snapshot->reg.ring_execlist_sq_contents |= val << 32;
-
- snapshot->reg.ring_acthd = xe_hw_engine_mmio_read32(hwe, RING_ACTHD(0));
- val = xe_hw_engine_mmio_read32(hwe, RING_ACTHD_UDW(0));
- snapshot->reg.ring_acthd |= val << 32;
-
- snapshot->reg.ring_bbaddr = xe_hw_engine_mmio_read32(hwe, RING_BBADDR(0));
- val = xe_hw_engine_mmio_read32(hwe, RING_BBADDR_UDW(0));
- snapshot->reg.ring_bbaddr |= val << 32;
-
- snapshot->reg.ring_dma_fadd =
- xe_hw_engine_mmio_read32(hwe, RING_DMA_FADD(0));
- val = xe_hw_engine_mmio_read32(hwe, RING_DMA_FADD_UDW(0));
- snapshot->reg.ring_dma_fadd |= val << 32;
-
- snapshot->reg.ring_hwstam = xe_hw_engine_mmio_read32(hwe, RING_HWSTAM(0));
- snapshot->reg.ring_hws_pga = xe_hw_engine_mmio_read32(hwe, RING_HWS_PGA(0));
- snapshot->reg.ring_start = xe_hw_engine_mmio_read32(hwe, RING_START(0));
- if (GRAPHICS_VERx100(hwe->gt->tile->xe) >= 2000) {
- val = xe_hw_engine_mmio_read32(hwe, RING_START_UDW(0));
- snapshot->reg.ring_start |= val << 32;
- }
- if (xe_gt_has_indirect_ring_state(hwe->gt)) {
- snapshot->reg.indirect_ring_state =
- xe_hw_engine_mmio_read32(hwe, INDIRECT_RING_STATE(0));
- }
-
- snapshot->reg.ring_head =
- xe_hw_engine_mmio_read32(hwe, RING_HEAD(0)) & HEAD_ADDR;
- snapshot->reg.ring_tail =
- xe_hw_engine_mmio_read32(hwe, RING_TAIL(0)) & TAIL_ADDR;
- snapshot->reg.ring_ctl = xe_hw_engine_mmio_read32(hwe, RING_CTL(0));
- snapshot->reg.ring_mi_mode =
- xe_hw_engine_mmio_read32(hwe, RING_MI_MODE(0));
- snapshot->reg.ring_mode = xe_hw_engine_mmio_read32(hwe, RING_MODE(0));
- snapshot->reg.ring_imr = xe_hw_engine_mmio_read32(hwe, RING_IMR(0));
- snapshot->reg.ring_esr = xe_hw_engine_mmio_read32(hwe, RING_ESR(0));
- snapshot->reg.ring_emr = xe_hw_engine_mmio_read32(hwe, RING_EMR(0));
- snapshot->reg.ring_eir = xe_hw_engine_mmio_read32(hwe, RING_EIR(0));
- snapshot->reg.ipehr = xe_hw_engine_mmio_read32(hwe, RING_IPEHR(0));
- xe_hw_engine_snapshot_instdone_capture(hwe, snapshot);
-
- if (snapshot->hwe->class == XE_ENGINE_CLASS_COMPUTE)
- snapshot->reg.rcu_mode = xe_mmio_read32(hwe->gt, RCU_MODE);
-
- return snapshot;
-}
-
-static void
-xe_hw_engine_snapshot_instdone_print(struct xe_hw_engine_snapshot *snapshot, struct drm_printer *p)
-{
- struct xe_gt *gt = snapshot->hwe->gt;
- struct xe_device *xe = gt_to_xe(gt);
- u16 group, instance;
- unsigned int dss;
-
- drm_printf(p, "\tRING_INSTDONE: 0x%08x\n", snapshot->reg.instdone.ring);
-
- if (snapshot->hwe->class != XE_ENGINE_CLASS_RENDER)
- return;
-
- if (is_slice_common_per_gslice(xe) == false) {
- drm_printf(p, "\tSC_INSTDONE[0]: 0x%08x\n",
- snapshot->reg.instdone.slice_common[0]);
- drm_printf(p, "\tSC_INSTDONE_EXTRA[0]: 0x%08x\n",
- snapshot->reg.instdone.slice_common_extra[0]);
- drm_printf(p, "\tSC_INSTDONE_EXTRA2[0]: 0x%08x\n",
- snapshot->reg.instdone.slice_common_extra2[0]);
- } else {
- for_each_geometry_dss(dss, gt, group, instance) {
- drm_printf(p, "\tSC_INSTDONE[%u]: 0x%08x\n", dss,
- snapshot->reg.instdone.slice_common[dss]);
- drm_printf(p, "\tSC_INSTDONE_EXTRA[%u]: 0x%08x\n", dss,
- snapshot->reg.instdone.slice_common_extra[dss]);
- drm_printf(p, "\tSC_INSTDONE_EXTRA2[%u]: 0x%08x\n", dss,
- snapshot->reg.instdone.slice_common_extra2[dss]);
+ if (job) {
+ /* If got guc capture, set source to GuC */
+ node = xe_guc_capture_get_matching_and_lock(job);
+ if (node) {
+ struct xe_device *xe = gt_to_xe(hwe->gt);
+ struct xe_devcoredump *coredump = &xe->devcoredump;
+
+ coredump->snapshot.matched_node = node;
+ snapshot->source = XE_ENGINE_CAPTURE_SOURCE_GUC;
+ xe_gt_dbg(hwe->gt, "Found and locked GuC-err-capture node");
+ return snapshot;
}
}
- for_each_geometry_dss(dss, gt, group, instance) {
- drm_printf(p, "\tSAMPLER_INSTDONE[%u]: 0x%08x\n", dss,
- snapshot->reg.instdone.sampler[dss]);
- drm_printf(p, "\tROW_INSTDONE[%u]: 0x%08x\n", dss,
- snapshot->reg.instdone.row[dss]);
-
- if (GRAPHICS_VERx100(xe) >= 1255)
- drm_printf(p, "\tINSTDONE_GEOM_SVGUNIT[%u]: 0x%08x\n",
- dss, snapshot->reg.instdone.geom_svg[dss]);
- }
-}
+ /* otherwise, do manual capture */
+ xe_engine_manual_capture(hwe, snapshot);
+ snapshot->source = XE_ENGINE_CAPTURE_SOURCE_MANUAL;
+ xe_gt_dbg(hwe->gt, "Proceeding with manual engine snapshot");
-/**
- * xe_hw_engine_snapshot_print - Print out a given Xe HW Engine snapshot.
- * @snapshot: Xe HW Engine snapshot object.
- * @p: drm_printer where it will be printed out.
- *
- * This function prints out a given Xe HW Engine snapshot object.
- */
-void xe_hw_engine_snapshot_print(struct xe_hw_engine_snapshot *snapshot,
- struct drm_printer *p)
-{
- if (!snapshot)
- return;
-
- drm_printf(p, "%s (physical), logical instance=%d\n",
- snapshot->name ? snapshot->name : "",
- snapshot->logical_instance);
- drm_printf(p, "\tForcewake: domain 0x%x, ref %d\n",
- snapshot->forcewake.domain, snapshot->forcewake.ref);
- drm_printf(p, "\tHWSTAM: 0x%08x\n", snapshot->reg.ring_hwstam);
- drm_printf(p, "\tRING_HWS_PGA: 0x%08x\n", snapshot->reg.ring_hws_pga);
- drm_printf(p, "\tRING_EXECLIST_STATUS: 0x%016llx\n",
- snapshot->reg.ring_execlist_status);
- drm_printf(p, "\tRING_EXECLIST_SQ_CONTENTS: 0x%016llx\n",
- snapshot->reg.ring_execlist_sq_contents);
- drm_printf(p, "\tRING_START: 0x%016llx\n", snapshot->reg.ring_start);
- drm_printf(p, "\tRING_HEAD: 0x%08x\n", snapshot->reg.ring_head);
- drm_printf(p, "\tRING_TAIL: 0x%08x\n", snapshot->reg.ring_tail);
- drm_printf(p, "\tRING_CTL: 0x%08x\n", snapshot->reg.ring_ctl);
- drm_printf(p, "\tRING_MI_MODE: 0x%08x\n", snapshot->reg.ring_mi_mode);
- drm_printf(p, "\tRING_MODE: 0x%08x\n",
- snapshot->reg.ring_mode);
- drm_printf(p, "\tRING_IMR: 0x%08x\n", snapshot->reg.ring_imr);
- drm_printf(p, "\tRING_ESR: 0x%08x\n", snapshot->reg.ring_esr);
- drm_printf(p, "\tRING_EMR: 0x%08x\n", snapshot->reg.ring_emr);
- drm_printf(p, "\tRING_EIR: 0x%08x\n", snapshot->reg.ring_eir);
- drm_printf(p, "\tACTHD: 0x%016llx\n", snapshot->reg.ring_acthd);
- drm_printf(p, "\tBBADDR: 0x%016llx\n", snapshot->reg.ring_bbaddr);
- drm_printf(p, "\tDMA_FADDR: 0x%016llx\n", snapshot->reg.ring_dma_fadd);
- drm_printf(p, "\tINDIRECT_RING_STATE: 0x%08x\n",
- snapshot->reg.indirect_ring_state);
- drm_printf(p, "\tIPEHR: 0x%08x\n", snapshot->reg.ipehr);
- xe_hw_engine_snapshot_instdone_print(snapshot, p);
-
- if (snapshot->hwe->class == XE_ENGINE_CLASS_COMPUTE)
- drm_printf(p, "\tRCU_MODE: 0x%08x\n",
- snapshot->reg.rcu_mode);
- drm_puts(p, "\n");
+ return snapshot;
}
/**
@@ -1065,15 +895,18 @@ void xe_hw_engine_snapshot_print(struct xe_hw_engine_snapshot *snapshot,
*/
void xe_hw_engine_snapshot_free(struct xe_hw_engine_snapshot *snapshot)
{
+ struct xe_gt *gt;
if (!snapshot)
return;
- kfree(snapshot->reg.instdone.slice_common);
- kfree(snapshot->reg.instdone.slice_common_extra);
- kfree(snapshot->reg.instdone.slice_common_extra2);
- kfree(snapshot->reg.instdone.sampler);
- kfree(snapshot->reg.instdone.row);
- kfree(snapshot->reg.instdone.geom_svg);
+ gt = snapshot->hwe->gt;
+ /*
+ * xe_guc_capture_put_matched_nodes is called here and from
+ * xe_devcoredump_snapshot_free, to cover the 2 calling paths
+ * of hw_engines - debugfs and devcoredump free.
+ */
+ xe_guc_capture_put_matched_nodes(&gt->uc.guc);
+
kfree(snapshot->name);
kfree(snapshot);
}
@@ -1089,8 +922,8 @@ void xe_hw_engine_print(struct xe_hw_engine *hwe, struct drm_printer *p)
{
struct xe_hw_engine_snapshot *snapshot;
- snapshot = xe_hw_engine_snapshot_capture(hwe);
- xe_hw_engine_snapshot_print(snapshot, p);
+ snapshot = xe_hw_engine_snapshot_capture(hwe, NULL);
+ xe_engine_snapshot_print(snapshot, p);
xe_hw_engine_snapshot_free(snapshot);
}
@@ -1150,7 +983,7 @@ const char *xe_hw_engine_class_to_str(enum xe_engine_class class)
u64 xe_hw_engine_read_timestamp(struct xe_hw_engine *hwe)
{
- return xe_mmio_read64_2x32(hwe->gt, RING_TIMESTAMP(hwe->mmio_base));
+ return xe_mmio_read64_2x32(&hwe->gt->mmio, RING_TIMESTAMP(hwe->mmio_base));
}
enum xe_force_wake_domains xe_hw_engine_to_fw_domain(struct xe_hw_engine *hwe)
diff --git a/drivers/gpu/drm/xe/xe_hw_engine.h b/drivers/gpu/drm/xe/xe_hw_engine.h
index 022819a4a8eb..da0a6922a26f 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine.h
+++ b/drivers/gpu/drm/xe/xe_hw_engine.h
@@ -11,6 +11,7 @@
struct drm_printer;
struct drm_xe_engine_class_instance;
struct xe_device;
+struct xe_sched_job;
#ifdef CONFIG_DRM_XE_JOB_TIMEOUT_MIN
#define XE_HW_ENGINE_JOB_TIMEOUT_MIN CONFIG_DRM_XE_JOB_TIMEOUT_MIN
@@ -54,12 +55,9 @@ void xe_hw_engine_handle_irq(struct xe_hw_engine *hwe, u16 intr_vec);
void xe_hw_engine_enable_ring(struct xe_hw_engine *hwe);
u32 xe_hw_engine_mask_per_class(struct xe_gt *gt,
enum xe_engine_class engine_class);
-
struct xe_hw_engine_snapshot *
-xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe);
+xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe, struct xe_sched_job *job);
void xe_hw_engine_snapshot_free(struct xe_hw_engine_snapshot *snapshot);
-void xe_hw_engine_snapshot_print(struct xe_hw_engine_snapshot *snapshot,
- struct drm_printer *p);
void xe_hw_engine_print(struct xe_hw_engine *hwe, struct drm_printer *p);
void xe_hw_engine_setup_default_lrc_state(struct xe_hw_engine *hwe);
diff --git a/drivers/gpu/drm/xe/xe_hw_engine_types.h b/drivers/gpu/drm/xe/xe_hw_engine_types.h
index 8be6d420ece4..719f27ef00a5 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine_types.h
+++ b/drivers/gpu/drm/xe/xe_hw_engine_types.h
@@ -152,6 +152,11 @@ struct xe_hw_engine {
struct xe_hw_engine_group *hw_engine_group;
};
+enum xe_hw_engine_snapshot_source_id {
+ XE_ENGINE_CAPTURE_SOURCE_MANUAL,
+ XE_ENGINE_CAPTURE_SOURCE_GUC
+};
+
/**
* struct xe_hw_engine_snapshot - Hardware engine snapshot
*
@@ -160,6 +165,8 @@ struct xe_hw_engine {
struct xe_hw_engine_snapshot {
/** @name: name of the hw engine */
char *name;
+ /** @source: Data source, either manual or GuC */
+ enum xe_hw_engine_snapshot_source_id source;
/** @hwe: hw engine */
struct xe_hw_engine *hwe;
/** @logical_instance: logical instance of this hw engine */
@@ -173,65 +180,8 @@ struct xe_hw_engine_snapshot {
} forcewake;
/** @mmio_base: MMIO base address of this hw engine*/
u32 mmio_base;
- /** @reg: Useful MMIO register snapshot */
- struct {
- /** @reg.ring_execlist_status: RING_EXECLIST_STATUS */
- u64 ring_execlist_status;
- /** @reg.ring_execlist_sq_contents: RING_EXECLIST_SQ_CONTENTS */
- u64 ring_execlist_sq_contents;
- /** @reg.ring_acthd: RING_ACTHD */
- u64 ring_acthd;
- /** @reg.ring_bbaddr: RING_BBADDR */
- u64 ring_bbaddr;
- /** @reg.ring_dma_fadd: RING_DMA_FADD */
- u64 ring_dma_fadd;
- /** @reg.ring_hwstam: RING_HWSTAM */
- u32 ring_hwstam;
- /** @reg.ring_hws_pga: RING_HWS_PGA */
- u32 ring_hws_pga;
- /** @reg.ring_start: RING_START */
- u64 ring_start;
- /** @reg.ring_head: RING_HEAD */
- u32 ring_head;
- /** @reg.ring_tail: RING_TAIL */
- u32 ring_tail;
- /** @reg.ring_ctl: RING_CTL */
- u32 ring_ctl;
- /** @reg.ring_mi_mode: RING_MI_MODE */
- u32 ring_mi_mode;
- /** @reg.ring_mode: RING_MODE */
- u32 ring_mode;
- /** @reg.ring_imr: RING_IMR */
- u32 ring_imr;
- /** @reg.ring_esr: RING_ESR */
- u32 ring_esr;
- /** @reg.ring_emr: RING_EMR */
- u32 ring_emr;
- /** @reg.ring_eir: RING_EIR */
- u32 ring_eir;
- /** @reg.indirect_ring_state: INDIRECT_RING_STATE */
- u32 indirect_ring_state;
- /** @reg.ipehr: IPEHR */
- u32 ipehr;
- /** @reg.rcu_mode: RCU_MODE */
- u32 rcu_mode;
- struct {
- /** @reg.instdone.ring: RING_INSTDONE */
- u32 ring;
- /** @reg.instdone.slice_common: SC_INSTDONE */
- u32 *slice_common;
- /** @reg.instdone.slice_common_extra: SC_INSTDONE_EXTRA */
- u32 *slice_common_extra;
- /** @reg.instdone.slice_common_extra2: SC_INSTDONE_EXTRA2 */
- u32 *slice_common_extra2;
- /** @reg.instdone.sampler: SAMPLER_INSTDONE */
- u32 *sampler;
- /** @reg.instdone.row: ROW_INSTDONE */
- u32 *row;
- /** @reg.instdone.geom_svg: INSTDONE_GEOM_SVGUNIT */
- u32 *geom_svg;
- } instdone;
- } reg;
+ /** @kernel_reserved: Engine reserved, can't be used by userspace */
+ bool kernel_reserved;
};
#endif
diff --git a/drivers/gpu/drm/xe/xe_hwmon.c b/drivers/gpu/drm/xe/xe_hwmon.c
index aa11728e7e79..fde56dad3ab7 100644
--- a/drivers/gpu/drm/xe/xe_hwmon.c
+++ b/drivers/gpu/drm/xe/xe_hwmon.c
@@ -149,7 +149,7 @@ static void xe_hwmon_power_max_read(struct xe_hwmon *hwmon, int channel, long *v
u64 reg_val, min, max;
struct xe_device *xe = hwmon->xe;
struct xe_reg rapl_limit, pkg_power_sku;
- struct xe_gt *mmio = xe_root_mmio_gt(xe);
+ struct xe_mmio *mmio = xe_root_tile_mmio(xe);
rapl_limit = xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, channel);
pkg_power_sku = xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU, channel);
@@ -190,7 +190,7 @@ unlock:
static int xe_hwmon_power_max_write(struct xe_hwmon *hwmon, int channel, long value)
{
- struct xe_gt *mmio = xe_root_mmio_gt(hwmon->xe);
+ struct xe_mmio *mmio = xe_root_tile_mmio(hwmon->xe);
int ret = 0;
u64 reg_val;
struct xe_reg rapl_limit;
@@ -222,7 +222,7 @@ unlock:
static void xe_hwmon_power_rated_max_read(struct xe_hwmon *hwmon, int channel, long *value)
{
- struct xe_gt *mmio = xe_root_mmio_gt(hwmon->xe);
+ struct xe_mmio *mmio = xe_root_tile_mmio(hwmon->xe);
struct xe_reg reg = xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU, channel);
u64 reg_val;
@@ -259,7 +259,7 @@ static void xe_hwmon_power_rated_max_read(struct xe_hwmon *hwmon, int channel, l
static void
xe_hwmon_energy_get(struct xe_hwmon *hwmon, int channel, long *energy)
{
- struct xe_gt *mmio = xe_root_mmio_gt(hwmon->xe);
+ struct xe_mmio *mmio = xe_root_tile_mmio(hwmon->xe);
struct xe_hwmon_energy_info *ei = &hwmon->ei[channel];
u64 reg_val;
@@ -282,7 +282,7 @@ xe_hwmon_power_max_interval_show(struct device *dev, struct device_attribute *at
char *buf)
{
struct xe_hwmon *hwmon = dev_get_drvdata(dev);
- struct xe_gt *mmio = xe_root_mmio_gt(hwmon->xe);
+ struct xe_mmio *mmio = xe_root_tile_mmio(hwmon->xe);
u32 x, y, x_w = 2; /* 2 bits */
u64 r, tau4, out;
int sensor_index = to_sensor_dev_attr(attr)->index;
@@ -323,7 +323,7 @@ xe_hwmon_power_max_interval_store(struct device *dev, struct device_attribute *a
const char *buf, size_t count)
{
struct xe_hwmon *hwmon = dev_get_drvdata(dev);
- struct xe_gt *mmio = xe_root_mmio_gt(hwmon->xe);
+ struct xe_mmio *mmio = xe_root_tile_mmio(hwmon->xe);
u32 x, y, rxy, x_w = 2; /* 2 bits */
u64 tau4, r, max_win;
unsigned long val;
@@ -498,7 +498,7 @@ static int xe_hwmon_power_curr_crit_write(struct xe_hwmon *hwmon, int channel,
static void xe_hwmon_get_voltage(struct xe_hwmon *hwmon, int channel, long *value)
{
- struct xe_gt *mmio = xe_root_mmio_gt(hwmon->xe);
+ struct xe_mmio *mmio = xe_root_tile_mmio(hwmon->xe);
u64 reg_val;
reg_val = xe_mmio_read32(mmio, xe_hwmon_get_reg(hwmon, REG_GT_PERF_STATUS, channel));
@@ -781,7 +781,7 @@ static const struct hwmon_chip_info hwmon_chip_info = {
static void
xe_hwmon_get_preregistration_info(struct xe_device *xe)
{
- struct xe_gt *mmio = xe_root_mmio_gt(xe);
+ struct xe_mmio *mmio = xe_root_tile_mmio(xe);
struct xe_hwmon *hwmon = xe->hwmon;
long energy;
u64 val_sku_unit = 0;
diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c
index 5f2c368c35ad..b7995ebd54ab 100644
--- a/drivers/gpu/drm/xe/xe_irq.c
+++ b/drivers/gpu/drm/xe/xe_irq.c
@@ -10,8 +10,7 @@
#include <drm/drm_managed.h>
#include "display/xe_display.h"
-#include "regs/xe_gt_regs.h"
-#include "regs/xe_regs.h"
+#include "regs/xe_irq_regs.h"
#include "xe_device.h"
#include "xe_drv.h"
#include "xe_gsc_proxy.h"
@@ -30,14 +29,14 @@
#define IIR(offset) XE_REG(offset + 0x8)
#define IER(offset) XE_REG(offset + 0xc)
-static void assert_iir_is_zero(struct xe_gt *mmio, struct xe_reg reg)
+static void assert_iir_is_zero(struct xe_mmio *mmio, struct xe_reg reg)
{
u32 val = xe_mmio_read32(mmio, reg);
if (val == 0)
return;
- drm_WARN(&gt_to_xe(mmio)->drm, 1,
+ drm_WARN(&mmio->tile->xe->drm, 1,
"Interrupt register 0x%x is not zero: 0x%08x\n",
reg.addr, val);
xe_mmio_write32(mmio, reg, 0xffffffff);
@@ -52,7 +51,7 @@ static void assert_iir_is_zero(struct xe_gt *mmio, struct xe_reg reg)
*/
static void unmask_and_enable(struct xe_tile *tile, u32 irqregs, u32 bits)
{
- struct xe_gt *mmio = tile->primary_gt;
+ struct xe_mmio *mmio = &tile->mmio;
/*
* If we're just enabling an interrupt now, it shouldn't already
@@ -70,7 +69,7 @@ static void unmask_and_enable(struct xe_tile *tile, u32 irqregs, u32 bits)
/* Mask and disable all interrupts. */
static void mask_and_disable(struct xe_tile *tile, u32 irqregs)
{
- struct xe_gt *mmio = tile->primary_gt;
+ struct xe_mmio *mmio = &tile->mmio;
xe_mmio_write32(mmio, IMR(irqregs), ~0);
/* Posting read */
@@ -87,7 +86,7 @@ static void mask_and_disable(struct xe_tile *tile, u32 irqregs)
static u32 xelp_intr_disable(struct xe_device *xe)
{
- struct xe_gt *mmio = xe_root_mmio_gt(xe);
+ struct xe_mmio *mmio = xe_root_tile_mmio(xe);
xe_mmio_write32(mmio, GFX_MSTR_IRQ, 0);
@@ -103,7 +102,7 @@ static u32 xelp_intr_disable(struct xe_device *xe)
static u32
gu_misc_irq_ack(struct xe_device *xe, const u32 master_ctl)
{
- struct xe_gt *mmio = xe_root_mmio_gt(xe);
+ struct xe_mmio *mmio = xe_root_tile_mmio(xe);
u32 iir;
if (!(master_ctl & GU_MISC_IRQ))
@@ -118,7 +117,7 @@ gu_misc_irq_ack(struct xe_device *xe, const u32 master_ctl)
static inline void xelp_intr_enable(struct xe_device *xe, bool stall)
{
- struct xe_gt *mmio = xe_root_mmio_gt(xe);
+ struct xe_mmio *mmio = xe_root_tile_mmio(xe);
xe_mmio_write32(mmio, GFX_MSTR_IRQ, MASTER_IRQ);
if (stall)
@@ -129,12 +128,13 @@ static inline void xelp_intr_enable(struct xe_device *xe, bool stall)
void xe_irq_enable_hwe(struct xe_gt *gt)
{
struct xe_device *xe = gt_to_xe(gt);
+ struct xe_mmio *mmio = &gt->mmio;
u32 ccs_mask, bcs_mask;
u32 irqs, dmask, smask;
u32 gsc_mask = 0;
u32 heci_mask = 0;
- if (IS_SRIOV_VF(xe) && xe_device_has_memirq(xe))
+ if (xe_device_uses_memirq(xe))
return;
if (xe_device_uc_enabled(xe)) {
@@ -155,35 +155,35 @@ void xe_irq_enable_hwe(struct xe_gt *gt)
if (!xe_gt_is_media_type(gt)) {
/* Enable interrupts for each engine class */
- xe_mmio_write32(gt, RENDER_COPY_INTR_ENABLE, dmask);
+ xe_mmio_write32(mmio, RENDER_COPY_INTR_ENABLE, dmask);
if (ccs_mask)
- xe_mmio_write32(gt, CCS_RSVD_INTR_ENABLE, smask);
+ xe_mmio_write32(mmio, CCS_RSVD_INTR_ENABLE, smask);
/* Unmask interrupts for each engine instance */
- xe_mmio_write32(gt, RCS0_RSVD_INTR_MASK, ~smask);
- xe_mmio_write32(gt, BCS_RSVD_INTR_MASK, ~smask);
+ xe_mmio_write32(mmio, RCS0_RSVD_INTR_MASK, ~smask);
+ xe_mmio_write32(mmio, BCS_RSVD_INTR_MASK, ~smask);
if (bcs_mask & (BIT(1)|BIT(2)))
- xe_mmio_write32(gt, XEHPC_BCS1_BCS2_INTR_MASK, ~dmask);
+ xe_mmio_write32(mmio, XEHPC_BCS1_BCS2_INTR_MASK, ~dmask);
if (bcs_mask & (BIT(3)|BIT(4)))
- xe_mmio_write32(gt, XEHPC_BCS3_BCS4_INTR_MASK, ~dmask);
+ xe_mmio_write32(mmio, XEHPC_BCS3_BCS4_INTR_MASK, ~dmask);
if (bcs_mask & (BIT(5)|BIT(6)))
- xe_mmio_write32(gt, XEHPC_BCS5_BCS6_INTR_MASK, ~dmask);
+ xe_mmio_write32(mmio, XEHPC_BCS5_BCS6_INTR_MASK, ~dmask);
if (bcs_mask & (BIT(7)|BIT(8)))
- xe_mmio_write32(gt, XEHPC_BCS7_BCS8_INTR_MASK, ~dmask);
+ xe_mmio_write32(mmio, XEHPC_BCS7_BCS8_INTR_MASK, ~dmask);
if (ccs_mask & (BIT(0)|BIT(1)))
- xe_mmio_write32(gt, CCS0_CCS1_INTR_MASK, ~dmask);
+ xe_mmio_write32(mmio, CCS0_CCS1_INTR_MASK, ~dmask);
if (ccs_mask & (BIT(2)|BIT(3)))
- xe_mmio_write32(gt, CCS2_CCS3_INTR_MASK, ~dmask);
+ xe_mmio_write32(mmio, CCS2_CCS3_INTR_MASK, ~dmask);
}
if (xe_gt_is_media_type(gt) || MEDIA_VER(xe) < 13) {
/* Enable interrupts for each engine class */
- xe_mmio_write32(gt, VCS_VECS_INTR_ENABLE, dmask);
+ xe_mmio_write32(mmio, VCS_VECS_INTR_ENABLE, dmask);
/* Unmask interrupts for each engine instance */
- xe_mmio_write32(gt, VCS0_VCS1_INTR_MASK, ~dmask);
- xe_mmio_write32(gt, VCS2_VCS3_INTR_MASK, ~dmask);
- xe_mmio_write32(gt, VECS0_VECS1_INTR_MASK, ~dmask);
+ xe_mmio_write32(mmio, VCS0_VCS1_INTR_MASK, ~dmask);
+ xe_mmio_write32(mmio, VCS2_VCS3_INTR_MASK, ~dmask);
+ xe_mmio_write32(mmio, VECS0_VECS1_INTR_MASK, ~dmask);
/*
* the heci2 interrupt is enabled via the same register as the
@@ -197,17 +197,17 @@ void xe_irq_enable_hwe(struct xe_gt *gt)
}
if (gsc_mask) {
- xe_mmio_write32(gt, GUNIT_GSC_INTR_ENABLE, gsc_mask | heci_mask);
- xe_mmio_write32(gt, GUNIT_GSC_INTR_MASK, ~gsc_mask);
+ xe_mmio_write32(mmio, GUNIT_GSC_INTR_ENABLE, gsc_mask | heci_mask);
+ xe_mmio_write32(mmio, GUNIT_GSC_INTR_MASK, ~gsc_mask);
}
if (heci_mask)
- xe_mmio_write32(gt, HECI2_RSVD_INTR_MASK, ~(heci_mask << 16));
+ xe_mmio_write32(mmio, HECI2_RSVD_INTR_MASK, ~(heci_mask << 16));
}
}
static u32
gt_engine_identity(struct xe_device *xe,
- struct xe_gt *mmio,
+ struct xe_mmio *mmio,
const unsigned int bank,
const unsigned int bit)
{
@@ -279,7 +279,7 @@ static struct xe_gt *pick_engine_gt(struct xe_tile *tile,
return tile->media_gt;
default:
break;
- };
+ }
fallthrough;
default:
return tile->primary_gt;
@@ -291,7 +291,7 @@ static void gt_irq_handler(struct xe_tile *tile,
u32 *identity)
{
struct xe_device *xe = tile_to_xe(tile);
- struct xe_gt *mmio = tile->primary_gt;
+ struct xe_mmio *mmio = &tile->mmio;
unsigned int bank, bit;
u16 instance, intr_vec;
enum xe_engine_class class;
@@ -376,7 +376,7 @@ static irqreturn_t xelp_irq_handler(int irq, void *arg)
static u32 dg1_intr_disable(struct xe_device *xe)
{
- struct xe_gt *mmio = xe_root_mmio_gt(xe);
+ struct xe_mmio *mmio = xe_root_tile_mmio(xe);
u32 val;
/* First disable interrupts */
@@ -394,7 +394,7 @@ static u32 dg1_intr_disable(struct xe_device *xe)
static void dg1_intr_enable(struct xe_device *xe, bool stall)
{
- struct xe_gt *mmio = xe_root_mmio_gt(xe);
+ struct xe_mmio *mmio = xe_root_tile_mmio(xe);
xe_mmio_write32(mmio, DG1_MSTR_TILE_INTR, DG1_MSTR_IRQ);
if (stall)
@@ -431,7 +431,7 @@ static irqreturn_t dg1_irq_handler(int irq, void *arg)
}
for_each_tile(tile, xe, id) {
- struct xe_gt *mmio = tile->primary_gt;
+ struct xe_mmio *mmio = &tile->mmio;
if ((master_tile_ctl & DG1_MSTR_TILE(tile->id)) == 0)
continue;
@@ -474,7 +474,7 @@ static irqreturn_t dg1_irq_handler(int irq, void *arg)
static void gt_irq_reset(struct xe_tile *tile)
{
- struct xe_gt *mmio = tile->primary_gt;
+ struct xe_mmio *mmio = &tile->mmio;
u32 ccs_mask = xe_hw_engine_mask_per_class(tile->primary_gt,
XE_ENGINE_CLASS_COMPUTE);
@@ -504,7 +504,7 @@ static void gt_irq_reset(struct xe_tile *tile)
if (ccs_mask & (BIT(0)|BIT(1)))
xe_mmio_write32(mmio, CCS0_CCS1_INTR_MASK, ~0);
if (ccs_mask & (BIT(2)|BIT(3)))
- xe_mmio_write32(mmio, CCS2_CCS3_INTR_MASK, ~0);
+ xe_mmio_write32(mmio, CCS2_CCS3_INTR_MASK, ~0);
if ((tile->media_gt &&
xe_hw_engine_mask_per_class(tile->media_gt, XE_ENGINE_CLASS_OTHER)) ||
@@ -547,7 +547,7 @@ static void dg1_irq_reset(struct xe_tile *tile)
static void dg1_irq_reset_mstr(struct xe_tile *tile)
{
- struct xe_gt *mmio = tile->primary_gt;
+ struct xe_mmio *mmio = &tile->mmio;
xe_mmio_write32(mmio, GFX_MSTR_IRQ, ~0);
}
@@ -566,7 +566,7 @@ static void vf_irq_reset(struct xe_device *xe)
for_each_tile(tile, xe, id) {
if (xe_device_has_memirq(xe))
- xe_memirq_reset(&tile->sriov.vf.memirq);
+ xe_memirq_reset(&tile->memirq);
else
gt_irq_reset(tile);
}
@@ -609,7 +609,7 @@ static void vf_irq_postinstall(struct xe_device *xe)
for_each_tile(tile, xe, id)
if (xe_device_has_memirq(xe))
- xe_memirq_postinstall(&tile->sriov.vf.memirq);
+ xe_memirq_postinstall(&tile->memirq);
if (GRAPHICS_VERx100(xe) < 1210)
xelp_intr_enable(xe, true);
@@ -652,7 +652,7 @@ static irqreturn_t vf_mem_irq_handler(int irq, void *arg)
spin_unlock(&xe->irq.lock);
for_each_tile(tile, xe, id)
- xe_memirq_handler(&tile->sriov.vf.memirq);
+ xe_memirq_handler(&tile->memirq);
return IRQ_HANDLED;
}
diff --git a/drivers/gpu/drm/xe/xe_lmtt.c b/drivers/gpu/drm/xe/xe_lmtt.c
index 8999ac511555..a60ceae4c6dd 100644
--- a/drivers/gpu/drm/xe/xe_lmtt.c
+++ b/drivers/gpu/drm/xe/xe_lmtt.c
@@ -193,7 +193,7 @@ static void lmtt_setup_dir_ptr(struct xe_lmtt *lmtt)
lmtt_assert(lmtt, xe_bo_is_vram(lmtt->pd->bo));
lmtt_assert(lmtt, IS_ALIGNED(offset, SZ_64K));
- xe_mmio_write32(tile->primary_gt,
+ xe_mmio_write32(&tile->mmio,
GRAPHICS_VER(xe) >= 20 ? XE2_LMEM_CFG : LMEM_CFG,
LMEM_EN | REG_FIELD_PREP(LMTT_DIR_PTR, offset / SZ_64K));
}
diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c
index aec7db39c061..4f64c7f4e68d 100644
--- a/drivers/gpu/drm/xe/xe_lrc.c
+++ b/drivers/gpu/drm/xe/xe_lrc.c
@@ -38,24 +38,6 @@
#define LRC_INDIRECT_RING_STATE_SIZE SZ_4K
-struct xe_lrc_snapshot {
- struct xe_bo *lrc_bo;
- void *lrc_snapshot;
- unsigned long lrc_size, lrc_offset;
-
- u32 context_desc;
- u32 indirect_context_desc;
- u32 head;
- struct {
- u32 internal;
- u32 memory;
- } tail;
- u32 start_seqno;
- u32 seqno;
- u32 ctx_timestamp;
- u32 ctx_job_timestamp;
-};
-
static struct xe_device *
lrc_to_xe(struct xe_lrc *lrc)
{
@@ -599,10 +581,10 @@ static void set_context_control(u32 *regs, struct xe_hw_engine *hwe)
static void set_memory_based_intr(u32 *regs, struct xe_hw_engine *hwe)
{
- struct xe_memirq *memirq = &gt_to_tile(hwe->gt)->sriov.vf.memirq;
+ struct xe_memirq *memirq = &gt_to_tile(hwe->gt)->memirq;
struct xe_device *xe = gt_to_xe(hwe->gt);
- if (!IS_SRIOV_VF(xe) || !xe_device_has_memirq(xe))
+ if (!xe_device_uses_memirq(xe))
return;
regs[CTX_LRM_INT_MASK_ENABLE] = MI_LOAD_REGISTER_MEM |
@@ -613,9 +595,9 @@ static void set_memory_based_intr(u32 *regs, struct xe_hw_engine *hwe)
regs[CTX_LRI_INT_REPORT_PTR] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(2) |
MI_LRI_LRM_CS_MMIO | MI_LRI_FORCE_POSTED;
regs[CTX_INT_STATUS_REPORT_REG] = RING_INT_STATUS_RPT_PTR(0).addr;
- regs[CTX_INT_STATUS_REPORT_PTR] = xe_memirq_status_ptr(memirq);
+ regs[CTX_INT_STATUS_REPORT_PTR] = xe_memirq_status_ptr(memirq, hwe);
regs[CTX_INT_SRC_REPORT_REG] = RING_INT_SRC_RPT_PTR(0).addr;
- regs[CTX_INT_SRC_REPORT_PTR] = xe_memirq_source_ptr(memirq);
+ regs[CTX_INT_SRC_REPORT_PTR] = xe_memirq_source_ptr(memirq, hwe);
}
static int lrc_ring_mi_mode(struct xe_hw_engine *hwe)
diff --git a/drivers/gpu/drm/xe/xe_lrc.h b/drivers/gpu/drm/xe/xe_lrc.h
index c24542e89318..40d8f6906d3e 100644
--- a/drivers/gpu/drm/xe/xe_lrc.h
+++ b/drivers/gpu/drm/xe/xe_lrc.h
@@ -17,9 +17,26 @@ enum xe_engine_class;
struct xe_gt;
struct xe_hw_engine;
struct xe_lrc;
-struct xe_lrc_snapshot;
struct xe_vm;
+struct xe_lrc_snapshot {
+ struct xe_bo *lrc_bo;
+ void *lrc_snapshot;
+ unsigned long lrc_size, lrc_offset;
+
+ u32 context_desc;
+ u32 indirect_context_desc;
+ u32 head;
+ struct {
+ u32 internal;
+ u32 memory;
+ } tail;
+ u32 start_seqno;
+ u32 seqno;
+ u32 ctx_timestamp;
+ u32 ctx_job_timestamp;
+};
+
#define LRC_PPHWSP_SCRATCH_ADDR (0x34 * 4)
struct xe_lrc *xe_lrc_create(struct xe_hw_engine *hwe, struct xe_vm *vm,
diff --git a/drivers/gpu/drm/xe/xe_memirq.c b/drivers/gpu/drm/xe/xe_memirq.c
index 95b6e9d7b7db..f833da88150a 100644
--- a/drivers/gpu/drm/xe/xe_memirq.c
+++ b/drivers/gpu/drm/xe/xe_memirq.c
@@ -5,8 +5,8 @@
#include <drm/drm_managed.h>
-#include "regs/xe_gt_regs.h"
#include "regs/xe_guc_regs.h"
+#include "regs/xe_irq_regs.h"
#include "regs/xe_regs.h"
#include "xe_assert.h"
@@ -19,15 +19,25 @@
#include "xe_hw_engine.h"
#include "xe_map.h"
#include "xe_memirq.h"
-#include "xe_sriov.h"
-#include "xe_sriov_printk.h"
#define memirq_assert(m, condition) xe_tile_assert(memirq_to_tile(m), condition)
-#define memirq_debug(m, msg...) xe_sriov_dbg_verbose(memirq_to_xe(m), "MEMIRQ: " msg)
+#define memirq_printk(m, _level, _fmt, ...) \
+ drm_##_level(&memirq_to_xe(m)->drm, "MEMIRQ%u: " _fmt, \
+ memirq_to_tile(m)->id, ##__VA_ARGS__)
+
+#ifdef CONFIG_DRM_XE_DEBUG_MEMIRQ
+#define memirq_debug(m, _fmt, ...) memirq_printk(m, dbg, _fmt, ##__VA_ARGS__)
+#else
+#define memirq_debug(...)
+#endif
+
+#define memirq_err(m, _fmt, ...) memirq_printk(m, err, _fmt, ##__VA_ARGS__)
+#define memirq_err_ratelimited(m, _fmt, ...) \
+ memirq_printk(m, err_ratelimited, _fmt, ##__VA_ARGS__)
static struct xe_tile *memirq_to_tile(struct xe_memirq *memirq)
{
- return container_of(memirq, struct xe_tile, sriov.vf.memirq);
+ return container_of(memirq, struct xe_tile, memirq);
}
static struct xe_device *memirq_to_xe(struct xe_memirq *memirq)
@@ -105,6 +115,44 @@ static const char *guc_name(struct xe_guc *guc)
* | |
* | |
* +-----------+
+ *
+ *
+ * MSI-X use case
+ *
+ * When using MSI-X, hw engines report interrupt status and source to engine
+ * instance 0. For this scenario, in order to differentiate between the
+ * engines, we need to pass different status/source pointers in the LRC.
+ *
+ * The requirements on those pointers are:
+ * - Interrupt status should be 4KiB aligned
+ * - Interrupt source should be 64 bytes aligned
+ *
+ * To accommodate this, we duplicate the memirq page layout above -
+ * allocating a page for each engine instance and pass this page in the LRC.
+ * Note that the same page can be reused for different engine types.
+ * For example, an LRC executing on CCS #x will have pointers to page #x,
+ * and an LRC executing on BCS #x will have the same pointers.
+ *
+ * ::
+ *
+ * 0x0000 +==============================+ <== page for instance 0 (BCS0, CCS0, etc.)
+ * | Interrupt Status Report Page |
+ * 0x0400 +==============================+
+ * | Interrupt Source Report Page |
+ * 0x0440 +==============================+
+ * | Interrupt Enable Mask |
+ * +==============================+
+ * | Not used |
+ * 0x1000 +==============================+ <== page for instance 1 (BCS1, CCS1, etc.)
+ * | Interrupt Status Report Page |
+ * 0x1400 +==============================+
+ * | Interrupt Source Report Page |
+ * 0x1440 +==============================+
+ * | Not used |
+ * 0x2000 +==============================+ <== page for instance 2 (BCS2, CCS2, etc.)
+ * | ... |
+ * +==============================+
+ *
*/
static void __release_xe_bo(struct drm_device *drm, void *arg)
@@ -114,18 +162,30 @@ static void __release_xe_bo(struct drm_device *drm, void *arg)
xe_bo_unpin_map_no_vm(bo);
}
+static inline bool hw_reports_to_instance_zero(struct xe_memirq *memirq)
+{
+ /*
+ * When the HW engines are configured to use MSI-X,
+ * they report interrupt status and source to the offset of
+ * engine instance 0.
+ */
+ return xe_device_has_msix(memirq_to_xe(memirq));
+}
+
static int memirq_alloc_pages(struct xe_memirq *memirq)
{
struct xe_device *xe = memirq_to_xe(memirq);
struct xe_tile *tile = memirq_to_tile(memirq);
+ size_t bo_size = hw_reports_to_instance_zero(memirq) ?
+ XE_HW_ENGINE_MAX_INSTANCE * SZ_4K : SZ_4K;
struct xe_bo *bo;
int err;
- BUILD_BUG_ON(!IS_ALIGNED(XE_MEMIRQ_SOURCE_OFFSET, SZ_64));
- BUILD_BUG_ON(!IS_ALIGNED(XE_MEMIRQ_STATUS_OFFSET, SZ_4K));
+ BUILD_BUG_ON(!IS_ALIGNED(XE_MEMIRQ_SOURCE_OFFSET(0), SZ_64));
+ BUILD_BUG_ON(!IS_ALIGNED(XE_MEMIRQ_STATUS_OFFSET(0), SZ_4K));
/* XXX: convert to managed bo */
- bo = xe_bo_create_pin_map(xe, tile, NULL, SZ_4K,
+ bo = xe_bo_create_pin_map(xe, tile, NULL, bo_size,
ttm_bo_type_kernel,
XE_BO_FLAG_SYSTEM |
XE_BO_FLAG_GGTT |
@@ -140,25 +200,25 @@ static int memirq_alloc_pages(struct xe_memirq *memirq)
memirq_assert(memirq, !xe_bo_is_vram(bo));
memirq_assert(memirq, !memirq->bo);
- iosys_map_memset(&bo->vmap, 0, 0, SZ_4K);
+ iosys_map_memset(&bo->vmap, 0, 0, bo_size);
memirq->bo = bo;
- memirq->source = IOSYS_MAP_INIT_OFFSET(&bo->vmap, XE_MEMIRQ_SOURCE_OFFSET);
- memirq->status = IOSYS_MAP_INIT_OFFSET(&bo->vmap, XE_MEMIRQ_STATUS_OFFSET);
+ memirq->source = IOSYS_MAP_INIT_OFFSET(&bo->vmap, XE_MEMIRQ_SOURCE_OFFSET(0));
+ memirq->status = IOSYS_MAP_INIT_OFFSET(&bo->vmap, XE_MEMIRQ_STATUS_OFFSET(0));
memirq->mask = IOSYS_MAP_INIT_OFFSET(&bo->vmap, XE_MEMIRQ_ENABLE_OFFSET);
memirq_assert(memirq, !memirq->source.is_iomem);
memirq_assert(memirq, !memirq->status.is_iomem);
memirq_assert(memirq, !memirq->mask.is_iomem);
- memirq_debug(memirq, "page offsets: source %#x status %#x\n",
- xe_memirq_source_ptr(memirq), xe_memirq_status_ptr(memirq));
+ memirq_debug(memirq, "page offsets: bo %#x bo_size %zu source %#x status %#x\n",
+ xe_bo_ggtt_addr(bo), bo_size, XE_MEMIRQ_SOURCE_OFFSET(0),
+ XE_MEMIRQ_STATUS_OFFSET(0));
return drmm_add_action_or_reset(&xe->drm, __release_xe_bo, memirq->bo);
out:
- xe_sriov_err(memirq_to_xe(memirq),
- "Failed to allocate memirq page (%pe)\n", ERR_PTR(err));
+ memirq_err(memirq, "Failed to allocate memirq page (%pe)\n", ERR_PTR(err));
return err;
}
@@ -178,9 +238,7 @@ static void memirq_set_enable(struct xe_memirq *memirq, bool enable)
*
* These allocations are managed and will be implicitly released on unload.
*
- * Note: This function shall be called only by the VF driver.
- *
- * If this function fails then VF driver won't be able to operate correctly.
+ * If this function fails then the driver won't be able to operate correctly.
* If `Memory Based Interrupts`_ are not used this function will return 0.
*
* Return: 0 on success or a negative error code on failure.
@@ -190,9 +248,7 @@ int xe_memirq_init(struct xe_memirq *memirq)
struct xe_device *xe = memirq_to_xe(memirq);
int err;
- memirq_assert(memirq, IS_SRIOV_VF(xe));
-
- if (!xe_device_has_memirq(xe))
+ if (!xe_device_uses_memirq(xe))
return 0;
err = memirq_alloc_pages(memirq);
@@ -205,55 +261,70 @@ int xe_memirq_init(struct xe_memirq *memirq)
return 0;
}
+static u32 __memirq_source_page(struct xe_memirq *memirq, u16 instance)
+{
+ memirq_assert(memirq, instance <= XE_HW_ENGINE_MAX_INSTANCE);
+ memirq_assert(memirq, memirq->bo);
+
+ instance = hw_reports_to_instance_zero(memirq) ? instance : 0;
+ return xe_bo_ggtt_addr(memirq->bo) + XE_MEMIRQ_SOURCE_OFFSET(instance);
+}
+
/**
* xe_memirq_source_ptr - Get GGTT's offset of the `Interrupt Source Report Page`_.
* @memirq: the &xe_memirq to query
+ * @hwe: the hw engine for which we want the report page
*
- * Shall be called only on VF driver when `Memory Based Interrupts`_ are used
+ * Shall be called when `Memory Based Interrupts`_ are used
* and xe_memirq_init() didn't fail.
*
* Return: GGTT's offset of the `Interrupt Source Report Page`_.
*/
-u32 xe_memirq_source_ptr(struct xe_memirq *memirq)
+u32 xe_memirq_source_ptr(struct xe_memirq *memirq, struct xe_hw_engine *hwe)
{
- memirq_assert(memirq, IS_SRIOV_VF(memirq_to_xe(memirq)));
- memirq_assert(memirq, xe_device_has_memirq(memirq_to_xe(memirq)));
+ memirq_assert(memirq, xe_device_uses_memirq(memirq_to_xe(memirq)));
+
+ return __memirq_source_page(memirq, hwe->instance);
+}
+
+static u32 __memirq_status_page(struct xe_memirq *memirq, u16 instance)
+{
+ memirq_assert(memirq, instance <= XE_HW_ENGINE_MAX_INSTANCE);
memirq_assert(memirq, memirq->bo);
- return xe_bo_ggtt_addr(memirq->bo) + XE_MEMIRQ_SOURCE_OFFSET;
+ instance = hw_reports_to_instance_zero(memirq) ? instance : 0;
+ return xe_bo_ggtt_addr(memirq->bo) + XE_MEMIRQ_STATUS_OFFSET(instance);
}
/**
* xe_memirq_status_ptr - Get GGTT's offset of the `Interrupt Status Report Page`_.
* @memirq: the &xe_memirq to query
+ * @hwe: the hw engine for which we want the report page
*
- * Shall be called only on VF driver when `Memory Based Interrupts`_ are used
+ * Shall be called when `Memory Based Interrupts`_ are used
* and xe_memirq_init() didn't fail.
*
* Return: GGTT's offset of the `Interrupt Status Report Page`_.
*/
-u32 xe_memirq_status_ptr(struct xe_memirq *memirq)
+u32 xe_memirq_status_ptr(struct xe_memirq *memirq, struct xe_hw_engine *hwe)
{
- memirq_assert(memirq, IS_SRIOV_VF(memirq_to_xe(memirq)));
- memirq_assert(memirq, xe_device_has_memirq(memirq_to_xe(memirq)));
- memirq_assert(memirq, memirq->bo);
+ memirq_assert(memirq, xe_device_uses_memirq(memirq_to_xe(memirq)));
- return xe_bo_ggtt_addr(memirq->bo) + XE_MEMIRQ_STATUS_OFFSET;
+ return __memirq_status_page(memirq, hwe->instance);
}
/**
* xe_memirq_enable_ptr - Get GGTT's offset of the Interrupt Enable Mask.
* @memirq: the &xe_memirq to query
*
- * Shall be called only on VF driver when `Memory Based Interrupts`_ are used
+ * Shall be called when `Memory Based Interrupts`_ are used
* and xe_memirq_init() didn't fail.
*
* Return: GGTT's offset of the Interrupt Enable Mask.
*/
u32 xe_memirq_enable_ptr(struct xe_memirq *memirq)
{
- memirq_assert(memirq, IS_SRIOV_VF(memirq_to_xe(memirq)));
- memirq_assert(memirq, xe_device_has_memirq(memirq_to_xe(memirq)));
+ memirq_assert(memirq, xe_device_uses_memirq(memirq_to_xe(memirq)));
memirq_assert(memirq, memirq->bo);
return xe_bo_ggtt_addr(memirq->bo) + XE_MEMIRQ_ENABLE_OFFSET;
@@ -267,7 +338,7 @@ u32 xe_memirq_enable_ptr(struct xe_memirq *memirq)
* Register `Interrupt Source Report Page`_ and `Interrupt Status Report Page`_
* to be used by the GuC when `Memory Based Interrupts`_ are required.
*
- * Shall be called only on VF driver when `Memory Based Interrupts`_ are used
+ * Shall be called when `Memory Based Interrupts`_ are used
* and xe_memirq_init() didn't fail.
*
* Return: 0 on success or a negative error code on failure.
@@ -279,12 +350,10 @@ int xe_memirq_init_guc(struct xe_memirq *memirq, struct xe_guc *guc)
u32 source, status;
int err;
- memirq_assert(memirq, IS_SRIOV_VF(memirq_to_xe(memirq)));
- memirq_assert(memirq, xe_device_has_memirq(memirq_to_xe(memirq)));
- memirq_assert(memirq, memirq->bo);
+ memirq_assert(memirq, xe_device_uses_memirq(memirq_to_xe(memirq)));
- source = xe_memirq_source_ptr(memirq) + offset;
- status = xe_memirq_status_ptr(memirq) + offset * SZ_16;
+ source = __memirq_source_page(memirq, 0) + offset;
+ status = __memirq_status_page(memirq, 0) + offset * SZ_16;
err = xe_guc_self_cfg64(guc, GUC_KLV_SELF_CFG_MEMIRQ_SOURCE_ADDR_KEY,
source);
@@ -299,9 +368,8 @@ int xe_memirq_init_guc(struct xe_memirq *memirq, struct xe_guc *guc)
return 0;
failed:
- xe_sriov_err(memirq_to_xe(memirq),
- "Failed to setup report pages in %s (%pe)\n",
- guc_name(guc), ERR_PTR(err));
+ memirq_err(memirq, "Failed to setup report pages in %s (%pe)\n",
+ guc_name(guc), ERR_PTR(err));
return err;
}
@@ -311,13 +379,12 @@ failed:
*
* This is part of the driver IRQ setup flow.
*
- * This function shall only be used by the VF driver on platforms that use
+ * This function shall only be used on platforms that use
* `Memory Based Interrupts`_.
*/
void xe_memirq_reset(struct xe_memirq *memirq)
{
- memirq_assert(memirq, IS_SRIOV_VF(memirq_to_xe(memirq)));
- memirq_assert(memirq, xe_device_has_memirq(memirq_to_xe(memirq)));
+ memirq_assert(memirq, xe_device_uses_memirq(memirq_to_xe(memirq)));
if (memirq->bo)
memirq_set_enable(memirq, false);
@@ -329,13 +396,12 @@ void xe_memirq_reset(struct xe_memirq *memirq)
*
* This is part of the driver IRQ setup flow.
*
- * This function shall only be used by the VF driver on platforms that use
+ * This function shall only be used on platforms that use
* `Memory Based Interrupts`_.
*/
void xe_memirq_postinstall(struct xe_memirq *memirq)
{
- memirq_assert(memirq, IS_SRIOV_VF(memirq_to_xe(memirq)));
- memirq_assert(memirq, xe_device_has_memirq(memirq_to_xe(memirq)));
+ memirq_assert(memirq, xe_device_uses_memirq(memirq_to_xe(memirq)));
if (memirq->bo)
memirq_set_enable(memirq, true);
@@ -349,9 +415,9 @@ static bool memirq_received(struct xe_memirq *memirq, struct iosys_map *vector,
value = iosys_map_rd(vector, offset, u8);
if (value) {
if (value != 0xff)
- xe_sriov_err_ratelimited(memirq_to_xe(memirq),
- "Unexpected memirq value %#x from %s at %u\n",
- value, name, offset);
+ memirq_err_ratelimited(memirq,
+ "Unexpected memirq value %#x from %s at %u\n",
+ value, name, offset);
iosys_map_wr(vector, offset, u8, 0x00);
}
@@ -379,6 +445,28 @@ static void memirq_dispatch_guc(struct xe_memirq *memirq, struct iosys_map *stat
}
/**
+ * xe_memirq_hwe_handler - Check and process interrupts for a specific HW engine.
+ * @memirq: the &xe_memirq
+ * @hwe: the hw engine to process
+ *
+ * This function reads and dispatches `Memory Based Interrupts` for the provided HW engine.
+ */
+void xe_memirq_hwe_handler(struct xe_memirq *memirq, struct xe_hw_engine *hwe)
+{
+ u16 offset = hwe->irq_offset;
+ u16 instance = hw_reports_to_instance_zero(memirq) ? hwe->instance : 0;
+ struct iosys_map src_offset = IOSYS_MAP_INIT_OFFSET(&memirq->bo->vmap,
+ XE_MEMIRQ_SOURCE_OFFSET(instance));
+
+ if (memirq_received(memirq, &src_offset, offset, "SRC")) {
+ struct iosys_map status_offset =
+ IOSYS_MAP_INIT_OFFSET(&memirq->bo->vmap,
+ XE_MEMIRQ_STATUS_OFFSET(instance) + offset * SZ_16);
+ memirq_dispatch_engine(memirq, &status_offset, hwe);
+ }
+}
+
+/**
* xe_memirq_handler - The `Memory Based Interrupts`_ Handler.
* @memirq: the &xe_memirq
*
@@ -405,13 +493,8 @@ void xe_memirq_handler(struct xe_memirq *memirq)
if (gt->tile != tile)
continue;
- for_each_hw_engine(hwe, gt, id) {
- if (memirq_received(memirq, &memirq->source, hwe->irq_offset, "SRC")) {
- map = IOSYS_MAP_INIT_OFFSET(&memirq->status,
- hwe->irq_offset * SZ_16);
- memirq_dispatch_engine(memirq, &map, hwe);
- }
- }
+ for_each_hw_engine(hwe, gt, id)
+ xe_memirq_hwe_handler(memirq, hwe);
}
/* GuC and media GuC (if present) must be checked separately */
diff --git a/drivers/gpu/drm/xe/xe_memirq.h b/drivers/gpu/drm/xe/xe_memirq.h
index 2d40d03c3095..06130650e9d6 100644
--- a/drivers/gpu/drm/xe/xe_memirq.h
+++ b/drivers/gpu/drm/xe/xe_memirq.h
@@ -9,16 +9,18 @@
#include <linux/types.h>
struct xe_guc;
+struct xe_hw_engine;
struct xe_memirq;
int xe_memirq_init(struct xe_memirq *memirq);
-u32 xe_memirq_source_ptr(struct xe_memirq *memirq);
-u32 xe_memirq_status_ptr(struct xe_memirq *memirq);
+u32 xe_memirq_source_ptr(struct xe_memirq *memirq, struct xe_hw_engine *hwe);
+u32 xe_memirq_status_ptr(struct xe_memirq *memirq, struct xe_hw_engine *hwe);
u32 xe_memirq_enable_ptr(struct xe_memirq *memirq);
void xe_memirq_reset(struct xe_memirq *memirq);
void xe_memirq_postinstall(struct xe_memirq *memirq);
+void xe_memirq_hwe_handler(struct xe_memirq *memirq, struct xe_hw_engine *hwe);
void xe_memirq_handler(struct xe_memirq *memirq);
int xe_memirq_init_guc(struct xe_memirq *memirq, struct xe_guc *guc);
diff --git a/drivers/gpu/drm/xe/xe_memirq_types.h b/drivers/gpu/drm/xe/xe_memirq_types.h
index 625b6b8736cc..9d0f6c1cdb9d 100644
--- a/drivers/gpu/drm/xe/xe_memirq_types.h
+++ b/drivers/gpu/drm/xe/xe_memirq_types.h
@@ -11,9 +11,9 @@
struct xe_bo;
/* ISR */
-#define XE_MEMIRQ_STATUS_OFFSET 0x0
+#define XE_MEMIRQ_STATUS_OFFSET(inst) ((inst) * SZ_4K + 0x0)
/* IIR */
-#define XE_MEMIRQ_SOURCE_OFFSET 0x400
+#define XE_MEMIRQ_SOURCE_OFFSET(inst) ((inst) * SZ_4K + 0x400)
/* IMR */
#define XE_MEMIRQ_ENABLE_OFFSET 0x440
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index cfd31ae49cc1..1b97d90aadda 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -209,7 +209,8 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
num_entries * XE_PAGE_SIZE,
ttm_bo_type_kernel,
XE_BO_FLAG_VRAM_IF_DGFX(tile) |
- XE_BO_FLAG_PINNED);
+ XE_BO_FLAG_PINNED |
+ XE_BO_FLAG_PAGETABLE);
if (IS_ERR(bo))
return PTR_ERR(bo);
@@ -1350,6 +1351,7 @@ __xe_migrate_update_pgtables(struct xe_migrate *m,
/* For sysmem PTE's, need to map them in our hole.. */
if (!IS_DGFX(xe)) {
+ u16 pat_index = xe->pat.idx[XE_CACHE_WB];
u32 ptes, ofs;
ppgtt_ofs = NUM_KERNEL_PDE - 1;
@@ -1409,7 +1411,7 @@ __xe_migrate_update_pgtables(struct xe_migrate *m,
pt_bo->update_index = current_update;
addr = vm->pt_ops->pte_encode_bo(pt_bo, 0,
- XE_CACHE_WB, 0);
+ pat_index, 0);
bb->cs[bb->len++] = lower_32_bits(addr);
bb->cs[bb->len++] = upper_32_bits(addr);
}
diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c
index 3fd462fda625..a48f239cad1c 100644
--- a/drivers/gpu/drm/xe/xe_mmio.c
+++ b/drivers/gpu/drm/xe/xe_mmio.c
@@ -36,13 +36,19 @@ static void tiles_fini(void *arg)
/*
* On multi-tile devices, partition the BAR space for MMIO on each tile,
* possibly accounting for register override on the number of tiles available.
+ * tile_mmio_size contains both the tile's 4MB register space, as well as
+ * additional space for the GTT and other (possibly unused) regions).
* Resulting memory layout is like below:
*
* .----------------------. <- tile_count * tile_mmio_size
* | .... |
* |----------------------| <- 2 * tile_mmio_size
+ * | tile1 GTT + other |
+ * |----------------------| <- 1 * tile_mmio_size + 4MB
* | tile1->mmio.regs |
* |----------------------| <- 1 * tile_mmio_size
+ * | tile0 GTT + other |
+ * |----------------------| <- 4MB
* | tile0->mmio.regs |
* '----------------------' <- 0MB
*/
@@ -61,16 +67,16 @@ static void mmio_multi_tile_setup(struct xe_device *xe, size_t tile_mmio_size)
/* Possibly override number of tile based on configuration register */
if (!xe->info.skip_mtcfg) {
- struct xe_gt *gt = xe_root_mmio_gt(xe);
+ struct xe_mmio *mmio = xe_root_tile_mmio(xe);
u8 tile_count;
u32 mtcfg;
/*
* Although the per-tile mmio regs are not yet initialized, this
- * is fine as it's going to the root gt, that's guaranteed to be
- * initialized earlier in xe_mmio_init()
+ * is fine as it's going to the root tile's mmio, that's
+ * guaranteed to be initialized earlier in xe_mmio_init()
*/
- mtcfg = xe_mmio_read64_2x32(gt, XEHP_MTCFG_ADDR);
+ mtcfg = xe_mmio_read64_2x32(mmio, XEHP_MTCFG_ADDR);
tile_count = REG_FIELD_GET(TILE_COUNT, mtcfg) + 1;
if (tile_count < xe->info.tile_count) {
@@ -90,8 +96,9 @@ static void mmio_multi_tile_setup(struct xe_device *xe, size_t tile_mmio_size)
regs = xe->mmio.regs;
for_each_tile(tile, xe, id) {
- tile->mmio.size = tile_mmio_size;
+ tile->mmio.regs_size = SZ_4M;
tile->mmio.regs = regs;
+ tile->mmio.tile = tile;
regs += tile_mmio_size;
}
}
@@ -126,8 +133,9 @@ static void mmio_extension_setup(struct xe_device *xe, size_t tile_mmio_size,
regs = xe->mmio.regs + tile_mmio_size * xe->info.tile_count;
for_each_tile(tile, xe, id) {
- tile->mmio_ext.size = tile_mmio_ext_size;
+ tile->mmio_ext.regs_size = tile_mmio_ext_size;
tile->mmio_ext.regs = regs;
+ tile->mmio_ext.tile = tile;
regs += tile_mmio_ext_size;
}
}
@@ -157,137 +165,132 @@ int xe_mmio_init(struct xe_device *xe)
{
struct xe_tile *root_tile = xe_device_get_root_tile(xe);
struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
- const int mmio_bar = 0;
/*
* Map the entire BAR.
* The first 16MB of the BAR, belong to the root tile, and include:
* registers (0-4MB), reserved space (4MB-8MB) and GGTT (8MB-16MB).
*/
- xe->mmio.size = pci_resource_len(pdev, mmio_bar);
- xe->mmio.regs = pci_iomap(pdev, mmio_bar, GTTMMADR_BAR);
+ xe->mmio.size = pci_resource_len(pdev, GTTMMADR_BAR);
+ xe->mmio.regs = pci_iomap(pdev, GTTMMADR_BAR, 0);
if (xe->mmio.regs == NULL) {
drm_err(&xe->drm, "failed to map registers\n");
return -EIO;
}
/* Setup first tile; other tiles (if present) will be setup later. */
- root_tile->mmio.size = SZ_16M;
+ root_tile->mmio.regs_size = SZ_4M;
root_tile->mmio.regs = xe->mmio.regs;
+ root_tile->mmio.tile = root_tile;
return devm_add_action_or_reset(xe->drm.dev, mmio_fini, xe);
}
-static void mmio_flush_pending_writes(struct xe_gt *gt)
+static void mmio_flush_pending_writes(struct xe_mmio *mmio)
{
#define DUMMY_REG_OFFSET 0x130030
- struct xe_tile *tile = gt_to_tile(gt);
int i;
- if (tile->xe->info.platform != XE_LUNARLAKE)
+ if (mmio->tile->xe->info.platform != XE_LUNARLAKE)
return;
/* 4 dummy writes */
for (i = 0; i < 4; i++)
- writel(0, tile->mmio.regs + DUMMY_REG_OFFSET);
+ writel(0, mmio->regs + DUMMY_REG_OFFSET);
}
-u8 xe_mmio_read8(struct xe_gt *gt, struct xe_reg reg)
+u8 xe_mmio_read8(struct xe_mmio *mmio, struct xe_reg reg)
{
- struct xe_tile *tile = gt_to_tile(gt);
- u32 addr = xe_mmio_adjusted_addr(gt, reg.addr);
+ u32 addr = xe_mmio_adjusted_addr(mmio, reg.addr);
u8 val;
/* Wa_15015404425 */
- mmio_flush_pending_writes(gt);
+ mmio_flush_pending_writes(mmio);
- val = readb((reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + addr);
- trace_xe_reg_rw(gt, false, addr, val, sizeof(val));
+ val = readb(mmio->regs + addr);
+ trace_xe_reg_rw(mmio, false, addr, val, sizeof(val));
return val;
}
-u16 xe_mmio_read16(struct xe_gt *gt, struct xe_reg reg)
+u16 xe_mmio_read16(struct xe_mmio *mmio, struct xe_reg reg)
{
- struct xe_tile *tile = gt_to_tile(gt);
- u32 addr = xe_mmio_adjusted_addr(gt, reg.addr);
+ u32 addr = xe_mmio_adjusted_addr(mmio, reg.addr);
u16 val;
/* Wa_15015404425 */
- mmio_flush_pending_writes(gt);
+ mmio_flush_pending_writes(mmio);
- val = readw((reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + addr);
- trace_xe_reg_rw(gt, false, addr, val, sizeof(val));
+ val = readw(mmio->regs + addr);
+ trace_xe_reg_rw(mmio, false, addr, val, sizeof(val));
return val;
}
-void xe_mmio_write32(struct xe_gt *gt, struct xe_reg reg, u32 val)
+void xe_mmio_write32(struct xe_mmio *mmio, struct xe_reg reg, u32 val)
{
- struct xe_tile *tile = gt_to_tile(gt);
- u32 addr = xe_mmio_adjusted_addr(gt, reg.addr);
+ u32 addr = xe_mmio_adjusted_addr(mmio, reg.addr);
- trace_xe_reg_rw(gt, true, addr, val, sizeof(val));
+ trace_xe_reg_rw(mmio, true, addr, val, sizeof(val));
- if (!reg.vf && IS_SRIOV_VF(gt_to_xe(gt)))
- xe_gt_sriov_vf_write32(gt, reg, val);
+ if (!reg.vf && mmio->sriov_vf_gt)
+ xe_gt_sriov_vf_write32(mmio->sriov_vf_gt, reg, val);
else
- writel(val, (reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + addr);
+ writel(val, mmio->regs + addr);
}
-u32 xe_mmio_read32(struct xe_gt *gt, struct xe_reg reg)
+u32 xe_mmio_read32(struct xe_mmio *mmio, struct xe_reg reg)
{
- struct xe_tile *tile = gt_to_tile(gt);
- u32 addr = xe_mmio_adjusted_addr(gt, reg.addr);
+ u32 addr = xe_mmio_adjusted_addr(mmio, reg.addr);
u32 val;
/* Wa_15015404425 */
- mmio_flush_pending_writes(gt);
+ mmio_flush_pending_writes(mmio);
- if (!reg.vf && IS_SRIOV_VF(gt_to_xe(gt)))
- val = xe_gt_sriov_vf_read32(gt, reg);
+ if (!reg.vf && mmio->sriov_vf_gt)
+ val = xe_gt_sriov_vf_read32(mmio->sriov_vf_gt, reg);
else
- val = readl((reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + addr);
+ val = readl(mmio->regs + addr);
- trace_xe_reg_rw(gt, false, addr, val, sizeof(val));
+ trace_xe_reg_rw(mmio, false, addr, val, sizeof(val));
return val;
}
-u32 xe_mmio_rmw32(struct xe_gt *gt, struct xe_reg reg, u32 clr, u32 set)
+u32 xe_mmio_rmw32(struct xe_mmio *mmio, struct xe_reg reg, u32 clr, u32 set)
{
u32 old, reg_val;
- old = xe_mmio_read32(gt, reg);
+ old = xe_mmio_read32(mmio, reg);
reg_val = (old & ~clr) | set;
- xe_mmio_write32(gt, reg, reg_val);
+ xe_mmio_write32(mmio, reg, reg_val);
return old;
}
-int xe_mmio_write32_and_verify(struct xe_gt *gt,
+int xe_mmio_write32_and_verify(struct xe_mmio *mmio,
struct xe_reg reg, u32 val, u32 mask, u32 eval)
{
u32 reg_val;
- xe_mmio_write32(gt, reg, val);
- reg_val = xe_mmio_read32(gt, reg);
+ xe_mmio_write32(mmio, reg, val);
+ reg_val = xe_mmio_read32(mmio, reg);
return (reg_val & mask) != eval ? -EINVAL : 0;
}
-bool xe_mmio_in_range(const struct xe_gt *gt,
+bool xe_mmio_in_range(const struct xe_mmio *mmio,
const struct xe_mmio_range *range,
struct xe_reg reg)
{
- u32 addr = xe_mmio_adjusted_addr(gt, reg.addr);
+ u32 addr = xe_mmio_adjusted_addr(mmio, reg.addr);
return range && addr >= range->start && addr <= range->end;
}
/**
* xe_mmio_read64_2x32() - Read a 64-bit register as two 32-bit reads
- * @gt: MMIO target GT
+ * @mmio: MMIO target
* @reg: register to read value from
*
* Although Intel GPUs have some 64-bit registers, the hardware officially
@@ -307,21 +310,21 @@ bool xe_mmio_in_range(const struct xe_gt *gt,
*
* Returns the value of the 64-bit register.
*/
-u64 xe_mmio_read64_2x32(struct xe_gt *gt, struct xe_reg reg)
+u64 xe_mmio_read64_2x32(struct xe_mmio *mmio, struct xe_reg reg)
{
struct xe_reg reg_udw = { .addr = reg.addr + 0x4 };
u32 ldw, udw, oldudw, retries;
- reg.addr = xe_mmio_adjusted_addr(gt, reg.addr);
- reg_udw.addr = xe_mmio_adjusted_addr(gt, reg_udw.addr);
+ reg.addr = xe_mmio_adjusted_addr(mmio, reg.addr);
+ reg_udw.addr = xe_mmio_adjusted_addr(mmio, reg_udw.addr);
/* we shouldn't adjust just one register address */
- xe_gt_assert(gt, reg_udw.addr == reg.addr + 0x4);
+ xe_tile_assert(mmio->tile, reg_udw.addr == reg.addr + 0x4);
- oldudw = xe_mmio_read32(gt, reg_udw);
+ oldudw = xe_mmio_read32(mmio, reg_udw);
for (retries = 5; retries; --retries) {
- ldw = xe_mmio_read32(gt, reg);
- udw = xe_mmio_read32(gt, reg_udw);
+ ldw = xe_mmio_read32(mmio, reg);
+ udw = xe_mmio_read32(mmio, reg_udw);
if (udw == oldudw)
break;
@@ -329,13 +332,13 @@ u64 xe_mmio_read64_2x32(struct xe_gt *gt, struct xe_reg reg)
oldudw = udw;
}
- xe_gt_WARN(gt, retries == 0,
- "64-bit read of %#x did not stabilize\n", reg.addr);
+ drm_WARN(&mmio->tile->xe->drm, retries == 0,
+ "64-bit read of %#x did not stabilize\n", reg.addr);
return (u64)udw << 32 | ldw;
}
-static int __xe_mmio_wait32(struct xe_gt *gt, struct xe_reg reg, u32 mask, u32 val, u32 timeout_us,
+static int __xe_mmio_wait32(struct xe_mmio *mmio, struct xe_reg reg, u32 mask, u32 val, u32 timeout_us,
u32 *out_val, bool atomic, bool expect_match)
{
ktime_t cur = ktime_get_raw();
@@ -346,7 +349,7 @@ static int __xe_mmio_wait32(struct xe_gt *gt, struct xe_reg reg, u32 mask, u32 v
bool check;
for (;;) {
- read = xe_mmio_read32(gt, reg);
+ read = xe_mmio_read32(mmio, reg);
check = (read & mask) == val;
if (!expect_match)
@@ -372,7 +375,7 @@ static int __xe_mmio_wait32(struct xe_gt *gt, struct xe_reg reg, u32 mask, u32 v
}
if (ret != 0) {
- read = xe_mmio_read32(gt, reg);
+ read = xe_mmio_read32(mmio, reg);
check = (read & mask) == val;
if (!expect_match)
@@ -390,7 +393,7 @@ static int __xe_mmio_wait32(struct xe_gt *gt, struct xe_reg reg, u32 mask, u32 v
/**
* xe_mmio_wait32() - Wait for a register to match the desired masked value
- * @gt: MMIO target GT
+ * @mmio: MMIO target
* @reg: register to read value from
* @mask: mask to be applied to the value read from the register
* @val: desired value after applying the mask
@@ -407,15 +410,15 @@ static int __xe_mmio_wait32(struct xe_gt *gt, struct xe_reg reg, u32 mask, u32 v
* @timeout_us for different reasons, specially in non-atomic contexts. Thus,
* it is possible that this function succeeds even after @timeout_us has passed.
*/
-int xe_mmio_wait32(struct xe_gt *gt, struct xe_reg reg, u32 mask, u32 val, u32 timeout_us,
+int xe_mmio_wait32(struct xe_mmio *mmio, struct xe_reg reg, u32 mask, u32 val, u32 timeout_us,
u32 *out_val, bool atomic)
{
- return __xe_mmio_wait32(gt, reg, mask, val, timeout_us, out_val, atomic, true);
+ return __xe_mmio_wait32(mmio, reg, mask, val, timeout_us, out_val, atomic, true);
}
/**
* xe_mmio_wait32_not() - Wait for a register to return anything other than the given masked value
- * @gt: MMIO target GT
+ * @mmio: MMIO target
* @reg: register to read value from
* @mask: mask to be applied to the value read from the register
* @val: value not to be matched after applying the mask
@@ -426,8 +429,8 @@ int xe_mmio_wait32(struct xe_gt *gt, struct xe_reg reg, u32 mask, u32 val, u32 t
* This function works exactly like xe_mmio_wait32() with the exception that
* @val is expected not to be matched.
*/
-int xe_mmio_wait32_not(struct xe_gt *gt, struct xe_reg reg, u32 mask, u32 val, u32 timeout_us,
+int xe_mmio_wait32_not(struct xe_mmio *mmio, struct xe_reg reg, u32 mask, u32 val, u32 timeout_us,
u32 *out_val, bool atomic)
{
- return __xe_mmio_wait32(gt, reg, mask, val, timeout_us, out_val, atomic, false);
+ return __xe_mmio_wait32(mmio, reg, mask, val, timeout_us, out_val, atomic, false);
}
diff --git a/drivers/gpu/drm/xe/xe_mmio.h b/drivers/gpu/drm/xe/xe_mmio.h
index 26551410ecc8..8a46f4006a84 100644
--- a/drivers/gpu/drm/xe/xe_mmio.h
+++ b/drivers/gpu/drm/xe/xe_mmio.h
@@ -14,25 +14,30 @@ struct xe_reg;
int xe_mmio_init(struct xe_device *xe);
int xe_mmio_probe_tiles(struct xe_device *xe);
-u8 xe_mmio_read8(struct xe_gt *gt, struct xe_reg reg);
-u16 xe_mmio_read16(struct xe_gt *gt, struct xe_reg reg);
-void xe_mmio_write32(struct xe_gt *gt, struct xe_reg reg, u32 val);
-u32 xe_mmio_read32(struct xe_gt *gt, struct xe_reg reg);
-u32 xe_mmio_rmw32(struct xe_gt *gt, struct xe_reg reg, u32 clr, u32 set);
-int xe_mmio_write32_and_verify(struct xe_gt *gt, struct xe_reg reg, u32 val, u32 mask, u32 eval);
-bool xe_mmio_in_range(const struct xe_gt *gt, const struct xe_mmio_range *range, struct xe_reg reg);
-
-u64 xe_mmio_read64_2x32(struct xe_gt *gt, struct xe_reg reg);
-int xe_mmio_wait32(struct xe_gt *gt, struct xe_reg reg, u32 mask, u32 val, u32 timeout_us,
- u32 *out_val, bool atomic);
-int xe_mmio_wait32_not(struct xe_gt *gt, struct xe_reg reg, u32 mask, u32 val, u32 timeout_us,
- u32 *out_val, bool atomic);
-
-static inline u32 xe_mmio_adjusted_addr(const struct xe_gt *gt, u32 addr)
+u8 xe_mmio_read8(struct xe_mmio *mmio, struct xe_reg reg);
+u16 xe_mmio_read16(struct xe_mmio *mmio, struct xe_reg reg);
+void xe_mmio_write32(struct xe_mmio *mmio, struct xe_reg reg, u32 val);
+u32 xe_mmio_read32(struct xe_mmio *mmio, struct xe_reg reg);
+u32 xe_mmio_rmw32(struct xe_mmio *mmio, struct xe_reg reg, u32 clr, u32 set);
+int xe_mmio_write32_and_verify(struct xe_mmio *mmio, struct xe_reg reg, u32 val, u32 mask, u32 eval);
+bool xe_mmio_in_range(const struct xe_mmio *mmio, const struct xe_mmio_range *range, struct xe_reg reg);
+
+u64 xe_mmio_read64_2x32(struct xe_mmio *mmio, struct xe_reg reg);
+int xe_mmio_wait32(struct xe_mmio *mmio, struct xe_reg reg, u32 mask, u32 val,
+ u32 timeout_us, u32 *out_val, bool atomic);
+int xe_mmio_wait32_not(struct xe_mmio *mmio, struct xe_reg reg, u32 mask,
+ u32 val, u32 timeout_us, u32 *out_val, bool atomic);
+
+static inline u32 xe_mmio_adjusted_addr(const struct xe_mmio *mmio, u32 addr)
{
- if (addr < gt->mmio.adj_limit)
- addr += gt->mmio.adj_offset;
+ if (addr < mmio->adj_limit)
+ addr += mmio->adj_offset;
return addr;
}
+static inline struct xe_mmio *xe_root_tile_mmio(struct xe_device *xe)
+{
+ return &xe->tiles[0].mmio;
+}
+
#endif
diff --git a/drivers/gpu/drm/xe/xe_mocs.c b/drivers/gpu/drm/xe/xe_mocs.c
index 7ff0ac5b799a..54d199b5cfb2 100644
--- a/drivers/gpu/drm/xe/xe_mocs.c
+++ b/drivers/gpu/drm/xe/xe_mocs.c
@@ -278,7 +278,7 @@ static void xelp_lncf_dump(struct xe_mocs_info *info, struct xe_gt *gt, struct d
if (regs_are_mcr(gt))
reg_val = xe_gt_mcr_unicast_read_any(gt, XEHP_LNCFCMOCS(i));
else
- reg_val = xe_mmio_read32(gt, XELP_LNCFCMOCS(i));
+ reg_val = xe_mmio_read32(&gt->mmio, XELP_LNCFCMOCS(i));
drm_printf(p, "LNCFCMOCS[%2d] = [%u, %u, %u] (%#8x)\n",
j++,
@@ -310,7 +310,7 @@ static void xelp_mocs_dump(struct xe_mocs_info *info, unsigned int flags,
if (regs_are_mcr(gt))
reg_val = xe_gt_mcr_unicast_read_any(gt, XEHP_GLOBAL_MOCS(i));
else
- reg_val = xe_mmio_read32(gt, XELP_GLOBAL_MOCS(i));
+ reg_val = xe_mmio_read32(&gt->mmio, XELP_GLOBAL_MOCS(i));
drm_printf(p, "GLOB_MOCS[%2d] = [%u, %u, %u, %u, %u, %u, %u, %u, %u, %u ] (%#8x)\n",
i,
@@ -383,7 +383,7 @@ static void xehp_lncf_dump(struct xe_mocs_info *info, unsigned int flags,
if (regs_are_mcr(gt))
reg_val = xe_gt_mcr_unicast_read_any(gt, XEHP_LNCFCMOCS(i));
else
- reg_val = xe_mmio_read32(gt, XELP_LNCFCMOCS(i));
+ reg_val = xe_mmio_read32(&gt->mmio, XELP_LNCFCMOCS(i));
drm_printf(p, "LNCFCMOCS[%2d] = [%u, %u, %u] (%#8x)\n",
j++,
@@ -428,7 +428,7 @@ static void pvc_mocs_dump(struct xe_mocs_info *info, unsigned int flags, struct
if (regs_are_mcr(gt))
reg_val = xe_gt_mcr_unicast_read_any(gt, XEHP_LNCFCMOCS(i));
else
- reg_val = xe_mmio_read32(gt, XELP_LNCFCMOCS(i));
+ reg_val = xe_mmio_read32(&gt->mmio, XELP_LNCFCMOCS(i));
drm_printf(p, "LNCFCMOCS[%2d] = [ %u ] (%#8x)\n",
j++,
@@ -510,7 +510,7 @@ static void mtl_mocs_dump(struct xe_mocs_info *info, unsigned int flags,
if (regs_are_mcr(gt))
reg_val = xe_gt_mcr_unicast_read_any(gt, XEHP_GLOBAL_MOCS(i));
else
- reg_val = xe_mmio_read32(gt, XELP_GLOBAL_MOCS(i));
+ reg_val = xe_mmio_read32(&gt->mmio, XELP_GLOBAL_MOCS(i));
drm_printf(p, "GLOB_MOCS[%2d] = [%u, %u] (%#8x)\n",
i,
@@ -553,7 +553,7 @@ static void xe2_mocs_dump(struct xe_mocs_info *info, unsigned int flags,
if (regs_are_mcr(gt))
reg_val = xe_gt_mcr_unicast_read_any(gt, XEHP_GLOBAL_MOCS(i));
else
- reg_val = xe_mmio_read32(gt, XELP_GLOBAL_MOCS(i));
+ reg_val = xe_mmio_read32(&gt->mmio, XELP_GLOBAL_MOCS(i));
drm_printf(p, "GLOB_MOCS[%2d] = [%u, %u, %u] (%#8x)\n",
i,
@@ -576,6 +576,7 @@ static unsigned int get_mocs_settings(struct xe_device *xe,
memset(info, 0, sizeof(struct xe_mocs_info));
switch (xe->info.platform) {
+ case XE_PANTHERLAKE:
case XE_LUNARLAKE:
case XE_BATTLEMAGE:
info->ops = &xe2_mocs_ops;
@@ -690,7 +691,7 @@ static void __init_mocs_table(struct xe_gt *gt,
if (regs_are_mcr(gt))
xe_gt_mcr_multicast_write(gt, XEHP_GLOBAL_MOCS(i), mocs);
else
- xe_mmio_write32(gt, XELP_GLOBAL_MOCS(i), mocs);
+ xe_mmio_write32(&gt->mmio, XELP_GLOBAL_MOCS(i), mocs);
}
}
@@ -730,7 +731,7 @@ static void init_l3cc_table(struct xe_gt *gt,
if (regs_are_mcr(gt))
xe_gt_mcr_multicast_write(gt, XEHP_LNCFCMOCS(i), l3cc);
else
- xe_mmio_write32(gt, XELP_LNCFCMOCS(i), l3cc);
+ xe_mmio_write32(&gt->mmio, XELP_LNCFCMOCS(i), l3cc);
}
}
@@ -773,25 +774,21 @@ void xe_mocs_init(struct xe_gt *gt)
void xe_mocs_dump(struct xe_gt *gt, struct drm_printer *p)
{
- struct xe_mocs_info table;
- unsigned int flags;
- u32 ret;
struct xe_device *xe = gt_to_xe(gt);
+ struct xe_mocs_info table;
+ unsigned int fw_ref, flags;
flags = get_mocs_settings(xe, &table);
xe_pm_runtime_get_noresume(xe);
- ret = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
-
- if (ret)
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+ if (!fw_ref)
goto err_fw;
table.ops->dump(&table, flags, gt, p);
- xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
-
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
err_fw:
- xe_assert(xe, !ret);
xe_pm_runtime_put(xe);
}
diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c
index 78823f53d290..8dd55798ab31 100644
--- a/drivers/gpu/drm/xe/xe_oa.c
+++ b/drivers/gpu/drm/xe/xe_oa.c
@@ -36,11 +36,22 @@
#include "xe_pm.h"
#include "xe_sched_job.h"
#include "xe_sriov.h"
+#include "xe_sync.h"
#define DEFAULT_POLL_FREQUENCY_HZ 200
#define DEFAULT_POLL_PERIOD_NS (NSEC_PER_SEC / DEFAULT_POLL_FREQUENCY_HZ)
#define XE_OA_UNIT_INVALID U32_MAX
+enum xe_oa_submit_deps {
+ XE_OA_SUBMIT_NO_DEPS,
+ XE_OA_SUBMIT_ADD_DEPS,
+};
+
+enum xe_oa_user_extn_from {
+ XE_OA_USER_EXTN_FROM_OPEN,
+ XE_OA_USER_EXTN_FROM_CONFIG,
+};
+
struct xe_oa_reg {
struct xe_reg addr;
u32 value;
@@ -70,6 +81,7 @@ struct flex {
};
struct xe_oa_open_param {
+ struct xe_file *xef;
u32 oa_unit_id;
bool sample;
u32 metric_set;
@@ -81,6 +93,9 @@ struct xe_oa_open_param {
struct xe_exec_queue *exec_q;
struct xe_hw_engine *hwe;
bool no_preempt;
+ struct drm_xe_sync __user *syncs_user;
+ int num_syncs;
+ struct xe_sync_entry *syncs;
};
struct xe_oa_config_bo {
@@ -90,6 +105,17 @@ struct xe_oa_config_bo {
struct xe_bb *bb;
};
+struct xe_oa_fence {
+ /* @base: dma fence base */
+ struct dma_fence base;
+ /* @lock: lock for the fence */
+ spinlock_t lock;
+ /* @work: work to signal @base */
+ struct delayed_work work;
+ /* @cb: callback to schedule @work */
+ struct dma_fence_cb cb;
+};
+
#define DRM_FMT(x) DRM_XE_OA_FMT_TYPE_##x
static const struct xe_oa_format oa_formats[] = {
@@ -162,10 +188,10 @@ static struct xe_oa_config *xe_oa_get_oa_config(struct xe_oa *oa, int metrics_se
return oa_config;
}
-static void free_oa_config_bo(struct xe_oa_config_bo *oa_bo)
+static void free_oa_config_bo(struct xe_oa_config_bo *oa_bo, struct dma_fence *last_fence)
{
xe_oa_config_put(oa_bo->oa_config);
- xe_bb_free(oa_bo->bb, NULL);
+ xe_bb_free(oa_bo->bb, last_fence);
kfree(oa_bo);
}
@@ -176,7 +202,7 @@ static const struct xe_oa_regs *__oa_regs(struct xe_oa_stream *stream)
static u32 xe_oa_hw_tail_read(struct xe_oa_stream *stream)
{
- return xe_mmio_read32(stream->gt, __oa_regs(stream)->oa_tail_ptr) &
+ return xe_mmio_read32(&stream->gt->mmio, __oa_regs(stream)->oa_tail_ptr) &
OAG_OATAILPTR_MASK;
}
@@ -366,7 +392,7 @@ static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf,
struct xe_reg oaheadptr = __oa_regs(stream)->oa_head_ptr;
spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);
- xe_mmio_write32(stream->gt, oaheadptr,
+ xe_mmio_write32(&stream->gt->mmio, oaheadptr,
(head + gtt_offset) & OAG_OAHEADPTR_MASK);
stream->oa_buffer.head = head;
spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
@@ -377,22 +403,23 @@ static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf,
static void xe_oa_init_oa_buffer(struct xe_oa_stream *stream)
{
+ struct xe_mmio *mmio = &stream->gt->mmio;
u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo);
u32 oa_buf = gtt_offset | OABUFFER_SIZE_16M | OAG_OABUFFER_MEMORY_SELECT;
unsigned long flags;
spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);
- xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_status, 0);
- xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_head_ptr,
+ xe_mmio_write32(mmio, __oa_regs(stream)->oa_status, 0);
+ xe_mmio_write32(mmio, __oa_regs(stream)->oa_head_ptr,
gtt_offset & OAG_OAHEADPTR_MASK);
stream->oa_buffer.head = 0;
/*
* PRM says: "This MMIO must be set before the OATAILPTR register and after the
* OAHEADPTR register. This is to enable proper functionality of the overflow bit".
*/
- xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_buffer, oa_buf);
- xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_tail_ptr,
+ xe_mmio_write32(mmio, __oa_regs(stream)->oa_buffer, oa_buf);
+ xe_mmio_write32(mmio, __oa_regs(stream)->oa_tail_ptr,
gtt_offset & OAG_OATAILPTR_MASK);
/* Mark that we need updated tail pointer to read from */
@@ -444,21 +471,23 @@ static void xe_oa_enable(struct xe_oa_stream *stream)
stream->hwe->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAG)
val |= OAG_OACONTROL_OA_PES_DISAG_EN;
- xe_mmio_write32(stream->gt, regs->oa_ctrl, val);
+ xe_mmio_write32(&stream->gt->mmio, regs->oa_ctrl, val);
}
static void xe_oa_disable(struct xe_oa_stream *stream)
{
- xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_ctrl, 0);
- if (xe_mmio_wait32(stream->gt, __oa_regs(stream)->oa_ctrl,
+ struct xe_mmio *mmio = &stream->gt->mmio;
+
+ xe_mmio_write32(mmio, __oa_regs(stream)->oa_ctrl, 0);
+ if (xe_mmio_wait32(mmio, __oa_regs(stream)->oa_ctrl,
OAG_OACONTROL_OA_COUNTER_ENABLE, 0, 50000, NULL, false))
drm_err(&stream->oa->xe->drm,
"wait for OA to be disabled timed out\n");
if (GRAPHICS_VERx100(stream->oa->xe) <= 1270 && GRAPHICS_VERx100(stream->oa->xe) != 1260) {
/* <= XE_METEORLAKE except XE_PVC */
- xe_mmio_write32(stream->gt, OA_TLB_INV_CR, 1);
- if (xe_mmio_wait32(stream->gt, OA_TLB_INV_CR, 1, 0, 50000, NULL, false))
+ xe_mmio_write32(mmio, OA_TLB_INV_CR, 1);
+ if (xe_mmio_wait32(mmio, OA_TLB_INV_CR, 1, 0, 50000, NULL, false))
drm_err(&stream->oa->xe->drm,
"wait for OA tlb invalidate timed out\n");
}
@@ -481,7 +510,7 @@ static int __xe_oa_read(struct xe_oa_stream *stream, char __user *buf,
size_t count, size_t *offset)
{
/* Only clear our bits to avoid side-effects */
- stream->oa_status = xe_mmio_rmw32(stream->gt, __oa_regs(stream)->oa_status,
+ stream->oa_status = xe_mmio_rmw32(&stream->gt->mmio, __oa_regs(stream)->oa_status,
OASTATUS_RELEVANT_BITS, 0);
/*
* Signal to userspace that there is non-zero OA status to read via
@@ -567,11 +596,11 @@ static __poll_t xe_oa_poll(struct file *file, poll_table *wait)
return ret;
}
-static int xe_oa_submit_bb(struct xe_oa_stream *stream, struct xe_bb *bb)
+static struct dma_fence *xe_oa_submit_bb(struct xe_oa_stream *stream, enum xe_oa_submit_deps deps,
+ struct xe_bb *bb)
{
struct xe_sched_job *job;
struct dma_fence *fence;
- long timeout;
int err = 0;
/* Kernel configuration is issued on stream->k_exec_q, not stream->exec_q */
@@ -581,18 +610,24 @@ static int xe_oa_submit_bb(struct xe_oa_stream *stream, struct xe_bb *bb)
goto exit;
}
+ if (deps == XE_OA_SUBMIT_ADD_DEPS) {
+ for (int i = 0; i < stream->num_syncs && !err; i++)
+ err = xe_sync_entry_add_deps(&stream->syncs[i], job);
+ if (err) {
+ drm_dbg(&stream->oa->xe->drm, "xe_sync_entry_add_deps err %d\n", err);
+ goto err_put_job;
+ }
+ }
+
xe_sched_job_arm(job);
fence = dma_fence_get(&job->drm.s_fence->finished);
xe_sched_job_push(job);
- timeout = dma_fence_wait_timeout(fence, false, HZ);
- dma_fence_put(fence);
- if (timeout < 0)
- err = timeout;
- else if (!timeout)
- err = -ETIME;
+ return fence;
+err_put_job:
+ xe_sched_job_put(job);
exit:
- return err;
+ return ERR_PTR(err);
}
static void write_cs_mi_lri(struct xe_bb *bb, const struct xe_oa_reg *reg_data, u32 n_regs)
@@ -636,7 +671,8 @@ static void xe_oa_free_configs(struct xe_oa_stream *stream)
xe_oa_config_put(stream->oa_config);
llist_for_each_entry_safe(oa_bo, tmp, stream->oa_config_bos.first, node)
- free_oa_config_bo(oa_bo);
+ free_oa_config_bo(oa_bo, stream->last_fence);
+ dma_fence_put(stream->last_fence);
}
static void xe_oa_store_flex(struct xe_oa_stream *stream, struct xe_lrc *lrc,
@@ -656,6 +692,7 @@ static void xe_oa_store_flex(struct xe_oa_stream *stream, struct xe_lrc *lrc,
static int xe_oa_modify_ctx_image(struct xe_oa_stream *stream, struct xe_lrc *lrc,
const struct flex *flex, u32 count)
{
+ struct dma_fence *fence;
struct xe_bb *bb;
int err;
@@ -667,7 +704,16 @@ static int xe_oa_modify_ctx_image(struct xe_oa_stream *stream, struct xe_lrc *lr
xe_oa_store_flex(stream, lrc, bb, flex, count);
- err = xe_oa_submit_bb(stream, bb);
+ fence = xe_oa_submit_bb(stream, XE_OA_SUBMIT_NO_DEPS, bb);
+ if (IS_ERR(fence)) {
+ err = PTR_ERR(fence);
+ goto free_bb;
+ }
+ xe_bb_free(bb, fence);
+ dma_fence_put(fence);
+
+ return 0;
+free_bb:
xe_bb_free(bb, NULL);
exit:
return err;
@@ -675,6 +721,7 @@ exit:
static int xe_oa_load_with_lri(struct xe_oa_stream *stream, struct xe_oa_reg *reg_lri)
{
+ struct dma_fence *fence;
struct xe_bb *bb;
int err;
@@ -686,7 +733,16 @@ static int xe_oa_load_with_lri(struct xe_oa_stream *stream, struct xe_oa_reg *re
write_cs_mi_lri(bb, reg_lri, 1);
- err = xe_oa_submit_bb(stream, bb);
+ fence = xe_oa_submit_bb(stream, XE_OA_SUBMIT_NO_DEPS, bb);
+ if (IS_ERR(fence)) {
+ err = PTR_ERR(fence);
+ goto free_bb;
+ }
+ xe_bb_free(bb, fence);
+ dma_fence_put(fence);
+
+ return 0;
+free_bb:
xe_bb_free(bb, NULL);
exit:
return err;
@@ -749,7 +805,8 @@ static int xe_oa_configure_oac_context(struct xe_oa_stream *stream, bool enable)
int err;
/* Set ccs select to enable programming of OAC_OACONTROL */
- xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_ctrl, __oa_ccs_select(stream));
+ xe_mmio_write32(&stream->gt->mmio, __oa_regs(stream)->oa_ctrl,
+ __oa_ccs_select(stream));
/* Modify stream hwe context image with regs_context */
err = xe_oa_modify_ctx_image(stream, stream->exec_q->lrc[0],
@@ -785,6 +842,7 @@ static u32 oag_configure_mmio_trigger(const struct xe_oa_stream *stream, bool en
static void xe_oa_disable_metric_set(struct xe_oa_stream *stream)
{
+ struct xe_mmio *mmio = &stream->gt->mmio;
u32 sqcnt1;
/*
@@ -798,7 +856,7 @@ static void xe_oa_disable_metric_set(struct xe_oa_stream *stream)
_MASKED_BIT_DISABLE(DISABLE_DOP_GATING));
}
- xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_debug,
+ xe_mmio_write32(mmio, __oa_regs(stream)->oa_debug,
oag_configure_mmio_trigger(stream, false));
/* disable the context save/restore or OAR counters */
@@ -806,13 +864,13 @@ static void xe_oa_disable_metric_set(struct xe_oa_stream *stream)
xe_oa_configure_oa_context(stream, false);
/* Make sure we disable noa to save power. */
- xe_mmio_rmw32(stream->gt, RPM_CONFIG1, GT_NOA_ENABLE, 0);
+ xe_mmio_rmw32(mmio, RPM_CONFIG1, GT_NOA_ENABLE, 0);
sqcnt1 = SQCNT1_PMON_ENABLE |
(HAS_OA_BPC_REPORTING(stream->oa->xe) ? SQCNT1_OABPC : 0);
/* Reset PMON Enable to save power. */
- xe_mmio_rmw32(stream->gt, XELPMP_SQCNT1, sqcnt1, 0);
+ xe_mmio_rmw32(mmio, XELPMP_SQCNT1, sqcnt1, 0);
}
static void xe_oa_stream_destroy(struct xe_oa_stream *stream)
@@ -832,7 +890,7 @@ static void xe_oa_stream_destroy(struct xe_oa_stream *stream)
xe_oa_free_oa_buffer(stream);
- XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+ xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL);
xe_pm_runtime_put(stream->oa->xe);
/* Wa_1509372804:pvc: Unset the override of GUCRC mode to enable rc6 */
@@ -840,6 +898,7 @@ static void xe_oa_stream_destroy(struct xe_oa_stream *stream)
xe_gt_WARN_ON(gt, xe_guc_pc_unset_gucrc_mode(&gt->uc.guc.pc));
xe_oa_free_configs(stream);
+ xe_file_put(stream->xef);
}
static int xe_oa_alloc_oa_buffer(struct xe_oa_stream *stream)
@@ -910,11 +969,62 @@ out:
return oa_bo;
}
+static void xe_oa_update_last_fence(struct xe_oa_stream *stream, struct dma_fence *fence)
+{
+ dma_fence_put(stream->last_fence);
+ stream->last_fence = dma_fence_get(fence);
+}
+
+static void xe_oa_fence_work_fn(struct work_struct *w)
+{
+ struct xe_oa_fence *ofence = container_of(w, typeof(*ofence), work.work);
+
+ /* Signal fence to indicate new OA configuration is active */
+ dma_fence_signal(&ofence->base);
+ dma_fence_put(&ofence->base);
+}
+
+static void xe_oa_config_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
+{
+ /* Additional empirical delay needed for NOA programming after registers are written */
+#define NOA_PROGRAM_ADDITIONAL_DELAY_US 500
+
+ struct xe_oa_fence *ofence = container_of(cb, typeof(*ofence), cb);
+
+ INIT_DELAYED_WORK(&ofence->work, xe_oa_fence_work_fn);
+ queue_delayed_work(system_unbound_wq, &ofence->work,
+ usecs_to_jiffies(NOA_PROGRAM_ADDITIONAL_DELAY_US));
+ dma_fence_put(fence);
+}
+
+static const char *xe_oa_get_driver_name(struct dma_fence *fence)
+{
+ return "xe_oa";
+}
+
+static const char *xe_oa_get_timeline_name(struct dma_fence *fence)
+{
+ return "unbound";
+}
+
+static const struct dma_fence_ops xe_oa_fence_ops = {
+ .get_driver_name = xe_oa_get_driver_name,
+ .get_timeline_name = xe_oa_get_timeline_name,
+};
+
static int xe_oa_emit_oa_config(struct xe_oa_stream *stream, struct xe_oa_config *config)
{
#define NOA_PROGRAM_ADDITIONAL_DELAY_US 500
struct xe_oa_config_bo *oa_bo;
- int err, us = NOA_PROGRAM_ADDITIONAL_DELAY_US;
+ struct xe_oa_fence *ofence;
+ int i, err, num_signal = 0;
+ struct dma_fence *fence;
+
+ ofence = kzalloc(sizeof(*ofence), GFP_KERNEL);
+ if (!ofence) {
+ err = -ENOMEM;
+ goto exit;
+ }
oa_bo = xe_oa_alloc_config_buffer(stream, config);
if (IS_ERR(oa_bo)) {
@@ -922,11 +1032,50 @@ static int xe_oa_emit_oa_config(struct xe_oa_stream *stream, struct xe_oa_config
goto exit;
}
- err = xe_oa_submit_bb(stream, oa_bo->bb);
+ /* Emit OA configuration batch */
+ fence = xe_oa_submit_bb(stream, XE_OA_SUBMIT_ADD_DEPS, oa_bo->bb);
+ if (IS_ERR(fence)) {
+ err = PTR_ERR(fence);
+ goto exit;
+ }
- /* Additional empirical delay needed for NOA programming after registers are written */
- usleep_range(us, 2 * us);
+ /* Point of no return: initialize and set fence to signal */
+ spin_lock_init(&ofence->lock);
+ dma_fence_init(&ofence->base, &xe_oa_fence_ops, &ofence->lock, 0, 0);
+
+ for (i = 0; i < stream->num_syncs; i++) {
+ if (stream->syncs[i].flags & DRM_XE_SYNC_FLAG_SIGNAL)
+ num_signal++;
+ xe_sync_entry_signal(&stream->syncs[i], &ofence->base);
+ }
+
+ /* Additional dma_fence_get in case we dma_fence_wait */
+ if (!num_signal)
+ dma_fence_get(&ofence->base);
+
+ /* Update last fence too before adding callback */
+ xe_oa_update_last_fence(stream, fence);
+
+ /* Add job fence callback to schedule work to signal ofence->base */
+ err = dma_fence_add_callback(fence, &ofence->cb, xe_oa_config_cb);
+ xe_gt_assert(stream->gt, !err || err == -ENOENT);
+ if (err == -ENOENT)
+ xe_oa_config_cb(fence, &ofence->cb);
+
+ /* If nothing needs to be signaled we wait synchronously */
+ if (!num_signal) {
+ dma_fence_wait(&ofence->base, false);
+ dma_fence_put(&ofence->base);
+ }
+
+ /* Done with syncs */
+ for (i = 0; i < stream->num_syncs; i++)
+ xe_sync_entry_cleanup(&stream->syncs[i]);
+ kfree(stream->syncs);
+
+ return 0;
exit:
+ kfree(ofence);
return err;
}
@@ -940,6 +1089,7 @@ static u32 oag_report_ctx_switches(const struct xe_oa_stream *stream)
static int xe_oa_enable_metric_set(struct xe_oa_stream *stream)
{
+ struct xe_mmio *mmio = &stream->gt->mmio;
u32 oa_debug, sqcnt1;
int ret;
@@ -966,12 +1116,12 @@ static int xe_oa_enable_metric_set(struct xe_oa_stream *stream)
OAG_OA_DEBUG_DISABLE_START_TRG_2_COUNT_QUAL |
OAG_OA_DEBUG_DISABLE_START_TRG_1_COUNT_QUAL;
- xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_debug,
+ xe_mmio_write32(mmio, __oa_regs(stream)->oa_debug,
_MASKED_BIT_ENABLE(oa_debug) |
oag_report_ctx_switches(stream) |
oag_configure_mmio_trigger(stream, true));
- xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_ctx_ctrl, stream->periodic ?
+ xe_mmio_write32(mmio, __oa_regs(stream)->oa_ctx_ctrl, stream->periodic ?
(OAG_OAGLBCTXCTRL_COUNTER_RESUME |
OAG_OAGLBCTXCTRL_TIMER_ENABLE |
REG_FIELD_PREP(OAG_OAGLBCTXCTRL_TIMER_PERIOD_MASK,
@@ -985,7 +1135,7 @@ static int xe_oa_enable_metric_set(struct xe_oa_stream *stream)
sqcnt1 = SQCNT1_PMON_ENABLE |
(HAS_OA_BPC_REPORTING(stream->oa->xe) ? SQCNT1_OABPC : 0);
- xe_mmio_rmw32(stream->gt, XELPMP_SQCNT1, 0, sqcnt1);
+ xe_mmio_rmw32(mmio, XELPMP_SQCNT1, 0, sqcnt1);
/* Configure OAR/OAC */
if (stream->exec_q) {
@@ -997,6 +1147,262 @@ static int xe_oa_enable_metric_set(struct xe_oa_stream *stream)
return xe_oa_emit_oa_config(stream, stream->oa_config);
}
+static int decode_oa_format(struct xe_oa *oa, u64 fmt, enum xe_oa_format_name *name)
+{
+ u32 counter_size = FIELD_GET(DRM_XE_OA_FORMAT_MASK_COUNTER_SIZE, fmt);
+ u32 counter_sel = FIELD_GET(DRM_XE_OA_FORMAT_MASK_COUNTER_SEL, fmt);
+ u32 bc_report = FIELD_GET(DRM_XE_OA_FORMAT_MASK_BC_REPORT, fmt);
+ u32 type = FIELD_GET(DRM_XE_OA_FORMAT_MASK_FMT_TYPE, fmt);
+ int idx;
+
+ for_each_set_bit(idx, oa->format_mask, __XE_OA_FORMAT_MAX) {
+ const struct xe_oa_format *f = &oa->oa_formats[idx];
+
+ if (counter_size == f->counter_size && bc_report == f->bc_report &&
+ type == f->type && counter_sel == f->counter_select) {
+ *name = idx;
+ return 0;
+ }
+ }
+
+ return -EINVAL;
+}
+
+static int xe_oa_set_prop_oa_unit_id(struct xe_oa *oa, u64 value,
+ struct xe_oa_open_param *param)
+{
+ if (value >= oa->oa_unit_ids) {
+ drm_dbg(&oa->xe->drm, "OA unit ID out of range %lld\n", value);
+ return -EINVAL;
+ }
+ param->oa_unit_id = value;
+ return 0;
+}
+
+static int xe_oa_set_prop_sample_oa(struct xe_oa *oa, u64 value,
+ struct xe_oa_open_param *param)
+{
+ param->sample = value;
+ return 0;
+}
+
+static int xe_oa_set_prop_metric_set(struct xe_oa *oa, u64 value,
+ struct xe_oa_open_param *param)
+{
+ param->metric_set = value;
+ return 0;
+}
+
+static int xe_oa_set_prop_oa_format(struct xe_oa *oa, u64 value,
+ struct xe_oa_open_param *param)
+{
+ int ret = decode_oa_format(oa, value, &param->oa_format);
+
+ if (ret) {
+ drm_dbg(&oa->xe->drm, "Unsupported OA report format %#llx\n", value);
+ return ret;
+ }
+ return 0;
+}
+
+static int xe_oa_set_prop_oa_exponent(struct xe_oa *oa, u64 value,
+ struct xe_oa_open_param *param)
+{
+#define OA_EXPONENT_MAX 31
+
+ if (value > OA_EXPONENT_MAX) {
+ drm_dbg(&oa->xe->drm, "OA timer exponent too high (> %u)\n", OA_EXPONENT_MAX);
+ return -EINVAL;
+ }
+ param->period_exponent = value;
+ return 0;
+}
+
+static int xe_oa_set_prop_disabled(struct xe_oa *oa, u64 value,
+ struct xe_oa_open_param *param)
+{
+ param->disabled = value;
+ return 0;
+}
+
+static int xe_oa_set_prop_exec_queue_id(struct xe_oa *oa, u64 value,
+ struct xe_oa_open_param *param)
+{
+ param->exec_queue_id = value;
+ return 0;
+}
+
+static int xe_oa_set_prop_engine_instance(struct xe_oa *oa, u64 value,
+ struct xe_oa_open_param *param)
+{
+ param->engine_instance = value;
+ return 0;
+}
+
+static int xe_oa_set_no_preempt(struct xe_oa *oa, u64 value,
+ struct xe_oa_open_param *param)
+{
+ param->no_preempt = value;
+ return 0;
+}
+
+static int xe_oa_set_prop_num_syncs(struct xe_oa *oa, u64 value,
+ struct xe_oa_open_param *param)
+{
+ param->num_syncs = value;
+ return 0;
+}
+
+static int xe_oa_set_prop_syncs_user(struct xe_oa *oa, u64 value,
+ struct xe_oa_open_param *param)
+{
+ param->syncs_user = u64_to_user_ptr(value);
+ return 0;
+}
+
+static int xe_oa_set_prop_ret_inval(struct xe_oa *oa, u64 value,
+ struct xe_oa_open_param *param)
+{
+ return -EINVAL;
+}
+
+typedef int (*xe_oa_set_property_fn)(struct xe_oa *oa, u64 value,
+ struct xe_oa_open_param *param);
+static const xe_oa_set_property_fn xe_oa_set_property_funcs_open[] = {
+ [DRM_XE_OA_PROPERTY_OA_UNIT_ID] = xe_oa_set_prop_oa_unit_id,
+ [DRM_XE_OA_PROPERTY_SAMPLE_OA] = xe_oa_set_prop_sample_oa,
+ [DRM_XE_OA_PROPERTY_OA_METRIC_SET] = xe_oa_set_prop_metric_set,
+ [DRM_XE_OA_PROPERTY_OA_FORMAT] = xe_oa_set_prop_oa_format,
+ [DRM_XE_OA_PROPERTY_OA_PERIOD_EXPONENT] = xe_oa_set_prop_oa_exponent,
+ [DRM_XE_OA_PROPERTY_OA_DISABLED] = xe_oa_set_prop_disabled,
+ [DRM_XE_OA_PROPERTY_EXEC_QUEUE_ID] = xe_oa_set_prop_exec_queue_id,
+ [DRM_XE_OA_PROPERTY_OA_ENGINE_INSTANCE] = xe_oa_set_prop_engine_instance,
+ [DRM_XE_OA_PROPERTY_NO_PREEMPT] = xe_oa_set_no_preempt,
+ [DRM_XE_OA_PROPERTY_NUM_SYNCS] = xe_oa_set_prop_num_syncs,
+ [DRM_XE_OA_PROPERTY_SYNCS] = xe_oa_set_prop_syncs_user,
+};
+
+static const xe_oa_set_property_fn xe_oa_set_property_funcs_config[] = {
+ [DRM_XE_OA_PROPERTY_OA_UNIT_ID] = xe_oa_set_prop_ret_inval,
+ [DRM_XE_OA_PROPERTY_SAMPLE_OA] = xe_oa_set_prop_ret_inval,
+ [DRM_XE_OA_PROPERTY_OA_METRIC_SET] = xe_oa_set_prop_metric_set,
+ [DRM_XE_OA_PROPERTY_OA_FORMAT] = xe_oa_set_prop_ret_inval,
+ [DRM_XE_OA_PROPERTY_OA_PERIOD_EXPONENT] = xe_oa_set_prop_ret_inval,
+ [DRM_XE_OA_PROPERTY_OA_DISABLED] = xe_oa_set_prop_ret_inval,
+ [DRM_XE_OA_PROPERTY_EXEC_QUEUE_ID] = xe_oa_set_prop_ret_inval,
+ [DRM_XE_OA_PROPERTY_OA_ENGINE_INSTANCE] = xe_oa_set_prop_ret_inval,
+ [DRM_XE_OA_PROPERTY_NO_PREEMPT] = xe_oa_set_prop_ret_inval,
+ [DRM_XE_OA_PROPERTY_NUM_SYNCS] = xe_oa_set_prop_num_syncs,
+ [DRM_XE_OA_PROPERTY_SYNCS] = xe_oa_set_prop_syncs_user,
+};
+
+static int xe_oa_user_ext_set_property(struct xe_oa *oa, enum xe_oa_user_extn_from from,
+ u64 extension, struct xe_oa_open_param *param)
+{
+ u64 __user *address = u64_to_user_ptr(extension);
+ struct drm_xe_ext_set_property ext;
+ int err;
+ u32 idx;
+
+ err = __copy_from_user(&ext, address, sizeof(ext));
+ if (XE_IOCTL_DBG(oa->xe, err))
+ return -EFAULT;
+
+ BUILD_BUG_ON(ARRAY_SIZE(xe_oa_set_property_funcs_open) !=
+ ARRAY_SIZE(xe_oa_set_property_funcs_config));
+
+ if (XE_IOCTL_DBG(oa->xe, ext.property >= ARRAY_SIZE(xe_oa_set_property_funcs_open)) ||
+ XE_IOCTL_DBG(oa->xe, ext.pad))
+ return -EINVAL;
+
+ idx = array_index_nospec(ext.property, ARRAY_SIZE(xe_oa_set_property_funcs_open));
+
+ if (from == XE_OA_USER_EXTN_FROM_CONFIG)
+ return xe_oa_set_property_funcs_config[idx](oa, ext.value, param);
+ else
+ return xe_oa_set_property_funcs_open[idx](oa, ext.value, param);
+}
+
+typedef int (*xe_oa_user_extension_fn)(struct xe_oa *oa, enum xe_oa_user_extn_from from,
+ u64 extension, struct xe_oa_open_param *param);
+static const xe_oa_user_extension_fn xe_oa_user_extension_funcs[] = {
+ [DRM_XE_OA_EXTENSION_SET_PROPERTY] = xe_oa_user_ext_set_property,
+};
+
+#define MAX_USER_EXTENSIONS 16
+static int xe_oa_user_extensions(struct xe_oa *oa, enum xe_oa_user_extn_from from, u64 extension,
+ int ext_number, struct xe_oa_open_param *param)
+{
+ u64 __user *address = u64_to_user_ptr(extension);
+ struct drm_xe_user_extension ext;
+ int err;
+ u32 idx;
+
+ if (XE_IOCTL_DBG(oa->xe, ext_number >= MAX_USER_EXTENSIONS))
+ return -E2BIG;
+
+ err = __copy_from_user(&ext, address, sizeof(ext));
+ if (XE_IOCTL_DBG(oa->xe, err))
+ return -EFAULT;
+
+ if (XE_IOCTL_DBG(oa->xe, ext.pad) ||
+ XE_IOCTL_DBG(oa->xe, ext.name >= ARRAY_SIZE(xe_oa_user_extension_funcs)))
+ return -EINVAL;
+
+ idx = array_index_nospec(ext.name, ARRAY_SIZE(xe_oa_user_extension_funcs));
+ err = xe_oa_user_extension_funcs[idx](oa, from, extension, param);
+ if (XE_IOCTL_DBG(oa->xe, err))
+ return err;
+
+ if (ext.next_extension)
+ return xe_oa_user_extensions(oa, from, ext.next_extension, ++ext_number, param);
+
+ return 0;
+}
+
+static int xe_oa_parse_syncs(struct xe_oa *oa, struct xe_oa_open_param *param)
+{
+ int ret, num_syncs, num_ufence = 0;
+
+ if (param->num_syncs && !param->syncs_user) {
+ drm_dbg(&oa->xe->drm, "num_syncs specified without sync array\n");
+ ret = -EINVAL;
+ goto exit;
+ }
+
+ if (param->num_syncs) {
+ param->syncs = kcalloc(param->num_syncs, sizeof(*param->syncs), GFP_KERNEL);
+ if (!param->syncs) {
+ ret = -ENOMEM;
+ goto exit;
+ }
+ }
+
+ for (num_syncs = 0; num_syncs < param->num_syncs; num_syncs++) {
+ ret = xe_sync_entry_parse(oa->xe, param->xef, &param->syncs[num_syncs],
+ &param->syncs_user[num_syncs], 0);
+ if (ret)
+ goto err_syncs;
+
+ if (xe_sync_is_ufence(&param->syncs[num_syncs]))
+ num_ufence++;
+ }
+
+ if (XE_IOCTL_DBG(oa->xe, num_ufence > 1)) {
+ ret = -EINVAL;
+ goto err_syncs;
+ }
+
+ return 0;
+
+err_syncs:
+ while (num_syncs--)
+ xe_sync_entry_cleanup(&param->syncs[num_syncs]);
+ kfree(param->syncs);
+exit:
+ return ret;
+}
+
static void xe_oa_stream_enable(struct xe_oa_stream *stream)
{
stream->pollin = false;
@@ -1090,36 +1496,38 @@ static int xe_oa_disable_locked(struct xe_oa_stream *stream)
static long xe_oa_config_locked(struct xe_oa_stream *stream, u64 arg)
{
- struct drm_xe_ext_set_property ext;
+ struct xe_oa_open_param param = {};
long ret = stream->oa_config->id;
struct xe_oa_config *config;
int err;
- err = __copy_from_user(&ext, u64_to_user_ptr(arg), sizeof(ext));
- if (XE_IOCTL_DBG(stream->oa->xe, err))
- return -EFAULT;
-
- if (XE_IOCTL_DBG(stream->oa->xe, ext.pad) ||
- XE_IOCTL_DBG(stream->oa->xe, ext.base.name != DRM_XE_OA_EXTENSION_SET_PROPERTY) ||
- XE_IOCTL_DBG(stream->oa->xe, ext.base.next_extension) ||
- XE_IOCTL_DBG(stream->oa->xe, ext.property != DRM_XE_OA_PROPERTY_OA_METRIC_SET))
- return -EINVAL;
+ err = xe_oa_user_extensions(stream->oa, XE_OA_USER_EXTN_FROM_CONFIG, arg, 0, &param);
+ if (err)
+ return err;
- config = xe_oa_get_oa_config(stream->oa, ext.value);
+ config = xe_oa_get_oa_config(stream->oa, param.metric_set);
if (!config)
return -ENODEV;
- if (config != stream->oa_config) {
- err = xe_oa_emit_oa_config(stream, config);
- if (!err)
- config = xchg(&stream->oa_config, config);
- else
- ret = err;
+ param.xef = stream->xef;
+ err = xe_oa_parse_syncs(stream->oa, &param);
+ if (err)
+ goto err_config_put;
+
+ stream->num_syncs = param.num_syncs;
+ stream->syncs = param.syncs;
+
+ err = xe_oa_emit_oa_config(stream, config);
+ if (!err) {
+ config = xchg(&stream->oa_config, config);
+ drm_dbg(&stream->oa->xe->drm, "changed to oa config uuid=%s\n",
+ stream->oa_config->uuid);
}
+err_config_put:
xe_oa_config_put(config);
- return ret;
+ return err ?: ret;
}
static long xe_oa_status_locked(struct xe_oa_stream *stream, unsigned long arg)
@@ -1349,6 +1757,7 @@ static int xe_oa_stream_init(struct xe_oa_stream *stream,
{
struct xe_oa_unit *u = param->hwe->oa_unit;
struct xe_gt *gt = param->hwe->gt;
+ unsigned int fw_ref;
int ret;
stream->exec_q = param->exec_q;
@@ -1362,6 +1771,10 @@ static int xe_oa_stream_init(struct xe_oa_stream *stream,
stream->period_exponent = param->period_exponent;
stream->no_preempt = param->no_preempt;
+ stream->xef = xe_file_get(param->xef);
+ stream->num_syncs = param->num_syncs;
+ stream->syncs = param->syncs;
+
/*
* For Xe2+, when overrun mode is enabled, there are no partial reports at the end
* of buffer, making the OA buffer effectively a non-power-of-2 size circular
@@ -1409,7 +1822,11 @@ static int xe_oa_stream_init(struct xe_oa_stream *stream,
/* Take runtime pm ref and forcewake to disable RC6 */
xe_pm_runtime_get(stream->oa->xe);
- XE_WARN_ON(xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+ if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) {
+ ret = -ETIMEDOUT;
+ goto err_fw_put;
+ }
ret = xe_oa_alloc_oa_buffer(stream);
if (ret)
@@ -1451,13 +1868,14 @@ err_put_k_exec_q:
err_free_oa_buf:
xe_oa_free_oa_buffer(stream);
err_fw_put:
- XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
xe_pm_runtime_put(stream->oa->xe);
if (stream->override_gucrc)
xe_gt_WARN_ON(gt, xe_guc_pc_unset_gucrc_mode(&gt->uc.guc.pc));
err_free_configs:
xe_oa_free_configs(stream);
exit:
+ xe_file_put(stream->xef);
return ret;
}
@@ -1535,7 +1953,7 @@ u32 xe_oa_timestamp_frequency(struct xe_gt *gt)
case XE_PVC:
case XE_METEORLAKE:
xe_pm_runtime_get(gt_to_xe(gt));
- reg = xe_mmio_read32(gt, RPM_CONFIG0);
+ reg = xe_mmio_read32(&gt->mmio, RPM_CONFIG0);
xe_pm_runtime_put(gt_to_xe(gt));
shift = REG_FIELD_GET(RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK, reg);
@@ -1567,27 +1985,6 @@ static bool engine_supports_oa_format(const struct xe_hw_engine *hwe, int type)
}
}
-static int decode_oa_format(struct xe_oa *oa, u64 fmt, enum xe_oa_format_name *name)
-{
- u32 counter_size = FIELD_GET(DRM_XE_OA_FORMAT_MASK_COUNTER_SIZE, fmt);
- u32 counter_sel = FIELD_GET(DRM_XE_OA_FORMAT_MASK_COUNTER_SEL, fmt);
- u32 bc_report = FIELD_GET(DRM_XE_OA_FORMAT_MASK_BC_REPORT, fmt);
- u32 type = FIELD_GET(DRM_XE_OA_FORMAT_MASK_FMT_TYPE, fmt);
- int idx;
-
- for_each_set_bit(idx, oa->format_mask, __XE_OA_FORMAT_MAX) {
- const struct xe_oa_format *f = &oa->oa_formats[idx];
-
- if (counter_size == f->counter_size && bc_report == f->bc_report &&
- type == f->type && counter_sel == f->counter_select) {
- *name = idx;
- return 0;
- }
- }
-
- return -EINVAL;
-}
-
/**
* xe_oa_unit_id - Return OA unit ID for a hardware engine
* @hwe: @xe_hw_engine
@@ -1634,155 +2031,6 @@ out:
return ret;
}
-static int xe_oa_set_prop_oa_unit_id(struct xe_oa *oa, u64 value,
- struct xe_oa_open_param *param)
-{
- if (value >= oa->oa_unit_ids) {
- drm_dbg(&oa->xe->drm, "OA unit ID out of range %lld\n", value);
- return -EINVAL;
- }
- param->oa_unit_id = value;
- return 0;
-}
-
-static int xe_oa_set_prop_sample_oa(struct xe_oa *oa, u64 value,
- struct xe_oa_open_param *param)
-{
- param->sample = value;
- return 0;
-}
-
-static int xe_oa_set_prop_metric_set(struct xe_oa *oa, u64 value,
- struct xe_oa_open_param *param)
-{
- param->metric_set = value;
- return 0;
-}
-
-static int xe_oa_set_prop_oa_format(struct xe_oa *oa, u64 value,
- struct xe_oa_open_param *param)
-{
- int ret = decode_oa_format(oa, value, &param->oa_format);
-
- if (ret) {
- drm_dbg(&oa->xe->drm, "Unsupported OA report format %#llx\n", value);
- return ret;
- }
- return 0;
-}
-
-static int xe_oa_set_prop_oa_exponent(struct xe_oa *oa, u64 value,
- struct xe_oa_open_param *param)
-{
-#define OA_EXPONENT_MAX 31
-
- if (value > OA_EXPONENT_MAX) {
- drm_dbg(&oa->xe->drm, "OA timer exponent too high (> %u)\n", OA_EXPONENT_MAX);
- return -EINVAL;
- }
- param->period_exponent = value;
- return 0;
-}
-
-static int xe_oa_set_prop_disabled(struct xe_oa *oa, u64 value,
- struct xe_oa_open_param *param)
-{
- param->disabled = value;
- return 0;
-}
-
-static int xe_oa_set_prop_exec_queue_id(struct xe_oa *oa, u64 value,
- struct xe_oa_open_param *param)
-{
- param->exec_queue_id = value;
- return 0;
-}
-
-static int xe_oa_set_prop_engine_instance(struct xe_oa *oa, u64 value,
- struct xe_oa_open_param *param)
-{
- param->engine_instance = value;
- return 0;
-}
-
-static int xe_oa_set_no_preempt(struct xe_oa *oa, u64 value,
- struct xe_oa_open_param *param)
-{
- param->no_preempt = value;
- return 0;
-}
-
-typedef int (*xe_oa_set_property_fn)(struct xe_oa *oa, u64 value,
- struct xe_oa_open_param *param);
-static const xe_oa_set_property_fn xe_oa_set_property_funcs[] = {
- [DRM_XE_OA_PROPERTY_OA_UNIT_ID] = xe_oa_set_prop_oa_unit_id,
- [DRM_XE_OA_PROPERTY_SAMPLE_OA] = xe_oa_set_prop_sample_oa,
- [DRM_XE_OA_PROPERTY_OA_METRIC_SET] = xe_oa_set_prop_metric_set,
- [DRM_XE_OA_PROPERTY_OA_FORMAT] = xe_oa_set_prop_oa_format,
- [DRM_XE_OA_PROPERTY_OA_PERIOD_EXPONENT] = xe_oa_set_prop_oa_exponent,
- [DRM_XE_OA_PROPERTY_OA_DISABLED] = xe_oa_set_prop_disabled,
- [DRM_XE_OA_PROPERTY_EXEC_QUEUE_ID] = xe_oa_set_prop_exec_queue_id,
- [DRM_XE_OA_PROPERTY_OA_ENGINE_INSTANCE] = xe_oa_set_prop_engine_instance,
- [DRM_XE_OA_PROPERTY_NO_PREEMPT] = xe_oa_set_no_preempt,
-};
-
-static int xe_oa_user_ext_set_property(struct xe_oa *oa, u64 extension,
- struct xe_oa_open_param *param)
-{
- u64 __user *address = u64_to_user_ptr(extension);
- struct drm_xe_ext_set_property ext;
- int err;
- u32 idx;
-
- err = __copy_from_user(&ext, address, sizeof(ext));
- if (XE_IOCTL_DBG(oa->xe, err))
- return -EFAULT;
-
- if (XE_IOCTL_DBG(oa->xe, ext.property >= ARRAY_SIZE(xe_oa_set_property_funcs)) ||
- XE_IOCTL_DBG(oa->xe, ext.pad))
- return -EINVAL;
-
- idx = array_index_nospec(ext.property, ARRAY_SIZE(xe_oa_set_property_funcs));
- return xe_oa_set_property_funcs[idx](oa, ext.value, param);
-}
-
-typedef int (*xe_oa_user_extension_fn)(struct xe_oa *oa, u64 extension,
- struct xe_oa_open_param *param);
-static const xe_oa_user_extension_fn xe_oa_user_extension_funcs[] = {
- [DRM_XE_OA_EXTENSION_SET_PROPERTY] = xe_oa_user_ext_set_property,
-};
-
-#define MAX_USER_EXTENSIONS 16
-static int xe_oa_user_extensions(struct xe_oa *oa, u64 extension, int ext_number,
- struct xe_oa_open_param *param)
-{
- u64 __user *address = u64_to_user_ptr(extension);
- struct drm_xe_user_extension ext;
- int err;
- u32 idx;
-
- if (XE_IOCTL_DBG(oa->xe, ext_number >= MAX_USER_EXTENSIONS))
- return -E2BIG;
-
- err = __copy_from_user(&ext, address, sizeof(ext));
- if (XE_IOCTL_DBG(oa->xe, err))
- return -EFAULT;
-
- if (XE_IOCTL_DBG(oa->xe, ext.pad) ||
- XE_IOCTL_DBG(oa->xe, ext.name >= ARRAY_SIZE(xe_oa_user_extension_funcs)))
- return -EINVAL;
-
- idx = array_index_nospec(ext.name, ARRAY_SIZE(xe_oa_user_extension_funcs));
- err = xe_oa_user_extension_funcs[idx](oa, extension, param);
- if (XE_IOCTL_DBG(oa->xe, err))
- return err;
-
- if (ext.next_extension)
- return xe_oa_user_extensions(oa, ext.next_extension, ++ext_number, param);
-
- return 0;
-}
-
/**
* xe_oa_stream_open_ioctl - Opens an OA stream
* @dev: @drm_device
@@ -1808,7 +2056,8 @@ int xe_oa_stream_open_ioctl(struct drm_device *dev, u64 data, struct drm_file *f
return -ENODEV;
}
- ret = xe_oa_user_extensions(oa, data, 0, &param);
+ param.xef = xef;
+ ret = xe_oa_user_extensions(oa, XE_OA_USER_EXTN_FROM_OPEN, data, 0, &param);
if (ret)
return ret;
@@ -1876,11 +2125,24 @@ int xe_oa_stream_open_ioctl(struct drm_device *dev, u64 data, struct drm_file *f
drm_dbg(&oa->xe->drm, "Using periodic sampling freq %lld Hz\n", oa_freq_hz);
}
+ ret = xe_oa_parse_syncs(oa, &param);
+ if (ret)
+ goto err_exec_q;
+
mutex_lock(&param.hwe->gt->oa.gt_lock);
ret = xe_oa_stream_open_ioctl_locked(oa, &param);
mutex_unlock(&param.hwe->gt->oa.gt_lock);
+ if (ret < 0)
+ goto err_sync_cleanup;
+
+ return ret;
+
+err_sync_cleanup:
+ while (param.num_syncs--)
+ xe_sync_entry_cleanup(&param.syncs[param.num_syncs]);
+ kfree(param.syncs);
err_exec_q:
- if (ret < 0 && param.exec_q)
+ if (param.exec_q)
xe_exec_queue_put(param.exec_q);
return ret;
}
@@ -2351,7 +2613,7 @@ static void __xe_oa_init_oa_units(struct xe_gt *gt)
}
/* Ensure MMIO trigger remains disabled till there is a stream */
- xe_mmio_write32(gt, u->regs.oa_debug,
+ xe_mmio_write32(&gt->mmio, u->regs.oa_debug,
oag_configure_mmio_trigger(NULL, false));
/* Set oa_unit_ids now to ensure ids remain contiguous */
diff --git a/drivers/gpu/drm/xe/xe_oa_types.h b/drivers/gpu/drm/xe/xe_oa_types.h
index 8862eca73fbe..fea9d981e414 100644
--- a/drivers/gpu/drm/xe/xe_oa_types.h
+++ b/drivers/gpu/drm/xe/xe_oa_types.h
@@ -238,5 +238,17 @@ struct xe_oa_stream {
/** @no_preempt: Whether preemption and timeslicing is disabled for stream exec_q */
u32 no_preempt;
+
+ /** @xef: xe_file with which the stream was opened */
+ struct xe_file *xef;
+
+ /** @last_fence: fence to use in stream destroy when needed */
+ struct dma_fence *last_fence;
+
+ /** @num_syncs: size of @syncs array */
+ u32 num_syncs;
+
+ /** @syncs: syncs to wait on and to signal */
+ struct xe_sync_entry *syncs;
};
#endif
diff --git a/drivers/gpu/drm/xe/xe_pat.c b/drivers/gpu/drm/xe/xe_pat.c
index f291a1730024..30fdbdb9341e 100644
--- a/drivers/gpu/drm/xe/xe_pat.c
+++ b/drivers/gpu/drm/xe/xe_pat.c
@@ -100,6 +100,10 @@ static const struct xe_pat_table_entry xelpg_pat_table[] = {
* Reserved entries should be programmed with the maximum caching, minimum
* coherency (which matches an all-0's encoding), so we can just omit them
* in the table.
+ *
+ * Note: There is an implicit assumption in the driver that compression and
+ * coh_1way+ are mutually exclusive. If this is ever not true then userptr
+ * and imported dma-buf from external device will have uncleared ccs state.
*/
#define XE2_PAT(no_promote, comp_en, l3clos, l3_policy, l4_policy, __coh_mode) \
{ \
@@ -109,7 +113,8 @@ static const struct xe_pat_table_entry xelpg_pat_table[] = {
REG_FIELD_PREP(XE2_L3_POLICY, l3_policy) | \
REG_FIELD_PREP(XE2_L4_POLICY, l4_policy) | \
REG_FIELD_PREP(XE2_COH_MODE, __coh_mode), \
- .coh_mode = __coh_mode ? XE_COH_AT_LEAST_1WAY : XE_COH_NONE \
+ .coh_mode = (BUILD_BUG_ON_ZERO(__coh_mode && comp_en) || __coh_mode) ? \
+ XE_COH_AT_LEAST_1WAY : XE_COH_NONE \
}
static const struct xe_pat_table_entry xe2_pat_table[] = {
@@ -160,7 +165,7 @@ static void program_pat(struct xe_gt *gt, const struct xe_pat_table_entry table[
for (int i = 0; i < n_entries; i++) {
struct xe_reg reg = XE_REG(_PAT_INDEX(i));
- xe_mmio_write32(gt, reg, table[i].value);
+ xe_mmio_write32(&gt->mmio, reg, table[i].value);
}
}
@@ -177,25 +182,24 @@ static void program_pat_mcr(struct xe_gt *gt, const struct xe_pat_table_entry ta
static void xelp_dump(struct xe_gt *gt, struct drm_printer *p)
{
struct xe_device *xe = gt_to_xe(gt);
- int i, err;
+ unsigned int fw_ref;
+ int i;
- err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
- if (err)
- goto err_fw;
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+ if (!fw_ref)
+ return;
drm_printf(p, "PAT table:\n");
for (i = 0; i < xe->pat.n_entries; i++) {
- u32 pat = xe_mmio_read32(gt, XE_REG(_PAT_INDEX(i)));
+ u32 pat = xe_mmio_read32(&gt->mmio, XE_REG(_PAT_INDEX(i)));
u8 mem_type = REG_FIELD_GET(XELP_MEM_TYPE_MASK, pat);
drm_printf(p, "PAT[%2d] = %s (%#8x)\n", i,
XELP_MEM_TYPE_STR_MAP[mem_type], pat);
}
- err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
-err_fw:
- xe_assert(xe, !err);
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
}
static const struct xe_pat_ops xelp_pat_ops = {
@@ -206,11 +210,12 @@ static const struct xe_pat_ops xelp_pat_ops = {
static void xehp_dump(struct xe_gt *gt, struct drm_printer *p)
{
struct xe_device *xe = gt_to_xe(gt);
- int i, err;
+ unsigned int fw_ref;
+ int i;
- err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
- if (err)
- goto err_fw;
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+ if (!fw_ref)
+ return;
drm_printf(p, "PAT table:\n");
@@ -224,9 +229,7 @@ static void xehp_dump(struct xe_gt *gt, struct drm_printer *p)
XELP_MEM_TYPE_STR_MAP[mem_type], pat);
}
- err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
-err_fw:
- xe_assert(xe, !err);
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
}
static const struct xe_pat_ops xehp_pat_ops = {
@@ -237,11 +240,12 @@ static const struct xe_pat_ops xehp_pat_ops = {
static void xehpc_dump(struct xe_gt *gt, struct drm_printer *p)
{
struct xe_device *xe = gt_to_xe(gt);
- int i, err;
+ unsigned int fw_ref;
+ int i;
- err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
- if (err)
- goto err_fw;
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+ if (!fw_ref)
+ return;
drm_printf(p, "PAT table:\n");
@@ -253,9 +257,7 @@ static void xehpc_dump(struct xe_gt *gt, struct drm_printer *p)
REG_FIELD_GET(XEHPC_CLOS_LEVEL_MASK, pat), pat);
}
- err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
-err_fw:
- xe_assert(xe, !err);
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
}
static const struct xe_pat_ops xehpc_pat_ops = {
@@ -266,11 +268,12 @@ static const struct xe_pat_ops xehpc_pat_ops = {
static void xelpg_dump(struct xe_gt *gt, struct drm_printer *p)
{
struct xe_device *xe = gt_to_xe(gt);
- int i, err;
+ unsigned int fw_ref;
+ int i;
- err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
- if (err)
- goto err_fw;
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+ if (!fw_ref)
+ return;
drm_printf(p, "PAT table:\n");
@@ -278,7 +281,7 @@ static void xelpg_dump(struct xe_gt *gt, struct drm_printer *p)
u32 pat;
if (xe_gt_is_media_type(gt))
- pat = xe_mmio_read32(gt, XE_REG(_PAT_INDEX(i)));
+ pat = xe_mmio_read32(&gt->mmio, XE_REG(_PAT_INDEX(i)));
else
pat = xe_gt_mcr_unicast_read_any(gt, XE_REG_MCR(_PAT_INDEX(i)));
@@ -287,9 +290,7 @@ static void xelpg_dump(struct xe_gt *gt, struct drm_printer *p)
REG_FIELD_GET(XELPG_INDEX_COH_MODE_MASK, pat), pat);
}
- err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
-err_fw:
- xe_assert(xe, !err);
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
}
/*
@@ -316,27 +317,28 @@ static void xe2lpm_program_pat(struct xe_gt *gt, const struct xe_pat_table_entry
int n_entries)
{
program_pat(gt, table, n_entries);
- xe_mmio_write32(gt, XE_REG(_PAT_ATS), xe2_pat_ats.value);
+ xe_mmio_write32(&gt->mmio, XE_REG(_PAT_ATS), xe2_pat_ats.value);
if (IS_DGFX(gt_to_xe(gt)))
- xe_mmio_write32(gt, XE_REG(_PAT_PTA), xe2_pat_pta.value);
+ xe_mmio_write32(&gt->mmio, XE_REG(_PAT_PTA), xe2_pat_pta.value);
}
static void xe2_dump(struct xe_gt *gt, struct drm_printer *p)
{
struct xe_device *xe = gt_to_xe(gt);
- int i, err;
+ unsigned int fw_ref;
u32 pat;
+ int i;
- err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
- if (err)
- goto err_fw;
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+ if (!fw_ref)
+ return;
drm_printf(p, "PAT table:\n");
for (i = 0; i < xe->pat.n_entries; i++) {
if (xe_gt_is_media_type(gt))
- pat = xe_mmio_read32(gt, XE_REG(_PAT_INDEX(i)));
+ pat = xe_mmio_read32(&gt->mmio, XE_REG(_PAT_INDEX(i)));
else
pat = xe_gt_mcr_unicast_read_any(gt, XE_REG_MCR(_PAT_INDEX(i)));
@@ -355,7 +357,7 @@ static void xe2_dump(struct xe_gt *gt, struct drm_printer *p)
* PPGTT entries.
*/
if (xe_gt_is_media_type(gt))
- pat = xe_mmio_read32(gt, XE_REG(_PAT_PTA));
+ pat = xe_mmio_read32(&gt->mmio, XE_REG(_PAT_PTA));
else
pat = xe_gt_mcr_unicast_read_any(gt, XE_REG_MCR(_PAT_PTA));
@@ -369,9 +371,7 @@ static void xe2_dump(struct xe_gt *gt, struct drm_printer *p)
REG_FIELD_GET(XE2_COH_MODE, pat),
pat);
- err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
-err_fw:
- xe_assert(xe, !err);
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
}
static const struct xe_pat_ops xe2_pat_ops = {
@@ -382,7 +382,7 @@ static const struct xe_pat_ops xe2_pat_ops = {
void xe_pat_init_early(struct xe_device *xe)
{
- if (GRAPHICS_VER(xe) == 20) {
+ if (GRAPHICS_VER(xe) == 30 || GRAPHICS_VER(xe) == 20) {
xe->pat.ops = &xe2_pat_ops;
xe->pat.table = xe2_pat_table;
diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index 5e962e72c97e..6b7f77425c7f 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -13,7 +13,7 @@
#include <drm/drm_color_mgmt.h>
#include <drm/drm_drv.h>
-#include <drm/intel/xe_pciids.h>
+#include <drm/intel/pciids.h>
#include "display/xe_display.h"
#include "regs/xe_gt_regs.h"
@@ -103,7 +103,6 @@ static const struct xe_graphics_desc graphics_xelpp = {
#define XE_HP_FEATURES \
.has_range_tlb_invalidation = true, \
- .has_flat_ccs = true, \
.dma_mask_size = 46, \
.va_bits = 48, \
.vm_max_level = 3
@@ -120,6 +119,8 @@ static const struct xe_graphics_desc graphics_xehpg = {
XE_HP_FEATURES,
.vram_flags = XE_VRAM_FLAGS_NEED64K,
+
+ .has_flat_ccs = 1,
};
static const struct xe_graphics_desc graphics_xehpc = {
@@ -145,7 +146,6 @@ static const struct xe_graphics_desc graphics_xehpc = {
.has_asid = 1,
.has_atomic_enable_pte_bit = 1,
- .has_flat_ccs = 0,
.has_usm = 1,
};
@@ -156,7 +156,6 @@ static const struct xe_graphics_desc graphics_xelpg = {
BIT(XE_HW_ENGINE_CCS0),
XE_HP_FEATURES,
- .has_flat_ccs = 0,
};
#define XE2_GFX_FEATURES \
@@ -175,7 +174,7 @@ static const struct xe_graphics_desc graphics_xelpg = {
GENMASK(XE_HW_ENGINE_CCS3, XE_HW_ENGINE_CCS0)
static const struct xe_graphics_desc graphics_xe2 = {
- .name = "Xe2_LPG / Xe2_HPG",
+ .name = "Xe2_LPG / Xe2_HPG / Xe3_LPG",
XE2_GFX_FEATURES,
};
@@ -209,7 +208,7 @@ static const struct xe_media_desc media_xelpmp = {
};
static const struct xe_media_desc media_xe2 = {
- .name = "Xe2_LPM / Xe2_HPM",
+ .name = "Xe2_LPM / Xe2_HPM / Xe3_LPM",
.hw_engine_mask =
GENMASK(XE_HW_ENGINE_VCS7, XE_HW_ENGINE_VCS0) |
GENMASK(XE_HW_ENGINE_VECS3, XE_HW_ENGINE_VECS0) |
@@ -234,7 +233,7 @@ static const struct xe_device_desc rkl_desc = {
.require_force_probe = true,
};
-static const u16 adls_rpls_ids[] = { XE_RPLS_IDS(NOP), 0 };
+static const u16 adls_rpls_ids[] = { INTEL_RPLS_IDS(NOP), 0 };
static const struct xe_device_desc adl_s_desc = {
.graphics = &graphics_xelp,
@@ -249,7 +248,7 @@ static const struct xe_device_desc adl_s_desc = {
},
};
-static const u16 adlp_rplu_ids[] = { XE_RPLU_IDS(NOP), 0 };
+static const u16 adlp_rplu_ids[] = { INTEL_RPLU_IDS(NOP), 0 };
static const struct xe_device_desc adl_p_desc = {
.graphics = &graphics_xelp,
@@ -286,9 +285,9 @@ static const struct xe_device_desc dg1_desc = {
.require_force_probe = true,
};
-static const u16 dg2_g10_ids[] = { XE_DG2_G10_IDS(NOP), XE_ATS_M150_IDS(NOP), 0 };
-static const u16 dg2_g11_ids[] = { XE_DG2_G11_IDS(NOP), XE_ATS_M75_IDS(NOP), 0 };
-static const u16 dg2_g12_ids[] = { XE_DG2_G12_IDS(NOP), 0 };
+static const u16 dg2_g10_ids[] = { INTEL_DG2_G10_IDS(NOP), INTEL_ATS_M150_IDS(NOP), 0 };
+static const u16 dg2_g11_ids[] = { INTEL_DG2_G11_IDS(NOP), INTEL_ATS_M75_IDS(NOP), 0 };
+static const u16 dg2_g12_ids[] = { INTEL_DG2_G12_IDS(NOP), 0 };
#define DG2_FEATURES \
DGFX_FEATURES, \
@@ -347,6 +346,12 @@ static const struct xe_device_desc bmg_desc = {
.has_heci_cscfi = 1,
};
+static const struct xe_device_desc ptl_desc = {
+ PLATFORM(PANTHERLAKE),
+ .has_display = true,
+ .require_force_probe = true,
+};
+
#undef PLATFORM
__diag_pop();
@@ -357,6 +362,8 @@ static const struct gmdid_map graphics_ip_map[] = {
{ 1274, &graphics_xelpg }, /* Xe_LPG+ */
{ 2001, &graphics_xe2 },
{ 2004, &graphics_xe2 },
+ { 3000, &graphics_xe2 },
+ { 3001, &graphics_xe2 },
};
/* Map of GMD_ID values to media IP */
@@ -364,13 +371,9 @@ static const struct gmdid_map media_ip_map[] = {
{ 1300, &media_xelpmp },
{ 1301, &media_xe2 },
{ 2000, &media_xe2 },
+ { 3000, &media_xe2 },
};
-#define INTEL_VGA_DEVICE(id, info) { \
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, id), \
- PCI_BASE_CLASS_DISPLAY << 16, 0xff << 16, \
- (unsigned long) info }
-
/*
* Make sure any device matches here are from most specific to most
* general. For example, since the Quanta match is based on the subsystem
@@ -378,25 +381,26 @@ static const struct gmdid_map media_ip_map[] = {
* PCI ID matches, otherwise we'll use the wrong info struct above.
*/
static const struct pci_device_id pciidlist[] = {
- XE_TGL_IDS(INTEL_VGA_DEVICE, &tgl_desc),
- XE_RKL_IDS(INTEL_VGA_DEVICE, &rkl_desc),
- XE_ADLS_IDS(INTEL_VGA_DEVICE, &adl_s_desc),
- XE_ADLP_IDS(INTEL_VGA_DEVICE, &adl_p_desc),
- XE_ADLN_IDS(INTEL_VGA_DEVICE, &adl_n_desc),
- XE_RPLP_IDS(INTEL_VGA_DEVICE, &adl_p_desc),
- XE_RPLS_IDS(INTEL_VGA_DEVICE, &adl_s_desc),
- XE_DG1_IDS(INTEL_VGA_DEVICE, &dg1_desc),
- XE_ATS_M_IDS(INTEL_VGA_DEVICE, &ats_m_desc),
- XE_DG2_IDS(INTEL_VGA_DEVICE, &dg2_desc),
- XE_MTL_IDS(INTEL_VGA_DEVICE, &mtl_desc),
- XE_LNL_IDS(INTEL_VGA_DEVICE, &lnl_desc),
- XE_BMG_IDS(INTEL_VGA_DEVICE, &bmg_desc),
+ INTEL_TGL_IDS(INTEL_VGA_DEVICE, &tgl_desc),
+ INTEL_RKL_IDS(INTEL_VGA_DEVICE, &rkl_desc),
+ INTEL_ADLS_IDS(INTEL_VGA_DEVICE, &adl_s_desc),
+ INTEL_ADLP_IDS(INTEL_VGA_DEVICE, &adl_p_desc),
+ INTEL_ADLN_IDS(INTEL_VGA_DEVICE, &adl_n_desc),
+ INTEL_RPLU_IDS(INTEL_VGA_DEVICE, &adl_p_desc),
+ INTEL_RPLP_IDS(INTEL_VGA_DEVICE, &adl_p_desc),
+ INTEL_RPLS_IDS(INTEL_VGA_DEVICE, &adl_s_desc),
+ INTEL_DG1_IDS(INTEL_VGA_DEVICE, &dg1_desc),
+ INTEL_ATS_M_IDS(INTEL_VGA_DEVICE, &ats_m_desc),
+ INTEL_ARL_IDS(INTEL_VGA_DEVICE, &mtl_desc),
+ INTEL_DG2_IDS(INTEL_VGA_DEVICE, &dg2_desc),
+ INTEL_MTL_IDS(INTEL_VGA_DEVICE, &mtl_desc),
+ INTEL_LNL_IDS(INTEL_VGA_DEVICE, &lnl_desc),
+ INTEL_BMG_IDS(INTEL_VGA_DEVICE, &bmg_desc),
+ INTEL_PTL_IDS(INTEL_VGA_DEVICE, &ptl_desc),
{ }
};
MODULE_DEVICE_TABLE(pci, pciidlist);
-#undef INTEL_VGA_DEVICE
-
/* is device_id present in comma separated list of ids */
static bool device_id_in_list(u16 device_id, const char *devices, bool negative)
{
@@ -467,13 +471,15 @@ enum xe_gmdid_type {
static void read_gmdid(struct xe_device *xe, enum xe_gmdid_type type, u32 *ver, u32 *revid)
{
- struct xe_gt *gt = xe_root_mmio_gt(xe);
+ struct xe_mmio *mmio = xe_root_tile_mmio(xe);
struct xe_reg gmdid_reg = GMD_ID;
u32 val;
KUNIT_STATIC_STUB_REDIRECT(read_gmdid, xe, type, ver, revid);
if (IS_SRIOV_VF(xe)) {
+ struct xe_gt *gt = xe_root_mmio_gt(xe);
+
/*
* To get the value of the GMDID register, VFs must obtain it
* from the GuC using MMIO communication.
@@ -509,14 +515,17 @@ static void read_gmdid(struct xe_device *xe, enum xe_gmdid_type type, u32 *ver,
gt->info.type = XE_GT_TYPE_UNINITIALIZED;
} else {
/*
- * We need to apply the GSI offset explicitly here as at this
- * point the xe_gt is not fully uninitialized and only basic
- * access to MMIO registers is possible.
+ * GMD_ID is a GT register, but at this point in the driver
+ * init we haven't fully initialized the GT yet so we need to
+ * read the register with the tile's MMIO accessor. That means
+ * we need to apply the GSI offset manually since it won't get
+ * automatically added as it would if we were using a GT mmio
+ * accessor.
*/
if (type == GMDID_MEDIA)
gmdid_reg.addr += MEDIA_GT_GSI_OFFSET;
- val = xe_mmio_read32(gt, gmdid_reg);
+ val = xe_mmio_read32(mmio, gmdid_reg);
}
*ver = REG_FIELD_GET(GMD_ID_ARCH_MASK, val) * 100 + REG_FIELD_GET(GMD_ID_RELEASE_MASK, val);
@@ -678,7 +687,10 @@ static int xe_info_init(struct xe_device *xe,
xe->info.has_atomic_enable_pte_bit = graphics_desc->has_atomic_enable_pte_bit;
if (xe->info.platform != XE_PVC)
xe->info.has_device_atomics_on_smem = 1;
+
+ /* Runtime detection may change this later */
xe->info.has_flat_ccs = graphics_desc->has_flat_ccs;
+
xe->info.has_range_tlb_invalidation = graphics_desc->has_range_tlb_invalidation;
xe->info.has_usm = graphics_desc->has_usm;
@@ -707,6 +719,7 @@ static int xe_info_init(struct xe_device *xe,
gt->info.type = XE_GT_TYPE_MAIN;
gt->info.has_indirect_ring_state = graphics_desc->has_indirect_ring_state;
gt->info.engine_mask = graphics_desc->hw_engine_mask;
+
if (MEDIA_VER(xe) < 13 && media_desc)
gt->info.engine_mask |= media_desc->hw_engine_mask;
@@ -725,8 +738,6 @@ static int xe_info_init(struct xe_device *xe,
gt->info.type = XE_GT_TYPE_MEDIA;
gt->info.has_indirect_ring_state = media_desc->has_indirect_ring_state;
gt->info.engine_mask = media_desc->hw_engine_mask;
- gt->mmio.adj_offset = MEDIA_GT_GSI_OFFSET;
- gt->mmio.adj_limit = MEDIA_GT_GSI_LENGTH;
/*
* FIXME: At the moment multi-tile and standalone media are
@@ -757,6 +768,25 @@ static void xe_pci_remove(struct pci_dev *pdev)
pci_set_drvdata(pdev, NULL);
}
+/*
+ * Probe the PCI device, initialize various parts of the driver.
+ *
+ * Fault injection is used to test the error paths of some initialization
+ * functions called either directly from xe_pci_probe() or indirectly for
+ * example through xe_device_probe(). Those functions use the kernel fault
+ * injection capabilities infrastructure, see
+ * Documentation/fault-injection/fault-injection.rst for details. The macro
+ * ALLOW_ERROR_INJECTION() is used to conditionally skip function execution
+ * at runtime and use a provided return value. The first requirement for
+ * error injectable functions is proper handling of the error code by the
+ * caller for recovery, which is always the case here. The second
+ * requirement is that no state is changed before the first error return.
+ * It is not strictly fullfilled for all initialization functions using the
+ * ALLOW_ERROR_INJECTION() macro but this is acceptable because for those
+ * error cases at probe time, the error code is simply propagated up by the
+ * caller. Therefore there is no consequence on those specific callers when
+ * function error injection skips the whole function.
+ */
static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
{
const struct xe_device_desc *desc = (const void *)ent->driver_data;
diff --git a/drivers/gpu/drm/xe/xe_pcode.c b/drivers/gpu/drm/xe/xe_pcode.c
index 7397d556996a..d95d9835de42 100644
--- a/drivers/gpu/drm/xe/xe_pcode.c
+++ b/drivers/gpu/drm/xe/xe_pcode.c
@@ -44,7 +44,7 @@ static int pcode_mailbox_status(struct xe_tile *tile)
[PCODE_ERROR_MASK] = {-EPROTO, "Unknown"},
};
- err = xe_mmio_read32(tile->primary_gt, PCODE_MAILBOX) & PCODE_ERROR_MASK;
+ err = xe_mmio_read32(&tile->mmio, PCODE_MAILBOX) & PCODE_ERROR_MASK;
if (err) {
drm_err(&tile_to_xe(tile)->drm, "PCODE Mailbox failed: %d %s", err,
err_decode[err].str ?: "Unknown");
@@ -58,7 +58,7 @@ static int __pcode_mailbox_rw(struct xe_tile *tile, u32 mbox, u32 *data0, u32 *d
unsigned int timeout_ms, bool return_data,
bool atomic)
{
- struct xe_gt *mmio = tile->primary_gt;
+ struct xe_mmio *mmio = &tile->mmio;
int err;
if (tile_to_xe(tile)->info.skip_pcode)
diff --git a/drivers/gpu/drm/xe/xe_platform_types.h b/drivers/gpu/drm/xe/xe_platform_types.h
index 79b7042c4534..d08574c4cdb8 100644
--- a/drivers/gpu/drm/xe/xe_platform_types.h
+++ b/drivers/gpu/drm/xe/xe_platform_types.h
@@ -23,6 +23,7 @@ enum xe_platform {
XE_METEORLAKE,
XE_LUNARLAKE,
XE_BATTLEMAGE,
+ XE_PANTHERLAKE,
};
enum xe_subplatform {
diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c
index 33eb039053e4..40f7c844ed44 100644
--- a/drivers/gpu/drm/xe/xe_pm.c
+++ b/drivers/gpu/drm/xe/xe_pm.c
@@ -5,6 +5,7 @@
#include "xe_pm.h"
+#include <linux/fault-inject.h>
#include <linux/pm_runtime.h>
#include <drm/drm_managed.h>
@@ -263,6 +264,7 @@ int xe_pm_init_early(struct xe_device *xe)
return 0;
}
+ALLOW_ERROR_INJECTION(xe_pm_init_early, ERRNO); /* See xe_pci_probe() */
/**
* xe_pm_init - Initialize Xe Power Management
diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c
index 848da8e68c7a..170ae72d1a7b 100644
--- a/drivers/gpu/drm/xe/xe_query.c
+++ b/drivers/gpu/drm/xe/xe_query.c
@@ -9,6 +9,7 @@
#include <linux/sched/clock.h>
#include <drm/ttm/ttm_placement.h>
+#include <generated/xe_wa_oob.h>
#include <uapi/drm/xe_drm.h>
#include "regs/xe_engine_regs.h"
@@ -23,6 +24,7 @@
#include "xe_macros.h"
#include "xe_mmio.h"
#include "xe_ttm_vram_mgr.h"
+#include "xe_wa.h"
static const u16 xe_to_user_engine_class[] = {
[XE_ENGINE_CLASS_RENDER] = DRM_XE_ENGINE_CLASS_RENDER,
@@ -83,24 +85,22 @@ static __ktime_func_t __clock_id_to_func(clockid_t clk_id)
}
static void
-__read_timestamps(struct xe_gt *gt,
- struct xe_reg lower_reg,
- struct xe_reg upper_reg,
- u64 *engine_ts,
- u64 *cpu_ts,
- u64 *cpu_delta,
- __ktime_func_t cpu_clock)
+hwe_read_timestamp(struct xe_hw_engine *hwe, u64 *engine_ts, u64 *cpu_ts,
+ u64 *cpu_delta, __ktime_func_t cpu_clock)
{
+ struct xe_mmio *mmio = &hwe->gt->mmio;
u32 upper, lower, old_upper, loop = 0;
+ struct xe_reg upper_reg = RING_TIMESTAMP_UDW(hwe->mmio_base),
+ lower_reg = RING_TIMESTAMP(hwe->mmio_base);
- upper = xe_mmio_read32(gt, upper_reg);
+ upper = xe_mmio_read32(mmio, upper_reg);
do {
*cpu_delta = local_clock();
*cpu_ts = cpu_clock();
- lower = xe_mmio_read32(gt, lower_reg);
+ lower = xe_mmio_read32(mmio, lower_reg);
*cpu_delta = local_clock() - *cpu_delta;
old_upper = upper;
- upper = xe_mmio_read32(gt, upper_reg);
+ upper = xe_mmio_read32(mmio, upper_reg);
} while (upper != old_upper && loop++ < 2);
*engine_ts = (u64)upper << 32 | lower;
@@ -117,6 +117,7 @@ query_engine_cycles(struct xe_device *xe,
__ktime_func_t cpu_clock;
struct xe_hw_engine *hwe;
struct xe_gt *gt;
+ unsigned int fw_ref;
if (query->size == 0) {
query->size = size;
@@ -149,18 +150,16 @@ query_engine_cycles(struct xe_device *xe,
if (!hwe)
return -EINVAL;
- if (xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL))
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+ if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) {
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
return -EIO;
+ }
- __read_timestamps(gt,
- RING_TIMESTAMP(hwe->mmio_base),
- RING_TIMESTAMP_UDW(hwe->mmio_base),
- &resp.engine_cycles,
- &resp.cpu_timestamp,
- &resp.cpu_delta,
- cpu_clock);
+ hwe_read_timestamp(hwe, &resp.engine_cycles, &resp.cpu_timestamp,
+ &resp.cpu_delta, cpu_clock);
- xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
if (GRAPHICS_VER(xe) >= 20)
resp.width = 64;
@@ -168,16 +167,10 @@ query_engine_cycles(struct xe_device *xe,
resp.width = 36;
/* Only write to the output fields of user query */
- if (put_user(resp.cpu_timestamp, &query_ptr->cpu_timestamp))
- return -EFAULT;
-
- if (put_user(resp.cpu_delta, &query_ptr->cpu_delta))
- return -EFAULT;
-
- if (put_user(resp.engine_cycles, &query_ptr->engine_cycles))
- return -EFAULT;
-
- if (put_user(resp.width, &query_ptr->width))
+ if (put_user(resp.cpu_timestamp, &query_ptr->cpu_timestamp) ||
+ put_user(resp.cpu_delta, &query_ptr->cpu_delta) ||
+ put_user(resp.engine_cycles, &query_ptr->engine_cycles) ||
+ put_user(resp.width, &query_ptr->width))
return -EFAULT;
return 0;
@@ -458,12 +451,23 @@ static int query_hwconfig(struct xe_device *xe,
static size_t calc_topo_query_size(struct xe_device *xe)
{
- return xe->info.gt_count *
- (4 * sizeof(struct drm_xe_query_topology_mask) +
- sizeof_field(struct xe_gt, fuse_topo.g_dss_mask) +
- sizeof_field(struct xe_gt, fuse_topo.c_dss_mask) +
- sizeof_field(struct xe_gt, fuse_topo.l3_bank_mask) +
- sizeof_field(struct xe_gt, fuse_topo.eu_mask_per_dss));
+ struct xe_gt *gt;
+ size_t query_size = 0;
+ int id;
+
+ for_each_gt(gt, xe, id) {
+ query_size += 3 * sizeof(struct drm_xe_query_topology_mask) +
+ sizeof_field(struct xe_gt, fuse_topo.g_dss_mask) +
+ sizeof_field(struct xe_gt, fuse_topo.c_dss_mask) +
+ sizeof_field(struct xe_gt, fuse_topo.eu_mask_per_dss);
+
+ /* L3bank mask may not be available for some GTs */
+ if (!XE_WA(gt, no_media_l3))
+ query_size += sizeof(struct drm_xe_query_topology_mask) +
+ sizeof_field(struct xe_gt, fuse_topo.l3_bank_mask);
+ }
+
+ return query_size;
}
static int copy_mask(void __user **ptr,
@@ -516,11 +520,18 @@ static int query_gt_topology(struct xe_device *xe,
if (err)
return err;
- topo.type = DRM_XE_TOPO_L3_BANK;
- err = copy_mask(&query_ptr, &topo, gt->fuse_topo.l3_bank_mask,
- sizeof(gt->fuse_topo.l3_bank_mask));
- if (err)
- return err;
+ /*
+ * If the kernel doesn't have a way to obtain a correct L3bank
+ * mask, then it's better to omit L3 from the query rather than
+ * reporting bogus or zeroed information to userspace.
+ */
+ if (!XE_WA(gt, no_media_l3)) {
+ topo.type = DRM_XE_TOPO_L3_BANK;
+ err = copy_mask(&query_ptr, &topo, gt->fuse_topo.l3_bank_mask,
+ sizeof(gt->fuse_topo.l3_bank_mask));
+ if (err)
+ return err;
+ }
topo.type = gt->fuse_topo.eu_type == XE_GT_EU_TYPE_SIMD16 ?
DRM_XE_TOPO_SIMD16_EU_PER_DSS :
@@ -659,7 +670,7 @@ static int query_oa_units(struct xe_device *xe,
du->oa_unit_id = u->oa_unit_id;
du->oa_unit_type = u->type;
du->oa_timestamp_freq = xe_oa_timestamp_frequency(gt);
- du->capabilities = DRM_XE_OA_CAPS_BASE;
+ du->capabilities = DRM_XE_OA_CAPS_BASE | DRM_XE_OA_CAPS_SYNCS;
j = 0;
for_each_hw_engine(hwe, gt, hwe_id) {
diff --git a/drivers/gpu/drm/xe/xe_reg_sr.c b/drivers/gpu/drm/xe/xe_reg_sr.c
index 440ac572f6e5..e1a0e27cda14 100644
--- a/drivers/gpu/drm/xe/xe_reg_sr.c
+++ b/drivers/gpu/drm/xe/xe_reg_sr.c
@@ -15,6 +15,7 @@
#include "regs/xe_engine_regs.h"
#include "regs/xe_gt_regs.h"
+#include "xe_device.h"
#include "xe_device_types.h"
#include "xe_force_wake.h"
#include "xe_gt.h"
@@ -164,7 +165,7 @@ static void apply_one_mmio(struct xe_gt *gt, struct xe_reg_sr_entry *entry)
else if (entry->clr_bits + 1)
val = (reg.mcr ?
xe_gt_mcr_unicast_read_any(gt, reg_mcr) :
- xe_mmio_read32(gt, reg)) & (~entry->clr_bits);
+ xe_mmio_read32(&gt->mmio, reg)) & (~entry->clr_bits);
else
val = 0;
@@ -180,34 +181,34 @@ static void apply_one_mmio(struct xe_gt *gt, struct xe_reg_sr_entry *entry)
if (entry->reg.mcr)
xe_gt_mcr_multicast_write(gt, reg_mcr, val);
else
- xe_mmio_write32(gt, reg, val);
+ xe_mmio_write32(&gt->mmio, reg, val);
}
void xe_reg_sr_apply_mmio(struct xe_reg_sr *sr, struct xe_gt *gt)
{
struct xe_reg_sr_entry *entry;
unsigned long reg;
- int err;
+ unsigned int fw_ref;
if (xa_empty(&sr->xa))
return;
xe_gt_dbg(gt, "Applying %s save-restore MMIOs\n", sr->name);
- err = xe_force_wake_get(&gt->mmio.fw, XE_FORCEWAKE_ALL);
- if (err)
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+ if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL))
goto err_force_wake;
xa_for_each(&sr->xa, reg, entry)
apply_one_mmio(gt, entry);
- err = xe_force_wake_put(&gt->mmio.fw, XE_FORCEWAKE_ALL);
- XE_WARN_ON(err);
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
return;
err_force_wake:
- xe_gt_err(gt, "Failed to apply, err=%d\n", err);
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
+ xe_gt_err(gt, "Failed to apply, err=-ETIMEDOUT\n");
}
void xe_reg_sr_apply_whitelist(struct xe_hw_engine *hwe)
@@ -220,15 +221,15 @@ void xe_reg_sr_apply_whitelist(struct xe_hw_engine *hwe)
u32 mmio_base = hwe->mmio_base;
unsigned long reg;
unsigned int slot = 0;
- int err;
+ unsigned int fw_ref;
if (xa_empty(&sr->xa))
return;
drm_dbg(&xe->drm, "Whitelisting %s registers\n", sr->name);
- err = xe_force_wake_get(&gt->mmio.fw, XE_FORCEWAKE_ALL);
- if (err)
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+ if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL))
goto err_force_wake;
p = drm_dbg_printer(&xe->drm, DRM_UT_DRIVER, NULL);
@@ -241,7 +242,7 @@ void xe_reg_sr_apply_whitelist(struct xe_hw_engine *hwe)
}
xe_reg_whitelist_print_entry(&p, 0, reg, entry);
- xe_mmio_write32(gt, RING_FORCE_TO_NONPRIV(mmio_base, slot),
+ xe_mmio_write32(&gt->mmio, RING_FORCE_TO_NONPRIV(mmio_base, slot),
reg | entry->set_bits);
slot++;
}
@@ -250,16 +251,16 @@ void xe_reg_sr_apply_whitelist(struct xe_hw_engine *hwe)
for (; slot < RING_MAX_NONPRIV_SLOTS; slot++) {
u32 addr = RING_NOPID(mmio_base).addr;
- xe_mmio_write32(gt, RING_FORCE_TO_NONPRIV(mmio_base, slot), addr);
+ xe_mmio_write32(&gt->mmio, RING_FORCE_TO_NONPRIV(mmio_base, slot), addr);
}
- err = xe_force_wake_put(&gt->mmio.fw, XE_FORCEWAKE_ALL);
- XE_WARN_ON(err);
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
return;
err_force_wake:
- drm_err(&xe->drm, "Failed to apply, err=%d\n", err);
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
+ drm_err(&xe->drm, "Failed to apply, err=-ETIMEDOUT\n");
}
/**
diff --git a/drivers/gpu/drm/xe/xe_rtp.c b/drivers/gpu/drm/xe/xe_rtp.c
index 86c705d18c0d..b13d4d62f0b1 100644
--- a/drivers/gpu/drm/xe/xe_rtp.c
+++ b/drivers/gpu/drm/xe/xe_rtp.c
@@ -196,7 +196,7 @@ static void rtp_get_context(struct xe_rtp_process_ctx *ctx,
*gt = (*hwe)->gt;
*xe = gt_to_xe(*gt);
break;
- };
+ }
}
/**
diff --git a/drivers/gpu/drm/xe/xe_sa.c b/drivers/gpu/drm/xe/xe_sa.c
index fe2cb2a96f78..e055bed7ae55 100644
--- a/drivers/gpu/drm/xe/xe_sa.c
+++ b/drivers/gpu/drm/xe/xe_sa.c
@@ -53,7 +53,7 @@ struct xe_sa_manager *xe_sa_bo_manager_init(struct xe_tile *tile, u32 size, u32
if (IS_ERR(bo)) {
drm_err(&xe->drm, "failed to allocate bo for sa manager: %ld\n",
PTR_ERR(bo));
- return (struct xe_sa_manager *)bo;
+ return ERR_CAST(bo);
}
sa_manager->bo = bo;
sa_manager->is_iomem = bo->vmap.is_iomem;
diff --git a/drivers/gpu/drm/xe/xe_sched_job.c b/drivers/gpu/drm/xe/xe_sched_job.c
index eeccc1c318ae..1905ca590965 100644
--- a/drivers/gpu/drm/xe/xe_sched_job.c
+++ b/drivers/gpu/drm/xe/xe_sched_job.c
@@ -280,7 +280,7 @@ void xe_sched_job_arm(struct xe_sched_job *job)
fence = &chain->base;
}
- job->fence = fence;
+ job->fence = dma_fence_get(fence); /* Pairs with put in scheduler */
drm_sched_job_arm(&job->drm);
}
diff --git a/drivers/gpu/drm/xe/xe_sched_job_types.h b/drivers/gpu/drm/xe/xe_sched_job_types.h
index 0d3f76fb05ce..f13f333f00be 100644
--- a/drivers/gpu/drm/xe/xe_sched_job_types.h
+++ b/drivers/gpu/drm/xe/xe_sched_job_types.h
@@ -40,7 +40,6 @@ struct xe_sched_job {
* @fence: dma fence to indicate completion. 1 way relationship - job
* can safely reference fence, fence cannot safely reference job.
*/
-#define JOB_FLAG_SUBMIT DMA_FENCE_FLAG_USER_BITS
struct dma_fence *fence;
/** @user_fence: write back value when BB is complete */
struct {
@@ -63,7 +62,7 @@ struct xe_sched_job {
struct xe_sched_job_snapshot {
u16 batch_addr_len;
- u64 batch_addr[];
+ u64 batch_addr[] __counted_by(batch_addr_len);
};
#endif
diff --git a/drivers/gpu/drm/xe/xe_sriov.c b/drivers/gpu/drm/xe/xe_sriov.c
index 5a1d65e4f19f..ef10782af656 100644
--- a/drivers/gpu/drm/xe/xe_sriov.c
+++ b/drivers/gpu/drm/xe/xe_sriov.c
@@ -3,6 +3,8 @@
* Copyright © 2023 Intel Corporation
*/
+#include <linux/fault-inject.h>
+
#include <drm/drm_managed.h>
#include "regs/xe_regs.h"
@@ -35,7 +37,7 @@ const char *xe_sriov_mode_to_string(enum xe_sriov_mode mode)
static bool test_is_vf(struct xe_device *xe)
{
- u32 value = xe_mmio_read32(xe_root_mmio_gt(xe), VF_CAP_REG);
+ u32 value = xe_mmio_read32(xe_root_tile_mmio(xe), VF_CAP_REG);
return value & VF_CAP;
}
@@ -119,6 +121,7 @@ int xe_sriov_init(struct xe_device *xe)
return drmm_add_action_or_reset(&xe->drm, fini_sriov, xe);
}
+ALLOW_ERROR_INJECTION(xe_sriov_init, ERRNO); /* See xe_pci_probe() */
/**
* xe_sriov_print_info - Print basic SR-IOV information.
diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c
index 2e72c06fd40d..42f5bebd09e5 100644
--- a/drivers/gpu/drm/xe/xe_sync.c
+++ b/drivers/gpu/drm/xe/xe_sync.c
@@ -83,10 +83,16 @@ static void user_fence_worker(struct work_struct *w)
XE_WARN_ON("Copy to user failed");
kthread_unuse_mm(ufence->mm);
mmput(ufence->mm);
+ } else {
+ drm_dbg(&ufence->xe->drm, "mmget_not_zero() failed, ufence wasn't signaled\n");
}
- wake_up_all(&ufence->xe->ufence_wq);
+ /*
+ * Wake up waiters only after updating the ufence state, allowing the UMD
+ * to safely reuse the same ufence without encountering -EBUSY errors.
+ */
WRITE_ONCE(ufence->signalled, 1);
+ wake_up_all(&ufence->xe->ufence_wq);
user_fence_put(ufence);
}
diff --git a/drivers/gpu/drm/xe/xe_tile.c b/drivers/gpu/drm/xe/xe_tile.c
index dda5268507d8..07cf7cfe4abd 100644
--- a/drivers/gpu/drm/xe/xe_tile.c
+++ b/drivers/gpu/drm/xe/xe_tile.c
@@ -3,6 +3,8 @@
* Copyright © 2023 Intel Corporation
*/
+#include <linux/fault-inject.h>
+
#include <drm/drm_managed.h>
#include "xe_device.h"
@@ -129,6 +131,7 @@ int xe_tile_init_early(struct xe_tile *tile, struct xe_device *xe, u8 id)
return 0;
}
+ALLOW_ERROR_INJECTION(xe_tile_init_early, ERRNO); /* See xe_pci_probe() */
static int tile_ttm_mgr_init(struct xe_tile *tile)
{
diff --git a/drivers/gpu/drm/xe/xe_trace.h b/drivers/gpu/drm/xe/xe_trace.h
index 8573d7a87d84..91130ad8999c 100644
--- a/drivers/gpu/drm/xe/xe_trace.h
+++ b/drivers/gpu/drm/xe/xe_trace.h
@@ -21,6 +21,7 @@
#include "xe_vm.h"
#define __dev_name_xe(xe) dev_name((xe)->drm.dev)
+#define __dev_name_tile(tile) __dev_name_xe(tile_to_xe((tile)))
#define __dev_name_gt(gt) __dev_name_xe(gt_to_xe((gt)))
#define __dev_name_eq(q) __dev_name_gt((q)->gt)
@@ -342,12 +343,12 @@ DEFINE_EVENT(xe_hw_fence, xe_hw_fence_try_signal,
);
TRACE_EVENT(xe_reg_rw,
- TP_PROTO(struct xe_gt *gt, bool write, u32 reg, u64 val, int len),
+ TP_PROTO(struct xe_mmio *mmio, bool write, u32 reg, u64 val, int len),
- TP_ARGS(gt, write, reg, val, len),
+ TP_ARGS(mmio, write, reg, val, len),
TP_STRUCT__entry(
- __string(dev, __dev_name_gt(gt))
+ __string(dev, __dev_name_tile(mmio->tile))
__field(u64, val)
__field(u32, reg)
__field(u16, write)
diff --git a/drivers/gpu/drm/xe/xe_trace_bo.h b/drivers/gpu/drm/xe/xe_trace_bo.h
index 9b1a1d4304ae..30a3cfbaaa09 100644
--- a/drivers/gpu/drm/xe/xe_trace_bo.h
+++ b/drivers/gpu/drm/xe/xe_trace_bo.h
@@ -189,7 +189,7 @@ DECLARE_EVENT_CLASS(xe_vm,
),
TP_printk("dev=%s, vm=%p, asid=0x%05x", __get_str(dev),
- __entry->vm, __entry->asid)
+ __entry->vm, __entry->asid)
);
DEFINE_EVENT(xe_vm, xe_vm_kill,
diff --git a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
index f7113cf6109d..423856cc18d4 100644
--- a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
+++ b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
@@ -60,7 +60,7 @@ bool xe_ttm_stolen_cpu_access_needs_ggtt(struct xe_device *xe)
static s64 detect_bar2_dgfx(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr)
{
struct xe_tile *tile = xe_device_get_root_tile(xe);
- struct xe_gt *mmio = xe_root_mmio_gt(xe);
+ struct xe_mmio *mmio = xe_root_tile_mmio(xe);
struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
u64 stolen_size;
u64 tile_offset;
@@ -94,7 +94,7 @@ static u32 get_wopcm_size(struct xe_device *xe)
u32 wopcm_size;
u64 val;
- val = xe_mmio_read64_2x32(xe_root_mmio_gt(xe), STOLEN_RESERVED);
+ val = xe_mmio_read64_2x32(xe_root_tile_mmio(xe), STOLEN_RESERVED);
val = REG_FIELD_GET64(WOPCM_SIZE_MASK, val);
switch (val) {
@@ -119,7 +119,7 @@ static u32 detect_bar2_integrated(struct xe_device *xe, struct xe_ttm_stolen_mgr
u32 stolen_size, wopcm_size;
u32 ggc, gms;
- ggc = xe_mmio_read32(xe_root_mmio_gt(xe), GGC);
+ ggc = xe_mmio_read32(xe_root_tile_mmio(xe), GGC);
/*
* Check GGMS: it should be fixed 0x3 (8MB), which corresponds to the
@@ -159,7 +159,7 @@ static u32 detect_bar2_integrated(struct xe_device *xe, struct xe_ttm_stolen_mgr
stolen_size -= wopcm_size;
if (media_gt && XE_WA(media_gt, 14019821291)) {
- u64 gscpsmi_base = xe_mmio_read64_2x32(media_gt, GSCPSMI_BASE)
+ u64 gscpsmi_base = xe_mmio_read64_2x32(&media_gt->mmio, GSCPSMI_BASE)
& ~GENMASK_ULL(5, 0);
/*
diff --git a/drivers/gpu/drm/xe/xe_tuning.c b/drivers/gpu/drm/xe/xe_tuning.c
index 0d5e04158917..d449de0fb6ec 100644
--- a/drivers/gpu/drm/xe/xe_tuning.c
+++ b/drivers/gpu/drm/xe/xe_tuning.c
@@ -33,7 +33,7 @@ static const struct xe_rtp_entry_sr gt_tunings[] = {
REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f)))
},
{ XE_RTP_NAME("Tuning: L3 cache - media"),
- XE_RTP_RULES(MEDIA_VERSION(2000)),
+ XE_RTP_RULES(MEDIA_VERSION_RANGE(2000, XE_RTP_END_VERSION_UNDEFINED)),
XE_RTP_ACTIONS(FIELD_SET(XE2LPM_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK,
REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f)))
},
@@ -43,7 +43,7 @@ static const struct xe_rtp_entry_sr gt_tunings[] = {
SET(CCCHKNREG1, L3CMPCTRL))
},
{ XE_RTP_NAME("Tuning: Compression Overfetch - media"),
- XE_RTP_RULES(MEDIA_VERSION(2000)),
+ XE_RTP_RULES(MEDIA_VERSION_RANGE(2000, XE_RTP_END_VERSION_UNDEFINED)),
XE_RTP_ACTIONS(CLR(XE2LPM_CCCHKNREG1, ENCOMPPERFFIX),
SET(XE2LPM_CCCHKNREG1, L3CMPCTRL))
},
@@ -52,7 +52,7 @@ static const struct xe_rtp_entry_sr gt_tunings[] = {
XE_RTP_ACTIONS(SET(L3SQCREG3, COMPPWOVERFETCHEN))
},
{ XE_RTP_NAME("Tuning: Enable compressible partial write overfetch in L3 - media"),
- XE_RTP_RULES(MEDIA_VERSION(2000)),
+ XE_RTP_RULES(MEDIA_VERSION_RANGE(2000, XE_RTP_END_VERSION_UNDEFINED)),
XE_RTP_ACTIONS(SET(XE2LPM_L3SQCREG3, COMPPWOVERFETCHEN))
},
{ XE_RTP_NAME("Tuning: L2 Overfetch Compressible Only"),
@@ -61,7 +61,7 @@ static const struct xe_rtp_entry_sr gt_tunings[] = {
COMPMEMRD256BOVRFETCHEN))
},
{ XE_RTP_NAME("Tuning: L2 Overfetch Compressible Only - media"),
- XE_RTP_RULES(MEDIA_VERSION(2000)),
+ XE_RTP_RULES(MEDIA_VERSION_RANGE(2000, XE_RTP_END_VERSION_UNDEFINED)),
XE_RTP_ACTIONS(SET(XE2LPM_L3SQCREG2,
COMPMEMRD256BOVRFETCHEN))
},
@@ -71,7 +71,7 @@ static const struct xe_rtp_entry_sr gt_tunings[] = {
REG_FIELD_PREP(UNIFIED_COMPRESSION_FORMAT, 0)))
},
{ XE_RTP_NAME("Tuning: Stateless compression control - media"),
- XE_RTP_RULES(MEDIA_VERSION_RANGE(1301, 2000)),
+ XE_RTP_RULES(MEDIA_VERSION_RANGE(1301, XE_RTP_END_VERSION_UNDEFINED)),
XE_RTP_ACTIONS(FIELD_SET(STATELESS_COMPRESSION_CTRL, UNIFIED_COMPRESSION_FORMAT,
REG_FIELD_PREP(UNIFIED_COMPRESSION_FORMAT, 0)))
},
diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c
index d431d0031185..fb0eda3d5682 100644
--- a/drivers/gpu/drm/xe/xe_uc_fw.c
+++ b/drivers/gpu/drm/xe/xe_uc_fw.c
@@ -4,6 +4,7 @@
*/
#include <linux/bitfield.h>
+#include <linux/fault-inject.h>
#include <linux/firmware.h>
#include <drm/drm_managed.h>
@@ -796,6 +797,7 @@ int xe_uc_fw_init(struct xe_uc_fw *uc_fw)
return err;
}
+ALLOW_ERROR_INJECTION(xe_uc_fw_init, ERRNO); /* See xe_pci_probe() */
static u32 uc_fw_ggtt_offset(struct xe_uc_fw *uc_fw)
{
@@ -806,6 +808,7 @@ static int uc_fw_xfer(struct xe_uc_fw *uc_fw, u32 offset, u32 dma_flags)
{
struct xe_device *xe = uc_fw_to_xe(uc_fw);
struct xe_gt *gt = uc_fw_to_gt(uc_fw);
+ struct xe_mmio *mmio = &gt->mmio;
u64 src_offset;
u32 dma_ctrl;
int ret;
@@ -814,34 +817,34 @@ static int uc_fw_xfer(struct xe_uc_fw *uc_fw, u32 offset, u32 dma_flags)
/* Set the source address for the uCode */
src_offset = uc_fw_ggtt_offset(uc_fw) + uc_fw->css_offset;
- xe_mmio_write32(gt, DMA_ADDR_0_LOW, lower_32_bits(src_offset));
- xe_mmio_write32(gt, DMA_ADDR_0_HIGH,
+ xe_mmio_write32(mmio, DMA_ADDR_0_LOW, lower_32_bits(src_offset));
+ xe_mmio_write32(mmio, DMA_ADDR_0_HIGH,
upper_32_bits(src_offset) | DMA_ADDRESS_SPACE_GGTT);
/* Set the DMA destination */
- xe_mmio_write32(gt, DMA_ADDR_1_LOW, offset);
- xe_mmio_write32(gt, DMA_ADDR_1_HIGH, DMA_ADDRESS_SPACE_WOPCM);
+ xe_mmio_write32(mmio, DMA_ADDR_1_LOW, offset);
+ xe_mmio_write32(mmio, DMA_ADDR_1_HIGH, DMA_ADDRESS_SPACE_WOPCM);
/*
* Set the transfer size. The header plus uCode will be copied to WOPCM
* via DMA, excluding any other components
*/
- xe_mmio_write32(gt, DMA_COPY_SIZE,
+ xe_mmio_write32(mmio, DMA_COPY_SIZE,
sizeof(struct uc_css_header) + uc_fw->ucode_size);
/* Start the DMA */
- xe_mmio_write32(gt, DMA_CTRL,
+ xe_mmio_write32(mmio, DMA_CTRL,
_MASKED_BIT_ENABLE(dma_flags | START_DMA));
/* Wait for DMA to finish */
- ret = xe_mmio_wait32(gt, DMA_CTRL, START_DMA, 0, 100000, &dma_ctrl,
+ ret = xe_mmio_wait32(mmio, DMA_CTRL, START_DMA, 0, 100000, &dma_ctrl,
false);
if (ret)
drm_err(&xe->drm, "DMA for %s fw failed, DMA_CTRL=%u\n",
xe_uc_fw_type_repr(uc_fw->type), dma_ctrl);
/* Disable the bits once DMA is over */
- xe_mmio_write32(gt, DMA_CTRL, _MASKED_BIT_DISABLE(dma_flags));
+ xe_mmio_write32(mmio, DMA_CTRL, _MASKED_BIT_DISABLE(dma_flags));
return ret;
}
diff --git a/drivers/gpu/drm/xe/xe_vram.c b/drivers/gpu/drm/xe/xe_vram.c
index 80ba2fc78837..b1f81dca610d 100644
--- a/drivers/gpu/drm/xe/xe_vram.c
+++ b/drivers/gpu/drm/xe/xe_vram.c
@@ -169,7 +169,7 @@ static inline u64 get_flat_ccs_offset(struct xe_gt *gt, u64 tile_size)
u64 offset_hi, offset_lo;
u32 nodes, num_enabled;
- reg = xe_mmio_read32(gt, MIRROR_FUSE3);
+ reg = xe_mmio_read32(&gt->mmio, MIRROR_FUSE3);
nodes = REG_FIELD_GET(XE2_NODE_ENABLE_MASK, reg);
num_enabled = hweight32(nodes); /* Number of enabled l3 nodes */
@@ -185,7 +185,8 @@ static inline u64 get_flat_ccs_offset(struct xe_gt *gt, u64 tile_size)
offset = round_up(offset, SZ_128K); /* SW must round up to nearest 128K */
/* We don't expect any holes */
- xe_assert_msg(xe, offset == (xe_mmio_read64_2x32(gt, GSMBASE) - ccs_size),
+ xe_assert_msg(xe, offset == (xe_mmio_read64_2x32(&gt_to_tile(gt)->mmio, GSMBASE) -
+ ccs_size),
"Hole between CCS and GSM.\n");
} else {
reg = xe_gt_mcr_unicast_read_any(gt, XEHP_FLAT_CCS_BASE_ADDR);
@@ -219,8 +220,8 @@ static int tile_vram_size(struct xe_tile *tile, u64 *vram_size,
{
struct xe_device *xe = tile_to_xe(tile);
struct xe_gt *gt = tile->primary_gt;
+ unsigned int fw_ref;
u64 offset;
- int err;
u32 reg;
if (IS_SRIOV_VF(xe)) {
@@ -239,9 +240,9 @@ static int tile_vram_size(struct xe_tile *tile, u64 *vram_size,
return 0;
}
- err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
- if (err)
- return err;
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+ if (!fw_ref)
+ return -ETIMEDOUT;
/* actual size */
if (unlikely(xe->info.platform == XE_DG1)) {
@@ -257,13 +258,15 @@ static int tile_vram_size(struct xe_tile *tile, u64 *vram_size,
if (xe->info.has_flat_ccs) {
offset = get_flat_ccs_offset(gt, *tile_size);
} else {
- offset = xe_mmio_read64_2x32(gt, GSMBASE);
+ offset = xe_mmio_read64_2x32(&tile->mmio, GSMBASE);
}
/* remove the tile offset so we have just the available size */
*vram_size = offset - *tile_offset;
- return xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
+
+ return 0;
}
static void vram_fini(void *arg)
diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c
index 353936a0f877..02cf647f86d8 100644
--- a/drivers/gpu/drm/xe/xe_wa.c
+++ b/drivers/gpu/drm/xe/xe_wa.c
@@ -8,6 +8,7 @@
#include <drm/drm_managed.h>
#include <kunit/visibility.h>
#include <linux/compiler_types.h>
+#include <linux/fault-inject.h>
#include <generated/xe_wa_oob.h>
@@ -251,6 +252,34 @@ static const struct xe_rtp_entry_sr gt_was[] = {
XE_RTP_ENTRY_FLAG(FOREACH_ENGINE),
},
+ /* Xe3_LPG */
+
+ { XE_RTP_NAME("14021871409"),
+ XE_RTP_RULES(GRAPHICS_VERSION(3000), GRAPHICS_STEP(A0, B0)),
+ XE_RTP_ACTIONS(SET(UNSLCGCTL9454, LSCFE_CLKGATE_DIS))
+ },
+
+ /* Xe3_LPM */
+
+ { XE_RTP_NAME("16021867713"),
+ XE_RTP_RULES(MEDIA_VERSION(3000),
+ ENGINE_CLASS(VIDEO_DECODE)),
+ XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F1C(0), MFXPIPE_CLKGATE_DIS)),
+ XE_RTP_ENTRY_FLAG(FOREACH_ENGINE),
+ },
+ { XE_RTP_NAME("16021865536"),
+ XE_RTP_RULES(MEDIA_VERSION(3000),
+ ENGINE_CLASS(VIDEO_DECODE)),
+ XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F10(0), IECPUNIT_CLKGATE_DIS)),
+ XE_RTP_ENTRY_FLAG(FOREACH_ENGINE),
+ },
+ { XE_RTP_NAME("14021486841"),
+ XE_RTP_RULES(MEDIA_VERSION(3000), MEDIA_STEP(A0, B0),
+ ENGINE_CLASS(VIDEO_DECODE)),
+ XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F10(0), RAMDFTUNIT_CLKGATE_DIS)),
+ XE_RTP_ENTRY_FLAG(FOREACH_ENGINE),
+ },
+
{}
};
@@ -567,6 +596,18 @@ static const struct xe_rtp_entry_sr engine_was[] = {
XE_RTP_ACTION_FLAG(ENGINE_BASE)))
},
+ /* Xe3_LPG */
+
+ { XE_RTP_NAME("14021402888"),
+ XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3000, 3001), FUNC(xe_rtp_match_first_render_or_compute)),
+ XE_RTP_ACTIONS(SET(HALF_SLICE_CHICKEN7, CLEAR_OPTIMIZATION_DISABLE))
+ },
+ { XE_RTP_NAME("18034896535"),
+ XE_RTP_RULES(GRAPHICS_VERSION(3000), GRAPHICS_STEP(A0, B0),
+ FUNC(xe_rtp_match_first_render_or_compute)),
+ XE_RTP_ACTIONS(SET(ROW_CHICKEN4, DISABLE_TDL_PUSH))
+ },
+
{}
};
@@ -742,6 +783,18 @@ static const struct xe_rtp_entry_sr lrc_was[] = {
XE_RTP_ACTIONS(SET(CHICKEN_RASTER_1, DIS_CLIP_NEGATIVE_BOUNDING_BOX))
},
+ /* Xe3_LPG */
+ { XE_RTP_NAME("14021490052"),
+ XE_RTP_RULES(GRAPHICS_VERSION(3000), GRAPHICS_STEP(A0, B0),
+ ENGINE_CLASS(RENDER)),
+ XE_RTP_ACTIONS(SET(FF_MODE,
+ DIS_MESH_PARTIAL_AUTOSTRIP |
+ DIS_MESH_AUTOSTRIP),
+ SET(VFLSKPD,
+ DIS_PARTIAL_AUTOSTRIP |
+ DIS_AUTOSTRIP))
+ },
+
{}
};
@@ -854,6 +907,7 @@ int xe_wa_init(struct xe_gt *gt)
return 0;
}
+ALLOW_ERROR_INJECTION(xe_wa_init, ERRNO); /* See xe_pci_probe() */
void xe_wa_dump(struct xe_gt *gt, struct drm_printer *p)
{
@@ -891,11 +945,11 @@ void xe_wa_dump(struct xe_gt *gt, struct drm_printer *p)
*/
void xe_wa_apply_tile_workarounds(struct xe_tile *tile)
{
- struct xe_gt *mmio = tile->primary_gt;
+ struct xe_mmio *mmio = &tile->mmio;
if (IS_SRIOV_VF(tile->xe))
return;
- if (XE_WA(mmio, 22010954014))
+ if (XE_WA(tile->primary_gt, 22010954014))
xe_mmio_rmw32(mmio, XEHP_CLOCK_GATE_DIS, 0, SGSI_SIDECLK_DIS);
}
diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules
index 920ca5060146..bcd04464b85e 100644
--- a/drivers/gpu/drm/xe/xe_wa_oob.rules
+++ b/drivers/gpu/drm/xe/xe_wa_oob.rules
@@ -33,7 +33,11 @@
GRAPHICS_VERSION(2004)
22019338487 MEDIA_VERSION(2000)
GRAPHICS_VERSION(2001)
+ MEDIA_VERSION(3000), MEDIA_STEP(A0, B0)
22019338487_display PLATFORM(LUNARLAKE)
16023588340 GRAPHICS_VERSION(2001)
14019789679 GRAPHICS_VERSION(1255)
GRAPHICS_VERSION_RANGE(1270, 2004)
+no_media_l3 MEDIA_VERSION(3000)
+14022866841 GRAPHICS_VERSION(3000), GRAPHICS_STEP(A0, B0)
+ MEDIA_VERSION(3000), MEDIA_STEP(A0, B0)
diff --git a/drivers/gpu/drm/xe/xe_wopcm.c b/drivers/gpu/drm/xe/xe_wopcm.c
index d3a99157e523..ada0d0aa6b74 100644
--- a/drivers/gpu/drm/xe/xe_wopcm.c
+++ b/drivers/gpu/drm/xe/xe_wopcm.c
@@ -5,6 +5,8 @@
#include "xe_wopcm.h"
+#include <linux/fault-inject.h>
+
#include "regs/xe_guc_regs.h"
#include "xe_device.h"
#include "xe_force_wake.h"
@@ -123,8 +125,8 @@ static bool __check_layout(struct xe_device *xe, u32 wopcm_size,
static bool __wopcm_regs_locked(struct xe_gt *gt,
u32 *guc_wopcm_base, u32 *guc_wopcm_size)
{
- u32 reg_base = xe_mmio_read32(gt, DMA_GUC_WOPCM_OFFSET);
- u32 reg_size = xe_mmio_read32(gt, GUC_WOPCM_SIZE);
+ u32 reg_base = xe_mmio_read32(&gt->mmio, DMA_GUC_WOPCM_OFFSET);
+ u32 reg_size = xe_mmio_read32(&gt->mmio, GUC_WOPCM_SIZE);
if (!(reg_size & GUC_WOPCM_SIZE_LOCKED) ||
!(reg_base & GUC_WOPCM_OFFSET_VALID))
@@ -150,13 +152,13 @@ static int __wopcm_init_regs(struct xe_device *xe, struct xe_gt *gt,
XE_WARN_ON(size & ~GUC_WOPCM_SIZE_MASK);
mask = GUC_WOPCM_SIZE_MASK | GUC_WOPCM_SIZE_LOCKED;
- err = xe_mmio_write32_and_verify(gt, GUC_WOPCM_SIZE, size, mask,
+ err = xe_mmio_write32_and_verify(&gt->mmio, GUC_WOPCM_SIZE, size, mask,
size | GUC_WOPCM_SIZE_LOCKED);
if (err)
goto err_out;
mask = GUC_WOPCM_OFFSET_MASK | GUC_WOPCM_OFFSET_VALID | huc_agent;
- err = xe_mmio_write32_and_verify(gt, DMA_GUC_WOPCM_OFFSET,
+ err = xe_mmio_write32_and_verify(&gt->mmio, DMA_GUC_WOPCM_OFFSET,
base | huc_agent, mask,
base | huc_agent |
GUC_WOPCM_OFFSET_VALID);
@@ -169,10 +171,10 @@ err_out:
drm_notice(&xe->drm, "Failed to init uC WOPCM registers!\n");
drm_notice(&xe->drm, "%s(%#x)=%#x\n", "DMA_GUC_WOPCM_OFFSET",
DMA_GUC_WOPCM_OFFSET.addr,
- xe_mmio_read32(gt, DMA_GUC_WOPCM_OFFSET));
+ xe_mmio_read32(&gt->mmio, DMA_GUC_WOPCM_OFFSET));
drm_notice(&xe->drm, "%s(%#x)=%#x\n", "GUC_WOPCM_SIZE",
GUC_WOPCM_SIZE.addr,
- xe_mmio_read32(gt, GUC_WOPCM_SIZE));
+ xe_mmio_read32(&gt->mmio, GUC_WOPCM_SIZE));
return err;
}
@@ -268,3 +270,4 @@ check:
return ret;
}
+ALLOW_ERROR_INJECTION(xe_wopcm_init, ERRNO); /* See xe_pci_probe() */