author     Dave Airlie <airlied@redhat.com>  2024-04-24 10:51:28 +1000
committer  Dave Airlie <airlied@redhat.com>  2024-04-24 10:51:29 +1000
commit     83221064c28a0f9fdc4f63ab4fce2e51bfe23315 (patch)
tree       1756afd6b9f1396d257e803797f8647f6b74ddb3
parent     0208ca55aa9c9b997da1f5bc45c4e98916323f08 (diff)
parent     48c64d495fbef343c59598a793d583dfd199d389 (diff)
Merge tag 'drm-xe-next-2024-04-23' of https://gitlab.freedesktop.org/drm/xe/kernel into drm-next

UAPI Changes:
- Remove unused flags (Francois Dugast)
- Extend uAPI to query HuC micro-controller firmware version (Francois Dugast)
- drm/xe/uapi: Define topology types as indexes rather than masks (Francois Dugast)
- drm/xe/uapi: Restore flags VM_BIND_FLAG_READONLY and VM_BIND_FLAG_IMMEDIATE (Francois Dugast)
- devcoredump updates, some touching the output format (José Roberto de Souza, Matthew Brost)
- drm/xe/hwmon: Add infra to support card power and energy attributes
- Improve LRC, HWSP and HWCTX error capture (Maarten Lankhorst)
- drm/xe/uapi: Add IP version and stepping to GT list query (Matt Roper)
- Invalidate userptr VMA on page pin fault (Matthew Brost)
- Improve xe_bo_move tracepoint (Priyanka Danamudi)
- Align fence output format in ftrace log

Cross-driver Changes:
- drm/i915/hwmon: Get rid of devm (Ashutosh Dixit) (Acked-by: Rodrigo Vivi <rodrigo.vivi@intel.com>)
- drm/i915/display: convert inner wakeref get towards get_if_in_use (SOB Rodrigo Vivi)
- drm/i915: Convert intel_runtime_pm_get_noresume towards raw wakeref (Committer, SOB Jani Nikula)

Driver Changes:
- Fix for unneeded CCS metadata allocation (Akshata Jahagirdar)
- Fix multicast support for Xe_LP platforms (Andrzej Hajda)
- A couple of build fixes (Arnd Bergmann)
- Fix register definition (Ashutosh Dixit)
- Add BMG mocs table (Balasubramani Vivekanandan)
- Replace sprintf() across the driver (Bommu Krishnaiah)
- Add an xe2 workaround (Bommu Krishnaiah)
- Makefile fix (Dafna Hirschfeld)
- force_wake_get error value check (Daniele Ceraolo Spurio)
- Handle GSCCS ER interrupt (Daniele Ceraolo Spurio)
- GSC workaround (Daniele Ceraolo Spurio)
- Build error fix (Dawei Li)
- drm/xe/gt: Add L3 bank mask to GT topology (Francois Dugast)
- Implement xe2 and GuC workarounds (Gustavo Sousa, Haridhar Kalvala, Himal Prasad Ghimiray, John Harrison, Matt Roper, Radhakrishna Sripada, Vinay Belgaumkar, Badal Nilawar)
- xe2hpg compression (Himal Ghimiray Prasad)
- Error code cleanups and fixes (Himal Prasad Ghimiray)
- struct xe_device cleanup (Jani Nikula)
- Avoid validating bos when only requesting an exec dma-fence (José Roberto de Souza)
- Remove debug message from migrate_clear (José Roberto de Souza)
- Nuke EXEC_QUEUE_FLAG_PERSISTENT leftover internal flag (José Roberto de Souza)
- Mark dpt and related vma as uncached (Juha-Pekka Heikkila)
- Hwmon updates (Karthik Poosa)
- Kconfig fix when ACPI_WMI is selected (Lu Yao)
- Update intel_uncore_read*() return types (Luca Coelho)
- Mocs updates (Lucas De Marchi, Matt Roper)
- Drop dynamic load-balancing workaround (Lucas De Marchi)
- Fix a PVC workaround (Lucas De Marchi)
- Group live kunit tests into a single module (Lucas De Marchi)
- Various code cleanups (Lucas De Marchi)
- Fix a ggtt init error path and move ggtt invalidate out of the ggtt lock (Maarten Lankhorst)
- Fix a bo leak (Maarten Lankhorst)
- Add LRC parsing for more GPU instructions (Matt Roper)
- Add various definitions for hardware and IP (Matt Roper)
- Define all possible engines in media IP descriptors (Matt Roper)
- Various cleanups, asserts and code fixes (Matthew Auld)
- Various cleanups and code fixes (Matthew Brost)
- Increase VM_BIND number of per-ioctl Ops (Matthew Brost, Paulo Zanoni)
- Don't support execlists in xe_gt_tlb_invalidation layer (Matthew Brost)
- Handle timing out of already signaled jobs gracefully (Matthew Brost)
- Pipeline evict / restore of pinned BOs during suspend / resume (Matthew Brost)
- Do not grab forcewakes when issuing GGTT TLB invalidation via GuC (Matthew Brost)
- Drop ggtt invalidate from display code (Matthew Brost)
- drm/xe: Add XE_BO_GGTT_INVALIDATE flag (Matthew Brost)
- Add debug messages for MMU notifier and VMA invalidate (Matthew Brost)
- Use ordered wq for preempt fence waiting (Matthew Brost)
- Initial development for SR-IOV support including some refactoring (Michal Wajdeczko)
- Various GuC- and GT-related cleanups and fixes (Michal Wajdeczko)
- Move userptr over to start using hmm_range_fault (Oak Zeng) (see the sketch after this tag message)
- Add new PCI IDs to DG2 platform (Ravi Kumar Vodapalli)
- Pcode and VRAM initialization check updates (Riana Tauro)
- Large PM update including i915 display patches, and a fix for one of those (Rodrigo Vivi)
- Introduce performance tuning changes for Xe2_HPG (Shekhar Chauhan)
- GSC / HDCP updates (Suraj Kandpal)
- Minor code cleanup (Tejas Upadhyay)
- Rework / fix rebind TLB flushing and move rebind into the drm_exec locking loop (Thomas Hellström)
- Backmerge (Thomas Hellström)
- GuC updates and fixes (Vinay Belgaumkar, Zhanjun Dong)

Signed-off-by: Dave Airlie <airlied@redhat.com>

# -----BEGIN PGP SIGNATURE-----
#
# iHUEABYKAB0WIQRskUM7w1oG5rx2IZO4FpNVCsYGvwUCZiestQAKCRC4FpNVCsYG
# v8dLAQCDFUR7R5rwSdfqzNy+Djg+9ZgmtzVEfHZ+rI2lTReaCwEAhWeK7UooIMV0
# vGsSdsqGsJQm4VLRzE6H1yemCCQOBgM=
# =HouD
# -----END PGP SIGNATURE-----
# gpg: Signature made Tue 23 Apr 2024 22:42:29 AEST
# gpg: using EDDSA key 6C91433BC35A06E6BC762193B81693550AC606BF
# gpg: Can't check signature: No public key
# Conflicts:
#	drivers/gpu/drm/xe/xe_device_types.h
#	drivers/gpu/drm/xe/xe_vm.c
#	drivers/gpu/drm/xe/xe_vm_types.h
From: Thomas Hellstrom <thomas.hellstrom@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/Zievlb1wvqDg1ovi@fedora
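
The userptr bullet above follows the standard HMM retry pattern described in
Documentation/mm/hmm.rst. Below is a minimal, hedged sketch of that pattern,
assuming a hypothetical userptr_populate() helper; this is not the driver's
actual code, and the GPU page-table programming step is elided:

  #include <linux/hmm.h>
  #include <linux/mmu_notifier.h>

  /* Sketch only: fault in npages of user memory tracked by an interval
   * notifier and retry whenever the range is concurrently invalidated. */
  static int userptr_populate(struct mmu_interval_notifier *notifier,
                              unsigned long start, unsigned long npages,
                              unsigned long *pfns)
  {
          struct hmm_range range = {
                  .notifier = notifier,
                  .start = start,
                  .end = start + npages * PAGE_SIZE,
                  .hmm_pfns = pfns,
                  .default_flags = HMM_PFN_REQ_FAULT | HMM_PFN_REQ_WRITE,
          };
          struct mm_struct *mm = notifier->mm;
          int ret;

  retry:
          range.notifier_seq = mmu_interval_read_begin(notifier);
          mmap_read_lock(mm);
          ret = hmm_range_fault(&range);
          mmap_read_unlock(mm);
          if (ret == -EBUSY)
                  goto retry;     /* raced with an invalidation */
          if (ret)
                  return ret;

          /* Program range.hmm_pfns[] into the GPU page tables here, under
           * a driver lock, and restart if the interval was invalidated. */
          if (mmu_interval_read_retry(notifier, range.notifier_seq))
                  goto retry;

          return 0;
  }

Unlike the old pinning approach, nothing holds the pages across the fault;
validity is guaranteed only by the notifier sequence check.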
-rw-r--r--  Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon | 94
-rw-r--r--  drivers/base/devcoredump.c | 23
-rw-r--r--  drivers/gpu/drm/i915/display/intel_display_power.c | 8
-rw-r--r--  drivers/gpu/drm/i915/display/intel_hdcp_gsc.c | 6
-rw-r--r--  drivers/gpu/drm/i915/display/intel_hdcp_gsc.h | 7
-rw-r--r--  drivers/gpu/drm/i915/i915_hwmon.c | 46
-rw-r--r--  drivers/gpu/drm/i915/intel_runtime_pm.c | 14
-rw-r--r--  drivers/gpu/drm/xe/Kconfig | 2
-rw-r--r--  drivers/gpu/drm/xe/Makefile | 13
-rw-r--r--  drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h | 200
-rw-r--r--  drivers/gpu/drm/xe/abi/guc_klvs_abi.h | 10
-rw-r--r--  drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h | 17
-rw-r--r--  drivers/gpu/drm/xe/compat-i915-headers/i915_gem_stolen.h | 9
-rw-r--r--  drivers/gpu/drm/xe/compat-i915-headers/intel_uncore.h | 6
-rw-r--r--  drivers/gpu/drm/xe/display/intel_fb_bo.c | 8
-rw-r--r--  drivers/gpu/drm/xe/display/intel_fbdev_fb.c | 16
-rw-r--r--  drivers/gpu/drm/xe/display/xe_display.c | 8
-rw-r--r--  drivers/gpu/drm/xe/display/xe_dsb_buffer.c | 4
-rw-r--r--  drivers/gpu/drm/xe/display/xe_fb_pin.c | 39
-rw-r--r--  drivers/gpu/drm/xe/display/xe_hdcp_gsc.c | 240
-rw-r--r--  drivers/gpu/drm/xe/display/xe_plane_initial.c | 7
-rw-r--r--  drivers/gpu/drm/xe/instructions/xe_gfx_state_commands.h | 18
-rw-r--r--  drivers/gpu/drm/xe/instructions/xe_gfxpipe_commands.h | 3
-rw-r--r--  drivers/gpu/drm/xe/instructions/xe_instr_defs.h | 1
-rw-r--r--  drivers/gpu/drm/xe/regs/xe_engine_regs.h | 3
-rw-r--r--  drivers/gpu/drm/xe/regs/xe_gsc_regs.h | 7
-rw-r--r--  drivers/gpu/drm/xe/regs/xe_gt_regs.h | 65
-rw-r--r--  drivers/gpu/drm/xe/regs/xe_gtt_defs.h | 37
-rw-r--r--  drivers/gpu/drm/xe/regs/xe_guc_regs.h | 15
-rw-r--r--  drivers/gpu/drm/xe/regs/xe_reg_defs.h | 19
-rw-r--r--  drivers/gpu/drm/xe/regs/xe_regs.h | 2
-rw-r--r--  drivers/gpu/drm/xe/regs/xe_sriov_regs.h | 3
-rw-r--r--  drivers/gpu/drm/xe/tests/Makefile | 3
-rw-r--r--  drivers/gpu/drm/xe/tests/xe_bo.c | 12
-rw-r--r--  drivers/gpu/drm/xe/tests/xe_bo_test.c | 5
-rw-r--r--  drivers/gpu/drm/xe/tests/xe_dma_buf.c | 57
-rw-r--r--  drivers/gpu/drm/xe/tests/xe_dma_buf_test.c | 5
-rw-r--r--  drivers/gpu/drm/xe/tests/xe_guc_id_mgr_test.c | 136
-rw-r--r--  drivers/gpu/drm/xe/tests/xe_live_test_mod.c | 10
-rw-r--r--  drivers/gpu/drm/xe/tests/xe_migrate.c | 27
-rw-r--r--  drivers/gpu/drm/xe/tests/xe_migrate_test.c | 5
-rw-r--r--  drivers/gpu/drm/xe/tests/xe_mocs.c | 96
-rw-r--r--  drivers/gpu/drm/xe/tests/xe_mocs_test.c | 5
-rw-r--r--  drivers/gpu/drm/xe/tests/xe_wa_test.c | 1
-rw-r--r--  drivers/gpu/drm/xe/xe_bb.c | 6
-rw-r--r--  drivers/gpu/drm/xe/xe_bo.c | 137
-rw-r--r--  drivers/gpu/drm/xe/xe_bo.h | 74
-rw-r--r--  drivers/gpu/drm/xe/xe_bo_evict.c | 4
-rw-r--r--  drivers/gpu/drm/xe/xe_debugfs.c | 23
-rw-r--r--  drivers/gpu/drm/xe/xe_devcoredump.c | 47
-rw-r--r--  drivers/gpu/drm/xe/xe_devcoredump.h | 6
-rw-r--r--  drivers/gpu/drm/xe/xe_device.c | 225
-rw-r--r--  drivers/gpu/drm/xe/xe_device.h | 9
-rw-r--r--  drivers/gpu/drm/xe/xe_device_sysfs.c | 16
-rw-r--r--  drivers/gpu/drm/xe/xe_device_sysfs.h | 2
-rw-r--r--  drivers/gpu/drm/xe/xe_device_types.h | 7
-rw-r--r--  drivers/gpu/drm/xe/xe_dma_buf.c | 7
-rw-r--r--  drivers/gpu/drm/xe/xe_drm_client.c | 8
-rw-r--r--  drivers/gpu/drm/xe/xe_exec.c | 14
-rw-r--r--  drivers/gpu/drm/xe/xe_exec_queue.c | 74
-rw-r--r--  drivers/gpu/drm/xe/xe_exec_queue_types.h | 8
-rw-r--r--  drivers/gpu/drm/xe/xe_ggtt.c | 136
-rw-r--r--  drivers/gpu/drm/xe/xe_ggtt.h | 8
-rw-r--r--  drivers/gpu/drm/xe/xe_gsc.c | 100
-rw-r--r--  drivers/gpu/drm/xe/xe_gsc.h | 2
-rw-r--r--  drivers/gpu/drm/xe/xe_gsc_proxy.c | 15
-rw-r--r--  drivers/gpu/drm/xe/xe_gsc_proxy.h | 1
-rw-r--r--  drivers/gpu/drm/xe/xe_gsc_submit.c | 15
-rw-r--r--  drivers/gpu/drm/xe/xe_gsc_submit.h | 1
-rw-r--r--  drivers/gpu/drm/xe/xe_gsc_types.h | 1
-rw-r--r--  drivers/gpu/drm/xe/xe_gt.c | 63
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_ccs_mode.c | 19
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_ccs_mode.h | 2
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_clock.c | 5
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_clock.h | 2
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_debugfs.c | 240
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_debugfs.h | 2
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_freq.c | 63
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_freq.h | 2
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_idle.c | 43
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_idle.h | 2
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_mcr.c | 39
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_mcr.h | 14
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_sriov_pf.c | 52
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_sriov_pf.h | 20
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c | 1977
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h | 56
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_sriov_pf_config_types.h | 54
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c | 257
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_sriov_pf_control.h | 27
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_sriov_pf_helpers.h | 35
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_sriov_pf_policy.c | 418
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_sriov_pf_policy.h | 25
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_sriov_pf_policy_types.h | 31
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h | 34
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_sysfs.c | 14
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_sysfs.h | 2
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_throttle_sysfs.c | 16
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_throttle_sysfs.h | 2
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c | 43
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h | 2
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_topology.c | 115
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_topology.h | 11
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_types.h | 22
-rw-r--r--  drivers/gpu/drm/xe/xe_guc.c | 122
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_ads.c | 137
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_ads_types.h | 2
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_ct.c | 135
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_debugfs.c | 9
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_fwif.h | 7
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_hwconfig.c | 7
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_id_mgr.c | 279
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_id_mgr.h | 22
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_klv_helpers.c | 134
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_klv_helpers.h | 51
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_log.c | 5
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_pc.c | 116
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_submit.c | 230
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_submit.h | 6
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_submit_types.h | 13
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_types.h | 21
-rw-r--r--  drivers/gpu/drm/xe/xe_hmm.c | 253
-rw-r--r--  drivers/gpu/drm/xe/xe_hmm.h | 11
-rw-r--r--  drivers/gpu/drm/xe/xe_huc.c | 13
-rw-r--r--  drivers/gpu/drm/xe/xe_huc_debugfs.c | 5
-rw-r--r--  drivers/gpu/drm/xe/xe_hw_engine.c | 46
-rw-r--r--  drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c | 155
-rw-r--r--  drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.h | 7
-rw-r--r--  drivers/gpu/drm/xe/xe_hw_fence.c | 2
-rw-r--r--  drivers/gpu/drm/xe/xe_hwmon.c | 266
-rw-r--r--  drivers/gpu/drm/xe/xe_irq.c | 3
-rw-r--r--  drivers/gpu/drm/xe/xe_lmtt.c | 6
-rw-r--r--  drivers/gpu/drm/xe/xe_lrc.c | 169
-rw-r--r--  drivers/gpu/drm/xe/xe_lrc.h | 5
-rw-r--r--  drivers/gpu/drm/xe/xe_lrc_types.h | 2
-rw-r--r--  drivers/gpu/drm/xe/xe_memirq.c | 9
-rw-r--r--  drivers/gpu/drm/xe/xe_migrate.c | 8
-rw-r--r--  drivers/gpu/drm/xe/xe_mmio.c | 144
-rw-r--r--  drivers/gpu/drm/xe/xe_mmio.h | 82
-rw-r--r--  drivers/gpu/drm/xe/xe_mocs.c | 66
-rw-r--r--  drivers/gpu/drm/xe/xe_module.c | 7
-rw-r--r--  drivers/gpu/drm/xe/xe_module.h | 3
-rw-r--r--  drivers/gpu/drm/xe/xe_pat.c | 23
-rw-r--r--  drivers/gpu/drm/xe/xe_pci.c | 43
-rw-r--r--  drivers/gpu/drm/xe/xe_pcode.c | 117
-rw-r--r--  drivers/gpu/drm/xe/xe_pcode.h | 6
-rw-r--r--  drivers/gpu/drm/xe/xe_platform_types.h | 1
-rw-r--r--  drivers/gpu/drm/xe/xe_pm.c | 327
-rw-r--r--  drivers/gpu/drm/xe/xe_pm.h | 13
-rw-r--r--  drivers/gpu/drm/xe/xe_pt.c | 13
-rw-r--r--  drivers/gpu/drm/xe/xe_query.c | 53
-rw-r--r--  drivers/gpu/drm/xe/xe_ring_ops.c | 11
-rw-r--r--  drivers/gpu/drm/xe/xe_sa.c | 5
-rw-r--r--  drivers/gpu/drm/xe/xe_sched_job.c | 23
-rw-r--r--  drivers/gpu/drm/xe/xe_sched_job.h | 3
-rw-r--r--  drivers/gpu/drm/xe/xe_sriov.c | 62
-rw-r--r--  drivers/gpu/drm/xe/xe_sriov.h | 6
-rw-r--r--  drivers/gpu/drm/xe/xe_sriov_pf.c | 104
-rw-r--r--  drivers/gpu/drm/xe/xe_sriov_pf.h | 30
-rw-r--r--  drivers/gpu/drm/xe/xe_sriov_pf_helpers.h | 46
-rw-r--r--  drivers/gpu/drm/xe/xe_sriov_types.h | 19
-rw-r--r--  drivers/gpu/drm/xe/xe_sync.c | 7
-rw-r--r--  drivers/gpu/drm/xe/xe_sync.h | 1
-rw-r--r--  drivers/gpu/drm/xe/xe_tile.c | 17
-rw-r--r--  drivers/gpu/drm/xe/xe_tile_sysfs.c | 17
-rw-r--r--  drivers/gpu/drm/xe/xe_tile_sysfs.h | 2
-rw-r--r--  drivers/gpu/drm/xe/xe_trace.h | 6
-rw-r--r--  drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c | 15
-rw-r--r--  drivers/gpu/drm/xe/xe_ttm_sys_mgr.c | 5
-rw-r--r--  drivers/gpu/drm/xe/xe_ttm_vram_mgr.c | 14
-rw-r--r--  drivers/gpu/drm/xe/xe_ttm_vram_mgr.h | 1
-rw-r--r--  drivers/gpu/drm/xe/xe_tuning.c | 10
-rw-r--r--  drivers/gpu/drm/xe/xe_uc.c | 11
-rw-r--r--  drivers/gpu/drm/xe/xe_uc_fw.c | 53
-rw-r--r--  drivers/gpu/drm/xe/xe_uc_fw.h | 8
-rw-r--r--  drivers/gpu/drm/xe/xe_uc_fw_types.h | 3
-rw-r--r--  drivers/gpu/drm/xe/xe_vm.c | 210
-rw-r--r--  drivers/gpu/drm/xe/xe_vm_types.h | 7
-rw-r--r--  drivers/gpu/drm/xe/xe_vram_freq.c | 20
-rw-r--r--  drivers/gpu/drm/xe/xe_vram_freq.h | 2
-rw-r--r--  drivers/gpu/drm/xe/xe_wa.c | 135
-rw-r--r--  drivers/gpu/drm/xe/xe_wa_oob.rules | 11
-rw-r--r--  include/drm/xe_pciids.h | 7
-rw-r--r--  include/linux/devcoredump.h | 5
-rw-r--r--  include/uapi/drm/xe_drm.h | 25
185 files changed, 7969 insertions, 2053 deletions
diff --git a/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon b/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon
index 023fd82de3f7..d792a56f59ac 100644
--- a/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon
+++ b/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon
@@ -10,7 +10,7 @@ Description: RW. Card reactive sustained (PL1) power limit in microwatts.
power limit is disabled, writing 0 disables the
limit. Writing values > 0 and <= TDP will enable the power limit.
- Only supported for particular Intel xe graphics platforms.
+ Only supported for particular Intel Xe graphics platforms.
What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/power1_rated_max
Date: September 2023
@@ -18,53 +18,93 @@ KernelVersion: 6.5
Contact: intel-xe@lists.freedesktop.org
Description: RO. Card default power limit (default TDP setting).
- Only supported for particular Intel xe graphics platforms.
+ Only supported for particular Intel Xe graphics platforms.
-What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/power1_crit
+
+What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/energy1_input
Date: September 2023
KernelVersion: 6.5
Contact: intel-xe@lists.freedesktop.org
-Description: RW. Card reactive critical (I1) power limit in microwatts.
+Description: RO. Card energy input of device in microjoules.
+
+ Only supported for particular Intel Xe graphics platforms.
+
+What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/power1_max_interval
+Date: October 2023
+KernelVersion: 6.6
+Contact: intel-xe@lists.freedesktop.org
+Description: RW. Card sustained power limit interval (Tau in PL1/Tau) in
+ milliseconds over which sustained power is averaged.
+
+ Only supported for particular Intel Xe graphics platforms.
+
+What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/power2_max
+Date: February 2024
+KernelVersion: 6.8
+Contact: intel-xe@lists.freedesktop.org
+Description: RW. Package reactive sustained (PL1) power limit in microwatts.
+
+ The power controller will throttle the operating frequency
+ if the power averaged over a window (typically seconds)
+ exceeds this limit. A read value of 0 means that the PL1
+ power limit is disabled, writing 0 disables the
+ limit. Writing values > 0 and <= TDP will enable the power limit.
+
+ Only supported for particular Intel Xe graphics platforms.
+
+What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/power2_rated_max
+Date: February 2024
+KernelVersion: 6.8
+Contact: intel-xe@lists.freedesktop.org
+Description: RO. Package default power limit (default TDP setting).
- Card reactive critical (I1) power limit in microwatts is exposed
+ Only supported for particular Intel Xe graphics platforms.
+
+What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/power2_crit
+Date: February 2024
+KernelVersion: 6.8
+Contact: intel-xe@lists.freedesktop.org
+Description: RW. Package reactive critical (I1) power limit in microwatts.
+
+ Package reactive critical (I1) power limit in microwatts is exposed
for client products. The power controller will throttle the
operating frequency if the power averaged over a window exceeds
this limit.
- Only supported for particular Intel xe graphics platforms.
+ Only supported for particular Intel Xe graphics platforms.
-What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/curr1_crit
-Date: September 2023
-KernelVersion: 6.5
+What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/curr2_crit
+Date: February 2024
+KernelVersion: 6.8
Contact: intel-xe@lists.freedesktop.org
-Description: RW. Card reactive critical (I1) power limit in milliamperes.
+Description: RW. Package reactive critical (I1) power limit in milliamperes.
- Card reactive critical (I1) power limit in milliamperes is
+ Package reactive critical (I1) power limit in milliamperes is
exposed for server products. The power controller will throttle
the operating frequency if the power averaged over a window
exceeds this limit.
-What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/in0_input
-Date: September 2023
-KernelVersion: 6.5
+What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/energy2_input
+Date: February 2024
+KernelVersion: 6.8
Contact: intel-xe@lists.freedesktop.org
-Description: RO. Current Voltage in millivolt.
+Description: RO. Package energy input of device in microjoules.
- Only supported for particular Intel xe graphics platforms.
+ Only supported for particular Intel Xe graphics platforms.
-What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/energy1_input
-Date: September 2023
-KernelVersion: 6.5
+What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/power2_max_interval
+Date: February 2024
+KernelVersion: 6.8
Contact: intel-xe@lists.freedesktop.org
-Description: RO. Energy input of device in microjoules.
+Description: RW. Package sustained power limit interval (Tau in PL1/Tau) in
+ milliseconds over which sustained power is averaged.
- Only supported for particular Intel xe graphics platforms.
+ Only supported for particular Intel Xe graphics platforms.
-What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/power1_max_interval
-Date: October 2023
-KernelVersion: 6.6
+What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/in1_input
+Date: February 2024
+KernelVersion: 6.8
Contact: intel-xe@lists.freedesktop.org
-Description: RW. Sustained power limit interval (Tau in PL1/Tau) in
- milliseconds over which sustained power is averaged.
+Description: RO. Package current voltage in millivolts.
- Only supported for particular Intel xe graphics platforms.
+ Only supported for particular Intel Xe graphics platforms.
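
For reference, reading these attributes is ordinary sysfs I/O. A minimal
userspace sketch, assuming a hypothetical hwmon index ("hwmon2"); the real
index must be discovered at runtime, e.g. via the device symlink:

  #include <stdio.h>

  /* Read one unsigned value from a sysfs attribute file */
  static int read_ulong(const char *path, unsigned long *val)
  {
          FILE *f = fopen(path, "r");

          if (!f)
                  return -1;
          if (fscanf(f, "%lu", val) != 1)
                  *val = 0;
          fclose(f);
          return 0;
  }

  int main(void)
  {
          unsigned long uw = 0, ms = 0;

          read_ulong("/sys/class/hwmon/hwmon2/power1_max", &uw);
          read_ulong("/sys/class/hwmon/hwmon2/power1_max_interval", &ms);
          /* power1_max is in microwatts; 0 means PL1 is disabled.
           * power1_max_interval is Tau, the averaging window in ms. */
          printf("PL1 = %lu uW averaged over %lu ms\n", uw, ms);
          return 0;
  }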
diff --git a/drivers/base/devcoredump.c b/drivers/base/devcoredump.c
index 7e2d1f0d903a..82aeb09b3d1b 100644
--- a/drivers/base/devcoredump.c
+++ b/drivers/base/devcoredump.c
@@ -305,6 +305,29 @@ static ssize_t devcd_read_from_sgtable(char *buffer, loff_t offset,
}
/**
+ * dev_coredump_put - remove device coredump
+ * @dev: the struct device for the crashed device
+ *
+ * dev_coredump_put() removes the coredump, if one exists, for the given
+ * device from the file system and frees its associated data; otherwise
+ * it does nothing.
+ *
+ * It is useful for modules that do not want to keep their coredump
+ * available after unload.
+ */
+void dev_coredump_put(struct device *dev)
+{
+ struct device *existing;
+
+ existing = class_find_device(&devcd_class, NULL, dev,
+ devcd_match_failing);
+ if (existing) {
+ devcd_free(existing, NULL);
+ put_device(existing);
+ }
+}
+EXPORT_SYMBOL_GPL(dev_coredump_put);
+
+/**
* dev_coredumpm - create device coredump with read/free methods
* @dev: the struct device for the crashed device
* @owner: the module that contains the read/free functions, use %THIS_MODULE
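A minimal sketch of a call site for the new export, assuming a hypothetical
PCI driver (foo_remove() is illustrative, not taken from this series): a
module whose coredump read/free callbacks live in its own text wants the
dump gone at unbind rather than waiting for the devcoredump timeout.

  #include <linux/devcoredump.h>
  #include <linux/pci.h>

  static void foo_remove(struct pci_dev *pdev)
  {
          /* ... stop the device, free driver state ... */

          /* The dump's read/free callbacks reference this module, so
           * drop any pending coredump before the module goes away. */
          dev_coredump_put(&pdev->dev);
  }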
diff --git a/drivers/gpu/drm/i915/display/intel_display_power.c b/drivers/gpu/drm/i915/display/intel_display_power.c
index 6fd4fa52253a..03dc7edcc443 100644
--- a/drivers/gpu/drm/i915/display/intel_display_power.c
+++ b/drivers/gpu/drm/i915/display/intel_display_power.c
@@ -640,13 +640,7 @@ release_async_put_domains(struct i915_power_domains *power_domains,
enum intel_display_power_domain domain;
intel_wakeref_t wakeref;
- /*
- * The caller must hold already raw wakeref, upgrade that to a proper
- * wakeref to make the state checker happy about the HW access during
- * power well disabling.
- */
- assert_rpm_raw_wakeref_held(rpm);
- wakeref = intel_runtime_pm_get(rpm);
+ wakeref = intel_runtime_pm_get_noresume(rpm);
for_each_power_domain(domain, mask) {
/* Clear before put, so put's sanity check is happy. */
diff --git a/drivers/gpu/drm/i915/display/intel_hdcp_gsc.c b/drivers/gpu/drm/i915/display/intel_hdcp_gsc.c
index 302bff75b06c..35823e1f65d6 100644
--- a/drivers/gpu/drm/i915/display/intel_hdcp_gsc.c
+++ b/drivers/gpu/drm/i915/display/intel_hdcp_gsc.c
@@ -13,6 +13,12 @@
#include "intel_hdcp_gsc.h"
#include "intel_hdcp_gsc_message.h"
+struct intel_hdcp_gsc_message {
+ struct i915_vma *vma;
+ void *hdcp_cmd_in;
+ void *hdcp_cmd_out;
+};
+
bool intel_hdcp_gsc_cs_required(struct drm_i915_private *i915)
{
return DISPLAY_VER(i915) >= 14;
diff --git a/drivers/gpu/drm/i915/display/intel_hdcp_gsc.h b/drivers/gpu/drm/i915/display/intel_hdcp_gsc.h
index eba2057c5a9e..5f610df61cc9 100644
--- a/drivers/gpu/drm/i915/display/intel_hdcp_gsc.h
+++ b/drivers/gpu/drm/i915/display/intel_hdcp_gsc.h
@@ -10,12 +10,7 @@
#include <linux/types.h>
struct drm_i915_private;
-
-struct intel_hdcp_gsc_message {
- struct i915_vma *vma;
- void *hdcp_cmd_in;
- void *hdcp_cmd_out;
-};
+struct intel_hdcp_gsc_message;
bool intel_hdcp_gsc_cs_required(struct drm_i915_private *i915);
ssize_t intel_hdcp_gsc_msg_send(struct drm_i915_private *i915, u8 *msg_in,
diff --git a/drivers/gpu/drm/i915/i915_hwmon.c b/drivers/gpu/drm/i915/i915_hwmon.c
index c9169e56b9a1..49db3e09826c 100644
--- a/drivers/gpu/drm/i915/i915_hwmon.c
+++ b/drivers/gpu/drm/i915/i915_hwmon.c
@@ -787,7 +787,7 @@ void i915_hwmon_register(struct drm_i915_private *i915)
if (!IS_DGFX(i915))
return;
- hwmon = devm_kzalloc(dev, sizeof(*hwmon), GFP_KERNEL);
+ hwmon = kzalloc(sizeof(*hwmon), GFP_KERNEL);
if (!hwmon)
return;
@@ -813,14 +813,12 @@ void i915_hwmon_register(struct drm_i915_private *i915)
hwm_get_preregistration_info(i915);
/* hwmon_dev points to device hwmon<i> */
- hwmon_dev = devm_hwmon_device_register_with_info(dev, ddat->name,
- ddat,
- &hwm_chip_info,
- hwm_groups);
- if (IS_ERR(hwmon_dev)) {
- i915->hwmon = NULL;
- return;
- }
+ hwmon_dev = hwmon_device_register_with_info(dev, ddat->name,
+ ddat,
+ &hwm_chip_info,
+ hwm_groups);
+ if (IS_ERR(hwmon_dev))
+ goto err;
ddat->hwmon_dev = hwmon_dev;
@@ -833,16 +831,36 @@ void i915_hwmon_register(struct drm_i915_private *i915)
if (!hwm_gt_is_visible(ddat_gt, hwmon_energy, hwmon_energy_input, 0))
continue;
- hwmon_dev = devm_hwmon_device_register_with_info(dev, ddat_gt->name,
- ddat_gt,
- &hwm_gt_chip_info,
- NULL);
+ hwmon_dev = hwmon_device_register_with_info(dev, ddat_gt->name,
+ ddat_gt,
+ &hwm_gt_chip_info,
+ NULL);
if (!IS_ERR(hwmon_dev))
ddat_gt->hwmon_dev = hwmon_dev;
}
+ return;
+err:
+ i915_hwmon_unregister(i915);
}
void i915_hwmon_unregister(struct drm_i915_private *i915)
{
- fetch_and_zero(&i915->hwmon);
+ struct i915_hwmon *hwmon = i915->hwmon;
+ struct intel_gt *gt;
+ int i;
+
+ if (!hwmon)
+ return;
+
+ for_each_gt(gt, i915, i)
+ if (hwmon->ddat_gt[i].hwmon_dev)
+ hwmon_device_unregister(hwmon->ddat_gt[i].hwmon_dev);
+
+ if (hwmon->ddat.hwmon_dev)
+ hwmon_device_unregister(hwmon->ddat.hwmon_dev);
+
+ mutex_destroy(&hwmon->hwmon_lock);
+
+ kfree(i915->hwmon);
+ i915->hwmon = NULL;
}
diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c
index d4e844128826..2d0647aca964 100644
--- a/drivers/gpu/drm/i915/intel_runtime_pm.c
+++ b/drivers/gpu/drm/i915/intel_runtime_pm.c
@@ -272,15 +272,11 @@ intel_wakeref_t intel_runtime_pm_get_if_active(struct intel_runtime_pm *rpm)
* intel_runtime_pm_get_noresume - grab a runtime pm reference
* @rpm: the intel_runtime_pm structure
*
- * This function grabs a device-level runtime pm reference (mostly used for GEM
- * code to ensure the GTT or GT is on).
+ * This function grabs a device-level runtime pm reference.
*
- * It will _not_ power up the device but instead only check that it's powered
- * on. Therefore it is only valid to call this functions from contexts where
- * the device is known to be powered up and where trying to power it up would
- * result in hilarity and deadlocks. That pretty much means only the system
- * suspend/resume code where this is used to grab runtime pm references for
- * delayed setup down in work items.
+ * It will _not_ resume the device but instead only get an extra wakeref.
+ * Therefore it is only valid to call this function from contexts where
+ * the device is known to be active and another wakeref is already held.
*
* Any runtime pm reference obtained by this function must have a symmetric
* call to intel_runtime_pm_put() to release the reference again.
@@ -289,7 +285,7 @@ intel_wakeref_t intel_runtime_pm_get_if_active(struct intel_runtime_pm *rpm)
*/
intel_wakeref_t intel_runtime_pm_get_noresume(struct intel_runtime_pm *rpm)
{
- assert_rpm_wakelock_held(rpm);
+ assert_rpm_raw_wakeref_held(rpm);
pm_runtime_get_noresume(rpm->kdev);
intel_runtime_pm_acquire(rpm, true);
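A minimal sketch of the tightened contract documented above, assuming a
hypothetical caller (foo_hw_poke() is illustrative): the noresume get is
safe only while another (at least raw) wakeref already keeps the device
from suspending.

  static void foo_hw_poke(struct intel_runtime_pm *rpm)
  {
          intel_wakeref_t wakeref;

          /* Valid only because the caller guarantees a wakeref is
           * already held; this adds a reference without resuming. */
          wakeref = intel_runtime_pm_get_noresume(rpm);

          /* ... access hardware known to be powered ... */

          intel_runtime_pm_put(rpm, wakeref);
  }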
diff --git a/drivers/gpu/drm/xe/Kconfig b/drivers/gpu/drm/xe/Kconfig
index bfa0e9d4bd64..782934be0a77 100644
--- a/drivers/gpu/drm/xe/Kconfig
+++ b/drivers/gpu/drm/xe/Kconfig
@@ -29,6 +29,7 @@ config DRM_XE
select INPUT if ACPI
select ACPI_VIDEO if X86 && ACPI
select ACPI_BUTTON if ACPI
+ select X86_PLATFORM_DEVICES if X86 && ACPI
select ACPI_WMI if X86 && ACPI
select SYNC_FILE
select IOSF_MBI
@@ -44,6 +45,7 @@ config DRM_XE
select MMU_NOTIFIER
select WANT_DEV_COREDUMP
select AUXILIARY_BUS
+ select HMM_MIRROR
help
Experimental driver for Intel Xe series GPUs
diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
index 6885c13214ee..8321ec4f9b46 100644
--- a/drivers/gpu/drm/xe/Makefile
+++ b/drivers/gpu/drm/xe/Makefile
@@ -49,6 +49,7 @@ $(obj)/generated/%_wa_oob.c $(obj)/generated/%_wa_oob.h: $(obj)/xe_gen_wa_oob \
uses_generated_oob := \
$(obj)/xe_gsc.o \
$(obj)/xe_guc.o \
+ $(obj)/xe_guc_ads.o \
$(obj)/xe_migrate.o \
$(obj)/xe_ring_ops.o \
$(obj)/xe_vm.o \
@@ -97,6 +98,8 @@ xe-y += xe_bb.o \
xe_guc_db_mgr.o \
xe_guc_debugfs.o \
xe_guc_hwconfig.o \
+ xe_guc_id_mgr.o \
+ xe_guc_klv_helpers.o \
xe_guc_log.o \
xe_guc_pc.o \
xe_guc_submit.o \
@@ -145,6 +148,8 @@ xe-y += xe_bb.o \
xe_wa.o \
xe_wopcm.o
+xe-$(CONFIG_HMM_MIRROR) += xe_hmm.o
+
# graphics hardware monitoring (HWMON) support
xe-$(CONFIG_HWMON) += xe_hwmon.o
@@ -155,9 +160,14 @@ xe-y += \
xe_sriov.o
xe-$(CONFIG_PCI_IOV) += \
+ xe_gt_sriov_pf.o \
+ xe_gt_sriov_pf_config.o \
+ xe_gt_sriov_pf_control.o \
+ xe_gt_sriov_pf_policy.o \
xe_lmtt.o \
xe_lmtt_2l.o \
- xe_lmtt_ml.o
+ xe_lmtt_ml.o \
+ xe_sriov_pf.o
# include helpers for tests even when XE is built-in
ifdef CONFIG_DRM_XE_KUNIT_TEST
@@ -254,6 +264,7 @@ xe-$(CONFIG_DRM_XE_DISPLAY) += \
i915-display/intel_global_state.o \
i915-display/intel_gmbus.o \
i915-display/intel_hdcp.o \
+ i915-display/intel_hdcp_gsc_message.o \
i915-display/intel_hdmi.o \
i915-display/intel_hotplug.o \
i915-display/intel_hotplug_irq.o \
diff --git a/drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h b/drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h
index 5496a5890847..c1ad09b36453 100644
--- a/drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h
+++ b/drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h
@@ -3,8 +3,8 @@
* Copyright © 2023 Intel Corporation
*/
-#ifndef _GUC_ACTIONS_PF_ABI_H
-#define _GUC_ACTIONS_PF_ABI_H
+#ifndef _ABI_GUC_ACTIONS_SRIOV_ABI_H
+#define _ABI_GUC_ACTIONS_SRIOV_ABI_H
#include "guc_communication_ctb_abi.h"
@@ -171,4 +171,200 @@
#define VF2GUC_RELAY_TO_PF_REQUEST_MSG_n_RELAY_DATAx GUC_HXG_REQUEST_MSG_n_DATAn
#define VF2GUC_RELAY_TO_PF_REQUEST_MSG_NUM_RELAY_DATA GUC_RELAY_MSG_MAX_LEN
+/**
+ * DOC: GUC2PF_VF_STATE_NOTIFY
+ *
+ * The GUC2PF_VF_STATE_NOTIFY message is used by the GuC to notify PF about change
+ * of the VF state.
+ *
+ * This G2H message is sent as `CTB HXG Message`_.
+ *
+ * +---+-------+--------------------------------------------------------------+
+ * | | Bits | Description |
+ * +===+=======+==============================================================+
+ * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_GUC_ |
+ * | +-------+--------------------------------------------------------------+
+ * | | 30:28 | TYPE = GUC_HXG_TYPE_EVENT_ |
+ * | +-------+--------------------------------------------------------------+
+ * | | 27:16 | DATA0 = MBZ |
+ * | +-------+--------------------------------------------------------------+
+ * | | 15:0 | ACTION = _`GUC_ACTION_GUC2PF_VF_STATE_NOTIFY` = 0x5106 |
+ * +---+-------+--------------------------------------------------------------+
+ * | 1 | 31:0 | DATA1 = **VFID** - VF identifier |
+ * +---+-------+--------------------------------------------------------------+
+ * | 2 | 31:0 | DATA2 = **EVENT** - notification event: |
+ * | | | |
+ * | | | - _`GUC_PF_NOTIFY_VF_ENABLE` = 1 (only if VFID = 0) |
+ * | | | - _`GUC_PF_NOTIFY_VF_FLR` = 1 |
+ * | | | - _`GUC_PF_NOTIFY_VF_FLR_DONE` = 2 |
+ * | | | - _`GUC_PF_NOTIFY_VF_PAUSE_DONE` = 3 |
+ * | | | - _`GUC_PF_NOTIFY_VF_FIXUP_DONE` = 4 |
+ * +---+-------+--------------------------------------------------------------+
+ */
+#define GUC_ACTION_GUC2PF_VF_STATE_NOTIFY 0x5106u
+
+#define GUC2PF_VF_STATE_NOTIFY_EVENT_MSG_LEN (GUC_HXG_EVENT_MSG_MIN_LEN + 2u)
+#define GUC2PF_VF_STATE_NOTIFY_EVENT_MSG_0_MBZ GUC_HXG_EVENT_MSG_0_DATA0
+#define GUC2PF_VF_STATE_NOTIFY_EVENT_MSG_1_VFID GUC_HXG_EVENT_MSG_n_DATAn
+#define GUC2PF_VF_STATE_NOTIFY_EVENT_MSG_2_EVENT GUC_HXG_EVENT_MSG_n_DATAn
+#define GUC_PF_NOTIFY_VF_ENABLE 1u
+#define GUC_PF_NOTIFY_VF_FLR 1u
+#define GUC_PF_NOTIFY_VF_FLR_DONE 2u
+#define GUC_PF_NOTIFY_VF_PAUSE_DONE 3u
+#define GUC_PF_NOTIFY_VF_FIXUP_DONE 4u
+
+/**
+ * DOC: PF2GUC_UPDATE_VGT_POLICY
+ *
+ * This message is used by the PF to set `GuC VGT Policy KLVs`_.
+ *
+ * This message must be sent as `CTB HXG Message`_.
+ *
+ * +---+-------+--------------------------------------------------------------+
+ * | | Bits | Description |
+ * +===+=======+==============================================================+
+ * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_HOST_ |
+ * | +-------+--------------------------------------------------------------+
+ * | | 30:28 | TYPE = GUC_HXG_TYPE_REQUEST_ |
+ * | +-------+--------------------------------------------------------------+
+ * | | 27:16 | MBZ |
+ * | +-------+--------------------------------------------------------------+
+ * | | 15:0 | ACTION = _`GUC_ACTION_PF2GUC_UPDATE_VGT_POLICY` = 0x5502 |
+ * +---+-------+--------------------------------------------------------------+
+ * | 1 | 31:0 | **CFG_ADDR_LO** - dword aligned GGTT offset that |
+ * | | | represents the start of `GuC VGT Policy KLVs`_ list. |
+ * +---+-------+--------------------------------------------------------------+
+ * | 2 | 31:0 | **CFG_ADDR_HI** - upper 32 bits of above offset. |
+ * +---+-------+--------------------------------------------------------------+
+ * | 3 | 31:0 | **CFG_SIZE** - size (in dwords) of the config buffer |
+ * +---+-------+--------------------------------------------------------------+
+ *
+ * +---+-------+--------------------------------------------------------------+
+ * | | Bits | Description |
+ * +===+=======+==============================================================+
+ * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_GUC_ |
+ * | +-------+--------------------------------------------------------------+
+ * | | 30:28 | TYPE = GUC_HXG_TYPE_RESPONSE_SUCCESS_ |
+ * | +-------+--------------------------------------------------------------+
+ * | | 27:0 | **COUNT** - number of KLVs successfully applied |
+ * +---+-------+--------------------------------------------------------------+
+ */
+#define GUC_ACTION_PF2GUC_UPDATE_VGT_POLICY 0x5502u
+
+#define PF2GUC_UPDATE_VGT_POLICY_REQUEST_MSG_LEN (GUC_HXG_REQUEST_MSG_MIN_LEN + 3u)
+#define PF2GUC_UPDATE_VGT_POLICY_REQUEST_MSG_0_MBZ GUC_HXG_REQUEST_MSG_0_DATA0
+#define PF2GUC_UPDATE_VGT_POLICY_REQUEST_MSG_1_CFG_ADDR_LO GUC_HXG_REQUEST_MSG_n_DATAn
+#define PF2GUC_UPDATE_VGT_POLICY_REQUEST_MSG_2_CFG_ADDR_HI GUC_HXG_REQUEST_MSG_n_DATAn
+#define PF2GUC_UPDATE_VGT_POLICY_REQUEST_MSG_3_CFG_SIZE GUC_HXG_REQUEST_MSG_n_DATAn
+
+#define PF2GUC_UPDATE_VGT_POLICY_RESPONSE_MSG_LEN GUC_HXG_RESPONSE_MSG_MIN_LEN
+#define PF2GUC_UPDATE_VGT_POLICY_RESPONSE_MSG_0_COUNT GUC_HXG_RESPONSE_MSG_0_DATA0
+
+/**
+ * DOC: PF2GUC_UPDATE_VF_CFG
+ *
+ * The `PF2GUC_UPDATE_VF_CFG`_ message is used by PF to provision single VF in GuC.
+ *
+ * This message must be sent as `CTB HXG Message`_.
+ *
+ * +---+-------+--------------------------------------------------------------+
+ * | | Bits | Description |
+ * +===+=======+==============================================================+
+ * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_HOST_ |
+ * | +-------+--------------------------------------------------------------+
+ * | | 30:28 | TYPE = GUC_HXG_TYPE_REQUEST_ |
+ * | +-------+--------------------------------------------------------------+
+ * | | 27:16 | MBZ |
+ * | +-------+--------------------------------------------------------------+
+ * | | 15:0 | ACTION = _`GUC_ACTION_PF2GUC_UPDATE_VF_CFG` = 0x5503 |
+ * +---+-------+--------------------------------------------------------------+
+ * | 1 | 31:0 | **VFID** - identifier of the VF that the KLV |
+ * | | | configurations are being applied to |
+ * +---+-------+--------------------------------------------------------------+
+ * | 2 | 31:0 | **CFG_ADDR_LO** - dword aligned GGTT offset that represents |
+ * | | | the start of a list of virtualization related KLV configs |
+ * | | | that are to be applied to the VF. |
+ * | | | If this parameter is zero, the list is not parsed. |
+ * | | | If both this address and CFG_SIZE are zero, the |
+ * | | | associated VF configuration is reset to its default state. |
+ * +---+-------+--------------------------------------------------------------+
+ * | 3 | 31:0 | **CFG_ADDR_HI** - upper 32 bits of configs address. |
+ * +---+-------+--------------------------------------------------------------+
+ * | 4 | 31:0 | **CFG_SIZE** - size (in dwords) of the config buffer |
+ * +---+-------+--------------------------------------------------------------+
+ *
+ * +---+-------+--------------------------------------------------------------+
+ * | | Bits | Description |
+ * +===+=======+==============================================================+
+ * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_GUC_ |
+ * | +-------+--------------------------------------------------------------+
+ * | | 30:28 | TYPE = GUC_HXG_TYPE_RESPONSE_SUCCESS_ |
+ * | +-------+--------------------------------------------------------------+
+ * | | 27:0 | **COUNT** - number of KLVs successfully applied |
+ * +---+-------+--------------------------------------------------------------+
+ */
+#define GUC_ACTION_PF2GUC_UPDATE_VF_CFG 0x5503u
+
+#define PF2GUC_UPDATE_VF_CFG_REQUEST_MSG_LEN (GUC_HXG_REQUEST_MSG_MIN_LEN + 4u)
+#define PF2GUC_UPDATE_VF_CFG_REQUEST_MSG_0_MBZ GUC_HXG_REQUEST_MSG_0_DATA0
+#define PF2GUC_UPDATE_VF_CFG_REQUEST_MSG_1_VFID GUC_HXG_REQUEST_MSG_n_DATAn
+#define PF2GUC_UPDATE_VF_CFG_REQUEST_MSG_2_CFG_ADDR_LO GUC_HXG_REQUEST_MSG_n_DATAn
+#define PF2GUC_UPDATE_VF_CFG_REQUEST_MSG_3_CFG_ADDR_HI GUC_HXG_REQUEST_MSG_n_DATAn
+#define PF2GUC_UPDATE_VF_CFG_REQUEST_MSG_4_CFG_SIZE GUC_HXG_REQUEST_MSG_n_DATAn
+
+#define PF2GUC_UPDATE_VF_CFG_RESPONSE_MSG_LEN GUC_HXG_RESPONSE_MSG_MIN_LEN
+#define PF2GUC_UPDATE_VF_CFG_RESPONSE_MSG_0_COUNT GUC_HXG_RESPONSE_MSG_0_DATA0
+
+/**
+ * DOC: PF2GUC_VF_CONTROL
+ *
+ * The PF2GUC_VF_CONTROL message is used by the PF to trigger VF state change
+ * maintained by the GuC.
+ *
+ * This H2G message must be sent as `CTB HXG Message`_.
+ *
+ * +---+-------+--------------------------------------------------------------+
+ * | | Bits | Description |
+ * +===+=======+==============================================================+
+ * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_HOST_ |
+ * | +-------+--------------------------------------------------------------+
+ * | | 30:28 | TYPE = GUC_HXG_TYPE_REQUEST_ |
+ * | +-------+--------------------------------------------------------------+
+ * | | 27:16 | DATA0 = MBZ |
+ * | +-------+--------------------------------------------------------------+
+ * | | 15:0 | ACTION = _`GUC_ACTION_PF2GUC_VF_CONTROL_CMD` = 0x5506 |
+ * +---+-------+--------------------------------------------------------------+
+ * | 1 | 31:0 | DATA1 = **VFID** - VF identifier |
+ * +---+-------+--------------------------------------------------------------+
+ * | 2 | 31:0 | DATA2 = **COMMAND** - control command: |
+ * | | | |
+ * | | | - _`GUC_PF_TRIGGER_VF_PAUSE` = 1 |
+ * | | | - _`GUC_PF_TRIGGER_VF_RESUME` = 2 |
+ * | | | - _`GUC_PF_TRIGGER_VF_STOP` = 3 |
+ * | | | - _`GUC_PF_TRIGGER_VF_FLR_START` = 4 |
+ * | | | - _`GUC_PF_TRIGGER_VF_FLR_FINISH` = 5 |
+ * +---+-------+--------------------------------------------------------------+
+ *
+ * +---+-------+--------------------------------------------------------------+
+ * | | Bits | Description |
+ * +===+=======+==============================================================+
+ * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_GUC_ |
+ * | +-------+--------------------------------------------------------------+
+ * | | 30:28 | TYPE = GUC_HXG_TYPE_RESPONSE_SUCCESS_ |
+ * | +-------+--------------------------------------------------------------+
+ * | | 27:0 | DATA0 = MBZ |
+ * +---+-------+--------------------------------------------------------------+
+ */
+#define GUC_ACTION_PF2GUC_VF_CONTROL 0x5506u
+
+#define PF2GUC_VF_CONTROL_REQUEST_MSG_LEN (GUC_HXG_EVENT_MSG_MIN_LEN + 2u)
+#define PF2GUC_VF_CONTROL_REQUEST_MSG_0_MBZ GUC_HXG_EVENT_MSG_0_DATA0
+#define PF2GUC_VF_CONTROL_REQUEST_MSG_1_VFID GUC_HXG_EVENT_MSG_n_DATAn
+#define PF2GUC_VF_CONTROL_REQUEST_MSG_2_COMMAND GUC_HXG_EVENT_MSG_n_DATAn
+#define GUC_PF_TRIGGER_VF_PAUSE 1u
+#define GUC_PF_TRIGGER_VF_RESUME 2u
+#define GUC_PF_TRIGGER_VF_STOP 3u
+#define GUC_PF_TRIGGER_VF_FLR_START 4u
+#define GUC_PF_TRIGGER_VF_FLR_FINISH 5u
+
#endif
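To make the message layouts above concrete, here is a minimal sketch of
composing a PF2GUC_VF_CONTROL request and parsing a GUC2PF_VF_STATE_NOTIFY
event with these definitions. send_to_guc() stands in for the real CTB
transport, and both function names are illustrative; the FIELD_PREP/
FIELD_GET usage mirrors other GuC HXG actions.

  #include <linux/bitfield.h>
  #include <linux/kernel.h>
  #include <linux/printk.h>

  int send_to_guc(const u32 *request, u32 len); /* hypothetical transport */

  /* H2G: 3-dword PF2GUC_VF_CONTROL request */
  static int pf_send_vf_control(u32 vfid, u32 command)
  {
          u32 request[PF2GUC_VF_CONTROL_REQUEST_MSG_LEN] = {
                  FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
                  FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) |
                  FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION,
                             GUC_ACTION_PF2GUC_VF_CONTROL),
                  FIELD_PREP(PF2GUC_VF_CONTROL_REQUEST_MSG_1_VFID, vfid),
                  FIELD_PREP(PF2GUC_VF_CONTROL_REQUEST_MSG_2_COMMAND, command),
          };

          return send_to_guc(request, ARRAY_SIZE(request));
  }

  /* G2H: extract the VFID and event from a GUC2PF_VF_STATE_NOTIFY message */
  static void pf_handle_vf_state_notify(const u32 *msg)
  {
          u32 vfid = FIELD_GET(GUC2PF_VF_STATE_NOTIFY_EVENT_MSG_1_VFID, msg[1]);
          u32 event = FIELD_GET(GUC2PF_VF_STATE_NOTIFY_EVENT_MSG_2_EVENT, msg[2]);

          /* GUC_PF_NOTIFY_VF_ENABLE and GUC_PF_NOTIFY_VF_FLR share the
           * value 1 and are disambiguated by VFID: ENABLE is only sent
           * with VFID == 0. */
          if (vfid == 0 && event == GUC_PF_NOTIFY_VF_ENABLE)
                  pr_debug("VFs were enabled or disabled\n");
          else if (event == GUC_PF_NOTIFY_VF_FLR)
                  pr_debug("VF%u FLR started\n", vfid);
  }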
diff --git a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h
index 0400bc0fccdc..511cf974d585 100644
--- a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h
+++ b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h
@@ -319,4 +319,14 @@ enum {
#define GUC_KLV_VF_CFG_BEGIN_CONTEXT_ID_KEY 0x8a0b
#define GUC_KLV_VF_CFG_BEGIN_CONTEXT_ID_LEN 1u
+/*
+ * Workaround keys:
+ */
+enum xe_guc_klv_ids {
+ GUC_WORKAROUND_KLV_BLOCK_INTERRUPTS_WHEN_MGSR_BLOCKED = 0x9002,
+ GUC_WORKAROUND_KLV_ID_GAM_PFQ_SHADOW_TAIL_POLLING = 0x9005,
+ GUC_WORKAROUND_KLV_ID_DISABLE_MTP_DURING_ASYNC_COMPUTE = 0x9007,
+ GUC_WA_KLV_NP_RD_WRITE_TO_CLEAR_RCSM_AT_CGP_LATE_RESTORE = 0x9008,
+};
+
#endif
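A workaround KLV of this kind carries no payload, so it reduces to a single
header dword: the key combined with a zero length. A minimal sketch using
the generic GUC_KLV_0_* fields defined earlier in this file (the helper
name is illustrative; in-tree code emits these headers into the GuC ADS
workaround list):

  #include <linux/bitfield.h>

  /* Compose the one-dword header for a zero-length workaround KLV */
  static u32 guc_wa_klv_header(enum xe_guc_klv_ids id)
  {
          return FIELD_PREP(GUC_KLV_0_KEY, id) |
                 FIELD_PREP(GUC_KLV_0_LEN, 0);
  }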
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h
index ea6b8e0f1f35..ffaa4d2f1eed 100644
--- a/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h
+++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h
@@ -82,6 +82,7 @@ static inline struct drm_i915_private *kdev_to_i915(struct device *kdev)
#define IS_DG2(dev_priv) IS_PLATFORM(dev_priv, XE_DG2)
#define IS_METEORLAKE(dev_priv) IS_PLATFORM(dev_priv, XE_METEORLAKE)
#define IS_LUNARLAKE(dev_priv) IS_PLATFORM(dev_priv, XE_LUNARLAKE)
+#define IS_BATTLEMAGE(dev_priv) IS_PLATFORM(dev_priv, XE_BATTLEMAGE)
#define IS_HASWELL_ULT(dev_priv) (dev_priv && 0)
#define IS_BROADWELL_ULT(dev_priv) (dev_priv && 0)
@@ -127,18 +128,22 @@ static inline intel_wakeref_t intel_runtime_pm_get(struct xe_runtime_pm *pm)
{
struct xe_device *xe = container_of(pm, struct xe_device, runtime_pm);
- if (xe_pm_runtime_get(xe) < 0) {
- xe_pm_runtime_put(xe);
- return 0;
- }
- return 1;
+ return xe_pm_runtime_resume_and_get(xe);
}
static inline intel_wakeref_t intel_runtime_pm_get_if_in_use(struct xe_runtime_pm *pm)
{
struct xe_device *xe = container_of(pm, struct xe_device, runtime_pm);
- return xe_pm_runtime_get_if_active(xe);
+ return xe_pm_runtime_get_if_in_use(xe);
+}
+
+static inline intel_wakeref_t intel_runtime_pm_get_noresume(struct xe_runtime_pm *pm)
+{
+ struct xe_device *xe = container_of(pm, struct xe_device, runtime_pm);
+
+ xe_pm_runtime_get_noresume(xe);
+ return true;
}
static inline void intel_runtime_pm_put_unchecked(struct xe_runtime_pm *pm)
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_gem_stolen.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_gem_stolen.h
index bd233007c1b7..cb6c7598824b 100644
--- a/drivers/gpu/drm/xe/compat-i915-headers/i915_gem_stolen.h
+++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_gem_stolen.h
@@ -17,10 +17,15 @@ static inline int i915_gem_stolen_insert_node_in_range(struct xe_device *xe,
{
struct xe_bo *bo;
int err;
- u32 flags = XE_BO_CREATE_PINNED_BIT | XE_BO_CREATE_STOLEN_BIT;
+ u32 flags = XE_BO_FLAG_PINNED | XE_BO_FLAG_STOLEN;
- if (align)
+ if (start < SZ_4K)
+ start = SZ_4K;
+
+ if (align) {
size = ALIGN(size, align);
+ start = ALIGN(start, align);
+ }
bo = xe_bo_create_locked_range(xe, xe_device_get_root_tile(xe),
NULL, size, start, end,
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/intel_uncore.h b/drivers/gpu/drm/xe/compat-i915-headers/intel_uncore.h
index cd26ddc0f69e..ef79793caa72 100644
--- a/drivers/gpu/drm/xe/compat-i915-headers/intel_uncore.h
+++ b/drivers/gpu/drm/xe/compat-i915-headers/intel_uncore.h
@@ -25,15 +25,15 @@ static inline u32 intel_uncore_read(struct intel_uncore *uncore,
return xe_mmio_read32(__compat_uncore_to_gt(uncore), reg);
}
-static inline u32 intel_uncore_read8(struct intel_uncore *uncore,
- i915_reg_t i915_reg)
+static inline u8 intel_uncore_read8(struct intel_uncore *uncore,
+ i915_reg_t i915_reg)
{
struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg));
return xe_mmio_read8(__compat_uncore_to_gt(uncore), reg);
}
-static inline u32 intel_uncore_read16(struct intel_uncore *uncore,
+static inline u16 intel_uncore_read16(struct intel_uncore *uncore,
i915_reg_t i915_reg)
{
struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg));
diff --git a/drivers/gpu/drm/xe/display/intel_fb_bo.c b/drivers/gpu/drm/xe/display/intel_fb_bo.c
index a9c1f9885c6b..e18521acc516 100644
--- a/drivers/gpu/drm/xe/display/intel_fb_bo.c
+++ b/drivers/gpu/drm/xe/display/intel_fb_bo.c
@@ -11,7 +11,7 @@
void intel_fb_bo_framebuffer_fini(struct xe_bo *bo)
{
- if (bo->flags & XE_BO_CREATE_PINNED_BIT) {
+ if (bo->flags & XE_BO_FLAG_PINNED) {
/* Unpin our kernel fb first */
xe_bo_lock(bo, false);
xe_bo_unpin(bo);
@@ -33,9 +33,9 @@ int intel_fb_bo_framebuffer_init(struct intel_framebuffer *intel_fb,
if (ret)
goto err;
- if (!(bo->flags & XE_BO_SCANOUT_BIT)) {
+ if (!(bo->flags & XE_BO_FLAG_SCANOUT)) {
/*
- * XE_BO_SCANOUT_BIT should ideally be set at creation, or is
+ * XE_BO_FLAG_SCANOUT should ideally be set at creation, or is
* automatically set when creating FB. We cannot change caching
* mode when the object is VM_BINDed, so we can only set
* coherency with display when unbound.
@@ -45,7 +45,7 @@ int intel_fb_bo_framebuffer_init(struct intel_framebuffer *intel_fb,
ret = -EINVAL;
goto err;
}
- bo->flags |= XE_BO_SCANOUT_BIT;
+ bo->flags |= XE_BO_FLAG_SCANOUT;
}
ttm_bo_unreserve(&bo->ttm);
return 0;
diff --git a/drivers/gpu/drm/xe/display/intel_fbdev_fb.c b/drivers/gpu/drm/xe/display/intel_fbdev_fb.c
index 51ae3561fd0d..9e4bcfdbc7e5 100644
--- a/drivers/gpu/drm/xe/display/intel_fbdev_fb.c
+++ b/drivers/gpu/drm/xe/display/intel_fbdev_fb.c
@@ -42,9 +42,9 @@ struct drm_framebuffer *intel_fbdev_fb_alloc(struct drm_fb_helper *helper,
if (!IS_DGFX(dev_priv)) {
obj = xe_bo_create_pin_map(dev_priv, xe_device_get_root_tile(dev_priv),
NULL, size,
- ttm_bo_type_kernel, XE_BO_SCANOUT_BIT |
- XE_BO_CREATE_STOLEN_BIT |
- XE_BO_CREATE_PINNED_BIT);
+ ttm_bo_type_kernel, XE_BO_FLAG_SCANOUT |
+ XE_BO_FLAG_STOLEN |
+ XE_BO_FLAG_PINNED);
if (!IS_ERR(obj))
drm_info(&dev_priv->drm, "Allocated fbdev into stolen\n");
else
@@ -52,9 +52,9 @@ struct drm_framebuffer *intel_fbdev_fb_alloc(struct drm_fb_helper *helper,
}
if (IS_ERR(obj)) {
obj = xe_bo_create_pin_map(dev_priv, xe_device_get_root_tile(dev_priv), NULL, size,
- ttm_bo_type_kernel, XE_BO_SCANOUT_BIT |
- XE_BO_CREATE_VRAM_IF_DGFX(xe_device_get_root_tile(dev_priv)) |
- XE_BO_CREATE_PINNED_BIT);
+ ttm_bo_type_kernel, XE_BO_FLAG_SCANOUT |
+ XE_BO_FLAG_VRAM_IF_DGFX(xe_device_get_root_tile(dev_priv)) |
+ XE_BO_FLAG_PINNED);
}
if (IS_ERR(obj)) {
@@ -81,8 +81,8 @@ int intel_fbdev_fb_fill_info(struct drm_i915_private *i915, struct fb_info *info
{
struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
- if (!(obj->flags & XE_BO_CREATE_SYSTEM_BIT)) {
- if (obj->flags & XE_BO_CREATE_STOLEN_BIT)
+ if (!(obj->flags & XE_BO_FLAG_SYSTEM)) {
+ if (obj->flags & XE_BO_FLAG_STOLEN)
info->fix.smem_start = xe_ttm_stolen_io_offset(obj, 0);
else
info->fix.smem_start =
diff --git a/drivers/gpu/drm/xe/display/xe_display.c b/drivers/gpu/drm/xe/display/xe_display.c
index 6ec375c1c4b6..63b27fbcdaca 100644
--- a/drivers/gpu/drm/xe/display/xe_display.c
+++ b/drivers/gpu/drm/xe/display/xe_display.c
@@ -101,8 +101,6 @@ static void display_destroy(struct drm_device *dev, void *dummy)
*/
int xe_display_create(struct xe_device *xe)
{
- int err;
-
spin_lock_init(&xe->display.fb_tracking.lock);
xe->display.hotplug.dp_wq = alloc_ordered_workqueue("xe-dp", 0);
@@ -110,11 +108,7 @@ int xe_display_create(struct xe_device *xe)
drmm_mutex_init(&xe->drm, &xe->sb_lock);
xe->enabled_irq_mask = ~0;
- err = drmm_add_action_or_reset(&xe->drm, display_destroy, NULL);
- if (err)
- return err;
-
- return 0;
+ return drmm_add_action_or_reset(&xe->drm, display_destroy, NULL);
}
static void xe_display_fini_nommio(struct drm_device *dev, void *dummy)
diff --git a/drivers/gpu/drm/xe/display/xe_dsb_buffer.c b/drivers/gpu/drm/xe/display/xe_dsb_buffer.c
index 27c2fb1c002a..44c9fd2143cc 100644
--- a/drivers/gpu/drm/xe/display/xe_dsb_buffer.c
+++ b/drivers/gpu/drm/xe/display/xe_dsb_buffer.c
@@ -45,8 +45,8 @@ bool intel_dsb_buffer_create(struct intel_crtc *crtc, struct intel_dsb_buffer *d
obj = xe_bo_create_pin_map(i915, xe_device_get_root_tile(i915),
NULL, PAGE_ALIGN(size),
ttm_bo_type_kernel,
- XE_BO_CREATE_VRAM_IF_DGFX(xe_device_get_root_tile(i915)) |
- XE_BO_CREATE_GGTT_BIT);
+ XE_BO_FLAG_VRAM_IF_DGFX(xe_device_get_root_tile(i915)) |
+ XE_BO_FLAG_GGTT);
if (IS_ERR(obj)) {
kfree(vma);
return false;
diff --git a/drivers/gpu/drm/xe/display/xe_fb_pin.c b/drivers/gpu/drm/xe/display/xe_fb_pin.c
index 722c84a56607..3e1ae37c4c8b 100644
--- a/drivers/gpu/drm/xe/display/xe_fb_pin.c
+++ b/drivers/gpu/drm/xe/display/xe_fb_pin.c
@@ -10,6 +10,7 @@
#include "intel_fb_pin.h"
#include "xe_ggtt.h"
#include "xe_gt.h"
+#include "xe_pm.h"
#include <drm/ttm/ttm_bo.h>
@@ -30,7 +31,7 @@ write_dpt_rotated(struct xe_bo *bo, struct iosys_map *map, u32 *dpt_ofs, u32 bo_
for (row = 0; row < height; row++) {
u64 pte = ggtt->pt_ops->pte_encode_bo(bo, src_idx * XE_PAGE_SIZE,
- xe->pat.idx[XE_CACHE_WB]);
+ xe->pat.idx[XE_CACHE_NONE]);
iosys_map_wr(map, *dpt_ofs, u64, pte);
*dpt_ofs += 8;
@@ -62,7 +63,7 @@ write_dpt_remapped(struct xe_bo *bo, struct iosys_map *map, u32 *dpt_ofs,
for (column = 0; column < width; column++) {
iosys_map_wr(map, *dpt_ofs, u64,
pte_encode_bo(bo, src_idx * XE_PAGE_SIZE,
- xe->pat.idx[XE_CACHE_WB]));
+ xe->pat.idx[XE_CACHE_NONE]));
*dpt_ofs += 8;
src_idx++;
@@ -99,18 +100,21 @@ static int __xe_pin_fb_vma_dpt(struct intel_framebuffer *fb,
if (IS_DGFX(xe))
dpt = xe_bo_create_pin_map(xe, tile0, NULL, dpt_size,
ttm_bo_type_kernel,
- XE_BO_CREATE_VRAM0_BIT |
- XE_BO_CREATE_GGTT_BIT);
+ XE_BO_FLAG_VRAM0 |
+ XE_BO_FLAG_GGTT |
+ XE_BO_FLAG_PAGETABLE);
else
dpt = xe_bo_create_pin_map(xe, tile0, NULL, dpt_size,
ttm_bo_type_kernel,
- XE_BO_CREATE_STOLEN_BIT |
- XE_BO_CREATE_GGTT_BIT);
+ XE_BO_FLAG_STOLEN |
+ XE_BO_FLAG_GGTT |
+ XE_BO_FLAG_PAGETABLE);
if (IS_ERR(dpt))
dpt = xe_bo_create_pin_map(xe, tile0, NULL, dpt_size,
ttm_bo_type_kernel,
- XE_BO_CREATE_SYSTEM_BIT |
- XE_BO_CREATE_GGTT_BIT);
+ XE_BO_FLAG_SYSTEM |
+ XE_BO_FLAG_GGTT |
+ XE_BO_FLAG_PAGETABLE);
if (IS_ERR(dpt))
return PTR_ERR(dpt);
@@ -119,7 +123,7 @@ static int __xe_pin_fb_vma_dpt(struct intel_framebuffer *fb,
for (x = 0; x < size / XE_PAGE_SIZE; x++) {
u64 pte = ggtt->pt_ops->pte_encode_bo(bo, x * XE_PAGE_SIZE,
- xe->pat.idx[XE_CACHE_WB]);
+ xe->pat.idx[XE_CACHE_NONE]);
iosys_map_wr(&dpt->vmap, x * 8, u64, pte);
}
@@ -165,7 +169,7 @@ write_ggtt_rotated(struct xe_bo *bo, struct xe_ggtt *ggtt, u32 *ggtt_ofs, u32 bo
for (row = 0; row < height; row++) {
u64 pte = ggtt->pt_ops->pte_encode_bo(bo, src_idx * XE_PAGE_SIZE,
- xe->pat.idx[XE_CACHE_WB]);
+ xe->pat.idx[XE_CACHE_NONE]);
xe_ggtt_set_pte(ggtt, *ggtt_ofs, pte);
*ggtt_ofs += XE_PAGE_SIZE;
@@ -190,7 +194,7 @@ static int __xe_pin_fb_vma_ggtt(struct intel_framebuffer *fb,
/* TODO: Consider sharing framebuffer mapping?
* embed i915_vma inside intel_framebuffer
*/
- xe_device_mem_access_get(tile_to_xe(ggtt->tile));
+ xe_pm_runtime_get_noresume(tile_to_xe(ggtt->tile));
ret = mutex_lock_interruptible(&ggtt->lock);
if (ret)
goto out;
@@ -211,7 +215,7 @@ static int __xe_pin_fb_vma_ggtt(struct intel_framebuffer *fb,
for (x = 0; x < size; x += XE_PAGE_SIZE) {
u64 pte = ggtt->pt_ops->pte_encode_bo(bo, x,
- xe->pat.idx[XE_CACHE_WB]);
+ xe->pat.idx[XE_CACHE_NONE]);
xe_ggtt_set_pte(ggtt, vma->node.start + x, pte);
}
@@ -238,11 +242,10 @@ static int __xe_pin_fb_vma_ggtt(struct intel_framebuffer *fb,
rot_info->plane[i].dst_stride);
}
- xe_ggtt_invalidate(ggtt);
out_unlock:
mutex_unlock(&ggtt->lock);
out:
- xe_device_mem_access_put(tile_to_xe(ggtt->tile));
+ xe_pm_runtime_put(tile_to_xe(ggtt->tile));
return ret;
}
@@ -260,7 +263,7 @@ static struct i915_vma *__xe_pin_fb_vma(struct intel_framebuffer *fb,
if (IS_DGFX(to_xe_device(bo->ttm.base.dev)) &&
intel_fb_rc_ccs_cc_plane(&fb->base) >= 0 &&
- !(bo->flags & XE_BO_NEEDS_CPU_ACCESS)) {
+ !(bo->flags & XE_BO_FLAG_NEEDS_CPU_ACCESS)) {
struct xe_tile *tile = xe_device_get_root_tile(xe);
/*
@@ -321,7 +324,7 @@ static void __xe_unpin_fb_vma(struct i915_vma *vma)
xe_bo_unpin_map_no_vm(vma->dpt);
else if (!drm_mm_node_allocated(&vma->bo->ggtt_node) ||
vma->bo->ggtt_node.start != vma->node.start)
- xe_ggtt_remove_node(ggtt, &vma->node);
+ xe_ggtt_remove_node(ggtt, &vma->node, false);
ttm_bo_reserve(&vma->bo->ttm, false, false, NULL);
ttm_bo_unpin(&vma->bo->ttm);
@@ -353,7 +356,7 @@ int intel_plane_pin_fb(struct intel_plane_state *plane_state)
struct i915_vma *vma;
/* We reject creating !SCANOUT fb's, so this is weird.. */
- drm_WARN_ON(bo->ttm.base.dev, !(bo->flags & XE_BO_SCANOUT_BIT));
+ drm_WARN_ON(bo->ttm.base.dev, !(bo->flags & XE_BO_FLAG_SCANOUT));
vma = __xe_pin_fb_vma(to_intel_framebuffer(fb), &plane_state->view.gtt);
if (IS_ERR(vma))
@@ -381,4 +384,4 @@ struct i915_address_space *intel_dpt_create(struct intel_framebuffer *fb)
void intel_dpt_destroy(struct i915_address_space *vm)
{
return;
-} \ No newline at end of file
+}
diff --git a/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c b/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c
index 0f11a39333e2..d46f87a039f2 100644
--- a/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c
+++ b/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c
@@ -3,32 +3,250 @@
* Copyright 2023, Intel Corporation.
*/
-#include "i915_drv.h"
+#include <drm/drm_print.h>
+#include <drm/i915_hdcp_interface.h>
+#include <linux/delay.h>
+
+#include "abi/gsc_command_header_abi.h"
#include "intel_hdcp_gsc.h"
+#include "intel_hdcp_gsc_message.h"
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_device_types.h"
+#include "xe_gsc_proxy.h"
+#include "xe_gsc_submit.h"
+#include "xe_gt.h"
+#include "xe_map.h"
+#include "xe_pm.h"
+#include "xe_uc_fw.h"
+
+#define HECI_MEADDRESS_HDCP 18
+
+struct intel_hdcp_gsc_message {
+ struct xe_bo *hdcp_bo;
+ u64 hdcp_cmd_in;
+ u64 hdcp_cmd_out;
+};
-bool intel_hdcp_gsc_cs_required(struct drm_i915_private *i915)
+#define HDCP_GSC_HEADER_SIZE sizeof(struct intel_gsc_mtl_header)
+
+bool intel_hdcp_gsc_cs_required(struct xe_device *xe)
{
- return true;
+ return DISPLAY_VER(xe) >= 14;
}
-bool intel_hdcp_gsc_check_status(struct drm_i915_private *i915)
+bool intel_hdcp_gsc_check_status(struct xe_device *xe)
{
- return false;
+ struct xe_tile *tile = xe_device_get_root_tile(xe);
+ struct xe_gt *gt = tile->media_gt;
+ bool ret = true;
+
+ if (!xe_uc_fw_is_enabled(&gt->uc.gsc.fw))
+ return false;
+
+ xe_pm_runtime_get(xe);
+ if (xe_force_wake_get(gt_to_fw(gt), XE_FW_GSC)) {
+ drm_dbg_kms(&xe->drm,
+ "failed to get forcewake to check proxy status\n");
+ ret = false;
+ goto out;
+ }
+
+ if (!xe_gsc_proxy_init_done(&gt->uc.gsc))
+ ret = false;
+
+ xe_force_wake_put(gt_to_fw(gt), XE_FW_GSC);
+out:
+ xe_pm_runtime_put(xe);
+ return ret;
}
-int intel_hdcp_gsc_init(struct drm_i915_private *i915)
+/* This function allocates memory for the command that we will send to the GSC CS */
+static int intel_hdcp_gsc_initialize_message(struct xe_device *xe,
+ struct intel_hdcp_gsc_message *hdcp_message)
{
- drm_info(&i915->drm, "HDCP support not yet implemented\n");
- return -ENODEV;
+ struct xe_bo *bo = NULL;
+ u64 cmd_in, cmd_out;
+ int ret = 0;
+
+ /* allocate a two-page object for HDCP command memory and store it */
+ bo = xe_bo_create_pin_map(xe, xe_device_get_root_tile(xe), NULL, PAGE_SIZE * 2,
+ ttm_bo_type_kernel,
+ XE_BO_FLAG_SYSTEM |
+ XE_BO_FLAG_GGTT);
+
+ if (IS_ERR(bo)) {
+ drm_err(&xe->drm, "Failed to allocate bo for HDCP streaming command!\n");
+ ret = PTR_ERR(bo);
+ goto out;
+ }
+
+ cmd_in = xe_bo_ggtt_addr(bo);
+ cmd_out = cmd_in + PAGE_SIZE;
+ xe_map_memset(xe, &bo->vmap, 0, 0, bo->size);
+
+ hdcp_message->hdcp_bo = bo;
+ hdcp_message->hdcp_cmd_in = cmd_in;
+ hdcp_message->hdcp_cmd_out = cmd_out;
+out:
+ return ret;
}
-void intel_hdcp_gsc_fini(struct drm_i915_private *i915)
+static int intel_hdcp_gsc_hdcp2_init(struct xe_device *xe)
{
+ struct intel_hdcp_gsc_message *hdcp_message;
+ int ret;
+
+ hdcp_message = kzalloc(sizeof(*hdcp_message), GFP_KERNEL);
+
+ if (!hdcp_message)
+ return -ENOMEM;
+
+ /*
+ * NOTE: No need to lock the comp mutex here as it is already
+ * taken before this function is called
+ */
+ ret = intel_hdcp_gsc_initialize_message(xe, hdcp_message);
+ if (ret) {
+ drm_err(&xe->drm, "Could not initialize hdcp_message\n");
+ kfree(hdcp_message);
+ return ret;
+ }
+
+ xe->display.hdcp.hdcp_message = hdcp_message;
+ return ret;
}
-ssize_t intel_hdcp_gsc_msg_send(struct drm_i915_private *i915, u8 *msg_in,
+static const struct i915_hdcp_ops gsc_hdcp_ops = {
+ .initiate_hdcp2_session = intel_hdcp_gsc_initiate_session,
+ .verify_receiver_cert_prepare_km =
+ intel_hdcp_gsc_verify_receiver_cert_prepare_km,
+ .verify_hprime = intel_hdcp_gsc_verify_hprime,
+ .store_pairing_info = intel_hdcp_gsc_store_pairing_info,
+ .initiate_locality_check = intel_hdcp_gsc_initiate_locality_check,
+ .verify_lprime = intel_hdcp_gsc_verify_lprime,
+ .get_session_key = intel_hdcp_gsc_get_session_key,
+ .repeater_check_flow_prepare_ack =
+ intel_hdcp_gsc_repeater_check_flow_prepare_ack,
+ .verify_mprime = intel_hdcp_gsc_verify_mprime,
+ .enable_hdcp_authentication = intel_hdcp_gsc_enable_authentication,
+ .close_hdcp_session = intel_hdcp_gsc_close_session,
+};
+
+int intel_hdcp_gsc_init(struct xe_device *xe)
+{
+ struct i915_hdcp_arbiter *data;
+ int ret;
+
+ data = kzalloc(sizeof(*data), GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+
+ mutex_lock(&xe->display.hdcp.hdcp_mutex);
+ xe->display.hdcp.arbiter = data;
+ xe->display.hdcp.arbiter->hdcp_dev = xe->drm.dev;
+ xe->display.hdcp.arbiter->ops = &gsc_hdcp_ops;
+ ret = intel_hdcp_gsc_hdcp2_init(xe);
+ if (ret)
+ kfree(data);
+
+ mutex_unlock(&xe->display.hdcp.hdcp_mutex);
+
+ return ret;
+}
+
+void intel_hdcp_gsc_fini(struct xe_device *xe)
+{
+ struct intel_hdcp_gsc_message *hdcp_message =
+ xe->display.hdcp.hdcp_message;
+
+ if (!hdcp_message)
+ return;
+
+ xe_bo_unpin_map_no_vm(hdcp_message->hdcp_bo);
+ kfree(hdcp_message);
+}
+
+static int xe_gsc_send_sync(struct xe_device *xe,
+ struct intel_hdcp_gsc_message *hdcp_message,
+ u32 msg_size_in, u32 msg_size_out,
+ u32 addr_out_off)
+{
+ struct xe_gt *gt = hdcp_message->hdcp_bo->tile->media_gt;
+ struct iosys_map *map = &hdcp_message->hdcp_bo->vmap;
+ struct xe_gsc *gsc = &gt->uc.gsc;
+ int ret;
+
+ ret = xe_gsc_pkt_submit_kernel(gsc, hdcp_message->hdcp_cmd_in, msg_size_in,
+ hdcp_message->hdcp_cmd_out, msg_size_out);
+ if (ret) {
+ drm_err(&xe->drm, "failed to send gsc HDCP msg (%d)\n", ret);
+ return ret;
+ }
+
+ if (xe_gsc_check_and_update_pending(xe, map, 0, map, addr_out_off))
+ return -EAGAIN;
+
+ ret = xe_gsc_read_out_header(xe, map, addr_out_off,
+ sizeof(struct hdcp_cmd_header), NULL);
+
+ return ret;
+}
+
+ssize_t intel_hdcp_gsc_msg_send(struct xe_device *xe, u8 *msg_in,
size_t msg_in_len, u8 *msg_out,
size_t msg_out_len)
{
- return -ENODEV;
+ const size_t max_msg_size = PAGE_SIZE - HDCP_GSC_HEADER_SIZE;
+ struct intel_hdcp_gsc_message *hdcp_message;
+ u64 host_session_id;
+ u32 msg_size_in, msg_size_out;
+ u32 addr_out_off, addr_in_wr_off = 0;
+ int ret, tries = 0;
+
+ if (msg_in_len > max_msg_size || msg_out_len > max_msg_size) {
+ ret = -ENOSPC;
+ goto out;
+ }
+
+ msg_size_in = msg_in_len + HDCP_GSC_HEADER_SIZE;
+ msg_size_out = msg_out_len + HDCP_GSC_HEADER_SIZE;
+ hdcp_message = xe->display.hdcp.hdcp_message;
+ addr_out_off = PAGE_SIZE;
+
+ host_session_id = xe_gsc_create_host_session_id();
+ xe_pm_runtime_get_noresume(xe);
+ addr_in_wr_off = xe_gsc_emit_header(xe, &hdcp_message->hdcp_bo->vmap,
+ addr_in_wr_off, HECI_MEADDRESS_HDCP,
+ host_session_id, msg_in_len);
+ xe_map_memcpy_to(xe, &hdcp_message->hdcp_bo->vmap, addr_in_wr_off,
+ msg_in, msg_in_len);
+ /*
+ * Keep resending the request while the pending bit is set. There is no
+ * need to add the message handle: we reuse the same address, so the
+ * header location is unchanged and already contains the handle. Send
+ * the message up to 20 times, 50 ms apart.
+ */
+ do {
+ ret = xe_gsc_send_sync(xe, hdcp_message, msg_size_in, msg_size_out,
+ addr_out_off);
+
+ /* Only try again if gsc says so */
+ if (ret != -EAGAIN)
+ break;
+
+ msleep(50);
+
+ } while (++tries < 20);
+
+ if (ret)
+ goto out;
+
+ xe_map_memcpy_from(xe, msg_out, &hdcp_message->hdcp_bo->vmap,
+ addr_out_off + HDCP_GSC_HEADER_SIZE,
+ msg_out_len);
+
+out:
+ xe_pm_runtime_put(xe);
+ return ret;
}
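
For orientation, intel_hdcp_gsc_initialize_message() above lays out the two pages as an input page followed by a reply page, which is exactly what the max_msg_size check in intel_hdcp_gsc_msg_send() relies on. A standalone sketch of that arithmetic, with a made-up header size standing in for sizeof(struct intel_gsc_mtl_header):

#include <stdio.h>

#define SKETCH_PAGE_SIZE   4096u /* assumes 4 KiB pages */
#define SKETCH_HEADER_SIZE   32u /* made-up stand-in, not the real struct size */

int main(void)
{
	unsigned int cmd_in = 0;                          /* page 0: header + msg_in */
	unsigned int cmd_out = cmd_in + SKETCH_PAGE_SIZE; /* page 1: header + msg_out */

	printf("cmd_out offset: %u\n", cmd_out);
	printf("max payload per direction: %u bytes\n",
	       SKETCH_PAGE_SIZE - SKETCH_HEADER_SIZE);
	return 0;
}
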
diff --git a/drivers/gpu/drm/xe/display/xe_plane_initial.c b/drivers/gpu/drm/xe/display/xe_plane_initial.c
index 866d1dd6eeb4..9693c56d386b 100644
--- a/drivers/gpu/drm/xe/display/xe_plane_initial.c
+++ b/drivers/gpu/drm/xe/display/xe_plane_initial.c
@@ -6,6 +6,7 @@
/* for ioread64 */
#include <linux/io-64-nonatomic-lo-hi.h>
+#include "regs/xe_gtt_defs.h"
#include "xe_ggtt.h"
#include "i915_drv.h"
@@ -62,7 +63,7 @@ initial_plane_bo(struct xe_device *xe,
if (plane_config->size == 0)
return NULL;
- flags = XE_BO_CREATE_PINNED_BIT | XE_BO_SCANOUT_BIT | XE_BO_CREATE_GGTT_BIT;
+ flags = XE_BO_FLAG_PINNED | XE_BO_FLAG_SCANOUT | XE_BO_FLAG_GGTT;
base = round_down(plane_config->base, page_size);
if (IS_DGFX(xe)) {
@@ -79,7 +80,7 @@ initial_plane_bo(struct xe_device *xe,
}
phys_base = pte & ~(page_size - 1);
- flags |= XE_BO_CREATE_VRAM0_BIT;
+ flags |= XE_BO_FLAG_VRAM0;
/*
* We don't currently expect this to ever be placed in the
@@ -101,7 +102,7 @@ initial_plane_bo(struct xe_device *xe,
if (!stolen)
return NULL;
phys_base = base;
- flags |= XE_BO_CREATE_STOLEN_BIT;
+ flags |= XE_BO_FLAG_STOLEN;
/*
* If the FB is too big, just don't use it since fbdev is not very
diff --git a/drivers/gpu/drm/xe/instructions/xe_gfx_state_commands.h b/drivers/gpu/drm/xe/instructions/xe_gfx_state_commands.h
new file mode 100644
index 000000000000..dca62af5a5d5
--- /dev/null
+++ b/drivers/gpu/drm/xe/instructions/xe_gfx_state_commands.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2024 Intel Corporation
+ */
+
+#ifndef _XE_GFX_STATE_COMMANDS_H_
+#define _XE_GFX_STATE_COMMANDS_H_
+
+#include "instructions/xe_instr_defs.h"
+
+#define GFX_STATE_OPCODE REG_GENMASK(28, 26)
+
+#define GFX_STATE_CMD(opcode) \
+ (XE_INSTR_GFX_STATE | REG_FIELD_PREP(GFX_STATE_OPCODE, opcode))
+
+#define STATE_WRITE_INLINE GFX_STATE_CMD(0x0)
+
+#endif
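
For readers unfamiliar with the encoding: assuming XE_INSTR_CMD_TYPE occupies bits 31:29 (it is defined in xe_instr_defs.h, extended below) and REG_FIELD_PREP() behaves like the kernel's FIELD_PREP(), the macros above compose a command header dword as in this standalone sketch:

#include <stdio.h>

/* Simplified stand-ins for the kernel's REG_GENMASK()/REG_FIELD_PREP(). */
#define SK_GENMASK(h, l)         ((~0u << (l)) & (~0u >> (31 - (h))))
#define SK_FIELD_PREP(mask, val) (((val) << __builtin_ctz(mask)) & (mask))

#define SK_CMD_TYPE          SK_GENMASK(31, 29) /* assumed XE_INSTR_CMD_TYPE layout */
#define SK_GFX_STATE         SK_FIELD_PREP(SK_CMD_TYPE, 0x4)
#define SK_GFX_STATE_OPCODE  SK_GENMASK(28, 26)
#define SK_GFX_STATE_CMD(op) (SK_GFX_STATE | SK_FIELD_PREP(SK_GFX_STATE_OPCODE, (op)))

int main(void)
{
	/* STATE_WRITE_INLINE == GFX_STATE_CMD(0x0): type 0x4 in bits 31:29 */
	printf("STATE_WRITE_INLINE header dword: 0x%08x\n", SK_GFX_STATE_CMD(0x0));
	return 0;
}
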
diff --git a/drivers/gpu/drm/xe/instructions/xe_gfxpipe_commands.h b/drivers/gpu/drm/xe/instructions/xe_gfxpipe_commands.h
index 8e6dd061f2ae..31d28a67ef6a 100644
--- a/drivers/gpu/drm/xe/instructions/xe_gfxpipe_commands.h
+++ b/drivers/gpu/drm/xe/instructions/xe_gfxpipe_commands.h
@@ -47,6 +47,8 @@
#define GPGPU_CSR_BASE_ADDRESS GFXPIPE_COMMON_CMD(0x1, 0x4)
#define STATE_COMPUTE_MODE GFXPIPE_COMMON_CMD(0x1, 0x5)
#define CMD_3DSTATE_BTD GFXPIPE_COMMON_CMD(0x1, 0x6)
+#define STATE_SYSTEM_MEM_FENCE_ADDRESS GFXPIPE_COMMON_CMD(0x1, 0x9)
+#define STATE_CONTEXT_DATA_BASE_ADDRESS GFXPIPE_COMMON_CMD(0x1, 0xB)
#define CMD_3DSTATE_VF_STATISTICS GFXPIPE_SINGLE_DW_CMD(0x0, 0xB)
@@ -71,6 +73,7 @@
#define CMD_3DSTATE_WM GFXPIPE_3D_CMD(0x0, 0x14)
#define CMD_3DSTATE_CONSTANT_VS GFXPIPE_3D_CMD(0x0, 0x15)
#define CMD_3DSTATE_CONSTANT_GS GFXPIPE_3D_CMD(0x0, 0x16)
+#define CMD_3DSTATE_CONSTANT_PS GFXPIPE_3D_CMD(0x0, 0x17)
#define CMD_3DSTATE_SAMPLE_MASK GFXPIPE_3D_CMD(0x0, 0x18)
#define CMD_3DSTATE_CONSTANT_HS GFXPIPE_3D_CMD(0x0, 0x19)
#define CMD_3DSTATE_CONSTANT_DS GFXPIPE_3D_CMD(0x0, 0x1A)
diff --git a/drivers/gpu/drm/xe/instructions/xe_instr_defs.h b/drivers/gpu/drm/xe/instructions/xe_instr_defs.h
index 04179b2a48e1..fd2ce7ace510 100644
--- a/drivers/gpu/drm/xe/instructions/xe_instr_defs.h
+++ b/drivers/gpu/drm/xe/instructions/xe_instr_defs.h
@@ -17,6 +17,7 @@
#define XE_INSTR_MI REG_FIELD_PREP(XE_INSTR_CMD_TYPE, 0x0)
#define XE_INSTR_GSC REG_FIELD_PREP(XE_INSTR_CMD_TYPE, 0x2)
#define XE_INSTR_GFXPIPE REG_FIELD_PREP(XE_INSTR_CMD_TYPE, 0x3)
+#define XE_INSTR_GFX_STATE REG_FIELD_PREP(XE_INSTR_CMD_TYPE, 0x4)
/*
* Most (but not all) instructions have a "length" field in the instruction
diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
index deddc8be48c0..af71b87d8030 100644
--- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
@@ -104,9 +104,6 @@
#define FF_SLICE_CS_CHICKEN1(base) XE_REG((base) + 0xe0, XE_REG_OPTION_MASKED)
#define FFSC_PERCTX_PREEMPT_CTRL REG_BIT(14)
-#define FF_SLICE_CS_CHICKEN2(base) XE_REG((base) + 0xe4, XE_REG_OPTION_MASKED)
-#define PERF_FIX_BALANCING_CFE_DISABLE REG_BIT(15)
-
#define CS_DEBUG_MODE1(base) XE_REG((base) + 0xec, XE_REG_OPTION_MASKED)
#define FF_DOP_CLOCK_GATE_DISABLE REG_BIT(1)
#define REPLAY_MODE_GRANULARITY REG_BIT(0)
diff --git a/drivers/gpu/drm/xe/regs/xe_gsc_regs.h b/drivers/gpu/drm/xe/regs/xe_gsc_regs.h
index 9886ec9cb08e..e2a925be137c 100644
--- a/drivers/gpu/drm/xe/regs/xe_gsc_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_gsc_regs.h
@@ -38,4 +38,11 @@
#define HECI_H_GS1(base) XE_REG((base) + 0xc4c)
#define HECI_H_GS1_ER_PREP REG_BIT(0)
+#define GSCI_TIMER_STATUS XE_REG(0x11ca28)
+#define GSCI_TIMER_STATUS_VALUE REG_GENMASK(1, 0)
+#define GSCI_TIMER_STATUS_RESET_IN_PROGRESS 0
+#define GSCI_TIMER_STATUS_TIMER_EXPIRED 1
+#define GSCI_TIMER_STATUS_RESET_COMPLETE 2
+#define GSCI_TIMER_STATUS_OUT_OF_RESET 3
+
#endif
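
GSCI_TIMER_STATUS_VALUE is a two-bit state field; a minimal decode sketch follows (the register read is faked and any semantics beyond the four names are assumptions):

#include <stdint.h>
#include <stdio.h>

static const char *const sk_gsci_state[] = {
	"reset in progress",	/* GSCI_TIMER_STATUS_RESET_IN_PROGRESS */
	"timer expired",	/* GSCI_TIMER_STATUS_TIMER_EXPIRED */
	"reset complete",	/* GSCI_TIMER_STATUS_RESET_COMPLETE */
	"out of reset",		/* GSCI_TIMER_STATUS_OUT_OF_RESET */
};

int main(void)
{
	uint32_t reg = 0x2;	/* pretend MMIO read of GSCI_TIMER_STATUS */

	/* GSCI_TIMER_STATUS_VALUE is bits 1:0 */
	printf("GSC ER state: %s\n", sk_gsci_state[reg & 0x3]);
	return 0;
}
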
diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
index 15ac2d284d48..94445810ccc9 100644
--- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
@@ -69,10 +69,14 @@
#define XEHP_TILE_ADDR_RANGE(_idx) XE_REG_MCR(0x4900 + (_idx) * 4)
#define XEHP_FLAT_CCS_BASE_ADDR XE_REG_MCR(0x4910)
+#define XEHP_FLAT_CCS_PTR REG_GENMASK(31, 8)
#define WM_CHICKEN3 XE_REG_MCR(0x5588, XE_REG_OPTION_MASKED)
#define HIZ_PLANE_COMPRESSION_DIS REG_BIT(10)
+#define CHICKEN_RASTER_1 XE_REG_MCR(0x6204, XE_REG_OPTION_MASKED)
+#define DIS_SF_ROUND_NEAREST_EVEN REG_BIT(8)
+
#define CHICKEN_RASTER_2 XE_REG_MCR(0x6208, XE_REG_OPTION_MASKED)
#define TBIMR_FAST_CLIP REG_BIT(5)
@@ -97,7 +101,8 @@
#define CACHE_MODE_1 XE_REG(0x7004, XE_REG_OPTION_MASKED)
#define MSAA_OPTIMIZATION_REDUC_DISABLE REG_BIT(11)
-#define COMMON_SLICE_CHICKEN1 XE_REG(0x7010)
+#define COMMON_SLICE_CHICKEN1 XE_REG(0x7010, XE_REG_OPTION_MASKED)
+#define DISABLE_BOTTOM_CLIP_RECTANGLE_TEST REG_BIT(14)
#define HIZ_CHICKEN XE_REG(0x7018, XE_REG_OPTION_MASKED)
#define DG1_HZ_READ_SUPPRESSION_OPTIMIZATION_DISABLE REG_BIT(14)
@@ -141,6 +146,10 @@
#define XE2_FLAT_CCS_BASE_RANGE_LOWER XE_REG_MCR(0x8800)
#define XE2_FLAT_CCS_ENABLE REG_BIT(0)
+#define XE2_FLAT_CCS_BASE_LOWER_ADDR_MASK REG_GENMASK(31, 6)
+
+#define XE2_FLAT_CCS_BASE_RANGE_UPPER XE_REG_MCR(0x8804)
+#define XE2_FLAT_CCS_BASE_UPPER_ADDR_MASK REG_GENMASK(7, 0)
#define GSCPSMI_BASE XE_REG(0x880c)
@@ -156,7 +165,10 @@
#define MIRROR_FUSE3 XE_REG(0x9118)
#define XE2_NODE_ENABLE_MASK REG_GENMASK(31, 16)
#define L3BANK_PAIR_COUNT 4
+#define XEHPC_GT_L3_MODE_MASK REG_GENMASK(7, 4)
+#define XE2_GT_L3_MODE_MASK REG_GENMASK(7, 4)
#define L3BANK_MASK REG_GENMASK(3, 0)
+#define XELP_GT_L3_MODE_MASK REG_GENMASK(7, 0)
/* on Xe_HP the same fuses indicates mslices instead of L3 banks */
#define MAX_MSLICES 4
#define MEML3_EN_MASK REG_GENMASK(3, 0)
@@ -271,6 +283,10 @@
#define FORCEWAKE_GT XE_REG(0xa188)
#define PG_ENABLE XE_REG(0xa210)
+#define VD2_MFXVDENC_POWERGATE_ENABLE REG_BIT(8)
+#define VD2_HCP_POWERGATE_ENABLE REG_BIT(7)
+#define VD0_MFXVDENC_POWERGATE_ENABLE REG_BIT(4)
+#define VD0_HCP_POWERGATE_ENABLE REG_BIT(3)
#define CTC_MODE XE_REG(0xa26c)
#define CTC_SHIFT_PARAMETER_MASK REG_GENMASK(2, 1)
@@ -349,6 +365,7 @@
#define THREAD_EX_ARB_MODE_RR_AFTER_DEP REG_FIELD_PREP(THREAD_EX_ARB_MODE, 0x2)
#define ROW_CHICKEN3 XE_REG_MCR(0xe49c, XE_REG_OPTION_MASKED)
+#define XE2_EUPEND_CHK_FLUSH_DIS REG_BIT(14)
#define DIS_FIX_EOT1_FLUSH REG_BIT(9)
#define TDL_TSL_CHICKEN XE_REG_MCR(0xe4c4, XE_REG_OPTION_MASKED)
@@ -364,17 +381,22 @@
#define DISABLE_EARLY_READ REG_BIT(14)
#define ENABLE_LARGE_GRF_MODE REG_BIT(12)
#define PUSH_CONST_DEREF_HOLD_DIS REG_BIT(8)
+#define DISABLE_TDL_SVHS_GATING REG_BIT(1)
#define DISABLE_DOP_GATING REG_BIT(0)
#define RT_CTRL XE_REG_MCR(0xe530)
#define DIS_NULL_QUERY REG_BIT(10)
+#define EU_SYSTOLIC_LIC_THROTTLE_CTL_WITH_LOCK XE_REG_MCR(0xe534)
+#define EU_SYSTOLIC_LIC_THROTTLE_CTL_LOCK_BIT REG_BIT(31)
+
#define XEHP_HDC_CHICKEN0 XE_REG_MCR(0xe5f0, XE_REG_OPTION_MASKED)
#define LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK REG_GENMASK(13, 11)
#define DIS_ATOMIC_CHAINING_TYPED_WRITES REG_BIT(3)
#define LSC_CHICKEN_BIT_0 XE_REG_MCR(0xe7c8)
#define DISABLE_D8_D16_COASLESCE REG_BIT(30)
+#define WR_REQ_CHAINING_DIS REG_BIT(26)
#define TGM_WRITE_EOM_FORCE REG_BIT(17)
#define FORCE_1_SUB_MESSAGE_PER_FRAGMENT REG_BIT(15)
#define SEQUENTIAL_ACCESS_UPGRADE_DISABLE REG_BIT(13)
@@ -439,7 +461,13 @@
#define GT_PERF_STATUS XE_REG(0x1381b4)
#define VOLTAGE_MASK REG_GENMASK(10, 0)
-#define GT_INTR_DW(x) XE_REG(0x190018 + ((x) * 4))
+/*
+ * Note: Interrupt registers 1900xx are VF accessible only until version 12.50.
+ * On newer platforms, VFs use memory-based interrupts instead.
+ * However, for simplicity we keep this XE_REG_OPTION_VF tag intact.
+ */
+
+#define GT_INTR_DW(x) XE_REG(0x190018 + ((x) * 4), XE_REG_OPTION_VF)
#define INTR_GSC REG_BIT(31)
#define INTR_GUC REG_BIT(25)
#define INTR_MGUC REG_BIT(24)
@@ -450,16 +478,16 @@
#define INTR_VECS(x) REG_BIT(31 - (x))
#define INTR_VCS(x) REG_BIT(x)
-#define RENDER_COPY_INTR_ENABLE XE_REG(0x190030)
-#define VCS_VECS_INTR_ENABLE XE_REG(0x190034)
-#define GUC_SG_INTR_ENABLE XE_REG(0x190038)
+#define RENDER_COPY_INTR_ENABLE XE_REG(0x190030, XE_REG_OPTION_VF)
+#define VCS_VECS_INTR_ENABLE XE_REG(0x190034, XE_REG_OPTION_VF)
+#define GUC_SG_INTR_ENABLE XE_REG(0x190038, XE_REG_OPTION_VF)
#define ENGINE1_MASK REG_GENMASK(31, 16)
#define ENGINE0_MASK REG_GENMASK(15, 0)
-#define GPM_WGBOXPERF_INTR_ENABLE XE_REG(0x19003c)
-#define GUNIT_GSC_INTR_ENABLE XE_REG(0x190044)
-#define CCS_RSVD_INTR_ENABLE XE_REG(0x190048)
+#define GPM_WGBOXPERF_INTR_ENABLE XE_REG(0x19003c, XE_REG_OPTION_VF)
+#define GUNIT_GSC_INTR_ENABLE XE_REG(0x190044, XE_REG_OPTION_VF)
+#define CCS_RSVD_INTR_ENABLE XE_REG(0x190048, XE_REG_OPTION_VF)
-#define INTR_IDENTITY_REG(x) XE_REG(0x190060 + ((x) * 4))
+#define INTR_IDENTITY_REG(x) XE_REG(0x190060 + ((x) * 4), XE_REG_OPTION_VF)
#define INTR_DATA_VALID REG_BIT(31)
#define INTR_ENGINE_INSTANCE(x) REG_FIELD_GET(GENMASK(25, 20), x)
#define INTR_ENGINE_CLASS(x) REG_FIELD_GET(GENMASK(18, 16), x)
@@ -468,16 +496,16 @@
#define OTHER_GSC_HECI2_INSTANCE 3
#define OTHER_GSC_INSTANCE 6
-#define IIR_REG_SELECTOR(x) XE_REG(0x190070 + ((x) * 4))
-#define RCS0_RSVD_INTR_MASK XE_REG(0x190090)
-#define BCS_RSVD_INTR_MASK XE_REG(0x1900a0)
-#define VCS0_VCS1_INTR_MASK XE_REG(0x1900a8)
-#define VCS2_VCS3_INTR_MASK XE_REG(0x1900ac)
-#define VECS0_VECS1_INTR_MASK XE_REG(0x1900d0)
+#define IIR_REG_SELECTOR(x) XE_REG(0x190070 + ((x) * 4), XE_REG_OPTION_VF)
+#define RCS0_RSVD_INTR_MASK XE_REG(0x190090, XE_REG_OPTION_VF)
+#define BCS_RSVD_INTR_MASK XE_REG(0x1900a0, XE_REG_OPTION_VF)
+#define VCS0_VCS1_INTR_MASK XE_REG(0x1900a8, XE_REG_OPTION_VF)
+#define VCS2_VCS3_INTR_MASK XE_REG(0x1900ac, XE_REG_OPTION_VF)
+#define VECS0_VECS1_INTR_MASK XE_REG(0x1900d0, XE_REG_OPTION_VF)
#define HECI2_RSVD_INTR_MASK XE_REG(0x1900e4)
-#define GUC_SG_INTR_MASK XE_REG(0x1900e8)
-#define GPM_WGBOXPERF_INTR_MASK XE_REG(0x1900ec)
-#define GUNIT_GSC_INTR_MASK XE_REG(0x1900f4)
+#define GUC_SG_INTR_MASK XE_REG(0x1900e8, XE_REG_OPTION_VF)
+#define GPM_WGBOXPERF_INTR_MASK XE_REG(0x1900ec, XE_REG_OPTION_VF)
+#define GUNIT_GSC_INTR_MASK XE_REG(0x1900f4, XE_REG_OPTION_VF)
#define CCS0_CCS1_INTR_MASK XE_REG(0x190100)
#define CCS2_CCS3_INTR_MASK XE_REG(0x190104)
#define XEHPC_BCS1_BCS2_INTR_MASK XE_REG(0x190110)
@@ -486,6 +514,7 @@
#define XEHPC_BCS7_BCS8_INTR_MASK XE_REG(0x19011c)
#define GT_WAIT_SEMAPHORE_INTERRUPT REG_BIT(11)
#define GT_CONTEXT_SWITCH_INTERRUPT REG_BIT(8)
+#define GSC_ER_COMPLETE REG_BIT(5)
#define GT_RENDER_PIPECTL_NOTIFY_INTERRUPT REG_BIT(4)
#define GT_CS_MASTER_ERROR_INTERRUPT REG_BIT(3)
#define GT_RENDER_USER_INTERRUPT REG_BIT(0)
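
The XE_REG_OPTION_VF tags added throughout this hunk mark which interrupt registers an SR-IOV Virtual Function may touch. A hypothetical guard sketch, not the driver's actual enforcement path, showing how the new bit could be consumed:

#include <stdbool.h>
#include <stdio.h>

/* Trimmed-down stand-in for struct xe_reg (only the fields used here). */
struct sk_reg {
	unsigned int addr:28;
	unsigned int vf:1;
};

/* Hypothetical policy: a VF may only touch registers tagged with the vf bit. */
static bool sk_vf_may_access(struct sk_reg reg, bool running_as_vf)
{
	return !running_as_vf || reg.vf;
}

int main(void)
{
	struct sk_reg gfx_mstr_irq = { .addr = 0x190010, .vf = 1 };
	struct sk_reg heci2_mask   = { .addr = 0x1900e4, .vf = 0 };

	printf("VF may access GFX_MSTR_IRQ: %d\n",
	       sk_vf_may_access(gfx_mstr_irq, true));
	printf("VF may access HECI2_RSVD_INTR_MASK: %d\n",
	       sk_vf_may_access(heci2_mask, true));
	return 0;
}
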
diff --git a/drivers/gpu/drm/xe/regs/xe_gtt_defs.h b/drivers/gpu/drm/xe/regs/xe_gtt_defs.h
new file mode 100644
index 000000000000..4389e5a76f89
--- /dev/null
+++ b/drivers/gpu/drm/xe/regs/xe_gtt_defs.h
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2024 Intel Corporation
+ */
+
+#ifndef _XE_GTT_DEFS_H_
+#define _XE_GTT_DEFS_H_
+
+#define XELPG_GGTT_PTE_PAT0 BIT_ULL(52)
+#define XELPG_GGTT_PTE_PAT1 BIT_ULL(53)
+
+#define GGTT_PTE_VFID GENMASK_ULL(11, 2)
+
+#define GUC_GGTT_TOP 0xFEE00000
+
+#define XELPG_PPGTT_PTE_PAT3 BIT_ULL(62)
+#define XE2_PPGTT_PTE_PAT4 BIT_ULL(61)
+#define XE_PPGTT_PDE_PDPE_PAT2 BIT_ULL(12)
+#define XE_PPGTT_PTE_PAT2 BIT_ULL(7)
+#define XE_PPGTT_PTE_PAT1 BIT_ULL(4)
+#define XE_PPGTT_PTE_PAT0 BIT_ULL(3)
+
+#define XE_PDE_PS_2M BIT_ULL(7)
+#define XE_PDPE_PS_1G BIT_ULL(7)
+#define XE_PDE_IPS_64K BIT_ULL(11)
+
+#define XE_GGTT_PTE_DM BIT_ULL(1)
+#define XE_USM_PPGTT_PTE_AE BIT_ULL(10)
+#define XE_PPGTT_PTE_DM BIT_ULL(11)
+#define XE_PDE_64K BIT_ULL(6)
+#define XE_PTE_PS64 BIT_ULL(8)
+#define XE_PTE_NULL BIT_ULL(9)
+
+#define XE_PAGE_PRESENT BIT_ULL(0)
+#define XE_PAGE_RW BIT_ULL(1)
+
+#endif
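
These GTT/PTE bit definitions now live in one header. As a rough illustration of how such bits combine (the actual PTE encoding policy is in the driver's page-table code, not in this diff), a present, writable device-memory PPGTT entry ORs flag bits onto the page address:

#include <stdint.h>
#include <stdio.h>

#define SK_PAGE_PRESENT (1ull << 0)	/* XE_PAGE_PRESENT */
#define SK_PAGE_RW      (1ull << 1)	/* XE_PAGE_RW */
#define SK_PPGTT_PTE_DM (1ull << 11)	/* XE_PPGTT_PTE_DM: device memory */

int main(void)
{
	uint64_t page_addr = 0x180000000ull;	/* illustrative 4 KiB-aligned address */
	uint64_t pte = page_addr | SK_PAGE_PRESENT | SK_PAGE_RW | SK_PPGTT_PTE_DM;

	printf("pte = 0x%016llx\n", (unsigned long long)pte);
	return 0;
}
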
diff --git a/drivers/gpu/drm/xe/regs/xe_guc_regs.h b/drivers/gpu/drm/xe/regs/xe_guc_regs.h
index 92320bbc9d3d..11682e675e0f 100644
--- a/drivers/gpu/drm/xe/regs/xe_guc_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_guc_regs.h
@@ -100,16 +100,23 @@
#define GT_PM_CONFIG XE_REG(0x13816c)
#define GT_DOORBELL_ENABLE REG_BIT(0)
-#define GUC_HOST_INTERRUPT XE_REG(0x1901f0)
+#define GUC_HOST_INTERRUPT XE_REG(0x1901f0, XE_REG_OPTION_VF)
-#define VF_SW_FLAG(n) XE_REG(0x190240 + (n) * 4)
+#define VF_SW_FLAG(n) XE_REG(0x190240 + (n) * 4, XE_REG_OPTION_VF)
#define VF_SW_FLAG_COUNT 4
-#define MED_GUC_HOST_INTERRUPT XE_REG(0x190304)
+#define MED_GUC_HOST_INTERRUPT XE_REG(0x190304, XE_REG_OPTION_VF)
-#define MED_VF_SW_FLAG(n) XE_REG(0x190310 + (n) * 4)
+#define MED_VF_SW_FLAG(n) XE_REG(0x190310 + (n) * 4, XE_REG_OPTION_VF)
#define MED_VF_SW_FLAG_COUNT 4
+#define GUC_TLB_INV_CR XE_REG(0xcee8)
+#define GUC_TLB_INV_CR_INVALIDATE REG_BIT(0)
+#define PVC_GUC_TLB_INV_DESC0 XE_REG(0xcf7c)
+#define PVC_GUC_TLB_INV_DESC0_VALID REG_BIT(0)
+#define PVC_GUC_TLB_INV_DESC1 XE_REG(0xcf80)
+#define PVC_GUC_TLB_INV_DESC1_INVALIDATE REG_BIT(6)
+
/* GuC Interrupt Vector */
#define GUC_INTR_GUC2HOST REG_BIT(15)
#define GUC_INTR_EXEC_ERROR REG_BIT(14)
diff --git a/drivers/gpu/drm/xe/regs/xe_reg_defs.h b/drivers/gpu/drm/xe/regs/xe_reg_defs.h
index c50e7650c09a..23f7dc5bbe99 100644
--- a/drivers/gpu/drm/xe/regs/xe_reg_defs.h
+++ b/drivers/gpu/drm/xe/regs/xe_reg_defs.h
@@ -6,6 +6,8 @@
#ifndef _XE_REG_DEFS_H_
#define _XE_REG_DEFS_H_
+#include <linux/build_bug.h>
+
#include "compat-i915-headers/i915_reg_defs.h"
/**
@@ -36,6 +38,10 @@ struct xe_reg {
*/
u32 mcr:1;
/**
+ * @vf: register is accessible from the Virtual Function.
+ */
+ u32 vf:1;
+ /**
* @ext: access MMIO extension space for current register.
*/
u32 ext:1;
@@ -44,6 +50,7 @@ struct xe_reg {
u32 raw;
};
};
+static_assert(sizeof(struct xe_reg) == sizeof(u32));
/**
* struct xe_reg_mcr - MCR register definition
@@ -76,6 +83,13 @@ struct xe_reg_mcr {
#define XE_REG_OPTION_MASKED .masked = 1
/**
+ * XE_REG_OPTION_VF - Register is "VF" accessible.
+ *
+ * To be used with XE_REG() and XE_REG_INITIALIZER().
+ */
+#define XE_REG_OPTION_VF .vf = 1
+
+/**
* XE_REG_INITIALIZER - Initializer for xe_reg_t.
* @r_: Register offset
* @...: Additional options like access mode. See struct xe_reg for available
@@ -117,4 +131,9 @@ struct xe_reg_mcr {
.__reg = XE_REG_INITIALIZER(r_, ##__VA_ARGS__, .mcr = 1) \
})
+static inline bool xe_reg_is_valid(struct xe_reg r)
+{
+ return r.addr;
+}
+
#endif
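
The static_assert added above guarantees struct xe_reg stays the size of a u32 so it can be passed around by value, with the option bitfields overlaying the raw word through the union. A standalone sketch of the same packing trick (field widths illustrative):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

struct sk_xe_reg {
	union {
		struct {
			uint32_t addr:28;
			uint32_t masked:1;
			uint32_t mcr:1;
			uint32_t vf:1;
			uint32_t ext:1;
		};
		uint32_t raw;
	};
};
static_assert(sizeof(struct sk_xe_reg) == sizeof(uint32_t), "must stay u32-sized");

static int sk_reg_is_valid(struct sk_xe_reg r)
{
	return r.addr != 0;	/* mirrors xe_reg_is_valid(): addr 0 means unset */
}

int main(void)
{
	struct sk_xe_reg r = { .addr = 0x190010, .vf = 1 };

	printf("raw=0x%08x valid=%d\n", r.raw, sk_reg_is_valid(r));
	return 0;
}
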
diff --git a/drivers/gpu/drm/xe/regs/xe_regs.h b/drivers/gpu/drm/xe/regs/xe_regs.h
index 2c214bb9b671..722fb6dbb72e 100644
--- a/drivers/gpu/drm/xe/regs/xe_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_regs.h
@@ -57,7 +57,7 @@
#define DG1_MSTR_IRQ REG_BIT(31)
#define DG1_MSTR_TILE(t) REG_BIT(t)
-#define GFX_MSTR_IRQ XE_REG(0x190010)
+#define GFX_MSTR_IRQ XE_REG(0x190010, XE_REG_OPTION_VF)
#define MASTER_IRQ REG_BIT(31)
#define GU_MISC_IRQ REG_BIT(29)
#define DISPLAY_IRQ REG_BIT(16)
diff --git a/drivers/gpu/drm/xe/regs/xe_sriov_regs.h b/drivers/gpu/drm/xe/regs/xe_sriov_regs.h
index 58a4e0fad1e1..617ddb84b7fa 100644
--- a/drivers/gpu/drm/xe/regs/xe_sriov_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_sriov_regs.h
@@ -14,4 +14,7 @@
#define LMEM_EN REG_BIT(31)
#define LMTT_DIR_PTR REG_GENMASK(30, 0) /* in multiples of 64KB */
+#define VF_CAP_REG XE_REG(0x1901f8, XE_REG_OPTION_VF)
+#define VF_CAP REG_BIT(0)
+
#endif
diff --git a/drivers/gpu/drm/xe/tests/Makefile b/drivers/gpu/drm/xe/tests/Makefile
index 9d1d88af8b2f..8cf2367449d8 100644
--- a/drivers/gpu/drm/xe/tests/Makefile
+++ b/drivers/gpu/drm/xe/tests/Makefile
@@ -1,7 +1,8 @@
# SPDX-License-Identifier: GPL-2.0
# "live" kunit tests
-obj-$(CONFIG_DRM_XE_KUNIT_TEST) += \
+obj-$(CONFIG_DRM_XE_KUNIT_TEST) += xe_live_test.o
+xe_live_test-y = xe_live_test_mod.o \
xe_bo_test.o \
xe_dma_buf_test.o \
xe_migrate_test.o \
diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c
index 3436fd9cf2b2..9f3c02826464 100644
--- a/drivers/gpu/drm/xe/tests/xe_bo.c
+++ b/drivers/gpu/drm/xe/tests/xe_bo.c
@@ -116,7 +116,7 @@ static void ccs_test_run_tile(struct xe_device *xe, struct xe_tile *tile,
int ret;
/* TODO: Sanity check */
- unsigned int bo_flags = XE_BO_CREATE_VRAM_IF_DGFX(tile);
+ unsigned int bo_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile);
if (IS_DGFX(xe))
kunit_info(test, "Testing vram id %u\n", tile->id);
@@ -163,7 +163,7 @@ static int ccs_test_run_device(struct xe_device *xe)
return 0;
}
- xe_device_mem_access_get(xe);
+ xe_pm_runtime_get(xe);
for_each_tile(tile, xe, id) {
/* For igfx run only for primary tile */
@@ -172,7 +172,7 @@ static int ccs_test_run_device(struct xe_device *xe)
ccs_test_run_tile(xe, tile, test);
}
- xe_device_mem_access_put(xe);
+ xe_pm_runtime_put(xe);
return 0;
}
@@ -186,7 +186,7 @@ EXPORT_SYMBOL_IF_KUNIT(xe_ccs_migrate_kunit);
static int evict_test_run_tile(struct xe_device *xe, struct xe_tile *tile, struct kunit *test)
{
struct xe_bo *bo, *external;
- unsigned int bo_flags = XE_BO_CREATE_VRAM_IF_DGFX(tile);
+ unsigned int bo_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile);
struct xe_vm *vm = xe_migrate_get_vm(xe_device_get_root_tile(xe)->migrate);
struct xe_gt *__gt;
int err, i, id;
@@ -335,12 +335,12 @@ static int evict_test_run_device(struct xe_device *xe)
return 0;
}
- xe_device_mem_access_get(xe);
+ xe_pm_runtime_get(xe);
for_each_tile(tile, xe, id)
evict_test_run_tile(xe, tile, test);
- xe_device_mem_access_put(xe);
+ xe_pm_runtime_put(xe);
return 0;
}
diff --git a/drivers/gpu/drm/xe/tests/xe_bo_test.c b/drivers/gpu/drm/xe/tests/xe_bo_test.c
index f408f17f2164..a324cde77db8 100644
--- a/drivers/gpu/drm/xe/tests/xe_bo_test.c
+++ b/drivers/gpu/drm/xe/tests/xe_bo_test.c
@@ -19,8 +19,3 @@ static struct kunit_suite xe_bo_test_suite = {
};
kunit_test_suite(xe_bo_test_suite);
-
-MODULE_AUTHOR("Intel Corporation");
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("xe_bo kunit test");
-MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING);
diff --git a/drivers/gpu/drm/xe/tests/xe_dma_buf.c b/drivers/gpu/drm/xe/tests/xe_dma_buf.c
index 9f6d571d7fa9..e7f9b531c465 100644
--- a/drivers/gpu/drm/xe/tests/xe_dma_buf.c
+++ b/drivers/gpu/drm/xe/tests/xe_dma_buf.c
@@ -12,6 +12,7 @@
#include "tests/xe_pci_test.h"
#include "xe_pci.h"
+#include "xe_pm.h"
static bool p2p_enabled(struct dma_buf_test_params *params)
{
@@ -36,14 +37,14 @@ static void check_residency(struct kunit *test, struct xe_bo *exported,
xe_bo_assert_held(imported);
mem_type = XE_PL_VRAM0;
- if (!(params->mem_mask & XE_BO_CREATE_VRAM0_BIT))
+ if (!(params->mem_mask & XE_BO_FLAG_VRAM0))
/* No VRAM allowed */
mem_type = XE_PL_TT;
else if (params->force_different_devices && !p2p_enabled(params))
/* No P2P */
mem_type = XE_PL_TT;
else if (params->force_different_devices && !is_dynamic(params) &&
- (params->mem_mask & XE_BO_CREATE_SYSTEM_BIT))
+ (params->mem_mask & XE_BO_FLAG_SYSTEM))
/* Pin migrated to TT */
mem_type = XE_PL_TT;
@@ -93,7 +94,7 @@ static void check_residency(struct kunit *test, struct xe_bo *exported,
 * possible, saving a migration step as the transfer is likely
 * just as fast from system memory.
*/
- if (params->mem_mask & XE_BO_CREATE_SYSTEM_BIT)
+ if (params->mem_mask & XE_BO_FLAG_SYSTEM)
KUNIT_EXPECT_TRUE(test, xe_bo_is_mem_type(exported, XE_PL_TT));
else
KUNIT_EXPECT_TRUE(test, xe_bo_is_mem_type(exported, mem_type));
@@ -115,17 +116,17 @@ static void xe_test_dmabuf_import_same_driver(struct xe_device *xe)
/* No VRAM on this device? */
if (!ttm_manager_type(&xe->ttm, XE_PL_VRAM0) &&
- (params->mem_mask & XE_BO_CREATE_VRAM0_BIT))
+ (params->mem_mask & XE_BO_FLAG_VRAM0))
return;
size = PAGE_SIZE;
- if ((params->mem_mask & XE_BO_CREATE_VRAM0_BIT) &&
+ if ((params->mem_mask & XE_BO_FLAG_VRAM0) &&
xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)
size = SZ_64K;
kunit_info(test, "running %s\n", __func__);
bo = xe_bo_create_user(xe, NULL, NULL, size, DRM_XE_GEM_CPU_CACHING_WC,
- ttm_bo_type_device, XE_BO_CREATE_USER_BIT | params->mem_mask);
+ ttm_bo_type_device, params->mem_mask);
if (IS_ERR(bo)) {
KUNIT_FAIL(test, "xe_bo_create() failed with err=%ld\n",
PTR_ERR(bo));
@@ -148,7 +149,7 @@ static void xe_test_dmabuf_import_same_driver(struct xe_device *xe)
*/
if (params->force_different_devices &&
!p2p_enabled(params) &&
- !(params->mem_mask & XE_BO_CREATE_SYSTEM_BIT)) {
+ !(params->mem_mask & XE_BO_FLAG_SYSTEM)) {
KUNIT_FAIL(test,
"xe_gem_prime_import() succeeded when it shouldn't have\n");
} else {
@@ -161,7 +162,7 @@ static void xe_test_dmabuf_import_same_driver(struct xe_device *xe)
/* Pinning in VRAM is not allowed. */
if (!is_dynamic(params) &&
params->force_different_devices &&
- !(params->mem_mask & XE_BO_CREATE_SYSTEM_BIT))
+ !(params->mem_mask & XE_BO_FLAG_SYSTEM))
KUNIT_EXPECT_EQ(test, err, -EINVAL);
/* Otherwise only expect interrupts or success. */
else if (err && err != -EINTR && err != -ERESTARTSYS)
@@ -180,7 +181,7 @@ static void xe_test_dmabuf_import_same_driver(struct xe_device *xe)
PTR_ERR(import));
} else if (!params->force_different_devices ||
p2p_enabled(params) ||
- (params->mem_mask & XE_BO_CREATE_SYSTEM_BIT)) {
+ (params->mem_mask & XE_BO_FLAG_SYSTEM)) {
/* Shouldn't fail if we can reuse same bo, use p2p or use system */
KUNIT_FAIL(test, "dynamic p2p attachment failed with err=%ld\n",
PTR_ERR(import));
@@ -203,52 +204,52 @@ static const struct dma_buf_attach_ops nop2p_attach_ops = {
* gem object.
*/
static const struct dma_buf_test_params test_params[] = {
- {.mem_mask = XE_BO_CREATE_VRAM0_BIT,
+ {.mem_mask = XE_BO_FLAG_VRAM0,
.attach_ops = &xe_dma_buf_attach_ops},
- {.mem_mask = XE_BO_CREATE_VRAM0_BIT,
+ {.mem_mask = XE_BO_FLAG_VRAM0,
.attach_ops = &xe_dma_buf_attach_ops,
.force_different_devices = true},
- {.mem_mask = XE_BO_CREATE_VRAM0_BIT,
+ {.mem_mask = XE_BO_FLAG_VRAM0,
.attach_ops = &nop2p_attach_ops},
- {.mem_mask = XE_BO_CREATE_VRAM0_BIT,
+ {.mem_mask = XE_BO_FLAG_VRAM0,
.attach_ops = &nop2p_attach_ops,
.force_different_devices = true},
- {.mem_mask = XE_BO_CREATE_VRAM0_BIT},
- {.mem_mask = XE_BO_CREATE_VRAM0_BIT,
+ {.mem_mask = XE_BO_FLAG_VRAM0},
+ {.mem_mask = XE_BO_FLAG_VRAM0,
.force_different_devices = true},
- {.mem_mask = XE_BO_CREATE_SYSTEM_BIT,
+ {.mem_mask = XE_BO_FLAG_SYSTEM,
.attach_ops = &xe_dma_buf_attach_ops},
- {.mem_mask = XE_BO_CREATE_SYSTEM_BIT,
+ {.mem_mask = XE_BO_FLAG_SYSTEM,
.attach_ops = &xe_dma_buf_attach_ops,
.force_different_devices = true},
- {.mem_mask = XE_BO_CREATE_SYSTEM_BIT,
+ {.mem_mask = XE_BO_FLAG_SYSTEM,
.attach_ops = &nop2p_attach_ops},
- {.mem_mask = XE_BO_CREATE_SYSTEM_BIT,
+ {.mem_mask = XE_BO_FLAG_SYSTEM,
.attach_ops = &nop2p_attach_ops,
.force_different_devices = true},
- {.mem_mask = XE_BO_CREATE_SYSTEM_BIT},
- {.mem_mask = XE_BO_CREATE_SYSTEM_BIT,
+ {.mem_mask = XE_BO_FLAG_SYSTEM},
+ {.mem_mask = XE_BO_FLAG_SYSTEM,
.force_different_devices = true},
- {.mem_mask = XE_BO_CREATE_SYSTEM_BIT | XE_BO_CREATE_VRAM0_BIT,
+ {.mem_mask = XE_BO_FLAG_SYSTEM | XE_BO_FLAG_VRAM0,
.attach_ops = &xe_dma_buf_attach_ops},
- {.mem_mask = XE_BO_CREATE_SYSTEM_BIT | XE_BO_CREATE_VRAM0_BIT,
+ {.mem_mask = XE_BO_FLAG_SYSTEM | XE_BO_FLAG_VRAM0,
.attach_ops = &xe_dma_buf_attach_ops,
.force_different_devices = true},
- {.mem_mask = XE_BO_CREATE_SYSTEM_BIT | XE_BO_CREATE_VRAM0_BIT,
+ {.mem_mask = XE_BO_FLAG_SYSTEM | XE_BO_FLAG_VRAM0,
.attach_ops = &nop2p_attach_ops},
- {.mem_mask = XE_BO_CREATE_SYSTEM_BIT | XE_BO_CREATE_VRAM0_BIT,
+ {.mem_mask = XE_BO_FLAG_SYSTEM | XE_BO_FLAG_VRAM0,
.attach_ops = &nop2p_attach_ops,
.force_different_devices = true},
- {.mem_mask = XE_BO_CREATE_SYSTEM_BIT | XE_BO_CREATE_VRAM0_BIT},
- {.mem_mask = XE_BO_CREATE_SYSTEM_BIT | XE_BO_CREATE_VRAM0_BIT,
+ {.mem_mask = XE_BO_FLAG_SYSTEM | XE_BO_FLAG_VRAM0},
+ {.mem_mask = XE_BO_FLAG_SYSTEM | XE_BO_FLAG_VRAM0,
.force_different_devices = true},
{}
@@ -259,6 +260,7 @@ static int dma_buf_run_device(struct xe_device *xe)
const struct dma_buf_test_params *params;
struct kunit *test = xe_cur_kunit();
+ xe_pm_runtime_get(xe);
for (params = test_params; params->mem_mask; ++params) {
struct dma_buf_test_params p = *params;
@@ -266,6 +268,7 @@ static int dma_buf_run_device(struct xe_device *xe)
test->priv = &p;
xe_test_dmabuf_import_same_driver(xe);
}
+ xe_pm_runtime_put(xe);
/* A non-zero return would halt iteration over driver devices */
return 0;
diff --git a/drivers/gpu/drm/xe/tests/xe_dma_buf_test.c b/drivers/gpu/drm/xe/tests/xe_dma_buf_test.c
index 9f5a9cda8c0f..99cdb718b6c6 100644
--- a/drivers/gpu/drm/xe/tests/xe_dma_buf_test.c
+++ b/drivers/gpu/drm/xe/tests/xe_dma_buf_test.c
@@ -18,8 +18,3 @@ static struct kunit_suite xe_dma_buf_test_suite = {
};
kunit_test_suite(xe_dma_buf_test_suite);
-
-MODULE_AUTHOR("Intel Corporation");
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("xe_dma_buf kunit test");
-MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING);
diff --git a/drivers/gpu/drm/xe/tests/xe_guc_id_mgr_test.c b/drivers/gpu/drm/xe/tests/xe_guc_id_mgr_test.c
new file mode 100644
index 000000000000..ee30a1939eb0
--- /dev/null
+++ b/drivers/gpu/drm/xe/tests/xe_guc_id_mgr_test.c
@@ -0,0 +1,136 @@
+// SPDX-License-Identifier: GPL-2.0 AND MIT
+/*
+ * Copyright © 2024 Intel Corporation
+ */
+
+#include <kunit/test.h>
+
+#include "xe_device.h"
+#include "xe_kunit_helpers.h"
+
+static int guc_id_mgr_test_init(struct kunit *test)
+{
+ struct xe_guc_id_mgr *idm;
+
+ xe_kunit_helper_xe_device_test_init(test);
+ idm = &xe_device_get_gt(test->priv, 0)->uc.guc.submission_state.idm;
+
+ mutex_init(idm_mutex(idm));
+ test->priv = idm;
+ return 0;
+}
+
+static void bad_init(struct kunit *test)
+{
+ struct xe_guc_id_mgr *idm = test->priv;
+
+ KUNIT_EXPECT_EQ(test, -EINVAL, xe_guc_id_mgr_init(idm, 0));
+ KUNIT_EXPECT_EQ(test, -ERANGE, xe_guc_id_mgr_init(idm, GUC_ID_MAX + 1));
+}
+
+static void no_init(struct kunit *test)
+{
+ struct xe_guc_id_mgr *idm = test->priv;
+
+ mutex_lock(idm_mutex(idm));
+ KUNIT_EXPECT_EQ(test, -ENODATA, xe_guc_id_mgr_reserve_locked(idm, 0));
+ mutex_unlock(idm_mutex(idm));
+
+ KUNIT_EXPECT_EQ(test, -ENODATA, xe_guc_id_mgr_reserve(idm, 1, 1));
+}
+
+static void init_fini(struct kunit *test)
+{
+ struct xe_guc_id_mgr *idm = test->priv;
+
+ KUNIT_ASSERT_EQ(test, 0, xe_guc_id_mgr_init(idm, -1));
+ KUNIT_EXPECT_NOT_NULL(test, idm->bitmap);
+ KUNIT_EXPECT_EQ(test, idm->total, GUC_ID_MAX);
+ __fini_idm(NULL, idm);
+ KUNIT_EXPECT_NULL(test, idm->bitmap);
+ KUNIT_EXPECT_EQ(test, idm->total, 0);
+}
+
+static void check_used(struct kunit *test)
+{
+ struct xe_guc_id_mgr *idm = test->priv;
+ unsigned int n;
+
+ KUNIT_ASSERT_EQ(test, 0, xe_guc_id_mgr_init(idm, 2));
+
+ mutex_lock(idm_mutex(idm));
+
+ for (n = 0; n < idm->total; n++) {
+ kunit_info(test, "n=%u", n);
+ KUNIT_EXPECT_EQ(test, idm->used, n);
+ KUNIT_EXPECT_GE(test, idm_reserve_chunk_locked(idm, 1, 0), 0);
+ KUNIT_EXPECT_EQ(test, idm->used, n + 1);
+ }
+ KUNIT_EXPECT_EQ(test, idm->used, idm->total);
+ idm_release_chunk_locked(idm, 0, idm->used);
+ KUNIT_EXPECT_EQ(test, idm->used, 0);
+
+ mutex_unlock(idm_mutex(idm));
+}
+
+static void check_quota(struct kunit *test)
+{
+ struct xe_guc_id_mgr *idm = test->priv;
+ unsigned int n;
+
+ KUNIT_ASSERT_EQ(test, 0, xe_guc_id_mgr_init(idm, 2));
+
+ mutex_lock(idm_mutex(idm));
+
+ for (n = 0; n < idm->total - 1; n++) {
+ kunit_info(test, "n=%u", n);
+ KUNIT_EXPECT_EQ(test, idm_reserve_chunk_locked(idm, 1, idm->total), -EDQUOT);
+ KUNIT_EXPECT_EQ(test, idm_reserve_chunk_locked(idm, 1, idm->total - n), -EDQUOT);
+ KUNIT_EXPECT_EQ(test, idm_reserve_chunk_locked(idm, idm->total - n, 1), -EDQUOT);
+ KUNIT_EXPECT_GE(test, idm_reserve_chunk_locked(idm, 1, 1), 0);
+ }
+ KUNIT_EXPECT_LE(test, 0, idm_reserve_chunk_locked(idm, 1, 0));
+ KUNIT_EXPECT_EQ(test, idm->used, idm->total);
+ idm_release_chunk_locked(idm, 0, idm->total);
+ KUNIT_EXPECT_EQ(test, idm->used, 0);
+
+ mutex_unlock(idm_mutex(idm));
+}
+
+static void check_all(struct kunit *test)
+{
+ struct xe_guc_id_mgr *idm = test->priv;
+ unsigned int n;
+
+ KUNIT_ASSERT_EQ(test, 0, xe_guc_id_mgr_init(idm, -1));
+
+ mutex_lock(idm_mutex(idm));
+
+ for (n = 0; n < idm->total; n++)
+ KUNIT_EXPECT_LE(test, 0, idm_reserve_chunk_locked(idm, 1, 0));
+ KUNIT_EXPECT_EQ(test, idm->used, idm->total);
+ for (n = 0; n < idm->total; n++)
+ idm_release_chunk_locked(idm, n, 1);
+
+ mutex_unlock(idm_mutex(idm));
+}
+
+static struct kunit_case guc_id_mgr_test_cases[] = {
+ KUNIT_CASE(bad_init),
+ KUNIT_CASE(no_init),
+ KUNIT_CASE(init_fini),
+ KUNIT_CASE(check_used),
+ KUNIT_CASE(check_quota),
+ KUNIT_CASE_SLOW(check_all),
+ {}
+};
+
+static struct kunit_suite guc_id_mgr_suite = {
+ .name = "guc_idm",
+ .test_cases = guc_id_mgr_test_cases,
+
+ .init = guc_id_mgr_test_init,
+ .exit = NULL,
+};
+
+kunit_test_suites(&guc_id_mgr_suite);
diff --git a/drivers/gpu/drm/xe/tests/xe_live_test_mod.c b/drivers/gpu/drm/xe/tests/xe_live_test_mod.c
new file mode 100644
index 000000000000..eb1ea99a5a8b
--- /dev/null
+++ b/drivers/gpu/drm/xe/tests/xe_live_test_mod.c
@@ -0,0 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+#include <linux/module.h>
+
+MODULE_AUTHOR("Intel Corporation");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("xe live kunit tests");
+MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING);
diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c
index c347e2c29f81..977d5f4e4490 100644
--- a/drivers/gpu/drm/xe/tests/xe_migrate.c
+++ b/drivers/gpu/drm/xe/tests/xe_migrate.c
@@ -10,6 +10,7 @@
#include "tests/xe_pci_test.h"
#include "xe_pci.h"
+#include "xe_pm.h"
static bool sanity_fence_failed(struct xe_device *xe, struct dma_fence *fence,
const char *str, struct kunit *test)
@@ -112,7 +113,7 @@ static void test_copy(struct xe_migrate *m, struct xe_bo *bo,
bo->size,
ttm_bo_type_kernel,
region |
- XE_BO_NEEDS_CPU_ACCESS);
+ XE_BO_FLAG_NEEDS_CPU_ACCESS);
if (IS_ERR(remote)) {
KUNIT_FAIL(test, "Failed to allocate remote bo for %s: %pe\n",
str, remote);
@@ -190,7 +191,7 @@ out_unlock:
static void test_copy_sysmem(struct xe_migrate *m, struct xe_bo *bo,
struct kunit *test)
{
- test_copy(m, bo, test, XE_BO_CREATE_SYSTEM_BIT);
+ test_copy(m, bo, test, XE_BO_FLAG_SYSTEM);
}
static void test_copy_vram(struct xe_migrate *m, struct xe_bo *bo,
@@ -202,9 +203,9 @@ static void test_copy_vram(struct xe_migrate *m, struct xe_bo *bo,
return;
if (bo->ttm.resource->mem_type == XE_PL_VRAM0)
- region = XE_BO_CREATE_VRAM1_BIT;
+ region = XE_BO_FLAG_VRAM1;
else
- region = XE_BO_CREATE_VRAM0_BIT;
+ region = XE_BO_FLAG_VRAM0;
test_copy(m, bo, test, region);
}
@@ -280,8 +281,8 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test)
big = xe_bo_create_pin_map(xe, tile, m->q->vm, SZ_4M,
ttm_bo_type_kernel,
- XE_BO_CREATE_VRAM_IF_DGFX(tile) |
- XE_BO_CREATE_PINNED_BIT);
+ XE_BO_FLAG_VRAM_IF_DGFX(tile) |
+ XE_BO_FLAG_PINNED);
if (IS_ERR(big)) {
KUNIT_FAIL(test, "Failed to allocate bo: %li\n", PTR_ERR(big));
goto vunmap;
@@ -289,8 +290,8 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test)
pt = xe_bo_create_pin_map(xe, tile, m->q->vm, XE_PAGE_SIZE,
ttm_bo_type_kernel,
- XE_BO_CREATE_VRAM_IF_DGFX(tile) |
- XE_BO_CREATE_PINNED_BIT);
+ XE_BO_FLAG_VRAM_IF_DGFX(tile) |
+ XE_BO_FLAG_PINNED);
if (IS_ERR(pt)) {
KUNIT_FAIL(test, "Failed to allocate fake pt: %li\n",
PTR_ERR(pt));
@@ -300,8 +301,8 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test)
tiny = xe_bo_create_pin_map(xe, tile, m->q->vm,
2 * SZ_4K,
ttm_bo_type_kernel,
- XE_BO_CREATE_VRAM_IF_DGFX(tile) |
- XE_BO_CREATE_PINNED_BIT);
+ XE_BO_FLAG_VRAM_IF_DGFX(tile) |
+ XE_BO_FLAG_PINNED);
if (IS_ERR(tiny)) {
KUNIT_FAIL(test, "Failed to allocate fake pt: %li\n",
PTR_ERR(pt));
@@ -423,17 +424,19 @@ static int migrate_test_run_device(struct xe_device *xe)
struct xe_tile *tile;
int id;
+ xe_pm_runtime_get(xe);
+
for_each_tile(tile, xe, id) {
struct xe_migrate *m = tile->migrate;
kunit_info(test, "Testing tile id %d.\n", id);
xe_vm_lock(m->q->vm, true);
- xe_device_mem_access_get(xe);
xe_migrate_sanity_test(m, test);
- xe_device_mem_access_put(xe);
xe_vm_unlock(m->q->vm);
}
+ xe_pm_runtime_put(xe);
+
return 0;
}
diff --git a/drivers/gpu/drm/xe/tests/xe_migrate_test.c b/drivers/gpu/drm/xe/tests/xe_migrate_test.c
index cf0c173b945f..eb0d8963419c 100644
--- a/drivers/gpu/drm/xe/tests/xe_migrate_test.c
+++ b/drivers/gpu/drm/xe/tests/xe_migrate_test.c
@@ -18,8 +18,3 @@ static struct kunit_suite xe_migrate_test_suite = {
};
kunit_test_suite(xe_migrate_test_suite);
-
-MODULE_AUTHOR("Intel Corporation");
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("xe_migrate kunit test");
-MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING);
diff --git a/drivers/gpu/drm/xe/tests/xe_mocs.c b/drivers/gpu/drm/xe/tests/xe_mocs.c
index df5c36b70ab4..1b8617075b37 100644
--- a/drivers/gpu/drm/xe/tests/xe_mocs.c
+++ b/drivers/gpu/drm/xe/tests/xe_mocs.c
@@ -10,10 +10,11 @@
#include "tests/xe_pci_test.h"
#include "tests/xe_test.h"
-#include "xe_pci.h"
+#include "xe_device.h"
#include "xe_gt.h"
#include "xe_mocs.h"
-#include "xe_device.h"
+#include "xe_pci.h"
+#include "xe_pm.h"
struct live_mocs {
struct xe_mocs_info table;
@@ -28,6 +29,8 @@ static int live_mocs_init(struct live_mocs *arg, struct xe_gt *gt)
flags = get_mocs_settings(gt_to_xe(gt), &arg->table);
+ kunit_info(test, "gt %d", gt->info.id);
+ kunit_info(test, "gt type %d", gt->info.type);
kunit_info(test, "table size %d", arg->table.size);
kunit_info(test, "table uc_index %d", arg->table.uc_index);
kunit_info(test, "table n_entries %d", arg->table.n_entries);
@@ -38,69 +41,72 @@ static int live_mocs_init(struct live_mocs *arg, struct xe_gt *gt)
static void read_l3cc_table(struct xe_gt *gt,
const struct xe_mocs_info *info)
{
+ struct kunit *test = xe_cur_kunit();
+ u32 l3cc, l3cc_expected;
unsigned int i;
- u32 l3cc;
u32 reg_val;
u32 ret;
- struct kunit *test = xe_cur_kunit();
-
- xe_device_mem_access_get(gt_to_xe(gt));
ret = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
KUNIT_ASSERT_EQ_MSG(test, ret, 0, "Forcewake Failed.\n");
- mocs_dbg(&gt_to_xe(gt)->drm, "L3CC entries:%d\n", info->n_entries);
- for (i = 0;
- i < (info->n_entries + 1) / 2 ?
- (l3cc = l3cc_combine(get_entry_l3cc(info, 2 * i),
- get_entry_l3cc(info, 2 * i + 1))), 1 : 0;
- i++) {
- if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1250)
- reg_val = xe_gt_mcr_unicast_read_any(gt, XEHP_LNCFCMOCS(i));
- else
- reg_val = xe_mmio_read32(gt, XELP_LNCFCMOCS(i));
- mocs_dbg(&gt_to_xe(gt)->drm, "%d 0x%x 0x%x 0x%x\n", i,
- XELP_LNCFCMOCS(i).addr, reg_val, l3cc);
- if (reg_val != l3cc)
- KUNIT_FAIL(test, "l3cc reg 0x%x has incorrect val.\n",
- XELP_LNCFCMOCS(i).addr);
+
+ for (i = 0; i < info->n_entries; i++) {
+ if (!(i & 1)) {
+ if (regs_are_mcr(gt))
+ reg_val = xe_gt_mcr_unicast_read_any(gt, XEHP_LNCFCMOCS(i >> 1));
+ else
+ reg_val = xe_mmio_read32(gt, XELP_LNCFCMOCS(i >> 1));
+
+ mocs_dbg(gt, "reg_val=0x%x\n", reg_val);
+ } else {
+ /* Just reuse the value read on the previous iteration */
+ reg_val >>= 16;
+ }
+
+ l3cc_expected = get_entry_l3cc(info, i);
+ l3cc = reg_val & 0xffff;
+
+ mocs_dbg(gt, "[%u] expected=0x%x actual=0x%x\n",
+ i, l3cc_expected, l3cc);
+
+ KUNIT_EXPECT_EQ_MSG(test, l3cc_expected, l3cc,
+ "l3cc idx=%u has incorrect val.\n", i);
}
xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
- xe_device_mem_access_put(gt_to_xe(gt));
}
static void read_mocs_table(struct xe_gt *gt,
const struct xe_mocs_info *info)
{
- struct xe_device *xe = gt_to_xe(gt);
-
+ struct kunit *test = xe_cur_kunit();
+ u32 mocs, mocs_expected;
unsigned int i;
- u32 mocs;
u32 reg_val;
u32 ret;
- struct kunit *test = xe_cur_kunit();
+ KUNIT_EXPECT_TRUE_MSG(test, info->unused_entries_index,
+ "Unused entries index should have been defined\n");
- xe_device_mem_access_get(gt_to_xe(gt));
ret = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
KUNIT_ASSERT_EQ_MSG(test, ret, 0, "Forcewake Failed.\n");
- mocs_dbg(&gt_to_xe(gt)->drm, "Global MOCS entries:%d\n", info->n_entries);
- drm_WARN_ONCE(&xe->drm, !info->unused_entries_index,
- "Unused entries index should have been defined\n");
- for (i = 0;
- i < info->n_entries ? (mocs = get_entry_control(info, i)), 1 : 0;
- i++) {
- if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1250)
+
+ for (i = 0; i < info->n_entries; i++) {
+ if (regs_are_mcr(gt))
reg_val = xe_gt_mcr_unicast_read_any(gt, XEHP_GLOBAL_MOCS(i));
else
reg_val = xe_mmio_read32(gt, XELP_GLOBAL_MOCS(i));
- mocs_dbg(&gt_to_xe(gt)->drm, "%d 0x%x 0x%x 0x%x\n", i,
- XELP_GLOBAL_MOCS(i).addr, reg_val, mocs);
- if (reg_val != mocs)
- KUNIT_FAIL(test, "mocs reg 0x%x has incorrect val.\n",
- XELP_GLOBAL_MOCS(i).addr);
+
+ mocs_expected = get_entry_control(info, i);
+ mocs = reg_val;
+
+ mocs_dbg(gt, "[%u] expected=0x%x actual=0x%x\n",
+ i, mocs_expected, mocs);
+
+ KUNIT_EXPECT_EQ_MSG(test, mocs_expected, mocs,
+ "mocs reg 0x%x has incorrect val.\n", i);
}
+
xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
- xe_device_mem_access_put(gt_to_xe(gt));
}
static int mocs_kernel_test_run_device(struct xe_device *xe)
@@ -113,6 +119,8 @@ static int mocs_kernel_test_run_device(struct xe_device *xe)
unsigned int flags;
int id;
+ xe_pm_runtime_get(xe);
+
for_each_gt(gt, xe, id) {
flags = live_mocs_init(&mocs, gt);
if (flags & HAS_GLOBAL_MOCS)
@@ -120,6 +128,9 @@ static int mocs_kernel_test_run_device(struct xe_device *xe)
if (flags & HAS_LNCF_MOCS)
read_l3cc_table(gt, &mocs.table);
}
+
+ xe_pm_runtime_put(xe);
+
return 0;
}
@@ -139,6 +150,8 @@ static int mocs_reset_test_run_device(struct xe_device *xe)
int id;
struct kunit *test = xe_cur_kunit();
+ xe_pm_runtime_get(xe);
+
for_each_gt(gt, xe, id) {
flags = live_mocs_init(&mocs, gt);
kunit_info(test, "mocs_reset_test before reset\n");
@@ -156,6 +169,9 @@ static int mocs_reset_test_run_device(struct xe_device *xe)
if (flags & HAS_LNCF_MOCS)
read_l3cc_table(gt, &mocs.table);
}
+
+ xe_pm_runtime_put(xe);
+
return 0;
}
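
Worth spelling out for read_l3cc_table() above: each 32-bit LNCFCMOCS register packs two 16-bit L3CC entries, so the rewritten loop performs an MMIO read only on even indices and shifts the cached value on odd ones. A standalone sketch of the unpacking (register value made up):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t reg_val = 0xBBBBAAAA;	/* made up: entry 1 in high half, entry 0 low */
	unsigned int i;

	for (i = 0; i < 2; i++) {
		uint32_t l3cc = (i & 1) ? (reg_val >> 16) : (reg_val & 0xffff);

		printf("l3cc entry %u: 0x%04x\n", i, l3cc);
	}
	return 0;
}
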
diff --git a/drivers/gpu/drm/xe/tests/xe_mocs_test.c b/drivers/gpu/drm/xe/tests/xe_mocs_test.c
index ee40f31e1e12..6315886b659e 100644
--- a/drivers/gpu/drm/xe/tests/xe_mocs_test.c
+++ b/drivers/gpu/drm/xe/tests/xe_mocs_test.c
@@ -19,8 +19,3 @@ static struct kunit_suite xe_mocs_test_suite = {
};
kunit_test_suite(xe_mocs_test_suite);
-
-MODULE_AUTHOR("Intel Corporation");
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("xe_mocs kunit test");
-MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING);
diff --git a/drivers/gpu/drm/xe/tests/xe_wa_test.c b/drivers/gpu/drm/xe/tests/xe_wa_test.c
index 44570d888355..9d0c715142b9 100644
--- a/drivers/gpu/drm/xe/tests/xe_wa_test.c
+++ b/drivers/gpu/drm/xe/tests/xe_wa_test.c
@@ -71,6 +71,7 @@ static const struct platform_test_case cases[] = {
SUBPLATFORM_CASE(DG2, G12, A1),
GMDID_CASE(METEORLAKE, 1270, A0, 1300, A0),
GMDID_CASE(METEORLAKE, 1271, A0, 1300, A0),
+ GMDID_CASE(METEORLAKE, 1274, A0, 1300, A0),
GMDID_CASE(LUNARLAKE, 2004, A0, 2000, A0),
GMDID_CASE(LUNARLAKE, 2004, B0, 2000, A0),
};
diff --git a/drivers/gpu/drm/xe/xe_bb.c b/drivers/gpu/drm/xe/xe_bb.c
index 7c124475c428..541361caff3b 100644
--- a/drivers/gpu/drm/xe/xe_bb.c
+++ b/drivers/gpu/drm/xe/xe_bb.c
@@ -86,7 +86,8 @@ struct xe_sched_job *xe_bb_create_migration_job(struct xe_exec_queue *q,
};
xe_gt_assert(q->gt, second_idx <= bb->len);
- xe_gt_assert(q->gt, q->vm->flags & XE_VM_FLAG_MIGRATION);
+ xe_gt_assert(q->gt, xe_sched_job_is_migration(q));
+ xe_gt_assert(q->gt, q->width == 1);
return __xe_bb_create_job(q, bb, addr);
}
@@ -96,7 +97,8 @@ struct xe_sched_job *xe_bb_create_job(struct xe_exec_queue *q,
{
u64 addr = xe_sa_bo_gpu_addr(bb->bo);
- xe_gt_assert(q->gt, !(q->vm && q->vm->flags & XE_VM_FLAG_MIGRATION));
+ xe_gt_assert(q->gt, !xe_sched_job_is_migration(q));
+ xe_gt_assert(q->gt, q->width == 1);
return __xe_bb_create_job(q, bb, &addr);
}
diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index 9c0837b6fdfc..bc1f794e3e61 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -22,6 +22,7 @@
#include "xe_gt.h"
#include "xe_map.h"
#include "xe_migrate.h"
+#include "xe_pm.h"
#include "xe_preempt_fence.h"
#include "xe_res_cursor.h"
#include "xe_trace.h"
@@ -111,7 +112,7 @@ bool xe_bo_is_stolen_devmem(struct xe_bo *bo)
static bool xe_bo_is_user(struct xe_bo *bo)
{
- return bo->flags & XE_BO_CREATE_USER_BIT;
+ return bo->flags & XE_BO_FLAG_USER;
}
static struct xe_migrate *
@@ -137,7 +138,7 @@ static struct xe_mem_region *res_to_mem_region(struct ttm_resource *res)
static void try_add_system(struct xe_device *xe, struct xe_bo *bo,
u32 bo_flags, u32 *c)
{
- if (bo_flags & XE_BO_CREATE_SYSTEM_BIT) {
+ if (bo_flags & XE_BO_FLAG_SYSTEM) {
xe_assert(xe, *c < ARRAY_SIZE(bo->placements));
bo->placements[*c] = (struct ttm_place) {
@@ -164,12 +165,12 @@ static void add_vram(struct xe_device *xe, struct xe_bo *bo,
* For eviction / restore on suspend / resume objects
* pinned in VRAM must be contiguous
*/
- if (bo_flags & (XE_BO_CREATE_PINNED_BIT |
- XE_BO_CREATE_GGTT_BIT))
+ if (bo_flags & (XE_BO_FLAG_PINNED |
+ XE_BO_FLAG_GGTT))
place.flags |= TTM_PL_FLAG_CONTIGUOUS;
if (io_size < vram->usable_size) {
- if (bo_flags & XE_BO_NEEDS_CPU_ACCESS) {
+ if (bo_flags & XE_BO_FLAG_NEEDS_CPU_ACCESS) {
place.fpfn = 0;
place.lpfn = io_size >> PAGE_SHIFT;
} else {
@@ -183,22 +184,22 @@ static void add_vram(struct xe_device *xe, struct xe_bo *bo,
static void try_add_vram(struct xe_device *xe, struct xe_bo *bo,
u32 bo_flags, u32 *c)
{
- if (bo_flags & XE_BO_CREATE_VRAM0_BIT)
+ if (bo_flags & XE_BO_FLAG_VRAM0)
add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM0, c);
- if (bo_flags & XE_BO_CREATE_VRAM1_BIT)
+ if (bo_flags & XE_BO_FLAG_VRAM1)
add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM1, c);
}
static void try_add_stolen(struct xe_device *xe, struct xe_bo *bo,
u32 bo_flags, u32 *c)
{
- if (bo_flags & XE_BO_CREATE_STOLEN_BIT) {
+ if (bo_flags & XE_BO_FLAG_STOLEN) {
xe_assert(xe, *c < ARRAY_SIZE(bo->placements));
bo->placements[*c] = (struct ttm_place) {
.mem_type = XE_PL_STOLEN,
- .flags = bo_flags & (XE_BO_CREATE_PINNED_BIT |
- XE_BO_CREATE_GGTT_BIT) ?
+ .flags = bo_flags & (XE_BO_FLAG_PINNED |
+ XE_BO_FLAG_GGTT) ?
TTM_PL_FLAG_CONTIGUOUS : 0,
};
*c += 1;
@@ -339,7 +340,7 @@ static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object *ttm_bo,
break;
}
- WARN_ON((bo->flags & XE_BO_CREATE_USER_BIT) && !bo->cpu_caching);
+ WARN_ON((bo->flags & XE_BO_FLAG_USER) && !bo->cpu_caching);
/*
* Display scanout is always non-coherent with the CPU cache.
@@ -347,8 +348,8 @@ static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object *ttm_bo,
* For Xe_LPG and beyond, PPGTT PTE lookups are also non-coherent and
* require a CPU:WC mapping.
*/
- if ((!bo->cpu_caching && bo->flags & XE_BO_SCANOUT_BIT) ||
- (xe->info.graphics_verx100 >= 1270 && bo->flags & XE_BO_PAGETABLE))
+ if ((!bo->cpu_caching && bo->flags & XE_BO_FLAG_SCANOUT) ||
+ (xe->info.graphics_verx100 >= 1270 && bo->flags & XE_BO_FLAG_PAGETABLE))
caching = ttm_write_combined;
err = ttm_tt_init(&tt->ttm, &bo->ttm, page_flags, caching, extra_pages);
@@ -715,7 +716,7 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
xe_assert(xe, migrate);
trace_xe_bo_move(bo, new_mem->mem_type, old_mem_type, move_lacks_source);
- xe_device_mem_access_get(xe);
+ xe_pm_runtime_get_noresume(xe);
if (xe_bo_is_pinned(bo) && !xe_bo_is_user(bo)) {
/*
@@ -739,7 +740,7 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
if (XE_WARN_ON(new_mem->start == XE_BO_INVALID_OFFSET)) {
ret = -EINVAL;
- xe_device_mem_access_put(xe);
+ xe_pm_runtime_put(xe);
goto out;
}
@@ -757,7 +758,7 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
new_mem, handle_system_ccs);
if (IS_ERR(fence)) {
ret = PTR_ERR(fence);
- xe_device_mem_access_put(xe);
+ xe_pm_runtime_put(xe);
goto out;
}
if (!move_lacks_source) {
@@ -782,7 +783,7 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
dma_fence_put(fence);
}
- xe_device_mem_access_put(xe);
+ xe_pm_runtime_put(xe);
out:
return ret;
@@ -794,7 +795,6 @@ out:
* @bo: The buffer object to move.
*
 * On successful completion, the object memory will be moved to system memory.
- * This function blocks until the object has been fully moved.
*
 * This is needed for special handling of pinned VRAM objects during
* suspend-resume.
@@ -851,9 +851,6 @@ int xe_bo_evict_pinned(struct xe_bo *bo)
if (ret)
goto err_res_free;
- dma_resv_wait_timeout(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL,
- false, MAX_SCHEDULE_TIMEOUT);
-
return 0;
err_res_free:
@@ -866,7 +863,6 @@ err_res_free:
* @bo: The buffer object to move.
*
* On successful completion, the object memory will be moved back to VRAM.
- * This function blocks until the object has been fully moved.
*
 * This is needed for special handling of pinned VRAM objects during
* suspend-resume.
@@ -908,9 +904,6 @@ int xe_bo_restore_pinned(struct xe_bo *bo)
if (ret)
goto err_res_free;
- dma_resv_wait_timeout(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL,
- false, MAX_SCHEDULE_TIMEOUT);
-
return 0;
err_res_free:
@@ -1110,12 +1103,12 @@ static vm_fault_t xe_gem_fault(struct vm_fault *vmf)
struct drm_device *ddev = tbo->base.dev;
struct xe_device *xe = to_xe_device(ddev);
struct xe_bo *bo = ttm_to_xe_bo(tbo);
- bool needs_rpm = bo->flags & XE_BO_CREATE_VRAM_MASK;
+ bool needs_rpm = bo->flags & XE_BO_FLAG_VRAM_MASK;
vm_fault_t ret;
int idx;
if (needs_rpm)
- xe_device_mem_access_get(xe);
+ xe_pm_runtime_get(xe);
ret = ttm_bo_vm_reserve(tbo, vmf);
if (ret)
@@ -1146,7 +1139,7 @@ static vm_fault_t xe_gem_fault(struct vm_fault *vmf)
dma_resv_unlock(tbo->base.resv);
out:
if (needs_rpm)
- xe_device_mem_access_put(xe);
+ xe_pm_runtime_put(xe);
return ret;
}
@@ -1223,18 +1216,19 @@ struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
return ERR_PTR(-EINVAL);
}
- if (flags & (XE_BO_CREATE_VRAM_MASK | XE_BO_CREATE_STOLEN_BIT) &&
- !(flags & XE_BO_CREATE_IGNORE_MIN_PAGE_SIZE_BIT) &&
- xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) {
+ if (flags & (XE_BO_FLAG_VRAM_MASK | XE_BO_FLAG_STOLEN) &&
+ !(flags & XE_BO_FLAG_IGNORE_MIN_PAGE_SIZE) &&
+ ((xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) ||
+ (flags & XE_BO_NEEDS_64K))) {
aligned_size = ALIGN(size, SZ_64K);
if (type != ttm_bo_type_device)
size = ALIGN(size, SZ_64K);
- flags |= XE_BO_INTERNAL_64K;
+ flags |= XE_BO_FLAG_INTERNAL_64K;
alignment = SZ_64K >> PAGE_SHIFT;
} else {
aligned_size = ALIGN(size, SZ_4K);
- flags &= ~XE_BO_INTERNAL_64K;
+ flags &= ~XE_BO_FLAG_INTERNAL_64K;
alignment = SZ_4K >> PAGE_SHIFT;
}
@@ -1263,11 +1257,11 @@ struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
drm_gem_private_object_init(&xe->drm, &bo->ttm.base, size);
if (resv) {
- ctx.allow_res_evict = !(flags & XE_BO_CREATE_NO_RESV_EVICT);
+ ctx.allow_res_evict = !(flags & XE_BO_FLAG_NO_RESV_EVICT);
ctx.resv = resv;
}
- if (!(flags & XE_BO_FIXED_PLACEMENT_BIT)) {
+ if (!(flags & XE_BO_FLAG_FIXED_PLACEMENT)) {
err = __xe_bo_placement_for_flags(xe, bo, bo->flags);
if (WARN_ON(err)) {
xe_ttm_bo_destroy(&bo->ttm);
@@ -1277,7 +1271,7 @@ struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
/* Defer populating type_sg bos */
placement = (type == ttm_bo_type_sg ||
- bo->flags & XE_BO_DEFER_BACKING) ? &sys_placement :
+ bo->flags & XE_BO_FLAG_DEFER_BACKING) ? &sys_placement :
&bo->placement;
err = ttm_bo_init_reserved(&xe->ttm, &bo->ttm, type,
placement, alignment,
@@ -1332,21 +1326,21 @@ static int __xe_bo_fixed_placement(struct xe_device *xe,
{
struct ttm_place *place = bo->placements;
- if (flags & (XE_BO_CREATE_USER_BIT|XE_BO_CREATE_SYSTEM_BIT))
+ if (flags & (XE_BO_FLAG_USER | XE_BO_FLAG_SYSTEM))
return -EINVAL;
place->flags = TTM_PL_FLAG_CONTIGUOUS;
place->fpfn = start >> PAGE_SHIFT;
place->lpfn = end >> PAGE_SHIFT;
- switch (flags & (XE_BO_CREATE_STOLEN_BIT | XE_BO_CREATE_VRAM_MASK)) {
- case XE_BO_CREATE_VRAM0_BIT:
+ switch (flags & (XE_BO_FLAG_STOLEN | XE_BO_FLAG_VRAM_MASK)) {
+ case XE_BO_FLAG_VRAM0:
place->mem_type = XE_PL_VRAM0;
break;
- case XE_BO_CREATE_VRAM1_BIT:
+ case XE_BO_FLAG_VRAM1:
place->mem_type = XE_PL_VRAM1;
break;
- case XE_BO_CREATE_STOLEN_BIT:
+ case XE_BO_FLAG_STOLEN:
place->mem_type = XE_PL_STOLEN;
break;
@@ -1380,7 +1374,7 @@ __xe_bo_create_locked(struct xe_device *xe,
if (IS_ERR(bo))
return bo;
- flags |= XE_BO_FIXED_PLACEMENT_BIT;
+ flags |= XE_BO_FLAG_FIXED_PLACEMENT;
err = __xe_bo_fixed_placement(xe, bo, flags, start, end, size);
if (err) {
xe_bo_free(bo);
@@ -1390,7 +1384,7 @@ __xe_bo_create_locked(struct xe_device *xe,
bo = ___xe_bo_create_locked(xe, bo, tile, vm ? xe_vm_resv(vm) : NULL,
vm && !xe_vm_in_fault_mode(vm) &&
- flags & XE_BO_CREATE_USER_BIT ?
+ flags & XE_BO_FLAG_USER ?
&vm->lru_bulk_move : NULL, size,
cpu_caching, type, flags);
if (IS_ERR(bo))
@@ -1407,13 +1401,13 @@ __xe_bo_create_locked(struct xe_device *xe,
xe_vm_get(vm);
bo->vm = vm;
- if (bo->flags & XE_BO_CREATE_GGTT_BIT) {
- if (!tile && flags & XE_BO_CREATE_STOLEN_BIT)
+ if (bo->flags & XE_BO_FLAG_GGTT) {
+ if (!tile && flags & XE_BO_FLAG_STOLEN)
tile = xe_device_get_root_tile(xe);
xe_assert(xe, tile);
- if (flags & XE_BO_FIXED_PLACEMENT_BIT) {
+ if (flags & XE_BO_FLAG_FIXED_PLACEMENT) {
err = xe_ggtt_insert_bo_at(tile->mem.ggtt, bo,
start + bo->size, U64_MAX);
} else {
@@ -1456,7 +1450,7 @@ struct xe_bo *xe_bo_create_user(struct xe_device *xe, struct xe_tile *tile,
{
struct xe_bo *bo = __xe_bo_create_locked(xe, tile, vm, size, 0, ~0ULL,
cpu_caching, type,
- flags | XE_BO_CREATE_USER_BIT);
+ flags | XE_BO_FLAG_USER);
if (!IS_ERR(bo))
xe_bo_unlock_vm_held(bo);
@@ -1485,12 +1479,12 @@ struct xe_bo *xe_bo_create_pin_map_at(struct xe_device *xe, struct xe_tile *tile
u64 start = offset == ~0ull ? 0 : offset;
u64 end = offset == ~0ull ? offset : start + size;
- if (flags & XE_BO_CREATE_STOLEN_BIT &&
+ if (flags & XE_BO_FLAG_STOLEN &&
xe_ttm_stolen_cpu_access_needs_ggtt(xe))
- flags |= XE_BO_CREATE_GGTT_BIT;
+ flags |= XE_BO_FLAG_GGTT;
bo = xe_bo_create_locked_range(xe, tile, vm, size, start, end, type,
- flags | XE_BO_NEEDS_CPU_ACCESS);
+ flags | XE_BO_FLAG_NEEDS_CPU_ACCESS);
if (IS_ERR(bo))
return bo;
@@ -1587,13 +1581,15 @@ struct xe_bo *xe_managed_bo_create_from_data(struct xe_device *xe, struct xe_til
int xe_managed_bo_reinit_in_vram(struct xe_device *xe, struct xe_tile *tile, struct xe_bo **src)
{
struct xe_bo *bo;
+ u32 dst_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile) | XE_BO_FLAG_GGTT;
+
+ dst_flags |= (*src)->flags & XE_BO_FLAG_GGTT_INVALIDATE;
xe_assert(xe, IS_DGFX(xe));
xe_assert(xe, !(*src)->vmap.is_iomem);
- bo = xe_managed_bo_create_from_data(xe, tile, (*src)->vmap.vaddr, (*src)->size,
- XE_BO_CREATE_VRAM_IF_DGFX(tile) |
- XE_BO_CREATE_GGTT_BIT);
+ bo = xe_managed_bo_create_from_data(xe, tile, (*src)->vmap.vaddr,
+ (*src)->size, dst_flags);
if (IS_ERR(bo))
return PTR_ERR(bo);
@@ -1668,8 +1664,8 @@ int xe_bo_pin(struct xe_bo *bo)
xe_assert(xe, !xe_bo_is_user(bo));
/* Pinned object must be in GGTT or have pinned flag */
- xe_assert(xe, bo->flags & (XE_BO_CREATE_PINNED_BIT |
- XE_BO_CREATE_GGTT_BIT));
+ xe_assert(xe, bo->flags & (XE_BO_FLAG_PINNED |
+ XE_BO_FLAG_GGTT));
/*
* No reason we can't support pinning imported dma-bufs we just don't
@@ -1690,7 +1686,7 @@ int xe_bo_pin(struct xe_bo *bo)
* during suspend / resume (force restore to same physical address).
*/
if (IS_DGFX(xe) && !(IS_ENABLED(CONFIG_DRM_XE_DEBUG) &&
- bo->flags & XE_BO_INTERNAL_TEST)) {
+ bo->flags & XE_BO_FLAG_INTERNAL_TEST)) {
struct ttm_place *place = &(bo->placements[0]);
if (mem_type_is_vram(place->mem_type)) {
@@ -1758,7 +1754,7 @@ void xe_bo_unpin(struct xe_bo *bo)
xe_assert(xe, xe_bo_is_pinned(bo));
if (IS_DGFX(xe) && !(IS_ENABLED(CONFIG_DRM_XE_DEBUG) &&
- bo->flags & XE_BO_INTERNAL_TEST)) {
+ bo->flags & XE_BO_FLAG_INTERNAL_TEST)) {
struct ttm_place *place = &(bo->placements[0]);
if (mem_type_is_vram(place->mem_type)) {
@@ -1861,7 +1857,7 @@ int xe_bo_vmap(struct xe_bo *bo)
xe_bo_assert_held(bo);
- if (!(bo->flags & XE_BO_NEEDS_CPU_ACCESS))
+ if (!(bo->flags & XE_BO_FLAG_NEEDS_CPU_ACCESS))
return -EINVAL;
if (!iosys_map_is_null(&bo->vmap))
@@ -1943,29 +1939,29 @@ int xe_gem_create_ioctl(struct drm_device *dev, void *data,
bo_flags = 0;
if (args->flags & DRM_XE_GEM_CREATE_FLAG_DEFER_BACKING)
- bo_flags |= XE_BO_DEFER_BACKING;
+ bo_flags |= XE_BO_FLAG_DEFER_BACKING;
if (args->flags & DRM_XE_GEM_CREATE_FLAG_SCANOUT)
- bo_flags |= XE_BO_SCANOUT_BIT;
+ bo_flags |= XE_BO_FLAG_SCANOUT;
- bo_flags |= args->placement << (ffs(XE_BO_CREATE_SYSTEM_BIT) - 1);
+ bo_flags |= args->placement << (ffs(XE_BO_FLAG_SYSTEM) - 1);
if (args->flags & DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM) {
- if (XE_IOCTL_DBG(xe, !(bo_flags & XE_BO_CREATE_VRAM_MASK)))
+ if (XE_IOCTL_DBG(xe, !(bo_flags & XE_BO_FLAG_VRAM_MASK)))
return -EINVAL;
- bo_flags |= XE_BO_NEEDS_CPU_ACCESS;
+ bo_flags |= XE_BO_FLAG_NEEDS_CPU_ACCESS;
}
if (XE_IOCTL_DBG(xe, !args->cpu_caching ||
args->cpu_caching > DRM_XE_GEM_CPU_CACHING_WC))
return -EINVAL;
- if (XE_IOCTL_DBG(xe, bo_flags & XE_BO_CREATE_VRAM_MASK &&
+ if (XE_IOCTL_DBG(xe, bo_flags & XE_BO_FLAG_VRAM_MASK &&
args->cpu_caching != DRM_XE_GEM_CPU_CACHING_WC))
return -EINVAL;
- if (XE_IOCTL_DBG(xe, bo_flags & XE_BO_SCANOUT_BIT &&
+ if (XE_IOCTL_DBG(xe, bo_flags & XE_BO_FLAG_SCANOUT &&
args->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB))
return -EINVAL;
@@ -2206,6 +2202,9 @@ bool xe_bo_needs_ccs_pages(struct xe_bo *bo)
{
struct xe_device *xe = xe_bo_device(bo);
+ if (GRAPHICS_VER(xe) >= 20 && IS_DGFX(xe))
+ return false;
+
if (!xe_device_has_flat_ccs(xe) || bo->ttm.type != ttm_bo_type_device)
return false;
@@ -2214,7 +2213,7 @@ bool xe_bo_needs_ccs_pages(struct xe_bo *bo)
* can't be used since there's no CCS storage associated with
* non-VRAM addresses.
*/
- if (IS_DGFX(xe) && (bo->flags & XE_BO_CREATE_SYSTEM_BIT))
+ if (IS_DGFX(xe) && (bo->flags & XE_BO_FLAG_SYSTEM))
return false;
return true;
@@ -2283,9 +2282,9 @@ int xe_bo_dumb_create(struct drm_file *file_priv,
bo = xe_bo_create_user(xe, NULL, NULL, args->size,
DRM_XE_GEM_CPU_CACHING_WC,
ttm_bo_type_device,
- XE_BO_CREATE_VRAM_IF_DGFX(xe_device_get_root_tile(xe)) |
- XE_BO_CREATE_USER_BIT | XE_BO_SCANOUT_BIT |
- XE_BO_NEEDS_CPU_ACCESS);
+ XE_BO_FLAG_VRAM_IF_DGFX(xe_device_get_root_tile(xe)) |
+ XE_BO_FLAG_SCANOUT |
+ XE_BO_FLAG_NEEDS_CPU_ACCESS);
if (IS_ERR(bo))
return PTR_ERR(bo);
diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h
index c59ad15961ce..a885b14bf595 100644
--- a/drivers/gpu/drm/xe/xe_bo.h
+++ b/drivers/gpu/drm/xe/xe_bo.h
@@ -13,48 +13,34 @@
#include "xe_vm_types.h"
#include "xe_vm.h"
-/**
- * xe_vm_assert_held(vm) - Assert that the vm's reservation object is held.
- * @vm: The vm
- */
-#define xe_vm_assert_held(vm) dma_resv_assert_held(xe_vm_resv(vm))
-
-
-
#define XE_DEFAULT_GTT_SIZE_MB 3072ULL /* 3GB by default */
-#define XE_BO_CREATE_USER_BIT BIT(0)
+#define XE_BO_FLAG_USER BIT(0)
/* The bits below need to be contiguous, or things break */
-#define XE_BO_CREATE_SYSTEM_BIT BIT(1)
-#define XE_BO_CREATE_VRAM0_BIT BIT(2)
-#define XE_BO_CREATE_VRAM1_BIT BIT(3)
-#define XE_BO_CREATE_VRAM_MASK (XE_BO_CREATE_VRAM0_BIT | \
- XE_BO_CREATE_VRAM1_BIT)
+#define XE_BO_FLAG_SYSTEM BIT(1)
+#define XE_BO_FLAG_VRAM0 BIT(2)
+#define XE_BO_FLAG_VRAM1 BIT(3)
+#define XE_BO_FLAG_VRAM_MASK (XE_BO_FLAG_VRAM0 | XE_BO_FLAG_VRAM1)
/* -- */
-#define XE_BO_CREATE_STOLEN_BIT BIT(4)
-#define XE_BO_CREATE_VRAM_IF_DGFX(tile) \
- (IS_DGFX(tile_to_xe(tile)) ? XE_BO_CREATE_VRAM0_BIT << (tile)->id : \
- XE_BO_CREATE_SYSTEM_BIT)
-#define XE_BO_CREATE_GGTT_BIT BIT(5)
-#define XE_BO_CREATE_IGNORE_MIN_PAGE_SIZE_BIT BIT(6)
-#define XE_BO_CREATE_PINNED_BIT BIT(7)
-#define XE_BO_CREATE_NO_RESV_EVICT BIT(8)
-#define XE_BO_DEFER_BACKING BIT(9)
-#define XE_BO_SCANOUT_BIT BIT(10)
-#define XE_BO_FIXED_PLACEMENT_BIT BIT(11)
-#define XE_BO_PAGETABLE BIT(12)
-#define XE_BO_NEEDS_CPU_ACCESS BIT(13)
-#define XE_BO_NEEDS_UC BIT(14)
+#define XE_BO_FLAG_STOLEN BIT(4)
+#define XE_BO_FLAG_VRAM_IF_DGFX(tile) (IS_DGFX(tile_to_xe(tile)) ? \
+ XE_BO_FLAG_VRAM0 << (tile)->id : \
+ XE_BO_FLAG_SYSTEM)
+#define XE_BO_FLAG_GGTT BIT(5)
+#define XE_BO_FLAG_IGNORE_MIN_PAGE_SIZE BIT(6)
+#define XE_BO_FLAG_PINNED BIT(7)
+#define XE_BO_FLAG_NO_RESV_EVICT BIT(8)
+#define XE_BO_FLAG_DEFER_BACKING BIT(9)
+#define XE_BO_FLAG_SCANOUT BIT(10)
+#define XE_BO_FLAG_FIXED_PLACEMENT BIT(11)
+#define XE_BO_FLAG_PAGETABLE BIT(12)
+#define XE_BO_FLAG_NEEDS_CPU_ACCESS BIT(13)
+#define XE_BO_FLAG_NEEDS_UC BIT(14)
+#define XE_BO_NEEDS_64K BIT(15)
+#define XE_BO_FLAG_GGTT_INVALIDATE BIT(16)
/* this one is triggered internally only */
-#define XE_BO_INTERNAL_TEST BIT(30)
-#define XE_BO_INTERNAL_64K BIT(31)
-
-#define XELPG_PPGTT_PTE_PAT3 BIT_ULL(62)
-#define XE2_PPGTT_PTE_PAT4 BIT_ULL(61)
-#define XE_PPGTT_PDE_PDPE_PAT2 BIT_ULL(12)
-#define XE_PPGTT_PTE_PAT2 BIT_ULL(7)
-#define XE_PPGTT_PTE_PAT1 BIT_ULL(4)
-#define XE_PPGTT_PTE_PAT0 BIT_ULL(3)
+#define XE_BO_FLAG_INTERNAL_TEST BIT(30)
+#define XE_BO_FLAG_INTERNAL_64K BIT(31)
#define XE_PTE_SHIFT 12
#define XE_PAGE_SIZE (1 << XE_PTE_SHIFT)
@@ -68,20 +54,6 @@
#define XE_64K_PTE_MASK (XE_64K_PAGE_SIZE - 1)
#define XE_64K_PDE_MASK (XE_PDE_MASK >> 4)
-#define XE_PDE_PS_2M BIT_ULL(7)
-#define XE_PDPE_PS_1G BIT_ULL(7)
-#define XE_PDE_IPS_64K BIT_ULL(11)
-
-#define XE_GGTT_PTE_DM BIT_ULL(1)
-#define XE_USM_PPGTT_PTE_AE BIT_ULL(10)
-#define XE_PPGTT_PTE_DM BIT_ULL(11)
-#define XE_PDE_64K BIT_ULL(6)
-#define XE_PTE_PS64 BIT_ULL(8)
-#define XE_PTE_NULL BIT_ULL(9)
-
-#define XE_PAGE_PRESENT BIT_ULL(0)
-#define XE_PAGE_RW BIT_ULL(1)
-
#define XE_PL_SYSTEM TTM_PL_SYSTEM
#define XE_PL_TT TTM_PL_TT
#define XE_PL_VRAM0 TTM_PL_VRAM
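
Note: the placement flags above must stay contiguous because the GEM
create ioctl shifts the uAPI placement mask directly into that bit
range (see the ffs() line in xe_gem_create_ioctl() earlier in this
diff). A minimal userspace-style sketch of the mapping, with
illustrative names:

	/* Sketch only; mirrors the shift in xe_gem_create_ioctl().
	 * ffs() comes from <strings.h> here; the kernel uses the
	 * linux/bitops.h variant. */
	#include <strings.h>

	#define XE_BO_FLAG_SYSTEM (1u << 1)

	static unsigned int placement_to_bo_flags(unsigned int uapi_placement)
	{
		/* uAPI bit 0 -> BIT(1), bit 1 -> BIT(2), ...; a gap in
		 * the flag numbering would silently misroute placements. */
		return uapi_placement << (ffs(XE_BO_FLAG_SYSTEM) - 1);
	}
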
diff --git a/drivers/gpu/drm/xe/xe_bo_evict.c b/drivers/gpu/drm/xe/xe_bo_evict.c
index 7a264a9ca06e..541b49007d73 100644
--- a/drivers/gpu/drm/xe/xe_bo_evict.c
+++ b/drivers/gpu/drm/xe/xe_bo_evict.c
@@ -146,7 +146,7 @@ int xe_bo_restore_kernel(struct xe_device *xe)
return ret;
}
- if (bo->flags & XE_BO_CREATE_GGTT_BIT) {
+ if (bo->flags & XE_BO_FLAG_GGTT) {
struct xe_tile *tile = bo->tile;
mutex_lock(&tile->mem.ggtt->lock);
@@ -220,7 +220,7 @@ int xe_bo_restore_user(struct xe_device *xe)
list_splice_tail(&still_in_list, &xe->pinned.external_vram);
spin_unlock(&xe->pinned.lock);
- /* Wait for validate to complete */
+ /* Wait for restore to complete */
for_each_tile(tile, xe, id)
xe_tile_migrate_wait(tile);
diff --git a/drivers/gpu/drm/xe/xe_debugfs.c b/drivers/gpu/drm/xe/xe_debugfs.c
index 01db5b27bec5..c9b30dbdc14d 100644
--- a/drivers/gpu/drm/xe/xe_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_debugfs.c
@@ -12,6 +12,8 @@
#include "xe_bo.h"
#include "xe_device.h"
#include "xe_gt_debugfs.h"
+#include "xe_pm.h"
+#include "xe_sriov.h"
#include "xe_step.h"
#ifdef CONFIG_DRM_XE_DEBUG
@@ -37,6 +39,8 @@ static int info(struct seq_file *m, void *data)
struct xe_gt *gt;
u8 id;
+ xe_pm_runtime_get(xe);
+
drm_printf(&p, "graphics_verx100 %d\n", xe->info.graphics_verx100);
drm_printf(&p, "media_verx100 %d\n", xe->info.media_verx100);
drm_printf(&p, "stepping G:%s M:%s D:%s B:%s\n",
@@ -63,11 +67,22 @@ static int info(struct seq_file *m, void *data)
gt->info.engine_mask);
}
+ xe_pm_runtime_put(xe);
+ return 0;
+}
+
+static int sriov_info(struct seq_file *m, void *data)
+{
+ struct xe_device *xe = node_to_xe(m->private);
+ struct drm_printer p = drm_seq_file_printer(m);
+
+ xe_sriov_print_info(xe, &p);
return 0;
}
static const struct drm_info_list debugfs_list[] = {
{"info", info, 0},
+ { .name = "sriov_info", .show = sriov_info, },
};
static int forcewake_open(struct inode *inode, struct file *file)
@@ -76,8 +91,7 @@ static int forcewake_open(struct inode *inode, struct file *file)
struct xe_gt *gt;
u8 id;
- xe_device_mem_access_get(xe);
-
+ xe_pm_runtime_get(xe);
for_each_gt(gt, xe, id)
XE_WARN_ON(xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL));
@@ -92,8 +106,7 @@ static int forcewake_release(struct inode *inode, struct file *file)
for_each_gt(gt, xe, id)
XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
-
- xe_device_mem_access_put(xe);
+ xe_pm_runtime_put(xe);
return 0;
}
@@ -127,7 +140,7 @@ void xe_debugfs_register(struct xe_device *xe)
if (man) {
char name[16];
- sprintf(name, "vram%d_mm", mem_type - XE_PL_VRAM0);
+ snprintf(name, sizeof(name), "vram%d_mm", mem_type - XE_PL_VRAM0);
ttm_resource_manager_create_debugfs(man, root, name);
}
}
diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c
index 68d3d623a05b..3d7980232be1 100644
--- a/drivers/gpu/drm/xe/xe_devcoredump.c
+++ b/drivers/gpu/drm/xe/xe_devcoredump.c
@@ -9,10 +9,13 @@
#include <linux/devcoredump.h>
#include <generated/utsrelease.h>
+#include <drm/drm_managed.h>
+
#include "xe_device.h"
#include "xe_exec_queue.h"
#include "xe_force_wake.h"
#include "xe_gt.h"
+#include "xe_gt_printk.h"
#include "xe_guc_ct.h"
#include "xe_guc_submit.h"
#include "xe_hw_engine.h"
@@ -64,9 +67,11 @@ static void xe_devcoredump_deferred_snap_work(struct work_struct *work)
{
struct xe_devcoredump_snapshot *ss = container_of(work, typeof(*ss), work);
- xe_force_wake_get(gt_to_fw(ss->gt), XE_FORCEWAKE_ALL);
- if (ss->vm)
- xe_vm_snapshot_capture_delayed(ss->vm);
+ /* Keep going if forcewake fails, as we still want to save the memory and SW data */
+ if (xe_force_wake_get(gt_to_fw(ss->gt), XE_FORCEWAKE_ALL))
+ xe_gt_info(ss->gt, "failed to get forcewake for coredump capture\n");
+ xe_vm_snapshot_capture_delayed(ss->vm);
+ xe_guc_exec_queue_snapshot_capture_delayed(ss->ge);
xe_force_wake_put(gt_to_fw(ss->gt), XE_FORCEWAKE_ALL);
}
@@ -74,17 +79,19 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset,
size_t count, void *data, size_t datalen)
{
struct xe_devcoredump *coredump = data;
- struct xe_device *xe = coredump_to_xe(coredump);
- struct xe_devcoredump_snapshot *ss = &coredump->snapshot;
+ struct xe_device *xe;
+ struct xe_devcoredump_snapshot *ss;
struct drm_printer p;
struct drm_print_iterator iter;
struct timespec64 ts;
int i;
- /* Our device is gone already... */
- if (!data || !coredump_to_xe(coredump))
+ if (!coredump)
return -ENODEV;
+ xe = coredump_to_xe(coredump);
+ ss = &coredump->snapshot;
+
/* Ensure delayed work is captured before continuing */
flush_work(&ss->work);
@@ -117,10 +124,8 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset,
if (coredump->snapshot.hwe[i])
xe_hw_engine_snapshot_print(coredump->snapshot.hwe[i],
&p);
- if (coredump->snapshot.vm) {
- drm_printf(&p, "\n**** VM state ****\n");
- xe_vm_snapshot_print(coredump->snapshot.vm, &p);
- }
+ drm_printf(&p, "\n**** VM state ****\n");
+ xe_vm_snapshot_print(coredump->snapshot.vm, &p);
return count - iter.remain;
}
@@ -180,10 +185,12 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump,
}
}
- xe_force_wake_get(gt_to_fw(q->gt), XE_FORCEWAKE_ALL);
+ /* Keep going if forcewake fails, as we still want to save the memory and SW data */
+ if (xe_force_wake_get(gt_to_fw(q->gt), XE_FORCEWAKE_ALL))
+ xe_gt_info(ss->gt, "failed to get forcewake for coredump capture\n");
coredump->snapshot.ct = xe_guc_ct_snapshot_capture(&guc->ct, true);
- coredump->snapshot.ge = xe_guc_exec_queue_snapshot_capture(job);
+ coredump->snapshot.ge = xe_guc_exec_queue_snapshot_capture(q);
coredump->snapshot.job = xe_sched_job_snapshot_capture(job);
coredump->snapshot.vm = xe_vm_snapshot_capture(q->vm);
@@ -196,8 +203,7 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump,
coredump->snapshot.hwe[id] = xe_hw_engine_snapshot_capture(hwe);
}
- if (ss->vm)
- queue_work(system_unbound_wq, &ss->work);
+ queue_work(system_unbound_wq, &ss->work);
xe_force_wake_put(gt_to_fw(q->gt), XE_FORCEWAKE_ALL);
dma_fence_end_signalling(cookie);
@@ -231,5 +237,14 @@ void xe_devcoredump(struct xe_sched_job *job)
dev_coredumpm(xe->drm.dev, THIS_MODULE, coredump, 0, GFP_KERNEL,
xe_devcoredump_read, xe_devcoredump_free);
}
-#endif
+static void xe_driver_devcoredump_fini(struct drm_device *drm, void *arg)
+{
+ dev_coredump_put(drm->dev);
+}
+
+int xe_devcoredump_init(struct xe_device *xe)
+{
+ return drmm_add_action_or_reset(&xe->drm, xe_driver_devcoredump_fini, xe);
+}
+#endif
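
Note: the capture paths in this file now take forcewake best-effort: a
failure is logged but the dump proceeds, since the SW state is still
worth saving. The pattern, condensed from the hunks above (not
verbatim):

	if (xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL))
		xe_gt_info(gt, "failed to get forcewake for coredump capture\n");
	/* ... capture HW and SW snapshots regardless ... */
	xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL);
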
diff --git a/drivers/gpu/drm/xe/xe_devcoredump.h b/drivers/gpu/drm/xe/xe_devcoredump.h
index df8671f0b5eb..e2fa65ce0932 100644
--- a/drivers/gpu/drm/xe/xe_devcoredump.h
+++ b/drivers/gpu/drm/xe/xe_devcoredump.h
@@ -11,10 +11,16 @@ struct xe_sched_job;
#ifdef CONFIG_DEV_COREDUMP
void xe_devcoredump(struct xe_sched_job *job);
+int xe_devcoredump_init(struct xe_device *xe);
#else
static inline void xe_devcoredump(struct xe_sched_job *job)
{
}
+
+static inline int xe_devcoredump_init(struct xe_device *xe)
+{
+ return 0;
+}
#endif
#endif
diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index d32ff3857e65..55bbc8b8df15 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -20,6 +20,7 @@
#include "regs/xe_regs.h"
#include "xe_bo.h"
#include "xe_debugfs.h"
+#include "xe_devcoredump.h"
#include "xe_dma_buf.h"
#include "xe_drm_client.h"
#include "xe_drv.h"
@@ -45,12 +46,6 @@
#include "xe_vm.h"
#include "xe_wait_user_fence.h"
-#ifdef CONFIG_LOCKDEP
-struct lockdep_map xe_device_mem_access_lockdep_map = {
- .name = "xe_device_mem_access_lockdep_map"
-};
-#endif
-
static int xe_file_open(struct drm_device *dev, struct drm_file *file)
{
struct xe_device *xe = to_xe_device(dev);
@@ -136,15 +131,48 @@ static const struct drm_ioctl_desc xe_ioctls[] = {
DRM_RENDER_ALLOW),
};
+static long xe_drm_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+ struct drm_file *file_priv = file->private_data;
+ struct xe_device *xe = to_xe_device(file_priv->minor->dev);
+ long ret;
+
+ ret = xe_pm_runtime_get_ioctl(xe);
+ if (ret >= 0)
+ ret = drm_ioctl(file, cmd, arg);
+ xe_pm_runtime_put(xe);
+
+ return ret;
+}
+
+#ifdef CONFIG_COMPAT
+static long xe_drm_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+ struct drm_file *file_priv = file->private_data;
+ struct xe_device *xe = to_xe_device(file_priv->minor->dev);
+ long ret;
+
+ ret = xe_pm_runtime_get_ioctl(xe);
+ if (ret >= 0)
+ ret = drm_compat_ioctl(file, cmd, arg);
+ xe_pm_runtime_put(xe);
+
+ return ret;
+}
+#else
+/* similarly to drm_compat_ioctl, let it be assigned to .compat_ioctl unconditionally */
+#define xe_drm_compat_ioctl NULL
+#endif
+
static const struct file_operations xe_driver_fops = {
.owner = THIS_MODULE,
.open = drm_open,
.release = drm_release_noglobal,
- .unlocked_ioctl = drm_ioctl,
+ .unlocked_ioctl = xe_drm_ioctl,
.mmap = drm_gem_mmap,
.poll = drm_poll,
.read = drm_read,
- .compat_ioctl = drm_compat_ioctl,
+ .compat_ioctl = xe_drm_compat_ioctl,
.llseek = noop_llseek,
#ifdef CONFIG_PROC_FS
.show_fdinfo = drm_show_fdinfo,
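
Note on xe_drm_ioctl() above: xe_pm_runtime_put() runs even when the
get failed. Assuming xe_pm_runtime_get_ioctl() follows the usual
pm_runtime_get_sync() semantics, the usage counter is incremented even
on a failed wake-up, so the reference must be dropped on both paths:

	ret = xe_pm_runtime_get_ioctl(xe);	/* counter bumped even on failure */
	if (ret >= 0)
		ret = drm_ioctl(file, cmd, arg);	/* device awake for the ioctl */
	xe_pm_runtime_put(xe);			/* balances the get on both paths */
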
@@ -389,8 +417,70 @@ mask_err:
return err;
}
-/*
- * Initialize MMIO resources that don't require any knowledge about tile count.
+static bool verify_lmem_ready(struct xe_gt *gt)
+{
+ u32 val = xe_mmio_read32(gt, GU_CNTL) & LMEM_INIT;
+
+ return !!val;
+}
+
+static int wait_for_lmem_ready(struct xe_device *xe)
+{
+ struct xe_gt *gt = xe_root_mmio_gt(xe);
+ unsigned long timeout, start;
+
+ if (!IS_DGFX(xe))
+ return 0;
+
+ if (IS_SRIOV_VF(xe))
+ return 0;
+
+ if (verify_lmem_ready(gt))
+ return 0;
+
+ drm_dbg(&xe->drm, "Waiting for lmem initialization\n");
+
+ start = jiffies;
+ timeout = start + msecs_to_jiffies(60 * 1000); /* 60 sec! */
+
+ do {
+ if (signal_pending(current))
+ return -EINTR;
+
+ /*
+ * The boot firmware initializes local memory and
+ * assesses its health. If memory training fails,
+ * the punit will have been instructed to keep the GT powered
+ * down, and we won't be able to communicate with it.
+ *
+ * If the status check is done before the punit updates the register,
+ * it can lead to the system being unusable.
+ * Use a timeout and defer the probe to prevent this.
+ */
+ if (time_after(jiffies, timeout)) {
+ drm_dbg(&xe->drm, "lmem not initialized by firmware\n");
+ return -EPROBE_DEFER;
+ }
+
+ msleep(20);
+
+ } while (!verify_lmem_ready(gt));
+
+ drm_dbg(&xe->drm, "lmem ready after %ums",
+ jiffies_to_msecs(jiffies - start));
+
+ return 0;
+}
+
+/**
+ * xe_device_probe_early() - Device early probe
+ * @xe: xe device instance
+ *
+ * Initialize MMIO resources that don't require any
+ * knowledge about tile count. Also initialize pcode and
+ * check VRAM initialization on the root tile.
+ *
+ * Return: 0 on success, error code on failure
*/
int xe_device_probe_early(struct xe_device *xe)
{
@@ -400,7 +490,13 @@ int xe_device_probe_early(struct xe_device *xe)
if (err)
return err;
- err = xe_mmio_root_tile_init(xe);
+ xe_sriov_probe_early(xe);
+
+ err = xe_pcode_probe_early(xe);
+ if (err)
+ return err;
+
+ err = wait_for_lmem_ready(xe);
if (err)
return err;
@@ -478,15 +574,15 @@ int xe_device_probe(struct xe_device *xe)
return err;
}
+ err = xe_devcoredump_init(xe);
+ if (err)
+ return err;
err = drmm_add_action_or_reset(&xe->drm, xe_driver_flr_fini, xe);
if (err)
return err;
- for_each_gt(gt, xe, id) {
- err = xe_pcode_probe(gt);
- if (err)
- return err;
- }
+ for_each_gt(gt, xe, id)
+ xe_pcode_init(gt);
err = xe_display_init_noirq(xe);
if (err)
@@ -553,11 +649,7 @@ int xe_device_probe(struct xe_device *xe)
xe_hwmon_register(xe);
- err = drmm_add_action_or_reset(&xe->drm, xe_device_sanitize, xe);
- if (err)
- return err;
-
- return 0;
+ return drmm_add_action_or_reset(&xe->drm, xe_device_sanitize, xe);
err_fini_display:
xe_display_driver_remove(xe);
@@ -621,87 +713,20 @@ u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size)
DIV_ROUND_UP_ULL(size, NUM_BYTES_PER_CCS_BYTE(xe)) : 0;
}
-bool xe_device_mem_access_ongoing(struct xe_device *xe)
-{
- if (xe_pm_read_callback_task(xe) != NULL)
- return true;
-
- return atomic_read(&xe->mem_access.ref);
-}
-
+/**
+ * xe_device_assert_mem_access - Inspect the current runtime_pm state.
+ * @xe: xe device instance
+ *
+ * To be used before any kind of memory access. It will splat a debug warning
+ * if the device is currently sleeping, but it doesn't guarantee in any way
+ * that the device is going to remain awake. Xe PM runtime get and put
+ * functions should bound the memory access at the outer level, while
+ * this check is intended for inner usage, to splat a warning if the worst
+ * case has just happened.
+ */
void xe_device_assert_mem_access(struct xe_device *xe)
{
- XE_WARN_ON(!xe_device_mem_access_ongoing(xe));
-}
-
-bool xe_device_mem_access_get_if_ongoing(struct xe_device *xe)
-{
- bool active;
-
- if (xe_pm_read_callback_task(xe) == current)
- return true;
-
- active = xe_pm_runtime_get_if_active(xe);
- if (active) {
- int ref = atomic_inc_return(&xe->mem_access.ref);
-
- xe_assert(xe, ref != S32_MAX);
- }
-
- return active;
-}
-
-void xe_device_mem_access_get(struct xe_device *xe)
-{
- int ref;
-
- /*
- * This looks racy, but should be fine since the pm_callback_task only
- * transitions from NULL -> current (and back to NULL again), during the
- * runtime_resume() or runtime_suspend() callbacks, for which there can
- * only be a single one running for our device. We only need to prevent
- * recursively calling the runtime_get or runtime_put from those
- * callbacks, as well as preventing triggering any access_ongoing
- * asserts.
- */
- if (xe_pm_read_callback_task(xe) == current)
- return;
-
- /*
- * Since the resume here is synchronous it can be quite easy to deadlock
- * if we are not careful. Also in practice it might be quite timing
- * sensitive to ever see the 0 -> 1 transition with the callers locks
- * held, so deadlocks might exist but are hard for lockdep to ever see.
- * With this in mind, help lockdep learn about the potentially scary
- * stuff that can happen inside the runtime_resume callback by acquiring
- * a dummy lock (it doesn't protect anything and gets compiled out on
- * non-debug builds). Lockdep then only needs to see the
- * mem_access_lockdep_map -> runtime_resume callback once, and then can
- * hopefully validate all the (callers_locks) -> mem_access_lockdep_map.
- * For example if the (callers_locks) are ever grabbed in the
- * runtime_resume callback, lockdep should give us a nice splat.
- */
- lock_map_acquire(&xe_device_mem_access_lockdep_map);
- lock_map_release(&xe_device_mem_access_lockdep_map);
-
- xe_pm_runtime_get(xe);
- ref = atomic_inc_return(&xe->mem_access.ref);
-
- xe_assert(xe, ref != S32_MAX);
-
-}
-
-void xe_device_mem_access_put(struct xe_device *xe)
-{
- int ref;
-
- if (xe_pm_read_callback_task(xe) == current)
- return;
-
- ref = atomic_dec_return(&xe->mem_access.ref);
- xe_pm_runtime_put(xe);
-
- xe_assert(xe, ref >= 0);
+ xe_assert(xe, !xe_pm_runtime_suspended(xe));
}
void xe_device_snapshot_print(struct xe_device *xe, struct drm_printer *p)
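
Note: wait_for_lmem_ready() above is an instance of a standard bounded
poll. A generic sketch of the same shape, with illustrative names:

	#include <linux/delay.h>
	#include <linux/jiffies.h>
	#include <linux/sched/signal.h>

	static int poll_until_ready(bool (*ready)(void *arg), void *arg,
				    unsigned int timeout_ms)
	{
		unsigned long timeout = jiffies + msecs_to_jiffies(timeout_ms);

		while (!ready(arg)) {
			if (signal_pending(current))
				return -EINTR;		/* let the user abort */
			if (time_after(jiffies, timeout))
				return -EPROBE_DEFER;	/* retry probe later */
			msleep(20);
		}
		return 0;
	}
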
diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h
index d413bc2c6be5..36d4434ebccc 100644
--- a/drivers/gpu/drm/xe/xe_device.h
+++ b/drivers/gpu/drm/xe/xe_device.h
@@ -16,10 +16,6 @@ struct xe_file;
#include "xe_force_wake.h"
#include "xe_macros.h"
-#ifdef CONFIG_LOCKDEP
-extern struct lockdep_map xe_device_mem_access_lockdep_map;
-#endif
-
static inline struct xe_device *to_xe_device(const struct drm_device *dev)
{
return container_of(dev, struct xe_device, drm);
@@ -137,12 +133,7 @@ static inline struct xe_force_wake *gt_to_fw(struct xe_gt *gt)
return &gt->mmio.fw;
}
-void xe_device_mem_access_get(struct xe_device *xe);
-bool xe_device_mem_access_get_if_ongoing(struct xe_device *xe);
-void xe_device_mem_access_put(struct xe_device *xe);
-
void xe_device_assert_mem_access(struct xe_device *xe);
-bool xe_device_mem_access_ongoing(struct xe_device *xe);
static inline bool xe_device_in_fault_mode(struct xe_device *xe)
{
diff --git a/drivers/gpu/drm/xe/xe_device_sysfs.c b/drivers/gpu/drm/xe/xe_device_sysfs.c
index 99113a5a2b84..21677b8cd977 100644
--- a/drivers/gpu/drm/xe/xe_device_sysfs.c
+++ b/drivers/gpu/drm/xe/xe_device_sysfs.c
@@ -35,7 +35,9 @@ vram_d3cold_threshold_show(struct device *dev,
if (!xe)
return -EINVAL;
+ xe_pm_runtime_get(xe);
ret = sysfs_emit(buf, "%d\n", xe->d3cold.vram_threshold);
+ xe_pm_runtime_put(xe);
return ret;
}
@@ -58,7 +60,9 @@ vram_d3cold_threshold_store(struct device *dev, struct device_attribute *attr,
drm_dbg(&xe->drm, "vram_d3cold_threshold: %u\n", vram_d3cold_threshold);
+ xe_pm_runtime_get(xe);
ret = xe_pm_set_vram_threshold(xe, vram_d3cold_threshold);
+ xe_pm_runtime_put(xe);
return ret ?: count;
}
@@ -72,18 +76,14 @@ static void xe_device_sysfs_fini(struct drm_device *drm, void *arg)
sysfs_remove_file(&xe->drm.dev->kobj, &dev_attr_vram_d3cold_threshold.attr);
}
-void xe_device_sysfs_init(struct xe_device *xe)
+int xe_device_sysfs_init(struct xe_device *xe)
{
struct device *dev = xe->drm.dev;
int ret;
ret = sysfs_create_file(&dev->kobj, &dev_attr_vram_d3cold_threshold.attr);
- if (ret) {
- drm_warn(&xe->drm, "Failed to create sysfs file\n");
- return;
- }
-
- ret = drmm_add_action_or_reset(&xe->drm, xe_device_sysfs_fini, xe);
if (ret)
- drm_warn(&xe->drm, "Failed to add sysfs fini drm action\n");
+ return ret;
+
+ return drmm_add_action_or_reset(&xe->drm, xe_device_sysfs_fini, xe);
}
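
Note: this conversion (and the matching gt_idle, ccs_mode and engine
sysfs ones later in the series) converges on one managed-init shape:
create the files, then tie removal to the drm_device lifetime. A
sketch with hypothetical foo_* names:

	#include <drm/drm_managed.h>
	#include <linux/sysfs.h>

	static const struct attribute *foo_attrs[] = { NULL };

	static void foo_sysfs_fini(struct drm_device *drm, void *arg)
	{
		struct kobject *kobj = arg;

		sysfs_remove_files(kobj, foo_attrs);
	}

	static int foo_sysfs_init(struct drm_device *drm, struct kobject *kobj)
	{
		int err;

		err = sysfs_create_files(kobj, foo_attrs);
		if (err)
			return err;

		return drmm_add_action_or_reset(drm, foo_sysfs_fini, kobj);
	}
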
diff --git a/drivers/gpu/drm/xe/xe_device_sysfs.h b/drivers/gpu/drm/xe/xe_device_sysfs.h
index 38b240684bee..f9e83d8bd2c7 100644
--- a/drivers/gpu/drm/xe/xe_device_sysfs.h
+++ b/drivers/gpu/drm/xe/xe_device_sysfs.h
@@ -8,6 +8,6 @@
struct xe_device;
-void xe_device_sysfs_init(struct xe_device *xe);
+int xe_device_sysfs_init(struct xe_device *xe);
#endif
diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index faa32407efa5..2014d87765a2 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -321,6 +321,10 @@ struct xe_device {
struct {
/** @sriov.__mode: SR-IOV mode (Don't access directly!) */
enum xe_sriov_mode __mode;
+
+ /** @sriov.pf: PF specific data */
+ struct xe_device_pf pf;
+
/** @sriov.wq: workqueue used by the virtualization workers */
struct workqueue_struct *wq;
} sriov;
@@ -380,9 +384,6 @@ struct xe_device {
* triggering additional actions when they occur.
*/
struct {
- /** @mem_access.ref: ref count of memory accesses */
- atomic_t ref;
-
/**
* @mem_access.vram_userfault: Encapsulate vram_userfault
* related stuff
diff --git a/drivers/gpu/drm/xe/xe_dma_buf.c b/drivers/gpu/drm/xe/xe_dma_buf.c
index da2627ed6ae7..68f309f5e981 100644
--- a/drivers/gpu/drm/xe/xe_dma_buf.c
+++ b/drivers/gpu/drm/xe/xe_dma_buf.c
@@ -16,6 +16,7 @@
#include "tests/xe_test.h"
#include "xe_bo.h"
#include "xe_device.h"
+#include "xe_pm.h"
#include "xe_ttm_vram_mgr.h"
#include "xe_vm.h"
@@ -33,7 +34,7 @@ static int xe_dma_buf_attach(struct dma_buf *dmabuf,
if (!attach->peer2peer && !xe_bo_can_migrate(gem_to_xe_bo(obj), XE_PL_TT))
return -EOPNOTSUPP;
- xe_device_mem_access_get(to_xe_device(obj->dev));
+ xe_pm_runtime_get(to_xe_device(obj->dev));
return 0;
}
@@ -42,7 +43,7 @@ static void xe_dma_buf_detach(struct dma_buf *dmabuf,
{
struct drm_gem_object *obj = attach->dmabuf->priv;
- xe_device_mem_access_put(to_xe_device(obj->dev));
+ xe_pm_runtime_put(to_xe_device(obj->dev));
}
static int xe_dma_buf_pin(struct dma_buf_attachment *attach)
@@ -216,7 +217,7 @@ xe_dma_buf_init_obj(struct drm_device *dev, struct xe_bo *storage,
dma_resv_lock(resv, NULL);
bo = ___xe_bo_create_locked(xe, storage, NULL, resv, NULL, dma_buf->size,
0, /* Will require 1way or 2way for vm_bind */
- ttm_bo_type_sg, XE_BO_CREATE_SYSTEM_BIT);
+ ttm_bo_type_sg, XE_BO_FLAG_SYSTEM);
if (IS_ERR(bo)) {
ret = PTR_ERR(bo);
goto error;
diff --git a/drivers/gpu/drm/xe/xe_drm_client.c b/drivers/gpu/drm/xe/xe_drm_client.c
index 87c10bd7958b..08f0b7c95901 100644
--- a/drivers/gpu/drm/xe/xe_drm_client.c
+++ b/drivers/gpu/drm/xe/xe_drm_client.c
@@ -78,7 +78,7 @@ void xe_drm_client_add_bo(struct xe_drm_client *client,
spin_lock(&client->bos_lock);
bo->client = xe_drm_client_get(client);
- list_add_tail_rcu(&bo->client_link, &client->bos_list);
+ list_add_tail(&bo->client_link, &client->bos_list);
spin_unlock(&client->bos_lock);
}
@@ -96,7 +96,7 @@ void xe_drm_client_remove_bo(struct xe_bo *bo)
struct xe_drm_client *client = bo->client;
spin_lock(&client->bos_lock);
- list_del_rcu(&bo->client_link);
+ list_del(&bo->client_link);
spin_unlock(&client->bos_lock);
xe_drm_client_put(client);
@@ -154,8 +154,8 @@ static void show_meminfo(struct drm_printer *p, struct drm_file *file)
/* Internal objects. */
spin_lock(&client->bos_lock);
- list_for_each_entry_rcu(bo, &client->bos_list, client_link) {
- if (!bo || !kref_get_unless_zero(&bo->ttm.base.refcount))
+ list_for_each_entry(bo, &client->bos_list, client_link) {
+ if (!kref_get_unless_zero(&bo->ttm.base.refcount))
continue;
bo_meminfo(bo, stats);
xe_bo_put(bo);
diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c
index cc5e0f75de3c..97eeb973e897 100644
--- a/drivers/gpu/drm/xe/xe_exec.c
+++ b/drivers/gpu/drm/xe/xe_exec.c
@@ -216,7 +216,7 @@ retry:
goto err_unlock_list;
}
for (i = 0; i < num_syncs; i++)
- xe_sync_entry_signal(&syncs[i], NULL, fence);
+ xe_sync_entry_signal(&syncs[i], fence);
xe_exec_queue_last_fence_set(q, vm, fence);
dma_fence_put(fence);
}
@@ -294,9 +294,10 @@ retry:
drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, &job->drm.s_fence->finished,
DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_WRITE);
- for (i = 0; i < num_syncs; i++)
- xe_sync_entry_signal(&syncs[i], job,
- &job->drm.s_fence->finished);
+ for (i = 0; i < num_syncs; i++) {
+ xe_sync_entry_signal(&syncs[i], &job->drm.s_fence->finished);
+ xe_sched_job_init_user_fence(job, &syncs[i]);
+ }
if (xe_exec_queue_is_lr(q))
q->ring_ops->emit_job(job);
@@ -320,10 +321,7 @@ err_put_job:
err_exec:
drm_exec_fini(exec);
err_unlock_list:
- if (write_locked)
- up_write(&vm->lock);
- else
- up_read(&vm->lock);
+ up_read(&vm->lock);
if (err == -EAGAIN && !skip_retry)
goto retry;
err_syncs:
diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c
index ead25d5e723e..395de93579fa 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue.c
+++ b/drivers/gpu/drm/xe/xe_exec_queue.c
@@ -31,7 +31,14 @@ enum xe_exec_queue_sched_prop {
};
static int exec_queue_user_extensions(struct xe_device *xe, struct xe_exec_queue *q,
- u64 extensions, int ext_number, bool create);
+ u64 extensions, int ext_number);
+
+static void __xe_exec_queue_free(struct xe_exec_queue *q)
+{
+ if (q->vm)
+ xe_vm_put(q->vm);
+ kfree(q);
+}
static struct xe_exec_queue *__xe_exec_queue_alloc(struct xe_device *xe,
struct xe_vm *vm,
@@ -74,21 +81,21 @@ static struct xe_exec_queue *__xe_exec_queue_alloc(struct xe_device *xe,
else
q->sched_props.priority = XE_EXEC_QUEUE_PRIORITY_NORMAL;
+ if (vm)
+ q->vm = xe_vm_get(vm);
+
if (extensions) {
/*
* may set q->usm, must come before xe_lrc_init(),
* may overwrite q->sched_props, must come before q->ops->init()
*/
- err = exec_queue_user_extensions(xe, q, extensions, 0, true);
+ err = exec_queue_user_extensions(xe, q, extensions, 0);
if (err) {
- kfree(q);
+ __xe_exec_queue_free(q);
return ERR_PTR(err);
}
}
- if (vm)
- q->vm = xe_vm_get(vm);
-
if (xe_exec_queue_is_parallel(q)) {
q->parallel.composite_fence_ctx = dma_fence_context_alloc(1);
q->parallel.composite_fence_seqno = XE_FENCE_INITIAL_SEQNO;
@@ -97,13 +104,6 @@ static struct xe_exec_queue *__xe_exec_queue_alloc(struct xe_device *xe,
return q;
}
-static void __xe_exec_queue_free(struct xe_exec_queue *q)
-{
- if (q->vm)
- xe_vm_put(q->vm);
- kfree(q);
-}
-
static int __xe_exec_queue_init(struct xe_exec_queue *q)
{
struct xe_device *xe = gt_to_xe(q->gt);
@@ -128,7 +128,7 @@ static int __xe_exec_queue_init(struct xe_exec_queue *q)
* already grabbed the rpm ref outside any sensitive locks.
*/
if (!(q->flags & EXEC_QUEUE_FLAG_PERMANENT) && (q->flags & EXEC_QUEUE_FLAG_VM || !q->vm))
- drm_WARN_ON(&xe->drm, !xe_device_mem_access_get_if_ongoing(xe));
+ xe_pm_runtime_get_noresume(xe);
return 0;
@@ -217,7 +217,7 @@ void xe_exec_queue_fini(struct xe_exec_queue *q)
for (i = 0; i < q->width; ++i)
xe_lrc_finish(q->lrc + i);
if (!(q->flags & EXEC_QUEUE_FLAG_PERMANENT) && (q->flags & EXEC_QUEUE_FLAG_VM || !q->vm))
- xe_device_mem_access_put(gt_to_xe(q->gt));
+ xe_pm_runtime_put(gt_to_xe(q->gt));
__xe_exec_queue_free(q);
}
@@ -225,22 +225,22 @@ void xe_exec_queue_assign_name(struct xe_exec_queue *q, u32 instance)
{
switch (q->class) {
case XE_ENGINE_CLASS_RENDER:
- sprintf(q->name, "rcs%d", instance);
+ snprintf(q->name, sizeof(q->name), "rcs%d", instance);
break;
case XE_ENGINE_CLASS_VIDEO_DECODE:
- sprintf(q->name, "vcs%d", instance);
+ snprintf(q->name, sizeof(q->name), "vcs%d", instance);
break;
case XE_ENGINE_CLASS_VIDEO_ENHANCE:
- sprintf(q->name, "vecs%d", instance);
+ snprintf(q->name, sizeof(q->name), "vecs%d", instance);
break;
case XE_ENGINE_CLASS_COPY:
- sprintf(q->name, "bcs%d", instance);
+ snprintf(q->name, sizeof(q->name), "bcs%d", instance);
break;
case XE_ENGINE_CLASS_COMPUTE:
- sprintf(q->name, "ccs%d", instance);
+ snprintf(q->name, sizeof(q->name), "ccs%d", instance);
break;
case XE_ENGINE_CLASS_OTHER:
- sprintf(q->name, "gsccs%d", instance);
+ snprintf(q->name, sizeof(q->name), "gsccs%d", instance);
break;
default:
XE_WARN_ON(q->class);
@@ -268,7 +268,7 @@ xe_exec_queue_device_get_max_priority(struct xe_device *xe)
}
static int exec_queue_set_priority(struct xe_device *xe, struct xe_exec_queue *q,
- u64 value, bool create)
+ u64 value)
{
if (XE_IOCTL_DBG(xe, value > XE_EXEC_QUEUE_PRIORITY_HIGH))
return -EINVAL;
@@ -276,9 +276,6 @@ static int exec_queue_set_priority(struct xe_device *xe, struct xe_exec_queue *q
if (XE_IOCTL_DBG(xe, value > xe_exec_queue_device_get_max_priority(xe)))
return -EPERM;
- if (!create)
- return q->ops->set_priority(q, value);
-
q->sched_props.priority = value;
return 0;
}
@@ -336,7 +333,7 @@ xe_exec_queue_get_prop_minmax(struct xe_hw_engine_class_intf *eclass,
}
static int exec_queue_set_timeslice(struct xe_device *xe, struct xe_exec_queue *q,
- u64 value, bool create)
+ u64 value)
{
u32 min = 0, max = 0;
@@ -347,16 +344,13 @@ static int exec_queue_set_timeslice(struct xe_device *xe, struct xe_exec_queue *
!xe_hw_engine_timeout_in_range(value, min, max))
return -EINVAL;
- if (!create)
- return q->ops->set_timeslice(q, value);
-
q->sched_props.timeslice_us = value;
return 0;
}
typedef int (*xe_exec_queue_set_property_fn)(struct xe_device *xe,
struct xe_exec_queue *q,
- u64 value, bool create);
+ u64 value);
static const xe_exec_queue_set_property_fn exec_queue_set_property_funcs[] = {
[DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY] = exec_queue_set_priority,
@@ -365,8 +359,7 @@ static const xe_exec_queue_set_property_fn exec_queue_set_property_funcs[] = {
static int exec_queue_user_ext_set_property(struct xe_device *xe,
struct xe_exec_queue *q,
- u64 extension,
- bool create)
+ u64 extension)
{
u64 __user *address = u64_to_user_ptr(extension);
struct drm_xe_ext_set_property ext;
@@ -388,21 +381,20 @@ static int exec_queue_user_ext_set_property(struct xe_device *xe,
if (!exec_queue_set_property_funcs[idx])
return -EINVAL;
- return exec_queue_set_property_funcs[idx](xe, q, ext.value, create);
+ return exec_queue_set_property_funcs[idx](xe, q, ext.value);
}
typedef int (*xe_exec_queue_user_extension_fn)(struct xe_device *xe,
struct xe_exec_queue *q,
- u64 extension,
- bool create);
+ u64 extension);
-static const xe_exec_queue_set_property_fn exec_queue_user_extension_funcs[] = {
+static const xe_exec_queue_user_extension_fn exec_queue_user_extension_funcs[] = {
[DRM_XE_EXEC_QUEUE_EXTENSION_SET_PROPERTY] = exec_queue_user_ext_set_property,
};
#define MAX_USER_EXTENSIONS 16
static int exec_queue_user_extensions(struct xe_device *xe, struct xe_exec_queue *q,
- u64 extensions, int ext_number, bool create)
+ u64 extensions, int ext_number)
{
u64 __user *address = u64_to_user_ptr(extensions);
struct drm_xe_user_extension ext;
@@ -423,13 +415,13 @@ static int exec_queue_user_extensions(struct xe_device *xe, struct xe_exec_queue
idx = array_index_nospec(ext.name,
ARRAY_SIZE(exec_queue_user_extension_funcs));
- err = exec_queue_user_extension_funcs[idx](xe, q, extensions, create);
+ err = exec_queue_user_extension_funcs[idx](xe, q, extensions);
if (XE_IOCTL_DBG(xe, err))
return err;
if (ext.next_extension)
return exec_queue_user_extensions(xe, q, ext.next_extension,
- ++ext_number, create);
+ ++ext_number);
return 0;
}
@@ -597,7 +589,7 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
return -EINVAL;
/* The migration vm doesn't hold rpm ref */
- xe_device_mem_access_get(xe);
+ xe_pm_runtime_get_noresume(xe);
flags = EXEC_QUEUE_FLAG_VM | (id ? EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD : 0);
@@ -606,7 +598,7 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
args->width, hwe, flags,
args->extensions);
- xe_device_mem_access_put(xe); /* now held by engine */
+ xe_pm_runtime_put(xe); /* now held by engine */
xe_vm_put(migrate_vm);
if (IS_ERR(new)) {
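
Note: the snprintf() conversion in xe_exec_queue_assign_name() keeps
the switch; an equivalent lookup-table form (a sketch, not the patch's
approach) makes the class-to-prefix mapping explicit:

	static const char *const class_prefix[] = {
		[XE_ENGINE_CLASS_RENDER]	= "rcs",
		[XE_ENGINE_CLASS_VIDEO_DECODE]	= "vcs",
		[XE_ENGINE_CLASS_VIDEO_ENHANCE]	= "vecs",
		[XE_ENGINE_CLASS_COPY]		= "bcs",
		[XE_ENGINE_CLASS_COMPUTE]	= "ccs",
		[XE_ENGINE_CLASS_OTHER]		= "gsccs",
	};

	snprintf(q->name, sizeof(q->name), "%s%d",
		 class_prefix[q->class], instance);
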
diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h
index 462b33195032..ee78d497d838 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue_types.h
+++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h
@@ -76,14 +76,12 @@ struct xe_exec_queue {
#define EXEC_QUEUE_FLAG_KERNEL BIT(1)
/* kernel engine only destroyed at driver unload */
#define EXEC_QUEUE_FLAG_PERMANENT BIT(2)
-/* queue keeps running pending jobs after destroy ioctl */
-#define EXEC_QUEUE_FLAG_PERSISTENT BIT(3)
/* for VM jobs. Caller needs to hold rpm ref when creating queue with this flag */
-#define EXEC_QUEUE_FLAG_VM BIT(4)
+#define EXEC_QUEUE_FLAG_VM BIT(3)
/* child of VM queue for multi-tile VM jobs */
-#define EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD BIT(5)
+#define EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD BIT(4)
/* kernel exec_queue only, set priority to highest level */
-#define EXEC_QUEUE_FLAG_HIGH_PRIORITY BIT(6)
+#define EXEC_QUEUE_FLAG_HIGH_PRIORITY BIT(5)
/**
* @flags: flags for this exec queue, should statically setup aside from ban
diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c
index ab96edb058d6..0d541f55b4fc 100644
--- a/drivers/gpu/drm/xe/xe_ggtt.c
+++ b/drivers/gpu/drm/xe/xe_ggtt.c
@@ -5,12 +5,14 @@
#include "xe_ggtt.h"
+#include <linux/io-64-nonatomic-lo-hi.h>
#include <linux/sizes.h>
#include <drm/drm_managed.h>
#include <drm/i915_drm.h>
#include "regs/xe_gt_regs.h"
+#include "regs/xe_gtt_defs.h"
#include "regs/xe_regs.h"
#include "xe_assert.h"
#include "xe_bo.h"
@@ -19,16 +21,10 @@
#include "xe_gt_printk.h"
#include "xe_gt_tlb_invalidation.h"
#include "xe_map.h"
-#include "xe_mmio.h"
+#include "xe_pm.h"
#include "xe_sriov.h"
#include "xe_wopcm.h"
-#define XELPG_GGTT_PTE_PAT0 BIT_ULL(52)
-#define XELPG_GGTT_PTE_PAT1 BIT_ULL(53)
-
-/* GuC addresses above GUC_GGTT_TOP also don't map through the GTT */
-#define GUC_GGTT_TOP 0xFEE00000
-
static u64 xelp_ggtt_pte_encode_bo(struct xe_bo *bo, u64 bo_offset,
u16 pat_index)
{
@@ -200,20 +196,20 @@ int xe_ggtt_init_early(struct xe_ggtt *ggtt)
return drmm_add_action_or_reset(&xe->drm, ggtt_fini_early, ggtt);
}
+static void xe_ggtt_invalidate(struct xe_ggtt *ggtt);
+
static void xe_ggtt_initial_clear(struct xe_ggtt *ggtt)
{
struct drm_mm_node *hole;
u64 start, end;
/* Display may have allocated inside ggtt, so be careful with clearing here */
- xe_device_mem_access_get(tile_to_xe(ggtt->tile));
mutex_lock(&ggtt->lock);
drm_mm_for_each_hole(hole, &ggtt->mm, start, end)
xe_ggtt_clear(ggtt, start, end - start);
xe_ggtt_invalidate(ggtt);
mutex_unlock(&ggtt->lock);
- xe_device_mem_access_put(tile_to_xe(ggtt->tile));
}
int xe_ggtt_init(struct xe_ggtt *ggtt)
@@ -227,11 +223,11 @@ int xe_ggtt_init(struct xe_ggtt *ggtt)
* scratch entries, rather keep the scratch page in system memory on
* platforms where 64K pages are needed for VRAM.
*/
- flags = XE_BO_CREATE_PINNED_BIT;
+ flags = XE_BO_FLAG_PINNED;
if (ggtt->flags & XE_GGTT_FLAGS_64K)
- flags |= XE_BO_CREATE_SYSTEM_BIT;
+ flags |= XE_BO_FLAG_SYSTEM;
else
- flags |= XE_BO_CREATE_VRAM_IF_DGFX(ggtt->tile);
+ flags |= XE_BO_FLAG_VRAM_IF_DGFX(ggtt->tile);
ggtt->scratch = xe_managed_bo_create_pin_map(xe, ggtt->tile, XE_PAGE_SIZE, flags);
if (IS_ERR(ggtt->scratch)) {
@@ -249,51 +245,19 @@ err:
return err;
}
-#define GUC_TLB_INV_CR XE_REG(0xcee8)
-#define GUC_TLB_INV_CR_INVALIDATE REG_BIT(0)
-#define PVC_GUC_TLB_INV_DESC0 XE_REG(0xcf7c)
-#define PVC_GUC_TLB_INV_DESC0_VALID REG_BIT(0)
-#define PVC_GUC_TLB_INV_DESC1 XE_REG(0xcf80)
-#define PVC_GUC_TLB_INV_DESC1_INVALIDATE REG_BIT(6)
-
static void ggtt_invalidate_gt_tlb(struct xe_gt *gt)
{
+ int err;
+
if (!gt)
return;
- /*
- * Invalidation can happen when there's no in-flight work keeping the
- * GT awake. We need to explicitly grab forcewake to ensure the GT
- * and GuC are accessible.
- */
- xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
-
- /* TODO: vfunc for GuC vs. non-GuC */
-
- if (gt->uc.guc.submission_state.enabled) {
- int seqno;
-
- seqno = xe_gt_tlb_invalidation_guc(gt);
- xe_gt_assert(gt, seqno > 0);
- if (seqno > 0)
- xe_gt_tlb_invalidation_wait(gt, seqno);
- } else if (xe_device_uc_enabled(gt_to_xe(gt))) {
- struct xe_device *xe = gt_to_xe(gt);
-
- if (xe->info.platform == XE_PVC || GRAPHICS_VER(xe) >= 20) {
- xe_mmio_write32(gt, PVC_GUC_TLB_INV_DESC1,
- PVC_GUC_TLB_INV_DESC1_INVALIDATE);
- xe_mmio_write32(gt, PVC_GUC_TLB_INV_DESC0,
- PVC_GUC_TLB_INV_DESC0_VALID);
- } else
- xe_mmio_write32(gt, GUC_TLB_INV_CR,
- GUC_TLB_INV_CR_INVALIDATE);
- }
-
- xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+ err = xe_gt_tlb_invalidation_ggtt(gt);
+ if (err)
+ drm_warn(&gt_to_xe(gt)->drm, "xe_gt_tlb_invalidation_ggtt error=%d", err);
}
-void xe_ggtt_invalidate(struct xe_ggtt *ggtt)
+static void xe_ggtt_invalidate(struct xe_ggtt *ggtt)
{
/* Each GT in a tile has its own TLB to cache GGTT lookups */
ggtt_invalidate_gt_tlb(ggtt->tile->primary_gt);
@@ -410,7 +374,7 @@ int xe_ggtt_insert_special_node(struct xe_ggtt *ggtt, struct drm_mm_node *node,
void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_bo *bo)
{
- u16 cache_mode = bo->flags & XE_BO_NEEDS_UC ? XE_CACHE_NONE : XE_CACHE_WB;
+ u16 cache_mode = bo->flags & XE_BO_FLAG_NEEDS_UC ? XE_CACHE_NONE : XE_CACHE_WB;
u16 pat_index = tile_to_xe(ggtt->tile)->pat.idx[cache_mode];
u64 start = bo->ggtt_node.start;
u64 offset, pte;
@@ -419,8 +383,6 @@ void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_bo *bo)
pte = ggtt->pt_ops->pte_encode_bo(bo, offset, pat_index);
xe_ggtt_set_pte(ggtt, start + offset, pte);
}
-
- xe_ggtt_invalidate(ggtt);
}
static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo,
@@ -442,14 +404,17 @@ static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo,
if (err)
return err;
- xe_device_mem_access_get(tile_to_xe(ggtt->tile));
+ xe_pm_runtime_get_noresume(tile_to_xe(ggtt->tile));
mutex_lock(&ggtt->lock);
err = drm_mm_insert_node_in_range(&ggtt->mm, &bo->ggtt_node, bo->size,
alignment, 0, start, end, 0);
if (!err)
xe_ggtt_map_bo(ggtt, bo);
mutex_unlock(&ggtt->lock);
- xe_device_mem_access_put(tile_to_xe(ggtt->tile));
+
+ if (!err && bo->flags & XE_BO_FLAG_GGTT_INVALIDATE)
+ xe_ggtt_invalidate(ggtt);
+ xe_pm_runtime_put(tile_to_xe(ggtt->tile));
return err;
}
@@ -465,19 +430,21 @@ int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo)
return __xe_ggtt_insert_bo_at(ggtt, bo, 0, U64_MAX);
}
-void xe_ggtt_remove_node(struct xe_ggtt *ggtt, struct drm_mm_node *node)
+void xe_ggtt_remove_node(struct xe_ggtt *ggtt, struct drm_mm_node *node,
+ bool invalidate)
{
- xe_device_mem_access_get(tile_to_xe(ggtt->tile));
- mutex_lock(&ggtt->lock);
+ xe_pm_runtime_get_noresume(tile_to_xe(ggtt->tile));
+ mutex_lock(&ggtt->lock);
xe_ggtt_clear(ggtt, node->start, node->size);
drm_mm_remove_node(node);
node->size = 0;
+ mutex_unlock(&ggtt->lock);
- xe_ggtt_invalidate(ggtt);
+ if (invalidate)
+ xe_ggtt_invalidate(ggtt);
- mutex_unlock(&ggtt->lock);
- xe_device_mem_access_put(tile_to_xe(ggtt->tile));
+ xe_pm_runtime_put(tile_to_xe(ggtt->tile));
}
void xe_ggtt_remove_bo(struct xe_ggtt *ggtt, struct xe_bo *bo)
@@ -488,8 +455,53 @@ void xe_ggtt_remove_bo(struct xe_ggtt *ggtt, struct xe_bo *bo)
/* This BO is not currently in the GGTT */
xe_tile_assert(ggtt->tile, bo->ggtt_node.size == bo->size);
- xe_ggtt_remove_node(ggtt, &bo->ggtt_node);
+ xe_ggtt_remove_node(ggtt, &bo->ggtt_node,
+ bo->flags & XE_BO_FLAG_GGTT_INVALIDATE);
+}
+
+#ifdef CONFIG_PCI_IOV
+static u64 xe_encode_vfid_pte(u16 vfid)
+{
+ return FIELD_PREP(GGTT_PTE_VFID, vfid) | XE_PAGE_PRESENT;
+}
+
+static void xe_ggtt_assign_locked(struct xe_ggtt *ggtt, const struct drm_mm_node *node, u16 vfid)
+{
+ u64 start = node->start;
+ u64 size = node->size;
+ u64 end = start + size - 1;
+ u64 pte = xe_encode_vfid_pte(vfid);
+
+ lockdep_assert_held(&ggtt->lock);
+
+ if (!drm_mm_node_allocated(node))
+ return;
+
+ while (start < end) {
+ xe_ggtt_set_pte(ggtt, start, pte);
+ start += XE_PAGE_SIZE;
+ }
+
+ xe_ggtt_invalidate(ggtt);
+}
+
+/**
+ * xe_ggtt_assign - assign a GGTT region to the VF
+ * @ggtt: the &xe_ggtt where the node belongs
+ * @node: the &drm_mm_node to update
+ * @vfid: the VF identifier
+ *
+ * This function is used by the PF driver to assign a GGTT region to the VF.
+ * In addition to the PTE's VFID bits 11:2, the PRESENT bit 0 is also set, as
+ * on some platforms VFs can't modify that either.
+ */
+void xe_ggtt_assign(struct xe_ggtt *ggtt, const struct drm_mm_node *node, u16 vfid)
+{
+ mutex_lock(&ggtt->lock);
+ xe_ggtt_assign_locked(ggtt, node, vfid);
+ mutex_unlock(&ggtt->lock);
}
+#endif
int xe_ggtt_dump(struct xe_ggtt *ggtt, struct drm_printer *p)
{
diff --git a/drivers/gpu/drm/xe/xe_ggtt.h b/drivers/gpu/drm/xe/xe_ggtt.h
index 42705e1338e1..4a41a1762358 100644
--- a/drivers/gpu/drm/xe/xe_ggtt.h
+++ b/drivers/gpu/drm/xe/xe_ggtt.h
@@ -11,7 +11,6 @@
struct drm_printer;
void xe_ggtt_set_pte(struct xe_ggtt *ggtt, u64 addr, u64 pte);
-void xe_ggtt_invalidate(struct xe_ggtt *ggtt);
int xe_ggtt_init_early(struct xe_ggtt *ggtt);
int xe_ggtt_init(struct xe_ggtt *ggtt);
void xe_ggtt_printk(struct xe_ggtt *ggtt, const char *prefix);
@@ -24,7 +23,8 @@ int xe_ggtt_insert_special_node(struct xe_ggtt *ggtt, struct drm_mm_node *node,
int xe_ggtt_insert_special_node_locked(struct xe_ggtt *ggtt,
struct drm_mm_node *node,
u32 size, u32 align, u32 mm_flags);
-void xe_ggtt_remove_node(struct xe_ggtt *ggtt, struct drm_mm_node *node);
+void xe_ggtt_remove_node(struct xe_ggtt *ggtt, struct drm_mm_node *node,
+ bool invalidate);
void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_bo *bo);
int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo);
int xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo,
@@ -33,4 +33,8 @@ void xe_ggtt_remove_bo(struct xe_ggtt *ggtt, struct xe_bo *bo);
int xe_ggtt_dump(struct xe_ggtt *ggtt, struct drm_printer *p);
+#ifdef CONFIG_PCI_IOV
+void xe_ggtt_assign(struct xe_ggtt *ggtt, const struct drm_mm_node *node, u16 vfid);
+#endif
+
#endif
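
Note: the net effect of the xe_ggtt.c changes is that the GGTT TLB
invalidation moves outside the GGTT lock and becomes opt-in per BO via
XE_BO_FLAG_GGTT_INVALIDATE. The insert path, condensed from the hunks
above (not verbatim):

	mutex_lock(&ggtt->lock);
	err = drm_mm_insert_node_in_range(...);	/* reserve GGTT space */
	if (!err)
		xe_ggtt_map_bo(ggtt, bo);	/* write PTEs, no invalidate */
	mutex_unlock(&ggtt->lock);

	if (!err && bo->flags & XE_BO_FLAG_GGTT_INVALIDATE)
		xe_ggtt_invalidate(ggtt);	/* TLB flush outside the lock */
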
diff --git a/drivers/gpu/drm/xe/xe_gsc.c b/drivers/gpu/drm/xe/xe_gsc.c
index a61994292c43..60202b903687 100644
--- a/drivers/gpu/drm/xe/xe_gsc.c
+++ b/drivers/gpu/drm/xe/xe_gsc.c
@@ -17,15 +17,18 @@
#include "xe_gsc_proxy.h"
#include "xe_gsc_submit.h"
#include "xe_gt.h"
+#include "xe_gt_mcr.h"
#include "xe_gt_printk.h"
#include "xe_huc.h"
#include "xe_map.h"
#include "xe_mmio.h"
+#include "xe_pm.h"
#include "xe_sched_job.h"
#include "xe_uc_fw.h"
#include "xe_wa.h"
#include "instructions/xe_gsc_commands.h"
#include "regs/xe_gsc_regs.h"
+#include "regs/xe_gt_regs.h"
static struct xe_gt *
gsc_to_gt(struct xe_gsc *gsc)
@@ -127,8 +130,8 @@ static int query_compatibility_version(struct xe_gsc *gsc)
bo = xe_bo_create_pin_map(xe, tile, NULL, GSC_VER_PKT_SZ * 2,
ttm_bo_type_kernel,
- XE_BO_CREATE_SYSTEM_BIT |
- XE_BO_CREATE_GGTT_BIT);
+ XE_BO_FLAG_SYSTEM |
+ XE_BO_FLAG_GGTT);
if (IS_ERR(bo)) {
xe_gt_err(gt, "failed to allocate bo for GSC version query\n");
return PTR_ERR(bo);
@@ -250,9 +253,30 @@ static int gsc_upload(struct xe_gsc *gsc)
static int gsc_upload_and_init(struct xe_gsc *gsc)
{
struct xe_gt *gt = gsc_to_gt(gsc);
+ struct xe_tile *tile = gt_to_tile(gt);
int ret;
+ if (XE_WA(gt, 14018094691)) {
+ ret = xe_force_wake_get(gt_to_fw(tile->primary_gt), XE_FORCEWAKE_ALL);
+
+ /*
+ * If the forcewake fails we want to keep going, because the worst
+ * case outcome in failing to apply the WA is that PXP won't work,
+ * which is not fatal. We still throw a warning so the issue is
+ * seen if it happens.
+ */
+ xe_gt_WARN_ON(tile->primary_gt, ret);
+
+ xe_gt_mcr_multicast_write(tile->primary_gt,
+ EU_SYSTOLIC_LIC_THROTTLE_CTL_WITH_LOCK,
+ EU_SYSTOLIC_LIC_THROTTLE_CTL_LOCK_BIT);
+ }
+
ret = gsc_upload(gsc);
+
+ if (XE_WA(gt, 14018094691))
+ xe_force_wake_put(gt_to_fw(tile->primary_gt), XE_FORCEWAKE_ALL);
+
if (ret)
return ret;
@@ -272,6 +296,44 @@ static int gsc_upload_and_init(struct xe_gsc *gsc)
return 0;
}
+static int gsc_er_complete(struct xe_gt *gt)
+{
+ u32 er_status;
+
+ if (!gsc_fw_is_loaded(gt))
+ return 0;
+
+ /*
+ * Starting on Xe2, the GSCCS engine reset is a 2-step process. When the
+ * driver or the GuC hit the GDRST register, the CS is immediately reset
+ * and a success is reported, but the GSC shim keeps resetting in the
+ * background. While the shim reset is ongoing, the CS is able to accept
+ * new context submission, but any commands that require the shim will
+ * be stalled until the reset is completed. This means that we can keep
+ * submitting to the GSCCS as long as we make sure that the preemption
+ * timeout is big enough to cover any delay introduced by the reset.
+ * When the shim reset completes, a specific CS interrupt is triggered,
+ * in response to which we need to check the GSCI_TIMER_STATUS register
+ * to see if the reset was successful or not.
+ * Note that the GSCI_TIMER_STATUS register is not saved/restored across
+ * power states, so it gets reset on MC6 entry. However, a reset failure stops MC6,
+ * so in that scenario we're always guaranteed to find the correct
+ * value.
+ */
+ er_status = xe_mmio_read32(gt, GSCI_TIMER_STATUS) & GSCI_TIMER_STATUS_VALUE;
+
+ if (er_status == GSCI_TIMER_STATUS_TIMER_EXPIRED) {
+ /*
+ * XXX: we should trigger an FLR here, but we don't have support
+ * for that yet.
+ */
+ xe_gt_err(gt, "GSC ER timed out!\n");
+ return -EIO;
+ }
+
+ return 0;
+}
+
static void gsc_work(struct work_struct *work)
{
struct xe_gsc *gsc = container_of(work, typeof(*gsc), work);
@@ -285,8 +347,14 @@ static void gsc_work(struct work_struct *work)
gsc->work_actions = 0;
spin_unlock_irq(&gsc->lock);
- xe_device_mem_access_get(xe);
- xe_force_wake_get(gt_to_fw(gt), XE_FW_GSC);
+ xe_pm_runtime_get(xe);
+ xe_gt_WARN_ON(gt, xe_force_wake_get(gt_to_fw(gt), XE_FW_GSC));
+
+ if (actions & GSC_ACTION_ER_COMPLETE) {
+ ret = gsc_er_complete(gt);
+ if (ret)
+ goto out;
+ }
if (actions & GSC_ACTION_FW_LOAD) {
ret = gsc_upload_and_init(gsc);
@@ -299,8 +367,26 @@ static void gsc_work(struct work_struct *work)
if (actions & GSC_ACTION_SW_PROXY)
xe_gsc_proxy_request_handler(gsc);
+out:
xe_force_wake_put(gt_to_fw(gt), XE_FW_GSC);
- xe_device_mem_access_put(xe);
+ xe_pm_runtime_put(xe);
+}
+
+void xe_gsc_hwe_irq_handler(struct xe_hw_engine *hwe, u16 intr_vec)
+{
+ struct xe_gt *gt = hwe->gt;
+ struct xe_gsc *gsc = &gt->uc.gsc;
+
+ if (unlikely(!intr_vec))
+ return;
+
+ if (intr_vec & GSC_ER_COMPLETE) {
+ spin_lock(&gsc->lock);
+ gsc->work_actions |= GSC_ACTION_ER_COMPLETE;
+ spin_unlock(&gsc->lock);
+
+ queue_work(gsc->wq, &gsc->work);
+ }
}
int xe_gsc_init(struct xe_gsc *gsc)
@@ -382,8 +468,8 @@ int xe_gsc_init_post_hwconfig(struct xe_gsc *gsc)
bo = xe_bo_create_pin_map(xe, tile, NULL, SZ_4M,
ttm_bo_type_kernel,
- XE_BO_CREATE_STOLEN_BIT |
- XE_BO_CREATE_GGTT_BIT);
+ XE_BO_FLAG_STOLEN |
+ XE_BO_FLAG_GGTT);
if (IS_ERR(bo))
return PTR_ERR(bo);
diff --git a/drivers/gpu/drm/xe/xe_gsc.h b/drivers/gpu/drm/xe/xe_gsc.h
index c6fb32e3fd79..dd16e9b8b894 100644
--- a/drivers/gpu/drm/xe/xe_gsc.h
+++ b/drivers/gpu/drm/xe/xe_gsc.h
@@ -9,12 +9,14 @@
#include "xe_gsc_types.h"
struct xe_gt;
+struct xe_hw_engine;
int xe_gsc_init(struct xe_gsc *gsc);
int xe_gsc_init_post_hwconfig(struct xe_gsc *gsc);
void xe_gsc_wait_for_worker_completion(struct xe_gsc *gsc);
void xe_gsc_load_start(struct xe_gsc *gsc);
void xe_gsc_remove(struct xe_gsc *gsc);
+void xe_gsc_hwe_irq_handler(struct xe_hw_engine *hwe, u16 intr_vec);
void xe_gsc_wa_14015076503(struct xe_gt *gt, bool prep);
diff --git a/drivers/gpu/drm/xe/xe_gsc_proxy.c b/drivers/gpu/drm/xe/xe_gsc_proxy.c
index 309ef80e3b95..1b908d238bd1 100644
--- a/drivers/gpu/drm/xe/xe_gsc_proxy.c
+++ b/drivers/gpu/drm/xe/xe_gsc_proxy.c
@@ -66,7 +66,7 @@ static inline struct xe_device *kdev_to_xe(struct device *kdev)
return dev_get_drvdata(kdev);
}
-static bool gsc_proxy_init_done(struct xe_gsc *gsc)
+bool xe_gsc_proxy_init_done(struct xe_gsc *gsc)
{
struct xe_gt *gt = gsc_to_gt(gsc);
u32 fwsts1 = xe_mmio_read32(gt, HECI_FWSTS1(MTL_GSC_HECI1_BASE));
@@ -403,7 +403,6 @@ static int proxy_channel_alloc(struct xe_gsc *gsc)
struct xe_device *xe = gt_to_xe(gt);
struct xe_bo *bo;
void *csme;
- int err;
csme = kzalloc(GSC_PROXY_CHANNEL_SIZE, GFP_KERNEL);
if (!csme)
@@ -411,8 +410,8 @@ static int proxy_channel_alloc(struct xe_gsc *gsc)
bo = xe_bo_create_pin_map(xe, tile, NULL, GSC_PROXY_CHANNEL_SIZE,
ttm_bo_type_kernel,
- XE_BO_CREATE_SYSTEM_BIT |
- XE_BO_CREATE_GGTT_BIT);
+ XE_BO_FLAG_SYSTEM |
+ XE_BO_FLAG_GGTT);
if (IS_ERR(bo)) {
kfree(csme);
return PTR_ERR(bo);
@@ -424,11 +423,7 @@ static int proxy_channel_alloc(struct xe_gsc *gsc)
gsc->proxy.to_csme = csme;
gsc->proxy.from_csme = csme + GSC_PROXY_BUFFER_SIZE;
- err = drmm_add_action_or_reset(&xe->drm, proxy_channel_free, gsc);
- if (err)
- return err;
-
- return 0;
+ return drmm_add_action_or_reset(&xe->drm, proxy_channel_free, gsc);
}
/**
@@ -528,7 +523,7 @@ int xe_gsc_proxy_start(struct xe_gsc *gsc)
if (err)
return err;
- if (!gsc_proxy_init_done(gsc)) {
+ if (!xe_gsc_proxy_init_done(gsc)) {
xe_gt_err(gsc_to_gt(gsc), "GSC FW reports proxy init not completed\n");
return -EIO;
}
diff --git a/drivers/gpu/drm/xe/xe_gsc_proxy.h b/drivers/gpu/drm/xe/xe_gsc_proxy.h
index 908f9441f093..c511ade6b863 100644
--- a/drivers/gpu/drm/xe/xe_gsc_proxy.h
+++ b/drivers/gpu/drm/xe/xe_gsc_proxy.h
@@ -11,6 +11,7 @@
struct xe_gsc;
int xe_gsc_proxy_init(struct xe_gsc *gsc);
+bool xe_gsc_proxy_init_done(struct xe_gsc *gsc);
void xe_gsc_proxy_remove(struct xe_gsc *gsc);
int xe_gsc_proxy_start(struct xe_gsc *gsc);
diff --git a/drivers/gpu/drm/xe/xe_gsc_submit.c b/drivers/gpu/drm/xe/xe_gsc_submit.c
index 348994b271be..d34d03248843 100644
--- a/drivers/gpu/drm/xe/xe_gsc_submit.c
+++ b/drivers/gpu/drm/xe/xe_gsc_submit.c
@@ -41,6 +41,21 @@ gsc_to_gt(struct xe_gsc *gsc)
}
/**
+ * xe_gsc_create_host_session_id - Creates a random 64-bit host_session id with
+ * bits 56-63 masked.
+ *
+ * Returns: random host_session_id which can be used to send messages to the GSC CS
+ */
+u64 xe_gsc_create_host_session_id(void)
+{
+ u64 host_session_id;
+
+ get_random_bytes(&host_session_id, sizeof(u64));
+ host_session_id &= ~HOST_SESSION_CLIENT_MASK;
+ return host_session_id;
+}
+
+/**
* xe_gsc_emit_header - write the MTL GSC header in memory
* @xe: the Xe device
* @map: the iosys map to write to
diff --git a/drivers/gpu/drm/xe/xe_gsc_submit.h b/drivers/gpu/drm/xe/xe_gsc_submit.h
index 1939855031a6..1416b5745a4c 100644
--- a/drivers/gpu/drm/xe/xe_gsc_submit.h
+++ b/drivers/gpu/drm/xe/xe_gsc_submit.h
@@ -28,4 +28,5 @@ int xe_gsc_read_out_header(struct xe_device *xe,
int xe_gsc_pkt_submit_kernel(struct xe_gsc *gsc, u64 addr_in, u32 size_in,
u64 addr_out, u32 size_out);
+u64 xe_gsc_create_host_session_id(void);
#endif
diff --git a/drivers/gpu/drm/xe/xe_gsc_types.h b/drivers/gpu/drm/xe/xe_gsc_types.h
index 138d8cc0f19c..5926de20214c 100644
--- a/drivers/gpu/drm/xe/xe_gsc_types.h
+++ b/drivers/gpu/drm/xe/xe_gsc_types.h
@@ -47,6 +47,7 @@ struct xe_gsc {
u32 work_actions;
#define GSC_ACTION_FW_LOAD BIT(0)
#define GSC_ACTION_SW_PROXY BIT(1)
+#define GSC_ACTION_ER_COMPLETE BIT(2)
/** @proxy: sub-structure containing the SW proxy-related variables */
struct {
diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index a0afe1ba6dd5..491d0413de15 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -29,6 +29,7 @@
#include "xe_gt_mcr.h"
#include "xe_gt_pagefault.h"
#include "xe_gt_printk.h"
+#include "xe_gt_sriov_pf.h"
#include "xe_gt_sysfs.h"
#include "xe_gt_tlb_invalidation.h"
#include "xe_gt_topology.h"
@@ -43,6 +44,7 @@
#include "xe_migrate.h"
#include "xe_mmio.h"
#include "xe_pat.h"
+#include "xe_pm.h"
#include "xe_mocs.h"
#include "xe_reg_sr.h"
#include "xe_ring_ops.h"
@@ -310,6 +312,12 @@ int xe_gt_init_early(struct xe_gt *gt)
{
int err;
+ if (IS_SRIOV_PF(gt_to_xe(gt))) {
+ err = xe_gt_sriov_pf_init_early(gt);
+ if (err)
+ return err;
+ }
+
err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
if (err)
return err;
@@ -346,7 +354,6 @@ static int gt_fw_domain_init(struct xe_gt *gt)
{
int err, i;
- xe_device_mem_access_get(gt_to_xe(gt));
err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
if (err)
goto err_hw_fence_irq;
@@ -359,7 +366,9 @@ static int gt_fw_domain_init(struct xe_gt *gt)
xe_lmtt_init(&gt_to_tile(gt)->sriov.pf.lmtt);
}
- xe_gt_idle_sysfs_init(&gt->gtidle);
+ err = xe_gt_idle_sysfs_init(&gt->gtidle);
+ if (err)
+ goto err_force_wake;
/* Enable per hw engine IRQs */
xe_irq_enable_hwe(gt);
@@ -373,12 +382,12 @@ static int gt_fw_domain_init(struct xe_gt *gt)
err = xe_hw_engine_class_sysfs_init(gt);
if (err)
- drm_warn(&gt_to_xe(gt)->drm,
- "failed to register engines sysfs directory, err: %d\n",
- err);
+ goto err_force_wake;
/* Initialize CCS mode sysfs after early initialization of HW engines */
- xe_gt_ccs_mode_sysfs_init(gt);
+ err = xe_gt_ccs_mode_sysfs_init(gt);
+ if (err)
+ goto err_force_wake;
/*
* Stash hardware-reported version. Since this register does not exist
@@ -388,7 +397,6 @@ static int gt_fw_domain_init(struct xe_gt *gt)
err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
XE_WARN_ON(err);
- xe_device_mem_access_put(gt_to_xe(gt));
return 0;
@@ -398,7 +406,6 @@ err_force_wake:
err_hw_fence_irq:
for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i)
xe_hw_fence_irq_finish(&gt->fence_irq[i]);
- xe_device_mem_access_put(gt_to_xe(gt));
return err;
}
@@ -407,7 +414,6 @@ static int all_fw_domain_init(struct xe_gt *gt)
{
int err, i;
- xe_device_mem_access_get(gt_to_xe(gt));
err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
if (err)
goto err_hw_fence_irq;
@@ -473,7 +479,6 @@ static int all_fw_domain_init(struct xe_gt *gt)
err = xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL);
XE_WARN_ON(err);
- xe_device_mem_access_put(gt_to_xe(gt));
return 0;
@@ -482,7 +487,6 @@ err_force_wake:
err_hw_fence_irq:
for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i)
xe_hw_fence_irq_finish(&gt->fence_irq[i]);
- xe_device_mem_access_put(gt_to_xe(gt));
return err;
}
@@ -495,7 +499,6 @@ int xe_gt_init_hwconfig(struct xe_gt *gt)
{
int err;
- xe_device_mem_access_get(gt_to_xe(gt));
err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
if (err)
goto out;
@@ -518,8 +521,6 @@ int xe_gt_init_hwconfig(struct xe_gt *gt)
out_fw:
xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
out:
- xe_device_mem_access_put(gt_to_xe(gt));
-
return err;
}
@@ -545,13 +546,17 @@ int xe_gt_init(struct xe_gt *gt)
xe_mocs_init_early(gt);
- xe_gt_sysfs_init(gt);
+ err = xe_gt_sysfs_init(gt);
+ if (err)
+ return err;
err = gt_fw_domain_init(gt);
if (err)
return err;
- xe_gt_freq_init(gt);
+ err = xe_gt_freq_init(gt);
+ if (err)
+ return err;
xe_force_wake_init_engines(gt, gt_to_fw(gt));
@@ -559,11 +564,7 @@ int xe_gt_init(struct xe_gt *gt)
if (err)
return err;
- err = drmm_add_action_or_reset(&gt_to_xe(gt)->drm, gt_fini, gt);
- if (err)
- return err;
-
- return 0;
+ return drmm_add_action_or_reset(&gt_to_xe(gt)->drm, gt_fini, gt);
}
static int do_gt_reset(struct xe_gt *gt)
@@ -643,9 +644,9 @@ static int gt_reset(struct xe_gt *gt)
goto err_fail;
}
+ xe_pm_runtime_get(gt_to_xe(gt));
xe_gt_sanitize(gt);
- xe_device_mem_access_get(gt_to_xe(gt));
err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
if (err)
goto err_msg;
@@ -669,8 +670,8 @@ static int gt_reset(struct xe_gt *gt)
goto err_out;
err = xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL);
- xe_device_mem_access_put(gt_to_xe(gt));
XE_WARN_ON(err);
+ xe_pm_runtime_put(gt_to_xe(gt));
xe_gt_info(gt, "reset done\n");
@@ -680,7 +681,7 @@ err_out:
XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
err_msg:
XE_WARN_ON(xe_uc_start(&gt->uc));
- xe_device_mem_access_put(gt_to_xe(gt));
+ xe_pm_runtime_put(gt_to_xe(gt));
err_fail:
xe_gt_err(gt, "reset failed (%pe)\n", ERR_PTR(err));
@@ -710,22 +711,20 @@ void xe_gt_reset_async(struct xe_gt *gt)
void xe_gt_suspend_prepare(struct xe_gt *gt)
{
- xe_device_mem_access_get(gt_to_xe(gt));
XE_WARN_ON(xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL));
xe_uc_stop_prepare(&gt->uc);
XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
- xe_device_mem_access_put(gt_to_xe(gt));
}
int xe_gt_suspend(struct xe_gt *gt)
{
int err;
+ xe_gt_dbg(gt, "suspending\n");
xe_gt_sanitize(gt);
- xe_device_mem_access_get(gt_to_xe(gt));
err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
if (err)
goto err_msg;
@@ -735,15 +734,13 @@ int xe_gt_suspend(struct xe_gt *gt)
goto err_force_wake;
XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
- xe_device_mem_access_put(gt_to_xe(gt));
- xe_gt_info(gt, "suspended\n");
+ xe_gt_dbg(gt, "suspended\n");
return 0;
err_force_wake:
XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
err_msg:
- xe_device_mem_access_put(gt_to_xe(gt));
xe_gt_err(gt, "suspend failed (%pe)\n", ERR_PTR(err));
return err;
@@ -753,7 +750,7 @@ int xe_gt_resume(struct xe_gt *gt)
{
int err;
- xe_device_mem_access_get(gt_to_xe(gt));
+ xe_gt_dbg(gt, "resuming\n");
err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
if (err)
goto err_msg;
@@ -763,15 +760,13 @@ int xe_gt_resume(struct xe_gt *gt)
goto err_force_wake;
XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
- xe_device_mem_access_put(gt_to_xe(gt));
- xe_gt_info(gt, "resumed\n");
+ xe_gt_dbg(gt, "resumed\n");
return 0;
err_force_wake:
XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
err_msg:
- xe_device_mem_access_put(gt_to_xe(gt));
xe_gt_err(gt, "resume failed (%pe)\n", ERR_PTR(err));
return err;
diff --git a/drivers/gpu/drm/xe/xe_gt_ccs_mode.c b/drivers/gpu/drm/xe/xe_gt_ccs_mode.c
index 529fc286cd06..396aeb5b9924 100644
--- a/drivers/gpu/drm/xe/xe_gt_ccs_mode.c
+++ b/drivers/gpu/drm/xe/xe_gt_ccs_mode.c
@@ -167,25 +167,20 @@ static void xe_gt_ccs_mode_sysfs_fini(struct drm_device *drm, void *arg)
* and it is expected that there are no open drm clients while doing so.
 * The number of available compute slices is exposed to the user through a per-gt
* 'num_cslices' sysfs interface.
+ *
+ * Return: 0 on success or a negative error code on failure.
*/
-void xe_gt_ccs_mode_sysfs_init(struct xe_gt *gt)
+int xe_gt_ccs_mode_sysfs_init(struct xe_gt *gt)
{
struct xe_device *xe = gt_to_xe(gt);
int err;
if (!xe_gt_ccs_mode_enabled(gt))
- return;
+ return 0;
err = sysfs_create_files(gt->sysfs, gt_ccs_mode_attrs);
- if (err) {
- drm_warn(&xe->drm, "Sysfs creation for ccs_mode failed err: %d\n", err);
- return;
- }
+ if (err)
+ return err;
- err = drmm_add_action_or_reset(&xe->drm, xe_gt_ccs_mode_sysfs_fini, gt);
- if (err) {
- sysfs_remove_files(gt->sysfs, gt_ccs_mode_attrs);
- drm_warn(&xe->drm, "%s: drmm_add_action_or_reset failed, err: %d\n",
- __func__, err);
- }
+ return drmm_add_action_or_reset(&xe->drm, xe_gt_ccs_mode_sysfs_fini, gt);
}
diff --git a/drivers/gpu/drm/xe/xe_gt_ccs_mode.h b/drivers/gpu/drm/xe/xe_gt_ccs_mode.h
index f39975aaaab0..f8779852cf0d 100644
--- a/drivers/gpu/drm/xe/xe_gt_ccs_mode.h
+++ b/drivers/gpu/drm/xe/xe_gt_ccs_mode.h
@@ -12,7 +12,7 @@
#include "xe_platform_types.h"
void xe_gt_apply_ccs_mode(struct xe_gt *gt);
-void xe_gt_ccs_mode_sysfs_init(struct xe_gt *gt);
+int xe_gt_ccs_mode_sysfs_init(struct xe_gt *gt);
static inline bool xe_gt_ccs_mode_enabled(const struct xe_gt *gt)
{
diff --git a/drivers/gpu/drm/xe/xe_gt_clock.c b/drivers/gpu/drm/xe/xe_gt_clock.c
index 937054e31d72..c7bca20f6b65 100644
--- a/drivers/gpu/drm/xe/xe_gt_clock.c
+++ b/drivers/gpu/drm/xe/xe_gt_clock.c
@@ -78,8 +78,3 @@ int xe_gt_clock_init(struct xe_gt *gt)
gt->info.reference_clock = freq;
return 0;
}
-
-u64 xe_gt_clock_cycles_to_ns(const struct xe_gt *gt, u64 count)
-{
- return DIV_ROUND_CLOSEST_ULL(count * NSEC_PER_SEC, gt->info.reference_clock);
-}
diff --git a/drivers/gpu/drm/xe/xe_gt_clock.h b/drivers/gpu/drm/xe/xe_gt_clock.h
index aa162722f859..44fa0371b973 100644
--- a/drivers/gpu/drm/xe/xe_gt_clock.h
+++ b/drivers/gpu/drm/xe/xe_gt_clock.h
@@ -11,5 +11,5 @@
struct xe_gt;
int xe_gt_clock_init(struct xe_gt *gt);
-u64 xe_gt_clock_cycles_to_ns(const struct xe_gt *gt, u64 count);
+
#endif
diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c
index c4b67cf09f8f..ff7f4cf52fa9 100644
--- a/drivers/gpu/drm/xe/xe_gt_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c
@@ -18,193 +18,246 @@
#include "xe_lrc.h"
#include "xe_macros.h"
#include "xe_pat.h"
+#include "xe_pm.h"
#include "xe_reg_sr.h"
#include "xe_reg_whitelist.h"
#include "xe_uc_debugfs.h"
#include "xe_wa.h"
-static struct xe_gt *node_to_gt(struct drm_info_node *node)
+/**
+ * xe_gt_debugfs_simple_show - A show callback for struct drm_info_list
+ * @m: the &seq_file
+ * @data: data used by the drm debugfs helpers
+ *
+ * This callback can be used in struct drm_info_list to describe debugfs
+ * files that are &xe_gt specific.
+ *
+ * It is assumed that those debugfs files will be created in a directory
+ * entry whose struct dentry d_inode->i_private points to the &xe_gt.
+ *
+ * This function assumes that &m->private will be set to the &struct
+ * drm_info_node corresponding to the instance of the info on a given &struct
+ * drm_minor (see struct drm_info_list.show for details).
+ *
+ * This function also assumes that struct drm_info_list.data will point to the
+ * function code that will actually print a file content::
+ *
+ * int (*print)(struct xe_gt *, struct drm_printer *)
+ *
+ * Example::
+ *
+ * int foo(struct xe_gt *gt, struct drm_printer *p)
+ * {
+ * drm_printf(p, "GT%u\n", gt->info.id);
+ * return 0;
+ * }
+ *
+ * static const struct drm_info_list bar[] = {
+ * { .name = "foo", .show = xe_gt_debugfs_simple_show, .data = foo },
+ * };
+ *
+ * dir = debugfs_create_dir("gt", parent);
+ * dir->d_inode->i_private = gt;
+ * drm_debugfs_create_files(bar, ARRAY_SIZE(bar), dir, minor);
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_gt_debugfs_simple_show(struct seq_file *m, void *data)
{
- return node->info_ent->data;
+ struct drm_printer p = drm_seq_file_printer(m);
+ struct drm_info_node *node = m->private;
+ struct dentry *parent = node->dent->d_parent;
+ struct xe_gt *gt = parent->d_inode->i_private;
+ int (*print)(struct xe_gt *, struct drm_printer *) = node->info_ent->data;
+
+ if (WARN_ON(!print))
+ return -EINVAL;
+
+ return print(gt, &p);
}
-static int hw_engines(struct seq_file *m, void *data)
+static int hw_engines(struct xe_gt *gt, struct drm_printer *p)
{
- struct xe_gt *gt = node_to_gt(m->private);
struct xe_device *xe = gt_to_xe(gt);
- struct drm_printer p = drm_seq_file_printer(m);
struct xe_hw_engine *hwe;
enum xe_hw_engine_id id;
int err;
- xe_device_mem_access_get(xe);
+ xe_pm_runtime_get(xe);
err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
if (err) {
- xe_device_mem_access_put(xe);
+ xe_pm_runtime_put(xe);
return err;
}
for_each_hw_engine(hwe, gt, id)
- xe_hw_engine_print(hwe, &p);
+ xe_hw_engine_print(hwe, p);
err = xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL);
- xe_device_mem_access_put(xe);
+ xe_pm_runtime_put(xe);
if (err)
return err;
return 0;
}
-static int force_reset(struct seq_file *m, void *data)
+static int force_reset(struct xe_gt *gt, struct drm_printer *p)
{
- struct xe_gt *gt = node_to_gt(m->private);
-
+ xe_pm_runtime_get(gt_to_xe(gt));
xe_gt_reset_async(gt);
+ xe_pm_runtime_put(gt_to_xe(gt));
return 0;
}
-static int sa_info(struct seq_file *m, void *data)
+static int sa_info(struct xe_gt *gt, struct drm_printer *p)
{
- struct xe_tile *tile = gt_to_tile(node_to_gt(m->private));
- struct drm_printer p = drm_seq_file_printer(m);
+ struct xe_tile *tile = gt_to_tile(gt);
- drm_suballoc_dump_debug_info(&tile->mem.kernel_bb_pool->base, &p,
+ xe_pm_runtime_get(gt_to_xe(gt));
+ drm_suballoc_dump_debug_info(&tile->mem.kernel_bb_pool->base, p,
tile->mem.kernel_bb_pool->gpu_addr);
+ xe_pm_runtime_put(gt_to_xe(gt));
return 0;
}
-static int topology(struct seq_file *m, void *data)
+static int topology(struct xe_gt *gt, struct drm_printer *p)
{
- struct xe_gt *gt = node_to_gt(m->private);
- struct drm_printer p = drm_seq_file_printer(m);
-
- xe_gt_topology_dump(gt, &p);
+ xe_pm_runtime_get(gt_to_xe(gt));
+ xe_gt_topology_dump(gt, p);
+ xe_pm_runtime_put(gt_to_xe(gt));
return 0;
}
-static int steering(struct seq_file *m, void *data)
+static int steering(struct xe_gt *gt, struct drm_printer *p)
{
- struct xe_gt *gt = node_to_gt(m->private);
- struct drm_printer p = drm_seq_file_printer(m);
-
- xe_gt_mcr_steering_dump(gt, &p);
+ xe_pm_runtime_get(gt_to_xe(gt));
+ xe_gt_mcr_steering_dump(gt, p);
+ xe_pm_runtime_put(gt_to_xe(gt));
return 0;
}
-static int ggtt(struct seq_file *m, void *data)
+static int ggtt(struct xe_gt *gt, struct drm_printer *p)
{
- struct xe_gt *gt = node_to_gt(m->private);
- struct drm_printer p = drm_seq_file_printer(m);
+ int ret;
+
+ xe_pm_runtime_get(gt_to_xe(gt));
+ ret = xe_ggtt_dump(gt_to_tile(gt)->mem.ggtt, p);
+ xe_pm_runtime_put(gt_to_xe(gt));
- return xe_ggtt_dump(gt_to_tile(gt)->mem.ggtt, &p);
+ return ret;
}
-static int register_save_restore(struct seq_file *m, void *data)
+static int register_save_restore(struct xe_gt *gt, struct drm_printer *p)
{
- struct xe_gt *gt = node_to_gt(m->private);
- struct drm_printer p = drm_seq_file_printer(m);
struct xe_hw_engine *hwe;
enum xe_hw_engine_id id;
- xe_reg_sr_dump(&gt->reg_sr, &p);
- drm_printf(&p, "\n");
+ xe_pm_runtime_get(gt_to_xe(gt));
- drm_printf(&p, "Engine\n");
+ xe_reg_sr_dump(&gt->reg_sr, p);
+ drm_printf(p, "\n");
+
+ drm_printf(p, "Engine\n");
for_each_hw_engine(hwe, gt, id)
- xe_reg_sr_dump(&hwe->reg_sr, &p);
- drm_printf(&p, "\n");
+ xe_reg_sr_dump(&hwe->reg_sr, p);
+ drm_printf(p, "\n");
- drm_printf(&p, "LRC\n");
+ drm_printf(p, "LRC\n");
for_each_hw_engine(hwe, gt, id)
- xe_reg_sr_dump(&hwe->reg_lrc, &p);
- drm_printf(&p, "\n");
+ xe_reg_sr_dump(&hwe->reg_lrc, p);
+ drm_printf(p, "\n");
- drm_printf(&p, "Whitelist\n");
+ drm_printf(p, "Whitelist\n");
for_each_hw_engine(hwe, gt, id)
- xe_reg_whitelist_dump(&hwe->reg_whitelist, &p);
+ xe_reg_whitelist_dump(&hwe->reg_whitelist, p);
+
+ xe_pm_runtime_put(gt_to_xe(gt));
return 0;
}
-static int workarounds(struct seq_file *m, void *data)
+static int workarounds(struct xe_gt *gt, struct drm_printer *p)
{
- struct xe_gt *gt = node_to_gt(m->private);
- struct drm_printer p = drm_seq_file_printer(m);
-
- xe_wa_dump(gt, &p);
+ xe_pm_runtime_get(gt_to_xe(gt));
+ xe_wa_dump(gt, p);
+ xe_pm_runtime_put(gt_to_xe(gt));
return 0;
}
-static int pat(struct seq_file *m, void *data)
+static int pat(struct xe_gt *gt, struct drm_printer *p)
{
- struct xe_gt *gt = node_to_gt(m->private);
- struct drm_printer p = drm_seq_file_printer(m);
-
- xe_pat_dump(gt, &p);
+ xe_pm_runtime_get(gt_to_xe(gt));
+ xe_pat_dump(gt, p);
+ xe_pm_runtime_put(gt_to_xe(gt));
return 0;
}
-static int rcs_default_lrc(struct seq_file *m, void *data)
+static int rcs_default_lrc(struct xe_gt *gt, struct drm_printer *p)
{
- struct drm_printer p = drm_seq_file_printer(m);
+ xe_pm_runtime_get(gt_to_xe(gt));
+ xe_lrc_dump_default(p, gt, XE_ENGINE_CLASS_RENDER);
+ xe_pm_runtime_put(gt_to_xe(gt));
- xe_lrc_dump_default(&p, node_to_gt(m->private), XE_ENGINE_CLASS_RENDER);
return 0;
}
-static int ccs_default_lrc(struct seq_file *m, void *data)
+static int ccs_default_lrc(struct xe_gt *gt, struct drm_printer *p)
{
- struct drm_printer p = drm_seq_file_printer(m);
+ xe_pm_runtime_get(gt_to_xe(gt));
+ xe_lrc_dump_default(p, gt, XE_ENGINE_CLASS_COMPUTE);
+ xe_pm_runtime_put(gt_to_xe(gt));
- xe_lrc_dump_default(&p, node_to_gt(m->private), XE_ENGINE_CLASS_COMPUTE);
return 0;
}
-static int bcs_default_lrc(struct seq_file *m, void *data)
+static int bcs_default_lrc(struct xe_gt *gt, struct drm_printer *p)
{
- struct drm_printer p = drm_seq_file_printer(m);
+ xe_pm_runtime_get(gt_to_xe(gt));
+ xe_lrc_dump_default(p, gt, XE_ENGINE_CLASS_COPY);
+ xe_pm_runtime_put(gt_to_xe(gt));
- xe_lrc_dump_default(&p, node_to_gt(m->private), XE_ENGINE_CLASS_COPY);
return 0;
}
-static int vcs_default_lrc(struct seq_file *m, void *data)
+static int vcs_default_lrc(struct xe_gt *gt, struct drm_printer *p)
{
- struct drm_printer p = drm_seq_file_printer(m);
+ xe_pm_runtime_get(gt_to_xe(gt));
+ xe_lrc_dump_default(p, gt, XE_ENGINE_CLASS_VIDEO_DECODE);
+ xe_pm_runtime_put(gt_to_xe(gt));
- xe_lrc_dump_default(&p, node_to_gt(m->private), XE_ENGINE_CLASS_VIDEO_DECODE);
return 0;
}
-static int vecs_default_lrc(struct seq_file *m, void *data)
+static int vecs_default_lrc(struct xe_gt *gt, struct drm_printer *p)
{
- struct drm_printer p = drm_seq_file_printer(m);
+ xe_pm_runtime_get(gt_to_xe(gt));
+ xe_lrc_dump_default(p, gt, XE_ENGINE_CLASS_VIDEO_ENHANCE);
+ xe_pm_runtime_put(gt_to_xe(gt));
- xe_lrc_dump_default(&p, node_to_gt(m->private), XE_ENGINE_CLASS_VIDEO_ENHANCE);
return 0;
}
static const struct drm_info_list debugfs_list[] = {
- {"hw_engines", hw_engines, 0},
- {"force_reset", force_reset, 0},
- {"sa_info", sa_info, 0},
- {"topology", topology, 0},
- {"steering", steering, 0},
- {"ggtt", ggtt, 0},
- {"register-save-restore", register_save_restore, 0},
- {"workarounds", workarounds, 0},
- {"pat", pat, 0},
- {"default_lrc_rcs", rcs_default_lrc},
- {"default_lrc_ccs", ccs_default_lrc},
- {"default_lrc_bcs", bcs_default_lrc},
- {"default_lrc_vcs", vcs_default_lrc},
- {"default_lrc_vecs", vecs_default_lrc},
+ {"hw_engines", .show = xe_gt_debugfs_simple_show, .data = hw_engines},
+ {"force_reset", .show = xe_gt_debugfs_simple_show, .data = force_reset},
+ {"sa_info", .show = xe_gt_debugfs_simple_show, .data = sa_info},
+ {"topology", .show = xe_gt_debugfs_simple_show, .data = topology},
+ {"steering", .show = xe_gt_debugfs_simple_show, .data = steering},
+ {"ggtt", .show = xe_gt_debugfs_simple_show, .data = ggtt},
+ {"register-save-restore", .show = xe_gt_debugfs_simple_show, .data = register_save_restore},
+ {"workarounds", .show = xe_gt_debugfs_simple_show, .data = workarounds},
+ {"pat", .show = xe_gt_debugfs_simple_show, .data = pat},
+ {"default_lrc_rcs", .show = xe_gt_debugfs_simple_show, .data = rcs_default_lrc},
+ {"default_lrc_ccs", .show = xe_gt_debugfs_simple_show, .data = ccs_default_lrc},
+ {"default_lrc_bcs", .show = xe_gt_debugfs_simple_show, .data = bcs_default_lrc},
+ {"default_lrc_vcs", .show = xe_gt_debugfs_simple_show, .data = vcs_default_lrc},
+ {"default_lrc_vecs", .show = xe_gt_debugfs_simple_show, .data = vecs_default_lrc},
};
void xe_gt_debugfs_register(struct xe_gt *gt)
@@ -212,13 +265,11 @@ void xe_gt_debugfs_register(struct xe_gt *gt)
struct xe_device *xe = gt_to_xe(gt);
struct drm_minor *minor = gt_to_xe(gt)->drm.primary;
struct dentry *root;
- struct drm_info_list *local;
char name[8];
- int i;
xe_gt_assert(gt, minor->debugfs_root);
- sprintf(name, "gt%d", gt->info.id);
+ snprintf(name, sizeof(name), "gt%d", gt->info.id);
root = debugfs_create_dir(name, minor->debugfs_root);
if (IS_ERR(root)) {
drm_warn(&xe->drm, "Create GT directory failed");
@@ -226,22 +277,13 @@ void xe_gt_debugfs_register(struct xe_gt *gt)
}
/*
- * Allocate local copy as we need to pass in the GT to the debugfs
- * entry and drm_debugfs_create_files just references the drm_info_list
- * passed in (e.g. can't define this on the stack).
+ * Store the xe_gt pointer as private data of the gt/ directory node
+ * so other GT specific attributes under that directory may refer to
+ * it by looking at its parent node private data.
*/
-#define DEBUGFS_SIZE (ARRAY_SIZE(debugfs_list) * sizeof(struct drm_info_list))
- local = drmm_kmalloc(&xe->drm, DEBUGFS_SIZE, GFP_KERNEL);
- if (!local)
- return;
-
- memcpy(local, debugfs_list, DEBUGFS_SIZE);
-#undef DEBUGFS_SIZE
-
- for (i = 0; i < ARRAY_SIZE(debugfs_list); ++i)
- local[i].data = gt;
+ root->d_inode->i_private = gt;
- drm_debugfs_create_files(local,
+ drm_debugfs_create_files(debugfs_list,
ARRAY_SIZE(debugfs_list),
root, minor);
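
For context, a new per-GT debugfs file would now be wired up as below, following the pattern documented at xe_gt_debugfs_simple_show() above. This is only a sketch; my_dump and my_files are hypothetical names, not part of this series:

static int my_dump(struct xe_gt *gt, struct drm_printer *p)
{
	/* any per-GT printout; the helper already resolved gt from the dir */
	drm_printf(p, "GT%u\n", gt->info.id);
	return 0;
}

static const struct drm_info_list my_files[] = {
	{ .name = "my_dump", .show = xe_gt_debugfs_simple_show, .data = my_dump },
};

/* in xe_gt_debugfs_register(), after root->d_inode->i_private = gt: */
drm_debugfs_create_files(my_files, ARRAY_SIZE(my_files), root, minor);
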
diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.h b/drivers/gpu/drm/xe/xe_gt_debugfs.h
index 5a329f118a57..05a6cc93c78c 100644
--- a/drivers/gpu/drm/xe/xe_gt_debugfs.h
+++ b/drivers/gpu/drm/xe/xe_gt_debugfs.h
@@ -6,8 +6,10 @@
#ifndef _XE_GT_DEBUGFS_H_
#define _XE_GT_DEBUGFS_H_
+struct seq_file;
struct xe_gt;
void xe_gt_debugfs_register(struct xe_gt *gt);
+int xe_gt_debugfs_simple_show(struct seq_file *m, void *data);
#endif
diff --git a/drivers/gpu/drm/xe/xe_gt_freq.c b/drivers/gpu/drm/xe/xe_gt_freq.c
index e5b0f4ecdbe8..855de40e40ea 100644
--- a/drivers/gpu/drm/xe/xe_gt_freq.c
+++ b/drivers/gpu/drm/xe/xe_gt_freq.c
@@ -15,6 +15,7 @@
#include "xe_gt_sysfs.h"
#include "xe_gt_throttle_sysfs.h"
#include "xe_guc_pc.h"
+#include "xe_pm.h"
/**
* DOC: Xe GT Frequency Management
@@ -49,12 +50,23 @@ dev_to_pc(struct device *dev)
return &kobj_to_gt(dev->kobj.parent)->uc.guc.pc;
}
+static struct xe_device *
+dev_to_xe(struct device *dev)
+{
+ return gt_to_xe(kobj_to_gt(dev->kobj.parent));
+}
+
static ssize_t act_freq_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct xe_guc_pc *pc = dev_to_pc(dev);
+ u32 freq;
- return sysfs_emit(buf, "%d\n", xe_guc_pc_get_act_freq(pc));
+ xe_pm_runtime_get(dev_to_xe(dev));
+ freq = xe_guc_pc_get_act_freq(pc);
+ xe_pm_runtime_put(dev_to_xe(dev));
+
+ return sysfs_emit(buf, "%d\n", freq);
}
static DEVICE_ATTR_RO(act_freq);
@@ -65,7 +77,9 @@ static ssize_t cur_freq_show(struct device *dev,
u32 freq;
ssize_t ret;
+ xe_pm_runtime_get(dev_to_xe(dev));
ret = xe_guc_pc_get_cur_freq(pc, &freq);
+ xe_pm_runtime_put(dev_to_xe(dev));
if (ret)
return ret;
@@ -77,8 +91,13 @@ static ssize_t rp0_freq_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct xe_guc_pc *pc = dev_to_pc(dev);
+ u32 freq;
- return sysfs_emit(buf, "%d\n", xe_guc_pc_get_rp0_freq(pc));
+ xe_pm_runtime_get(dev_to_xe(dev));
+ freq = xe_guc_pc_get_rp0_freq(pc);
+ xe_pm_runtime_put(dev_to_xe(dev));
+
+ return sysfs_emit(buf, "%d\n", freq);
}
static DEVICE_ATTR_RO(rp0_freq);
@@ -86,8 +105,13 @@ static ssize_t rpe_freq_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct xe_guc_pc *pc = dev_to_pc(dev);
+ u32 freq;
+
+ xe_pm_runtime_get(dev_to_xe(dev));
+ freq = xe_guc_pc_get_rpe_freq(pc);
+ xe_pm_runtime_put(dev_to_xe(dev));
- return sysfs_emit(buf, "%d\n", xe_guc_pc_get_rpe_freq(pc));
+ return sysfs_emit(buf, "%d\n", freq);
}
static DEVICE_ATTR_RO(rpe_freq);
@@ -107,7 +131,9 @@ static ssize_t min_freq_show(struct device *dev,
u32 freq;
ssize_t ret;
+ xe_pm_runtime_get(dev_to_xe(dev));
ret = xe_guc_pc_get_min_freq(pc, &freq);
+ xe_pm_runtime_put(dev_to_xe(dev));
if (ret)
return ret;
@@ -125,7 +151,9 @@ static ssize_t min_freq_store(struct device *dev, struct device_attribute *attr,
if (ret)
return ret;
+ xe_pm_runtime_get(dev_to_xe(dev));
ret = xe_guc_pc_set_min_freq(pc, freq);
+ xe_pm_runtime_put(dev_to_xe(dev));
if (ret)
return ret;
@@ -140,7 +168,9 @@ static ssize_t max_freq_show(struct device *dev,
u32 freq;
ssize_t ret;
+ xe_pm_runtime_get(dev_to_xe(dev));
ret = xe_guc_pc_get_max_freq(pc, &freq);
+ xe_pm_runtime_put(dev_to_xe(dev));
if (ret)
return ret;
@@ -158,7 +188,9 @@ static ssize_t max_freq_store(struct device *dev, struct device_attribute *attr,
if (ret)
return ret;
+ xe_pm_runtime_get(dev_to_xe(dev));
ret = xe_guc_pc_set_max_freq(pc, freq);
+ xe_pm_runtime_put(dev_to_xe(dev));
if (ret)
return ret;
@@ -190,33 +222,28 @@ static void freq_fini(struct drm_device *drm, void *arg)
* @gt: Xe GT object
*
 * It needs to be initialized after the GT sysfs and GuC PC components are ready.
+ *
+ * Return: 0 on success or a negative error code on failure.
*/
-void xe_gt_freq_init(struct xe_gt *gt)
+int xe_gt_freq_init(struct xe_gt *gt)
{
struct xe_device *xe = gt_to_xe(gt);
int err;
if (xe->info.skip_guc_pc)
- return;
+ return 0;
gt->freq = kobject_create_and_add("freq0", gt->sysfs);
- if (!gt->freq) {
- drm_warn(&xe->drm, "failed to add freq0 directory to %s\n",
- kobject_name(gt->sysfs));
- return;
- }
+ if (!gt->freq)
+ return -ENOMEM;
err = drmm_add_action_or_reset(&xe->drm, freq_fini, gt->freq);
- if (err) {
- drm_warn(&xe->drm, "%s: drmm_add_action_or_reset failed, err: %d\n",
- __func__, err);
- return;
- }
+ if (err)
+ return err;
err = sysfs_create_files(gt->freq, freq_attrs);
if (err)
- drm_warn(&xe->drm, "failed to add freq attrs to %s, err: %d\n",
- kobject_name(gt->freq), err);
+ return err;
- xe_gt_throttle_sysfs_init(gt);
+ return xe_gt_throttle_sysfs_init(gt);
}
diff --git a/drivers/gpu/drm/xe/xe_gt_freq.h b/drivers/gpu/drm/xe/xe_gt_freq.h
index f3fe3c90491a..b7fddbe7b9b6 100644
--- a/drivers/gpu/drm/xe/xe_gt_freq.h
+++ b/drivers/gpu/drm/xe/xe_gt_freq.h
@@ -8,6 +8,6 @@
struct xe_gt;
-void xe_gt_freq_init(struct xe_gt *gt);
+int xe_gt_freq_init(struct xe_gt *gt);
#endif
diff --git a/drivers/gpu/drm/xe/xe_gt_idle.c b/drivers/gpu/drm/xe/xe_gt_idle.c
index 9fcae65b6469..8fc0f3f6ecc5 100644
--- a/drivers/gpu/drm/xe/xe_gt_idle.c
+++ b/drivers/gpu/drm/xe/xe_gt_idle.c
@@ -12,6 +12,7 @@
#include "xe_guc_pc.h"
#include "regs/xe_gt_regs.h"
#include "xe_mmio.h"
+#include "xe_pm.h"
/**
* DOC: Xe GT Idle
@@ -40,6 +41,15 @@ static struct xe_guc_pc *gtidle_to_pc(struct xe_gt_idle *gtidle)
return &gtidle_to_gt(gtidle)->uc.guc.pc;
}
+static struct xe_device *
+pc_to_xe(struct xe_guc_pc *pc)
+{
+ struct xe_guc *guc = container_of(pc, struct xe_guc, pc);
+ struct xe_gt *gt = container_of(guc, struct xe_gt, uc.guc);
+
+ return gt_to_xe(gt);
+}
+
static const char *gt_idle_state_to_string(enum xe_gt_idle_state state)
{
switch (state) {
@@ -86,8 +96,14 @@ static ssize_t name_show(struct device *dev,
struct device_attribute *attr, char *buff)
{
struct xe_gt_idle *gtidle = dev_to_gtidle(dev);
+ struct xe_guc_pc *pc = gtidle_to_pc(gtidle);
+ ssize_t ret;
+
+ xe_pm_runtime_get(pc_to_xe(pc));
+ ret = sysfs_emit(buff, "%s\n", gtidle->name);
+ xe_pm_runtime_put(pc_to_xe(pc));
- return sysfs_emit(buff, "%s\n", gtidle->name);
+ return ret;
}
static DEVICE_ATTR_RO(name);
@@ -98,7 +114,9 @@ static ssize_t idle_status_show(struct device *dev,
struct xe_guc_pc *pc = gtidle_to_pc(gtidle);
enum xe_gt_idle_state state;
+ xe_pm_runtime_get(pc_to_xe(pc));
state = gtidle->idle_status(pc);
+ xe_pm_runtime_put(pc_to_xe(pc));
return sysfs_emit(buff, "%s\n", gt_idle_state_to_string(state));
}
@@ -111,7 +129,10 @@ static ssize_t idle_residency_ms_show(struct device *dev,
struct xe_guc_pc *pc = gtidle_to_pc(gtidle);
u64 residency;
+ xe_pm_runtime_get(pc_to_xe(pc));
residency = gtidle->idle_residency(pc);
+ xe_pm_runtime_put(pc_to_xe(pc));
+
return sysfs_emit(buff, "%llu\n", get_residency_ms(gtidle, residency));
}
static DEVICE_ATTR_RO(idle_residency_ms);
@@ -131,7 +152,7 @@ static void gt_idle_sysfs_fini(struct drm_device *drm, void *arg)
kobject_put(kobj);
}
-void xe_gt_idle_sysfs_init(struct xe_gt_idle *gtidle)
+int xe_gt_idle_sysfs_init(struct xe_gt_idle *gtidle)
{
struct xe_gt *gt = gtidle_to_gt(gtidle);
struct xe_device *xe = gt_to_xe(gt);
@@ -139,16 +160,14 @@ void xe_gt_idle_sysfs_init(struct xe_gt_idle *gtidle)
int err;
kobj = kobject_create_and_add("gtidle", gt->sysfs);
- if (!kobj) {
- drm_warn(&xe->drm, "%s failed, err: %d\n", __func__, -ENOMEM);
- return;
- }
+ if (!kobj)
+ return -ENOMEM;
if (xe_gt_is_media_type(gt)) {
- sprintf(gtidle->name, "gt%d-mc", gt->info.id);
+ snprintf(gtidle->name, sizeof(gtidle->name), "gt%d-mc", gt->info.id);
gtidle->idle_residency = xe_guc_pc_mc6_residency;
} else {
- sprintf(gtidle->name, "gt%d-rc", gt->info.id);
+ snprintf(gtidle->name, sizeof(gtidle->name), "gt%d-rc", gt->info.id);
gtidle->idle_residency = xe_guc_pc_rc6_residency;
}
@@ -159,14 +178,10 @@ void xe_gt_idle_sysfs_init(struct xe_gt_idle *gtidle)
err = sysfs_create_files(kobj, gt_idle_attrs);
if (err) {
kobject_put(kobj);
- drm_warn(&xe->drm, "failed to register gtidle sysfs, err: %d\n", err);
- return;
+ return err;
}
- err = drmm_add_action_or_reset(&xe->drm, gt_idle_sysfs_fini, kobj);
- if (err)
- drm_warn(&xe->drm, "%s: drmm_add_action_or_reset failed, err: %d\n",
- __func__, err);
+ return drmm_add_action_or_reset(&xe->drm, gt_idle_sysfs_fini, kobj);
}
void xe_gt_idle_enable_c6(struct xe_gt *gt)
diff --git a/drivers/gpu/drm/xe/xe_gt_idle.h b/drivers/gpu/drm/xe/xe_gt_idle.h
index 69280fd16b03..75bd99659b1b 100644
--- a/drivers/gpu/drm/xe/xe_gt_idle.h
+++ b/drivers/gpu/drm/xe/xe_gt_idle.h
@@ -10,7 +10,7 @@
struct xe_gt;
-void xe_gt_idle_sysfs_init(struct xe_gt_idle *gtidle);
+int xe_gt_idle_sysfs_init(struct xe_gt_idle *gtidle);
void xe_gt_idle_enable_c6(struct xe_gt *gt);
void xe_gt_idle_disable_c6(struct xe_gt *gt);
diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c
index a7ab9ba645f9..577bd7043740 100644
--- a/drivers/gpu/drm/xe/xe_gt_mcr.c
+++ b/drivers/gpu/drm/xe/xe_gt_mcr.c
@@ -6,6 +6,7 @@
#include "xe_gt_mcr.h"
#include "regs/xe_gt_regs.h"
+#include "xe_assert.h"
#include "xe_gt.h"
#include "xe_gt_topology.h"
#include "xe_gt_types.h"
@@ -294,14 +295,40 @@ static void init_steering_mslice(struct xe_gt *gt)
gt->steering[LNCF].instance_target = 0; /* unused */
}
-static void init_steering_dss(struct xe_gt *gt)
+static unsigned int dss_per_group(struct xe_gt *gt)
{
- unsigned int dss = min(xe_dss_mask_group_ffs(gt->fuse_topo.g_dss_mask, 0, 0),
- xe_dss_mask_group_ffs(gt->fuse_topo.c_dss_mask, 0, 0));
- unsigned int dss_per_grp = gt_to_xe(gt)->info.platform == XE_PVC ? 8 : 4;
+ if (gt_to_xe(gt)->info.platform == XE_PVC)
+ return 8;
+ else if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1250)
+ return 4;
+ else
+ return 6;
+}
- gt->steering[DSS].group_target = dss / dss_per_grp;
- gt->steering[DSS].instance_target = dss % dss_per_grp;
+/**
+ * xe_gt_mcr_get_dss_steering - Get the group/instance steering for a DSS
+ * @gt: GT structure
+ * @dss: DSS ID to obtain steering for
+ * @group: pointer to storage for steering group ID
+ * @instance: pointer to storage for steering instance ID
+ */
+void xe_gt_mcr_get_dss_steering(struct xe_gt *gt, unsigned int dss, u16 *group, u16 *instance)
+{
+ int dss_per_grp = dss_per_group(gt);
+
+ xe_gt_assert(gt, dss < XE_MAX_DSS_FUSE_BITS);
+
+ *group = dss / dss_per_grp;
+ *instance = dss % dss_per_grp;
+}
+
+static void init_steering_dss(struct xe_gt *gt)
+{
+ xe_gt_mcr_get_dss_steering(gt,
+ min(xe_dss_mask_group_ffs(gt->fuse_topo.g_dss_mask, 0, 0),
+ xe_dss_mask_group_ffs(gt->fuse_topo.c_dss_mask, 0, 0)),
+ &gt->steering[DSS].group_target,
+ &gt->steering[DSS].instance_target);
}
static void init_steering_oaddrm(struct xe_gt *gt)
diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.h b/drivers/gpu/drm/xe/xe_gt_mcr.h
index 27ca1bc880a0..a7f4ab1aa584 100644
--- a/drivers/gpu/drm/xe/xe_gt_mcr.h
+++ b/drivers/gpu/drm/xe/xe_gt_mcr.h
@@ -7,6 +7,7 @@
#define _XE_GT_MCR_H_
#include "regs/xe_reg_defs.h"
+#include "xe_gt_topology.h"
struct drm_printer;
struct xe_gt;
@@ -25,5 +26,18 @@ void xe_gt_mcr_multicast_write(struct xe_gt *gt, struct xe_reg_mcr mcr_reg,
u32 value);
void xe_gt_mcr_steering_dump(struct xe_gt *gt, struct drm_printer *p);
+void xe_gt_mcr_get_dss_steering(struct xe_gt *gt, unsigned int dss, u16 *group, u16 *instance);
+
+/*
+ * Loop over each DSS and determine the group and instance IDs that
+ * should be used to steer MCR accesses toward this DSS.
+ * @dss: DSS ID to obtain steering for
+ * @gt: GT structure
+ * @group: steering group ID (u16)
+ * @instance: steering instance ID (u16)
+ */
+#define for_each_dss_steering(dss, gt, group, instance) \
+ for_each_dss((dss), (gt)) \
+ for_each_if((xe_gt_mcr_get_dss_steering((gt), (dss), &(group), &(instance)), true))
#endif /* _XE_GT_MCR_H_ */
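
A minimal usage sketch for the new iterator (hypothetical caller; gt is any &xe_gt, and the underlying for_each_dss() comes from xe_gt_topology.h, which this header now includes):

u16 group, instance;
unsigned int dss;

for_each_dss_steering(dss, gt, group, instance) {
	/* group/instance hold the MCR steering pair for this present DSS */
}
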
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf.c
new file mode 100644
index 000000000000..791dcdd767e2
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf.c
@@ -0,0 +1,52 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023-2024 Intel Corporation
+ */
+
+#include <drm/drm_managed.h>
+
+#include "xe_gt_sriov_pf.h"
+#include "xe_gt_sriov_pf_helpers.h"
+
+/*
+ * Per-VF metadata is maintained in a flexible array where:
+ * - entry [0] contains metadata for the PF (only if applicable),
+ * - entries [1..n] contain metadata for VF1..VFn::
+ *
+ * <--------------------------- 1 + total_vfs ----------->
+ * +-------+-------+-------+-----------------------+-------+
+ * | 0 | 1 | 2 | | n |
+ * +-------+-------+-------+-----------------------+-------+
+ * | PF | VF1 | VF2 | ... ... | VFn |
+ * +-------+-------+-------+-----------------------+-------+
+ */
+static int pf_alloc_metadata(struct xe_gt *gt)
+{
+ unsigned int num_vfs = xe_gt_sriov_pf_get_totalvfs(gt);
+
+ gt->sriov.pf.vfs = drmm_kcalloc(&gt_to_xe(gt)->drm, 1 + num_vfs,
+ sizeof(*gt->sriov.pf.vfs), GFP_KERNEL);
+ if (!gt->sriov.pf.vfs)
+ return -ENOMEM;
+
+ return 0;
+}
+
+/**
+ * xe_gt_sriov_pf_init_early - Prepare SR-IOV PF data structures on PF.
+ * @gt: the &xe_gt to initialize
+ *
+ * Early initialization of the PF data.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_gt_sriov_pf_init_early(struct xe_gt *gt)
+{
+ int err;
+
+ err = pf_alloc_metadata(gt);
+ if (err)
+ return err;
+
+ return 0;
+}
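
As a reading aid for the array layout above: the config code below indexes the array directly (see pf_pick_vf_config() in xe_gt_sriov_pf_config.c); schematically:

/* vfid == 0 selects the PF entry, vfid in [1..totalvfs] selects VF1..VFn */
struct xe_gt_sriov_config *config = &gt->sriov.pf.vfs[vfid].config;
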
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf.h
new file mode 100644
index 000000000000..05142ffc4319
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023-2024 Intel Corporation
+ */
+
+#ifndef _XE_GT_SRIOV_PF_H_
+#define _XE_GT_SRIOV_PF_H_
+
+struct xe_gt;
+
+#ifdef CONFIG_PCI_IOV
+int xe_gt_sriov_pf_init_early(struct xe_gt *gt);
+#else
+static inline int xe_gt_sriov_pf_init_early(struct xe_gt *gt)
+{
+ return 0;
+}
+#endif
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c
new file mode 100644
index 000000000000..79116ad58620
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c
@@ -0,0 +1,1977 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023-2024 Intel Corporation
+ */
+
+#include <linux/string_choices.h>
+#include <linux/wordpart.h>
+
+#include "abi/guc_actions_sriov_abi.h"
+#include "abi/guc_klvs_abi.h"
+
+#include "regs/xe_guc_regs.h"
+
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_ggtt.h"
+#include "xe_gt.h"
+#include "xe_gt_sriov_pf_config.h"
+#include "xe_gt_sriov_pf_helpers.h"
+#include "xe_gt_sriov_pf_policy.h"
+#include "xe_gt_sriov_printk.h"
+#include "xe_guc.h"
+#include "xe_guc_ct.h"
+#include "xe_guc_db_mgr.h"
+#include "xe_guc_fwif.h"
+#include "xe_guc_id_mgr.h"
+#include "xe_guc_klv_helpers.h"
+#include "xe_guc_submit.h"
+#include "xe_lmtt.h"
+#include "xe_map.h"
+#include "xe_sriov.h"
+#include "xe_ttm_vram_mgr.h"
+#include "xe_wopcm.h"
+
+/*
+ * Return: number of KLVs that were successfully parsed and saved,
+ * negative error code on failure.
+ */
+static int guc_action_update_vf_cfg(struct xe_guc *guc, u32 vfid,
+ u64 addr, u32 size)
+{
+ u32 request[] = {
+ GUC_ACTION_PF2GUC_UPDATE_VF_CFG,
+ vfid,
+ lower_32_bits(addr),
+ upper_32_bits(addr),
+ size,
+ };
+
+ return xe_guc_ct_send_block(&guc->ct, request, ARRAY_SIZE(request));
+}
+
+/*
+ * Return: 0 on success, negative error code on failure.
+ */
+static int pf_send_vf_cfg_reset(struct xe_gt *gt, u32 vfid)
+{
+ struct xe_guc *guc = &gt->uc.guc;
+ int ret;
+
+ ret = guc_action_update_vf_cfg(guc, vfid, 0, 0);
+
+ return ret <= 0 ? ret : -EPROTO;
+}
+
+/*
+ * Return: number of KLVs that were successfully parsed and saved,
+ * negative error code on failure.
+ */
+static int pf_send_vf_cfg_klvs(struct xe_gt *gt, u32 vfid, const u32 *klvs, u32 num_dwords)
+{
+ const u32 bytes = num_dwords * sizeof(u32);
+ struct xe_tile *tile = gt_to_tile(gt);
+ struct xe_device *xe = tile_to_xe(tile);
+ struct xe_guc *guc = &gt->uc.guc;
+ struct xe_bo *bo;
+ int ret;
+
+ bo = xe_bo_create_pin_map(xe, tile, NULL,
+ ALIGN(bytes, PAGE_SIZE),
+ ttm_bo_type_kernel,
+ XE_BO_FLAG_VRAM_IF_DGFX(tile) |
+ XE_BO_FLAG_GGTT |
+ XE_BO_FLAG_GGTT_INVALIDATE);
+ if (IS_ERR(bo))
+ return PTR_ERR(bo);
+
+ xe_map_memcpy_to(xe, &bo->vmap, 0, klvs, bytes);
+
+ ret = guc_action_update_vf_cfg(guc, vfid, xe_bo_ggtt_addr(bo), num_dwords);
+
+ xe_bo_unpin_map_no_vm(bo);
+
+ return ret;
+}
+
+/*
+ * Return: 0 on success, -ENOKEY if some KLVs were not updated, -EPROTO if reply was malformed,
+ * negative error code on failure.
+ */
+static int pf_push_vf_cfg_klvs(struct xe_gt *gt, unsigned int vfid, u32 num_klvs,
+ const u32 *klvs, u32 num_dwords)
+{
+ int ret;
+
+ xe_gt_assert(gt, num_klvs == xe_guc_klv_count(klvs, num_dwords));
+
+ ret = pf_send_vf_cfg_klvs(gt, vfid, klvs, num_dwords);
+
+ if (ret != num_klvs) {
+ int err = ret < 0 ? ret : ret < num_klvs ? -ENOKEY : -EPROTO;
+ struct drm_printer p = xe_gt_info_printer(gt);
+ char name[8];
+
+ xe_gt_sriov_notice(gt, "Failed to push %s %u config KLV%s (%pe)\n",
+ xe_sriov_function_name(vfid, name, sizeof(name)),
+ num_klvs, str_plural(num_klvs), ERR_PTR(err));
+ xe_guc_klv_print(klvs, num_dwords, &p);
+ return err;
+ }
+
+ if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_SRIOV)) {
+ struct drm_printer p = xe_gt_info_printer(gt);
+
+ xe_guc_klv_print(klvs, num_dwords, &p);
+ }
+
+ return 0;
+}
+
+static int pf_push_vf_cfg_u32(struct xe_gt *gt, unsigned int vfid, u16 key, u32 value)
+{
+ u32 klv[] = {
+ FIELD_PREP(GUC_KLV_0_KEY, key) | FIELD_PREP(GUC_KLV_0_LEN, 1),
+ value,
+ };
+
+ return pf_push_vf_cfg_klvs(gt, vfid, 1, klv, ARRAY_SIZE(klv));
+}
+
+static int pf_push_vf_cfg_u64(struct xe_gt *gt, unsigned int vfid, u16 key, u64 value)
+{
+ u32 klv[] = {
+ FIELD_PREP(GUC_KLV_0_KEY, key) | FIELD_PREP(GUC_KLV_0_LEN, 2),
+ lower_32_bits(value),
+ upper_32_bits(value),
+ };
+
+ return pf_push_vf_cfg_klvs(gt, vfid, 1, klv, ARRAY_SIZE(klv));
+}
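
For reference, the stream built by these helpers is a packed dword array: a header dword holding the key and the value length (in dwords), then the value dwords themselves. A sketch of the 64-bit case, reusing the LMEM size key from pf_push_vf_cfg_lmem() below:

u32 klv[3];

klv[0] = FIELD_PREP(GUC_KLV_0_KEY, GUC_KLV_VF_CFG_LMEM_SIZE_KEY) |
	 FIELD_PREP(GUC_KLV_0_LEN, 2);	/* LEN counts value dwords only */
klv[1] = lower_32_bits(size);		/* dw1: value bits 31:0 */
klv[2] = upper_32_bits(size);		/* dw2: value bits 63:32 */

/* xe_guc_klv_count(klv, ARRAY_SIZE(klv)) reports a single KLV here */
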
+
+static int pf_push_vf_cfg_ggtt(struct xe_gt *gt, unsigned int vfid, u64 start, u64 size)
+{
+ u32 klvs[] = {
+ PREP_GUC_KLV_TAG(VF_CFG_GGTT_START),
+ lower_32_bits(start),
+ upper_32_bits(start),
+ PREP_GUC_KLV_TAG(VF_CFG_GGTT_SIZE),
+ lower_32_bits(size),
+ upper_32_bits(size),
+ };
+
+ return pf_push_vf_cfg_klvs(gt, vfid, 2, klvs, ARRAY_SIZE(klvs));
+}
+
+static int pf_push_vf_cfg_ctxs(struct xe_gt *gt, unsigned int vfid, u32 begin, u32 num)
+{
+ u32 klvs[] = {
+ PREP_GUC_KLV_TAG(VF_CFG_BEGIN_CONTEXT_ID),
+ begin,
+ PREP_GUC_KLV_TAG(VF_CFG_NUM_CONTEXTS),
+ num,
+ };
+
+ return pf_push_vf_cfg_klvs(gt, vfid, 2, klvs, ARRAY_SIZE(klvs));
+}
+
+static int pf_push_vf_cfg_dbs(struct xe_gt *gt, unsigned int vfid, u32 begin, u32 num)
+{
+ u32 klvs[] = {
+ PREP_GUC_KLV_TAG(VF_CFG_BEGIN_DOORBELL_ID),
+ begin,
+ PREP_GUC_KLV_TAG(VF_CFG_NUM_DOORBELLS),
+ num,
+ };
+
+ return pf_push_vf_cfg_klvs(gt, vfid, 2, klvs, ARRAY_SIZE(klvs));
+}
+
+static int pf_push_vf_cfg_exec_quantum(struct xe_gt *gt, unsigned int vfid, u32 exec_quantum)
+{
+ return pf_push_vf_cfg_u32(gt, vfid, GUC_KLV_VF_CFG_EXEC_QUANTUM_KEY, exec_quantum);
+}
+
+static int pf_push_vf_cfg_preempt_timeout(struct xe_gt *gt, unsigned int vfid, u32 preempt_timeout)
+{
+ return pf_push_vf_cfg_u32(gt, vfid, GUC_KLV_VF_CFG_PREEMPT_TIMEOUT_KEY, preempt_timeout);
+}
+
+static int pf_push_vf_cfg_lmem(struct xe_gt *gt, unsigned int vfid, u64 size)
+{
+ return pf_push_vf_cfg_u64(gt, vfid, GUC_KLV_VF_CFG_LMEM_SIZE_KEY, size);
+}
+
+static struct xe_gt_sriov_config *pf_pick_vf_config(struct xe_gt *gt, unsigned int vfid)
+{
+ xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+ xe_gt_assert(gt, vfid <= xe_sriov_pf_get_totalvfs(gt_to_xe(gt)));
+ lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt));
+
+ return &gt->sriov.pf.vfs[vfid].config;
+}
+
+/* Return: number of configuration dwords written */
+static u32 encode_config_ggtt(u32 *cfg, const struct xe_gt_sriov_config *config)
+{
+ u32 n = 0;
+
+ if (drm_mm_node_allocated(&config->ggtt_region)) {
+ cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_GGTT_START);
+ cfg[n++] = lower_32_bits(config->ggtt_region.start);
+ cfg[n++] = upper_32_bits(config->ggtt_region.start);
+
+ cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_GGTT_SIZE);
+ cfg[n++] = lower_32_bits(config->ggtt_region.size);
+ cfg[n++] = upper_32_bits(config->ggtt_region.size);
+ }
+
+ return n;
+}
+
+/* Return: number of configuration dwords written */
+static u32 encode_config(u32 *cfg, const struct xe_gt_sriov_config *config)
+{
+ u32 n = 0;
+
+ n += encode_config_ggtt(cfg, config);
+
+ cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_BEGIN_CONTEXT_ID);
+ cfg[n++] = config->begin_ctx;
+
+ cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_NUM_CONTEXTS);
+ cfg[n++] = config->num_ctxs;
+
+ cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_BEGIN_DOORBELL_ID);
+ cfg[n++] = config->begin_db;
+
+ cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_NUM_DOORBELLS);
+ cfg[n++] = config->num_dbs;
+
+ if (config->lmem_obj) {
+ cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_LMEM_SIZE);
+ cfg[n++] = lower_32_bits(config->lmem_obj->size);
+ cfg[n++] = upper_32_bits(config->lmem_obj->size);
+ }
+
+ cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_EXEC_QUANTUM);
+ cfg[n++] = config->exec_quantum;
+
+ cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_PREEMPT_TIMEOUT);
+ cfg[n++] = config->preempt_timeout;
+
+ return n;
+}
+
+static int pf_push_full_vf_config(struct xe_gt *gt, unsigned int vfid)
+{
+ struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid);
+ u32 max_cfg_dwords = SZ_4K / sizeof(u32);
+ u32 num_dwords;
+ int num_klvs;
+ u32 *cfg;
+ int err;
+
+ cfg = kcalloc(max_cfg_dwords, sizeof(u32), GFP_KERNEL);
+ if (!cfg)
+ return -ENOMEM;
+
+ num_dwords = encode_config(cfg, config);
+ xe_gt_assert(gt, num_dwords <= max_cfg_dwords);
+
+ if (xe_gt_is_media_type(gt)) {
+ struct xe_gt *primary = gt->tile->primary_gt;
+ struct xe_gt_sriov_config *other = pf_pick_vf_config(primary, vfid);
+
+ /* media-GT will never include a GGTT config */
+ xe_gt_assert(gt, !encode_config_ggtt(cfg + num_dwords, config));
+
+ /* the GGTT config must be taken from the primary-GT instead */
+ num_dwords += encode_config_ggtt(cfg + num_dwords, other);
+ }
+ xe_gt_assert(gt, num_dwords <= max_cfg_dwords);
+
+ num_klvs = xe_guc_klv_count(cfg, num_dwords);
+ err = pf_push_vf_cfg_klvs(gt, vfid, num_klvs, cfg, num_dwords);
+
+ kfree(cfg);
+ return err;
+}
+
+static u64 pf_get_ggtt_alignment(struct xe_gt *gt)
+{
+ struct xe_device *xe = gt_to_xe(gt);
+
+ return IS_DGFX(xe) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K ? SZ_64K : SZ_4K;
+}
+
+static u64 pf_get_min_spare_ggtt(struct xe_gt *gt)
+{
+ /* XXX: preliminary */
+ return IS_ENABLED(CONFIG_DRM_XE_DEBUG_SRIOV) ?
+ pf_get_ggtt_alignment(gt) : SZ_64M;
+}
+
+static u64 pf_get_spare_ggtt(struct xe_gt *gt)
+{
+ u64 spare;
+
+ xe_gt_assert(gt, !xe_gt_is_media_type(gt));
+ xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+ lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt));
+
+ spare = gt->sriov.pf.spare.ggtt_size;
+ spare = max_t(u64, spare, pf_get_min_spare_ggtt(gt));
+
+ return spare;
+}
+
+static int pf_set_spare_ggtt(struct xe_gt *gt, u64 size)
+{
+ xe_gt_assert(gt, !xe_gt_is_media_type(gt));
+ xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+ lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt));
+
+ if (size && size < pf_get_min_spare_ggtt(gt))
+ return -EINVAL;
+
+ size = round_up(size, pf_get_ggtt_alignment(gt));
+ gt->sriov.pf.spare.ggtt_size = size;
+
+ return 0;
+}
+
+static int pf_distribute_config_ggtt(struct xe_tile *tile, unsigned int vfid, u64 start, u64 size)
+{
+ int err, err2 = 0;
+
+ err = pf_push_vf_cfg_ggtt(tile->primary_gt, vfid, start, size);
+
+ if (tile->media_gt && !err)
+ err2 = pf_push_vf_cfg_ggtt(tile->media_gt, vfid, start, size);
+
+ return err ?: err2;
+}
+
+static void pf_release_ggtt(struct xe_tile *tile, struct drm_mm_node *node)
+{
+ struct xe_ggtt *ggtt = tile->mem.ggtt;
+
+ if (drm_mm_node_allocated(node)) {
+ /*
+ * Explicit GGTT PTE assignment to the PF using xe_ggtt_assign()
+ * is redundant, as the PTE will be implicitly re-assigned to the
+ * PF by the xe_ggtt_clear() called from xe_ggtt_remove_node() below.
+ */
+ xe_ggtt_remove_node(ggtt, node, false);
+ }
+}
+
+static void pf_release_vf_config_ggtt(struct xe_gt *gt, struct xe_gt_sriov_config *config)
+{
+ pf_release_ggtt(gt_to_tile(gt), &config->ggtt_region);
+}
+
+static int pf_provision_vf_ggtt(struct xe_gt *gt, unsigned int vfid, u64 size)
+{
+ struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid);
+ struct drm_mm_node *node = &config->ggtt_region;
+ struct xe_tile *tile = gt_to_tile(gt);
+ struct xe_ggtt *ggtt = tile->mem.ggtt;
+ u64 alignment = pf_get_ggtt_alignment(gt);
+ int err;
+
+ xe_gt_assert(gt, vfid);
+ xe_gt_assert(gt, !xe_gt_is_media_type(gt));
+ xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+
+ size = round_up(size, alignment);
+
+ if (drm_mm_node_allocated(node)) {
+ err = pf_distribute_config_ggtt(tile, vfid, 0, 0);
+ if (unlikely(err))
+ return err;
+
+ pf_release_ggtt(tile, node);
+ }
+ xe_gt_assert(gt, !drm_mm_node_allocated(node));
+
+ if (!size)
+ return 0;
+
+ err = xe_ggtt_insert_special_node(ggtt, node, size, alignment);
+ if (unlikely(err))
+ return err;
+
+ xe_ggtt_assign(ggtt, node, vfid);
+ xe_gt_sriov_dbg_verbose(gt, "VF%u assigned GGTT %llx-%llx\n",
+ vfid, node->start, node->start + node->size - 1);
+
+ err = pf_distribute_config_ggtt(gt->tile, vfid, node->start, node->size);
+ if (unlikely(err))
+ return err;
+
+ return 0;
+}
+
+static u64 pf_get_vf_config_ggtt(struct xe_gt *gt, unsigned int vfid)
+{
+ struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid);
+ struct drm_mm_node *node = &config->ggtt_region;
+
+ xe_gt_assert(gt, !xe_gt_is_media_type(gt));
+ return drm_mm_node_allocated(node) ? node->size : 0;
+}
+
+/**
+ * xe_gt_sriov_pf_config_get_ggtt - Query size of GGTT address space of the VF.
+ * @gt: the &xe_gt
+ * @vfid: the VF identifier
+ *
+ * This function can only be called on PF.
+ *
+ * Return: size of the VF's assigned (or PF's spare) GGTT address space.
+ */
+u64 xe_gt_sriov_pf_config_get_ggtt(struct xe_gt *gt, unsigned int vfid)
+{
+ u64 size;
+
+ mutex_lock(xe_gt_sriov_pf_master_mutex(gt));
+ if (vfid)
+ size = pf_get_vf_config_ggtt(gt_to_tile(gt)->primary_gt, vfid);
+ else
+ size = pf_get_spare_ggtt(gt_to_tile(gt)->primary_gt);
+ mutex_unlock(xe_gt_sriov_pf_master_mutex(gt));
+
+ return size;
+}
+
+static int pf_config_set_u64_done(struct xe_gt *gt, unsigned int vfid, u64 value,
+ u64 actual, const char *what, int err)
+{
+ char size[10];
+ char name[8];
+
+ xe_sriov_function_name(vfid, name, sizeof(name));
+
+ if (unlikely(err)) {
+ string_get_size(value, 1, STRING_UNITS_2, size, sizeof(size));
+ xe_gt_sriov_notice(gt, "Failed to provision %s with %llu (%s) %s (%pe)\n",
+ name, value, size, what, ERR_PTR(err));
+ string_get_size(actual, 1, STRING_UNITS_2, size, sizeof(size));
+ xe_gt_sriov_info(gt, "%s provisioning remains at %llu (%s) %s\n",
+ name, actual, size, what);
+ return err;
+ }
+
+ /* the actual value may have changed during provisioning */
+ string_get_size(actual, 1, STRING_UNITS_2, size, sizeof(size));
+ xe_gt_sriov_info(gt, "%s provisioned with %llu (%s) %s\n",
+ name, actual, size, what);
+ return 0;
+}
+
+/**
+ * xe_gt_sriov_pf_config_set_ggtt - Provision VF with GGTT space.
+ * @gt: the &xe_gt (can't be media)
+ * @vfid: the VF identifier
+ * @size: requested GGTT size
+ *
+ * If &vfid represents the PF, then the function will change the PF's spare GGTT config.
+ *
+ * This function can only be called on PF.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_gt_sriov_pf_config_set_ggtt(struct xe_gt *gt, unsigned int vfid, u64 size)
+{
+ int err;
+
+ xe_gt_assert(gt, !xe_gt_is_media_type(gt));
+
+ mutex_lock(xe_gt_sriov_pf_master_mutex(gt));
+ if (vfid)
+ err = pf_provision_vf_ggtt(gt, vfid, size);
+ else
+ err = pf_set_spare_ggtt(gt, size);
+ mutex_unlock(xe_gt_sriov_pf_master_mutex(gt));
+
+ return pf_config_set_u64_done(gt, vfid, size,
+ xe_gt_sriov_pf_config_get_ggtt(gt, vfid),
+ vfid ? "GGTT" : "spare GGTT", err);
+}
+
+static int pf_config_bulk_set_u64_done(struct xe_gt *gt, unsigned int first, unsigned int num_vfs,
+ u64 value, u64 (*get)(struct xe_gt*, unsigned int),
+ const char *what, unsigned int last, int err)
+{
+ char size[10];
+
+ xe_gt_assert(gt, first);
+ xe_gt_assert(gt, num_vfs);
+ xe_gt_assert(gt, first <= last);
+
+ if (num_vfs == 1)
+ return pf_config_set_u64_done(gt, first, value, get(gt, first), what, err);
+
+ if (unlikely(err)) {
+ xe_gt_sriov_notice(gt, "Failed to bulk provision VF%u..VF%u with %s\n",
+ first, first + num_vfs - 1, what);
+ if (last > first)
+ pf_config_bulk_set_u64_done(gt, first, last - first, value,
+ get, what, last, 0);
+ return pf_config_set_u64_done(gt, last, value, get(gt, last), what, err);
+ }
+
+ /* pick actual value from first VF - bulk provisioning shall be equal across all VFs */
+ value = get(gt, first);
+ string_get_size(value, 1, STRING_UNITS_2, size, sizeof(size));
+ xe_gt_sriov_info(gt, "VF%u..VF%u provisioned with %llu (%s) %s\n",
+ first, first + num_vfs - 1, value, size, what);
+ return 0;
+}
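
A note on the error path above (a reading aid, with illustrative numbers): the recursion splits the report so partial progress is still logged. For a hypothetical VF1..VF8 bulk request that fails at VF4:

/*
 *   notice:  "Failed to bulk provision VF1..VF8 with ..."
 *   recursion (first=1, num_vfs=3, err=0): "VF1..VF3 provisioned with ..."
 *   tail call for last=4 then reports the actual error against VF4 alone
 */
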
+
+/**
+ * xe_gt_sriov_pf_config_bulk_set_ggtt - Provision many VFs with GGTT.
+ * @gt: the &xe_gt (can't be media)
+ * @vfid: starting VF identifier (can't be 0)
+ * @num_vfs: number of VFs to provision
+ * @size: requested GGTT size
+ *
+ * This function can only be called on PF.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_gt_sriov_pf_config_bulk_set_ggtt(struct xe_gt *gt, unsigned int vfid,
+ unsigned int num_vfs, u64 size)
+{
+ unsigned int n;
+ int err = 0;
+
+ xe_gt_assert(gt, vfid);
+ xe_gt_assert(gt, !xe_gt_is_media_type(gt));
+
+ if (!num_vfs)
+ return 0;
+
+ mutex_lock(xe_gt_sriov_pf_master_mutex(gt));
+ for (n = vfid; n < vfid + num_vfs; n++) {
+ err = pf_provision_vf_ggtt(gt, n, size);
+ if (err)
+ break;
+ }
+ mutex_unlock(xe_gt_sriov_pf_master_mutex(gt));
+
+ return pf_config_bulk_set_u64_done(gt, vfid, num_vfs, size,
+ xe_gt_sriov_pf_config_get_ggtt,
+ "GGTT", n, err);
+}
+
+/* Return: size of the largest continuous GGTT region */
+static u64 pf_get_max_ggtt(struct xe_gt *gt)
+{
+ struct xe_ggtt *ggtt = gt_to_tile(gt)->mem.ggtt;
+ const struct drm_mm *mm = &ggtt->mm;
+ const struct drm_mm_node *entry;
+ u64 alignment = pf_get_ggtt_alignment(gt);
+ u64 spare = pf_get_spare_ggtt(gt);
+ u64 hole_min_start = xe_wopcm_size(gt_to_xe(gt));
+ u64 hole_start, hole_end, hole_size;
+ u64 max_hole = 0;
+
+ mutex_lock(&ggtt->lock);
+
+ drm_mm_for_each_hole(entry, mm, hole_start, hole_end) {
+ hole_start = max(hole_start, hole_min_start);
+ hole_start = ALIGN(hole_start, alignment);
+ hole_end = ALIGN_DOWN(hole_end, alignment);
+ if (hole_start >= hole_end)
+ continue;
+ hole_size = hole_end - hole_start;
+ xe_gt_sriov_dbg_verbose(gt, "HOLE start %llx size %lluK\n",
+ hole_start, hole_size / SZ_1K);
+ spare -= min3(spare, hole_size, max_hole);
+ max_hole = max(max_hole, hole_size);
+ }
+
+ mutex_unlock(&ggtt->lock);
+
+ xe_gt_sriov_dbg_verbose(gt, "HOLE max %lluK reserved %lluK\n",
+ max_hole / SZ_1K, spare / SZ_1K);
+ return max_hole > spare ? max_hole - spare : 0;
+}
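
To make the hole/spare accounting above concrete, a hypothetical walk (illustrative values only):

/*
 * spare = 64M, two aligned holes of 16M and 256M:
 *   16M hole:  spare -= min3(64M, 16M, max_hole=0)  -> spare 64M, max_hole 16M
 *   256M hole: spare -= min3(64M, 256M, 16M)        -> spare 48M, max_hole 256M
 * return: max_hole - spare = 256M - 48M = 208M usable for VF provisioning
 */
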
+
+static u64 pf_estimate_fair_ggtt(struct xe_gt *gt, unsigned int num_vfs)
+{
+ u64 available = pf_get_max_ggtt(gt);
+ u64 alignment = pf_get_ggtt_alignment(gt);
+ u64 fair;
+
+ /*
+ * To simplify the logic we only look at the single largest GGTT region,
+ * as that will always be the best fit for the 1 VF case, and most likely
+ * will also nicely cover other cases where VFs are provisioned on a
+ * fresh and idle PF driver, without any stale GGTT allocations spread
+ * in the middle of the full GGTT range.
+ */
+
+ fair = div_u64(available, num_vfs);
+ fair = ALIGN_DOWN(fair, alignment);
+ xe_gt_sriov_dbg_verbose(gt, "GGTT available(%lluK) fair(%u x %lluK)\n",
+ available / SZ_1K, num_vfs, fair / SZ_1K);
+ return fair;
+}
+
+/**
+ * xe_gt_sriov_pf_config_set_fair_ggtt - Provision many VFs with fair GGTT.
+ * @gt: the &xe_gt (can't be media)
+ * @vfid: starting VF identifier (can't be 0)
+ * @num_vfs: number of VFs to provision
+ *
+ * This function can only be called on PF.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_gt_sriov_pf_config_set_fair_ggtt(struct xe_gt *gt, unsigned int vfid,
+ unsigned int num_vfs)
+{
+ u64 fair;
+
+ xe_gt_assert(gt, vfid);
+ xe_gt_assert(gt, num_vfs);
+ xe_gt_assert(gt, !xe_gt_is_media_type(gt));
+
+ mutex_lock(xe_gt_sriov_pf_master_mutex(gt));
+ fair = pf_estimate_fair_ggtt(gt, num_vfs);
+ mutex_unlock(xe_gt_sriov_pf_master_mutex(gt));
+
+ if (!fair)
+ return -ENOSPC;
+
+ return xe_gt_sriov_pf_config_bulk_set_ggtt(gt, vfid, num_vfs, fair);
+}
+
+static u32 pf_get_min_spare_ctxs(struct xe_gt *gt)
+{
+ /* XXX: preliminary */
+ return IS_ENABLED(CONFIG_DRM_XE_DEBUG_SRIOV) ?
+ hweight64(gt->info.engine_mask) : SZ_256;
+}
+
+static u32 pf_get_spare_ctxs(struct xe_gt *gt)
+{
+ u32 spare;
+
+ xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+ lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt));
+
+ spare = gt->sriov.pf.spare.num_ctxs;
+ spare = max_t(u32, spare, pf_get_min_spare_ctxs(gt));
+
+ return spare;
+}
+
+static int pf_set_spare_ctxs(struct xe_gt *gt, u32 spare)
+{
+ xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+ lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt));
+
+ if (spare > GUC_ID_MAX)
+ return -EINVAL;
+
+ if (spare && spare < pf_get_min_spare_ctxs(gt))
+ return -EINVAL;
+
+ gt->sriov.pf.spare.num_ctxs = spare;
+
+ return 0;
+}
+
+/* Return: start ID or negative error code on failure */
+static int pf_reserve_ctxs(struct xe_gt *gt, u32 num)
+{
+ struct xe_guc_id_mgr *idm = &gt->uc.guc.submission_state.idm;
+ unsigned int spare = pf_get_spare_ctxs(gt);
+
+ return xe_guc_id_mgr_reserve(idm, num, spare);
+}
+
+static void pf_release_ctxs(struct xe_gt *gt, u32 start, u32 num)
+{
+ struct xe_guc_id_mgr *idm = &gt->uc.guc.submission_state.idm;
+
+ if (num)
+ xe_guc_id_mgr_release(idm, start, num);
+}
+
+static void pf_release_config_ctxs(struct xe_gt *gt, struct xe_gt_sriov_config *config)
+{
+ lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt));
+
+ pf_release_ctxs(gt, config->begin_ctx, config->num_ctxs);
+ config->begin_ctx = 0;
+ config->num_ctxs = 0;
+}
+
+static int pf_provision_vf_ctxs(struct xe_gt *gt, unsigned int vfid, u32 num_ctxs)
+{
+ struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid);
+ int ret;
+
+ xe_gt_assert(gt, vfid);
+
+ if (num_ctxs > GUC_ID_MAX)
+ return -EINVAL;
+
+ if (config->num_ctxs) {
+ ret = pf_push_vf_cfg_ctxs(gt, vfid, 0, 0);
+ if (unlikely(ret))
+ return ret;
+
+ pf_release_config_ctxs(gt, config);
+ }
+
+ if (!num_ctxs)
+ return 0;
+
+ ret = pf_reserve_ctxs(gt, num_ctxs);
+ if (unlikely(ret < 0))
+ return ret;
+
+ config->begin_ctx = ret;
+ config->num_ctxs = num_ctxs;
+
+ ret = pf_push_vf_cfg_ctxs(gt, vfid, config->begin_ctx, config->num_ctxs);
+ if (unlikely(ret)) {
+ pf_release_config_ctxs(gt, config);
+ return ret;
+ }
+
+ xe_gt_sriov_dbg_verbose(gt, "VF%u contexts %u-%u\n",
+ vfid, config->begin_ctx, config->begin_ctx + config->num_ctxs - 1);
+ return 0;
+}
+
+static u32 pf_get_vf_config_ctxs(struct xe_gt *gt, unsigned int vfid)
+{
+ struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid);
+
+ return config->num_ctxs;
+}
+
+/**
+ * xe_gt_sriov_pf_config_get_ctxs - Get VF's GuC context IDs quota.
+ * @gt: the &xe_gt
+ * @vfid: the VF identifier
+ *
+ * This function can only be called on PF.
+ * If &vfid represents the PF, then the number of PF's spare GuC context IDs is returned.
+ *
+ * Return: VF's quota (or PF's spare).
+ */
+u32 xe_gt_sriov_pf_config_get_ctxs(struct xe_gt *gt, unsigned int vfid)
+{
+ u32 num_ctxs;
+
+ mutex_lock(xe_gt_sriov_pf_master_mutex(gt));
+ if (vfid)
+ num_ctxs = pf_get_vf_config_ctxs(gt, vfid);
+ else
+ num_ctxs = pf_get_spare_ctxs(gt);
+ mutex_unlock(xe_gt_sriov_pf_master_mutex(gt));
+
+ return num_ctxs;
+}
+
+static const char *no_unit(u32 unused)
+{
+ return "";
+}
+
+static const char *spare_unit(u32 unused)
+{
+ return " spare";
+}
+
+static int pf_config_set_u32_done(struct xe_gt *gt, unsigned int vfid, u32 value, u32 actual,
+ const char *what, const char *(*unit)(u32), int err)
+{
+ char name[8];
+
+ xe_sriov_function_name(vfid, name, sizeof(name));
+
+ if (unlikely(err)) {
+ xe_gt_sriov_notice(gt, "Failed to provision %s with %u%s %s (%pe)\n",
+ name, value, unit(value), what, ERR_PTR(err));
+ xe_gt_sriov_info(gt, "%s provisioning remains at %u%s %s\n",
+ name, actual, unit(actual), what);
+ return err;
+ }
+
+ /* the actual value may have changed during provisioning */
+ xe_gt_sriov_info(gt, "%s provisioned with %u%s %s\n",
+ name, actual, unit(actual), what);
+ return 0;
+}
+
+/**
+ * xe_gt_sriov_pf_config_set_ctxs - Configure GuC context IDs quota for the VF.
+ * @gt: the &xe_gt
+ * @vfid: the VF identifier
+ * @num_ctxs: requested number of GuC context IDs (0 to release)
+ *
+ * This function can only be called on PF.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_gt_sriov_pf_config_set_ctxs(struct xe_gt *gt, unsigned int vfid, u32 num_ctxs)
+{
+ int err;
+
+ mutex_lock(xe_gt_sriov_pf_master_mutex(gt));
+ if (vfid)
+ err = pf_provision_vf_ctxs(gt, vfid, num_ctxs);
+ else
+ err = pf_set_spare_ctxs(gt, num_ctxs);
+ mutex_unlock(xe_gt_sriov_pf_master_mutex(gt));
+
+ return pf_config_set_u32_done(gt, vfid, num_ctxs,
+ xe_gt_sriov_pf_config_get_ctxs(gt, vfid),
+ "GuC context IDs", vfid ? no_unit : spare_unit, err);
+}
+
+static int pf_config_bulk_set_u32_done(struct xe_gt *gt, unsigned int first, unsigned int num_vfs,
+ u32 value, u32 (*get)(struct xe_gt*, unsigned int),
+ const char *what, const char *(*unit)(u32),
+ unsigned int last, int err)
+{
+ xe_gt_assert(gt, first);
+ xe_gt_assert(gt, num_vfs);
+ xe_gt_assert(gt, first <= last);
+
+ if (num_vfs == 1)
+ return pf_config_set_u32_done(gt, first, value, get(gt, first), what, unit, err);
+
+ if (unlikely(err)) {
+ xe_gt_sriov_notice(gt, "Failed to bulk provision VF%u..VF%u with %s\n",
+ first, first + num_vfs - 1, what);
+ if (last > first)
+ pf_config_bulk_set_u32_done(gt, first, last - first, value,
+ get, what, unit, last, 0);
+ return pf_config_set_u32_done(gt, last, value, get(gt, last), what, unit, err);
+ }
+
+ /* pick actual value from first VF - bulk provisioning shall be equal across all VFs */
+ value = get(gt, first);
+ xe_gt_sriov_info(gt, "VF%u..VF%u provisioned with %u%s %s\n",
+ first, first + num_vfs - 1, value, unit(value), what);
+ return 0;
+}
+
+/**
+ * xe_gt_sriov_pf_config_bulk_set_ctxs - Provision many VFs with GuC context IDs.
+ * @gt: the &xe_gt
+ * @vfid: starting VF identifier
+ * @num_vfs: number of VFs to provision
+ * @num_ctxs: requested number of GuC context IDs (0 to release)
+ *
+ * This function can only be called on PF.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_gt_sriov_pf_config_bulk_set_ctxs(struct xe_gt *gt, unsigned int vfid,
+ unsigned int num_vfs, u32 num_ctxs)
+{
+ unsigned int n;
+ int err = 0;
+
+ xe_gt_assert(gt, vfid);
+
+ if (!num_vfs)
+ return 0;
+
+ mutex_lock(xe_gt_sriov_pf_master_mutex(gt));
+ for (n = vfid; n < vfid + num_vfs; n++) {
+ err = pf_provision_vf_ctxs(gt, n, num_ctxs);
+ if (err)
+ break;
+ }
+ mutex_unlock(xe_gt_sriov_pf_master_mutex(gt));
+
+ return pf_config_bulk_set_u32_done(gt, vfid, num_vfs, num_ctxs,
+ xe_gt_sriov_pf_config_get_ctxs,
+ "GuC context IDs", no_unit, n, err);
+}
+
+static u32 pf_estimate_fair_ctxs(struct xe_gt *gt, unsigned int num_vfs)
+{
+ struct xe_guc_id_mgr *idm = &gt->uc.guc.submission_state.idm;
+ u32 spare = pf_get_spare_ctxs(gt);
+ u32 fair = (idm->total - spare) / num_vfs;
+ int ret;
+
+ for (; fair; --fair) {
+ ret = xe_guc_id_mgr_reserve(idm, fair * num_vfs, spare);
+ if (ret < 0)
+ continue;
+ xe_guc_id_mgr_release(idm, ret, fair * num_vfs);
+ break;
+ }
+
+ xe_gt_sriov_dbg_verbose(gt, "contexts fair(%u x %u)\n", num_vfs, fair);
+ return fair;
+}
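
The loop above is a trial reserve-and-release search; a reading aid with illustrative numbers:

/*
 * idm->total = 1024, spare = 256, num_vfs = 3:
 *   fair = (1024 - 256) / 3 = 256; trial-reserve 256 * 3 = 768 IDs while
 *   preserving the 256 spare - fits on an empty manager, and the trial is
 *   released immediately (only the computed quota is kept). With IDs
 *   already in use, fair decrements until a trial reservation succeeds.
 */
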
+
+/**
+ * xe_gt_sriov_pf_config_set_fair_ctxs - Provision many VFs with fair GuC context IDs.
+ * @gt: the &xe_gt
+ * @vfid: starting VF identifier (can't be 0)
+ * @num_vfs: number of VFs to provision (can't be 0)
+ *
+ * This function can only be called on PF.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_gt_sriov_pf_config_set_fair_ctxs(struct xe_gt *gt, unsigned int vfid,
+ unsigned int num_vfs)
+{
+ u32 fair;
+
+ xe_gt_assert(gt, vfid);
+ xe_gt_assert(gt, num_vfs);
+
+ mutex_lock(xe_gt_sriov_pf_master_mutex(gt));
+ fair = pf_estimate_fair_ctxs(gt, num_vfs);
+ mutex_unlock(xe_gt_sriov_pf_master_mutex(gt));
+
+ if (!fair)
+ return -ENOSPC;
+
+ return xe_gt_sriov_pf_config_bulk_set_ctxs(gt, vfid, num_vfs, fair);
+}
+
+static u32 pf_get_min_spare_dbs(struct xe_gt *gt)
+{
+ /* XXX: preliminary, we don't use doorbells yet! */
+ return IS_ENABLED(CONFIG_DRM_XE_DEBUG_SRIOV) ? 1 : 0;
+}
+
+static u32 pf_get_spare_dbs(struct xe_gt *gt)
+{
+ u32 spare;
+
+ xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+ lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt));
+
+ spare = gt->sriov.pf.spare.num_dbs;
+ spare = max_t(u32, spare, pf_get_min_spare_dbs(gt));
+
+ return spare;
+}
+
+static int pf_set_spare_dbs(struct xe_gt *gt, u32 spare)
+{
+ xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+ lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt));
+
+ if (spare > GUC_NUM_DOORBELLS)
+ return -EINVAL;
+
+ if (spare && spare < pf_get_min_spare_dbs(gt))
+ return -EINVAL;
+
+ gt->sriov.pf.spare.num_dbs = spare;
+ return 0;
+}
+
+/* Return: start ID or negative error code on failure */
+static int pf_reserve_dbs(struct xe_gt *gt, u32 num)
+{
+ struct xe_guc_db_mgr *dbm = &gt->uc.guc.dbm;
+ unsigned int spare = pf_get_spare_dbs(gt);
+
+ return xe_guc_db_mgr_reserve_range(dbm, num, spare);
+}
+
+static void pf_release_dbs(struct xe_gt *gt, u32 start, u32 num)
+{
+ struct xe_guc_db_mgr *dbm = &gt->uc.guc.dbm;
+
+ if (num)
+ xe_guc_db_mgr_release_range(dbm, start, num);
+}
+
+static void pf_release_config_dbs(struct xe_gt *gt, struct xe_gt_sriov_config *config)
+{
+ lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt));
+
+ pf_release_dbs(gt, config->begin_db, config->num_dbs);
+ config->begin_db = 0;
+ config->num_dbs = 0;
+}
+
+static int pf_provision_vf_dbs(struct xe_gt *gt, unsigned int vfid, u32 num_dbs)
+{
+ struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid);
+ int ret;
+
+ xe_gt_assert(gt, vfid);
+
+ if (num_dbs > GUC_NUM_DOORBELLS)
+ return -EINVAL;
+
+ if (config->num_dbs) {
+ ret = pf_push_vf_cfg_dbs(gt, vfid, 0, 0);
+ if (unlikely(ret))
+ return ret;
+
+ pf_release_config_dbs(gt, config);
+ }
+
+ if (!num_dbs)
+ return 0;
+
+ ret = pf_reserve_dbs(gt, num_dbs);
+ if (unlikely(ret < 0))
+ return ret;
+
+ config->begin_db = ret;
+ config->num_dbs = num_dbs;
+
+ ret = pf_push_vf_cfg_dbs(gt, vfid, config->begin_db, config->num_dbs);
+ if (unlikely(ret)) {
+ pf_release_config_dbs(gt, config);
+ return ret;
+ }
+
+ xe_gt_sriov_dbg_verbose(gt, "VF%u doorbells %u-%u\n",
+ vfid, config->begin_db, config->begin_db + config->num_dbs - 1);
+ return 0;
+}
+
+static u32 pf_get_vf_config_dbs(struct xe_gt *gt, unsigned int vfid)
+{
+ struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid);
+
+ return config->num_dbs;
+}
+
+/**
+ * xe_gt_sriov_pf_config_get_dbs - Get VF's GuC doorbell IDs quota.
+ * @gt: the &xe_gt
+ * @vfid: the VF identifier
+ *
+ * This function can only be called on PF.
+ * If &vfid represents a PF then the number of PF's spare GuC doorbell IDs is returned.
+ *
+ * Return: VF's quota (or PF's spare).
+ */
+u32 xe_gt_sriov_pf_config_get_dbs(struct xe_gt *gt, unsigned int vfid)
+{
+ u32 num_dbs;
+
+ xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+ xe_gt_assert(gt, vfid <= xe_sriov_pf_get_totalvfs(gt_to_xe(gt)));
+
+ mutex_lock(xe_gt_sriov_pf_master_mutex(gt));
+ if (vfid)
+ num_dbs = pf_get_vf_config_dbs(gt, vfid);
+ else
+ num_dbs = pf_get_spare_dbs(gt);
+ mutex_unlock(xe_gt_sriov_pf_master_mutex(gt));
+
+ return num_dbs;
+}
+
+/**
+ * xe_gt_sriov_pf_config_set_dbs - Configure GuC doorbell IDs quota for the VF.
+ * @gt: the &xe_gt
+ * @vfid: the VF identifier
+ * @num_dbs: requested number of GuC doorbell IDs (0 to release)
+ *
+ * This function can only be called on PF.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_gt_sriov_pf_config_set_dbs(struct xe_gt *gt, unsigned int vfid, u32 num_dbs)
+{
+ int err;
+
+ xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+ xe_gt_assert(gt, vfid <= xe_sriov_pf_get_totalvfs(gt_to_xe(gt)));
+
+ mutex_lock(xe_gt_sriov_pf_master_mutex(gt));
+ if (vfid)
+ err = pf_provision_vf_dbs(gt, vfid, num_dbs);
+ else
+ err = pf_set_spare_dbs(gt, num_dbs);
+ mutex_unlock(xe_gt_sriov_pf_master_mutex(gt));
+
+ return pf_config_set_u32_done(gt, vfid, num_dbs,
+ xe_gt_sriov_pf_config_get_dbs(gt, vfid),
+ "GuC doorbell IDs", vfid ? no_unit : spare_unit, err);
+}
+
+/**
+ * xe_gt_sriov_pf_config_bulk_set_dbs - Provision many VFs with GuC doorbell IDs.
+ * @gt: the &xe_gt
+ * @vfid: starting VF identifier (can't be 0)
+ * @num_vfs: number of VFs to provision
+ * @num_dbs: requested number of GuC doorbell IDs (0 to release)
+ *
+ * This function can only be called on PF.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_gt_sriov_pf_config_bulk_set_dbs(struct xe_gt *gt, unsigned int vfid,
+ unsigned int num_vfs, u32 num_dbs)
+{
+ unsigned int n;
+ int err = 0;
+
+ xe_gt_assert(gt, vfid);
+
+ if (!num_vfs)
+ return 0;
+
+ mutex_lock(xe_gt_sriov_pf_master_mutex(gt));
+ for (n = vfid; n < vfid + num_vfs; n++) {
+ err = pf_provision_vf_dbs(gt, n, num_dbs);
+ if (err)
+ break;
+ }
+ mutex_unlock(xe_gt_sriov_pf_master_mutex(gt));
+
+ return pf_config_bulk_set_u32_done(gt, vfid, num_vfs, num_dbs,
+ xe_gt_sriov_pf_config_get_dbs,
+ "GuC doorbell IDs", no_unit, n, err);
+}
+
+static u32 pf_estimate_fair_dbs(struct xe_gt *gt, unsigned int num_vfs)
+{
+ struct xe_guc_db_mgr *dbm = &gt->uc.guc.dbm;
+ u32 spare = pf_get_spare_dbs(gt);
+ u32 fair = (GUC_NUM_DOORBELLS - spare) / num_vfs;
+ int ret;
+
+ for (; fair; --fair) {
+ ret = xe_guc_db_mgr_reserve_range(dbm, fair * num_vfs, spare);
+ if (ret < 0)
+ continue;
+ xe_guc_db_mgr_release_range(dbm, ret, fair * num_vfs);
+ break;
+ }
+
+ xe_gt_sriov_dbg_verbose(gt, "doorbells fair(%u x %u)\n", num_vfs, fair);
+ return fair;
+}
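+
+/*
+ * Worked example (spare value assumed for illustration): with
+ * GUC_NUM_DOORBELLS == 256, spare == 1 and num_vfs == 3, the first
+ * candidate is fair = (256 - 1) / 3 = 85, so the trial reservation asks
+ * for 3 * 85 = 255 doorbells while keeping 1 spare; on failure the loop
+ * retries with fair - 1, and so on.
+ */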
+
+/**
+ * xe_gt_sriov_pf_config_set_fair_dbs - Provision many VFs with fair GuC doorbell IDs.
+ * @gt: the &xe_gt
+ * @vfid: starting VF identifier (can't be 0)
+ * @num_vfs: number of VFs to provision (can't be 0)
+ *
+ * This function can only be called on PF.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_gt_sriov_pf_config_set_fair_dbs(struct xe_gt *gt, unsigned int vfid,
+ unsigned int num_vfs)
+{
+ u32 fair;
+
+ xe_gt_assert(gt, vfid);
+ xe_gt_assert(gt, num_vfs);
+
+ mutex_lock(xe_gt_sriov_pf_master_mutex(gt));
+ fair = pf_estimate_fair_dbs(gt, num_vfs);
+ mutex_unlock(xe_gt_sriov_pf_master_mutex(gt));
+
+ if (!fair)
+ return -ENOSPC;
+
+ return xe_gt_sriov_pf_config_bulk_set_dbs(gt, vfid, num_vfs, fair);
+}
+
+static u64 pf_get_lmem_alignment(struct xe_gt *gt)
+{
+ /* this might be platform dependent */
+ return SZ_2M;
+}
+
+static u64 pf_get_min_spare_lmem(struct xe_gt *gt)
+{
+ /* this might be platform dependent */
+ return SZ_128M; /* XXX: preliminary */
+}
+
+static u64 pf_get_spare_lmem(struct xe_gt *gt)
+{
+ u64 spare;
+
+ xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+ lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt));
+
+ spare = gt->sriov.pf.spare.lmem_size;
+ spare = max_t(u64, spare, pf_get_min_spare_lmem(gt));
+
+ return spare;
+}
+
+static int pf_set_spare_lmem(struct xe_gt *gt, u64 size)
+{
+ xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+ lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt));
+
+ if (size && size < pf_get_min_spare_lmem(gt))
+ return -EINVAL;
+
+ gt->sriov.pf.spare.lmem_size = size;
+ return 0;
+}
+
+static u64 pf_get_vf_config_lmem(struct xe_gt *gt, unsigned int vfid)
+{
+ struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid);
+ struct xe_bo *bo;
+
+ bo = config->lmem_obj;
+ return bo ? bo->size : 0;
+}
+
+static int pf_distribute_config_lmem(struct xe_gt *gt, unsigned int vfid, u64 size)
+{
+ struct xe_device *xe = gt_to_xe(gt);
+ struct xe_tile *tile;
+ unsigned int tid;
+ int err;
+
+ for_each_tile(tile, xe, tid) {
+ if (tile->primary_gt == gt) {
+ err = pf_push_vf_cfg_lmem(gt, vfid, size);
+ } else {
+ u64 lmem = pf_get_vf_config_lmem(tile->primary_gt, vfid);
+
+ if (!lmem)
+ continue;
+ err = pf_push_vf_cfg_lmem(gt, vfid, lmem);
+ }
+ if (unlikely(err))
+ return err;
+ }
+ return 0;
+}
+
+static void pf_force_lmtt_invalidate(struct xe_device *xe)
+{
+ /* TODO */
+}
+
+static void pf_reset_vf_lmtt(struct xe_device *xe, unsigned int vfid)
+{
+ struct xe_lmtt *lmtt;
+ struct xe_tile *tile;
+ unsigned int tid;
+
+ for_each_tile(tile, xe, tid) {
+ lmtt = &tile->sriov.pf.lmtt;
+ xe_lmtt_drop_pages(lmtt, vfid);
+ }
+}
+
+static int pf_update_vf_lmtt(struct xe_device *xe, unsigned int vfid)
+{
+ struct xe_gt_sriov_config *config;
+ struct xe_tile *tile;
+ struct xe_lmtt *lmtt;
+ struct xe_bo *bo;
+ struct xe_gt *gt;
+ u64 total, offset;
+ unsigned int gtid;
+ unsigned int tid;
+ int err;
+
+ total = 0;
+ for_each_tile(tile, xe, tid)
+ total += pf_get_vf_config_lmem(tile->primary_gt, vfid);
+
+ for_each_tile(tile, xe, tid) {
+ lmtt = &tile->sriov.pf.lmtt;
+
+ xe_lmtt_drop_pages(lmtt, vfid);
+ if (!total)
+ continue;
+
+ err = xe_lmtt_prepare_pages(lmtt, vfid, total);
+ if (err)
+ goto fail;
+
+ offset = 0;
+ for_each_gt(gt, xe, gtid) {
+ if (xe_gt_is_media_type(gt))
+ continue;
+
+ config = pf_pick_vf_config(gt, vfid);
+ bo = config->lmem_obj;
+ if (!bo)
+ continue;
+
+ err = xe_lmtt_populate_pages(lmtt, vfid, bo, offset);
+ if (err)
+ goto fail;
+ offset += bo->size;
+ }
+ }
+
+ pf_force_lmtt_invalidate(xe);
+ return 0;
+
+fail:
+ for_each_tile(tile, xe, tid) {
+ lmtt = &tile->sriov.pf.lmtt;
+ xe_lmtt_drop_pages(lmtt, vfid);
+ }
+ return err;
+}
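+
+/*
+ * Example (sizes assumed for illustration): a VF provisioned with 1 GiB
+ * on tile0 and 2 GiB on tile1 gets a 3 GiB LMTT mapping on every tile,
+ * populated from each tile's lmem_obj at increasing offsets (tile0's BO
+ * at offset 0, tile1's BO at offset 1 GiB).
+ */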
+
+static void pf_release_vf_config_lmem(struct xe_gt *gt, struct xe_gt_sriov_config *config)
+{
+ xe_gt_assert(gt, !xe_gt_is_media_type(gt));
+ lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt));
+
+ if (config->lmem_obj) {
+ xe_bo_unpin_map_no_vm(config->lmem_obj);
+ config->lmem_obj = NULL;
+ }
+}
+
+static int pf_provision_vf_lmem(struct xe_gt *gt, unsigned int vfid, u64 size)
+{
+ struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid);
+ struct xe_device *xe = gt_to_xe(gt);
+ struct xe_tile *tile = gt_to_tile(gt);
+ struct xe_bo *bo;
+ int err;
+
+ xe_gt_assert(gt, vfid);
+ xe_gt_assert(gt, !xe_gt_is_media_type(gt));
+
+ size = round_up(size, pf_get_lmem_alignment(gt));
+
+ if (config->lmem_obj) {
+ err = pf_distribute_config_lmem(gt, vfid, 0);
+ if (unlikely(err))
+ return err;
+
+ pf_reset_vf_lmtt(xe, vfid);
+ pf_release_vf_config_lmem(gt, config);
+ }
+ xe_gt_assert(gt, !config->lmem_obj);
+
+ if (!size)
+ return 0;
+
+ xe_gt_assert(gt, pf_get_lmem_alignment(gt) == SZ_2M);
+ bo = xe_bo_create_pin_map(xe, tile, NULL,
+ ALIGN(size, PAGE_SIZE),
+ ttm_bo_type_kernel,
+ XE_BO_FLAG_VRAM_IF_DGFX(tile) |
+ XE_BO_FLAG_PINNED);
+ if (IS_ERR(bo))
+ return PTR_ERR(bo);
+
+ config->lmem_obj = bo;
+
+ err = pf_update_vf_lmtt(xe, vfid);
+ if (unlikely(err))
+ goto release;
+
+ err = pf_push_vf_cfg_lmem(gt, vfid, bo->size);
+ if (unlikely(err))
+ goto reset_lmtt;
+
+ xe_gt_sriov_dbg_verbose(gt, "VF%u LMEM %zu (%zuM)\n",
+ vfid, bo->size, bo->size / SZ_1M);
+ return 0;
+
+reset_lmtt:
+ pf_reset_vf_lmtt(xe, vfid);
+release:
+ pf_release_vf_config_lmem(gt, config);
+ return err;
+}
+
+/**
+ * xe_gt_sriov_pf_config_get_lmem - Get VF's LMEM quota.
+ * @gt: the &xe_gt
+ * @vfid: the VF identifier
+ *
+ * This function can only be called on PF.
+ *
+ * Return: VF's (or PF's spare) LMEM quota.
+ */
+u64 xe_gt_sriov_pf_config_get_lmem(struct xe_gt *gt, unsigned int vfid)
+{
+ u64 size;
+
+ mutex_lock(xe_gt_sriov_pf_master_mutex(gt));
+ if (vfid)
+ size = pf_get_vf_config_lmem(gt, vfid);
+ else
+ size = pf_get_spare_lmem(gt);
+ mutex_unlock(xe_gt_sriov_pf_master_mutex(gt));
+
+ return size;
+}
+
+/**
+ * xe_gt_sriov_pf_config_set_lmem - Provision VF with LMEM.
+ * @gt: the &xe_gt (can't be media)
+ * @vfid: the VF identifier
+ * @size: requested LMEM size
+ *
+ * This function can only be called on PF.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_gt_sriov_pf_config_set_lmem(struct xe_gt *gt, unsigned int vfid, u64 size)
+{
+ int err;
+
+ mutex_lock(xe_gt_sriov_pf_master_mutex(gt));
+ if (vfid)
+ err = pf_provision_vf_lmem(gt, vfid, size);
+ else
+ err = pf_set_spare_lmem(gt, size);
+ mutex_unlock(xe_gt_sriov_pf_master_mutex(gt));
+
+ return pf_config_set_u64_done(gt, vfid, size,
+ xe_gt_sriov_pf_config_get_lmem(gt, vfid),
+ vfid ? "LMEM" : "spare LMEM", err);
+}
+
+/**
+ * xe_gt_sriov_pf_config_bulk_set_lmem - Provision many VFs with LMEM.
+ * @gt: the &xe_gt (can't be media)
+ * @vfid: starting VF identifier (can't be 0)
+ * @num_vfs: number of VFs to provision
+ * @size: requested LMEM size
+ *
+ * This function can only be called on PF.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_gt_sriov_pf_config_bulk_set_lmem(struct xe_gt *gt, unsigned int vfid,
+ unsigned int num_vfs, u64 size)
+{
+ unsigned int n;
+ int err = 0;
+
+ xe_gt_assert(gt, vfid);
+ xe_gt_assert(gt, !xe_gt_is_media_type(gt));
+
+ if (!num_vfs)
+ return 0;
+
+ mutex_lock(xe_gt_sriov_pf_master_mutex(gt));
+ for (n = vfid; n < vfid + num_vfs; n++) {
+ err = pf_provision_vf_lmem(gt, n, size);
+ if (err)
+ break;
+ }
+ mutex_unlock(xe_gt_sriov_pf_master_mutex(gt));
+
+ return pf_config_bulk_set_u64_done(gt, vfid, num_vfs, size,
+ xe_gt_sriov_pf_config_get_lmem,
+ "LMEM", n, err);
+}
+
+static u64 pf_query_free_lmem(struct xe_gt *gt)
+{
+ struct xe_tile *tile = gt->tile;
+
+ return xe_ttm_vram_get_avail(&tile->mem.vram_mgr->manager);
+}
+
+static u64 pf_query_max_lmem(struct xe_gt *gt)
+{
+ u64 alignment = pf_get_lmem_alignment(gt);
+ u64 spare = pf_get_spare_lmem(gt);
+ u64 free = pf_query_free_lmem(gt);
+ u64 avail;
+
+ /* XXX: need to account for 2MB blocks only */
+ avail = free > spare ? free - spare : 0;
+ avail = round_down(avail, alignment);
+
+ return avail;
+}
+
+#ifdef CONFIG_DRM_XE_DEBUG_SRIOV
+#define MAX_FAIR_LMEM SZ_128M /* XXX: make it small for the driver bringup */
+#else
+#define MAX_FAIR_LMEM SZ_2G /* XXX: known issue with allocating BO over 2GiB */
+#endif
+
+static u64 pf_estimate_fair_lmem(struct xe_gt *gt, unsigned int num_vfs)
+{
+ u64 available = pf_query_max_lmem(gt);
+ u64 alignment = pf_get_lmem_alignment(gt);
+ u64 fair;
+
+ fair = div_u64(available, num_vfs);
+ fair = ALIGN_DOWN(fair, alignment);
+#ifdef MAX_FAIR_LMEM
+ fair = min_t(u64, MAX_FAIR_LMEM, fair);
+#endif
+ xe_gt_sriov_dbg_verbose(gt, "LMEM available(%lluM) fair(%u x %lluM)\n",
+ available / SZ_1M, num_vfs, fair / SZ_1M);
+ return fair;
+}
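+
+/*
+ * Worked example (sizes assumed for illustration): with 30 GiB of
+ * available LMEM and num_vfs == 7, fair = 30720 MiB / 7 = 4388 MiB after
+ * the 2 MiB align-down; the MAX_FAIR_LMEM cap then clamps it to 2 GiB
+ * (or 128 MiB on CONFIG_DRM_XE_DEBUG_SRIOV builds).
+ */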
+
+/**
+ * xe_gt_sriov_pf_config_set_fair_lmem - Provision many VFs with fair LMEM.
+ * @gt: the &xe_gt (can't be media)
+ * @vfid: starting VF identifier (can't be 0)
+ * @num_vfs: number of VFs to provision (can't be 0)
+ *
+ * This function can only be called on PF.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_gt_sriov_pf_config_set_fair_lmem(struct xe_gt *gt, unsigned int vfid,
+ unsigned int num_vfs)
+{
+ u64 fair;
+
+ xe_gt_assert(gt, vfid);
+ xe_gt_assert(gt, num_vfs);
+ xe_gt_assert(gt, !xe_gt_is_media_type(gt));
+
+ if (!IS_DGFX(gt_to_xe(gt)))
+ return 0;
+
+ mutex_lock(xe_gt_sriov_pf_master_mutex(gt));
+ fair = pf_estimate_fair_lmem(gt, num_vfs);
+ mutex_unlock(xe_gt_sriov_pf_master_mutex(gt));
+
+ if (!fair)
+ return -ENOSPC;
+
+ return xe_gt_sriov_pf_config_bulk_set_lmem(gt, vfid, num_vfs, fair);
+}
+
+/**
+ * xe_gt_sriov_pf_config_set_fair - Provision many VFs with fair resources.
+ * @gt: the &xe_gt
+ * @vfid: starting VF identifier (can't be 0)
+ * @num_vfs: number of VFs to provision (can't be 0)
+ *
+ * This function can only be called on PF.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_gt_sriov_pf_config_set_fair(struct xe_gt *gt, unsigned int vfid,
+ unsigned int num_vfs)
+{
+ int result = 0;
+ int err;
+
+ xe_gt_assert(gt, vfid);
+ xe_gt_assert(gt, num_vfs);
+
+ if (!xe_gt_is_media_type(gt)) {
+ err = xe_gt_sriov_pf_config_set_fair_ggtt(gt, vfid, num_vfs);
+ result = result ?: err;
+ err = xe_gt_sriov_pf_config_set_fair_lmem(gt, vfid, num_vfs);
+ result = result ?: err;
+ }
+ err = xe_gt_sriov_pf_config_set_fair_ctxs(gt, vfid, num_vfs);
+ result = result ?: err;
+ err = xe_gt_sriov_pf_config_set_fair_dbs(gt, vfid, num_vfs);
+ result = result ?: err;
+
+ return result;
+}
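+
+/*
+ * Illustrative usage only (the caller below is hypothetical): a PF
+ * enabling num_vfs VFs could fair-provision each GT before letting the
+ * VFs boot:
+ *
+ *   for_each_gt(gt, xe, id) {
+ *           err = xe_gt_sriov_pf_config_set_fair(gt, 1, num_vfs);
+ *           if (err)
+ *                   return err;
+ *   }
+ */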
+
+static const char *exec_quantum_unit(u32 exec_quantum)
+{
+ return exec_quantum ? "ms" : "(infinity)";
+}
+
+static int pf_provision_exec_quantum(struct xe_gt *gt, unsigned int vfid,
+ u32 exec_quantum)
+{
+ struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid);
+ int err;
+
+ err = pf_push_vf_cfg_exec_quantum(gt, vfid, exec_quantum);
+ if (unlikely(err))
+ return err;
+
+ config->exec_quantum = exec_quantum;
+ return 0;
+}
+
+static int pf_get_exec_quantum(struct xe_gt *gt, unsigned int vfid)
+{
+ struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid);
+
+ return config->exec_quantum;
+}
+
+/**
+ * xe_gt_sriov_pf_config_set_exec_quantum - Configure execution quantum for the VF.
+ * @gt: the &xe_gt
+ * @vfid: the VF identifier
+ * @exec_quantum: requested execution quantum in milliseconds (0 is infinity)
+ *
+ * This function can only be called on PF.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_gt_sriov_pf_config_set_exec_quantum(struct xe_gt *gt, unsigned int vfid,
+ u32 exec_quantum)
+{
+ int err;
+
+ mutex_lock(xe_gt_sriov_pf_master_mutex(gt));
+ err = pf_provision_exec_quantum(gt, vfid, exec_quantum);
+ mutex_unlock(xe_gt_sriov_pf_master_mutex(gt));
+
+ return pf_config_set_u32_done(gt, vfid, exec_quantum,
+ xe_gt_sriov_pf_config_get_exec_quantum(gt, vfid),
+ "execution quantum", exec_quantum_unit, err);
+}
+
+/**
+ * xe_gt_sriov_pf_config_get_exec_quantum - Get VF's execution quantum.
+ * @gt: the &xe_gt
+ * @vfid: the VF identifier
+ *
+ * This function can only be called on PF.
+ *
+ * Return: VF's (or PF's) execution quantum in milliseconds.
+ */
+u32 xe_gt_sriov_pf_config_get_exec_quantum(struct xe_gt *gt, unsigned int vfid)
+{
+ u32 exec_quantum;
+
+ mutex_lock(xe_gt_sriov_pf_master_mutex(gt));
+ exec_quantum = pf_get_exec_quantum(gt, vfid);
+ mutex_unlock(xe_gt_sriov_pf_master_mutex(gt));
+
+ return exec_quantum;
+}
+
+static const char *preempt_timeout_unit(u32 preempt_timeout)
+{
+ return preempt_timeout ? "us" : "(infinity)";
+}
+
+static int pf_provision_preempt_timeout(struct xe_gt *gt, unsigned int vfid,
+ u32 preempt_timeout)
+{
+ struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid);
+ int err;
+
+ err = pf_push_vf_cfg_preempt_timeout(gt, vfid, preempt_timeout);
+ if (unlikely(err))
+ return err;
+
+ config->preempt_timeout = preempt_timeout;
+
+ return 0;
+}
+
+static int pf_get_preempt_timeout(struct xe_gt *gt, unsigned int vfid)
+{
+ struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid);
+
+ return config->preempt_timeout;
+}
+
+/**
+ * xe_gt_sriov_pf_config_set_preempt_timeout - Configure preemption timeout for the VF.
+ * @gt: the &xe_gt
+ * @vfid: the VF identifier
+ * @preempt_timeout: requested preemption timeout in microseconds (0 is infinity)
+ *
+ * This function can only be called on PF.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_gt_sriov_pf_config_set_preempt_timeout(struct xe_gt *gt, unsigned int vfid,
+ u32 preempt_timeout)
+{
+ int err;
+
+ mutex_lock(xe_gt_sriov_pf_master_mutex(gt));
+ err = pf_provision_preempt_timeout(gt, vfid, preempt_timeout);
+ mutex_unlock(xe_gt_sriov_pf_master_mutex(gt));
+
+ return pf_config_set_u32_done(gt, vfid, preempt_timeout,
+ xe_gt_sriov_pf_config_get_preempt_timeout(gt, vfid),
+ "preemption timeout", preempt_timeout_unit, err);
+}
+
+/**
+ * xe_gt_sriov_pf_config_get_preempt_timeout - Get VF's preemption timeout.
+ * @gt: the &xe_gt
+ * @vfid: the VF identifier
+ *
+ * This function can only be called on PF.
+ *
+ * Return: VF's (or PF's) preemption timeout in microseconds.
+ */
+u32 xe_gt_sriov_pf_config_get_preempt_timeout(struct xe_gt *gt, unsigned int vfid)
+{
+ u32 preempt_timeout;
+
+ mutex_lock(xe_gt_sriov_pf_master_mutex(gt));
+ preempt_timeout = pf_get_preempt_timeout(gt, vfid);
+ mutex_unlock(xe_gt_sriov_pf_master_mutex(gt));
+
+ return preempt_timeout;
+}
+
+static void pf_reset_config_sched(struct xe_gt *gt, struct xe_gt_sriov_config *config)
+{
+ lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt));
+
+ config->exec_quantum = 0;
+ config->preempt_timeout = 0;
+}
+
+static void pf_release_vf_config(struct xe_gt *gt, unsigned int vfid)
+{
+ struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid);
+
+ if (!xe_gt_is_media_type(gt)) {
+ pf_release_vf_config_ggtt(gt, config);
+ pf_release_vf_config_lmem(gt, config);
+ }
+ pf_release_config_ctxs(gt, config);
+ pf_release_config_dbs(gt, config);
+ pf_reset_config_sched(gt, config);
+}
+
+/**
+ * xe_gt_sriov_pf_config_release - Release and reset VF configuration.
+ * @gt: the &xe_gt
+ * @vfid: the VF identifier (can't be PF)
+ * @force: force configuration release
+ *
+ * This function can only be called on PF.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_gt_sriov_pf_config_release(struct xe_gt *gt, unsigned int vfid, bool force)
+{
+ int err;
+
+ xe_gt_assert(gt, vfid);
+
+ mutex_lock(xe_gt_sriov_pf_master_mutex(gt));
+ err = pf_send_vf_cfg_reset(gt, vfid);
+ if (!err || force)
+ pf_release_vf_config(gt, vfid);
+ mutex_unlock(xe_gt_sriov_pf_master_mutex(gt));
+
+ if (unlikely(err)) {
+ xe_gt_sriov_notice(gt, "VF%u unprovisioning failed with error (%pe)%s\n",
+ vfid, ERR_PTR(err),
+ force ? " but all resources were released anyway!" : "");
+ }
+
+ return force ? 0 : err;
+}
+
+/**
+ * xe_gt_sriov_pf_config_push - Reprovision VF's configuration.
+ * @gt: the &xe_gt
+ * @vfid: the VF identifier (can't be PF)
+ * @refresh: explicit refresh
+ *
+ * This function can only be called on PF.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_gt_sriov_pf_config_push(struct xe_gt *gt, unsigned int vfid, bool refresh)
+{
+ int err = 0;
+
+ xe_gt_assert(gt, vfid);
+
+ mutex_lock(xe_gt_sriov_pf_master_mutex(gt));
+ if (refresh)
+ err = pf_send_vf_cfg_reset(gt, vfid);
+ if (!err)
+ err = pf_push_full_vf_config(gt, vfid);
+ mutex_unlock(xe_gt_sriov_pf_master_mutex(gt));
+
+ if (unlikely(err)) {
+ xe_gt_sriov_notice(gt, "Failed to %s VF%u configuration (%pe)\n",
+ refresh ? "refresh" : "push", vfid, ERR_PTR(err));
+ }
+
+ return err;
+}
+
+/**
+ * xe_gt_sriov_pf_config_print_ggtt - Print GGTT configurations.
+ * @gt: the &xe_gt
+ * @p: the &drm_printer
+ *
+ * Print GGTT configuration data for all VFs.
+ * VFs without provisioned GGTT are ignored.
+ *
+ * This function can only be called on PF.
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_gt_sriov_pf_config_print_ggtt(struct xe_gt *gt, struct drm_printer *p)
+{
+ unsigned int n, total_vfs = xe_sriov_pf_get_totalvfs(gt_to_xe(gt));
+ const struct xe_gt_sriov_config *config;
+ char buf[10];
+
+ for (n = 1; n <= total_vfs; n++) {
+ config = &gt->sriov.pf.vfs[n].config;
+ if (!drm_mm_node_allocated(&config->ggtt_region))
+ continue;
+
+ string_get_size(config->ggtt_region.size, 1, STRING_UNITS_2, buf, sizeof(buf));
+ drm_printf(p, "VF%u:\t%#0llx-%#llx\t(%s)\n",
+ n, config->ggtt_region.start,
+ config->ggtt_region.start + config->ggtt_region.size - 1, buf);
+ }
+
+ return 0;
+}
+
+/**
+ * xe_gt_sriov_pf_config_print_ctxs - Print GuC context IDs configurations.
+ * @gt: the &xe_gt
+ * @p: the &drm_printer
+ *
+ * Print GuC context ID allocations across all VFs.
+ * VFs without GuC context IDs are skipped.
+ *
+ * This function can only be called on PF.
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_gt_sriov_pf_config_print_ctxs(struct xe_gt *gt, struct drm_printer *p)
+{
+ unsigned int n, total_vfs = xe_sriov_pf_get_totalvfs(gt_to_xe(gt));
+ const struct xe_gt_sriov_config *config;
+
+ xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+ mutex_lock(xe_gt_sriov_pf_master_mutex(gt));
+
+ for (n = 1; n <= total_vfs; n++) {
+ config = &gt->sriov.pf.vfs[n].config;
+ if (!config->num_ctxs)
+ continue;
+
+ drm_printf(p, "VF%u:\t%u-%u\t(%u)\n",
+ n,
+ config->begin_ctx,
+ config->begin_ctx + config->num_ctxs - 1,
+ config->num_ctxs);
+ }
+
+ mutex_unlock(xe_gt_sriov_pf_master_mutex(gt));
+ return 0;
+}
+
+/**
+ * xe_gt_sriov_pf_config_print_dbs - Print GuC doorbell ID configurations.
+ * @gt: the &xe_gt
+ * @p: the &drm_printer
+ *
+ * Print GuC doorbell ID allocations across all VFs.
+ * VFs without GuC doorbell IDs are skipped.
+ *
+ * This function can only be called on PF.
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_gt_sriov_pf_config_print_dbs(struct xe_gt *gt, struct drm_printer *p)
+{
+ unsigned int n, total_vfs = xe_sriov_pf_get_totalvfs(gt_to_xe(gt));
+ const struct xe_gt_sriov_config *config;
+
+ xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+ mutex_lock(xe_gt_sriov_pf_master_mutex(gt));
+
+ for (n = 1; n <= total_vfs; n++) {
+ config = &gt->sriov.pf.vfs[n].config;
+ if (!config->num_dbs)
+ continue;
+
+ drm_printf(p, "VF%u:\t%u-%u\t(%u)\n",
+ n,
+ config->begin_db,
+ config->begin_db + config->num_dbs - 1,
+ config->num_dbs);
+ }
+
+ mutex_unlock(xe_gt_sriov_pf_master_mutex(gt));
+ return 0;
+}
+
+/**
+ * xe_gt_sriov_pf_config_print_available_ggtt - Print available GGTT ranges.
+ * @gt: the &xe_gt
+ * @p: the &drm_printer
+ *
+ * Print GGTT ranges that are available for provisioning.
+ *
+ * This function can only be called on PF.
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_gt_sriov_pf_config_print_available_ggtt(struct xe_gt *gt, struct drm_printer *p)
+{
+ struct xe_ggtt *ggtt = gt_to_tile(gt)->mem.ggtt;
+ const struct drm_mm *mm = &ggtt->mm;
+ const struct drm_mm_node *entry;
+ u64 alignment = pf_get_ggtt_alignment(gt);
+ u64 hole_min_start = xe_wopcm_size(gt_to_xe(gt));
+ u64 hole_start, hole_end, hole_size;
+ u64 spare, avail, total = 0;
+ char buf[10];
+
+ xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+
+ mutex_lock(xe_gt_sriov_pf_master_mutex(gt));
+
+ spare = pf_get_spare_ggtt(gt);
+
+ mutex_lock(&ggtt->lock);
+
+ drm_mm_for_each_hole(entry, mm, hole_start, hole_end) {
+ hole_start = max(hole_start, hole_min_start);
+ hole_start = ALIGN(hole_start, alignment);
+ hole_end = ALIGN_DOWN(hole_end, alignment);
+ if (hole_start >= hole_end)
+ continue;
+ hole_size = hole_end - hole_start;
+ total += hole_size;
+
+ string_get_size(hole_size, 1, STRING_UNITS_2, buf, sizeof(buf));
+ drm_printf(p, "range:\t%#llx-%#llx\t(%s)\n",
+ hole_start, hole_end - 1, buf);
+ }
+
+ mutex_unlock(&ggtt->lock);
+ mutex_unlock(xe_gt_sriov_pf_master_mutex(gt));
+
+ string_get_size(total, 1, STRING_UNITS_2, buf, sizeof(buf));
+ drm_printf(p, "total:\t%llu\t(%s)\n", total, buf);
+
+ string_get_size(spare, 1, STRING_UNITS_2, buf, sizeof(buf));
+ drm_printf(p, "spare:\t%llu\t(%s)\n", spare, buf);
+
+ avail = total > spare ? total - spare : 0;
+
+ string_get_size(avail, 1, STRING_UNITS_2, buf, sizeof(buf));
+ drm_printf(p, "avail:\t%llu\t(%s)\n", avail, buf);
+
+ return 0;
+}
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h
new file mode 100644
index 000000000000..5e6b36f00b5b
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h
@@ -0,0 +1,56 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023-2024 Intel Corporation
+ */
+
+#ifndef _XE_GT_SRIOV_PF_CONFIG_H_
+#define _XE_GT_SRIOV_PF_CONFIG_H_
+
+#include <linux/types.h>
+
+struct drm_printer;
+struct xe_gt;
+
+u64 xe_gt_sriov_pf_config_get_ggtt(struct xe_gt *gt, unsigned int vfid);
+int xe_gt_sriov_pf_config_set_ggtt(struct xe_gt *gt, unsigned int vfid, u64 size);
+int xe_gt_sriov_pf_config_set_fair_ggtt(struct xe_gt *gt,
+ unsigned int vfid, unsigned int num_vfs);
+int xe_gt_sriov_pf_config_bulk_set_ggtt(struct xe_gt *gt,
+ unsigned int vfid, unsigned int num_vfs, u64 size);
+
+u32 xe_gt_sriov_pf_config_get_ctxs(struct xe_gt *gt, unsigned int vfid);
+int xe_gt_sriov_pf_config_set_ctxs(struct xe_gt *gt, unsigned int vfid, u32 num_ctxs);
+int xe_gt_sriov_pf_config_set_fair_ctxs(struct xe_gt *gt, unsigned int vfid, unsigned int num_vfs);
+int xe_gt_sriov_pf_config_bulk_set_ctxs(struct xe_gt *gt, unsigned int vfid, unsigned int num_vfs,
+ u32 num_ctxs);
+
+u32 xe_gt_sriov_pf_config_get_dbs(struct xe_gt *gt, unsigned int vfid);
+int xe_gt_sriov_pf_config_set_dbs(struct xe_gt *gt, unsigned int vfid, u32 num_dbs);
+int xe_gt_sriov_pf_config_set_fair_dbs(struct xe_gt *gt, unsigned int vfid, unsigned int num_vfs);
+int xe_gt_sriov_pf_config_bulk_set_dbs(struct xe_gt *gt, unsigned int vfid, unsigned int num_vfs,
+ u32 num_dbs);
+
+u64 xe_gt_sriov_pf_config_get_lmem(struct xe_gt *gt, unsigned int vfid);
+int xe_gt_sriov_pf_config_set_lmem(struct xe_gt *gt, unsigned int vfid, u64 size);
+int xe_gt_sriov_pf_config_set_fair_lmem(struct xe_gt *gt, unsigned int vfid, unsigned int num_vfs);
+int xe_gt_sriov_pf_config_bulk_set_lmem(struct xe_gt *gt, unsigned int vfid, unsigned int num_vfs,
+ u64 size);
+
+u32 xe_gt_sriov_pf_config_get_exec_quantum(struct xe_gt *gt, unsigned int vfid);
+int xe_gt_sriov_pf_config_set_exec_quantum(struct xe_gt *gt, unsigned int vfid, u32 exec_quantum);
+
+u32 xe_gt_sriov_pf_config_get_preempt_timeout(struct xe_gt *gt, unsigned int vfid);
+int xe_gt_sriov_pf_config_set_preempt_timeout(struct xe_gt *gt, unsigned int vfid,
+ u32 preempt_timeout);
+
+int xe_gt_sriov_pf_config_set_fair(struct xe_gt *gt, unsigned int vfid, unsigned int num_vfs);
+int xe_gt_sriov_pf_config_release(struct xe_gt *gt, unsigned int vfid, bool force);
+int xe_gt_sriov_pf_config_push(struct xe_gt *gt, unsigned int vfid, bool refresh);
+
+int xe_gt_sriov_pf_config_print_ggtt(struct xe_gt *gt, struct drm_printer *p);
+int xe_gt_sriov_pf_config_print_ctxs(struct xe_gt *gt, struct drm_printer *p);
+int xe_gt_sriov_pf_config_print_dbs(struct xe_gt *gt, struct drm_printer *p);
+
+int xe_gt_sriov_pf_config_print_available_ggtt(struct xe_gt *gt, struct drm_printer *p);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config_types.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config_types.h
new file mode 100644
index 000000000000..d3745c355957
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config_types.h
@@ -0,0 +1,54 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023-2024 Intel Corporation
+ */
+
+#ifndef _XE_GT_SRIOV_PF_CONFIG_TYPES_H_
+#define _XE_GT_SRIOV_PF_CONFIG_TYPES_H_
+
+#include <drm/drm_mm.h>
+
+struct xe_bo;
+
+/**
+ * struct xe_gt_sriov_config - GT level per-VF configuration data.
+ *
+ * Used by the PF driver to maintain per-VF provisioning data.
+ */
+struct xe_gt_sriov_config {
+ /** @ggtt_region: GGTT region assigned to the VF. */
+ struct drm_mm_node ggtt_region;
+ /** @lmem_obj: LMEM allocation for use by the VF. */
+ struct xe_bo *lmem_obj;
+ * @num_ctxs: number of GuC context IDs.
+ u16 num_ctxs;
+ /** @begin_ctx: start index of GuC context ID range. */
+ u16 begin_ctx;
+ * @num_dbs: number of GuC doorbell IDs.
+ u16 num_dbs;
+ /** @begin_db: start index of GuC doorbell ID range. */
+ u16 begin_db;
+ /** @exec_quantum: execution-quantum in milliseconds. */
+ u32 exec_quantum;
+ /** @preempt_timeout: preemption timeout in microseconds. */
+ u32 preempt_timeout;
+};
+
+/**
+ * struct xe_gt_sriov_spare_config - GT-level PF spare configuration data.
+ *
+ * Used by the PF driver to maintain its own reserved (spare) provisioning
+ * data that is not tracked in struct xe_gt_sriov_config.
+ */
+struct xe_gt_sriov_spare_config {
+ /** @ggtt_size: GGTT size. */
+ u64 ggtt_size;
+ /** @lmem_size: LMEM size. */
+ u64 lmem_size;
+ /** @num_ctxs: number of GuC submission contexts. */
+ u16 num_ctxs;
+ /** @num_dbs: number of GuC doorbells. */
+ u16 num_dbs;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c
new file mode 100644
index 000000000000..40b8f881fe04
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c
@@ -0,0 +1,257 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023-2024 Intel Corporation
+ */
+
+#include "abi/guc_actions_sriov_abi.h"
+
+#include "xe_device.h"
+#include "xe_gt.h"
+#include "xe_gt_sriov_pf_control.h"
+#include "xe_gt_sriov_printk.h"
+#include "xe_guc_ct.h"
+#include "xe_sriov.h"
+
+static const char *control_cmd_to_string(u32 cmd)
+{
+ switch (cmd) {
+ case GUC_PF_TRIGGER_VF_PAUSE:
+ return "PAUSE";
+ case GUC_PF_TRIGGER_VF_RESUME:
+ return "RESUME";
+ case GUC_PF_TRIGGER_VF_STOP:
+ return "STOP";
+ case GUC_PF_TRIGGER_VF_FLR_START:
+ return "FLR_START";
+ case GUC_PF_TRIGGER_VF_FLR_FINISH:
+ return "FLR_FINISH";
+ default:
+ return "<unknown>";
+ }
+}
+
+static int guc_action_vf_control_cmd(struct xe_guc *guc, u32 vfid, u32 cmd)
+{
+ u32 request[PF2GUC_VF_CONTROL_REQUEST_MSG_LEN] = {
+ FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
+ FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) |
+ FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, GUC_ACTION_PF2GUC_VF_CONTROL),
+ FIELD_PREP(PF2GUC_VF_CONTROL_REQUEST_MSG_1_VFID, vfid),
+ FIELD_PREP(PF2GUC_VF_CONTROL_REQUEST_MSG_2_COMMAND, cmd),
+ };
+ int ret;
+
+ /* XXX those two commands are now sent from the G2H handler */
+ if (cmd == GUC_PF_TRIGGER_VF_FLR_START || cmd == GUC_PF_TRIGGER_VF_FLR_FINISH)
+ return xe_guc_ct_send_g2h_handler(&guc->ct, request, ARRAY_SIZE(request));
+
+ ret = xe_guc_ct_send_block(&guc->ct, request, ARRAY_SIZE(request));
+ return ret > 0 ? -EPROTO : ret;
+}
+
+static int pf_send_vf_control_cmd(struct xe_gt *gt, unsigned int vfid, u32 cmd)
+{
+ int err;
+
+ xe_gt_assert(gt, vfid != PFID);
+
+ err = guc_action_vf_control_cmd(&gt->uc.guc, vfid, cmd);
+ if (unlikely(err))
+ xe_gt_sriov_err(gt, "VF%u control command %s failed (%pe)\n",
+ vfid, control_cmd_to_string(cmd), ERR_PTR(err));
+ return err;
+}
+
+static int pf_send_vf_pause(struct xe_gt *gt, unsigned int vfid)
+{
+ return pf_send_vf_control_cmd(gt, vfid, GUC_PF_TRIGGER_VF_PAUSE);
+}
+
+static int pf_send_vf_resume(struct xe_gt *gt, unsigned int vfid)
+{
+ return pf_send_vf_control_cmd(gt, vfid, GUC_PF_TRIGGER_VF_RESUME);
+}
+
+static int pf_send_vf_stop(struct xe_gt *gt, unsigned int vfid)
+{
+ return pf_send_vf_control_cmd(gt, vfid, GUC_PF_TRIGGER_VF_STOP);
+}
+
+static int pf_send_vf_flr_start(struct xe_gt *gt, unsigned int vfid)
+{
+ return pf_send_vf_control_cmd(gt, vfid, GUC_PF_TRIGGER_VF_FLR_START);
+}
+
+static int pf_send_vf_flr_finish(struct xe_gt *gt, unsigned int vfid)
+{
+ return pf_send_vf_control_cmd(gt, vfid, GUC_PF_TRIGGER_VF_FLR_FINISH);
+}
+
+/**
+ * xe_gt_sriov_pf_control_pause_vf - Pause a VF.
+ * @gt: the &xe_gt
+ * @vfid: the VF identifier
+ *
+ * This function is for PF only.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_gt_sriov_pf_control_pause_vf(struct xe_gt *gt, unsigned int vfid)
+{
+ return pf_send_vf_pause(gt, vfid);
+}
+
+/**
+ * xe_gt_sriov_pf_control_resume_vf - Resume a VF.
+ * @gt: the &xe_gt
+ * @vfid: the VF identifier
+ *
+ * This function is for PF only.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_gt_sriov_pf_control_resume_vf(struct xe_gt *gt, unsigned int vfid)
+{
+ return pf_send_vf_resume(gt, vfid);
+}
+
+/**
+ * xe_gt_sriov_pf_control_stop_vf - Stop a VF.
+ * @gt: the &xe_gt
+ * @vfid: the VF identifier
+ *
+ * This function is for PF only.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_gt_sriov_pf_control_stop_vf(struct xe_gt *gt, unsigned int vfid)
+{
+ return pf_send_vf_stop(gt, vfid);
+}
+
+/**
+ * DOC: The VF FLR Flow with GuC
+ *
+ * PF GUC PCI
+ * ========================================================
+ * | | |
+ * (1) | [ ] <----- FLR --|
+ * | [ ] :
+ * (2) [ ] <-------- NOTIFY FLR --[ ]
+ * [ ] |
+ * (3) [ ] |
+ * [ ] |
+ * [ ]-- START FLR ---------> [ ]
+ * | [ ]
+ * (4) | [ ]
+ * | [ ]
+ * [ ] <--------- FLR DONE -- [ ]
+ * [ ] |
+ * (5) [ ] |
+ * [ ] |
+ * [ ]-- FINISH FLR --------> [ ]
+ * | |
+ *
+ * Step 1: PCI HW generates interrupt to the GuC about VF FLR
+ * Step 2: GuC FW sends G2H notification to the PF about VF FLR
+ * Step 2a: on some platforms G2H is only received from root GuC
+ * Step 3: PF sends H2G request to the GuC to start VF FLR sequence
+ * Step 3a: on some platforms PF must send H2G to all other GuCs
+ * Step 4: GuC FW performs VF FLR cleanups and notifies the PF when done
+ * Step 5: PF performs VF FLR cleanups and notifies the GuC FW when finished
+ */
+
+static bool needs_dispatch_flr(struct xe_device *xe)
+{
+ return xe->info.platform == XE_PVC;
+}
+
+static void pf_handle_vf_flr(struct xe_gt *gt, u32 vfid)
+{
+ struct xe_device *xe = gt_to_xe(gt);
+ struct xe_gt *gtit;
+ unsigned int gtid;
+
+ xe_gt_sriov_info(gt, "VF%u FLR\n", vfid);
+
+ if (needs_dispatch_flr(xe)) {
+ for_each_gt(gtit, xe, gtid)
+ pf_send_vf_flr_start(gtit, vfid);
+ } else {
+ pf_send_vf_flr_start(gt, vfid);
+ }
+}
+
+static void pf_handle_vf_flr_done(struct xe_gt *gt, u32 vfid)
+{
+ pf_send_vf_flr_finish(gt, vfid);
+}
+
+static int pf_handle_vf_event(struct xe_gt *gt, u32 vfid, u32 eventid)
+{
+ switch (eventid) {
+ case GUC_PF_NOTIFY_VF_FLR:
+ pf_handle_vf_flr(gt, vfid);
+ break;
+ case GUC_PF_NOTIFY_VF_FLR_DONE:
+ pf_handle_vf_flr_done(gt, vfid);
+ break;
+ case GUC_PF_NOTIFY_VF_PAUSE_DONE:
+ break;
+ case GUC_PF_NOTIFY_VF_FIXUP_DONE:
+ break;
+ default:
+ return -ENOPKG;
+ }
+ return 0;
+}
+
+static int pf_handle_pf_event(struct xe_gt *gt, u32 eventid)
+{
+ switch (eventid) {
+ case GUC_PF_NOTIFY_VF_ENABLE:
+ xe_gt_sriov_dbg_verbose(gt, "VFs %s/%s\n",
+ str_enabled_disabled(true),
+ str_enabled_disabled(false));
+ break;
+ default:
+ return -ENOPKG;
+ }
+ return 0;
+}
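+
+/*
+ * Message routing example (field values assumed for illustration): a
+ * 3-dword GUC2PF_VF_STATE_NOTIFY event with msg[1] == 2 (VFID) and
+ * msg[2] == GUC_PF_NOTIFY_VF_FLR is dispatched below to
+ * pf_handle_vf_event(gt, 2, GUC_PF_NOTIFY_VF_FLR), which starts the FLR
+ * sequence for VF2; a VFID of 0 is treated as a PF-level event instead.
+ */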
+
+/**
+ * xe_gt_sriov_pf_control_process_guc2pf - Handle VF state notification from GuC.
+ * @gt: the &xe_gt
+ * @msg: the G2H message
+ * @len: the length of the G2H message
+ *
+ * This function is for PF only.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_gt_sriov_pf_control_process_guc2pf(struct xe_gt *gt, const u32 *msg, u32 len)
+{
+ u32 vfid;
+ u32 eventid;
+
+ xe_gt_assert(gt, len);
+ xe_gt_assert(gt, FIELD_GET(GUC_HXG_MSG_0_ORIGIN, msg[0]) == GUC_HXG_ORIGIN_GUC);
+ xe_gt_assert(gt, FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[0]) == GUC_HXG_TYPE_EVENT);
+ xe_gt_assert(gt, FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, msg[0]) ==
+ GUC_ACTION_GUC2PF_VF_STATE_NOTIFY);
+
+ if (unlikely(!xe_device_is_sriov_pf(gt_to_xe(gt))))
+ return -EPROTO;
+
+ if (unlikely(FIELD_GET(GUC2PF_VF_STATE_NOTIFY_EVENT_MSG_0_MBZ, msg[0])))
+ return -EPFNOSUPPORT;
+
+ if (unlikely(len != GUC2PF_VF_STATE_NOTIFY_EVENT_MSG_LEN))
+ return -EPROTO;
+
+ vfid = FIELD_GET(GUC2PF_VF_STATE_NOTIFY_EVENT_MSG_1_VFID, msg[1]);
+ eventid = FIELD_GET(GUC2PF_VF_STATE_NOTIFY_EVENT_MSG_2_EVENT, msg[2]);
+
+ return vfid ? pf_handle_vf_event(gt, vfid, eventid) : pf_handle_pf_event(gt, eventid);
+}
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.h
new file mode 100644
index 000000000000..850a3e37661f
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023-2024 Intel Corporation
+ */
+
+#ifndef _XE_GT_SRIOV_PF_CONTROL_H_
+#define _XE_GT_SRIOV_PF_CONTROL_H_
+
+#include <linux/errno.h>
+#include <linux/types.h>
+
+struct xe_gt;
+
+int xe_gt_sriov_pf_control_pause_vf(struct xe_gt *gt, unsigned int vfid);
+int xe_gt_sriov_pf_control_resume_vf(struct xe_gt *gt, unsigned int vfid);
+int xe_gt_sriov_pf_control_stop_vf(struct xe_gt *gt, unsigned int vfid);
+
+#ifdef CONFIG_PCI_IOV
+int xe_gt_sriov_pf_control_process_guc2pf(struct xe_gt *gt, const u32 *msg, u32 len);
+#else
+static inline int xe_gt_sriov_pf_control_process_guc2pf(struct xe_gt *gt, const u32 *msg, u32 len)
+{
+ return -EPROTO;
+}
+#endif
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_helpers.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_helpers.h
new file mode 100644
index 000000000000..0bf12d89ceb2
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_helpers.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023-2024 Intel Corporation
+ */
+
+#ifndef _XE_GT_SRIOV_PF_HELPERS_H_
+#define _XE_GT_SRIOV_PF_HELPERS_H_
+
+#include "xe_gt_types.h"
+#include "xe_sriov_pf_helpers.h"
+
+/**
+ * xe_gt_sriov_pf_assert_vfid() - warn if &vfid is not a supported VF number when debugging.
+ * @gt: the PF &xe_gt to assert on
+ * @vfid: the VF number to assert
+ *
+ * Assert that &gt belongs to the Physical Function (PF) device and that the
+ * provided &vfid is within the range of supported VF numbers (up to the
+ * maximum number of VFs that the driver can support, including VF0 that
+ * represents the PF itself).
+ *
+ * Note: Effective only on debug builds. See `Xe ASSERTs`_ for more information.
+ */
+#define xe_gt_sriov_pf_assert_vfid(gt, vfid) xe_sriov_pf_assert_vfid(gt_to_xe(gt), (vfid))
+
+static inline int xe_gt_sriov_pf_get_totalvfs(struct xe_gt *gt)
+{
+ return xe_sriov_pf_get_totalvfs(gt_to_xe(gt));
+}
+
+static inline struct mutex *xe_gt_sriov_pf_master_mutex(struct xe_gt *gt)
+{
+ return xe_sriov_pf_master_mutex(gt_to_xe(gt));
+}
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_policy.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_policy.c
new file mode 100644
index 000000000000..fae5be5a2a11
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_policy.c
@@ -0,0 +1,418 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023-2024 Intel Corporation
+ */
+
+#include "abi/guc_actions_sriov_abi.h"
+
+#include "xe_bo.h"
+#include "xe_gt.h"
+#include "xe_gt_sriov_pf_helpers.h"
+#include "xe_gt_sriov_pf_policy.h"
+#include "xe_gt_sriov_printk.h"
+#include "xe_guc_ct.h"
+#include "xe_guc_klv_helpers.h"
+#include "xe_pm.h"
+
+/*
+ * Return: number of KLVs that were successfully parsed and saved,
+ * negative error code on failure.
+ */
+static int guc_action_update_vgt_policy(struct xe_guc *guc, u64 addr, u32 size)
+{
+ u32 request[] = {
+ GUC_ACTION_PF2GUC_UPDATE_VGT_POLICY,
+ lower_32_bits(addr),
+ upper_32_bits(addr),
+ size,
+ };
+
+ return xe_guc_ct_send_block(&guc->ct, request, ARRAY_SIZE(request));
+}
+
+/*
+ * Return: number of KLVs that were successfully parsed and saved,
+ * negative error code on failure.
+ */
+static int pf_send_policy_klvs(struct xe_gt *gt, const u32 *klvs, u32 num_dwords)
+{
+ const u32 bytes = num_dwords * sizeof(u32);
+ struct xe_tile *tile = gt_to_tile(gt);
+ struct xe_device *xe = tile_to_xe(tile);
+ struct xe_guc *guc = &gt->uc.guc;
+ struct xe_bo *bo;
+ int ret;
+
+ bo = xe_bo_create_pin_map(xe, tile, NULL,
+ ALIGN(bytes, PAGE_SIZE),
+ ttm_bo_type_kernel,
+ XE_BO_FLAG_VRAM_IF_DGFX(tile) |
+ XE_BO_FLAG_GGTT);
+ if (IS_ERR(bo))
+ return PTR_ERR(bo);
+
+ xe_map_memcpy_to(xe, &bo->vmap, 0, klvs, bytes);
+
+ ret = guc_action_update_vgt_policy(guc, xe_bo_ggtt_addr(bo), num_dwords);
+
+ xe_bo_unpin_map_no_vm(bo);
+
+ return ret;
+}
+
+/*
+ * Return: 0 on success, -ENOKEY if some KLVs were not updated, -EPROTO if reply was malformed,
+ * negative error code on failure.
+ */
+static int pf_push_policy_klvs(struct xe_gt *gt, u32 num_klvs,
+ const u32 *klvs, u32 num_dwords)
+{
+ int ret;
+
+ xe_gt_assert(gt, num_klvs == xe_guc_klv_count(klvs, num_dwords));
+
+ ret = pf_send_policy_klvs(gt, klvs, num_dwords);
+
+ if (ret != num_klvs) {
+ int err = ret < 0 ? ret : ret < num_klvs ? -ENOKEY : -EPROTO;
+ struct drm_printer p = xe_gt_info_printer(gt);
+
+ xe_gt_sriov_notice(gt, "Failed to push %u policy KLV%s (%pe)\n",
+ num_klvs, str_plural(num_klvs), ERR_PTR(err));
+ xe_guc_klv_print(klvs, num_dwords, &p);
+ return err;
+ }
+
+ return 0;
+}
+
+static int pf_push_policy_u32(struct xe_gt *gt, u16 key, u32 value)
+{
+ u32 klv[] = {
+ PREP_GUC_KLV(key, 1),
+ value,
+ };
+
+ return pf_push_policy_klvs(gt, 1, klv, ARRAY_SIZE(klv));
+}
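+
+/*
+ * A u32 policy travels as a single 2-dword KLV: a header dword with the
+ * key in the upper 16 bits and a length of 1 in the lower 16 bits,
+ * followed by the value itself. For example (key value shown for
+ * illustration only), key 0x8001 with value 1 is encoded as
+ * { 0x80010001, 0x00000001 }.
+ */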
+
+static int pf_update_policy_bool(struct xe_gt *gt, u16 key, bool *policy, bool value)
+{
+ int err;
+
+ err = pf_push_policy_u32(gt, key, value);
+ if (unlikely(err)) {
+ xe_gt_sriov_notice(gt, "Failed to update policy %#x '%s' to '%s' (%pe)\n",
+ key, xe_guc_klv_key_to_string(key),
+ str_enabled_disabled(value), ERR_PTR(err));
+ return err;
+ }
+
+ xe_gt_sriov_dbg(gt, "policy key %#x '%s' updated to '%s'\n",
+ key, xe_guc_klv_key_to_string(key),
+ str_enabled_disabled(value));
+
+ *policy = value;
+ return 0;
+}
+
+static int pf_update_policy_u32(struct xe_gt *gt, u16 key, u32 *policy, u32 value)
+{
+ int err;
+
+ err = pf_push_policy_u32(gt, key, value);
+ if (unlikely(err)) {
+ xe_gt_sriov_notice(gt, "Failed to update policy %#x '%s' to %u (%pe)\n",
+ key, xe_guc_klv_key_to_string(key),
+ value, ERR_PTR(err));
+ return err;
+ }
+
+ xe_gt_sriov_dbg(gt, "policy key %#x '%s' updated to %u\n",
+ key, xe_guc_klv_key_to_string(key), value);
+
+ *policy = value;
+ return 0;
+}
+
+static int pf_provision_sched_if_idle(struct xe_gt *gt, bool enable)
+{
+ xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+ lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt));
+
+ return pf_update_policy_bool(gt, GUC_KLV_VGT_POLICY_SCHED_IF_IDLE_KEY,
+ &gt->sriov.pf.policy.guc.sched_if_idle,
+ enable);
+}
+
+static int pf_reprovision_sched_if_idle(struct xe_gt *gt)
+{
+ xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+ lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt));
+
+ return pf_provision_sched_if_idle(gt, gt->sriov.pf.policy.guc.sched_if_idle);
+}
+
+static void pf_sanitize_sched_if_idle(struct xe_gt *gt)
+{
+ xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+ lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt));
+
+ gt->sriov.pf.policy.guc.sched_if_idle = false;
+}
+
+/**
+ * xe_gt_sriov_pf_policy_set_sched_if_idle - Control the 'sched_if_idle' policy.
+ * @gt: the &xe_gt where to apply the policy
+ * @enable: the value of the 'sched_if_idle' policy
+ *
+ * This function can only be called on PF.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_gt_sriov_pf_policy_set_sched_if_idle(struct xe_gt *gt, bool enable)
+{
+ int err;
+
+ mutex_lock(xe_gt_sriov_pf_master_mutex(gt));
+ err = pf_provision_sched_if_idle(gt, enable);
+ mutex_unlock(xe_gt_sriov_pf_master_mutex(gt));
+
+ return err;
+}
+
+/**
+ * xe_gt_sriov_pf_policy_get_sched_if_idle - Retrieve value of 'sched_if_idle' policy.
+ * @gt: the &xe_gt where to read the policy from
+ *
+ * This function can only be called on PF.
+ *
+ * Return: value of 'sched_if_idle' policy.
+ */
+bool xe_gt_sriov_pf_policy_get_sched_if_idle(struct xe_gt *gt)
+{
+ bool enable;
+
+ xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+
+ mutex_lock(xe_gt_sriov_pf_master_mutex(gt));
+ enable = gt->sriov.pf.policy.guc.sched_if_idle;
+ mutex_unlock(xe_gt_sriov_pf_master_mutex(gt));
+
+ return enable;
+}
+
+static int pf_provision_reset_engine(struct xe_gt *gt, bool enable)
+{
+ xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+ lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt));
+
+ return pf_update_policy_bool(gt, GUC_KLV_VGT_POLICY_RESET_AFTER_VF_SWITCH_KEY,
+ &gt->sriov.pf.policy.guc.reset_engine, enable);
+}
+
+static int pf_reprovision_reset_engine(struct xe_gt *gt)
+{
+ xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+ lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt));
+
+ return pf_provision_reset_engine(gt, gt->sriov.pf.policy.guc.reset_engine);
+}
+
+static void pf_sanitize_reset_engine(struct xe_gt *gt)
+{
+ xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+ lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt));
+
+ gt->sriov.pf.policy.guc.reset_engine = false;
+}
+
+/**
+ * xe_gt_sriov_pf_policy_set_reset_engine - Control the 'reset_engine' policy.
+ * @gt: the &xe_gt where to apply the policy
+ * @enable: the value of the 'reset_engine' policy
+ *
+ * This function can only be called on PF.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_gt_sriov_pf_policy_set_reset_engine(struct xe_gt *gt, bool enable)
+{
+ int err;
+
+ mutex_lock(xe_gt_sriov_pf_master_mutex(gt));
+ err = pf_provision_reset_engine(gt, enable);
+ mutex_unlock(xe_gt_sriov_pf_master_mutex(gt));
+
+ return err;
+}
+
+/**
+ * xe_gt_sriov_pf_policy_get_reset_engine - Retrieve value of 'reset_engine' policy.
+ * @gt: the &xe_gt where to read the policy from
+ *
+ * This function can only be called on PF.
+ *
+ * Return: value of 'reset_engine' policy.
+ */
+bool xe_gt_sriov_pf_policy_get_reset_engine(struct xe_gt *gt)
+{
+ bool enable;
+
+ xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+
+ mutex_lock(xe_gt_sriov_pf_master_mutex(gt));
+ enable = gt->sriov.pf.policy.guc.reset_engine;
+ mutex_unlock(xe_gt_sriov_pf_master_mutex(gt));
+
+ return enable;
+}
+
+static int pf_provision_sample_period(struct xe_gt *gt, u32 value)
+{
+ xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+ lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt));
+
+ return pf_update_policy_u32(gt, GUC_KLV_VGT_POLICY_ADVERSE_SAMPLE_PERIOD_KEY,
+ &gt->sriov.pf.policy.guc.sample_period, value);
+}
+
+static int pf_reprovision_sample_period(struct xe_gt *gt)
+{
+ xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+ lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt));
+
+ return pf_provision_sample_period(gt, gt->sriov.pf.policy.guc.sample_period);
+}
+
+static void pf_sanitize_sample_period(struct xe_gt *gt)
+{
+ xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+ lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt));
+
+ gt->sriov.pf.policy.guc.sample_period = 0;
+}
+
+/**
+ * xe_gt_sriov_pf_policy_set_sample_period - Control the 'sample_period' policy.
+ * @gt: the &xe_gt where to apply the policy
+ * @value: the value of the 'sample_period' policy
+ *
+ * This function can only be called on PF.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_gt_sriov_pf_policy_set_sample_period(struct xe_gt *gt, u32 value)
+{
+ int err;
+
+ mutex_lock(xe_gt_sriov_pf_master_mutex(gt));
+ err = pf_provision_sample_period(gt, value);
+ mutex_unlock(xe_gt_sriov_pf_master_mutex(gt));
+
+ return err;
+}
+
+/**
+ * xe_gt_sriov_pf_policy_get_sample_period - Retrieve value of 'sample_period' policy.
+ * @gt: the &xe_gt where to read the policy from
+ *
+ * This function can only be called on PF.
+ *
+ * Return: value of 'sample_period' policy.
+ */
+u32 xe_gt_sriov_pf_policy_get_sample_period(struct xe_gt *gt)
+{
+ u32 value;
+
+ xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+
+ mutex_lock(xe_gt_sriov_pf_master_mutex(gt));
+ value = gt->sriov.pf.policy.guc.sample_period;
+ mutex_unlock(xe_gt_sriov_pf_master_mutex(gt));
+
+ return value;
+}
+
+static void pf_sanitize_guc_policies(struct xe_gt *gt)
+{
+ pf_sanitize_sched_if_idle(gt);
+ pf_sanitize_reset_engine(gt);
+ pf_sanitize_sample_period(gt);
+}
+
+/**
+ * xe_gt_sriov_pf_policy_sanitize - Reset policy settings.
+ * @gt: the &xe_gt
+ *
+ * This function can only be called on PF.
+ */
+void xe_gt_sriov_pf_policy_sanitize(struct xe_gt *gt)
+{
+ mutex_lock(xe_gt_sriov_pf_master_mutex(gt));
+ pf_sanitize_guc_policies(gt);
+ mutex_unlock(xe_gt_sriov_pf_master_mutex(gt));
+}
+
+/**
+ * xe_gt_sriov_pf_policy_reprovision - Reprovision (and optionally reset) policy settings.
+ * @gt: the &xe_gt
+ * @reset: if true, reprovision using default values instead of the latest ones
+ *
+ * This function can only be called on PF.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_gt_sriov_pf_policy_reprovision(struct xe_gt *gt, bool reset)
+{
+ int err = 0;
+
+ xe_pm_runtime_get_noresume(gt_to_xe(gt));
+
+ mutex_lock(xe_gt_sriov_pf_master_mutex(gt));
+ if (reset)
+ pf_sanitize_guc_policies(gt);
+ err |= pf_reprovision_sched_if_idle(gt);
+ err |= pf_reprovision_reset_engine(gt);
+ err |= pf_reprovision_sample_period(gt);
+ mutex_unlock(xe_gt_sriov_pf_master_mutex(gt));
+
+ xe_pm_runtime_put(gt_to_xe(gt));
+
+ return err ? -ENXIO : 0;
+}
+
+static void print_guc_policies(struct drm_printer *p, struct xe_gt_sriov_guc_policies *policy)
+{
+ drm_printf(p, "%s:\t%s\n",
+ xe_guc_klv_key_to_string(GUC_KLV_VGT_POLICY_SCHED_IF_IDLE_KEY),
+ str_enabled_disabled(policy->sched_if_idle));
+ drm_printf(p, "%s:\t%s\n",
+ xe_guc_klv_key_to_string(GUC_KLV_VGT_POLICY_RESET_AFTER_VF_SWITCH_KEY),
+ str_enabled_disabled(policy->reset_engine));
+ drm_printf(p, "%s:\t%u %s\n",
+ xe_guc_klv_key_to_string(GUC_KLV_VGT_POLICY_ADVERSE_SAMPLE_PERIOD_KEY),
+ policy->sample_period, policy->sample_period ? "ms" : "(disabled)");
+}
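+
+/*
+ * Example output (key strings below are illustrative; the exact names
+ * come from xe_guc_klv_key_to_string()):
+ *
+ *   sched_if_idle:	disabled
+ *   reset_after_vf_switch:	enabled
+ *   adverse_sample_period:	0 (disabled)
+ */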
+
+/**
+ * xe_gt_sriov_pf_policy_print - Dump actual policy values.
+ * @gt: the &xe_gt where to read the policy from
+ * @p: the &drm_printer
+ *
+ * This function can only be called on PF.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_gt_sriov_pf_policy_print(struct xe_gt *gt, struct drm_printer *p)
+{
+ xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+
+ mutex_lock(xe_gt_sriov_pf_master_mutex(gt));
+ print_guc_policies(p, &gt->sriov.pf.policy.guc);
+ mutex_unlock(xe_gt_sriov_pf_master_mutex(gt));
+
+ return 0;
+}
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_policy.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_policy.h
new file mode 100644
index 000000000000..2a5dc33dc6d7
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_policy.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023-2024 Intel Corporation
+ */
+
+#ifndef _XE_GT_SRIOV_PF_POLICY_H_
+#define _XE_GT_SRIOV_PF_POLICY_H_
+
+#include <linux/types.h>
+
+struct drm_printer;
+struct xe_gt;
+
+int xe_gt_sriov_pf_policy_set_sched_if_idle(struct xe_gt *gt, bool enable);
+bool xe_gt_sriov_pf_policy_get_sched_if_idle(struct xe_gt *gt);
+int xe_gt_sriov_pf_policy_set_reset_engine(struct xe_gt *gt, bool enable);
+bool xe_gt_sriov_pf_policy_get_reset_engine(struct xe_gt *gt);
+int xe_gt_sriov_pf_policy_set_sample_period(struct xe_gt *gt, u32 value);
+u32 xe_gt_sriov_pf_policy_get_sample_period(struct xe_gt *gt);
+
+void xe_gt_sriov_pf_policy_sanitize(struct xe_gt *gt);
+int xe_gt_sriov_pf_policy_reprovision(struct xe_gt *gt, bool reset);
+int xe_gt_sriov_pf_policy_print(struct xe_gt *gt, struct drm_printer *p);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_policy_types.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_policy_types.h
new file mode 100644
index 000000000000..4de532af135e
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_policy_types.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023-2024 Intel Corporation
+ */
+
+#ifndef _XE_GT_SRIOV_PF_POLICY_TYPES_H_
+#define _XE_GT_SRIOV_PF_POLICY_TYPES_H_
+
+#include <linux/types.h>
+
+/**
+ * struct xe_gt_sriov_guc_policies - GuC SR-IOV policies.
+ * @sched_if_idle: controls strict scheduling policy.
+ * @reset_engine: controls the engine reset on VF switch policy.
+ * @sample_period: adverse events sampling period (in milliseconds).
+ */
+struct xe_gt_sriov_guc_policies {
+ bool sched_if_idle;
+ bool reset_engine;
+ u32 sample_period;
+};
+
+/**
+ * struct xe_gt_sriov_pf_policy - PF policy data.
+ * @guc: GuC scheduling policies.
+ */
+struct xe_gt_sriov_pf_policy {
+ struct xe_gt_sriov_guc_policies guc;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h
new file mode 100644
index 000000000000..faf9ee8266ce
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023-2024 Intel Corporation
+ */
+
+#ifndef _XE_GT_SRIOV_PF_TYPES_H_
+#define _XE_GT_SRIOV_PF_TYPES_H_
+
+#include <linux/types.h>
+
+#include "xe_gt_sriov_pf_config_types.h"
+#include "xe_gt_sriov_pf_policy_types.h"
+
+/**
+ * struct xe_gt_sriov_metadata - GT level per-VF metadata.
+ */
+struct xe_gt_sriov_metadata {
+ /** @config: per-VF provisioning data. */
+ struct xe_gt_sriov_config config;
+};
+
+/**
+ * struct xe_gt_sriov_pf - GT level PF virtualization data.
+ * @policy: policy data.
+ * @spare: PF-only provisioning configuration.
+ * @vfs: metadata for all VFs.
+ */
+struct xe_gt_sriov_pf {
+ struct xe_gt_sriov_pf_policy policy;
+ struct xe_gt_sriov_spare_config spare;
+ struct xe_gt_sriov_metadata *vfs;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_gt_sysfs.c b/drivers/gpu/drm/xe/xe_gt_sysfs.c
index c69d2e8a0fe1..1e5971072bc8 100644
--- a/drivers/gpu/drm/xe/xe_gt_sysfs.c
+++ b/drivers/gpu/drm/xe/xe_gt_sysfs.c
@@ -29,7 +29,7 @@ static void gt_sysfs_fini(struct drm_device *drm, void *arg)
kobject_put(gt->sysfs);
}
-void xe_gt_sysfs_init(struct xe_gt *gt)
+int xe_gt_sysfs_init(struct xe_gt *gt)
{
struct xe_tile *tile = gt_to_tile(gt);
struct xe_device *xe = gt_to_xe(gt);
@@ -38,24 +38,18 @@ void xe_gt_sysfs_init(struct xe_gt *gt)
kg = kzalloc(sizeof(*kg), GFP_KERNEL);
if (!kg)
- return;
+ return -ENOMEM;
kobject_init(&kg->base, &xe_gt_sysfs_kobj_type);
kg->gt = gt;
err = kobject_add(&kg->base, tile->sysfs, "gt%d", gt->info.id);
if (err) {
- drm_warn(&xe->drm, "failed to add GT sysfs directory, err: %d\n", err);
kobject_put(&kg->base);
- return;
+ return err;
}
gt->sysfs = &kg->base;
- err = drmm_add_action_or_reset(&xe->drm, gt_sysfs_fini, gt);
- if (err) {
- drm_warn(&xe->drm, "%s: drmm_add_action_or_reset failed, err: %d\n",
- __func__, err);
- return;
- }
+ return drmm_add_action_or_reset(&xe->drm, gt_sysfs_fini, gt);
}
diff --git a/drivers/gpu/drm/xe/xe_gt_sysfs.h b/drivers/gpu/drm/xe/xe_gt_sysfs.h
index e3ec278ca0be..ecbfcc5c7d42 100644
--- a/drivers/gpu/drm/xe/xe_gt_sysfs.h
+++ b/drivers/gpu/drm/xe/xe_gt_sysfs.h
@@ -8,7 +8,7 @@
#include "xe_gt_sysfs_types.h"
-void xe_gt_sysfs_init(struct xe_gt *gt);
+int xe_gt_sysfs_init(struct xe_gt *gt);
static inline struct xe_gt *
kobj_to_gt(struct kobject *kobj)
diff --git a/drivers/gpu/drm/xe/xe_gt_throttle_sysfs.c b/drivers/gpu/drm/xe/xe_gt_throttle_sysfs.c
index 63d640591a52..fbe21a8599ca 100644
--- a/drivers/gpu/drm/xe/xe_gt_throttle_sysfs.c
+++ b/drivers/gpu/drm/xe/xe_gt_throttle_sysfs.c
@@ -11,6 +11,7 @@
#include "xe_gt_sysfs.h"
#include "xe_gt_throttle_sysfs.h"
#include "xe_mmio.h"
+#include "xe_pm.h"
/**
* DOC: Xe GT Throttle
@@ -38,10 +39,12 @@ static u32 read_perf_limit_reasons(struct xe_gt *gt)
{
u32 reg;
+ xe_pm_runtime_get(gt_to_xe(gt));
if (xe_gt_is_media_type(gt))
reg = xe_mmio_read32(gt, MTL_MEDIA_PERF_LIMIT_REASONS);
else
reg = xe_mmio_read32(gt, GT0_PERF_LIMIT_REASONS);
+ xe_pm_runtime_put(gt_to_xe(gt));
return reg;
}
@@ -233,19 +236,14 @@ static void gt_throttle_sysfs_fini(struct drm_device *drm, void *arg)
sysfs_remove_group(gt->freq, &throttle_group_attrs);
}
-void xe_gt_throttle_sysfs_init(struct xe_gt *gt)
+int xe_gt_throttle_sysfs_init(struct xe_gt *gt)
{
struct xe_device *xe = gt_to_xe(gt);
int err;
err = sysfs_create_group(gt->freq, &throttle_group_attrs);
- if (err) {
- drm_warn(&xe->drm, "failed to register throttle sysfs, err: %d\n", err);
- return;
- }
-
- err = drmm_add_action_or_reset(&xe->drm, gt_throttle_sysfs_fini, gt);
if (err)
- drm_warn(&xe->drm, "%s: drmm_add_action_or_reset failed, err: %d\n",
- __func__, err);
+ return err;
+
+ return drmm_add_action_or_reset(&xe->drm, gt_throttle_sysfs_fini, gt);
}
diff --git a/drivers/gpu/drm/xe/xe_gt_throttle_sysfs.h b/drivers/gpu/drm/xe/xe_gt_throttle_sysfs.h
index 3ecfd4beffe1..6c61e6f228a8 100644
--- a/drivers/gpu/drm/xe/xe_gt_throttle_sysfs.h
+++ b/drivers/gpu/drm/xe/xe_gt_throttle_sysfs.h
@@ -10,7 +10,7 @@
struct xe_gt;
-void xe_gt_throttle_sysfs_init(struct xe_gt *gt);
+int xe_gt_throttle_sysfs_init(struct xe_gt *gt);
#endif /* _XE_GT_THROTTLE_SYSFS_H_ */
diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
index e598a4363d01..93df2d7969b3 100644
--- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
+++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
@@ -11,7 +11,9 @@
#include "xe_gt_printk.h"
#include "xe_guc.h"
#include "xe_guc_ct.h"
+#include "xe_mmio.h"
#include "xe_trace.h"
+#include "regs/xe_guc_regs.h"
#define TLB_TIMEOUT (HZ / 4)
@@ -209,7 +211,7 @@ static int send_tlb_invalidation(struct xe_guc *guc,
* Return: Seqno which can be passed to xe_gt_tlb_invalidation_wait on success,
* negative error code on error.
*/
-int xe_gt_tlb_invalidation_guc(struct xe_gt *gt)
+static int xe_gt_tlb_invalidation_guc(struct xe_gt *gt)
{
u32 action[] = {
XE_GUC_ACTION_TLB_INVALIDATION,
@@ -222,6 +224,45 @@ int xe_gt_tlb_invalidation_guc(struct xe_gt *gt)
}
/**
+ * xe_gt_tlb_invalidation_ggtt - Issue a TLB invalidation on this GT for the GGTT
+ * @gt: graphics tile
+ *
+ * Issue a TLB invalidation for the GGTT. Completion of TLB invalidation is
+ * synchronous.
+ *
+ * Return: 0 on success, negative error code on error
+ */
+int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt)
+{
+ struct xe_device *xe = gt_to_xe(gt);
+
+ if (xe_guc_ct_enabled(&gt->uc.guc.ct) &&
+ gt->uc.guc.submission_state.enabled) {
+ int seqno;
+
+ seqno = xe_gt_tlb_invalidation_guc(gt);
+ if (seqno <= 0)
+ return seqno;
+
+ xe_gt_tlb_invalidation_wait(gt, seqno);
+ } else if (xe_device_uc_enabled(xe)) {
+ xe_gt_WARN_ON(gt, xe_force_wake_get(gt_to_fw(gt), XE_FW_GT));
+ if (xe->info.platform == XE_PVC || GRAPHICS_VER(xe) >= 20) {
+ xe_mmio_write32(gt, PVC_GUC_TLB_INV_DESC1,
+ PVC_GUC_TLB_INV_DESC1_INVALIDATE);
+ xe_mmio_write32(gt, PVC_GUC_TLB_INV_DESC0,
+ PVC_GUC_TLB_INV_DESC0_VALID);
+ } else {
+ xe_mmio_write32(gt, GUC_TLB_INV_CR,
+ GUC_TLB_INV_CR_INVALIDATE);
+ }
+ xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+ }
+
+ return 0;
+}
+
+/**
* xe_gt_tlb_invalidation_vma - Issue a TLB invalidation on this GT for a VMA
* @gt: graphics tile
* @fence: invalidation fence which will be signaled on TLB invalidation
diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h
index b333c1709397..fbb743d80d2c 100644
--- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h
+++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h
@@ -16,7 +16,7 @@ struct xe_vma;
int xe_gt_tlb_invalidation_init(struct xe_gt *gt);
void xe_gt_tlb_invalidation_reset(struct xe_gt *gt);
-int xe_gt_tlb_invalidation_guc(struct xe_gt *gt);
+int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt);
int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
struct xe_gt_tlb_invalidation_fence *fence,
struct xe_vma *vma);
diff --git a/drivers/gpu/drm/xe/xe_gt_topology.c b/drivers/gpu/drm/xe/xe_gt_topology.c
index 5dc62fe1be49..3733e7a6860d 100644
--- a/drivers/gpu/drm/xe/xe_gt_topology.c
+++ b/drivers/gpu/drm/xe/xe_gt_topology.c
@@ -8,12 +8,10 @@
#include <linux/bitmap.h>
#include "regs/xe_gt_regs.h"
+#include "xe_assert.h"
#include "xe_gt.h"
#include "xe_mmio.h"
-#define XE_MAX_DSS_FUSE_BITS (32 * XE_MAX_DSS_FUSE_REGS)
-#define XE_MAX_EU_FUSE_BITS (32 * XE_MAX_EU_FUSE_REGS)
-
static void
load_dss_mask(struct xe_gt *gt, xe_dss_mask_t mask, int numregs, ...)
{
@@ -62,6 +60,114 @@ load_eu_mask(struct xe_gt *gt, xe_eu_mask_t mask)
bitmap_from_arr32(mask, &val, XE_MAX_EU_FUSE_BITS);
}
+/**
+ * gen_l3_mask_from_pattern - Replicate a bit pattern according to a mask
+ *
+ * Used to compute the L3 bank masks in a generic format on various
+ * platforms where the internal representation of the L3 nodes and the
+ * bank masks read from registers differ.
+ *
+ * @xe: device
+ * @dst: destination
+ * @pattern: pattern to replicate
+ * @patternbits: size of the pattern, in bits
+ * @mask: mask describing where to replicate the pattern
+ *
+ * Example 1:
+ * ----------
+ * @pattern = 0b1111
+ * └┬─┘
+ * @patternbits = 4 (bits)
+ * @mask = 0b0101
+ * ││││
+ * │││└────────────────── 0b1111 (=1×0b1111)
+ * ││└──────────── 0b0000 │ (=0×0b1111)
+ * │└────── 0b1111 │ │ (=1×0b1111)
+ * └ 0b0000 │ │ │ (=0×0b1111)
+ * │ │ │ │
+ * @dst = 0b0000 0b1111 0b0000 0b1111
+ *
+ * Example 2:
+ * ----------
+ * @pattern = 0b11111111
+ * └┬─────┘
+ * @patternbits = 8 (bits)
+ * @mask = 0b10
+ * ││
+ * ││
+ * ││
+ * │└────────── 0b00000000 (=0×0b11111111)
+ * └ 0b11111111 │ (=1×0b11111111)
+ * │ │
+ * @dst = 0b11111111 0b00000000
+ */
+static void
+gen_l3_mask_from_pattern(struct xe_device *xe, xe_l3_bank_mask_t dst,
+ xe_l3_bank_mask_t pattern, int patternbits,
+ unsigned long mask)
+{
+ unsigned long bit;
+
+ xe_assert(xe, fls(mask) <= patternbits);
+ for_each_set_bit(bit, &mask, 32) {
+ xe_l3_bank_mask_t shifted_pattern = {};
+
+ bitmap_shift_left(shifted_pattern, pattern, bit * patternbits,
+ XE_MAX_L3_BANK_MASK_BITS);
+ bitmap_or(dst, dst, shifted_pattern, XE_MAX_L3_BANK_MASK_BITS);
+ }
+}
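
The helper above is easier to follow on a single machine word. Below is a minimal standalone C sketch of the same replication idea, assuming plain u64 masks instead of the kernel bitmap API; the function name and test values are illustrative only.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Place @pattern (@patternbits wide) into slot @bit for every set bit in
 * @mask, mirroring what gen_l3_mask_from_pattern() does with bitmaps.
 */
static uint64_t replicate_pattern(uint64_t pattern, int patternbits,
				  uint64_t mask)
{
	uint64_t dst = 0;
	int bit;

	for (bit = 0; bit < 64 / patternbits; bit++)
		if (mask & (1ULL << bit))
			dst |= pattern << (bit * patternbits);
	return dst;
}

int main(void)
{
	/* Example 1 above: 0b1111 replicated per 0b0101 -> 0x0f0f */
	assert(replicate_pattern(0xf, 4, 0x5) == 0x0f0f);
	/* Example 2 above: 0b11111111 replicated per 0b10 -> 0xff00 */
	assert(replicate_pattern(0xff, 8, 0x2) == 0xff00);
	printf("ok\n");
	return 0;
}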
+
+static void
+load_l3_bank_mask(struct xe_gt *gt, xe_l3_bank_mask_t l3_bank_mask)
+{
+ struct xe_device *xe = gt_to_xe(gt);
+ u32 fuse3 = xe_mmio_read32(gt, MIRROR_FUSE3);
+
+ if (GRAPHICS_VER(xe) >= 20) {
+ xe_l3_bank_mask_t per_node = {};
+ u32 meml3_en = REG_FIELD_GET(XE2_NODE_ENABLE_MASK, fuse3);
+ u32 bank_val = REG_FIELD_GET(XE2_GT_L3_MODE_MASK, fuse3);
+
+ bitmap_from_arr32(per_node, &bank_val, 32);
+ gen_l3_mask_from_pattern(xe, l3_bank_mask, per_node, 4,
+ meml3_en);
+ } else if (GRAPHICS_VERx100(xe) >= 1270) {
+ xe_l3_bank_mask_t per_node = {};
+ xe_l3_bank_mask_t per_mask_bit = {};
+ u32 meml3_en = REG_FIELD_GET(MEML3_EN_MASK, fuse3);
+ u32 fuse4 = xe_mmio_read32(gt, XEHP_FUSE4);
+ u32 bank_val = REG_FIELD_GET(GT_L3_EXC_MASK, fuse4);
+
+ bitmap_set_value8(per_mask_bit, 0x3, 0);
+ gen_l3_mask_from_pattern(xe, per_node, per_mask_bit, 2, bank_val);
+ gen_l3_mask_from_pattern(xe, l3_bank_mask, per_node, 4,
+ meml3_en);
+ } else if (xe->info.platform == XE_PVC) {
+ xe_l3_bank_mask_t per_node = {};
+ xe_l3_bank_mask_t per_mask_bit = {};
+ u32 meml3_en = REG_FIELD_GET(MEML3_EN_MASK, fuse3);
+ u32 bank_val = REG_FIELD_GET(XEHPC_GT_L3_MODE_MASK, fuse3);
+
+ bitmap_set_value8(per_mask_bit, 0xf, 0);
+ gen_l3_mask_from_pattern(xe, per_node, per_mask_bit, 4,
+ bank_val);
+ gen_l3_mask_from_pattern(xe, l3_bank_mask, per_node, 16,
+ meml3_en);
+ } else if (xe->info.platform == XE_DG2) {
+ xe_l3_bank_mask_t per_node = {};
+ u32 mask = REG_FIELD_GET(MEML3_EN_MASK, fuse3);
+
+ bitmap_set_value8(per_node, 0xff, 0);
+ gen_l3_mask_from_pattern(xe, l3_bank_mask, per_node, 8, mask);
+ } else {
+ /* 1:1 register bit to mask bit (inverted register bits) */
+ u32 mask = REG_FIELD_GET(XELP_GT_L3_MODE_MASK, ~fuse3);
+
+ bitmap_from_arr32(l3_bank_mask, &mask, 32);
+ }
+}
+
static void
get_num_dss_regs(struct xe_device *xe, int *geometry_regs, int *compute_regs)
{
@@ -106,6 +212,7 @@ xe_gt_topology_init(struct xe_gt *gt)
XEHPC_GT_COMPUTE_DSS_ENABLE_EXT,
XE2_GT_COMPUTE_DSS_2);
load_eu_mask(gt, gt->fuse_topo.eu_mask_per_dss);
+ load_l3_bank_mask(gt, gt->fuse_topo.l3_bank_mask);
p = drm_dbg_printer(&gt_to_xe(gt)->drm, DRM_UT_DRIVER, "GT topology");
@@ -123,6 +230,8 @@ xe_gt_topology_dump(struct xe_gt *gt, struct drm_printer *p)
drm_printf(p, "EU mask per DSS: %*pb\n", XE_MAX_EU_FUSE_BITS,
gt->fuse_topo.eu_mask_per_dss);
+ drm_printf(p, "L3 bank mask: %*pb\n", XE_MAX_L3_BANK_MASK_BITS,
+ gt->fuse_topo.l3_bank_mask);
}
/*
diff --git a/drivers/gpu/drm/xe/xe_gt_topology.h b/drivers/gpu/drm/xe/xe_gt_topology.h
index d1b54fb52ea6..b3e357777a6e 100644
--- a/drivers/gpu/drm/xe/xe_gt_topology.h
+++ b/drivers/gpu/drm/xe/xe_gt_topology.h
@@ -8,6 +8,17 @@
#include "xe_gt_types.h"
+/*
+ * Loop over each DSS whose bit is set in either the geometry or compute mask
+ * @dss: iterated DSS bit from the DSS mask
+ * @gt: GT structure
+ */
+#define for_each_dss(dss, gt) \
+ for_each_or_bit((dss), \
+ (gt)->fuse_topo.g_dss_mask, \
+ (gt)->fuse_topo.c_dss_mask, \
+ XE_MAX_DSS_FUSE_BITS)
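
A quick single-word illustration of what the macro walks, assuming made-up fuse values (the real macro iterates multi-word bitmaps via for_each_or_bit()):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t g_dss = 0x5;			/* geometry DSS: bits 0, 2 (made up) */
	uint64_t c_dss = 0x6;			/* compute DSS: bits 1, 2 (made up) */
	uint64_t either = g_dss | c_dss;	/* the OR the macro iterates */
	int dss;

	for (dss = 0; dss < 64; dss++)
		if (either & (1ULL << dss))
			printf("dss %d\n", dss);	/* prints 0, 1, 2 */
	return 0;
}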
+
struct drm_printer;
void xe_gt_topology_init(struct xe_gt *gt);
diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h
index 07b2f724ec45..cfdc761ff7f4 100644
--- a/drivers/gpu/drm/xe/xe_gt_types.h
+++ b/drivers/gpu/drm/xe/xe_gt_types.h
@@ -8,6 +8,7 @@
#include "xe_force_wake_types.h"
#include "xe_gt_idle_types.h"
+#include "xe_gt_sriov_pf_types.h"
#include "xe_hw_engine_types.h"
#include "xe_hw_fence_types.h"
#include "xe_reg_sr_types.h"
@@ -24,11 +25,15 @@ enum xe_gt_type {
XE_GT_TYPE_MEDIA,
};
-#define XE_MAX_DSS_FUSE_REGS 3
-#define XE_MAX_EU_FUSE_REGS 1
+#define XE_MAX_DSS_FUSE_REGS 3
+#define XE_MAX_DSS_FUSE_BITS (32 * XE_MAX_DSS_FUSE_REGS)
+#define XE_MAX_EU_FUSE_REGS 1
+#define XE_MAX_EU_FUSE_BITS (32 * XE_MAX_EU_FUSE_REGS)
+#define XE_MAX_L3_BANK_MASK_BITS 64
-typedef unsigned long xe_dss_mask_t[BITS_TO_LONGS(32 * XE_MAX_DSS_FUSE_REGS)];
-typedef unsigned long xe_eu_mask_t[BITS_TO_LONGS(32 * XE_MAX_EU_FUSE_REGS)];
+typedef unsigned long xe_dss_mask_t[BITS_TO_LONGS(XE_MAX_DSS_FUSE_BITS)];
+typedef unsigned long xe_eu_mask_t[BITS_TO_LONGS(XE_MAX_EU_FUSE_BITS)];
+typedef unsigned long xe_l3_bank_mask_t[BITS_TO_LONGS(XE_MAX_L3_BANK_MASK_BITS)];
struct xe_mmio_range {
u32 start;
@@ -138,6 +143,12 @@ struct xe_gt {
u32 adj_offset;
} mmio;
+ /** @sriov: virtualization data related to GT */
+ union {
+ /** @sriov.pf: PF data. Valid only if driver is running as PF */
+ struct xe_gt_sriov_pf pf;
+ } sriov;
+
/**
* @reg_sr: table with registers to be restored on GT init/resume/reset
*/
@@ -325,6 +336,9 @@ struct xe_gt {
/** @fuse_topo.eu_mask_per_dss: EU mask per DSS*/
xe_eu_mask_t eu_mask_per_dss;
+
+ /** @fuse_topo.l3_bank_mask: L3 bank mask */
+ xe_l3_bank_mask_t l3_bank_mask;
} fuse_topo;
/** @steering: register steering for individual HW units */
diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c
index 0d2a2dd13f11..240e7a4bbff1 100644
--- a/drivers/gpu/drm/xe/xe_guc.c
+++ b/drivers/gpu/drm/xe/xe_guc.c
@@ -12,11 +12,13 @@
#include "abi/guc_actions_abi.h"
#include "abi/guc_errors_abi.h"
#include "regs/xe_gt_regs.h"
+#include "regs/xe_gtt_defs.h"
#include "regs/xe_guc_regs.h"
#include "xe_bo.h"
#include "xe_device.h"
#include "xe_force_wake.h"
#include "xe_gt.h"
+#include "xe_gt_printk.h"
#include "xe_guc_ads.h"
#include "xe_guc_ct.h"
#include "xe_guc_hwconfig.h"
@@ -33,14 +35,13 @@
#include "xe_wa.h"
#include "xe_wopcm.h"
-/* GuC addresses above GUC_GGTT_TOP also don't map through the GTT */
-#define GUC_GGTT_TOP 0xFEE00000
static u32 guc_bo_ggtt_addr(struct xe_guc *guc,
struct xe_bo *bo)
{
struct xe_device *xe = guc_to_xe(guc);
u32 addr = xe_bo_ggtt_addr(bo);
+ /* GuC addresses above GUC_GGTT_TOP don't map through the GTT */
xe_assert(xe, addr >= xe_wopcm_size(guc_to_xe(guc)));
xe_assert(xe, addr < GUC_GGTT_TOP);
xe_assert(xe, bo->size <= GUC_GGTT_TOP - addr);
@@ -133,15 +134,10 @@ static u32 guc_ctl_ads_flags(struct xe_guc *guc)
return flags;
}
-#define GUC_VER(maj, min, pat) (((maj) << 16) | ((min) << 8) | (pat))
-
static u32 guc_ctl_wa_flags(struct xe_guc *guc)
{
struct xe_device *xe = guc_to_xe(guc);
struct xe_gt *gt = guc_to_gt(guc);
- struct xe_uc_fw *uc_fw = &guc->fw;
- struct xe_uc_fw_version *version = &uc_fw->versions.found[XE_UC_FW_VER_RELEASE];
-
u32 flags = 0;
if (XE_WA(gt, 22012773006))
@@ -164,20 +160,15 @@ static u32 guc_ctl_wa_flags(struct xe_guc *guc)
if (XE_WA(gt, 22012727170) || XE_WA(gt, 22012727685))
flags |= GUC_WA_CONTEXT_ISOLATION;
- if ((XE_WA(gt, 16015675438) || XE_WA(gt, 18020744125)) &&
+ if (XE_WA(gt, 18020744125) &&
!xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_RENDER))
flags |= GUC_WA_RCS_REGS_IN_CCS_REGS_LIST;
if (XE_WA(gt, 1509372804))
flags |= GUC_WA_RENDER_RST_RC6_EXIT;
- if (XE_WA(gt, 14018913170)) {
- if (GUC_VER(version->major, version->minor, version->patch) >= GUC_VER(70, 7, 0))
- flags |= GUC_WA_ENABLE_TSC_CHECK_ON_RC6;
- else
- drm_dbg(&xe->drm, "Skip WA 14018913170: GUC version expected >= 70.7.0, found %u.%u.%u\n",
- version->major, version->minor, version->patch);
- }
+ if (XE_WA(gt, 14018913170))
+ flags |= GUC_WA_ENABLE_TSC_CHECK_ON_RC6;
return flags;
}
@@ -189,15 +180,23 @@ static u32 guc_ctl_devid(struct xe_guc *guc)
return (((u32)xe->info.devid) << 16) | xe->info.revid;
}
-static void guc_init_params(struct xe_guc *guc)
+static void guc_print_params(struct xe_guc *guc)
{
- struct xe_device *xe = guc_to_xe(guc);
+ struct xe_gt *gt = guc_to_gt(guc);
u32 *params = guc->params;
int i;
BUILD_BUG_ON(sizeof(guc->params) != GUC_CTL_MAX_DWORDS * sizeof(u32));
BUILD_BUG_ON(GUC_CTL_MAX_DWORDS + 2 != SOFT_SCRATCH_COUNT);
+ for (i = 0; i < GUC_CTL_MAX_DWORDS; i++)
+ xe_gt_dbg(gt, "GuC param[%2d] = 0x%08x\n", i, params[i]);
+}
+
+static void guc_init_params(struct xe_guc *guc)
+{
+ u32 *params = guc->params;
+
params[GUC_CTL_LOG_PARAMS] = guc_ctl_log_params_flags(guc);
params[GUC_CTL_FEATURE] = 0;
params[GUC_CTL_DEBUG] = guc_ctl_debug_flags(guc);
@@ -205,18 +204,12 @@ static void guc_init_params(struct xe_guc *guc)
params[GUC_CTL_WA] = 0;
params[GUC_CTL_DEVID] = guc_ctl_devid(guc);
- for (i = 0; i < GUC_CTL_MAX_DWORDS; i++)
- drm_dbg(&xe->drm, "GuC param[%2d] = 0x%08x\n", i, params[i]);
+ guc_print_params(guc);
}
static void guc_init_params_post_hwconfig(struct xe_guc *guc)
{
- struct xe_device *xe = guc_to_xe(guc);
u32 *params = guc->params;
- int i;
-
- BUILD_BUG_ON(sizeof(guc->params) != GUC_CTL_MAX_DWORDS * sizeof(u32));
- BUILD_BUG_ON(GUC_CTL_MAX_DWORDS + 2 != SOFT_SCRATCH_COUNT);
params[GUC_CTL_LOG_PARAMS] = guc_ctl_log_params_flags(guc);
params[GUC_CTL_FEATURE] = guc_ctl_feature_flags(guc);
@@ -225,8 +218,7 @@ static void guc_init_params_post_hwconfig(struct xe_guc *guc)
params[GUC_CTL_WA] = guc_ctl_wa_flags(guc);
params[GUC_CTL_DEVID] = guc_ctl_devid(guc);
- for (i = 0; i < GUC_CTL_MAX_DWORDS; i++)
- drm_dbg(&xe->drm, "GuC param[%2d] = 0x%08x\n", i, params[i]);
+ guc_print_params(guc);
}
/*
@@ -250,10 +242,11 @@ static void guc_write_params(struct xe_guc *guc)
static void guc_fini(struct drm_device *drm, void *arg)
{
struct xe_guc *guc = arg;
+ struct xe_gt *gt = guc_to_gt(guc);
- xe_force_wake_get(gt_to_fw(guc_to_gt(guc)), XE_FORCEWAKE_ALL);
+ xe_gt_WARN_ON(gt, xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL));
xe_uc_fini_hw(&guc_to_gt(guc)->uc);
- xe_force_wake_put(gt_to_fw(guc_to_gt(guc)), XE_FORCEWAKE_ALL);
+ xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL);
}
/**
@@ -330,7 +323,7 @@ int xe_guc_init(struct xe_guc *guc)
if (ret)
goto out;
- ret = drmm_add_action_or_reset(&gt_to_xe(gt)->drm, guc_fini, guc);
+ ret = drmm_add_action_or_reset(&xe->drm, guc_fini, guc);
if (ret)
goto out;
@@ -343,7 +336,7 @@ int xe_guc_init(struct xe_guc *guc)
return 0;
out:
- drm_err(&xe->drm, "GuC init failed with %d", ret);
+ xe_gt_err(gt, "GuC init failed with %pe\n", ERR_PTR(ret));
return ret;
}
@@ -380,7 +373,6 @@ int xe_guc_post_load_init(struct xe_guc *guc)
int xe_guc_reset(struct xe_guc *guc)
{
- struct xe_device *xe = guc_to_xe(guc);
struct xe_gt *gt = guc_to_gt(guc);
u32 guc_status, gdrst;
int ret;
@@ -391,16 +383,14 @@ int xe_guc_reset(struct xe_guc *guc)
ret = xe_mmio_wait32(gt, GDRST, GRDOM_GUC, 0, 5000, &gdrst, false);
if (ret) {
- drm_err(&xe->drm, "GuC reset timed out, GDRST=0x%8x\n",
- gdrst);
+ xe_gt_err(gt, "GuC reset timed out, GDRST=%#x\n", gdrst);
goto err_out;
}
guc_status = xe_mmio_read32(gt, GUC_STATUS);
if (!(guc_status & GS_MIA_IN_RESET)) {
- drm_err(&xe->drm,
- "GuC status: 0x%x, MIA core expected to be in reset\n",
- guc_status);
+ xe_gt_err(gt, "GuC status: %#x, MIA core expected to be in reset\n",
+ guc_status);
ret = -EIO;
goto err_out;
}
@@ -463,7 +453,7 @@ static int guc_xfer_rsa(struct xe_guc *guc)
static int guc_wait_ucode(struct xe_guc *guc)
{
- struct xe_device *xe = guc_to_xe(guc);
+ struct xe_gt *gt = guc_to_gt(guc);
u32 status;
int ret;
@@ -484,35 +474,32 @@ static int guc_wait_ucode(struct xe_guc *guc)
* 200ms. Even at slowest clock, this should be sufficient. And
* in the working case, a larger timeout makes no difference.
*/
- ret = xe_mmio_wait32(guc_to_gt(guc), GUC_STATUS, GS_UKERNEL_MASK,
+ ret = xe_mmio_wait32(gt, GUC_STATUS, GS_UKERNEL_MASK,
FIELD_PREP(GS_UKERNEL_MASK, XE_GUC_LOAD_STATUS_READY),
200000, &status, false);
if (ret) {
- struct drm_device *drm = &xe->drm;
-
- drm_info(drm, "GuC load failed: status = 0x%08X\n", status);
- drm_info(drm, "GuC load failed: status: Reset = %d, BootROM = 0x%02X, UKernel = 0x%02X, MIA = 0x%02X, Auth = 0x%02X\n",
- REG_FIELD_GET(GS_MIA_IN_RESET, status),
- REG_FIELD_GET(GS_BOOTROM_MASK, status),
- REG_FIELD_GET(GS_UKERNEL_MASK, status),
- REG_FIELD_GET(GS_MIA_MASK, status),
- REG_FIELD_GET(GS_AUTH_STATUS_MASK, status));
+ xe_gt_info(gt, "GuC load failed: status = 0x%08X\n", status);
+ xe_gt_info(gt, "GuC status: Reset = %u, BootROM = %#X, UKernel = %#X, MIA = %#X, Auth = %#X\n",
+ REG_FIELD_GET(GS_MIA_IN_RESET, status),
+ REG_FIELD_GET(GS_BOOTROM_MASK, status),
+ REG_FIELD_GET(GS_UKERNEL_MASK, status),
+ REG_FIELD_GET(GS_MIA_MASK, status),
+ REG_FIELD_GET(GS_AUTH_STATUS_MASK, status));
if ((status & GS_BOOTROM_MASK) == GS_BOOTROM_RSA_FAILED) {
- drm_info(drm, "GuC firmware signature verification failed\n");
+ xe_gt_info(gt, "GuC firmware signature verification failed\n");
ret = -ENOEXEC;
}
if (REG_FIELD_GET(GS_UKERNEL_MASK, status) ==
XE_GUC_LOAD_STATUS_EXCEPTION) {
- drm_info(drm, "GuC firmware exception. EIP: %#x\n",
- xe_mmio_read32(guc_to_gt(guc),
- SOFT_SCRATCH(13)));
+ xe_gt_info(gt, "GuC firmware exception. EIP: %#x\n",
+ xe_mmio_read32(gt, SOFT_SCRATCH(13)));
ret = -ENXIO;
}
} else {
- drm_dbg(&xe->drm, "GuC successfully loaded");
+ xe_gt_dbg(gt, "GuC successfully loaded\n");
}
return ret;
@@ -604,6 +591,9 @@ static void guc_handle_mmio_msg(struct xe_guc *guc)
struct xe_gt *gt = guc_to_gt(guc);
u32 msg;
+ if (IS_SRIOV_VF(guc_to_xe(guc)))
+ return;
+
xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);
msg = xe_mmio_read32(gt, SOFT_SCRATCH(15));
@@ -612,12 +602,10 @@ static void guc_handle_mmio_msg(struct xe_guc *guc)
xe_mmio_write32(gt, SOFT_SCRATCH(15), 0);
if (msg & XE_GUC_RECV_MSG_CRASH_DUMP_POSTED)
- drm_err(&guc_to_xe(guc)->drm,
- "Received early GuC crash dump notification!\n");
+ xe_gt_err(gt, "Received early GuC crash dump notification!\n");
if (msg & XE_GUC_RECV_MSG_EXCEPTION)
- drm_err(&guc_to_xe(guc)->drm,
- "Received early GuC exception notification!\n");
+ xe_gt_err(gt, "Received early GuC exception notification!\n");
}
static void guc_enable_irq(struct xe_guc *guc)
@@ -668,15 +656,15 @@ int xe_guc_enable_communication(struct xe_guc *guc)
int xe_guc_suspend(struct xe_guc *guc)
{
- int ret;
+ struct xe_gt *gt = guc_to_gt(guc);
u32 action[] = {
XE_GUC_ACTION_CLIENT_SOFT_RESET,
};
+ int ret;
ret = xe_guc_mmio_send(guc, action, ARRAY_SIZE(action));
if (ret) {
- drm_err(&guc_to_xe(guc)->drm,
- "GuC suspend: CLIENT_SOFT_RESET fail: %d!\n", ret);
+ xe_gt_err(gt, "GuC suspend failed: %pe\n", ERR_PTR(ret));
return ret;
}
@@ -751,8 +739,8 @@ retry:
50000, &reply, false);
if (ret) {
timeout:
- drm_err(&xe->drm, "mmio request %#x: no reply %#x\n",
- request[0], reply);
+ xe_gt_err(gt, "GuC mmio request %#x: no reply %#x\n",
+ request[0], reply);
return ret;
}
@@ -790,8 +778,8 @@ timeout:
GUC_HXG_TYPE_NO_RESPONSE_RETRY) {
u32 reason = FIELD_GET(GUC_HXG_RETRY_MSG_0_REASON, header);
- drm_dbg(&xe->drm, "mmio request %#x: retrying, reason %#x\n",
- request[0], reason);
+ xe_gt_dbg(gt, "GuC mmio request %#x: retrying, reason %#x\n",
+ request[0], reason);
goto retry;
}
@@ -800,16 +788,16 @@ timeout:
u32 hint = FIELD_GET(GUC_HXG_FAILURE_MSG_0_HINT, header);
u32 error = FIELD_GET(GUC_HXG_FAILURE_MSG_0_ERROR, header);
- drm_err(&xe->drm, "mmio request %#x: failure %#x/%#x\n",
- request[0], error, hint);
+ xe_gt_err(gt, "GuC mmio request %#x: failure %#x hint %#x\n",
+ request[0], error, hint);
return -ENXIO;
}
if (FIELD_GET(GUC_HXG_MSG_0_TYPE, header) !=
GUC_HXG_TYPE_RESPONSE_SUCCESS) {
proto:
- drm_err(&xe->drm, "mmio request %#x: unexpected reply %#x\n",
- request[0], header);
+ xe_gt_err(gt, "GuC mmio request %#x: unexpected reply %#x\n",
+ request[0], header);
return -EPROTO;
}
diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c
index 6ad4c1a90a78..1aafa486edec 100644
--- a/drivers/gpu/drm/xe/xe_guc_ads.c
+++ b/drivers/gpu/drm/xe/xe_guc_ads.c
@@ -7,6 +7,8 @@
#include <drm/drm_managed.h>
+#include <generated/xe_wa_oob.h>
+
#include "regs/xe_engine_regs.h"
#include "regs/xe_gt_regs.h"
#include "regs/xe_guc_regs.h"
@@ -19,6 +21,7 @@
#include "xe_map.h"
#include "xe_mmio.h"
#include "xe_platform_types.h"
+#include "xe_wa.h"
/* Slack of a few additional entries per engine */
#define ADS_REGSET_EXTRA_MAX 8
@@ -80,6 +83,10 @@ ads_to_map(struct xe_guc_ads *ads)
* +---------------------------------------+
* | padding |
* +---------------------------------------+ <== 4K aligned
+ * | w/a KLVs |
+ * +---------------------------------------+
+ * | padding |
+ * +---------------------------------------+ <== 4K aligned
* | capture lists |
* +---------------------------------------+
* | padding |
@@ -131,6 +138,11 @@ static size_t guc_ads_golden_lrc_size(struct xe_guc_ads *ads)
return PAGE_ALIGN(ads->golden_lrc_size);
}
+static u32 guc_ads_waklv_size(struct xe_guc_ads *ads)
+{
+ return PAGE_ALIGN(ads->ads_waklv_size);
+}
+
static size_t guc_ads_capture_size(struct xe_guc_ads *ads)
{
/* FIXME: Allocate a proper capture list */
@@ -167,12 +179,22 @@ static size_t guc_ads_golden_lrc_offset(struct xe_guc_ads *ads)
return PAGE_ALIGN(offset);
}
+static size_t guc_ads_waklv_offset(struct xe_guc_ads *ads)
+{
+ u32 offset;
+
+ offset = guc_ads_golden_lrc_offset(ads) +
+ guc_ads_golden_lrc_size(ads);
+
+ return PAGE_ALIGN(offset);
+}
+
static size_t guc_ads_capture_offset(struct xe_guc_ads *ads)
{
size_t offset;
- offset = guc_ads_golden_lrc_offset(ads) +
- guc_ads_golden_lrc_size(ads);
+ offset = guc_ads_waklv_offset(ads) +
+ guc_ads_waklv_size(ads);
return PAGE_ALIGN(offset);
}
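
The new w/a KLV region slots between the golden LRCs and the capture lists, with every region starting on a 4K boundary. A hedged sketch of the offset chaining, with invented sizes:

#include <stdio.h>

#define SZ_4K		4096u
#define PAGE_ALIGN(x)	(((x) + SZ_4K - 1) & ~(SZ_4K - 1))

int main(void)
{
	unsigned int golden_lrc_offset = 0x8000;		/* invented */
	unsigned int golden_lrc_size = PAGE_ALIGN(0x2345);	/* invented */
	unsigned int waklv_offset = PAGE_ALIGN(golden_lrc_offset + golden_lrc_size);
	unsigned int waklv_size = SZ_4K;	/* one page, as in calculate_waklv_size() */
	unsigned int capture_offset = PAGE_ALIGN(waklv_offset + waklv_size);

	printf("waklv %#x..%#x, capture %#x\n",
	       waklv_offset, waklv_offset + waklv_size, capture_offset);
	return 0;
}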
@@ -260,6 +282,110 @@ static size_t calculate_golden_lrc_size(struct xe_guc_ads *ads)
return total_size;
}
+static void guc_waklv_enable_one_word(struct xe_guc_ads *ads,
+ enum xe_guc_klv_ids klv_id,
+ u32 value,
+ u32 *offset, u32 *remain)
+{
+ u32 size;
+ u32 klv_entry[] = {
+ /* 16:16 key/length */
+ FIELD_PREP(GUC_KLV_0_KEY, klv_id) |
+ FIELD_PREP(GUC_KLV_0_LEN, 1),
+ value,
+ /* 1 dword data */
+ };
+
+ size = sizeof(klv_entry);
+
+ if (*remain < size) {
+ drm_warn(&ads_to_xe(ads)->drm,
+ "w/a klv buffer too small to add klv id %d\n", klv_id);
+ } else {
+ xe_map_memcpy_to(ads_to_xe(ads), ads_to_map(ads), *offset,
+ klv_entry, size);
+ *offset += size;
+ *remain -= size;
+ }
+}
+
+static void guc_waklv_enable_simple(struct xe_guc_ads *ads,
+ enum xe_guc_klv_ids klv_id, u32 *offset, u32 *remain)
+{
+ u32 klv_entry[] = {
+ /* 16:16 key/length */
+ FIELD_PREP(GUC_KLV_0_KEY, klv_id) |
+ FIELD_PREP(GUC_KLV_0_LEN, 0),
+ /* 0 dwords data */
+ };
+ u32 size;
+
+ size = sizeof(klv_entry);
+
+ if (xe_gt_WARN(ads_to_gt(ads), *remain < size,
+ "w/a klv buffer too small to add klv id %d\n", klv_id))
+ return;
+
+ xe_map_memcpy_to(ads_to_xe(ads), ads_to_map(ads), *offset,
+ klv_entry, size);
+ *offset += size;
+ *remain -= size;
+}
+
+static void guc_waklv_init(struct xe_guc_ads *ads)
+{
+ struct xe_gt *gt = ads_to_gt(ads);
+ u64 addr_ggtt;
+ u32 offset, remain, size;
+
+ offset = guc_ads_waklv_offset(ads);
+ remain = guc_ads_waklv_size(ads);
+
+ if (XE_WA(gt, 14019882105))
+ guc_waklv_enable_simple(ads,
+ GUC_WORKAROUND_KLV_BLOCK_INTERRUPTS_WHEN_MGSR_BLOCKED,
+ &offset, &remain);
+ if (XE_WA(gt, 18024947630))
+ guc_waklv_enable_simple(ads,
+ GUC_WORKAROUND_KLV_ID_GAM_PFQ_SHADOW_TAIL_POLLING,
+ &offset, &remain);
+ if (XE_WA(gt, 16022287689))
+ guc_waklv_enable_simple(ads,
+ GUC_WORKAROUND_KLV_ID_DISABLE_MTP_DURING_ASYNC_COMPUTE,
+ &offset, &remain);
+
+ /*
+ * On RC6 exit, GuC will write register 0xB04 with the default value provided. As of now,
+ * the default value for this register is determined to be 0xC40. This could change in the
+ * future, so GuC depends on KMD to send it the correct value.
+ */
+ if (XE_WA(gt, 13011645652))
+ guc_waklv_enable_one_word(ads,
+ GUC_WA_KLV_NP_RD_WRITE_TO_CLEAR_RCSM_AT_CGP_LATE_RESTORE,
+ 0xC40,
+ &offset, &remain);
+
+ size = guc_ads_waklv_size(ads) - remain;
+ if (!size)
+ return;
+
+ offset = guc_ads_waklv_offset(ads);
+ addr_ggtt = xe_bo_ggtt_addr(ads->bo) + offset;
+
+ ads_blob_write(ads, ads.wa_klv_addr_lo, lower_32_bits(addr_ggtt));
+ ads_blob_write(ads, ads.wa_klv_addr_hi, upper_32_bits(addr_ggtt));
+ ads_blob_write(ads, ads.wa_klv_size, size);
+}
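
Both enable helpers share the 16:16 key/length header packing and the offset/remain bookkeeping. A standalone sketch of that pattern, assuming made-up keys and counting the buffer in dwords rather than bytes:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Append one KLV (1 header dword + @len payload dwords) if it fits. */
static int klv_append(uint32_t *buf, uint32_t *offset, uint32_t *remain,
		      uint16_t key, const uint32_t *data, uint16_t len)
{
	uint32_t size = 1 + len;

	if (*remain < size)
		return -1;	/* buffer too small, as warned in the helpers above */

	buf[*offset] = (uint32_t)key << 16 | len;	/* 16:16 key/length */
	if (len)
		memcpy(&buf[*offset + 1], data, len * sizeof(*data));
	*offset += size;
	*remain -= size;
	return 0;
}

int main(void)
{
	uint32_t buf[8], off = 0, rem = 8;
	uint32_t val = 0xC40;	/* the 0xB04 default sent for WA 13011645652 */

	klv_append(buf, &off, &rem, 0x1001, NULL, 0);	/* no-payload w/a, made-up key */
	klv_append(buf, &off, &rem, 0x1002, &val, 1);	/* one-word w/a, made-up key */
	printf("used %u dwords, headers %#x %#x\n", off, buf[0], buf[1]);
	return 0;
}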
+
+static int calculate_waklv_size(struct xe_guc_ads *ads)
+{
+ /*
+ * A single page is both the minimum size possible and
+ * large enough for all current platforms.
+ */
+ return SZ_4K;
+}
+
#define MAX_GOLDEN_LRC_SIZE (SZ_4K * 64)
int xe_guc_ads_init(struct xe_guc_ads *ads)
@@ -271,10 +397,12 @@ int xe_guc_ads_init(struct xe_guc_ads *ads)
ads->golden_lrc_size = calculate_golden_lrc_size(ads);
ads->regset_size = calculate_regset_size(gt);
+ ads->ads_waklv_size = calculate_waklv_size(ads);
bo = xe_managed_bo_create_pin_map(xe, tile, guc_ads_size(ads) + MAX_GOLDEN_LRC_SIZE,
- XE_BO_CREATE_SYSTEM_BIT |
- XE_BO_CREATE_GGTT_BIT);
+ XE_BO_FLAG_SYSTEM |
+ XE_BO_FLAG_GGTT |
+ XE_BO_FLAG_GGTT_INVALIDATE);
if (IS_ERR(bo))
return PTR_ERR(bo);
@@ -597,6 +725,7 @@ void xe_guc_ads_populate(struct xe_guc_ads *ads)
guc_mapping_table_init(gt, &info_map);
guc_capture_list_init(ads);
guc_doorbell_init(ads);
+ guc_waklv_init(ads);
if (xe->info.has_usm) {
guc_um_init_params(ads);
diff --git a/drivers/gpu/drm/xe/xe_guc_ads_types.h b/drivers/gpu/drm/xe/xe_guc_ads_types.h
index 4afe44bece4b..2de5decfe0fd 100644
--- a/drivers/gpu/drm/xe/xe_guc_ads_types.h
+++ b/drivers/gpu/drm/xe/xe_guc_ads_types.h
@@ -20,6 +20,8 @@ struct xe_guc_ads {
size_t golden_lrc_size;
/** @regset_size: size of register set passed to GuC for save/restore */
u32 regset_size;
+ /** @ads_waklv_size: total waklv size supported by platform */
+ u32 ads_waklv_size;
};
#endif
diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c
index 355edd4d758a..8ac819a7061e 100644
--- a/drivers/gpu/drm/xe/xe_guc_ct.c
+++ b/drivers/gpu/drm/xe/xe_guc_ct.c
@@ -21,6 +21,7 @@
#include "xe_gt.h"
#include "xe_gt_pagefault.h"
#include "xe_gt_printk.h"
+#include "xe_gt_sriov_pf_control.h"
#include "xe_gt_tlb_invalidation.h"
#include "xe_guc.h"
#include "xe_guc_relay.h"
@@ -143,20 +144,24 @@ int xe_guc_ct_init(struct xe_guc_ct *ct)
struct xe_bo *bo;
int err;
- xe_assert(xe, !(guc_ct_size() % PAGE_SIZE));
+ xe_gt_assert(gt, !(guc_ct_size() % PAGE_SIZE));
- drmm_mutex_init(&xe->drm, &ct->lock);
spin_lock_init(&ct->fast_lock);
xa_init(&ct->fence_lookup);
INIT_WORK(&ct->g2h_worker, g2h_worker_func);
init_waitqueue_head(&ct->wq);
init_waitqueue_head(&ct->g2h_fence_wq);
+ err = drmm_mutex_init(&xe->drm, &ct->lock);
+ if (err)
+ return err;
+
primelockdep(ct);
bo = xe_managed_bo_create_pin_map(xe, tile, guc_ct_size(),
- XE_BO_CREATE_SYSTEM_BIT |
- XE_BO_CREATE_GGTT_BIT);
+ XE_BO_FLAG_SYSTEM |
+ XE_BO_FLAG_GGTT |
+ XE_BO_FLAG_GGTT_INVALIDATE);
if (IS_ERR(bo))
return PTR_ERR(bo);
@@ -166,7 +171,7 @@ int xe_guc_ct_init(struct xe_guc_ct *ct)
if (err)
return err;
- xe_assert(xe, ct->state == XE_GUC_CT_STATE_NOT_INITIALIZED);
+ xe_gt_assert(gt, ct->state == XE_GUC_CT_STATE_NOT_INITIALIZED);
ct->state = XE_GUC_CT_STATE_DISABLED;
return 0;
}
@@ -313,9 +318,10 @@ static void xe_guc_ct_set_state(struct xe_guc_ct *ct,
int xe_guc_ct_enable(struct xe_guc_ct *ct)
{
struct xe_device *xe = ct_to_xe(ct);
+ struct xe_gt *gt = ct_to_gt(ct);
int err;
- xe_assert(xe, !xe_guc_ct_enabled(ct));
+ xe_gt_assert(gt, !xe_guc_ct_enabled(ct));
guc_ct_ctb_h2g_init(xe, &ct->ctbs.h2g, &ct->bo->vmap);
guc_ct_ctb_g2h_init(xe, &ct->ctbs.g2h, &ct->bo->vmap);
@@ -336,12 +342,12 @@ int xe_guc_ct_enable(struct xe_guc_ct *ct)
smp_mb();
wake_up_all(&ct->wq);
- drm_dbg(&xe->drm, "GuC CT communication channel enabled\n");
+ xe_gt_dbg(gt, "GuC CT communication channel enabled\n");
return 0;
err_out:
- drm_err(&xe->drm, "Failed to enable CT (%d)\n", err);
+ xe_gt_err(gt, "Failed to enable GuC CT (%pe)\n", ERR_PTR(err));
return err;
}
@@ -422,7 +428,7 @@ static void h2g_reserve_space(struct xe_guc_ct *ct, u32 cmd_len)
static void __g2h_reserve_space(struct xe_guc_ct *ct, u32 g2h_len, u32 num_g2h)
{
- xe_assert(ct_to_xe(ct), g2h_len <= ct->ctbs.g2h.info.space);
+ xe_gt_assert(ct_to_gt(ct), g2h_len <= ct->ctbs.g2h.info.space);
if (g2h_len) {
lockdep_assert_held(&ct->fast_lock);
@@ -435,8 +441,8 @@ static void __g2h_reserve_space(struct xe_guc_ct *ct, u32 g2h_len, u32 num_g2h)
static void __g2h_release_space(struct xe_guc_ct *ct, u32 g2h_len)
{
lockdep_assert_held(&ct->fast_lock);
- xe_assert(ct_to_xe(ct), ct->ctbs.g2h.info.space + g2h_len <=
- ct->ctbs.g2h.info.size - ct->ctbs.g2h.info.resv_space);
+ xe_gt_assert(ct_to_gt(ct), ct->ctbs.g2h.info.space + g2h_len <=
+ ct->ctbs.g2h.info.size - ct->ctbs.g2h.info.resv_space);
ct->ctbs.g2h.info.space += g2h_len;
--ct->g2h_outstanding;
@@ -455,6 +461,7 @@ static int h2g_write(struct xe_guc_ct *ct, const u32 *action, u32 len,
u32 ct_fence_value, bool want_response)
{
struct xe_device *xe = ct_to_xe(ct);
+ struct xe_gt *gt = ct_to_gt(ct);
struct guc_ctb *h2g = &ct->ctbs.h2g;
u32 cmd[H2G_CT_HEADERS];
u32 tail = h2g->info.tail;
@@ -465,8 +472,8 @@ static int h2g_write(struct xe_guc_ct *ct, const u32 *action, u32 len,
full_len = len + GUC_CTB_HDR_LEN;
lockdep_assert_held(&ct->lock);
- xe_assert(xe, full_len <= GUC_CTB_MSG_MAX_LEN);
- xe_assert(xe, tail <= h2g->info.size);
+ xe_gt_assert(gt, full_len <= GUC_CTB_MSG_MAX_LEN);
+ xe_gt_assert(gt, tail <= h2g->info.size);
/* Command will wrap, zero fill (NOPs), return and check credits again */
if (tail + full_len > h2g->info.size) {
@@ -515,7 +522,7 @@ static int h2g_write(struct xe_guc_ct *ct, const u32 *action, u32 len,
/* Update descriptor */
desc_write(xe, h2g, tail, h2g->info.tail);
- trace_xe_guc_ctb_h2g(ct_to_gt(ct)->info.id, *(action - 1), full_len,
+ trace_xe_guc_ctb_h2g(gt->info.id, *(action - 1), full_len,
desc_read(xe, h2g, head), h2g->info.tail);
return 0;
@@ -544,15 +551,15 @@ static int __guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action,
u32 len, u32 g2h_len, u32 num_g2h,
struct g2h_fence *g2h_fence)
{
- struct xe_device *xe = ct_to_xe(ct);
+ struct xe_gt *gt __maybe_unused = ct_to_gt(ct);
u16 seqno;
int ret;
- xe_assert(xe, ct->state != XE_GUC_CT_STATE_NOT_INITIALIZED);
- xe_assert(xe, !g2h_len || !g2h_fence);
- xe_assert(xe, !num_g2h || !g2h_fence);
- xe_assert(xe, !g2h_len || num_g2h);
- xe_assert(xe, g2h_len || !num_g2h);
+ xe_gt_assert(gt, ct->state != XE_GUC_CT_STATE_NOT_INITIALIZED);
+ xe_gt_assert(gt, !g2h_len || !g2h_fence);
+ xe_gt_assert(gt, !num_g2h || !g2h_fence);
+ xe_gt_assert(gt, !g2h_len || num_g2h);
+ xe_gt_assert(gt, g2h_len || !num_g2h);
lockdep_assert_held(&ct->lock);
if (unlikely(ct->ctbs.h2g.info.broken)) {
@@ -570,7 +577,7 @@ static int __guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action,
goto out;
}
- xe_assert(xe, xe_guc_ct_enabled(ct));
+ xe_gt_assert(gt, xe_guc_ct_enabled(ct));
if (g2h_fence) {
g2h_len = GUC_CTB_HXG_MSG_MAX_LEN;
@@ -628,12 +635,12 @@ static int guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action, u32 len,
u32 g2h_len, u32 num_g2h,
struct g2h_fence *g2h_fence)
{
- struct drm_device *drm = &ct_to_xe(ct)->drm;
- struct drm_printer p = drm_info_printer(drm->dev);
+ struct xe_gt *gt = ct_to_gt(ct);
+ struct drm_printer p = xe_gt_info_printer(gt);
unsigned int sleep_period_ms = 1;
int ret;
- xe_assert(ct_to_xe(ct), !g2h_len || !g2h_fence);
+ xe_gt_assert(gt, !g2h_len || !g2h_fence);
lockdep_assert_held(&ct->lock);
xe_device_assert_mem_access(ct_to_xe(ct));
@@ -691,7 +698,7 @@ try_again:
return ret;
broken:
- drm_err(drm, "No forward process on H2G, reset required");
+ xe_gt_err(gt, "No forward process on H2G, reset required\n");
xe_guc_ct_print(ct, &p, true);
ct->ctbs.h2g.info.broken = true;
@@ -703,7 +710,7 @@ static int guc_ct_send(struct xe_guc_ct *ct, const u32 *action, u32 len,
{
int ret;
- xe_assert(ct_to_xe(ct), !g2h_len || !g2h_fence);
+ xe_gt_assert(ct_to_gt(ct), !g2h_len || !g2h_fence);
mutex_lock(&ct->lock);
ret = guc_ct_send_locked(ct, action, len, g2h_len, num_g2h, g2h_fence);
@@ -771,7 +778,7 @@ static bool retry_failure(struct xe_guc_ct *ct, int ret)
static int guc_ct_send_recv(struct xe_guc_ct *ct, const u32 *action, u32 len,
u32 *response_buffer, bool no_fail)
{
- struct xe_device *xe = ct_to_xe(ct);
+ struct xe_gt *gt = ct_to_gt(ct);
struct g2h_fence g2h_fence;
int ret = 0;
@@ -813,20 +820,20 @@ retry_same_fence:
ret = wait_event_timeout(ct->g2h_fence_wq, g2h_fence.done, HZ);
if (!ret) {
- drm_err(&xe->drm, "Timed out wait for G2H, fence %u, action %04x",
- g2h_fence.seqno, action[0]);
+ xe_gt_err(gt, "Timed out wait for G2H, fence %u, action %04x",
+ g2h_fence.seqno, action[0]);
xa_erase_irq(&ct->fence_lookup, g2h_fence.seqno);
return -ETIME;
}
if (g2h_fence.retry) {
- drm_warn(&xe->drm, "Send retry, action 0x%04x, reason %d",
- action[0], g2h_fence.reason);
+ xe_gt_warn(gt, "H2G retry, action 0x%04x, reason %u",
+ action[0], g2h_fence.reason);
goto retry;
}
if (g2h_fence.fail) {
- drm_err(&xe->drm, "Send failed, action 0x%04x, error %d, hint %d",
- action[0], g2h_fence.error, g2h_fence.hint);
+ xe_gt_err(gt, "H2G send failed, action 0x%04x, error %d, hint %u",
+ action[0], g2h_fence.error, g2h_fence.hint);
ret = -EIO;
}
@@ -895,7 +902,6 @@ static int parse_g2h_event(struct xe_guc_ct *ct, u32 *msg, u32 len)
static int parse_g2h_response(struct xe_guc_ct *ct, u32 *msg, u32 len)
{
struct xe_gt *gt = ct_to_gt(ct);
- struct xe_device *xe = gt_to_xe(gt);
u32 *hxg = msg_to_hxg(msg);
u32 hxg_len = msg_len_to_hxg_len(len);
u32 fence = FIELD_GET(GUC_CTB_MSG_0_FENCE, msg[0]);
@@ -933,7 +939,7 @@ static int parse_g2h_response(struct xe_guc_ct *ct, u32 *msg, u32 len)
return 0;
}
- xe_assert(xe, fence == g2h_fence->seqno);
+ xe_gt_assert(gt, fence == g2h_fence->seqno);
if (type == GUC_HXG_TYPE_RESPONSE_FAILURE) {
g2h_fence->fail = true;
@@ -961,7 +967,7 @@ static int parse_g2h_response(struct xe_guc_ct *ct, u32 *msg, u32 len)
static int parse_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len)
{
- struct xe_device *xe = ct_to_xe(ct);
+ struct xe_gt *gt = ct_to_gt(ct);
u32 *hxg = msg_to_hxg(msg);
u32 origin, type;
int ret;
@@ -970,9 +976,8 @@ static int parse_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len)
origin = FIELD_GET(GUC_HXG_MSG_0_ORIGIN, hxg[0]);
if (unlikely(origin != GUC_HXG_ORIGIN_GUC)) {
- drm_err(&xe->drm,
- "G2H channel broken on read, origin=%d, reset required\n",
- origin);
+ xe_gt_err(gt, "G2H channel broken on read, origin=%u, reset required\n",
+ origin);
ct->ctbs.g2h.info.broken = true;
return -EPROTO;
@@ -989,9 +994,8 @@ static int parse_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len)
ret = parse_g2h_response(ct, msg, len);
break;
default:
- drm_err(&xe->drm,
- "G2H channel broken on read, type=%d, reset required\n",
- type);
+ xe_gt_err(gt, "G2H channel broken on read, type=%u, reset required\n",
+ type);
ct->ctbs.g2h.info.broken = true;
ret = -EOPNOTSUPP;
@@ -1002,8 +1006,8 @@ static int parse_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len)
static int process_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len)
{
- struct xe_device *xe = ct_to_xe(ct);
struct xe_guc *guc = ct_to_guc(ct);
+ struct xe_gt *gt = ct_to_gt(ct);
u32 hxg_len = msg_len_to_hxg_len(len);
u32 *hxg = msg_to_hxg(msg);
u32 action, adj_len;
@@ -1054,18 +1058,21 @@ static int process_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len)
adj_len);
break;
case XE_GUC_ACTION_GUC2PF_RELAY_FROM_VF:
- ret = xe_guc_relay_process_guc2pf(&guc->relay, payload, adj_len);
+ ret = xe_guc_relay_process_guc2pf(&guc->relay, hxg, hxg_len);
break;
case XE_GUC_ACTION_GUC2VF_RELAY_FROM_PF:
- ret = xe_guc_relay_process_guc2vf(&guc->relay, payload, adj_len);
+ ret = xe_guc_relay_process_guc2vf(&guc->relay, hxg, hxg_len);
+ break;
+ case GUC_ACTION_GUC2PF_VF_STATE_NOTIFY:
+ ret = xe_gt_sriov_pf_control_process_guc2pf(gt, hxg, hxg_len);
break;
default:
- drm_err(&xe->drm, "unexpected action 0x%04x\n", action);
+ xe_gt_err(gt, "unexpected G2H action 0x%04x\n", action);
}
if (ret)
- drm_err(&xe->drm, "action 0x%04x failed processing, ret=%d\n",
- action, ret);
+ xe_gt_err(gt, "G2H action 0x%04x failed (%pe)\n",
+ action, ERR_PTR(ret));
return 0;
}
@@ -1073,13 +1080,14 @@ static int process_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len)
static int g2h_read(struct xe_guc_ct *ct, u32 *msg, bool fast_path)
{
struct xe_device *xe = ct_to_xe(ct);
+ struct xe_gt *gt = ct_to_gt(ct);
struct guc_ctb *g2h = &ct->ctbs.g2h;
u32 tail, head, len;
s32 avail;
u32 action;
u32 *hxg;
- xe_assert(xe, ct->state != XE_GUC_CT_STATE_NOT_INITIALIZED);
+ xe_gt_assert(gt, ct->state != XE_GUC_CT_STATE_NOT_INITIALIZED);
lockdep_assert_held(&ct->fast_lock);
if (ct->state == XE_GUC_CT_STATE_DISABLED)
@@ -1091,7 +1099,7 @@ static int g2h_read(struct xe_guc_ct *ct, u32 *msg, bool fast_path)
if (g2h->info.broken)
return -EPIPE;
- xe_assert(xe, xe_guc_ct_enabled(ct));
+ xe_gt_assert(gt, xe_guc_ct_enabled(ct));
/* Calculate DW available to read */
tail = desc_read(xe, g2h, tail);
@@ -1107,9 +1115,8 @@ static int g2h_read(struct xe_guc_ct *ct, u32 *msg, bool fast_path)
sizeof(u32));
len = FIELD_GET(GUC_CTB_MSG_0_NUM_DWORDS, msg[0]) + GUC_CTB_MSG_MIN_LEN;
if (len > avail) {
- drm_err(&xe->drm,
- "G2H channel broken on read, avail=%d, len=%d, reset required\n",
- avail, len);
+ xe_gt_err(gt, "G2H channel broken on read, avail=%d, len=%d, reset required\n",
+ avail, len);
g2h->info.broken = true;
return -EPROTO;
@@ -1162,7 +1169,7 @@ static int g2h_read(struct xe_guc_ct *ct, u32 *msg, bool fast_path)
static void g2h_fast_path(struct xe_guc_ct *ct, u32 *msg, u32 len)
{
- struct xe_device *xe = ct_to_xe(ct);
+ struct xe_gt *gt = ct_to_gt(ct);
struct xe_guc *guc = ct_to_guc(ct);
u32 hxg_len = msg_len_to_hxg_len(len);
u32 *hxg = msg_to_hxg(msg);
@@ -1181,12 +1188,12 @@ static void g2h_fast_path(struct xe_guc_ct *ct, u32 *msg, u32 len)
adj_len);
break;
default:
- drm_warn(&xe->drm, "NOT_POSSIBLE");
+ xe_gt_warn(gt, "NOT_POSSIBLE");
}
if (ret)
- drm_err(&xe->drm, "action 0x%04x failed processing, ret=%d\n",
- action, ret);
+ xe_gt_err(gt, "G2H action 0x%04x failed (%pe)\n",
+ action, ERR_PTR(ret));
}
/**
@@ -1203,7 +1210,7 @@ void xe_guc_ct_fast_path(struct xe_guc_ct *ct)
bool ongoing;
int len;
- ongoing = xe_device_mem_access_get_if_ongoing(ct_to_xe(ct));
+ ongoing = xe_pm_runtime_get_if_active(ct_to_xe(ct));
if (!ongoing && xe_pm_read_callback_task(ct_to_xe(ct)) == NULL)
return;
@@ -1216,7 +1223,7 @@ void xe_guc_ct_fast_path(struct xe_guc_ct *ct)
spin_unlock(&ct->fast_lock);
if (ongoing)
- xe_device_mem_access_put(xe);
+ xe_pm_runtime_put(xe);
}
/* Returns less than zero on error, 0 on done, 1 on more available */
@@ -1247,6 +1254,7 @@ static int dequeue_one_g2h(struct xe_guc_ct *ct)
static void g2h_worker_func(struct work_struct *w)
{
struct xe_guc_ct *ct = container_of(w, struct xe_guc_ct, g2h_worker);
+ struct xe_gt *gt = ct_to_gt(ct);
bool ongoing;
int ret;
@@ -1273,7 +1281,7 @@ static void g2h_worker_func(struct work_struct *w)
* responses, if the worker here is blocked on those callbacks
* completing, creating a deadlock.
*/
- ongoing = xe_device_mem_access_get_if_ongoing(ct_to_xe(ct));
+ ongoing = xe_pm_runtime_get_if_active(ct_to_xe(ct));
if (!ongoing && xe_pm_read_callback_task(ct_to_xe(ct)) == NULL)
return;
@@ -1283,8 +1291,7 @@ static void g2h_worker_func(struct work_struct *w)
mutex_unlock(&ct->lock);
if (unlikely(ret == -EPROTO || ret == -EOPNOTSUPP)) {
- struct drm_device *drm = &ct_to_xe(ct)->drm;
- struct drm_printer p = drm_info_printer(drm->dev);
+ struct drm_printer p = xe_gt_info_printer(gt);
xe_guc_ct_print(ct, &p, false);
kick_reset(ct);
@@ -1292,7 +1299,7 @@ static void g2h_worker_func(struct work_struct *w)
} while (ret == 1);
if (ongoing)
- xe_device_mem_access_put(ct_to_xe(ct));
+ xe_pm_runtime_put(ct_to_xe(ct));
}
static void guc_ctb_snapshot_capture(struct xe_device *xe, struct guc_ctb *ctb,
@@ -1394,7 +1401,7 @@ struct xe_guc_ct_snapshot *xe_guc_ct_snapshot_capture(struct xe_guc_ct *ct,
return NULL;
}
- if (xe_guc_ct_enabled(ct)) {
+ if (xe_guc_ct_enabled(ct) || ct->state == XE_GUC_CT_STATE_STOPPED) {
snapshot->ct_enabled = true;
snapshot->g2h_outstanding = READ_ONCE(ct->g2h_outstanding);
guc_ctb_snapshot_capture(xe, &ct->ctbs.h2g,
diff --git a/drivers/gpu/drm/xe/xe_guc_debugfs.c b/drivers/gpu/drm/xe/xe_guc_debugfs.c
index ffd7d53bcc42..d3822cbea273 100644
--- a/drivers/gpu/drm/xe/xe_guc_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_guc_debugfs.c
@@ -14,6 +14,7 @@
#include "xe_guc_ct.h"
#include "xe_guc_log.h"
#include "xe_macros.h"
+#include "xe_pm.h"
static struct xe_guc *node_to_guc(struct drm_info_node *node)
{
@@ -26,9 +27,9 @@ static int guc_info(struct seq_file *m, void *data)
struct xe_device *xe = guc_to_xe(guc);
struct drm_printer p = drm_seq_file_printer(m);
- xe_device_mem_access_get(xe);
+ xe_pm_runtime_get(xe);
xe_guc_print_info(guc, &p);
- xe_device_mem_access_put(xe);
+ xe_pm_runtime_put(xe);
return 0;
}
@@ -39,9 +40,9 @@ static int guc_log(struct seq_file *m, void *data)
struct xe_device *xe = guc_to_xe(guc);
struct drm_printer p = drm_seq_file_printer(m);
- xe_device_mem_access_get(xe);
+ xe_pm_runtime_get(xe);
xe_guc_log_print(&guc->log, &p);
- xe_device_mem_access_put(xe);
+ xe_pm_runtime_put(xe);
return 0;
}
diff --git a/drivers/gpu/drm/xe/xe_guc_fwif.h b/drivers/gpu/drm/xe/xe_guc_fwif.h
index c281fdbfd2d6..19ee71aeaf17 100644
--- a/drivers/gpu/drm/xe/xe_guc_fwif.h
+++ b/drivers/gpu/drm/xe/xe_guc_fwif.h
@@ -14,6 +14,8 @@
#define G2H_LEN_DW_DEREGISTER_CONTEXT 3
#define G2H_LEN_DW_TLB_INVALIDATE 3
+#define GUC_ID_MAX 65535
+
#define GUC_CONTEXT_DISABLE 0
#define GUC_CONTEXT_ENABLE 1
@@ -207,7 +209,10 @@ struct guc_ads {
u32 capture_instance[GUC_CAPTURE_LIST_INDEX_MAX][GUC_MAX_ENGINE_CLASSES];
u32 capture_class[GUC_CAPTURE_LIST_INDEX_MAX][GUC_MAX_ENGINE_CLASSES];
u32 capture_global[GUC_CAPTURE_LIST_INDEX_MAX];
- u32 reserved[14];
+ u32 wa_klv_addr_lo;
+ u32 wa_klv_addr_hi;
+ u32 wa_klv_size;
+ u32 reserved[11];
} __packed;
/* Engine usage stats */
diff --git a/drivers/gpu/drm/xe/xe_guc_hwconfig.c b/drivers/gpu/drm/xe/xe_guc_hwconfig.c
index ea49f3885c10..d9b570a154a2 100644
--- a/drivers/gpu/drm/xe/xe_guc_hwconfig.c
+++ b/drivers/gpu/drm/xe/xe_guc_hwconfig.c
@@ -14,7 +14,7 @@
#include "xe_guc.h"
#include "xe_map.h"
-static int send_get_hwconfig(struct xe_guc *guc, u32 ggtt_addr, u32 size)
+static int send_get_hwconfig(struct xe_guc *guc, u64 ggtt_addr, u32 size)
{
u32 action[] = {
XE_GUC_ACTION_GET_HWCONFIG,
@@ -78,8 +78,9 @@ int xe_guc_hwconfig_init(struct xe_guc *guc)
return -EINVAL;
bo = xe_managed_bo_create_pin_map(xe, tile, PAGE_ALIGN(size),
- XE_BO_CREATE_SYSTEM_BIT |
- XE_BO_CREATE_GGTT_BIT);
+ XE_BO_FLAG_SYSTEM |
+ XE_BO_FLAG_GGTT |
+ XE_BO_FLAG_GGTT_INVALIDATE);
if (IS_ERR(bo))
return PTR_ERR(bo);
guc->hwconfig.bo = bo;
diff --git a/drivers/gpu/drm/xe/xe_guc_id_mgr.c b/drivers/gpu/drm/xe/xe_guc_id_mgr.c
new file mode 100644
index 000000000000..0fb7c6b78c31
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_id_mgr.c
@@ -0,0 +1,279 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2024 Intel Corporation
+ */
+
+#include <linux/bitmap.h>
+#include <linux/mutex.h>
+
+#include <drm/drm_managed.h>
+
+#include "xe_assert.h"
+#include "xe_gt_printk.h"
+#include "xe_guc.h"
+#include "xe_guc_id_mgr.h"
+#include "xe_guc_types.h"
+
+static struct xe_guc *idm_to_guc(struct xe_guc_id_mgr *idm)
+{
+ return container_of(idm, struct xe_guc, submission_state.idm);
+}
+
+static struct xe_gt *idm_to_gt(struct xe_guc_id_mgr *idm)
+{
+ return guc_to_gt(idm_to_guc(idm));
+}
+
+static struct xe_device *idm_to_xe(struct xe_guc_id_mgr *idm)
+{
+ return gt_to_xe(idm_to_gt(idm));
+}
+
+#define idm_assert(idm, cond) xe_gt_assert(idm_to_gt(idm), cond)
+#define idm_mutex(idm) (&idm_to_guc(idm)->submission_state.lock)
+
+static void idm_print_locked(struct xe_guc_id_mgr *idm, struct drm_printer *p, int indent);
+
+static void __fini_idm(struct drm_device *drm, void *arg)
+{
+ struct xe_guc_id_mgr *idm = arg;
+
+ mutex_lock(idm_mutex(idm));
+
+ if (IS_ENABLED(CONFIG_DRM_XE_DEBUG)) {
+ unsigned int weight = bitmap_weight(idm->bitmap, idm->total);
+
+ if (weight) {
+ struct drm_printer p = xe_gt_info_printer(idm_to_gt(idm));
+
+ xe_gt_err(idm_to_gt(idm), "GuC ID manager unclean (%u/%u)\n",
+ weight, idm->total);
+ idm_print_locked(idm, &p, 1);
+ }
+ }
+
+ bitmap_free(idm->bitmap);
+ idm->bitmap = NULL;
+ idm->total = 0;
+ idm->used = 0;
+
+ mutex_unlock(idm_mutex(idm));
+}
+
+/**
+ * xe_guc_id_mgr_init() - Initialize GuC context ID Manager.
+ * @idm: the &xe_guc_id_mgr to initialize
+ * @limit: number of IDs to manage
+ *
+ * The bare-metal or PF driver can pass ~0 as @limit to indicate that all
+ * context IDs supported by the GuC firmware are available for use.
+ *
+ * Only VF drivers have to provide an explicit number of context IDs
+ * that they can use.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_guc_id_mgr_init(struct xe_guc_id_mgr *idm, unsigned int limit)
+{
+ int ret;
+
+ idm_assert(idm, !idm->bitmap);
+ idm_assert(idm, !idm->total);
+ idm_assert(idm, !idm->used);
+
+ if (limit == ~0)
+ limit = GUC_ID_MAX;
+ else if (limit > GUC_ID_MAX)
+ return -ERANGE;
+ else if (!limit)
+ return -EINVAL;
+
+ idm->bitmap = bitmap_zalloc(limit, GFP_KERNEL);
+ if (!idm->bitmap)
+ return -ENOMEM;
+ idm->total = limit;
+
+ ret = drmm_add_action_or_reset(&idm_to_xe(idm)->drm, __fini_idm, idm);
+ if (ret)
+ return ret;
+
+ xe_gt_info(idm_to_gt(idm), "using %u GuC ID(s)\n", idm->total);
+ return 0;
+}
+
+static unsigned int find_last_zero_area(unsigned long *bitmap,
+ unsigned int total,
+ unsigned int count)
+{
+ unsigned int found = total;
+ unsigned int rs, re, range;
+
+ for_each_clear_bitrange(rs, re, bitmap, total) {
+ range = re - rs;
+ if (range < count)
+ continue;
+ found = rs + (range - count);
+ }
+ return found;
+}
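
find_last_zero_area() keeps scanning so that it ends up with the highest clear run that can hold @count bits, then right-aligns the allocation inside that run. A userspace sketch on a single word, assuming the same semantics (the driver walks multi-word bitmaps with for_each_clear_bitrange()):

#include <stdint.h>
#include <stdio.h>

static unsigned int find_last_zero_area64(uint64_t bitmap, unsigned int total,
					  unsigned int count)
{
	unsigned int found = total;	/* total means "no fit", as above */
	unsigned int rs = 0;

	while (rs < total) {
		unsigned int re = rs;

		while (re < total && !(bitmap & (1ULL << re)))
			re++;			/* grow clear range [rs, re) */
		if (re - rs >= count)
			found = rs + (re - rs - count);	/* right-align */
		rs = re + 1;
	}
	return found;
}

int main(void)
{
	/* bits 0..3 and 10 used; clear runs are 4..9 and 11..15 */
	uint64_t map = 0xf | 1ULL << 10;

	printf("%u\n", find_last_zero_area64(map, 16, 3));	/* 13 */
	printf("%u\n", find_last_zero_area64(map, 16, 6));	/* 4 */
	return 0;
}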
+
+static int idm_reserve_chunk_locked(struct xe_guc_id_mgr *idm,
+ unsigned int count, unsigned int retain)
+{
+ int id;
+
+ idm_assert(idm, count);
+ lockdep_assert_held(idm_mutex(idm));
+
+ if (!idm->total)
+ return -ENODATA;
+
+ if (retain) {
+ /*
+ * For ID reservations (used on the PF for VFs) we want to make
+ * sure there will be at least 'retain' available for the PF
+ */
+ if (idm->used + count + retain > idm->total)
+ return -EDQUOT;
+ /*
+ * ... and we want to reserve highest IDs close to the end.
+ */
+ id = find_last_zero_area(idm->bitmap, idm->total, count);
+ } else {
+ /*
+ * For regular ID reservations (used by the submission code)
+ * we start searching from the lower range of IDs.
+ */
+ id = bitmap_find_next_zero_area(idm->bitmap, idm->total, 0, count, 0);
+ }
+ if (id >= idm->total)
+ return -ENOSPC;
+
+ bitmap_set(idm->bitmap, id, count);
+ idm->used += count;
+
+ return id;
+}
+
+static void idm_release_chunk_locked(struct xe_guc_id_mgr *idm,
+ unsigned int start, unsigned int count)
+{
+ idm_assert(idm, count);
+ idm_assert(idm, count <= idm->used);
+ idm_assert(idm, start < idm->total);
+ idm_assert(idm, start + count - 1 < idm->total);
+ lockdep_assert_held(idm_mutex(idm));
+
+ if (IS_ENABLED(CONFIG_DRM_XE_DEBUG)) {
+ unsigned int n;
+
+ for (n = 0; n < count; n++)
+ idm_assert(idm, test_bit(start + n, idm->bitmap));
+ }
+ bitmap_clear(idm->bitmap, start, count);
+ idm->used -= count;
+}
+
+/**
+ * xe_guc_id_mgr_reserve_locked() - Reserve one or more GuC context IDs.
+ * @idm: the &xe_guc_id_mgr
+ * @count: number of IDs to allocate (can't be 0)
+ *
+ * This function is dedicated for use by the GuC submission code,
+ * where submission lock is already taken.
+ *
+ * Return: ID of the allocated GuC context or a negative error code on failure.
+ */
+int xe_guc_id_mgr_reserve_locked(struct xe_guc_id_mgr *idm, unsigned int count)
+{
+ return idm_reserve_chunk_locked(idm, count, 0);
+}
+
+/**
+ * xe_guc_id_mgr_release_locked() - Release one or more GuC context IDs.
+ * @idm: the &xe_guc_id_mgr
+ * @id: the GuC context ID to release
+ * @count: number of IDs to release (can't be 0)
+ *
+ * This function is dedicated for use by the GuC submission code,
+ * where submission lock is already taken.
+ */
+void xe_guc_id_mgr_release_locked(struct xe_guc_id_mgr *idm, unsigned int id,
+ unsigned int count)
+{
+ return idm_release_chunk_locked(idm, id, count);
+}
+
+/**
+ * xe_guc_id_mgr_reserve() - Reserve a range of GuC context IDs.
+ * @idm: the &xe_guc_id_mgr
+ * @count: number of GuC context IDs to reserve (can't be 0)
+ * @retain: number of GuC context IDs to keep available (can't be 0)
+ *
+ * This function is dedicated for use by the PF driver, which expects that
+ * the reserved range of IDs will be contiguous and that there will be at least
+ * &retain IDs still available for the PF after this reservation.
+ *
+ * Return: starting ID of the allocated GuC context ID range or
+ * a negative error code on failure.
+ */
+int xe_guc_id_mgr_reserve(struct xe_guc_id_mgr *idm,
+ unsigned int count, unsigned int retain)
+{
+ int ret;
+
+ idm_assert(idm, count);
+ idm_assert(idm, retain);
+
+ mutex_lock(idm_mutex(idm));
+ ret = idm_reserve_chunk_locked(idm, count, retain);
+ mutex_unlock(idm_mutex(idm));
+
+ return ret;
+}
+
+/**
+ * xe_guc_id_mgr_release() - Release a range of GuC context IDs.
+ * @idm: the &xe_guc_id_mgr
+ * @start: the starting ID of GuC context range to release
+ * @count: number of GuC context IDs to release
+ */
+void xe_guc_id_mgr_release(struct xe_guc_id_mgr *idm,
+ unsigned int start, unsigned int count)
+{
+ mutex_lock(idm_mutex(idm));
+ idm_release_chunk_locked(idm, start, count);
+ mutex_unlock(idm_mutex(idm));
+}
+
+static void idm_print_locked(struct xe_guc_id_mgr *idm, struct drm_printer *p, int indent)
+{
+ unsigned int rs, re;
+
+ lockdep_assert_held(idm_mutex(idm));
+
+ drm_printf_indent(p, indent, "total %u\n", idm->total);
+ if (!idm->bitmap)
+ return;
+
+ drm_printf_indent(p, indent, "used %u\n", idm->used);
+ for_each_set_bitrange(rs, re, idm->bitmap, idm->total)
+ drm_printf_indent(p, indent, "range %u..%u (%u)\n", rs, re - 1, re - rs);
+}
+
+/**
+ * xe_guc_id_mgr_print() - Print status of GuC ID Manager.
+ * @idm: the &xe_guc_id_mgr to print
+ * @p: the &drm_printer to print to
+ * @indent: tab indentation level
+ */
+void xe_guc_id_mgr_print(struct xe_guc_id_mgr *idm, struct drm_printer *p, int indent)
+{
+ mutex_lock(idm_mutex(idm));
+ idm_print_locked(idm, p, indent);
+ mutex_unlock(idm_mutex(idm));
+}
+
+#if IS_BUILTIN(CONFIG_DRM_XE_KUNIT_TEST)
+#include "tests/xe_guc_id_mgr_test.c"
+#endif
diff --git a/drivers/gpu/drm/xe/xe_guc_id_mgr.h b/drivers/gpu/drm/xe/xe_guc_id_mgr.h
new file mode 100644
index 000000000000..368f8c80e4c7
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_id_mgr.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2024 Intel Corporation
+ */
+
+#ifndef _XE_GUC_ID_MGR_H_
+#define _XE_GUC_ID_MGR_H_
+
+struct drm_printer;
+struct xe_guc_id_mgr;
+
+int xe_guc_id_mgr_init(struct xe_guc_id_mgr *idm, unsigned int count);
+
+int xe_guc_id_mgr_reserve_locked(struct xe_guc_id_mgr *idm, unsigned int count);
+void xe_guc_id_mgr_release_locked(struct xe_guc_id_mgr *idm, unsigned int id, unsigned int count);
+
+int xe_guc_id_mgr_reserve(struct xe_guc_id_mgr *idm, unsigned int count, unsigned int retain);
+void xe_guc_id_mgr_release(struct xe_guc_id_mgr *idm, unsigned int start, unsigned int count);
+
+void xe_guc_id_mgr_print(struct xe_guc_id_mgr *idm, struct drm_printer *p, int indent);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_guc_klv_helpers.c b/drivers/gpu/drm/xe/xe_guc_klv_helpers.c
new file mode 100644
index 000000000000..ceca949932a0
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_klv_helpers.c
@@ -0,0 +1,134 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2024 Intel Corporation
+ */
+
+#include <linux/bitfield.h>
+#include <drm/drm_print.h>
+
+#include "abi/guc_klvs_abi.h"
+#include "xe_guc_klv_helpers.h"
+
+#define make_u64(hi, lo) ((u64)((u64)(u32)(hi) << 32 | (u32)(lo)))
+
+/**
+ * xe_guc_klv_key_to_string - Convert KLV key into friendly name.
+ * @key: the `GuC KLV`_ key
+ *
+ * Return: name of the KLV key.
+ */
+const char *xe_guc_klv_key_to_string(u16 key)
+{
+ switch (key) {
+ /* VGT POLICY keys */
+ case GUC_KLV_VGT_POLICY_SCHED_IF_IDLE_KEY:
+ return "sched_if_idle";
+ case GUC_KLV_VGT_POLICY_ADVERSE_SAMPLE_PERIOD_KEY:
+ return "sample_period";
+ case GUC_KLV_VGT_POLICY_RESET_AFTER_VF_SWITCH_KEY:
+ return "reset_engine";
+ /* VF CFG keys */
+ case GUC_KLV_VF_CFG_GGTT_START_KEY:
+ return "ggtt_start";
+ case GUC_KLV_VF_CFG_GGTT_SIZE_KEY:
+ return "ggtt_size";
+ case GUC_KLV_VF_CFG_LMEM_SIZE_KEY:
+ return "lmem_size";
+ case GUC_KLV_VF_CFG_NUM_CONTEXTS_KEY:
+ return "num_contexts";
+ case GUC_KLV_VF_CFG_TILE_MASK_KEY:
+ return "tile_mask";
+ case GUC_KLV_VF_CFG_NUM_DOORBELLS_KEY:
+ return "num_doorbells";
+ case GUC_KLV_VF_CFG_EXEC_QUANTUM_KEY:
+ return "exec_quantum";
+ case GUC_KLV_VF_CFG_PREEMPT_TIMEOUT_KEY:
+ return "preempt_timeout";
+ case GUC_KLV_VF_CFG_BEGIN_DOORBELL_ID_KEY:
+ return "begin_db_id";
+ case GUC_KLV_VF_CFG_BEGIN_CONTEXT_ID_KEY:
+ return "begin_ctx_id";
+ default:
+ return "(unknown)";
+ }
+}
+
+/**
+ * xe_guc_klv_print - Print content of the buffer with `GuC KLV`_.
+ * @klvs: the buffer with KLVs
+ * @num_dwords: number of dwords (u32) available in the buffer
+ * @p: the &drm_printer
+ *
+ * The buffer may contain more than one KLV.
+ */
+void xe_guc_klv_print(const u32 *klvs, u32 num_dwords, struct drm_printer *p)
+{
+ while (num_dwords >= GUC_KLV_LEN_MIN) {
+ u32 key = FIELD_GET(GUC_KLV_0_KEY, klvs[0]);
+ u32 len = FIELD_GET(GUC_KLV_0_LEN, klvs[0]);
+
+ klvs += GUC_KLV_LEN_MIN;
+ num_dwords -= GUC_KLV_LEN_MIN;
+
+ if (num_dwords < len) {
+ drm_printf(p, "{ key %#06x : truncated %zu of %zu bytes %*ph } # %s\n",
+ key, num_dwords * sizeof(u32), len * sizeof(u32),
+ (int)(num_dwords * sizeof(u32)), klvs,
+ xe_guc_klv_key_to_string(key));
+ return;
+ }
+
+ switch (len) {
+ case 0:
+ drm_printf(p, "{ key %#06x : no value } # %s\n",
+ key, xe_guc_klv_key_to_string(key));
+ break;
+ case 1:
+ drm_printf(p, "{ key %#06x : 32b value %u } # %s\n",
+ key, klvs[0], xe_guc_klv_key_to_string(key));
+ break;
+ case 2:
+ drm_printf(p, "{ key %#06x : 64b value %#llx } # %s\n",
+ key, make_u64(klvs[1], klvs[0]),
+ xe_guc_klv_key_to_string(key));
+ break;
+ default:
+ drm_printf(p, "{ key %#06x : %zu bytes %*ph } # %s\n",
+ key, len * sizeof(u32), (int)(len * sizeof(u32)),
+ klvs, xe_guc_klv_key_to_string(key));
+ break;
+ }
+
+ klvs += len;
+ num_dwords -= len;
+ }
+
+ /* we don't expect any leftovers, fix if KLV header is ever changed */
+ BUILD_BUG_ON(GUC_KLV_LEN_MIN > 1);
+}
+
+/**
+ * xe_guc_klv_count - Count KLVs present in the buffer.
+ * @klvs: the buffer with KLVs
+ * @num_dwords: number of dwords (u32) in the buffer
+ *
+ * Return: number of recognized KLVs or
+ * a negative error code if the KLV buffer is truncated.
+ */
+int xe_guc_klv_count(const u32 *klvs, u32 num_dwords)
+{
+ int num_klvs = 0;
+
+ while (num_dwords >= GUC_KLV_LEN_MIN) {
+ u32 len = FIELD_GET(GUC_KLV_0_LEN, klvs[0]);
+
+ if (num_dwords < len + GUC_KLV_LEN_MIN)
+ break;
+
+ klvs += GUC_KLV_LEN_MIN + len;
+ num_dwords -= GUC_KLV_LEN_MIN + len;
+ num_klvs++;
+ }
+
+ return num_dwords ? -ENODATA : num_klvs;
+}
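
The same walk on a hand-built buffer, as a standalone sketch: one header dword with a 16:16 key/length split, @len payload dwords, repeat; leftovers mean truncation (keys are invented, and -1 stands in for the driver's -ENODATA):

#include <stdint.h>
#include <stdio.h>

#define KLV_LEN_MIN 1u	/* header is a single dword */

static int klv_count(const uint32_t *klvs, uint32_t num_dwords)
{
	int num_klvs = 0;

	while (num_dwords >= KLV_LEN_MIN) {
		uint32_t len = klvs[0] & 0xffff;

		if (num_dwords < len + KLV_LEN_MIN)
			break;		/* truncated value */

		klvs += KLV_LEN_MIN + len;
		num_dwords -= KLV_LEN_MIN + len;
		num_klvs++;
	}
	return num_dwords ? -1 : num_klvs;
}

int main(void)
{
	/* two KLVs: key 0x8a1b, len 0; key 0x8a1c, len 2 (keys made up) */
	uint32_t buf[] = { 0x8a1b0000, 0x8a1c0002, 0x11, 0x22 };

	printf("%d\n", klv_count(buf, 4));	/* 2 */
	printf("%d\n", klv_count(buf, 3));	/* -1: second KLV truncated */
	return 0;
}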
diff --git a/drivers/gpu/drm/xe/xe_guc_klv_helpers.h b/drivers/gpu/drm/xe/xe_guc_klv_helpers.h
new file mode 100644
index 000000000000..b835e0ebe6db
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_klv_helpers.h
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2024 Intel Corporation
+ */
+
+#ifndef _XE_GUC_KLV_HELPERS_H_
+#define _XE_GUC_KLV_HELPERS_H_
+
+#include <linux/types.h>
+
+struct drm_printer;
+
+const char *xe_guc_klv_key_to_string(u16 key);
+
+void xe_guc_klv_print(const u32 *klvs, u32 num_dwords, struct drm_printer *p);
+int xe_guc_klv_count(const u32 *klvs, u32 num_dwords);
+
+/**
+ * PREP_GUC_KLV - Prepare KLV header value based on provided key and len.
+ * @key: KLV key
+ * @len: KLV length
+ *
+ * Return: value of the KLV header (u32).
+ */
+#define PREP_GUC_KLV(key, len) \
+ (FIELD_PREP(GUC_KLV_0_KEY, (key)) | \
+ FIELD_PREP(GUC_KLV_0_LEN, (len)))
+
+/**
+ * PREP_GUC_KLV_CONST - Prepare KLV header value based on const key and len.
+ * @key: const KLV key
+ * @len: const KLV length
+ *
+ * Return: value of the KLV header (u32).
+ */
+#define PREP_GUC_KLV_CONST(key, len) \
+ (FIELD_PREP_CONST(GUC_KLV_0_KEY, (key)) | \
+ FIELD_PREP_CONST(GUC_KLV_0_LEN, (len)))
+
+/**
+ * PREP_GUC_KLV_TAG - Prepare KLV header value based on unique KLV definition tag.
+ * @TAG: unique tag of the KLV definition
+ *
+ * Combine separate KEY and LEN definitions of the KLV identified by the TAG.
+ *
+ * Return: value of the KLV header (u32).
+ */
+#define PREP_GUC_KLV_TAG(TAG) \
+ PREP_GUC_KLV_CONST(GUC_KLV_##TAG##_KEY, GUC_KLV_##TAG##_LEN)
+
+#endif
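
A minimal sketch (not part of the patch) of building a KLV header with the tag helper. It assumes the usual convention that abi/guc_klvs_abi.h defines a GUC_KLV_VF_CFG_EXEC_QUANTUM_LEN (here taken to be 1) next to the corresponding _KEY; quantum_us is a hypothetical variable:

    /*
     * PREP_GUC_KLV_TAG(VF_CFG_EXEC_QUANTUM) expands to
     * PREP_GUC_KLV_CONST(GUC_KLV_VF_CFG_EXEC_QUANTUM_KEY,
     *                    GUC_KLV_VF_CFG_EXEC_QUANTUM_LEN)
     */
    u32 klvs[] = {
        PREP_GUC_KLV_TAG(VF_CFG_EXEC_QUANTUM), /* header dword */
        quantum_us,                            /* value dword */
    };

Since both halves come from FIELD_PREP_CONST(), the header is a compile-time constant and can also be used in static initializers.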
diff --git a/drivers/gpu/drm/xe/xe_guc_log.c b/drivers/gpu/drm/xe/xe_guc_log.c
index 45135c3520e5..a37ee3419428 100644
--- a/drivers/gpu/drm/xe/xe_guc_log.c
+++ b/drivers/gpu/drm/xe/xe_guc_log.c
@@ -84,8 +84,9 @@ int xe_guc_log_init(struct xe_guc_log *log)
struct xe_bo *bo;
bo = xe_managed_bo_create_pin_map(xe, tile, guc_log_size(),
- XE_BO_CREATE_SYSTEM_BIT |
- XE_BO_CREATE_GGTT_BIT);
+ XE_BO_FLAG_SYSTEM |
+ XE_BO_FLAG_GGTT |
+ XE_BO_FLAG_GGTT_INVALIDATE);
if (IS_ERR(bo))
return PTR_ERR(bo);
diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c
index 2839d685631b..509649d0e65e 100644
--- a/drivers/gpu/drm/xe/xe_guc_pc.c
+++ b/drivers/gpu/drm/xe/xe_guc_pc.c
@@ -145,25 +145,6 @@ static int pc_action_reset(struct xe_guc_pc *pc)
return ret;
}
-static int pc_action_shutdown(struct xe_guc_pc *pc)
-{
- struct xe_guc_ct *ct = &pc_to_guc(pc)->ct;
- int ret;
- u32 action[] = {
- GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST,
- SLPC_EVENT(SLPC_EVENT_SHUTDOWN, 2),
- xe_bo_ggtt_addr(pc->bo),
- 0,
- };
-
- ret = xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 0, 0);
- if (ret)
- drm_err(&pc_to_xe(pc)->drm, "GuC PC shutdown %pe",
- ERR_PTR(ret));
-
- return ret;
-}
-
static int pc_action_query_task_state(struct xe_guc_pc *pc)
{
struct xe_guc_ct *ct = &pc_to_guc(pc)->ct;
@@ -381,8 +362,6 @@ u32 xe_guc_pc_get_act_freq(struct xe_guc_pc *pc)
struct xe_device *xe = gt_to_xe(gt);
u32 freq;
- xe_device_mem_access_get(gt_to_xe(gt));
-
/* When in RC6, actual frequency reported will be 0. */
if (GRAPHICS_VERx100(xe) >= 1270) {
freq = xe_mmio_read32(gt, MTL_MIRROR_TARGET_WP1);
@@ -394,8 +373,6 @@ u32 xe_guc_pc_get_act_freq(struct xe_guc_pc *pc)
freq = decode_freq(freq);
- xe_device_mem_access_put(gt_to_xe(gt));
-
return freq;
}
@@ -412,14 +389,13 @@ int xe_guc_pc_get_cur_freq(struct xe_guc_pc *pc, u32 *freq)
struct xe_gt *gt = pc_to_gt(pc);
int ret;
- xe_device_mem_access_get(gt_to_xe(gt));
/*
* GuC SLPC plays with cur freq request when GuCRC is enabled
* Block RC6 for a more reliable read.
*/
ret = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
if (ret)
- goto out;
+ return ret;
*freq = xe_mmio_read32(gt, RPNSWREQ);
@@ -427,9 +403,7 @@ int xe_guc_pc_get_cur_freq(struct xe_guc_pc *pc, u32 *freq)
*freq = decode_freq(*freq);
XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
-out:
- xe_device_mem_access_put(gt_to_xe(gt));
- return ret;
+ return 0;
}
/**
@@ -451,12 +425,7 @@ u32 xe_guc_pc_get_rp0_freq(struct xe_guc_pc *pc)
*/
u32 xe_guc_pc_get_rpe_freq(struct xe_guc_pc *pc)
{
- struct xe_gt *gt = pc_to_gt(pc);
- struct xe_device *xe = gt_to_xe(gt);
-
- xe_device_mem_access_get(xe);
pc_update_rp_values(pc);
- xe_device_mem_access_put(xe);
return pc->rpe_freq;
}
@@ -485,7 +454,6 @@ int xe_guc_pc_get_min_freq(struct xe_guc_pc *pc, u32 *freq)
struct xe_gt *gt = pc_to_gt(pc);
int ret;
- xe_device_mem_access_get(pc_to_xe(pc));
mutex_lock(&pc->freq_lock);
if (!pc->freq_ready) {
/* Might be in the middle of a gt reset */
@@ -511,7 +479,6 @@ fw:
XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
out:
mutex_unlock(&pc->freq_lock);
- xe_device_mem_access_put(pc_to_xe(pc));
return ret;
}
@@ -528,7 +495,6 @@ int xe_guc_pc_set_min_freq(struct xe_guc_pc *pc, u32 freq)
{
int ret;
- xe_device_mem_access_get(pc_to_xe(pc));
mutex_lock(&pc->freq_lock);
if (!pc->freq_ready) {
/* Might be in the middle of a gt reset */
@@ -544,8 +510,6 @@ int xe_guc_pc_set_min_freq(struct xe_guc_pc *pc, u32 freq)
out:
mutex_unlock(&pc->freq_lock);
- xe_device_mem_access_put(pc_to_xe(pc));
-
return ret;
}
@@ -561,7 +525,6 @@ int xe_guc_pc_get_max_freq(struct xe_guc_pc *pc, u32 *freq)
{
int ret;
- xe_device_mem_access_get(pc_to_xe(pc));
mutex_lock(&pc->freq_lock);
if (!pc->freq_ready) {
/* Might be in the middle of a gt reset */
@@ -577,7 +540,6 @@ int xe_guc_pc_get_max_freq(struct xe_guc_pc *pc, u32 *freq)
out:
mutex_unlock(&pc->freq_lock);
- xe_device_mem_access_put(pc_to_xe(pc));
return ret;
}
@@ -594,7 +556,6 @@ int xe_guc_pc_set_max_freq(struct xe_guc_pc *pc, u32 freq)
{
int ret;
- xe_device_mem_access_get(pc_to_xe(pc));
mutex_lock(&pc->freq_lock);
if (!pc->freq_ready) {
/* Might be in the middle of a gt reset */
@@ -610,7 +571,6 @@ int xe_guc_pc_set_max_freq(struct xe_guc_pc *pc, u32 freq)
out:
mutex_unlock(&pc->freq_lock);
- xe_device_mem_access_put(pc_to_xe(pc));
return ret;
}
@@ -623,8 +583,6 @@ enum xe_gt_idle_state xe_guc_pc_c_status(struct xe_guc_pc *pc)
struct xe_gt *gt = pc_to_gt(pc);
u32 reg, gt_c_state;
- xe_device_mem_access_get(gt_to_xe(gt));
-
if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1270) {
reg = xe_mmio_read32(gt, MTL_MIRROR_TARGET_WP1);
gt_c_state = REG_FIELD_GET(MTL_CC_MASK, reg);
@@ -633,8 +591,6 @@ enum xe_gt_idle_state xe_guc_pc_c_status(struct xe_guc_pc *pc)
gt_c_state = REG_FIELD_GET(RCN_MASK, reg);
}
- xe_device_mem_access_put(gt_to_xe(gt));
-
switch (gt_c_state) {
case GT_C6:
return GT_IDLE_C6;
@@ -654,9 +610,7 @@ u64 xe_guc_pc_rc6_residency(struct xe_guc_pc *pc)
struct xe_gt *gt = pc_to_gt(pc);
u32 reg;
- xe_device_mem_access_get(gt_to_xe(gt));
reg = xe_mmio_read32(gt, GT_GFX_RC6);
- xe_device_mem_access_put(gt_to_xe(gt));
return reg;
}
@@ -670,9 +624,7 @@ u64 xe_guc_pc_mc6_residency(struct xe_guc_pc *pc)
struct xe_gt *gt = pc_to_gt(pc);
u64 reg;
- xe_device_mem_access_get(gt_to_xe(gt));
reg = xe_mmio_read32(gt, MTL_MEDIA_MC6);
- xe_device_mem_access_put(gt_to_xe(gt));
return reg;
}
@@ -743,24 +695,28 @@ static int pc_adjust_freq_bounds(struct xe_guc_pc *pc)
ret = pc_action_query_task_state(pc);
if (ret)
- return ret;
+ goto out;
/*
* GuC defaults to some RPmax that is not actually achievable without
* overclocking. Let's adjust it to the Hardware RP0, which is the
* regular maximum
*/
- if (pc_get_max_freq(pc) > pc->rp0_freq)
- pc_set_max_freq(pc, pc->rp0_freq);
+ if (pc_get_max_freq(pc) > pc->rp0_freq) {
+ ret = pc_set_max_freq(pc, pc->rp0_freq);
+ if (ret)
+ goto out;
+ }
/*
* Same thing happens for Server platforms where min is listed as
* RPMax
*/
if (pc_get_min_freq(pc) > pc->rp0_freq)
- pc_set_min_freq(pc, pc->rp0_freq);
+ ret = pc_set_min_freq(pc, pc->rp0_freq);
- return 0;
+out:
+ return ret;
}
static int pc_adjust_requested_freq(struct xe_guc_pc *pc)
@@ -801,23 +757,19 @@ int xe_guc_pc_gucrc_disable(struct xe_guc_pc *pc)
if (xe->info.skip_guc_pc)
return 0;
- xe_device_mem_access_get(pc_to_xe(pc));
-
ret = pc_action_setup_gucrc(pc, XE_GUCRC_HOST_CONTROL);
if (ret)
- goto out;
+ return ret;
ret = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
if (ret)
- goto out;
+ return ret;
xe_gt_idle_disable_c6(gt);
XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
-out:
- xe_device_mem_access_put(pc_to_xe(pc));
- return ret;
+ return 0;
}
static void pc_init_pcode_freq(struct xe_guc_pc *pc)
@@ -870,11 +822,9 @@ int xe_guc_pc_start(struct xe_guc_pc *pc)
xe_gt_assert(gt, xe_device_uc_enabled(xe));
- xe_device_mem_access_get(pc_to_xe(pc));
-
ret = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
if (ret)
- goto out_fail_force_wake;
+ return ret;
if (xe->info.skip_guc_pc) {
if (xe->info.platform != XE_PVC)
@@ -914,8 +864,6 @@ int xe_guc_pc_start(struct xe_guc_pc *pc)
out:
XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
-out_fail_force_wake:
- xe_device_mem_access_put(pc_to_xe(pc));
return ret;
}
@@ -926,32 +874,17 @@ out_fail_force_wake:
int xe_guc_pc_stop(struct xe_guc_pc *pc)
{
struct xe_device *xe = pc_to_xe(pc);
- int ret;
-
- xe_device_mem_access_get(pc_to_xe(pc));
if (xe->info.skip_guc_pc) {
xe_gt_idle_disable_c6(pc_to_gt(pc));
- ret = 0;
- goto out;
+ return 0;
}
mutex_lock(&pc->freq_lock);
pc->freq_ready = false;
mutex_unlock(&pc->freq_lock);
- ret = pc_action_shutdown(pc);
- if (ret)
- goto out;
-
- if (wait_for_pc_state(pc, SLPC_GLOBAL_STATE_NOT_RUNNING)) {
- drm_err(&pc_to_xe(pc)->drm, "GuC PC Shutdown failed\n");
- ret = -EIO;
- }
-
-out:
- xe_device_mem_access_put(pc_to_xe(pc));
- return ret;
+ return 0;
}
/**
@@ -965,13 +898,11 @@ static void xe_guc_pc_fini(struct drm_device *drm, void *arg)
struct xe_device *xe = pc_to_xe(pc);
if (xe->info.skip_guc_pc) {
- xe_device_mem_access_get(xe);
xe_gt_idle_disable_c6(pc_to_gt(pc));
- xe_device_mem_access_put(xe);
return;
}
- xe_force_wake_get(gt_to_fw(pc_to_gt(pc)), XE_FORCEWAKE_ALL);
+ XE_WARN_ON(xe_force_wake_get(gt_to_fw(pc_to_gt(pc)), XE_FORCEWAKE_ALL));
XE_WARN_ON(xe_guc_pc_gucrc_disable(pc));
XE_WARN_ON(xe_guc_pc_stop(pc));
xe_force_wake_put(gt_to_fw(pc_to_gt(pc)), XE_FORCEWAKE_ALL);
@@ -998,16 +929,13 @@ int xe_guc_pc_init(struct xe_guc_pc *pc)
return err;
bo = xe_managed_bo_create_pin_map(xe, tile, size,
- XE_BO_CREATE_VRAM_IF_DGFX(tile) |
- XE_BO_CREATE_GGTT_BIT);
+ XE_BO_FLAG_VRAM_IF_DGFX(tile) |
+ XE_BO_FLAG_GGTT |
+ XE_BO_FLAG_GGTT_INVALIDATE);
if (IS_ERR(bo))
return PTR_ERR(bo);
pc->bo = bo;
- err = drmm_add_action_or_reset(&xe->drm, xe_guc_pc_fini, pc);
- if (err)
- return err;
-
- return 0;
+ return drmm_add_action_or_reset(&xe->drm, xe_guc_pc_fini, pc);
}
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
index e2a4c3b5e9ff..c7d38469fb46 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -27,6 +27,7 @@
#include "xe_guc.h"
#include "xe_guc_ct.h"
#include "xe_guc_exec_queue_types.h"
+#include "xe_guc_id_mgr.h"
#include "xe_guc_submit_types.h"
#include "xe_hw_engine.h"
#include "xe_hw_fence.h"
@@ -236,17 +237,9 @@ static void guc_submit_fini(struct drm_device *drm, void *arg)
struct xe_guc *guc = arg;
xa_destroy(&guc->submission_state.exec_queue_lookup);
- ida_destroy(&guc->submission_state.guc_ids);
- bitmap_free(guc->submission_state.guc_ids_bitmap);
free_submit_wq(guc);
- mutex_destroy(&guc->submission_state.lock);
}
-#define GUC_ID_MAX 65535
-#define GUC_ID_NUMBER_MLRC 4096
-#define GUC_ID_NUMBER_SLRC (GUC_ID_MAX - GUC_ID_NUMBER_MLRC)
-#define GUC_ID_START_MLRC GUC_ID_NUMBER_SLRC
-
static const struct xe_exec_queue_ops guc_exec_queue_ops;
static void primelockdep(struct xe_guc *guc)
@@ -269,33 +262,28 @@ int xe_guc_submit_init(struct xe_guc *guc)
struct xe_gt *gt = guc_to_gt(guc);
int err;
- guc->submission_state.guc_ids_bitmap =
- bitmap_zalloc(GUC_ID_NUMBER_MLRC, GFP_KERNEL);
- if (!guc->submission_state.guc_ids_bitmap)
- return -ENOMEM;
+ err = drmm_mutex_init(&xe->drm, &guc->submission_state.lock);
+ if (err)
+ return err;
+
+ err = xe_guc_id_mgr_init(&guc->submission_state.idm, ~0);
+ if (err)
+ return err;
err = alloc_submit_wq(guc);
- if (err) {
- bitmap_free(guc->submission_state.guc_ids_bitmap);
+ if (err)
return err;
- }
gt->exec_queue_ops = &guc_exec_queue_ops;
- mutex_init(&guc->submission_state.lock);
xa_init(&guc->submission_state.exec_queue_lookup);
- ida_init(&guc->submission_state.guc_ids);
spin_lock_init(&guc->submission_state.suspend.lock);
guc->submission_state.suspend.context = dma_fence_context_alloc(1);
primelockdep(guc);
- err = drmm_add_action_or_reset(&xe->drm, guc_submit_fini, guc);
- if (err)
- return err;
-
- return 0;
+ return drmm_add_action_or_reset(&xe->drm, guc_submit_fini, guc);
}
static void __release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q, u32 xa_count)
@@ -307,12 +295,8 @@ static void __release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q, u32 xa
for (i = 0; i < xa_count; ++i)
xa_erase(&guc->submission_state.exec_queue_lookup, q->guc->id + i);
- if (xe_exec_queue_is_parallel(q))
- bitmap_release_region(guc->submission_state.guc_ids_bitmap,
- q->guc->id - GUC_ID_START_MLRC,
- order_base_2(q->width));
- else
- ida_free(&guc->submission_state.guc_ids, q->guc->id);
+ xe_guc_id_mgr_release_locked(&guc->submission_state.idm,
+ q->guc->id, q->width);
}
static int alloc_guc_id(struct xe_guc *guc, struct xe_exec_queue *q)
@@ -330,21 +314,12 @@ static int alloc_guc_id(struct xe_guc *guc, struct xe_exec_queue *q)
*/
lockdep_assert_held(&guc->submission_state.lock);
- if (xe_exec_queue_is_parallel(q)) {
- void *bitmap = guc->submission_state.guc_ids_bitmap;
-
- ret = bitmap_find_free_region(bitmap, GUC_ID_NUMBER_MLRC,
- order_base_2(q->width));
- } else {
- ret = ida_alloc_max(&guc->submission_state.guc_ids,
- GUC_ID_NUMBER_SLRC - 1, GFP_NOWAIT);
- }
+ ret = xe_guc_id_mgr_reserve_locked(&guc->submission_state.idm,
+ q->width);
if (ret < 0)
return ret;
q->guc->id = ret;
- if (xe_exec_queue_is_parallel(q))
- q->guc->id += GUC_ID_START_MLRC;
for (i = 0; i < q->width; ++i) {
ptr = xa_store(&guc->submission_state.exec_queue_lookup,
@@ -533,7 +508,7 @@ static void register_engine(struct xe_exec_queue *q)
info.flags = CONTEXT_REGISTRATION_FLAG_KMD;
if (xe_exec_queue_is_parallel(q)) {
- u32 ggtt_addr = xe_lrc_parallel_ggtt_addr(lrc);
+ u64 ggtt_addr = xe_lrc_parallel_ggtt_addr(lrc);
struct iosys_map map = xe_lrc_parallel_map(lrc);
info.wq_desc_lo = lower_32_bits(ggtt_addr +
@@ -833,7 +808,9 @@ static void simple_error_capture(struct xe_exec_queue *q)
}
}
- xe_force_wake_get(gt_to_fw(guc_to_gt(guc)), XE_FORCEWAKE_ALL);
+ if (xe_force_wake_get(gt_to_fw(guc_to_gt(guc)), XE_FORCEWAKE_ALL))
+ xe_gt_info(guc_to_gt(guc),
+ "failed to get forcewake for error capture");
xe_guc_ct_print(&guc->ct, &p, true);
guc_exec_queue_print(q, &p);
for_each_hw_engine(hwe, guc_to_gt(guc), id) {
@@ -929,20 +906,26 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job)
int err = -ETIME;
int i = 0;
- if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &job->fence->flags)) {
- drm_notice(&xe->drm, "Timedout job: seqno=%u, guc_id=%d, flags=0x%lx",
- xe_sched_job_seqno(job), q->guc->id, q->flags);
- xe_gt_WARN(q->gt, q->flags & EXEC_QUEUE_FLAG_KERNEL,
- "Kernel-submitted job timed out\n");
- xe_gt_WARN(q->gt, q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q),
- "VM job timed out on non-killed execqueue\n");
+ /*
+ * TDR has fired before the free job worker. Common if the exec
+ * queue is closed immediately after the last fence signals.
+ */
+ if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &job->fence->flags)) {
+ guc_exec_queue_free_job(drm_job);
- simple_error_capture(q);
- xe_devcoredump(job);
- } else {
- drm_dbg(&xe->drm, "Timedout signaled job: seqno=%u, guc_id=%d, flags=0x%lx",
- xe_sched_job_seqno(job), q->guc->id, q->flags);
+ return DRM_GPU_SCHED_STAT_NOMINAL;
}
+
+ drm_notice(&xe->drm, "Timedout job: seqno=%u, guc_id=%d, flags=0x%lx",
+ xe_sched_job_seqno(job), q->guc->id, q->flags);
+ xe_gt_WARN(q->gt, q->flags & EXEC_QUEUE_FLAG_KERNEL,
+ "Kernel-submitted job timed out\n");
+ xe_gt_WARN(q->gt, q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q),
+ "VM job timed out on non-killed execqueue\n");
+
+ simple_error_capture(q);
+ xe_devcoredump(job);
+
trace_xe_sched_job_timedout(job);
/* Kill the run_job entry point */
@@ -1568,28 +1551,8 @@ static void deregister_exec_queue(struct xe_guc *guc, struct xe_exec_queue *q)
xe_guc_ct_send_g2h_handler(&guc->ct, action, ARRAY_SIZE(action));
}
-int xe_guc_sched_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
+static void handle_sched_done(struct xe_guc *guc, struct xe_exec_queue *q)
{
- struct xe_device *xe = guc_to_xe(guc);
- struct xe_exec_queue *q;
- u32 guc_id = msg[0];
-
- if (unlikely(len < 2)) {
- drm_err(&xe->drm, "Invalid length %u", len);
- return -EPROTO;
- }
-
- q = g2h_exec_queue_lookup(guc, guc_id);
- if (unlikely(!q))
- return -EPROTO;
-
- if (unlikely(!exec_queue_pending_enable(q) &&
- !exec_queue_pending_disable(q))) {
- drm_err(&xe->drm, "Unexpected engine state 0x%04x",
- atomic_read(&q->guc->state));
- return -EPROTO;
- }
-
trace_xe_exec_queue_scheduling_done(q);
if (exec_queue_pending_enable(q)) {
@@ -1609,17 +1572,15 @@ int xe_guc_sched_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
deregister_exec_queue(guc, q);
}
}
-
- return 0;
}
-int xe_guc_deregister_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
+int xe_guc_sched_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
{
struct xe_device *xe = guc_to_xe(guc);
struct xe_exec_queue *q;
u32 guc_id = msg[0];
- if (unlikely(len < 1)) {
+ if (unlikely(len < 2)) {
drm_err(&xe->drm, "Invalid length %u", len);
return -EPROTO;
}
@@ -1628,13 +1589,20 @@ int xe_guc_deregister_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
if (unlikely(!q))
return -EPROTO;
- if (!exec_queue_destroyed(q) || exec_queue_pending_disable(q) ||
- exec_queue_pending_enable(q) || exec_queue_enabled(q)) {
+ if (unlikely(!exec_queue_pending_enable(q) &&
+ !exec_queue_pending_disable(q))) {
drm_err(&xe->drm, "Unexpected engine state 0x%04x",
atomic_read(&q->guc->state));
return -EPROTO;
}
+ handle_sched_done(guc, q);
+
+ return 0;
+}
+
+static void handle_deregister_done(struct xe_guc *guc, struct xe_exec_queue *q)
+{
trace_xe_exec_queue_deregister_done(q);
clear_exec_queue_registered(q);
@@ -1643,6 +1611,31 @@ int xe_guc_deregister_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
xe_exec_queue_put(q);
else
__guc_exec_queue_fini(guc, q);
+}
+
+int xe_guc_deregister_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
+{
+ struct xe_device *xe = guc_to_xe(guc);
+ struct xe_exec_queue *q;
+ u32 guc_id = msg[0];
+
+ if (unlikely(len < 1)) {
+ drm_err(&xe->drm, "Invalid length %u", len);
+ return -EPROTO;
+ }
+
+ q = g2h_exec_queue_lookup(guc, guc_id);
+ if (unlikely(!q))
+ return -EPROTO;
+
+ if (!exec_queue_destroyed(q) || exec_queue_pending_disable(q) ||
+ exec_queue_pending_enable(q) || exec_queue_enabled(q)) {
+ drm_err(&xe->drm, "Unexpected engine state 0x%04x",
+ atomic_read(&q->guc->state));
+ return -EPROTO;
+ }
+
+ handle_deregister_done(guc, q);
return 0;
}
@@ -1782,7 +1775,7 @@ guc_exec_queue_wq_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snaps
/**
* xe_guc_exec_queue_snapshot_capture - Take a quick snapshot of the GuC Engine.
- * @job: faulty Xe scheduled job.
+ * @q: faulty exec queue
*
* This can be printed out in a later stage like during dev_coredump
* analysis.
@@ -1791,9 +1784,8 @@ guc_exec_queue_wq_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snaps
* caller, using `xe_guc_exec_queue_snapshot_free`.
*/
struct xe_guc_submit_exec_queue_snapshot *
-xe_guc_exec_queue_snapshot_capture(struct xe_sched_job *job)
+xe_guc_exec_queue_snapshot_capture(struct xe_exec_queue *q)
{
- struct xe_exec_queue *q = job->q;
struct xe_gpu_scheduler *sched = &q->guc->sched;
struct xe_guc_submit_exec_queue_snapshot *snapshot;
int i;
@@ -1814,21 +1806,14 @@ xe_guc_exec_queue_snapshot_capture(struct xe_sched_job *job)
snapshot->sched_props.preempt_timeout_us =
q->sched_props.preempt_timeout_us;
- snapshot->lrc = kmalloc_array(q->width, sizeof(struct lrc_snapshot),
+ snapshot->lrc = kmalloc_array(q->width, sizeof(struct xe_lrc_snapshot *),
GFP_ATOMIC);
if (snapshot->lrc) {
for (i = 0; i < q->width; ++i) {
struct xe_lrc *lrc = q->lrc + i;
- snapshot->lrc[i].context_desc =
- lower_32_bits(xe_lrc_ggtt_addr(lrc));
- snapshot->lrc[i].head = xe_lrc_ring_head(lrc);
- snapshot->lrc[i].tail.internal = lrc->ring.tail;
- snapshot->lrc[i].tail.memory =
- xe_lrc_read_ctx_reg(lrc, CTX_RING_TAIL);
- snapshot->lrc[i].start_seqno = xe_lrc_start_seqno(lrc);
- snapshot->lrc[i].seqno = xe_lrc_seqno(lrc);
+ snapshot->lrc[i] = xe_lrc_snapshot_capture(lrc);
}
}
@@ -1867,6 +1852,24 @@ xe_guc_exec_queue_snapshot_capture(struct xe_sched_job *job)
}
/**
+ * xe_guc_exec_queue_snapshot_capture_delayed - Take the delayed part of a GuC engine snapshot.
+ * @snapshot: previously captured snapshot of the job.
+ *
+ * This captures data that requires taking locks, so it cannot be done in the signaling path.
+ */
+void
+xe_guc_exec_queue_snapshot_capture_delayed(struct xe_guc_submit_exec_queue_snapshot *snapshot)
+{
+ int i;
+
+ if (!snapshot || !snapshot->lrc)
+ return;
+
+ for (i = 0; i < snapshot->width; ++i)
+ xe_lrc_snapshot_capture_delayed(snapshot->lrc[i]);
+}
+
+/**
* xe_guc_exec_queue_snapshot_print - Print out a given GuC Engine snapshot.
* @snapshot: GuC Submit Engine snapshot object.
* @p: drm_printer where it will be printed out.
@@ -1894,18 +1897,9 @@ xe_guc_exec_queue_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snaps
drm_printf(p, "\tPreempt timeout: %u (us)\n",
snapshot->sched_props.preempt_timeout_us);
- for (i = 0; snapshot->lrc && i < snapshot->width; ++i) {
- drm_printf(p, "\tHW Context Desc: 0x%08x\n",
- snapshot->lrc[i].context_desc);
- drm_printf(p, "\tLRC Head: (memory) %u\n",
- snapshot->lrc[i].head);
- drm_printf(p, "\tLRC Tail: (internal) %u, (memory) %u\n",
- snapshot->lrc[i].tail.internal,
- snapshot->lrc[i].tail.memory);
- drm_printf(p, "\tStart seqno: (memory) %d\n",
- snapshot->lrc[i].start_seqno);
- drm_printf(p, "\tSeqno: (memory) %d\n", snapshot->lrc[i].seqno);
- }
+ for (i = 0; snapshot->lrc && i < snapshot->width; ++i)
+ xe_lrc_snapshot_print(snapshot->lrc[i], p);
+
drm_printf(p, "\tSchedule State: 0x%x\n", snapshot->schedule_state);
drm_printf(p, "\tFlags: 0x%lx\n", snapshot->exec_queue_flags);
@@ -1930,10 +1924,16 @@ xe_guc_exec_queue_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snaps
*/
void xe_guc_exec_queue_snapshot_free(struct xe_guc_submit_exec_queue_snapshot *snapshot)
{
+ int i;
+
if (!snapshot)
return;
- kfree(snapshot->lrc);
+ if (snapshot->lrc) {
+ for (i = 0; i < snapshot->width; i++)
+ xe_lrc_snapshot_free(snapshot->lrc[i]);
+ kfree(snapshot->lrc);
+ }
kfree(snapshot->pending_list);
kfree(snapshot);
}
@@ -1941,28 +1941,10 @@ void xe_guc_exec_queue_snapshot_free(struct xe_guc_submit_exec_queue_snapshot *s
static void guc_exec_queue_print(struct xe_exec_queue *q, struct drm_printer *p)
{
struct xe_guc_submit_exec_queue_snapshot *snapshot;
- struct xe_gpu_scheduler *sched = &q->guc->sched;
- struct xe_sched_job *job;
- bool found = false;
-
- spin_lock(&sched->base.job_list_lock);
- list_for_each_entry(job, &sched->base.pending_list, drm.list) {
- if (job->q == q) {
- xe_sched_job_get(job);
- found = true;
- break;
- }
- }
- spin_unlock(&sched->base.job_list_lock);
- if (!found)
- return;
-
- snapshot = xe_guc_exec_queue_snapshot_capture(job);
+ snapshot = xe_guc_exec_queue_snapshot_capture(q);
xe_guc_exec_queue_snapshot_print(snapshot, p);
xe_guc_exec_queue_snapshot_free(snapshot);
-
- xe_sched_job_put(job);
}
/**
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.h b/drivers/gpu/drm/xe/xe_guc_submit.h
index 723dc2bd8df9..fad0421ead36 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.h
+++ b/drivers/gpu/drm/xe/xe_guc_submit.h
@@ -9,8 +9,8 @@
#include <linux/types.h>
struct drm_printer;
+struct xe_exec_queue;
struct xe_guc;
-struct xe_sched_job;
int xe_guc_submit_init(struct xe_guc *guc);
@@ -27,7 +27,9 @@ int xe_guc_exec_queue_memory_cat_error_handler(struct xe_guc *guc, u32 *msg,
int xe_guc_exec_queue_reset_failure_handler(struct xe_guc *guc, u32 *msg, u32 len);
struct xe_guc_submit_exec_queue_snapshot *
-xe_guc_exec_queue_snapshot_capture(struct xe_sched_job *job);
+xe_guc_exec_queue_snapshot_capture(struct xe_exec_queue *q);
+void
+xe_guc_exec_queue_snapshot_capture_delayed(struct xe_guc_submit_exec_queue_snapshot *snapshot);
void
xe_guc_exec_queue_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snapshot,
struct drm_printer *p);
diff --git a/drivers/gpu/drm/xe/xe_guc_submit_types.h b/drivers/gpu/drm/xe/xe_guc_submit_types.h
index 72fc0f42b0a5..dc7456c34583 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit_types.h
+++ b/drivers/gpu/drm/xe/xe_guc_submit_types.h
@@ -61,17 +61,6 @@ struct guc_submit_parallel_scratch {
u32 wq[WQ_SIZE / sizeof(u32)];
};
-struct lrc_snapshot {
- u32 context_desc;
- u32 head;
- struct {
- u32 internal;
- u32 memory;
- } tail;
- u32 start_seqno;
- u32 seqno;
-};
-
struct pending_list_snapshot {
u32 seqno;
bool fence;
@@ -109,7 +98,7 @@ struct xe_guc_submit_exec_queue_snapshot {
} sched_props;
/** @lrc: LRC Snapshot */
- struct lrc_snapshot *lrc;
+ struct xe_lrc_snapshot **lrc;
/** @schedule_state: Schedule State at the moment of Crash */
u32 schedule_state;
diff --git a/drivers/gpu/drm/xe/xe_guc_types.h b/drivers/gpu/drm/xe/xe_guc_types.h
index edcd1a950bd3..82bd93f7867d 100644
--- a/drivers/gpu/drm/xe/xe_guc_types.h
+++ b/drivers/gpu/drm/xe/xe_guc_types.h
@@ -32,6 +32,21 @@ struct xe_guc_db_mgr {
};
/**
+ * struct xe_guc_id_mgr - GuC context ID Manager.
+ *
+ * Note: the GuC context ID manager relies on &xe_guc::submission_state.lock
+ * to protect its members.
+ */
+struct xe_guc_id_mgr {
+ /** @bitmap: bitmap to track allocated IDs */
+ unsigned long *bitmap;
+ /** @total: total number of IDs being managed */
+ unsigned int total;
+ /** @used: number of IDs currently in use */
+ unsigned int used;
+};
+
+/**
* struct xe_guc - Graphic micro controller
*/
struct xe_guc {
@@ -49,12 +64,10 @@ struct xe_guc {
struct xe_guc_db_mgr dbm;
/** @submission_state: GuC submission state */
struct {
+ /** @submission_state.idm: GuC context ID Manager */
+ struct xe_guc_id_mgr idm;
/** @submission_state.exec_queue_lookup: Lookup an xe_engine from guc_id */
struct xarray exec_queue_lookup;
- /** @submission_state.guc_ids: used to allocate new guc_ids, single-lrc */
- struct ida guc_ids;
- /** @submission_state.guc_ids_bitmap: used to allocate new guc_ids, multi-lrc */
- unsigned long *guc_ids_bitmap;
/** @submission_state.stopped: submissions are stopped */
atomic_t stopped;
/** @submission_state.lock: protects submission state */
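
A minimal sketch (not part of the patch) of how the submission code in this series uses the manager, mirroring alloc_guc_id() and __release_guc_id() above: a reservation returns the first ID of a contiguous range of q->width IDs, and both calls must be made under &xe_guc::submission_state.lock:

    mutex_lock(&guc->submission_state.lock);
    ret = xe_guc_id_mgr_reserve_locked(&guc->submission_state.idm, q->width);
    if (ret >= 0)
        q->guc->id = ret; /* first ID of the reserved range */
    mutex_unlock(&guc->submission_state.lock);

    /* later, again with the lock held */
    xe_guc_id_mgr_release_locked(&guc->submission_state.idm,
                                 q->guc->id, q->width);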
diff --git a/drivers/gpu/drm/xe/xe_hmm.c b/drivers/gpu/drm/xe/xe_hmm.c
new file mode 100644
index 000000000000..2c32dc46f7d4
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_hmm.c
@@ -0,0 +1,253 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2024 Intel Corporation
+ */
+
+#include <linux/scatterlist.h>
+#include <linux/mmu_notifier.h>
+#include <linux/dma-mapping.h>
+#include <linux/memremap.h>
+#include <linux/swap.h>
+#include <linux/hmm.h>
+#include <linux/mm.h>
+#include "xe_hmm.h"
+#include "xe_vm.h"
+#include "xe_bo.h"
+
+static u64 xe_npages_in_range(unsigned long start, unsigned long end)
+{
+ return (end - start) >> PAGE_SHIFT;
+}
+
+/*
+ * xe_mark_range_accessed() - mark a range as accessed, so the core mm
+ * has this information for memory eviction or writeback to disk
+ *
+ * @range: the range to mark
+ * @write: if the range was written, also mark the pages in it dirty
+ */
+static void xe_mark_range_accessed(struct hmm_range *range, bool write)
+{
+ struct page *page;
+ u64 i, npages;
+
+ npages = xe_npages_in_range(range->start, range->end);
+ for (i = 0; i < npages; i++) {
+ page = hmm_pfn_to_page(range->hmm_pfns[i]);
+ if (write)
+ set_page_dirty_lock(page);
+
+ mark_page_accessed(page);
+ }
+}
+
+/*
+ * xe_build_sg() - build a scatter gather table for all the physical pages/pfns
+ * in a hmm_range, dma-mapping pages if necessary. The dma-addresses are saved
+ * in the sg table and later used to program the GPU page tables.
+ *
+ * @xe: the xe device that will access the dma-addresses in the sg table
+ * @range: the hmm range that we build the sg table from. range->hmm_pfns[]
+ * holds the pfns of the pages backing this hmm address range.
+ * @st: pointer to the sg table.
+ * @write: whether we write to this range. This decides the dma map direction
+ * for system pages: if write, we map bi-directional; otherwise
+ * DMA_TO_DEVICE
+ *
+ * Contiguous pfns are collapsed into a single entry in the scatter
+ * gather table, so that the GPU page tables can be programmed
+ * efficiently.
+ *
+ * The dma_address in the sg table will later be used by the GPU to
+ * access memory. So if the memory is system memory, we need to
+ * dma-map it so it can be accessed by the GPU/DMA.
+ *
+ * FIXME: This function currently only supports pages in system
+ * memory. If the memory is GPU local memory (of the GPU that
+ * is going to access it), we need the gpu dpa (device physical
+ * address) and no dma-mapping is needed. This is TBD.
+ *
+ * FIXME: dma-mapping for a peer gpu device to access a remote gpu's
+ * memory. Add this when p2p is supported.
+ *
+ * This function allocates the storage of the sg table. It is the
+ * caller's responsibility to free it by calling sg_free_table().
+ *
+ * Return: 0 if successful; -ENOMEM if it fails to allocate memory
+ */
+static int xe_build_sg(struct xe_device *xe, struct hmm_range *range,
+ struct sg_table *st, bool write)
+{
+ struct device *dev = xe->drm.dev;
+ struct page **pages;
+ u64 i, npages;
+ int ret;
+
+ npages = xe_npages_in_range(range->start, range->end);
+ pages = kvmalloc_array(npages, sizeof(*pages), GFP_KERNEL);
+ if (!pages)
+ return -ENOMEM;
+
+ for (i = 0; i < npages; i++) {
+ pages[i] = hmm_pfn_to_page(range->hmm_pfns[i]);
+ xe_assert(xe, !is_device_private_page(pages[i]));
+ }
+
+ ret = sg_alloc_table_from_pages_segment(st, pages, npages, 0, npages << PAGE_SHIFT,
+ xe_sg_segment_size(dev), GFP_KERNEL);
+ if (ret)
+ goto free_pages;
+
+ ret = dma_map_sgtable(dev, st, write ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE,
+ DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_NO_KERNEL_MAPPING);
+ if (ret) {
+ sg_free_table(st);
+ st = NULL;
+ }
+
+free_pages:
+ kvfree(pages);
+ return ret;
+}
+
+/*
+ * xe_hmm_userptr_free_sg() - Free the scatter gather table of a userptr
+ *
+ * @uvma: the userptr vma which holds the scatter gather table
+ *
+ * xe_hmm_userptr_populate_range() allocates the storage of the userptr
+ * sg table. This is a helper function to free that sg table and
+ * dma-unmap the addresses in it.
+ */
+void xe_hmm_userptr_free_sg(struct xe_userptr_vma *uvma)
+{
+ struct xe_userptr *userptr = &uvma->userptr;
+ struct xe_vma *vma = &uvma->vma;
+ bool write = !xe_vma_read_only(vma);
+ struct xe_vm *vm = xe_vma_vm(vma);
+ struct xe_device *xe = vm->xe;
+ struct device *dev = xe->drm.dev;
+
+ xe_assert(xe, userptr->sg);
+ dma_unmap_sgtable(dev, userptr->sg,
+ write ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE, 0);
+
+ sg_free_table(userptr->sg);
+ userptr->sg = NULL;
+}
+
+/**
+ * xe_hmm_userptr_populate_range() - Populate physical pages of a virtual
+ * address range
+ *
+ * @uvma: userptr vma which has information about the range to populate.
+ * @is_mm_mmap_locked: True if mmap_read_lock is already acquired by the caller.
+ *
+ * This function populates the physical pages of a virtual
+ * address range. The populated physical pages are saved in the
+ * userptr's sg table. It is similar to get_user_pages() but calls
+ * hmm_range_fault().
+ *
+ * This function also reads the mmu notifier sequence number
+ * (mmu_interval_read_begin()) for the purpose of later
+ * comparison (through mmu_interval_read_retry()).
+ *
+ * This must be called with the mmap read or write lock held.
+ *
+ * This function allocates the storage of the userptr sg table.
+ * It is the caller's responsibility to free it by calling sg_free_table().
+ *
+ * Return: 0 for success; negative error code on failure
+ */
+int xe_hmm_userptr_populate_range(struct xe_userptr_vma *uvma,
+ bool is_mm_mmap_locked)
+{
+ unsigned long timeout =
+ jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
+ unsigned long *pfns, flags = HMM_PFN_REQ_FAULT;
+ struct xe_userptr *userptr;
+ struct xe_vma *vma = &uvma->vma;
+ u64 userptr_start = xe_vma_userptr(vma);
+ u64 userptr_end = userptr_start + xe_vma_size(vma);
+ struct xe_vm *vm = xe_vma_vm(vma);
+ struct hmm_range hmm_range;
+ bool write = !xe_vma_read_only(vma);
+ unsigned long notifier_seq;
+ u64 npages;
+ int ret;
+
+ userptr = &uvma->userptr;
+
+ if (is_mm_mmap_locked)
+ mmap_assert_locked(userptr->notifier.mm);
+
+ if (vma->gpuva.flags & XE_VMA_DESTROYED)
+ return 0;
+
+ notifier_seq = mmu_interval_read_begin(&userptr->notifier);
+ if (notifier_seq == userptr->notifier_seq)
+ return 0;
+
+ if (userptr->sg)
+ xe_hmm_userptr_free_sg(uvma);
+
+ npages = xe_npages_in_range(userptr_start, userptr_end);
+ pfns = kvmalloc_array(npages, sizeof(*pfns), GFP_KERNEL);
+ if (unlikely(!pfns))
+ return -ENOMEM;
+
+ if (write)
+ flags |= HMM_PFN_REQ_WRITE;
+
+ if (!mmget_not_zero(userptr->notifier.mm)) {
+ ret = -EFAULT;
+ goto free_pfns;
+ }
+
+ hmm_range.default_flags = flags;
+ hmm_range.hmm_pfns = pfns;
+ hmm_range.notifier = &userptr->notifier;
+ hmm_range.start = userptr_start;
+ hmm_range.end = userptr_end;
+ hmm_range.dev_private_owner = vm->xe;
+
+ while (true) {
+ hmm_range.notifier_seq = mmu_interval_read_begin(&userptr->notifier);
+
+ if (!is_mm_mmap_locked)
+ mmap_read_lock(userptr->notifier.mm);
+
+ ret = hmm_range_fault(&hmm_range);
+
+ if (!is_mm_mmap_locked)
+ mmap_read_unlock(userptr->notifier.mm);
+
+ if (ret == -EBUSY) {
+ if (time_after(jiffies, timeout))
+ break;
+
+ continue;
+ }
+ break;
+ }
+
+ mmput(userptr->notifier.mm);
+
+ if (ret)
+ goto free_pfns;
+
+ ret = xe_build_sg(vm->xe, &hmm_range, &userptr->sgt, write);
+ if (ret)
+ goto free_pfns;
+
+ xe_mark_range_accessed(&hmm_range, write);
+ userptr->sg = &userptr->sgt;
+ userptr->notifier_seq = hmm_range.notifier_seq;
+
+free_pfns:
+ kvfree(pfns);
+ return ret;
+}
+
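
A minimal sketch (not part of the patch) of the caller pattern the kernel-doc above implies: populate the pages, program the GPU page tables from the resulting sg table, and redo everything if the mmu notifier invalidated the range in the meantime. In the real driver the retry check would be done under the appropriate VM lock so the bind cannot race with an invalidation:

    int err;

    do {
        err = xe_hmm_userptr_populate_range(uvma, false);
        if (err)
            return err;
        /* ... program GPU page tables from uvma->userptr.sg ... */
    } while (mmu_interval_read_retry(&uvma->userptr.notifier,
                                     uvma->userptr.notifier_seq));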
diff --git a/drivers/gpu/drm/xe/xe_hmm.h b/drivers/gpu/drm/xe/xe_hmm.h
new file mode 100644
index 000000000000..909dc2bdcd97
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_hmm.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2024 Intel Corporation
+ */
+
+#ifndef _XE_HMM_H_
+#define _XE_HMM_H_
+
+#include <linux/types.h>
+
+struct xe_userptr_vma;
+
+int xe_hmm_userptr_populate_range(struct xe_userptr_vma *uvma, bool is_mm_mmap_locked);
+void xe_hmm_userptr_free_sg(struct xe_userptr_vma *uvma);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_huc.c b/drivers/gpu/drm/xe/xe_huc.c
index b545f850087c..39a484a57585 100644
--- a/drivers/gpu/drm/xe/xe_huc.c
+++ b/drivers/gpu/drm/xe/xe_huc.c
@@ -53,26 +53,19 @@ static int huc_alloc_gsc_pkt(struct xe_huc *huc)
struct xe_gt *gt = huc_to_gt(huc);
struct xe_device *xe = gt_to_xe(gt);
struct xe_bo *bo;
- int err;
/* we use a single object for both input and output */
bo = xe_bo_create_pin_map(xe, gt_to_tile(gt), NULL,
PXP43_HUC_AUTH_INOUT_SIZE * 2,
ttm_bo_type_kernel,
- XE_BO_CREATE_SYSTEM_BIT |
- XE_BO_CREATE_GGTT_BIT);
+ XE_BO_FLAG_SYSTEM |
+ XE_BO_FLAG_GGTT);
if (IS_ERR(bo))
return PTR_ERR(bo);
huc->gsc_pkt = bo;
- err = drmm_add_action_or_reset(&xe->drm, free_gsc_pkt, huc);
- if (err) {
- free_gsc_pkt(&xe->drm, huc);
- return err;
- }
-
- return 0;
+ return drmm_add_action_or_reset(&xe->drm, free_gsc_pkt, huc);
}
int xe_huc_init(struct xe_huc *huc)
diff --git a/drivers/gpu/drm/xe/xe_huc_debugfs.c b/drivers/gpu/drm/xe/xe_huc_debugfs.c
index 18585a7eeb9d..3a888a40188b 100644
--- a/drivers/gpu/drm/xe/xe_huc_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_huc_debugfs.c
@@ -12,6 +12,7 @@
#include "xe_gt.h"
#include "xe_huc.h"
#include "xe_macros.h"
+#include "xe_pm.h"
static struct xe_gt *
huc_to_gt(struct xe_huc *huc)
@@ -36,9 +37,9 @@ static int huc_info(struct seq_file *m, void *data)
struct xe_device *xe = huc_to_xe(huc);
struct drm_printer p = drm_seq_file_printer(m);
- xe_device_mem_access_get(xe);
+ xe_pm_runtime_get(xe);
xe_huc_print_info(huc, &p);
- xe_device_mem_access_put(xe);
+ xe_pm_runtime_put(xe);
return 0;
}
diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c
index b5e83ea172f3..455f375c1cbd 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine.c
+++ b/drivers/gpu/drm/xe/xe_hw_engine.c
@@ -14,8 +14,10 @@
#include "xe_device.h"
#include "xe_execlist.h"
#include "xe_force_wake.h"
+#include "xe_gsc.h"
#include "xe_gt.h"
#include "xe_gt_ccs_mode.h"
+#include "xe_gt_printk.h"
#include "xe_gt_topology.h"
#include "xe_hw_fence.h"
#include "xe_irq.h"
@@ -463,6 +465,32 @@ static void hw_engine_init_early(struct xe_gt *gt, struct xe_hw_engine *hwe,
hwe->eclass->sched_props.preempt_timeout_us = XE_HW_ENGINE_PREEMPT_TIMEOUT;
hwe->eclass->sched_props.preempt_timeout_min = XE_HW_ENGINE_PREEMPT_TIMEOUT_MIN;
hwe->eclass->sched_props.preempt_timeout_max = XE_HW_ENGINE_PREEMPT_TIMEOUT_MAX;
+
+ /*
+ * The GSC engine can accept submissions while the GSC shim is
+ * being reset, during which time the submission is stalled. In
+ * the worst case, the shim reset can take up to the maximum GSC
+ * command execution time (250ms), so the request start can be
+ * delayed by that much; the request itself can take that long
+ * without being preemptible, which means worst case it can
+ * theoretically take up to 500ms for a preemption to go through
+ * on the GSC engine. Adding to that an extra 100ms as a safety
+ * margin, we get a minimum recommended timeout of 600ms.
+ * The preempt_timeout value can't be tuned for OTHER_CLASS
+ * because the class is reserved for kernel usage, so we just
+ * need to make sure that the starting value is above that
+ * threshold; since our default value (640ms) is greater than
+ * 600ms, the only way we can go below is via a kconfig setting.
+ * If that happens, log it in dmesg and update the value.
+ */
+ if (hwe->class == XE_ENGINE_CLASS_OTHER) {
+ const u32 min_preempt_timeout = 600 * 1000;
+
+ if (hwe->eclass->sched_props.preempt_timeout_us < min_preempt_timeout) {
+ hwe->eclass->sched_props.preempt_timeout_us = min_preempt_timeout;
+ xe_gt_notice(gt, "Increasing preempt_timeout for GSC to 600ms\n");
+ }
+ }
+
/* Record default props */
hwe->eclass->defaults = hwe->eclass->sched_props;
}
@@ -490,8 +518,9 @@ static int hw_engine_init(struct xe_gt *gt, struct xe_hw_engine *hwe,
xe_reg_sr_apply_whitelist(hwe);
hwe->hwsp = xe_managed_bo_create_pin_map(xe, tile, SZ_4K,
- XE_BO_CREATE_VRAM_IF_DGFX(tile) |
- XE_BO_CREATE_GGTT_BIT);
+ XE_BO_FLAG_VRAM_IF_DGFX(tile) |
+ XE_BO_FLAG_GGTT |
+ XE_BO_FLAG_GGTT_INVALIDATE);
if (IS_ERR(hwe->hwsp)) {
err = PTR_ERR(hwe->hwsp);
goto err_name;
@@ -509,18 +538,19 @@ static int hw_engine_init(struct xe_gt *gt, struct xe_hw_engine *hwe,
}
}
- if (xe_device_uc_enabled(xe))
+ if (xe_device_uc_enabled(xe)) {
+ /* GSCCS has a special interrupt for reset */
+ if (hwe->class == XE_ENGINE_CLASS_OTHER)
+ hwe->irq_handler = xe_gsc_hwe_irq_handler;
+
xe_hw_engine_enable_ring(hwe);
+ }
/* We reserve the highest BCS instance for USM */
if (xe->info.has_usm && hwe->class == XE_ENGINE_CLASS_COPY)
gt->usm.reserved_bcs_instance = hwe->instance;
- err = drmm_add_action_or_reset(&xe->drm, hw_engine_fini, hwe);
- if (err)
- return err;
-
- return 0;
+ return drmm_add_action_or_reset(&xe->drm, hw_engine_fini, hwe);
err_kernel_lrc:
xe_lrc_finish(&hwe->kernel_lrc);
diff --git a/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c
index 2345fb42fa39..844ec68cbbb8 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c
+++ b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c
@@ -7,8 +7,10 @@
#include <linux/kobject.h>
#include <linux/sysfs.h>
+#include "xe_device.h"
#include "xe_gt.h"
#include "xe_hw_engine_class_sysfs.h"
+#include "xe_pm.h"
#define MAX_ENGINE_CLASS_NAME_LEN 16
static int xe_add_hw_engine_class_defaults(struct xe_device *xe,
@@ -70,7 +72,7 @@ static ssize_t job_timeout_max_show(struct kobject *kobj,
{
struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj);
- return sprintf(buf, "%u\n", eclass->sched_props.job_timeout_max);
+ return sysfs_emit(buf, "%u\n", eclass->sched_props.job_timeout_max);
}
static const struct kobj_attribute job_timeout_max_attr =
@@ -106,7 +108,7 @@ static ssize_t job_timeout_min_show(struct kobject *kobj,
{
struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj);
- return sprintf(buf, "%u\n", eclass->sched_props.job_timeout_min);
+ return sysfs_emit(buf, "%u\n", eclass->sched_props.job_timeout_min);
}
static const struct kobj_attribute job_timeout_min_attr =
@@ -139,7 +141,7 @@ static ssize_t job_timeout_show(struct kobject *kobj,
{
struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj);
- return sprintf(buf, "%u\n", eclass->sched_props.job_timeout_ms);
+ return sysfs_emit(buf, "%u\n", eclass->sched_props.job_timeout_ms);
}
static const struct kobj_attribute job_timeout_attr =
@@ -150,7 +152,7 @@ static ssize_t job_timeout_default(struct kobject *kobj,
{
struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj->parent);
- return sprintf(buf, "%u\n", eclass->defaults.job_timeout_ms);
+ return sysfs_emit(buf, "%u\n", eclass->defaults.job_timeout_ms);
}
static const struct kobj_attribute job_timeout_def =
@@ -161,7 +163,7 @@ static ssize_t job_timeout_min_default(struct kobject *kobj,
{
struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj->parent);
- return sprintf(buf, "%u\n", eclass->defaults.job_timeout_min);
+ return sysfs_emit(buf, "%u\n", eclass->defaults.job_timeout_min);
}
static const struct kobj_attribute job_timeout_min_def =
@@ -172,7 +174,7 @@ static ssize_t job_timeout_max_default(struct kobject *kobj,
{
struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj->parent);
- return sprintf(buf, "%u\n", eclass->defaults.job_timeout_max);
+ return sysfs_emit(buf, "%u\n", eclass->defaults.job_timeout_max);
}
static const struct kobj_attribute job_timeout_max_def =
@@ -231,7 +233,7 @@ static ssize_t timeslice_duration_max_show(struct kobject *kobj,
{
struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj);
- return sprintf(buf, "%u\n", eclass->sched_props.timeslice_max);
+ return sysfs_emit(buf, "%u\n", eclass->sched_props.timeslice_max);
}
static const struct kobj_attribute timeslice_duration_max_attr =
@@ -269,7 +271,7 @@ static ssize_t timeslice_duration_min_show(struct kobject *kobj,
{
struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj);
- return sprintf(buf, "%u\n", eclass->sched_props.timeslice_min);
+ return sysfs_emit(buf, "%u\n", eclass->sched_props.timeslice_min);
}
static const struct kobj_attribute timeslice_duration_min_attr =
@@ -281,7 +283,7 @@ static ssize_t timeslice_duration_show(struct kobject *kobj,
{
struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj);
- return sprintf(buf, "%u\n", eclass->sched_props.timeslice_us);
+ return sysfs_emit(buf, "%u\n", eclass->sched_props.timeslice_us);
}
static const struct kobj_attribute timeslice_duration_attr =
@@ -293,7 +295,7 @@ static ssize_t timeslice_default(struct kobject *kobj,
{
struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj->parent);
- return sprintf(buf, "%u\n", eclass->defaults.timeslice_us);
+ return sysfs_emit(buf, "%u\n", eclass->defaults.timeslice_us);
}
static const struct kobj_attribute timeslice_duration_def =
@@ -304,7 +306,7 @@ static ssize_t timeslice_min_default(struct kobject *kobj,
{
struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj->parent);
- return sprintf(buf, "%u\n", eclass->defaults.timeslice_min);
+ return sysfs_emit(buf, "%u\n", eclass->defaults.timeslice_min);
}
static const struct kobj_attribute timeslice_duration_min_def =
@@ -315,7 +317,7 @@ static ssize_t timeslice_max_default(struct kobject *kobj,
{
struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj->parent);
- return sprintf(buf, "%u\n", eclass->defaults.timeslice_max);
+ return sysfs_emit(buf, "%u\n", eclass->defaults.timeslice_max);
}
static const struct kobj_attribute timeslice_duration_max_def =
@@ -348,7 +350,7 @@ static ssize_t preempt_timeout_show(struct kobject *kobj,
{
struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj);
- return sprintf(buf, "%u\n", eclass->sched_props.preempt_timeout_us);
+ return sysfs_emit(buf, "%u\n", eclass->sched_props.preempt_timeout_us);
}
static const struct kobj_attribute preempt_timeout_attr =
@@ -360,7 +362,7 @@ static ssize_t preempt_timeout_default(struct kobject *kobj,
{
struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj->parent);
- return sprintf(buf, "%u\n", eclass->defaults.preempt_timeout_us);
+ return sysfs_emit(buf, "%u\n", eclass->defaults.preempt_timeout_us);
}
static const struct kobj_attribute preempt_timeout_def =
@@ -372,7 +374,7 @@ static ssize_t preempt_timeout_min_default(struct kobject *kobj,
{
struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj->parent);
- return sprintf(buf, "%u\n", eclass->defaults.preempt_timeout_min);
+ return sysfs_emit(buf, "%u\n", eclass->defaults.preempt_timeout_min);
}
static const struct kobj_attribute preempt_timeout_min_def =
@@ -384,7 +386,7 @@ static ssize_t preempt_timeout_max_default(struct kobject *kobj,
{
struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj->parent);
- return sprintf(buf, "%u\n", eclass->defaults.preempt_timeout_max);
+ return sysfs_emit(buf, "%u\n", eclass->defaults.preempt_timeout_max);
}
static const struct kobj_attribute preempt_timeout_max_def =
@@ -420,7 +422,7 @@ static ssize_t preempt_timeout_max_show(struct kobject *kobj,
{
struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj);
- return sprintf(buf, "%u\n", eclass->sched_props.preempt_timeout_max);
+ return sysfs_emit(buf, "%u\n", eclass->sched_props.preempt_timeout_max);
}
static const struct kobj_attribute preempt_timeout_max_attr =
@@ -457,7 +459,7 @@ static ssize_t preempt_timeout_min_show(struct kobject *kobj,
{
struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj);
- return sprintf(buf, "%u\n", eclass->sched_props.preempt_timeout_min);
+ return sysfs_emit(buf, "%u\n", eclass->sched_props.preempt_timeout_min);
}
static const struct kobj_attribute preempt_timeout_min_attr =
@@ -498,8 +500,8 @@ static void kobj_xe_hw_engine_class_fini(struct drm_device *drm, void *arg)
kobject_put(kobj);
}
- static struct kobj_eclass *
-kobj_xe_hw_engine_class(struct xe_device *xe, struct kobject *parent, char *name)
+static struct kobj_eclass *
+kobj_xe_hw_engine_class(struct xe_device *xe, struct kobject *parent, const char *name)
{
struct kobj_eclass *keclass;
int err = 0;
@@ -513,13 +515,13 @@ kobj_xe_hw_engine_class(struct xe_device *xe, struct kobject *parent, char *name
kobject_put(&keclass->base);
return NULL;
}
+ keclass->xe = xe;
err = drmm_add_action_or_reset(&xe->drm, kobj_xe_hw_engine_class_fini,
&keclass->base);
if (err)
- drm_warn(&xe->drm,
- "%s: drmm_add_action_or_reset failed, err: %d\n",
- __func__, err);
+ return NULL;
+
return keclass;
}
@@ -550,13 +552,8 @@ static int xe_add_hw_engine_class_defaults(struct xe_device *xe,
if (err)
goto err_object;
- err = drmm_add_action_or_reset(&xe->drm, hw_engine_class_defaults_fini,
- kobj);
- if (err)
- drm_warn(&xe->drm,
- "%s: drmm_add_action_or_reset failed, err: %d\n",
- __func__, err);
- return err;
+ return drmm_add_action_or_reset(&xe->drm, hw_engine_class_defaults_fini, kobj);
+
err_object:
kobject_put(kobj);
return err;
@@ -567,9 +564,51 @@ static void xe_hw_engine_sysfs_kobj_release(struct kobject *kobj)
kfree(kobj);
}
+static ssize_t xe_hw_engine_class_sysfs_attr_show(struct kobject *kobj,
+ struct attribute *attr,
+ char *buf)
+{
+ struct xe_device *xe = kobj_to_xe(kobj);
+ struct kobj_attribute *kattr;
+ ssize_t ret = -EIO;
+
+ kattr = container_of(attr, struct kobj_attribute, attr);
+ if (kattr->show) {
+ xe_pm_runtime_get(xe);
+ ret = kattr->show(kobj, kattr, buf);
+ xe_pm_runtime_put(xe);
+ }
+
+ return ret;
+}
+
+static ssize_t xe_hw_engine_class_sysfs_attr_store(struct kobject *kobj,
+ struct attribute *attr,
+ const char *buf,
+ size_t count)
+{
+ struct xe_device *xe = kobj_to_xe(kobj);
+ struct kobj_attribute *kattr;
+ ssize_t ret = -EIO;
+
+ kattr = container_of(attr, struct kobj_attribute, attr);
+ if (kattr->store) {
+ xe_pm_runtime_get(xe);
+ ret = kattr->store(kobj, kattr, buf, count);
+ xe_pm_runtime_put(xe);
+ }
+
+ return ret;
+}
+
+static const struct sysfs_ops xe_hw_engine_class_sysfs_ops = {
+ .show = xe_hw_engine_class_sysfs_attr_show,
+ .store = xe_hw_engine_class_sysfs_attr_store,
+};
+
static const struct kobj_type xe_hw_engine_sysfs_kobj_type = {
.release = xe_hw_engine_sysfs_kobj_release,
- .sysfs_ops = &kobj_sysfs_ops,
+ .sysfs_ops = &xe_hw_engine_class_sysfs_ops,
};
static void hw_engine_class_sysfs_fini(struct drm_device *drm, void *arg)
@@ -579,6 +618,24 @@ static void hw_engine_class_sysfs_fini(struct drm_device *drm, void *arg)
kobject_put(kobj);
}
+static const char *xe_hw_engine_class_to_str(enum xe_engine_class class)
+{
+ switch (class) {
+ case XE_ENGINE_CLASS_RENDER:
+ return "rcs";
+ case XE_ENGINE_CLASS_VIDEO_DECODE:
+ return "vcs";
+ case XE_ENGINE_CLASS_VIDEO_ENHANCE:
+ return "vecs";
+ case XE_ENGINE_CLASS_COPY:
+ return "bcs";
+ case XE_ENGINE_CLASS_COMPUTE:
+ return "ccs";
+ default:
+ return NULL;
+ }
+}
+
/**
* xe_hw_engine_class_sysfs_init - Init HW engine classes on GT.
* @gt: Xe GT.
@@ -608,7 +665,7 @@ int xe_hw_engine_class_sysfs_init(struct xe_gt *gt)
goto err_object;
for_each_hw_engine(hwe, gt, id) {
- char name[MAX_ENGINE_CLASS_NAME_LEN];
+ const char *name;
struct kobj_eclass *keclass;
if (hwe->class == XE_ENGINE_CLASS_OTHER ||
@@ -619,24 +676,8 @@ int xe_hw_engine_class_sysfs_init(struct xe_gt *gt)
continue;
class_mask |= 1 << hwe->class;
-
- switch (hwe->class) {
- case XE_ENGINE_CLASS_RENDER:
- strcpy(name, "rcs");
- break;
- case XE_ENGINE_CLASS_VIDEO_DECODE:
- strcpy(name, "vcs");
- break;
- case XE_ENGINE_CLASS_VIDEO_ENHANCE:
- strcpy(name, "vecs");
- break;
- case XE_ENGINE_CLASS_COPY:
- strcpy(name, "bcs");
- break;
- case XE_ENGINE_CLASS_COMPUTE:
- strcpy(name, "ccs");
- break;
- default:
+ name = xe_hw_engine_class_to_str(hwe->class);
+ if (!name) {
err = -EINVAL;
goto err_object;
}
@@ -649,26 +690,16 @@ int xe_hw_engine_class_sysfs_init(struct xe_gt *gt)
keclass->eclass = hwe->eclass;
err = xe_add_hw_engine_class_defaults(xe, &keclass->base);
- if (err) {
- drm_warn(&xe->drm,
- "Add .defaults to engines failed!, err: %d\n",
- err);
+ if (err)
goto err_object;
- }
err = sysfs_create_files(&keclass->base, files);
if (err)
goto err_object;
}
- err = drmm_add_action_or_reset(&xe->drm, hw_engine_class_sysfs_fini,
- kobj);
- if (err)
- drm_warn(&xe->drm,
- "%s: drmm_add_action_or_reset failed, err: %d\n",
- __func__, err);
+ return drmm_add_action_or_reset(&xe->drm, hw_engine_class_sysfs_fini, kobj);
- return err;
err_object:
kobject_put(kobj);
return err;
diff --git a/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.h b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.h
index ec5ba673b314..28a0d7c909c0 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.h
+++ b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.h
@@ -26,6 +26,8 @@ struct kobj_eclass {
struct kobject base;
/** @eclass: A pointer to the hw engine class interface */
struct xe_hw_engine_class_intf *eclass;
+ /** @xe: A pointer to the xe device */
+ struct xe_device *xe;
};
static inline struct xe_hw_engine_class_intf *kobj_to_eclass(struct kobject *kobj)
@@ -33,4 +35,9 @@ static inline struct xe_hw_engine_class_intf *kobj_to_eclass(struct kobject *kob
return container_of(kobj, struct kobj_eclass, base)->eclass;
}
+static inline struct xe_device *kobj_to_xe(struct kobject *kobj)
+{
+ return container_of(kobj, struct kobj_eclass, base)->xe;
+}
+
#endif
diff --git a/drivers/gpu/drm/xe/xe_hw_fence.c b/drivers/gpu/drm/xe/xe_hw_fence.c
index a5de3e7b0bd6..f872ef103127 100644
--- a/drivers/gpu/drm/xe/xe_hw_fence.c
+++ b/drivers/gpu/drm/xe/xe_hw_fence.c
@@ -130,7 +130,7 @@ void xe_hw_fence_ctx_init(struct xe_hw_fence_ctx *ctx, struct xe_gt *gt,
ctx->irq = irq;
ctx->dma_fence_ctx = dma_fence_context_alloc(1);
ctx->next_seqno = XE_FENCE_INITIAL_SEQNO;
- sprintf(ctx->name, "%s", name);
+ snprintf(ctx->name, sizeof(ctx->name), "%s", name);
}
void xe_hw_fence_ctx_finish(struct xe_hw_fence_ctx *ctx)
diff --git a/drivers/gpu/drm/xe/xe_hwmon.c b/drivers/gpu/drm/xe/xe_hwmon.c
index 9ac7fbe201b3..453e601ddd5e 100644
--- a/drivers/gpu/drm/xe/xe_hwmon.c
+++ b/drivers/gpu/drm/xe/xe_hwmon.c
@@ -18,6 +18,7 @@
#include "xe_pcode.h"
#include "xe_pcode_api.h"
#include "xe_sriov.h"
+#include "xe_pm.h"
enum xe_hwmon_reg {
REG_PKG_RAPL_LIMIT,
@@ -33,6 +34,12 @@ enum xe_hwmon_reg_operation {
REG_READ64,
};
+enum xe_hwmon_channel {
+ CHANNEL_CARD,
+ CHANNEL_PKG,
+ CHANNEL_MAX,
+};
+
/*
* SF_* - scale factors for particular quantities according to hwmon spec.
*/
@@ -68,61 +75,61 @@ struct xe_hwmon {
int scl_shift_energy;
/** @scl_shift_time: pkg time unit */
int scl_shift_time;
- /** @ei: Energy info for energy1_input */
- struct xe_hwmon_energy_info ei;
+ /** @ei: Energy info for energyN_input */
+ struct xe_hwmon_energy_info ei[CHANNEL_MAX];
};
-static u32 xe_hwmon_get_reg(struct xe_hwmon *hwmon, enum xe_hwmon_reg hwmon_reg)
+static struct xe_reg xe_hwmon_get_reg(struct xe_hwmon *hwmon, enum xe_hwmon_reg hwmon_reg,
+ int channel)
{
struct xe_device *xe = gt_to_xe(hwmon->gt);
- struct xe_reg reg = XE_REG(0);
switch (hwmon_reg) {
case REG_PKG_RAPL_LIMIT:
- if (xe->info.platform == XE_PVC)
- reg = PVC_GT0_PACKAGE_RAPL_LIMIT;
- else if (xe->info.platform == XE_DG2)
- reg = PCU_CR_PACKAGE_RAPL_LIMIT;
+ if (xe->info.platform == XE_PVC && channel == CHANNEL_PKG)
+ return PVC_GT0_PACKAGE_RAPL_LIMIT;
+ else if ((xe->info.platform == XE_DG2) && (channel == CHANNEL_PKG))
+ return PCU_CR_PACKAGE_RAPL_LIMIT;
break;
case REG_PKG_POWER_SKU:
- if (xe->info.platform == XE_PVC)
- reg = PVC_GT0_PACKAGE_POWER_SKU;
- else if (xe->info.platform == XE_DG2)
- reg = PCU_CR_PACKAGE_POWER_SKU;
+ if (xe->info.platform == XE_PVC && channel == CHANNEL_PKG)
+ return PVC_GT0_PACKAGE_POWER_SKU;
+ else if ((xe->info.platform == XE_DG2) && (channel == CHANNEL_PKG))
+ return PCU_CR_PACKAGE_POWER_SKU;
break;
case REG_PKG_POWER_SKU_UNIT:
if (xe->info.platform == XE_PVC)
- reg = PVC_GT0_PACKAGE_POWER_SKU_UNIT;
+ return PVC_GT0_PACKAGE_POWER_SKU_UNIT;
else if (xe->info.platform == XE_DG2)
- reg = PCU_CR_PACKAGE_POWER_SKU_UNIT;
+ return PCU_CR_PACKAGE_POWER_SKU_UNIT;
break;
case REG_GT_PERF_STATUS:
- if (xe->info.platform == XE_DG2)
- reg = GT_PERF_STATUS;
+ if (xe->info.platform == XE_DG2 && channel == CHANNEL_PKG)
+ return GT_PERF_STATUS;
break;
case REG_PKG_ENERGY_STATUS:
- if (xe->info.platform == XE_PVC)
- reg = PVC_GT0_PLATFORM_ENERGY_STATUS;
- else if (xe->info.platform == XE_DG2)
- reg = PCU_CR_PACKAGE_ENERGY_STATUS;
+ if (xe->info.platform == XE_PVC && channel == CHANNEL_PKG)
+ return PVC_GT0_PLATFORM_ENERGY_STATUS;
+ else if ((xe->info.platform == XE_DG2) && (channel == CHANNEL_PKG))
+ return PCU_CR_PACKAGE_ENERGY_STATUS;
break;
default:
drm_warn(&xe->drm, "Unknown xe hwmon reg id: %d\n", hwmon_reg);
break;
}
- return reg.raw;
+ return XE_REG(0);
}
static void xe_hwmon_process_reg(struct xe_hwmon *hwmon, enum xe_hwmon_reg hwmon_reg,
enum xe_hwmon_reg_operation operation, u64 *value,
- u32 clr, u32 set)
+ u32 clr, u32 set, int channel)
{
struct xe_reg reg;
- reg.raw = xe_hwmon_get_reg(hwmon, hwmon_reg);
+ reg = xe_hwmon_get_reg(hwmon, hwmon_reg, channel);
- if (!reg.raw)
+ if (!xe_reg_is_valid(reg))
return;
switch (operation) {
@@ -150,13 +157,13 @@ static void xe_hwmon_process_reg(struct xe_hwmon *hwmon, enum xe_hwmon_reg hwmon
* same pattern for sysfs, allow arbitrary PL1 limits to be set but display
* clamped values when read.
*/
-static void xe_hwmon_power_max_read(struct xe_hwmon *hwmon, long *value)
+static void xe_hwmon_power_max_read(struct xe_hwmon *hwmon, int channel, long *value)
{
u64 reg_val, min, max;
mutex_lock(&hwmon->hwmon_lock);
- xe_hwmon_process_reg(hwmon, REG_PKG_RAPL_LIMIT, REG_READ32, &reg_val, 0, 0);
+ xe_hwmon_process_reg(hwmon, REG_PKG_RAPL_LIMIT, REG_READ32, &reg_val, 0, 0, channel);
/* Check if PL1 limit is disabled */
if (!(reg_val & PKG_PWR_LIM_1_EN)) {
*value = PL1_DISABLE;
@@ -166,7 +173,7 @@ static void xe_hwmon_power_max_read(struct xe_hwmon *hwmon, long *value)
reg_val = REG_FIELD_GET(PKG_PWR_LIM_1, reg_val);
*value = mul_u64_u32_shr(reg_val, SF_POWER, hwmon->scl_shift_power);
- xe_hwmon_process_reg(hwmon, REG_PKG_POWER_SKU, REG_READ64, &reg_val, 0, 0);
+ xe_hwmon_process_reg(hwmon, REG_PKG_POWER_SKU, REG_READ64, &reg_val, 0, 0, channel);
min = REG_FIELD_GET(PKG_MIN_PWR, reg_val);
min = mul_u64_u32_shr(min, SF_POWER, hwmon->scl_shift_power);
max = REG_FIELD_GET(PKG_MAX_PWR, reg_val);
@@ -178,7 +185,7 @@ unlock:
mutex_unlock(&hwmon->hwmon_lock);
}
-static int xe_hwmon_power_max_write(struct xe_hwmon *hwmon, long value)
+static int xe_hwmon_power_max_write(struct xe_hwmon *hwmon, int channel, long value)
{
int ret = 0;
u64 reg_val;
@@ -188,9 +195,9 @@ static int xe_hwmon_power_max_write(struct xe_hwmon *hwmon, long value)
/* Disable PL1 limit and verify, as limit cannot be disabled on all platforms */
if (value == PL1_DISABLE) {
xe_hwmon_process_reg(hwmon, REG_PKG_RAPL_LIMIT, REG_RMW32, &reg_val,
- PKG_PWR_LIM_1_EN, 0);
+ PKG_PWR_LIM_1_EN, 0, channel);
xe_hwmon_process_reg(hwmon, REG_PKG_RAPL_LIMIT, REG_READ32, &reg_val,
- PKG_PWR_LIM_1_EN, 0);
+ PKG_PWR_LIM_1_EN, 0, channel);
if (reg_val & PKG_PWR_LIM_1_EN) {
ret = -EOPNOTSUPP;
@@ -203,17 +210,17 @@ static int xe_hwmon_power_max_write(struct xe_hwmon *hwmon, long value)
reg_val = PKG_PWR_LIM_1_EN | REG_FIELD_PREP(PKG_PWR_LIM_1, reg_val);
xe_hwmon_process_reg(hwmon, REG_PKG_RAPL_LIMIT, REG_RMW32, &reg_val,
- PKG_PWR_LIM_1_EN | PKG_PWR_LIM_1, reg_val);
+ PKG_PWR_LIM_1_EN | PKG_PWR_LIM_1, reg_val, channel);
unlock:
mutex_unlock(&hwmon->hwmon_lock);
return ret;
}
-static void xe_hwmon_power_rated_max_read(struct xe_hwmon *hwmon, long *value)
+static void xe_hwmon_power_rated_max_read(struct xe_hwmon *hwmon, int channel, long *value)
{
u64 reg_val;
- xe_hwmon_process_reg(hwmon, REG_PKG_POWER_SKU, REG_READ32, &reg_val, 0, 0);
+ xe_hwmon_process_reg(hwmon, REG_PKG_POWER_SKU, REG_READ32, &reg_val, 0, 0, channel);
reg_val = REG_FIELD_GET(PKG_TDP, reg_val);
*value = mul_u64_u32_shr(reg_val, SF_POWER, hwmon->scl_shift_power);
}
@@ -236,16 +243,16 @@ static void xe_hwmon_power_rated_max_read(struct xe_hwmon *hwmon, long *value)
* the hwmon API. Using x86_64 128 bit arithmetic (see mul_u64_u32_shr()),
* a 'long' of 63 bits, SF_ENERGY of 1e6 (~20 bits) and
* hwmon->scl_shift_energy of 14 bits we have 57 (63 - 20 + 14) bits before
- * energy1_input overflows. This at 1000 W is an overflow duration of 278 years.
+ * energyN_input overflows. At 1000 W, this is an overflow duration of 278 years.
*/
static void
-xe_hwmon_energy_get(struct xe_hwmon *hwmon, long *energy)
+xe_hwmon_energy_get(struct xe_hwmon *hwmon, int channel, long *energy)
{
- struct xe_hwmon_energy_info *ei = &hwmon->ei;
+ struct xe_hwmon_energy_info *ei = &hwmon->ei[channel];
u64 reg_val;
xe_hwmon_process_reg(hwmon, REG_PKG_ENERGY_STATUS, REG_READ32,
- &reg_val, 0, 0);
+ &reg_val, 0, 0, channel);
if (reg_val >= ei->reg_val_prev)
ei->accum_energy += reg_val - ei->reg_val_prev;
@@ -259,23 +266,24 @@ xe_hwmon_energy_get(struct xe_hwmon *hwmon, long *energy)
}
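Only the monotonic branch of the accumulator is visible in this hunk; the wraparound branch falls outside it. A hedged sketch of the full per-channel accumulation pattern (field names follow the driver; the else branch is an assumption based on the usual 32-bit counter idiom):

    static u64 accumulate_energy(struct xe_hwmon_energy_info *ei, u32 reg_val)
    {
            if (reg_val >= ei->reg_val_prev)
                    ei->accum_energy += reg_val - ei->reg_val_prev;
            else    /* 32-bit hardware counter wrapped around */
                    ei->accum_energy += UINT_MAX - ei->reg_val_prev + reg_val;

            ei->reg_val_prev = reg_val;
            return ei->accum_energy;
    }

The caller then scales the accumulated hardware units to microjoules with mul_u64_u32_shr(accum, SF_ENERGY, hwmon->scl_shift_energy), which is where the 57-bit headroom calculation in the comment above comes from.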
static ssize_t
-xe_hwmon_power1_max_interval_show(struct device *dev, struct device_attribute *attr,
- char *buf)
+xe_hwmon_power_max_interval_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
{
struct xe_hwmon *hwmon = dev_get_drvdata(dev);
u32 x, y, x_w = 2; /* 2 bits */
u64 r, tau4, out;
+ int sensor_index = to_sensor_dev_attr(attr)->index;
- xe_device_mem_access_get(gt_to_xe(hwmon->gt));
+ xe_pm_runtime_get(gt_to_xe(hwmon->gt));
mutex_lock(&hwmon->hwmon_lock);
xe_hwmon_process_reg(hwmon, REG_PKG_RAPL_LIMIT,
- REG_READ32, &r, 0, 0);
+ REG_READ32, &r, 0, 0, sensor_index);
mutex_unlock(&hwmon->hwmon_lock);
- xe_device_mem_access_put(gt_to_xe(hwmon->gt));
+ xe_pm_runtime_put(gt_to_xe(hwmon->gt));
x = REG_FIELD_GET(PKG_PWR_LIM_1_TIME_X, r);
y = REG_FIELD_GET(PKG_PWR_LIM_1_TIME_Y, r);
@@ -299,14 +307,15 @@ xe_hwmon_power1_max_interval_show(struct device *dev, struct device_attribute *a
}
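The body that decodes the interval is elided by the hunk; it unpacks a 1.x * 2^y fixed-point duration from the X/Y fields. A sketch of the math, mirroring the equivalent i915 hwmon code this driver was derived from (treat the exact constants as assumptions):

    /*
     *   tau4 = ((1 << x_w) | x) << y;    // 4 * tau, with x_w = 2 fraction bits
     *   out  = mul_u64_u32_shr(tau4, 1000, hwmon->scl_shift_time + x_w);  // ms
     *
     * Worked example: x = 1, y = 5, scl_shift_time = 10 (1/1024 s units):
     *   tau4 = (4 | 1) << 5 = 160  ->  out = 160000 >> 12 = 39 ms
     */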
static ssize_t
-xe_hwmon_power1_max_interval_store(struct device *dev, struct device_attribute *attr,
- const char *buf, size_t count)
+xe_hwmon_power_max_interval_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
{
struct xe_hwmon *hwmon = dev_get_drvdata(dev);
u32 x, y, rxy, x_w = 2; /* 2 bits */
u64 tau4, r, max_win;
unsigned long val;
int ret;
+ int sensor_index = to_sensor_dev_attr(attr)->index;
ret = kstrtoul(buf, 0, &val);
if (ret)
@@ -325,7 +334,7 @@ xe_hwmon_power1_max_interval_store(struct device *dev, struct device_attribute *
/*
* val must be < max in hwmon interface units. The steps below are
- * explained in xe_hwmon_power1_max_interval_show()
+ * explained in xe_hwmon_power_max_interval_show()
*/
r = FIELD_PREP(PKG_MAX_WIN, PKG_MAX_WIN_DEFAULT);
x = REG_FIELD_GET(PKG_MAX_WIN_X, r);
@@ -354,26 +363,31 @@ xe_hwmon_power1_max_interval_store(struct device *dev, struct device_attribute *
rxy = REG_FIELD_PREP(PKG_PWR_LIM_1_TIME_X, x) | REG_FIELD_PREP(PKG_PWR_LIM_1_TIME_Y, y);
- xe_device_mem_access_get(gt_to_xe(hwmon->gt));
+ xe_pm_runtime_get(gt_to_xe(hwmon->gt));
mutex_lock(&hwmon->hwmon_lock);
xe_hwmon_process_reg(hwmon, REG_PKG_RAPL_LIMIT, REG_RMW32, (u64 *)&r,
- PKG_PWR_LIM_1_TIME, rxy);
+ PKG_PWR_LIM_1_TIME, rxy, sensor_index);
mutex_unlock(&hwmon->hwmon_lock);
- xe_device_mem_access_put(gt_to_xe(hwmon->gt));
+ xe_pm_runtime_put(gt_to_xe(hwmon->gt));
return count;
}
static SENSOR_DEVICE_ATTR(power1_max_interval, 0664,
- xe_hwmon_power1_max_interval_show,
- xe_hwmon_power1_max_interval_store, 0);
+ xe_hwmon_power_max_interval_show,
+ xe_hwmon_power_max_interval_store, CHANNEL_CARD);
+
+static SENSOR_DEVICE_ATTR(power2_max_interval, 0664,
+ xe_hwmon_power_max_interval_show,
+ xe_hwmon_power_max_interval_store, CHANNEL_PKG);
static struct attribute *hwmon_attributes[] = {
&sensor_dev_attr_power1_max_interval.dev_attr.attr,
+ &sensor_dev_attr_power2_max_interval.dev_attr.attr,
NULL
};
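The channel reaches the shared show/store handlers through the attribute's index field (CHANNEL_CARD for power1, CHANNEL_PKG for power2), recovered with to_sensor_dev_attr() from <linux/hwmon-sysfs.h>. A minimal sketch with a hypothetical body:

    static ssize_t example_show(struct device *dev, struct device_attribute *attr,
                                char *buf)
    {
            /* SENSOR_DEVICE_ATTR(..., index) stored the channel in ->index */
            int channel = to_sensor_dev_attr(attr)->index;

            return sysfs_emit(buf, "channel %d\n", channel);    /* hypothetical */
    }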
@@ -384,12 +398,11 @@ static umode_t xe_hwmon_attributes_visible(struct kobject *kobj,
struct xe_hwmon *hwmon = dev_get_drvdata(dev);
int ret = 0;
- xe_device_mem_access_get(gt_to_xe(hwmon->gt));
+ xe_pm_runtime_get(gt_to_xe(hwmon->gt));
- if (attr == &sensor_dev_attr_power1_max_interval.dev_attr.attr)
- ret = xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT) ? attr->mode : 0;
+ ret = xe_reg_is_valid(xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, index)) ? attr->mode : 0;
- xe_device_mem_access_put(gt_to_xe(hwmon->gt));
+ xe_pm_runtime_put(gt_to_xe(hwmon->gt));
return ret;
}
@@ -405,10 +418,11 @@ static const struct attribute_group *hwmon_groups[] = {
};
static const struct hwmon_channel_info * const hwmon_info[] = {
- HWMON_CHANNEL_INFO(power, HWMON_P_MAX | HWMON_P_RATED_MAX | HWMON_P_CRIT),
- HWMON_CHANNEL_INFO(curr, HWMON_C_CRIT),
- HWMON_CHANNEL_INFO(in, HWMON_I_INPUT),
- HWMON_CHANNEL_INFO(energy, HWMON_E_INPUT),
+ HWMON_CHANNEL_INFO(power, HWMON_P_MAX | HWMON_P_RATED_MAX | HWMON_P_LABEL,
+ HWMON_P_MAX | HWMON_P_RATED_MAX | HWMON_P_CRIT | HWMON_P_LABEL),
+ HWMON_CHANNEL_INFO(curr, HWMON_C_LABEL, HWMON_C_CRIT | HWMON_C_LABEL),
+ HWMON_CHANNEL_INFO(in, HWMON_I_INPUT | HWMON_I_LABEL, HWMON_I_INPUT | HWMON_I_LABEL),
+ HWMON_CHANNEL_INFO(energy, HWMON_E_INPUT | HWMON_E_LABEL, HWMON_E_INPUT | HWMON_E_LABEL),
NULL
};
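Each extra argument to HWMON_CHANNEL_INFO() declares one more channel of that sensor type, so the table above turns every type into a two-channel (card/pkg) entry. Roughly what the power line produces (simplified; the real macro uses compound literals):

    static const u32 power_config[] = {
            HWMON_P_MAX | HWMON_P_RATED_MAX | HWMON_P_LABEL,                 /* power1: card */
            HWMON_P_MAX | HWMON_P_RATED_MAX | HWMON_P_CRIT | HWMON_P_LABEL,  /* power2: pkg */
            0,                                                               /* terminator */
    };
    static const struct hwmon_channel_info power_info = {
            .type   = hwmon_power,
            .config = power_config,
    };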
@@ -431,7 +445,8 @@ static int xe_hwmon_pcode_write_i1(struct xe_gt *gt, u32 uval)
uval);
}
-static int xe_hwmon_power_curr_crit_read(struct xe_hwmon *hwmon, long *value, u32 scale_factor)
+static int xe_hwmon_power_curr_crit_read(struct xe_hwmon *hwmon, int channel,
+ long *value, u32 scale_factor)
{
int ret;
u32 uval;
@@ -449,7 +464,8 @@ unlock:
return ret;
}
-static int xe_hwmon_power_curr_crit_write(struct xe_hwmon *hwmon, long value, u32 scale_factor)
+static int xe_hwmon_power_curr_crit_write(struct xe_hwmon *hwmon, int channel,
+ long value, u32 scale_factor)
{
int ret;
u32 uval;
@@ -463,117 +479,131 @@ static int xe_hwmon_power_curr_crit_write(struct xe_hwmon *hwmon, long value, u3
return ret;
}
-static void xe_hwmon_get_voltage(struct xe_hwmon *hwmon, long *value)
+static void xe_hwmon_get_voltage(struct xe_hwmon *hwmon, int channel, long *value)
{
u64 reg_val;
xe_hwmon_process_reg(hwmon, REG_GT_PERF_STATUS,
- REG_READ32, &reg_val, 0, 0);
+ REG_READ32, &reg_val, 0, 0, channel);
/* HW register value in units of 2.5 millivolt */
*value = DIV_ROUND_CLOSEST(REG_FIELD_GET(VOLTAGE_MASK, reg_val) * 2500, SF_VOLTAGE);
}
static umode_t
-xe_hwmon_power_is_visible(struct xe_hwmon *hwmon, u32 attr, int chan)
+xe_hwmon_power_is_visible(struct xe_hwmon *hwmon, u32 attr, int channel)
{
u32 uval;
switch (attr) {
case hwmon_power_max:
- return xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT) ? 0664 : 0;
+ return xe_reg_is_valid(xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT,
+ channel)) ? 0664 : 0;
case hwmon_power_rated_max:
- return xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU) ? 0444 : 0;
+ return xe_reg_is_valid(xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU,
+ channel)) ? 0444 : 0;
case hwmon_power_crit:
- return (xe_hwmon_pcode_read_i1(hwmon->gt, &uval) ||
- !(uval & POWER_SETUP_I1_WATTS)) ? 0 : 0644;
+ if (channel == CHANNEL_PKG)
+ return (xe_hwmon_pcode_read_i1(hwmon->gt, &uval) ||
+ !(uval & POWER_SETUP_I1_WATTS)) ? 0 : 0644;
+ break;
+ case hwmon_power_label:
+ return xe_reg_is_valid(xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU_UNIT,
+ channel)) ? 0444 : 0;
default:
return 0;
}
+ return 0;
}
static int
-xe_hwmon_power_read(struct xe_hwmon *hwmon, u32 attr, int chan, long *val)
+xe_hwmon_power_read(struct xe_hwmon *hwmon, u32 attr, int channel, long *val)
{
switch (attr) {
case hwmon_power_max:
- xe_hwmon_power_max_read(hwmon, val);
+ xe_hwmon_power_max_read(hwmon, channel, val);
return 0;
case hwmon_power_rated_max:
- xe_hwmon_power_rated_max_read(hwmon, val);
+ xe_hwmon_power_rated_max_read(hwmon, channel, val);
return 0;
case hwmon_power_crit:
- return xe_hwmon_power_curr_crit_read(hwmon, val, SF_POWER);
+ return xe_hwmon_power_curr_crit_read(hwmon, channel, val, SF_POWER);
default:
return -EOPNOTSUPP;
}
}
static int
-xe_hwmon_power_write(struct xe_hwmon *hwmon, u32 attr, int chan, long val)
+xe_hwmon_power_write(struct xe_hwmon *hwmon, u32 attr, int channel, long val)
{
switch (attr) {
case hwmon_power_max:
- return xe_hwmon_power_max_write(hwmon, val);
+ return xe_hwmon_power_max_write(hwmon, channel, val);
case hwmon_power_crit:
- return xe_hwmon_power_curr_crit_write(hwmon, val, SF_POWER);
+ return xe_hwmon_power_curr_crit_write(hwmon, channel, val, SF_POWER);
default:
return -EOPNOTSUPP;
}
}
static umode_t
-xe_hwmon_curr_is_visible(const struct xe_hwmon *hwmon, u32 attr)
+xe_hwmon_curr_is_visible(const struct xe_hwmon *hwmon, u32 attr, int channel)
{
u32 uval;
switch (attr) {
case hwmon_curr_crit:
- return (xe_hwmon_pcode_read_i1(hwmon->gt, &uval) ||
- (uval & POWER_SETUP_I1_WATTS)) ? 0 : 0644;
+ case hwmon_curr_label:
+ if (channel == CHANNEL_PKG)
+ return (xe_hwmon_pcode_read_i1(hwmon->gt, &uval) ||
+ (uval & POWER_SETUP_I1_WATTS)) ? 0 : 0644;
+ break;
default:
return 0;
}
+ return 0;
}
static int
-xe_hwmon_curr_read(struct xe_hwmon *hwmon, u32 attr, long *val)
+xe_hwmon_curr_read(struct xe_hwmon *hwmon, u32 attr, int channel, long *val)
{
switch (attr) {
case hwmon_curr_crit:
- return xe_hwmon_power_curr_crit_read(hwmon, val, SF_CURR);
+ return xe_hwmon_power_curr_crit_read(hwmon, channel, val, SF_CURR);
default:
return -EOPNOTSUPP;
}
}
static int
-xe_hwmon_curr_write(struct xe_hwmon *hwmon, u32 attr, long val)
+xe_hwmon_curr_write(struct xe_hwmon *hwmon, u32 attr, int channel, long val)
{
switch (attr) {
case hwmon_curr_crit:
- return xe_hwmon_power_curr_crit_write(hwmon, val, SF_CURR);
+ return xe_hwmon_power_curr_crit_write(hwmon, channel, val, SF_CURR);
default:
return -EOPNOTSUPP;
}
}
static umode_t
-xe_hwmon_in_is_visible(struct xe_hwmon *hwmon, u32 attr)
+xe_hwmon_in_is_visible(struct xe_hwmon *hwmon, u32 attr, int channel)
{
switch (attr) {
case hwmon_in_input:
- return xe_hwmon_get_reg(hwmon, REG_GT_PERF_STATUS) ? 0444 : 0;
+ case hwmon_in_label:
+ return xe_reg_is_valid(xe_hwmon_get_reg(hwmon, REG_GT_PERF_STATUS,
+ channel)) ? 0444 : 0;
default:
return 0;
}
}
static int
-xe_hwmon_in_read(struct xe_hwmon *hwmon, u32 attr, long *val)
+xe_hwmon_in_read(struct xe_hwmon *hwmon, u32 attr, int channel, long *val)
{
switch (attr) {
case hwmon_in_input:
- xe_hwmon_get_voltage(hwmon, val);
+ xe_hwmon_get_voltage(hwmon, channel, val);
return 0;
default:
return -EOPNOTSUPP;
@@ -581,22 +611,24 @@ xe_hwmon_in_read(struct xe_hwmon *hwmon, u32 attr, long *val)
}
static umode_t
-xe_hwmon_energy_is_visible(struct xe_hwmon *hwmon, u32 attr)
+xe_hwmon_energy_is_visible(struct xe_hwmon *hwmon, u32 attr, int channel)
{
switch (attr) {
case hwmon_energy_input:
- return xe_hwmon_get_reg(hwmon, REG_PKG_ENERGY_STATUS) ? 0444 : 0;
+ case hwmon_energy_label:
+ return xe_reg_is_valid(xe_hwmon_get_reg(hwmon, REG_PKG_ENERGY_STATUS,
+ channel)) ? 0444 : 0;
default:
return 0;
}
}
static int
-xe_hwmon_energy_read(struct xe_hwmon *hwmon, u32 attr, long *val)
+xe_hwmon_energy_read(struct xe_hwmon *hwmon, u32 attr, int channel, long *val)
{
switch (attr) {
case hwmon_energy_input:
- xe_hwmon_energy_get(hwmon, val);
+ xe_hwmon_energy_get(hwmon, channel, val);
return 0;
default:
return -EOPNOTSUPP;
@@ -610,27 +642,27 @@ xe_hwmon_is_visible(const void *drvdata, enum hwmon_sensor_types type,
struct xe_hwmon *hwmon = (struct xe_hwmon *)drvdata;
int ret;
- xe_device_mem_access_get(gt_to_xe(hwmon->gt));
+ xe_pm_runtime_get(gt_to_xe(hwmon->gt));
switch (type) {
case hwmon_power:
ret = xe_hwmon_power_is_visible(hwmon, attr, channel);
break;
case hwmon_curr:
- ret = xe_hwmon_curr_is_visible(hwmon, attr);
+ ret = xe_hwmon_curr_is_visible(hwmon, attr, channel);
break;
case hwmon_in:
- ret = xe_hwmon_in_is_visible(hwmon, attr);
+ ret = xe_hwmon_in_is_visible(hwmon, attr, channel);
break;
case hwmon_energy:
- ret = xe_hwmon_energy_is_visible(hwmon, attr);
+ ret = xe_hwmon_energy_is_visible(hwmon, attr, channel);
break;
default:
ret = 0;
break;
}
- xe_device_mem_access_put(gt_to_xe(hwmon->gt));
+ xe_pm_runtime_put(gt_to_xe(hwmon->gt));
return ret;
}
@@ -642,27 +674,27 @@ xe_hwmon_read(struct device *dev, enum hwmon_sensor_types type, u32 attr,
struct xe_hwmon *hwmon = dev_get_drvdata(dev);
int ret;
- xe_device_mem_access_get(gt_to_xe(hwmon->gt));
+ xe_pm_runtime_get(gt_to_xe(hwmon->gt));
switch (type) {
case hwmon_power:
ret = xe_hwmon_power_read(hwmon, attr, channel, val);
break;
case hwmon_curr:
- ret = xe_hwmon_curr_read(hwmon, attr, val);
+ ret = xe_hwmon_curr_read(hwmon, attr, channel, val);
break;
case hwmon_in:
- ret = xe_hwmon_in_read(hwmon, attr, val);
+ ret = xe_hwmon_in_read(hwmon, attr, channel, val);
break;
case hwmon_energy:
- ret = xe_hwmon_energy_read(hwmon, attr, val);
+ ret = xe_hwmon_energy_read(hwmon, attr, channel, val);
break;
default:
ret = -EOPNOTSUPP;
break;
}
- xe_device_mem_access_put(gt_to_xe(hwmon->gt));
+ xe_pm_runtime_put(gt_to_xe(hwmon->gt));
return ret;
}
@@ -674,29 +706,49 @@ xe_hwmon_write(struct device *dev, enum hwmon_sensor_types type, u32 attr,
struct xe_hwmon *hwmon = dev_get_drvdata(dev);
int ret;
- xe_device_mem_access_get(gt_to_xe(hwmon->gt));
+ xe_pm_runtime_get(gt_to_xe(hwmon->gt));
switch (type) {
case hwmon_power:
ret = xe_hwmon_power_write(hwmon, attr, channel, val);
break;
case hwmon_curr:
- ret = xe_hwmon_curr_write(hwmon, attr, val);
+ ret = xe_hwmon_curr_write(hwmon, attr, channel, val);
break;
default:
ret = -EOPNOTSUPP;
break;
}
- xe_device_mem_access_put(gt_to_xe(hwmon->gt));
+ xe_pm_runtime_put(gt_to_xe(hwmon->gt));
return ret;
}
+static int xe_hwmon_read_label(struct device *dev,
+ enum hwmon_sensor_types type,
+ u32 attr, int channel, const char **str)
+{
+ switch (type) {
+ case hwmon_power:
+ case hwmon_energy:
+ case hwmon_curr:
+ case hwmon_in:
+ if (channel == CHANNEL_CARD)
+ *str = "card";
+ else if (channel == CHANNEL_PKG)
+ *str = "pkg";
+ return 0;
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
static const struct hwmon_ops hwmon_ops = {
.is_visible = xe_hwmon_is_visible,
.read = xe_hwmon_read,
.write = xe_hwmon_write,
+ .read_string = xe_hwmon_read_label,
};
static const struct hwmon_chip_info hwmon_chip_info = {
@@ -710,14 +762,15 @@ xe_hwmon_get_preregistration_info(struct xe_device *xe)
struct xe_hwmon *hwmon = xe->hwmon;
long energy;
u64 val_sku_unit = 0;
+ int channel;
/*
* The contents of register PKG_POWER_SKU_UNIT do not change,
* so read it once and store the shift values.
*/
- if (xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU_UNIT)) {
+ if (xe_reg_is_valid(xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU_UNIT, 0))) {
xe_hwmon_process_reg(hwmon, REG_PKG_POWER_SKU_UNIT,
- REG_READ32, &val_sku_unit, 0, 0);
+ REG_READ32, &val_sku_unit, 0, 0, 0);
hwmon->scl_shift_power = REG_FIELD_GET(PKG_PWR_UNIT, val_sku_unit);
hwmon->scl_shift_energy = REG_FIELD_GET(PKG_ENERGY_UNIT, val_sku_unit);
hwmon->scl_shift_time = REG_FIELD_GET(PKG_TIME_UNIT, val_sku_unit);
@@ -727,8 +780,9 @@ xe_hwmon_get_preregistration_info(struct xe_device *xe)
* Initialize 'struct xe_hwmon_energy_info', i.e. set fields to the
* first value of the energy register read
*/
- if (xe_hwmon_is_visible(hwmon, hwmon_energy, hwmon_energy_input, 0))
- xe_hwmon_energy_get(hwmon, &energy);
+ for (channel = 0; channel < CHANNEL_MAX; channel++)
+ if (xe_hwmon_is_visible(hwmon, hwmon_energy, hwmon_energy_input, channel))
+ xe_hwmon_energy_get(hwmon, channel, &energy);
}
static void xe_hwmon_mutex_destroy(void *arg)
diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c
index 2f5d179e0d00..996806353171 100644
--- a/drivers/gpu/drm/xe/xe_irq.c
+++ b/drivers/gpu/drm/xe/xe_irq.c
@@ -187,7 +187,7 @@ void xe_irq_enable_hwe(struct xe_gt *gt)
* GSCCS interrupts, but it has its own mask register.
*/
if (xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_OTHER)) {
- gsc_mask = irqs;
+ gsc_mask = irqs | GSC_ER_COMPLETE;
heci_mask = GSC_IRQ_INTF(1);
} else if (HAS_HECI_GSCFI(xe)) {
gsc_mask = GSC_IRQ_INTF(1);
@@ -326,7 +326,6 @@ static void gt_irq_handler(struct xe_tile *tile,
xe_heci_gsc_irq_handler(xe, intr_vec);
else
gt_other_irq_handler(engine_gt, instance, intr_vec);
- continue;
}
}
}
diff --git a/drivers/gpu/drm/xe/xe_lmtt.c b/drivers/gpu/drm/xe/xe_lmtt.c
index 0d7c5514e092..418661a88918 100644
--- a/drivers/gpu/drm/xe/xe_lmtt.c
+++ b/drivers/gpu/drm/xe/xe_lmtt.c
@@ -35,7 +35,7 @@
static bool xe_has_multi_level_lmtt(struct xe_device *xe)
{
- return xe->info.platform == XE_PVC;
+ return GRAPHICS_VERx100(xe) >= 1260;
}
static struct xe_tile *lmtt_to_tile(struct xe_lmtt *lmtt)
@@ -70,8 +70,8 @@ static struct xe_lmtt_pt *lmtt_pt_alloc(struct xe_lmtt *lmtt, unsigned int level
PAGE_ALIGN(lmtt->ops->lmtt_pte_size(level) *
lmtt->ops->lmtt_pte_num(level)),
ttm_bo_type_kernel,
- XE_BO_CREATE_VRAM_IF_DGFX(lmtt_to_tile(lmtt)) |
- XE_BO_CREATE_PINNED_BIT);
+ XE_BO_FLAG_VRAM_IF_DGFX(lmtt_to_tile(lmtt)) |
+ XE_BO_NEEDS_64K | XE_BO_FLAG_PINNED);
if (IS_ERR(bo)) {
err = PTR_ERR(bo);
goto out_free_pt;
diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c
index 57066faf575e..615bbc372ac6 100644
--- a/drivers/gpu/drm/xe/xe_lrc.c
+++ b/drivers/gpu/drm/xe/xe_lrc.c
@@ -5,8 +5,11 @@
#include "xe_lrc.h"
+#include <linux/ascii85.h>
+
#include "instructions/xe_mi_commands.h"
#include "instructions/xe_gfxpipe_commands.h"
+#include "instructions/xe_gfx_state_commands.h"
#include "regs/xe_engine_regs.h"
#include "regs/xe_gpu_commands.h"
#include "regs/xe_lrc_layout.h"
@@ -23,13 +26,28 @@
#include "xe_sriov.h"
#include "xe_vm.h"
-#define LRC_VALID (1 << 0)
-#define LRC_PRIVILEGE (1 << 8)
-#define LRC_ADDRESSING_MODE_SHIFT 3
+#define LRC_VALID BIT_ULL(0)
+#define LRC_PRIVILEGE BIT_ULL(8)
+#define LRC_ADDRESSING_MODE GENMASK_ULL(4, 3)
#define LRC_LEGACY_64B_CONTEXT 3
-#define ENGINE_CLASS_SHIFT 61
-#define ENGINE_INSTANCE_SHIFT 48
+#define LRC_ENGINE_CLASS GENMASK_ULL(63, 61)
+#define LRC_ENGINE_INSTANCE GENMASK_ULL(53, 48)
+
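Replacing the open-coded shifts with GENMASK_ULL() masks lets FIELD_PREP()/FIELD_GET() from <linux/bitfield.h> derive the shift from the mask and warn at compile time when a constant value cannot fit its field. A small sketch using the masks defined above:

    #include <linux/bitfield.h>

    static u64 pack_engine_desc(u8 class, u8 instance)
    {
            return FIELD_PREP(LRC_ENGINE_CLASS, class) |      /* bits 63:61 */
                   FIELD_PREP(LRC_ENGINE_INSTANCE, instance); /* bits 53:48 */
    }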
+struct xe_lrc_snapshot {
+ struct xe_bo *lrc_bo;
+ void *lrc_snapshot;
+ unsigned long lrc_size, lrc_offset;
+
+ u32 context_desc;
+ u32 head;
+ struct {
+ u32 internal;
+ u32 memory;
+ } tail;
+ u32 start_seqno;
+ u32 seqno;
+};
static struct xe_device *
lrc_to_xe(struct xe_lrc *lrc)
@@ -634,7 +652,7 @@ static inline struct iosys_map __xe_lrc_##elem##_map(struct xe_lrc *lrc) \
iosys_map_incr(&map, __xe_lrc_##elem##_offset(lrc)); \
return map; \
} \
-static inline u32 __xe_lrc_##elem##_ggtt_addr(struct xe_lrc *lrc) \
+static inline u32 __maybe_unused __xe_lrc_##elem##_ggtt_addr(struct xe_lrc *lrc) \
{ \
return xe_bo_ggtt_addr(lrc->bo) + __xe_lrc_##elem##_offset(lrc); \
} \
@@ -724,8 +742,9 @@ int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
lrc->bo = xe_bo_create_pin_map(xe, tile, vm,
ring_size + xe_lrc_size(xe, hwe->class),
ttm_bo_type_kernel,
- XE_BO_CREATE_VRAM_IF_DGFX(tile) |
- XE_BO_CREATE_GGTT_BIT);
+ XE_BO_FLAG_VRAM_IF_DGFX(tile) |
+ XE_BO_FLAG_GGTT |
+ XE_BO_FLAG_GGTT_INVALIDATE);
if (IS_ERR(lrc->bo))
return PTR_ERR(lrc->bo);
@@ -776,7 +795,7 @@ int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
xe_lrc_write_ctx_reg(lrc, PVC_CTX_ASID, vm->usm.asid);
lrc->desc = LRC_VALID;
- lrc->desc |= LRC_LEGACY_64B_CONTEXT << LRC_ADDRESSING_MODE_SHIFT;
+ lrc->desc |= FIELD_PREP(LRC_ADDRESSING_MODE, LRC_LEGACY_64B_CONTEXT);
/* TODO: Priority */
/* While this appears to have something about privileged batches or
@@ -786,8 +805,8 @@ int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
lrc->desc |= LRC_PRIVILEGE;
if (GRAPHICS_VERx100(xe) < 1250) {
- lrc->desc |= (u64)hwe->instance << ENGINE_INSTANCE_SHIFT;
- lrc->desc |= (u64)hwe->class << ENGINE_CLASS_SHIFT;
+ lrc->desc |= FIELD_PREP(LRC_ENGINE_INSTANCE, hwe->instance);
+ lrc->desc |= FIELD_PREP(LRC_ENGINE_CLASS, hwe->class);
}
arb_enable = MI_ARB_ON_OFF | MI_ARB_ENABLE;
@@ -1034,6 +1053,8 @@ static int dump_gfxpipe_command(struct drm_printer *p,
MATCH(GPGPU_CSR_BASE_ADDRESS);
MATCH(STATE_COMPUTE_MODE);
MATCH3D(3DSTATE_BTD);
+ MATCH(STATE_SYSTEM_MEM_FENCE_ADDRESS);
+ MATCH(STATE_CONTEXT_DATA_BASE_ADDRESS);
MATCH3D(3DSTATE_VF_STATISTICS);
@@ -1058,6 +1079,7 @@ static int dump_gfxpipe_command(struct drm_printer *p,
MATCH3D(3DSTATE_WM);
MATCH3D(3DSTATE_CONSTANT_VS);
MATCH3D(3DSTATE_CONSTANT_GS);
+ MATCH3D(3DSTATE_CONSTANT_PS);
MATCH3D(3DSTATE_SAMPLE_MASK);
MATCH3D(3DSTATE_CONSTANT_HS);
MATCH3D(3DSTATE_CONSTANT_DS);
@@ -1150,6 +1172,31 @@ static int dump_gfxpipe_command(struct drm_printer *p,
}
}
+static int dump_gfx_state_command(struct drm_printer *p,
+ struct xe_gt *gt,
+ u32 *dw,
+ int remaining_dw)
+{
+ u32 numdw = instr_dw(*dw);
+ u32 opcode = REG_FIELD_GET(GFX_STATE_OPCODE, *dw);
+
+ /*
+ * Make sure we haven't mis-parsed a number of dwords that exceeds the
+ * remaining size of the LRC.
+ */
+ if (xe_gt_WARN_ON(gt, numdw > remaining_dw))
+ numdw = remaining_dw;
+
+ switch (*dw & (XE_INSTR_GFX_STATE | GFX_STATE_OPCODE)) {
+ MATCH(STATE_WRITE_INLINE);
+
+ default:
+ drm_printf(p, "[%#010x] unknown GFX_STATE command (opcode=%#x), likely %d dwords\n",
+ *dw, opcode, numdw);
+ return numdw;
+ }
+}
+
void xe_lrc_dump_default(struct drm_printer *p,
struct xe_gt *gt,
enum xe_engine_class hwe_class)
@@ -1174,6 +1221,8 @@ void xe_lrc_dump_default(struct drm_printer *p,
num_dw = dump_mi_command(p, gt, dw, remaining_dw);
} else if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_GFXPIPE) {
num_dw = dump_gfxpipe_command(p, gt, dw, remaining_dw);
+ } else if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_GFX_STATE) {
+ num_dw = dump_gfx_state_command(p, gt, dw, remaining_dw);
} else {
num_dw = min(instr_dw(*dw), remaining_dw);
drm_printf(p, "[%#10x] Unknown instruction of type %#x, likely %d dwords\n",
@@ -1297,3 +1346,101 @@ void xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, struct xe_bb *b
bb->len += num_dw;
}
}
+
+struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc)
+{
+ struct xe_lrc_snapshot *snapshot = kmalloc(sizeof(*snapshot), GFP_NOWAIT);
+
+ if (!snapshot)
+ return NULL;
+
+ snapshot->context_desc = lower_32_bits(xe_lrc_ggtt_addr(lrc));
+ snapshot->head = xe_lrc_ring_head(lrc);
+ snapshot->tail.internal = lrc->ring.tail;
+ snapshot->tail.memory = xe_lrc_read_ctx_reg(lrc, CTX_RING_TAIL);
+ snapshot->start_seqno = xe_lrc_start_seqno(lrc);
+ snapshot->seqno = xe_lrc_seqno(lrc);
+ snapshot->lrc_bo = xe_bo_get(lrc->bo);
+ snapshot->lrc_offset = xe_lrc_pphwsp_offset(lrc);
+ snapshot->lrc_size = lrc->bo->size - snapshot->lrc_offset;
+ snapshot->lrc_snapshot = NULL;
+ return snapshot;
+}
+
+void xe_lrc_snapshot_capture_delayed(struct xe_lrc_snapshot *snapshot)
+{
+ struct xe_bo *bo;
+ struct iosys_map src;
+
+ if (!snapshot)
+ return;
+
+ bo = snapshot->lrc_bo;
+ snapshot->lrc_bo = NULL;
+
+ snapshot->lrc_snapshot = kvmalloc(snapshot->lrc_size, GFP_KERNEL);
+ if (!snapshot->lrc_snapshot)
+ goto put_bo;
+
+ dma_resv_lock(bo->ttm.base.resv, NULL);
+ if (!ttm_bo_vmap(&bo->ttm, &src)) {
+ xe_map_memcpy_from(xe_bo_device(bo),
+ snapshot->lrc_snapshot, &src, snapshot->lrc_offset,
+ snapshot->lrc_size);
+ ttm_bo_vunmap(&bo->ttm, &src);
+ } else {
+ kvfree(snapshot->lrc_snapshot);
+ snapshot->lrc_snapshot = NULL;
+ }
+ dma_resv_unlock(bo->ttm.base.resv);
+put_bo:
+ xe_bo_put(bo);
+}
+
+void xe_lrc_snapshot_print(struct xe_lrc_snapshot *snapshot, struct drm_printer *p)
+{
+ unsigned long i;
+
+ if (!snapshot)
+ return;
+
+ drm_printf(p, "\tHW Context Desc: 0x%08x\n", snapshot->context_desc);
+ drm_printf(p, "\tLRC Head: (memory) %u\n", snapshot->head);
+ drm_printf(p, "\tLRC Tail: (internal) %u, (memory) %u\n",
+ snapshot->tail.internal, snapshot->tail.memory);
+ drm_printf(p, "\tStart seqno: (memory) %d\n", snapshot->start_seqno);
+ drm_printf(p, "\tSeqno: (memory) %d\n", snapshot->seqno);
+
+ if (!snapshot->lrc_snapshot)
+ return;
+
+ drm_printf(p, "\t[HWSP].length: 0x%x\n", LRC_PPHWSP_SIZE);
+ drm_puts(p, "\t[HWSP].data: ");
+ for (i = 0; i < LRC_PPHWSP_SIZE; i += sizeof(u32)) {
+ u32 *val = snapshot->lrc_snapshot + i;
+ char dumped[ASCII85_BUFSZ];
+
+ drm_puts(p, ascii85_encode(*val, dumped));
+ }
+
+ drm_printf(p, "\n\t[HWCTX].length: 0x%lx\n", snapshot->lrc_size - LRC_PPHWSP_SIZE);
+ drm_puts(p, "\t[HWCTX].data: ");
+ for (; i < snapshot->lrc_size; i += sizeof(u32)) {
+ u32 *val = snapshot->lrc_snapshot + i;
+ char dumped[ASCII85_BUFSZ];
+
+ drm_puts(p, ascii85_encode(*val, dumped));
+ }
+ drm_puts(p, "\n");
+}
+
+void xe_lrc_snapshot_free(struct xe_lrc_snapshot *snapshot)
+{
+ if (!snapshot)
+ return;
+
+ kvfree(snapshot->lrc_snapshot);
+ if (snapshot->lrc_bo)
+ xe_bo_put(snapshot->lrc_bo);
+ kfree(snapshot);
+}
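The capture is split in two phases because the first runs in contexts where sleeping is not allowed (hence GFP_NOWAIT and only register/field reads), while copying the backing object needs GFP_KERNEL, a vmap and the dma-resv lock. A hedged usage sketch of the API declared in xe_lrc.h:

    static void dump_lrc(struct xe_lrc *lrc, struct drm_printer *p)
    {
            /* cheap, atomic-safe capture; pins the BO via xe_bo_get() */
            struct xe_lrc_snapshot *snap = xe_lrc_snapshot_capture(lrc);

            if (!snap)
                    return;

            xe_lrc_snapshot_capture_delayed(snap);  /* process context: copy BO */
            xe_lrc_snapshot_print(snap, p);         /* ascii85 HWSP/HWCTX dump */
            xe_lrc_snapshot_free(snap);
    }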
diff --git a/drivers/gpu/drm/xe/xe_lrc.h b/drivers/gpu/drm/xe/xe_lrc.h
index 28b1d3f404d4..d32fa31faa2c 100644
--- a/drivers/gpu/drm/xe/xe_lrc.h
+++ b/drivers/gpu/drm/xe/xe_lrc.h
@@ -55,4 +55,9 @@ void xe_lrc_dump_default(struct drm_printer *p,
void xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, struct xe_bb *bb);
+struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc);
+void xe_lrc_snapshot_capture_delayed(struct xe_lrc_snapshot *snapshot);
+void xe_lrc_snapshot_print(struct xe_lrc_snapshot *snapshot, struct drm_printer *p);
+void xe_lrc_snapshot_free(struct xe_lrc_snapshot *snapshot);
+
#endif
diff --git a/drivers/gpu/drm/xe/xe_lrc_types.h b/drivers/gpu/drm/xe/xe_lrc_types.h
index 24f20ed66fd1..b716df0dfb4e 100644
--- a/drivers/gpu/drm/xe/xe_lrc_types.h
+++ b/drivers/gpu/drm/xe/xe_lrc_types.h
@@ -43,4 +43,6 @@ struct xe_lrc {
struct xe_hw_fence_ctx fence_ctx;
};
+struct xe_lrc_snapshot;
+
#endif
diff --git a/drivers/gpu/drm/xe/xe_memirq.c b/drivers/gpu/drm/xe/xe_memirq.c
index 76e95535d7f6..95b6e9d7b7db 100644
--- a/drivers/gpu/drm/xe/xe_memirq.c
+++ b/drivers/gpu/drm/xe/xe_memirq.c
@@ -127,10 +127,11 @@ static int memirq_alloc_pages(struct xe_memirq *memirq)
/* XXX: convert to managed bo */
bo = xe_bo_create_pin_map(xe, tile, NULL, SZ_4K,
ttm_bo_type_kernel,
- XE_BO_CREATE_SYSTEM_BIT |
- XE_BO_CREATE_GGTT_BIT |
- XE_BO_NEEDS_UC |
- XE_BO_NEEDS_CPU_ACCESS);
+ XE_BO_FLAG_SYSTEM |
+ XE_BO_FLAG_GGTT |
+ XE_BO_FLAG_GGTT_INVALIDATE |
+ XE_BO_FLAG_NEEDS_UC |
+ XE_BO_FLAG_NEEDS_CPU_ACCESS);
if (IS_ERR(bo)) {
err = PTR_ERR(bo);
goto out;
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index 2ba4fb9511f6..9f6e9b7f11c8 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -16,6 +16,7 @@
#include "instructions/xe_mi_commands.h"
#include "regs/xe_gpu_commands.h"
+#include "regs/xe_gtt_defs.h"
#include "tests/xe_test.h"
#include "xe_assert.h"
#include "xe_bb.h"
@@ -155,8 +156,8 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
bo = xe_bo_create_pin_map(vm->xe, tile, vm,
num_entries * XE_PAGE_SIZE,
ttm_bo_type_kernel,
- XE_BO_CREATE_VRAM_IF_DGFX(tile) |
- XE_BO_CREATE_PINNED_BIT);
+ XE_BO_FLAG_VRAM_IF_DGFX(tile) |
+ XE_BO_FLAG_PINNED);
if (IS_ERR(bo))
return PTR_ERR(bo);
@@ -984,7 +985,6 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
struct xe_res_cursor src_it;
struct ttm_resource *src = dst;
int err;
- int pass = 0;
if (!clear_vram)
xe_res_first_sg(xe_bo_sg(bo), 0, bo->size, &src_it);
@@ -1005,8 +1005,6 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
clear_L0 = xe_migrate_res_sizes(m, &src_it);
- drm_dbg(&xe->drm, "Pass %u, size: %llu\n", pass++, clear_L0);
-
/* Calculate final sizes and batch size.. */
batch_size = 2 +
pte_update_size(m, clear_vram, src, &src_it,
diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c
index 7ba2477452d7..334637511e75 100644
--- a/drivers/gpu/drm/xe/xe_mmio.c
+++ b/drivers/gpu/drm/xe/xe_mmio.c
@@ -163,6 +163,42 @@ static int xe_determine_lmem_bar_size(struct xe_device *xe)
return 0;
}
+static inline u64 get_flat_ccs_offset(struct xe_gt *gt, u64 tile_size)
+{
+ struct xe_device *xe = gt_to_xe(gt);
+ u64 offset;
+ u32 reg;
+
+ if (GRAPHICS_VER(xe) >= 20) {
+ u64 ccs_size = tile_size / 512;
+ u64 offset_hi, offset_lo;
+ u32 nodes, num_enabled;
+
+ reg = xe_mmio_read32(gt, MIRROR_FUSE3);
+ nodes = REG_FIELD_GET(XE2_NODE_ENABLE_MASK, reg);
+ num_enabled = hweight32(nodes); /* Number of enabled l3 nodes */
+
+ reg = xe_gt_mcr_unicast_read_any(gt, XE2_FLAT_CCS_BASE_RANGE_LOWER);
+ offset_lo = REG_FIELD_GET(XE2_FLAT_CCS_BASE_LOWER_ADDR_MASK, reg);
+
+ reg = xe_gt_mcr_unicast_read_any(gt, XE2_FLAT_CCS_BASE_RANGE_UPPER);
+ offset_hi = REG_FIELD_GET(XE2_FLAT_CCS_BASE_UPPER_ADDR_MASK, reg);
+
+ offset = offset_hi << 32; /* HW view bits 39:32 */
+ offset |= offset_lo << 6; /* HW view bits 31:6 */
+ offset *= num_enabled; /* convert to SW view */
+
+ /* We don't expect any holes */
+ xe_assert_msg(xe, offset == (xe_mmio_read64_2x32(gt, GSMBASE) - ccs_size),
+ "Hole between CCS and GSM.\n");
+ } else {
+ reg = xe_gt_mcr_unicast_read_any(gt, XEHP_FLAT_CCS_BASE_ADDR);
+ offset = (u64)REG_FIELD_GET(XEHP_FLAT_CCS_PTR, reg) * SZ_64K;
+ }
+
+ return offset;
+}
+
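For reference, the Xe2 branch assembles a 40-bit base address from two register fields and then converts the per-L3-node hardware view into the linear software view. A worked example with hypothetical fuse values:

    /*
     *   offset_hi = 0x2       ->  0x2 << 32      = 0x2_0000_0000  (bits 39:32)
     *   offset_lo = 0x100000  ->  0x100000 << 6  = 0x0400_0000    (bits 31:6)
     *   offset    = 0x2_0400_0000, then * num_enabled to scale the
     *   per-node HW view up to the SW view.
     */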
/**
* xe_mmio_tile_vram_size() - Collect vram size and offset information
* @tile: tile to get info for
@@ -207,8 +243,7 @@ static int xe_mmio_tile_vram_size(struct xe_tile *tile, u64 *vram_size,
/* minus device usage */
if (xe->info.has_flat_ccs) {
- reg = xe_gt_mcr_unicast_read_any(gt, XEHP_FLAT_CCS_BASE_ADDR);
- offset = (u64)REG_FIELD_GET(GENMASK(31, 8), reg) * SZ_64K;
+ offset = get_flat_ccs_offset(gt, *tile_size);
} else {
offset = xe_mmio_read64_2x32(gt, GSMBASE);
}
@@ -360,32 +395,9 @@ static void mmio_fini(struct drm_device *drm, void *arg)
iounmap(xe->mem.vram.mapping);
}
-static int xe_verify_lmem_ready(struct xe_device *xe)
-{
- struct xe_gt *gt = xe_root_mmio_gt(xe);
-
- if (!IS_DGFX(xe))
- return 0;
-
- if (IS_SRIOV_VF(xe))
- return 0;
-
- /*
- * The boot firmware initializes local memory and assesses its health.
- * If memory training fails, the punit will have been instructed to
- * keep the GT powered down; we won't be able to communicate with it
- * and we should not continue with driver initialization.
- */
- if (!(xe_mmio_read32(gt, GU_CNTL) & LMEM_INIT)) {
- drm_err(&xe->drm, "VRAM not initialized by firmware\n");
- return -ENODEV;
- }
-
- return 0;
-}
-
int xe_mmio_init(struct xe_device *xe)
{
+ struct xe_tile *root_tile = xe_device_get_root_tile(xe);
struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
const int mmio_bar = 0;
@@ -401,23 +413,83 @@ int xe_mmio_init(struct xe_device *xe)
return -EIO;
}
+ /* Setup first tile; other tiles (if present) will be setup later. */
+ root_tile->mmio.size = SZ_16M;
+ root_tile->mmio.regs = xe->mmio.regs;
+
return drmm_add_action_or_reset(&xe->drm, mmio_fini, xe);
}
-int xe_mmio_root_tile_init(struct xe_device *xe)
+u8 xe_mmio_read8(struct xe_gt *gt, struct xe_reg reg)
{
- struct xe_tile *root_tile = xe_device_get_root_tile(xe);
- int err;
+ struct xe_tile *tile = gt_to_tile(gt);
- /* Setup first tile; other tiles (if present) will be setup later. */
- root_tile->mmio.size = SZ_16M;
- root_tile->mmio.regs = xe->mmio.regs;
+ if (reg.addr < gt->mmio.adj_limit)
+ reg.addr += gt->mmio.adj_offset;
- err = xe_verify_lmem_ready(xe);
- if (err)
- return err;
+ return readb((reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + reg.addr);
+}
- return 0;
+u16 xe_mmio_read16(struct xe_gt *gt, struct xe_reg reg)
+{
+ struct xe_tile *tile = gt_to_tile(gt);
+
+ if (reg.addr < gt->mmio.adj_limit)
+ reg.addr += gt->mmio.adj_offset;
+
+ return readw((reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + reg.addr);
+}
+
+void xe_mmio_write32(struct xe_gt *gt, struct xe_reg reg, u32 val)
+{
+ struct xe_tile *tile = gt_to_tile(gt);
+
+ if (reg.addr < gt->mmio.adj_limit)
+ reg.addr += gt->mmio.adj_offset;
+
+ writel(val, (reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + reg.addr);
+}
+
+u32 xe_mmio_read32(struct xe_gt *gt, struct xe_reg reg)
+{
+ struct xe_tile *tile = gt_to_tile(gt);
+
+ if (reg.addr < gt->mmio.adj_limit)
+ reg.addr += gt->mmio.adj_offset;
+
+ return readl((reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + reg.addr);
+}
+
+u32 xe_mmio_rmw32(struct xe_gt *gt, struct xe_reg reg, u32 clr, u32 set)
+{
+ u32 old, reg_val;
+
+ old = xe_mmio_read32(gt, reg);
+ reg_val = (old & ~clr) | set;
+ xe_mmio_write32(gt, reg, reg_val);
+
+ return old;
+}
+
+int xe_mmio_write32_and_verify(struct xe_gt *gt,
+ struct xe_reg reg, u32 val, u32 mask, u32 eval)
+{
+ u32 reg_val;
+
+ xe_mmio_write32(gt, reg, val);
+ reg_val = xe_mmio_read32(gt, reg);
+
+ return (reg_val & mask) != eval ? -EINVAL : 0;
+}
+
+bool xe_mmio_in_range(const struct xe_gt *gt,
+ const struct xe_mmio_range *range,
+ struct xe_reg reg)
+{
+ if (reg.addr < gt->mmio.adj_limit)
+ reg.addr += gt->mmio.adj_offset;
+
+ return range && reg.addr >= range->start && reg.addr <= range->end;
}
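With the accessors now out of line, callers keep the same API; for example the read-modify-write helper (register and bit names below are hypothetical, and note the helper takes no lock, so callers serialize access themselves):

    static void foo_enable(struct xe_gt *gt)
    {
            /* clear FOO_DISABLE and set FOO_ENABLE in one read-modify-write */
            xe_mmio_rmw32(gt, FOO_CTL, FOO_DISABLE, FOO_ENABLE);
    }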
/**
diff --git a/drivers/gpu/drm/xe/xe_mmio.h b/drivers/gpu/drm/xe/xe_mmio.h
index 98de5c13c89b..a3cd7b3036c7 100644
--- a/drivers/gpu/drm/xe/xe_mmio.h
+++ b/drivers/gpu/drm/xe/xe_mmio.h
@@ -21,83 +21,15 @@ struct xe_device;
#define LMEM_BAR 2
int xe_mmio_init(struct xe_device *xe);
-int xe_mmio_root_tile_init(struct xe_device *xe);
void xe_mmio_probe_tiles(struct xe_device *xe);
-static inline u8 xe_mmio_read8(struct xe_gt *gt, struct xe_reg reg)
-{
- struct xe_tile *tile = gt_to_tile(gt);
-
- if (reg.addr < gt->mmio.adj_limit)
- reg.addr += gt->mmio.adj_offset;
-
- return readb((reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + reg.addr);
-}
-
-static inline u16 xe_mmio_read16(struct xe_gt *gt, struct xe_reg reg)
-{
- struct xe_tile *tile = gt_to_tile(gt);
-
- if (reg.addr < gt->mmio.adj_limit)
- reg.addr += gt->mmio.adj_offset;
-
- return readw((reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + reg.addr);
-}
-
-static inline void xe_mmio_write32(struct xe_gt *gt,
- struct xe_reg reg, u32 val)
-{
- struct xe_tile *tile = gt_to_tile(gt);
-
- if (reg.addr < gt->mmio.adj_limit)
- reg.addr += gt->mmio.adj_offset;
-
- writel(val, (reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + reg.addr);
-}
-
-static inline u32 xe_mmio_read32(struct xe_gt *gt, struct xe_reg reg)
-{
- struct xe_tile *tile = gt_to_tile(gt);
-
- if (reg.addr < gt->mmio.adj_limit)
- reg.addr += gt->mmio.adj_offset;
-
- return readl((reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + reg.addr);
-}
-
-static inline u32 xe_mmio_rmw32(struct xe_gt *gt, struct xe_reg reg, u32 clr,
- u32 set)
-{
- u32 old, reg_val;
-
- old = xe_mmio_read32(gt, reg);
- reg_val = (old & ~clr) | set;
- xe_mmio_write32(gt, reg, reg_val);
-
- return old;
-}
-
-static inline int xe_mmio_write32_and_verify(struct xe_gt *gt,
- struct xe_reg reg, u32 val,
- u32 mask, u32 eval)
-{
- u32 reg_val;
-
- xe_mmio_write32(gt, reg, val);
- reg_val = xe_mmio_read32(gt, reg);
-
- return (reg_val & mask) != eval ? -EINVAL : 0;
-}
-
-static inline bool xe_mmio_in_range(const struct xe_gt *gt,
- const struct xe_mmio_range *range,
- struct xe_reg reg)
-{
- if (reg.addr < gt->mmio.adj_limit)
- reg.addr += gt->mmio.adj_offset;
-
- return range && reg.addr >= range->start && reg.addr <= range->end;
-}
+u8 xe_mmio_read8(struct xe_gt *gt, struct xe_reg reg);
+u16 xe_mmio_read16(struct xe_gt *gt, struct xe_reg reg);
+void xe_mmio_write32(struct xe_gt *gt, struct xe_reg reg, u32 val);
+u32 xe_mmio_read32(struct xe_gt *gt, struct xe_reg reg);
+u32 xe_mmio_rmw32(struct xe_gt *gt, struct xe_reg reg, u32 clr, u32 set);
+int xe_mmio_write32_and_verify(struct xe_gt *gt, struct xe_reg reg, u32 val, u32 mask, u32 eval);
+bool xe_mmio_in_range(const struct xe_gt *gt, const struct xe_mmio_range *range, struct xe_reg reg);
int xe_mmio_probe_vram(struct xe_device *xe);
u64 xe_mmio_read64_2x32(struct xe_gt *gt, struct xe_reg reg);
diff --git a/drivers/gpu/drm/xe/xe_mocs.c b/drivers/gpu/drm/xe/xe_mocs.c
index 609d997b3e9b..1e92f8ee07ba 100644
--- a/drivers/gpu/drm/xe/xe_mocs.c
+++ b/drivers/gpu/drm/xe/xe_mocs.c
@@ -17,10 +17,10 @@
#include "xe_step_types.h"
#if IS_ENABLED(CONFIG_DRM_XE_DEBUG)
-#define mocs_dbg drm_dbg
+#define mocs_dbg xe_gt_dbg
#else
__printf(2, 3)
-static inline void mocs_dbg(const struct drm_device *dev,
+static inline void mocs_dbg(const struct xe_gt *gt,
const char *format, ...)
{ /* noop */ }
#endif
@@ -72,7 +72,7 @@ struct xe_mocs_info {
/* Helper defines */
#define XELP_NUM_MOCS_ENTRIES 64 /* 63-64 are reserved, but configured. */
#define PVC_NUM_MOCS_ENTRIES 3
-#define MTL_NUM_MOCS_ENTRIES 16
+#define MTL_NUM_MOCS_ENTRIES 16
#define XE2_NUM_MOCS_ENTRIES 16
/* (e)LLC caching options */
@@ -375,6 +375,7 @@ static unsigned int get_mocs_settings(struct xe_device *xe,
switch (xe->info.platform) {
case XE_LUNARLAKE:
+ case XE_BATTLEMAGE:
info->size = ARRAY_SIZE(xe2_mocs_table);
info->table = xe2_mocs_table;
info->n_entries = XE2_NUM_MOCS_ENTRIES;
@@ -401,7 +402,11 @@ static unsigned int get_mocs_settings(struct xe_device *xe,
info->size = ARRAY_SIZE(dg2_mocs_desc);
info->table = dg2_mocs_desc;
info->uc_index = 1;
- info->n_entries = XELP_NUM_MOCS_ENTRIES;
+ /*
+ * The last entry is read-only in hardware; don't bother checking
+ * later what was written to it
+ */
+ info->n_entries = XELP_NUM_MOCS_ENTRIES - 1;
info->unused_entries_index = 3;
break;
case XE_DG1:
@@ -462,24 +467,34 @@ static u32 get_entry_control(const struct xe_mocs_info *info,
return info->table[info->unused_entries_index].control_value;
}
-static void __init_mocs_table(struct xe_gt *gt,
- const struct xe_mocs_info *info)
+static bool regs_are_mcr(struct xe_gt *gt)
{
struct xe_device *xe = gt_to_xe(gt);
+ if (xe_gt_is_media_type(gt))
+ return MEDIA_VER(xe) >= 20;
+ else
+ return GRAPHICS_VERx100(xe) >= 1250;
+}
+
+static void __init_mocs_table(struct xe_gt *gt,
+ const struct xe_mocs_info *info)
+{
unsigned int i;
u32 mocs;
- mocs_dbg(&gt_to_xe(gt)->drm, "entries:%d\n", info->n_entries);
- drm_WARN_ONCE(&xe->drm, !info->unused_entries_index,
- "Unused entries index should have been defined\n");
- for (i = 0;
- i < info->n_entries ? (mocs = get_entry_control(info, i)), 1 : 0;
- i++) {
- mocs_dbg(&gt_to_xe(gt)->drm, "GLOB_MOCS[%d] 0x%x 0x%x\n", i,
+ xe_gt_WARN_ONCE(gt, !info->unused_entries_index,
+ "Unused entries index should have been defined\n");
+
+ mocs_dbg(gt, "mocs entries: %d\n", info->n_entries);
+
+ for (i = 0; i < info->n_entries; i++) {
+ mocs = get_entry_control(info, i);
+
+ mocs_dbg(gt, "GLOB_MOCS[%d] 0x%x 0x%x\n", i,
XELP_GLOBAL_MOCS(i).addr, mocs);
- if (GRAPHICS_VERx100(gt_to_xe(gt)) > 1250)
+ if (regs_are_mcr(gt))
xe_gt_mcr_multicast_write(gt, XEHP_GLOBAL_MOCS(i), mocs);
else
xe_mmio_write32(gt, XELP_GLOBAL_MOCS(i), mocs);
@@ -510,16 +525,16 @@ static void init_l3cc_table(struct xe_gt *gt,
unsigned int i;
u32 l3cc;
- mocs_dbg(&gt_to_xe(gt)->drm, "entries:%d\n", info->n_entries);
- for (i = 0;
- i < (info->n_entries + 1) / 2 ?
- (l3cc = l3cc_combine(get_entry_l3cc(info, 2 * i),
- get_entry_l3cc(info, 2 * i + 1))), 1 : 0;
- i++) {
- mocs_dbg(&gt_to_xe(gt)->drm, "LNCFCMOCS[%d] 0x%x 0x%x\n", i, XELP_LNCFCMOCS(i).addr,
- l3cc);
+ mocs_dbg(gt, "l3cc entries: %d\n", info->n_entries);
+
+ for (i = 0; i < (info->n_entries + 1) / 2; i++) {
+ l3cc = l3cc_combine(get_entry_l3cc(info, 2 * i),
+ get_entry_l3cc(info, 2 * i + 1));
- if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1250)
+ mocs_dbg(gt, "LNCFCMOCS[%d] 0x%x 0x%x\n", i,
+ XELP_LNCFCMOCS(i).addr, l3cc);
+
+ if (regs_are_mcr(gt))
xe_gt_mcr_multicast_write(gt, XEHP_LNCFCMOCS(i), l3cc);
else
xe_mmio_write32(gt, XELP_LNCFCMOCS(i), l3cc);
@@ -552,7 +567,10 @@ void xe_mocs_init(struct xe_gt *gt)
* performed by the GuC.
*/
flags = get_mocs_settings(gt_to_xe(gt), &table);
- mocs_dbg(&gt_to_xe(gt)->drm, "flag:0x%x\n", flags);
+ mocs_dbg(gt, "flag:0x%x\n", flags);
+
+ if (IS_SRIOV_VF(gt_to_xe(gt)))
+ return;
if (flags & HAS_GLOBAL_MOCS)
__init_mocs_table(gt, &table);
diff --git a/drivers/gpu/drm/xe/xe_module.c b/drivers/gpu/drm/xe/xe_module.c
index 110b69864656..ceb8345cbca6 100644
--- a/drivers/gpu/drm/xe/xe_module.c
+++ b/drivers/gpu/drm/xe/xe_module.c
@@ -48,6 +48,13 @@ module_param_named_unsafe(force_probe, xe_modparam.force_probe, charp, 0400);
MODULE_PARM_DESC(force_probe,
"Force probe options for specified devices. See CONFIG_DRM_XE_FORCE_PROBE for details.");
+#ifdef CONFIG_PCI_IOV
+module_param_named(max_vfs, xe_modparam.max_vfs, uint, 0400);
+MODULE_PARM_DESC(max_vfs,
+ "Limit number of Virtual Functions (VFs) that could be managed. "
+ "(0 = no VFs [default]; N = allow up to N VFs)");
+#endif
+
struct init_funcs {
int (*init)(void);
void (*exit)(void);
diff --git a/drivers/gpu/drm/xe/xe_module.h b/drivers/gpu/drm/xe/xe_module.h
index 88ef0e8b2bfd..b369984f08ec 100644
--- a/drivers/gpu/drm/xe/xe_module.h
+++ b/drivers/gpu/drm/xe/xe_module.h
@@ -18,6 +18,9 @@ struct xe_modparam {
char *huc_firmware_path;
char *gsc_firmware_path;
char *force_probe;
+#ifdef CONFIG_PCI_IOV
+ unsigned int max_vfs;
+#endif
};
extern struct xe_modparam xe_modparam;
diff --git a/drivers/gpu/drm/xe/xe_pat.c b/drivers/gpu/drm/xe/xe_pat.c
index e148934d554b..d5b516f115ad 100644
--- a/drivers/gpu/drm/xe/xe_pat.c
+++ b/drivers/gpu/drm/xe/xe_pat.c
@@ -142,6 +142,7 @@ static const struct xe_pat_table_entry xe2_pat_table[] = {
/* Special PAT values programmed outside the main table */
static const struct xe_pat_table_entry xe2_pat_ats = XE2_PAT( 0, 0, 0, 0, 3, 3 );
+static const struct xe_pat_table_entry xe2_pat_pta = XE2_PAT( 0, 0, 0, 0, 3, 0 );
u16 xe_pat_index_get_coh_mode(struct xe_device *xe, u16 pat_index)
{
@@ -174,7 +175,6 @@ static void xelp_dump(struct xe_gt *gt, struct drm_printer *p)
struct xe_device *xe = gt_to_xe(gt);
int i, err;
- xe_device_mem_access_get(xe);
err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
if (err)
goto err_fw;
@@ -192,7 +192,6 @@ static void xelp_dump(struct xe_gt *gt, struct drm_printer *p)
err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
err_fw:
xe_assert(xe, !err);
- xe_device_mem_access_put(xe);
}
static const struct xe_pat_ops xelp_pat_ops = {
@@ -205,7 +204,6 @@ static void xehp_dump(struct xe_gt *gt, struct drm_printer *p)
struct xe_device *xe = gt_to_xe(gt);
int i, err;
- xe_device_mem_access_get(xe);
err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
if (err)
goto err_fw;
@@ -225,7 +223,6 @@ static void xehp_dump(struct xe_gt *gt, struct drm_printer *p)
err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
err_fw:
xe_assert(xe, !err);
- xe_device_mem_access_put(xe);
}
static const struct xe_pat_ops xehp_pat_ops = {
@@ -238,7 +235,6 @@ static void xehpc_dump(struct xe_gt *gt, struct drm_printer *p)
struct xe_device *xe = gt_to_xe(gt);
int i, err;
- xe_device_mem_access_get(xe);
err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
if (err)
goto err_fw;
@@ -256,7 +252,6 @@ static void xehpc_dump(struct xe_gt *gt, struct drm_printer *p)
err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
err_fw:
xe_assert(xe, !err);
- xe_device_mem_access_put(xe);
}
static const struct xe_pat_ops xehpc_pat_ops = {
@@ -269,7 +264,6 @@ static void xelpg_dump(struct xe_gt *gt, struct drm_printer *p)
struct xe_device *xe = gt_to_xe(gt);
int i, err;
- xe_device_mem_access_get(xe);
err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
if (err)
goto err_fw;
@@ -292,7 +286,6 @@ static void xelpg_dump(struct xe_gt *gt, struct drm_printer *p)
err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
err_fw:
xe_assert(xe, !err);
- xe_device_mem_access_put(xe);
}
/*
@@ -310,6 +303,9 @@ static void xe2lpg_program_pat(struct xe_gt *gt, const struct xe_pat_table_entry
{
program_pat_mcr(gt, table, n_entries);
xe_gt_mcr_multicast_write(gt, XE_REG_MCR(_PAT_ATS), xe2_pat_ats.value);
+
+ if (IS_DGFX(gt_to_xe(gt)))
+ xe_gt_mcr_multicast_write(gt, XE_REG_MCR(_PAT_PTA), xe2_pat_pta.value);
}
static void xe2lpm_program_pat(struct xe_gt *gt, const struct xe_pat_table_entry table[],
@@ -317,6 +313,9 @@ static void xe2lpm_program_pat(struct xe_gt *gt, const struct xe_pat_table_entry
{
program_pat(gt, table, n_entries);
xe_mmio_write32(gt, XE_REG(_PAT_ATS), xe2_pat_ats.value);
+
+ if (IS_DGFX(gt_to_xe(gt)))
+ xe_mmio_write32(gt, XE_REG(_PAT_PTA), xe2_pat_pta.value);
}
static void xe2_dump(struct xe_gt *gt, struct drm_printer *p)
@@ -325,7 +324,6 @@ static void xe2_dump(struct xe_gt *gt, struct drm_printer *p)
int i, err;
u32 pat;
- xe_device_mem_access_get(xe);
err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
if (err)
goto err_fw;
@@ -370,7 +368,6 @@ static void xe2_dump(struct xe_gt *gt, struct drm_printer *p)
err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
err_fw:
xe_assert(xe, !err);
- xe_device_mem_access_put(xe);
}
static const struct xe_pat_ops xe2_pat_ops = {
@@ -438,6 +435,10 @@ void xe_pat_init_early(struct xe_device *xe)
/* VFs can't program nor dump PAT settings */
if (IS_SRIOV_VF(xe))
xe->pat.ops = NULL;
+
+ xe_assert(xe, !xe->pat.ops || xe->pat.ops->dump);
+ xe_assert(xe, !xe->pat.ops || xe->pat.ops->program_graphics);
+ xe_assert(xe, !xe->pat.ops || MEDIA_VER(xe) < 13 || xe->pat.ops->program_media);
}
void xe_pat_init(struct xe_gt *gt)
@@ -457,7 +458,7 @@ void xe_pat_dump(struct xe_gt *gt, struct drm_printer *p)
{
struct xe_device *xe = gt_to_xe(gt);
- if (!xe->pat.ops->dump)
+ if (!xe->pat.ops)
return;
xe->pat.ops->dump(gt, p);
diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index 75a8817b2631..f326dbb1cecd 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -174,7 +174,7 @@ static const struct xe_graphics_desc graphics_xelpg = {
GENMASK(XE_HW_ENGINE_CCS3, XE_HW_ENGINE_CCS0)
static const struct xe_graphics_desc graphics_xe2 = {
- .name = "Xe2_LPG",
+ .name = "Xe2_LPG / Xe2_HPG",
XE2_GFX_FEATURES,
};
@@ -185,8 +185,8 @@ static const struct xe_media_desc media_xem = {
.rel = 0,
.hw_engine_mask =
- BIT(XE_HW_ENGINE_VCS0) | BIT(XE_HW_ENGINE_VCS2) |
- BIT(XE_HW_ENGINE_VECS0),
+ GENMASK(XE_HW_ENGINE_VCS7, XE_HW_ENGINE_VCS0) |
+ GENMASK(XE_HW_ENGINE_VECS3, XE_HW_ENGINE_VECS0),
};
static const struct xe_media_desc media_xehpm = {
@@ -195,21 +195,23 @@ static const struct xe_media_desc media_xehpm = {
.rel = 55,
.hw_engine_mask =
- BIT(XE_HW_ENGINE_VCS0) | BIT(XE_HW_ENGINE_VCS2) |
- BIT(XE_HW_ENGINE_VECS0) | BIT(XE_HW_ENGINE_VECS1),
+ GENMASK(XE_HW_ENGINE_VCS7, XE_HW_ENGINE_VCS0) |
+ GENMASK(XE_HW_ENGINE_VECS3, XE_HW_ENGINE_VECS0),
};
static const struct xe_media_desc media_xelpmp = {
.name = "Xe_LPM+",
.hw_engine_mask =
- BIT(XE_HW_ENGINE_VCS0) | BIT(XE_HW_ENGINE_VCS2) |
- BIT(XE_HW_ENGINE_VECS0) | BIT(XE_HW_ENGINE_GSCCS0)
+ GENMASK(XE_HW_ENGINE_VCS7, XE_HW_ENGINE_VCS0) |
+ GENMASK(XE_HW_ENGINE_VECS3, XE_HW_ENGINE_VECS0) |
+ BIT(XE_HW_ENGINE_GSCCS0)
};
static const struct xe_media_desc media_xe2 = {
- .name = "Xe2_LPM",
+ .name = "Xe2_LPM / Xe2_HPM",
.hw_engine_mask =
- BIT(XE_HW_ENGINE_VCS0) | BIT(XE_HW_ENGINE_VECS0), /* TODO: GSC0 */
+ GENMASK(XE_HW_ENGINE_VCS7, XE_HW_ENGINE_VCS0) |
+ GENMASK(XE_HW_ENGINE_VECS3, XE_HW_ENGINE_VECS0), /* TODO: GSC0 */
};
static const struct xe_device_desc tgl_desc = {
@@ -337,6 +339,12 @@ static const struct xe_device_desc lnl_desc = {
.require_force_probe = true,
};
+static const struct xe_device_desc bmg_desc __maybe_unused = {
+ DGFX_FEATURES,
+ PLATFORM(XE_BATTLEMAGE),
+ .require_force_probe = true,
+};
+
#undef PLATFORM
__diag_pop();
@@ -344,12 +352,15 @@ __diag_pop();
static const struct gmdid_map graphics_ip_map[] = {
{ 1270, &graphics_xelpg },
{ 1271, &graphics_xelpg },
+ { 1274, &graphics_xelpg }, /* Xe_LPG+ */
+ { 2001, &graphics_xe2 },
{ 2004, &graphics_xe2 },
};
/* Map of GMD_ID values to media IP */
static const struct gmdid_map media_ip_map[] = {
{ 1300, &media_xelpmp },
+ { 1301, &media_xe2 },
{ 2000, &media_xe2 },
};
@@ -738,8 +749,6 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
if (err)
return err;
- xe_sriov_probe_early(xe, desc->has_sriov);
-
err = xe_device_probe_early(xe);
if (err)
return err;
@@ -775,18 +784,26 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
str_yes_no(xe_device_has_sriov(xe)),
xe_sriov_mode_to_string(xe_device_sriov_mode(xe)));
- xe_pm_init_early(xe);
+ err = xe_pm_init_early(xe);
+ if (err)
+ return err;
err = xe_device_probe(xe);
if (err)
return err;
- xe_pm_init(xe);
+ err = xe_pm_init(xe);
+ if (err)
+ goto err_driver_cleanup;
drm_dbg(&xe->drm, "d3cold: capable=%s\n",
str_yes_no(xe->d3cold.capable));
return 0;
+
+err_driver_cleanup:
+ xe_pci_remove(pdev);
+ return err;
}
static void xe_pci_shutdown(struct pci_dev *pdev)
diff --git a/drivers/gpu/drm/xe/xe_pcode.c b/drivers/gpu/drm/xe/xe_pcode.c
index b324dc2a5deb..c010ef16fbf5 100644
--- a/drivers/gpu/drm/xe/xe_pcode.c
+++ b/drivers/gpu/drm/xe/xe_pcode.c
@@ -10,6 +10,7 @@
#include <drm/drm_managed.h>
+#include "xe_device.h"
#include "xe_gt.h"
#include "xe_mmio.h"
#include "xe_pcode_api.h"
@@ -43,8 +44,6 @@ static int pcode_mailbox_status(struct xe_gt *gt)
[PCODE_ERROR_MASK] = {-EPROTO, "Unknown"},
};
- lockdep_assert_held(&gt->pcode.lock);
-
err = xe_mmio_read32(gt, PCODE_MAILBOX) & PCODE_ERROR_MASK;
if (err) {
drm_err(&gt_to_xe(gt)->drm, "PCODE Mailbox failed: %d %s", err,
@@ -55,17 +54,15 @@ static int pcode_mailbox_status(struct xe_gt *gt)
return 0;
}
-static int pcode_mailbox_rw(struct xe_gt *gt, u32 mbox, u32 *data0, u32 *data1,
- unsigned int timeout_ms, bool return_data,
- bool atomic)
+static int __pcode_mailbox_rw(struct xe_gt *gt, u32 mbox, u32 *data0, u32 *data1,
+ unsigned int timeout_ms, bool return_data,
+ bool atomic)
{
int err;
if (gt_to_xe(gt)->info.skip_pcode)
return 0;
- lockdep_assert_held(&gt->pcode.lock);
-
if ((xe_mmio_read32(gt, PCODE_MAILBOX) & PCODE_READY) != 0)
return -EAGAIN;
@@ -74,7 +71,7 @@ static int pcode_mailbox_rw(struct xe_gt *gt, u32 mbox, u32 *data0, u32 *data1,
xe_mmio_write32(gt, PCODE_MAILBOX, PCODE_READY | mbox);
err = xe_mmio_wait32(gt, PCODE_MAILBOX, PCODE_READY, 0,
- timeout_ms * 1000, NULL, atomic);
+ timeout_ms * USEC_PER_MSEC, NULL, atomic);
if (err)
return err;
@@ -87,6 +84,18 @@ static int pcode_mailbox_rw(struct xe_gt *gt, u32 mbox, u32 *data0, u32 *data1,
return pcode_mailbox_status(gt);
}
+static int pcode_mailbox_rw(struct xe_gt *gt, u32 mbox, u32 *data0, u32 *data1,
+ unsigned int timeout_ms, bool return_data,
+ bool atomic)
+{
+ if (gt_to_xe(gt)->info.skip_pcode)
+ return 0;
+
+ lockdep_assert_held(&gt->pcode.lock);
+
+ return __pcode_mailbox_rw(gt, mbox, data0, data1, timeout_ms, return_data, atomic);
+}
+
int xe_pcode_write_timeout(struct xe_gt *gt, u32 mbox, u32 data, int timeout)
{
int err;
@@ -109,15 +118,19 @@ int xe_pcode_read(struct xe_gt *gt, u32 mbox, u32 *val, u32 *val1)
return err;
}
-static int xe_pcode_try_request(struct xe_gt *gt, u32 mbox,
- u32 request, u32 reply_mask, u32 reply,
- u32 *status, bool atomic, int timeout_us)
+static int pcode_try_request(struct xe_gt *gt, u32 mbox,
+ u32 request, u32 reply_mask, u32 reply,
+ u32 *status, bool atomic, int timeout_us, bool locked)
{
int slept, wait = 10;
for (slept = 0; slept < timeout_us; slept += wait) {
- *status = pcode_mailbox_rw(gt, mbox, &request, NULL, 1, true,
- atomic);
+ if (locked)
+ *status = pcode_mailbox_rw(gt, mbox, &request, NULL, 1, true,
+ atomic);
+ else
+ *status = __pcode_mailbox_rw(gt, mbox, &request, NULL, 1, true,
+ atomic);
if ((*status == 0) && ((request & reply_mask) == reply))
return 0;
@@ -158,8 +171,8 @@ int xe_pcode_request(struct xe_gt *gt, u32 mbox, u32 request,
mutex_lock(&gt->pcode.lock);
- ret = xe_pcode_try_request(gt, mbox, request, reply_mask, reply, &status,
- false, timeout_base_ms * 1000);
+ ret = pcode_try_request(gt, mbox, request, reply_mask, reply, &status,
+ false, timeout_base_ms * 1000, true);
if (!ret)
goto out;
@@ -177,8 +190,8 @@ int xe_pcode_request(struct xe_gt *gt, u32 mbox, u32 request,
"PCODE timeout, retrying with preemption disabled\n");
drm_WARN_ON_ONCE(&gt_to_xe(gt)->drm, timeout_base_ms > 1);
preempt_disable();
- ret = xe_pcode_try_request(gt, mbox, request, reply_mask, reply, &status,
- true, timeout_base_ms * 1000);
+ ret = pcode_try_request(gt, mbox, request, reply_mask, reply, &status,
+ true, timeout_base_ms * 1000, true);
preempt_enable();
out:
@@ -238,59 +251,71 @@ unlock:
}
/**
- * xe_pcode_init - Ensure PCODE is initialized
- * @gt: gt instance
+ * xe_pcode_ready - Ensure PCODE is initialized
+ * @xe: xe instance
+ * @locked: true if lock held, false otherwise
*
- * This function ensures that PCODE is properly initialized. To be called during
- * probe and resume paths.
+ * The PCODE init mailbox is polled only on the root GT of the root tile,
+ * as the root tile reports that initialization is complete only after
+ * all tiles have completed their initialization.
+ * Called without locks during early probe, and with locks held in the
+ * resume path.
*
- * It returns 0 on success, and -error number on failure.
+ * Returns 0 on success, and -error number on failure.
*/
-int xe_pcode_init(struct xe_gt *gt)
+int xe_pcode_ready(struct xe_device *xe, bool locked)
{
u32 status, request = DGFX_GET_INIT_STATUS;
+ struct xe_gt *gt = xe_root_mmio_gt(xe);
int timeout_us = 180000000; /* 3 min */
int ret;
- if (gt_to_xe(gt)->info.skip_pcode)
+ if (xe->info.skip_pcode)
return 0;
- if (!IS_DGFX(gt_to_xe(gt)))
+ if (!IS_DGFX(xe))
return 0;
- mutex_lock(&gt->pcode.lock);
- ret = xe_pcode_try_request(gt, DGFX_PCODE_STATUS, request,
- DGFX_INIT_STATUS_COMPLETE,
- DGFX_INIT_STATUS_COMPLETE,
- &status, false, timeout_us);
- mutex_unlock(&gt->pcode.lock);
+ if (locked)
+ mutex_lock(&gt->pcode.lock);
+
+ ret = pcode_try_request(gt, DGFX_PCODE_STATUS, request,
+ DGFX_INIT_STATUS_COMPLETE,
+ DGFX_INIT_STATUS_COMPLETE,
+ &status, false, timeout_us, locked);
+
+ if (locked)
+ mutex_unlock(&gt->pcode.lock);
if (ret)
- drm_err(&gt_to_xe(gt)->drm,
+ drm_err(&xe->drm,
"PCODE initialization timedout after: 3 min\n");
return ret;
}
/**
- * xe_pcode_probe - Prepare xe_pcode and also ensure PCODE is initialized.
+ * xe_pcode_init: initialize components of PCODE
* @gt: gt instance
*
- * This function initializes the xe_pcode component, and when needed, it ensures
- * that PCODE has properly performed its initialization and it is really ready
- * to go. To be called once only during probe.
- *
- * It returns 0 on success, and -error number on failure.
+ * This function initializes the xe_pcode component.
+ * To be called once only during probe.
*/
-int xe_pcode_probe(struct xe_gt *gt)
+void xe_pcode_init(struct xe_gt *gt)
{
drmm_mutex_init(&gt_to_xe(gt)->drm, &gt->pcode.lock);
+}
- if (gt_to_xe(gt)->info.skip_pcode)
- return 0;
-
- if (!IS_DGFX(gt_to_xe(gt)))
- return 0;
-
- return xe_pcode_init(gt);
+/**
+ * xe_pcode_probe_early: initializes PCODE
+ * @xe: xe instance
+ *
+ * This function checks the initialization status of PCODE
+ * To be called once only during early probe without locks.
+ *
+ * Returns 0 on success, error code otherwise
+ */
+int xe_pcode_probe_early(struct xe_device *xe)
+{
+ return xe_pcode_ready(xe, false);
}
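pcode_try_request() is a bounded poll: it retries the mailbox until the reply matches or the time budget is spent. The sleep/backoff half of the loop falls outside the hunks above; a sketch of its general shape, with the backoff details assumed from the i915 lineage of this code:

    for (slept = 0; slept < timeout_us; slept += wait) {
            *status = pcode_mailbox_rw(gt, mbox, &request, NULL, 1, true, atomic);
            if (*status == 0 && (request & reply_mask) == reply)
                    return 0;

            if (atomic)
                    udelay(wait);                   /* assumed */
            else
                    usleep_range(wait, wait << 1);
            wait <<= 1;                             /* assumed exponential backoff */
    }
    return -ETIMEDOUT;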
diff --git a/drivers/gpu/drm/xe/xe_pcode.h b/drivers/gpu/drm/xe/xe_pcode.h
index 08cb1d047cba..3f54c6d2a57d 100644
--- a/drivers/gpu/drm/xe/xe_pcode.h
+++ b/drivers/gpu/drm/xe/xe_pcode.h
@@ -8,9 +8,11 @@
#include <linux/types.h>
struct xe_gt;
+struct xe_device;
-int xe_pcode_probe(struct xe_gt *gt);
-int xe_pcode_init(struct xe_gt *gt);
+void xe_pcode_init(struct xe_gt *gt);
+int xe_pcode_probe_early(struct xe_device *xe);
+int xe_pcode_ready(struct xe_device *xe, bool locked);
int xe_pcode_init_min_freq_table(struct xe_gt *gt, u32 min_gt_freq,
u32 max_gt_freq);
int xe_pcode_read(struct xe_gt *gt, u32 mbox, u32 *val, u32 *val1);
diff --git a/drivers/gpu/drm/xe/xe_platform_types.h b/drivers/gpu/drm/xe/xe_platform_types.h
index 553f53dbd093..79b7042c4534 100644
--- a/drivers/gpu/drm/xe/xe_platform_types.h
+++ b/drivers/gpu/drm/xe/xe_platform_types.h
@@ -22,6 +22,7 @@ enum xe_platform {
XE_PVC,
XE_METEORLAKE,
XE_LUNARLAKE,
+ XE_BATTLEMAGE,
};
enum xe_subplatform {
diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c
index 53b3b0b019ac..37fbeda12d3b 100644
--- a/drivers/gpu/drm/xe/xe_pm.c
+++ b/drivers/gpu/drm/xe/xe_pm.c
@@ -25,23 +25,55 @@
/**
* DOC: Xe Power Management
*
- * Xe PM shall be guided by the simplicity.
- * Use the simplest hook options whenever possible.
- * Let's not reinvent the runtime_pm references and hooks.
- * Shall have a clear separation of display and gt underneath this component.
+ * Xe PM implements the main routines for both system level suspend states and
+ * for the opportunistic runtime suspend states.
*
- * What's next:
+ * System Level Suspend (S-States) - In general this is OS-initiated suspend,
+ * driven by ACPI, for achieving S0ix (a.k.a. S2idle, freeze), S3 (suspend to
+ * ram) and S4 (suspend to disk). The main functions here are `xe_pm_suspend`
+ * and `xe_pm_resume`; they are the entry points for suspending to and
+ * resuming from these states.
*
- * For now s2idle and s3 are only working in integrated devices. The next step
- * is to iterate through all VRAM's BO backing them up into the system memory
- * before allowing the system suspend.
+ * PCI Device Suspend (D-States) - This is the opportunistic PCIe device low
+ * power state D3, controlled by the PCI subsystem and ACPI, with help from the
+ * runtime_pm infrastructure.
+ * PCI D3 is special: it can mean D3hot, where Vcc power stays on to keep
+ * memory alive and allow a quicker, low-latency resume, or D3cold, where Vcc
+ * power is off for better power savings.
+ * Vcc for the PCI hierarchy can only be controlled at the PCI root port level,
+ * while the device driver can be behind multiple bridges/switches and paired
+ * with other devices. For this reason, the PCI subsystem cannot perform the
+ * transition towards D3cold; the lowest runtime PM state possible from the PCI
+ * subsystem is D3hot. Then, if all the paired devices in the same root port
+ * are in D3hot, ACPI steps in and runs its own methods (_PR3 and _OFF) to
+ * perform the transition from D3hot to D3cold. Xe may disallow this transition
+ * by calling pci_d3cold_disable(root_pdev) before going to runtime suspend,
+ * based on runtime conditions such as VRAM usage, for instance to guarantee a
+ * quick, low-latency resume.
*
- * Also runtime_pm needs to be here from the beginning.
+ * Runtime PM - This infrastructure provided by the Linux kernel allows device
+ * drivers to indicate when they can be runtime suspended, so the device can be
+ * put into D3 (if supported), allowing deeper package sleep states (PC-states)
+ * and/or other low-level power states. The Xe PM component provides the
+ * `xe_pm_runtime_suspend` and `xe_pm_runtime_resume` functions that the PCI
+ * subsystem calls before the transition to/from runtime suspend.
*
- * RC6/RPS are also critical PM features. Let's start with GuCRC and GuC SLPC
- * and no wait boost. Frequency optimizations should come on a next stage.
+ * Also, Xe PM provides get and put functions that the Xe driver uses to
+ * indicate activity. In order to avoid locking complications with the memory
+ * management, these get and put functions need to be called from the
+ * higher/outer levels whenever possible.
+ * The main cases that need to be protected from the outer levels are: IOCTL,
+ * sysfs, debugfs, dma-buf sharing, GPU execution.
+ *
+ * This component is responsible neither for GT idleness (RC6) nor for GT
+ * frequency management (RPS).
*/
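[Editor's note] To make the D3cold policy above concrete, here is a minimal C sketch of the gating decision — not the driver's actual code: the root-port lookup is simplified, vram_used_mb is a hypothetical value, and the xe->d3cold.vram_threshold field name is assumed from xe_pm_set_vram_threshold() below.

	/* Hedged sketch: disallow D3cold whenever a low-latency resume matters. */
	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
	struct pci_dev *root = pcie_find_root_port(pdev);

	if (vram_used_mb > xe->d3cold.vram_threshold)
		pci_d3cold_disable(root);	/* Vcc stays on: D3hot at most */
	else
		pci_d3cold_enable(root);	/* let ACPI run _PR3/_OFF for D3cold */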
+#ifdef CONFIG_LOCKDEP
+struct lockdep_map xe_pm_runtime_lockdep_map = {
+ .name = "xe_pm_runtime_lockdep_map"
+};
+#endif
+
/**
* xe_pm_suspend - Helper for System suspend, i.e. S0->S3 / S0->S2idle
* @xe: xe device instance
@@ -54,13 +86,15 @@ int xe_pm_suspend(struct xe_device *xe)
u8 id;
int err;
+ drm_dbg(&xe->drm, "Suspending device\n");
+
for_each_gt(gt, xe, id)
xe_gt_suspend_prepare(gt);
/* FIXME: Super racy... */
err = xe_bo_evict_all(xe);
if (err)
- return err;
+ goto err;
xe_display_pm_suspend(xe);
@@ -68,7 +102,7 @@ int xe_pm_suspend(struct xe_device *xe)
err = xe_gt_suspend(gt);
if (err) {
xe_display_pm_resume(xe);
- return err;
+ goto err;
}
}
@@ -76,7 +110,11 @@ int xe_pm_suspend(struct xe_device *xe)
xe_display_pm_suspend_late(xe);
+ drm_dbg(&xe->drm, "Device suspended\n");
return 0;
+err:
+ drm_dbg(&xe->drm, "Device suspend failed %d\n", err);
+ return err;
}
/**
@@ -92,14 +130,14 @@ int xe_pm_resume(struct xe_device *xe)
u8 id;
int err;
+ drm_dbg(&xe->drm, "Resuming device\n");
+
for_each_tile(tile, xe, id)
xe_wa_apply_tile_workarounds(tile);
- for_each_gt(gt, xe, id) {
- err = xe_pcode_init(gt);
- if (err)
- return err;
- }
+ err = xe_pcode_ready(xe, true);
+ if (err)
+ return err;
xe_display_pm_resume_early(xe);
@@ -109,7 +147,7 @@ int xe_pm_resume(struct xe_device *xe)
*/
err = xe_bo_restore_kernel(xe);
if (err)
- return err;
+ goto err;
xe_irq_resume(xe);
@@ -120,9 +158,13 @@ int xe_pm_resume(struct xe_device *xe)
err = xe_bo_restore_user(xe);
if (err)
- return err;
+ goto err;
+ drm_dbg(&xe->drm, "Device resumed\n");
return 0;
+err:
+ drm_dbg(&xe->drm, "Device resume failed %d\n", err);
+ return err;
}
static bool xe_pm_pci_d3cold_capable(struct xe_device *xe)
@@ -172,30 +214,60 @@ static void xe_pm_runtime_init(struct xe_device *xe)
pm_runtime_put(dev);
}
-void xe_pm_init_early(struct xe_device *xe)
+int xe_pm_init_early(struct xe_device *xe)
{
+ int err;
+
INIT_LIST_HEAD(&xe->mem_access.vram_userfault.list);
- drmm_mutex_init(&xe->drm, &xe->mem_access.vram_userfault.lock);
+
+ err = drmm_mutex_init(&xe->drm, &xe->mem_access.vram_userfault.lock);
+ if (err)
+ return err;
+
+ err = drmm_mutex_init(&xe->drm, &xe->d3cold.lock);
+ if (err)
+ return err;
+
+ return 0;
}
-void xe_pm_init(struct xe_device *xe)
+/**
+ * xe_pm_init - Initialize Xe Power Management
+ * @xe: xe device instance
+ *
+ * This component is responsible for System and Device sleep states.
+ *
+ * Returns 0 for success, negative error code otherwise.
+ */
+int xe_pm_init(struct xe_device *xe)
{
+ int err;
+
/* For now suspend/resume is only allowed with GuC */
if (!xe_device_uc_enabled(xe))
- return;
-
- drmm_mutex_init(&xe->drm, &xe->d3cold.lock);
+ return 0;
xe->d3cold.capable = xe_pm_pci_d3cold_capable(xe);
if (xe->d3cold.capable) {
- xe_device_sysfs_init(xe);
- xe_pm_set_vram_threshold(xe, DEFAULT_VRAM_THRESHOLD);
+ err = xe_device_sysfs_init(xe);
+ if (err)
+ return err;
+
+ err = xe_pm_set_vram_threshold(xe, DEFAULT_VRAM_THRESHOLD);
+ if (err)
+ return err;
}
xe_pm_runtime_init(xe);
+
+ return 0;
}
+/**
+ * xe_pm_runtime_fini - Finalize Runtime PM
+ * @xe: xe device instance
+ */
void xe_pm_runtime_fini(struct xe_device *xe)
{
struct device *dev = xe->drm.dev;
@@ -225,6 +297,28 @@ struct task_struct *xe_pm_read_callback_task(struct xe_device *xe)
return READ_ONCE(xe->pm_callback_task);
}
+/**
+ * xe_pm_runtime_suspended - Check if runtime_pm state is suspended
+ * @xe: xe device instance
+ *
+ * This does not provide any guarantee that the device is going to remain
+ * suspended, as it might be racing with the runtime state transitions.
+ * It can only be used as a best-effort assertion, for instance to check that
+ * we are not in the sleep state while trying to access some memory.
+ *
+ * Returns true if the PCI device is suspended, false otherwise.
+ */
+bool xe_pm_runtime_suspended(struct xe_device *xe)
+{
+ return pm_runtime_suspended(xe->drm.dev);
+}
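[Editor's note] Given the racy nature noted above, a typical use is as a debug-only sanity check rather than a synchronization point; for instance (sketch):

	/* Sketch: catch attempts to touch the device while it sleeps. */
	drm_WARN_ON(&xe->drm, xe_pm_runtime_suspended(xe));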
+
+/**
+ * xe_pm_runtime_suspend - Prepare our device for D3hot/D3Cold
+ * @xe: xe device instance
+ *
+ * Returns 0 for success, negative error code otherwise.
+ */
int xe_pm_runtime_suspend(struct xe_device *xe)
{
struct xe_bo *bo, *on;
@@ -232,18 +326,15 @@ int xe_pm_runtime_suspend(struct xe_device *xe)
u8 id;
int err = 0;
- if (xe->d3cold.allowed && xe_device_mem_access_ongoing(xe))
- return -EBUSY;
-
/* Disable access_ongoing asserts and prevent recursive pm calls */
xe_pm_write_callback_task(xe, current);
/*
- * The actual xe_device_mem_access_put() is always async underneath, so
+ * The actual xe_pm_runtime_put() is always async underneath, so
* exactly where that is called should make no difference to us. However
* we still need to be very careful with the locks that this callback
* acquires and the locks that are acquired and held by any callers of
- * xe_device_mem_access_get(). We already have the matching annotation
+ * xe_runtime_pm_get(). We already have the matching annotation
* on that side, but we also need it here. For example lockdep should be
* able to tell us if the following scenario is in theory possible:
*
@@ -251,15 +342,15 @@ int xe_pm_runtime_suspend(struct xe_device *xe)
* lock(A) |
* | xe_pm_runtime_suspend()
* | lock(A)
- * xe_device_mem_access_get() |
+ * xe_pm_runtime_get() |
*
* This will clearly deadlock since rpm core needs to wait for
* xe_pm_runtime_suspend() to complete, but here we are holding lock(A)
* on CPU0 which prevents CPU1 making forward progress. With the
- * annotation here and in xe_device_mem_access_get() lockdep will see
+ * annotation here and in xe_pm_runtime_get() lockdep will see
* the potential lock inversion and give us a nice splat.
*/
- lock_map_acquire(&xe_device_mem_access_lockdep_map);
+ lock_map_acquire(&xe_pm_runtime_lockdep_map);
/*
* Applying lock for entire list op as xe_ttm_bo_destroy and xe_bo_move_notify
@@ -285,11 +376,17 @@ int xe_pm_runtime_suspend(struct xe_device *xe)
xe_irq_suspend(xe);
out:
- lock_map_release(&xe_device_mem_access_lockdep_map);
+ lock_map_release(&xe_pm_runtime_lockdep_map);
xe_pm_write_callback_task(xe, NULL);
return err;
}
+/**
+ * xe_pm_runtime_resume - Waking up from D3hot/D3Cold
+ * @xe: xe device instance
+ *
+ * Returns 0 for success, negative error code otherwise.
+ */
int xe_pm_runtime_resume(struct xe_device *xe)
{
struct xe_gt *gt;
@@ -299,7 +396,7 @@ int xe_pm_runtime_resume(struct xe_device *xe)
/* Disable access_ongoing asserts and prevent recursive pm calls */
xe_pm_write_callback_task(xe, current);
- lock_map_acquire(&xe_device_mem_access_lockdep_map);
+ lock_map_acquire(&xe_pm_runtime_lockdep_map);
/*
* It can be possible that xe has allowed d3cold but other pcie devices
@@ -310,11 +407,9 @@ int xe_pm_runtime_resume(struct xe_device *xe)
xe->d3cold.power_lost = xe_guc_in_reset(&gt->uc.guc);
if (xe->d3cold.allowed && xe->d3cold.power_lost) {
- for_each_gt(gt, xe, id) {
- err = xe_pcode_init(gt);
- if (err)
- goto out;
- }
+ err = xe_pcode_ready(xe, true);
+ if (err)
+ goto out;
/*
* This only restores pinned memory which is the memory
@@ -336,27 +431,147 @@ int xe_pm_runtime_resume(struct xe_device *xe)
goto out;
}
out:
- lock_map_release(&xe_device_mem_access_lockdep_map);
+ lock_map_release(&xe_pm_runtime_lockdep_map);
xe_pm_write_callback_task(xe, NULL);
return err;
}
-int xe_pm_runtime_get(struct xe_device *xe)
+/*
+ * In places where resume is synchronous it can be quite easy to deadlock
+ * if we are not careful. Also, in practice it might be quite timing
+ * sensitive to ever see the 0 -> 1 transition with the caller's locks
+ * held, so deadlocks might exist but be hard for lockdep to ever see.
+ * With this in mind, help lockdep learn about the potentially scary
+ * stuff that can happen inside the runtime_resume callback by acquiring
+ * a dummy lock (it doesn't protect anything and gets compiled out on
+ * non-debug builds). Lockdep then only needs to see the
+ * xe_pm_runtime_lockdep_map -> runtime_resume callback once, and then can
+ * hopefully validate all the (callers_locks) -> xe_pm_runtime_lockdep_map.
+ * For example if the (callers_locks) are ever grabbed in the
+ * runtime_resume callback, lockdep should give us a nice splat.
+ */
+static void pm_runtime_lockdep_prime(void)
+{
+ lock_map_acquire(&xe_pm_runtime_lockdep_map);
+ lock_map_release(&xe_pm_runtime_lockdep_map);
+}
+
+/**
+ * xe_pm_runtime_get - Get a runtime_pm reference and resume synchronously
+ * @xe: xe device instance
+ */
+void xe_pm_runtime_get(struct xe_device *xe)
{
- return pm_runtime_get_sync(xe->drm.dev);
+ pm_runtime_get_noresume(xe->drm.dev);
+
+ if (xe_pm_read_callback_task(xe) == current)
+ return;
+
+ pm_runtime_lockdep_prime();
+ pm_runtime_resume(xe->drm.dev);
}
-int xe_pm_runtime_put(struct xe_device *xe)
+/**
+ * xe_pm_runtime_put - Put the runtime_pm reference back and mark as idle
+ * @xe: xe device instance
+ */
+void xe_pm_runtime_put(struct xe_device *xe)
{
- pm_runtime_mark_last_busy(xe->drm.dev);
- return pm_runtime_put(xe->drm.dev);
+ if (xe_pm_read_callback_task(xe) == current) {
+ pm_runtime_put_noidle(xe->drm.dev);
+ } else {
+ pm_runtime_mark_last_busy(xe->drm.dev);
+ pm_runtime_put(xe->drm.dev);
+ }
}
+/**
+ * xe_pm_runtime_get_ioctl - Get a runtime_pm reference before ioctl
+ * @xe: xe device instance
+ *
+ * Returns: Any number greater than or equal to 0 for success, negative error
+ * code otherwise.
+ */
+int xe_pm_runtime_get_ioctl(struct xe_device *xe)
+{
+ if (WARN_ON(xe_pm_read_callback_task(xe) == current))
+ return -ELOOP;
+
+ pm_runtime_lockdep_prime();
+ return pm_runtime_get_sync(xe->drm.dev);
+}
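[Editor's note] A hedged sketch of the intended call pattern at an IOCTL entry point; xe_foo_ioctl and xe_foo_do_work are hypothetical names, while to_xe_device() is the driver's existing helper:

	static int xe_foo_do_work(struct xe_device *xe, void *data);	/* hypothetical */

	static int xe_foo_ioctl(struct drm_device *dev, void *data,
				struct drm_file *file)
	{
		struct xe_device *xe = to_xe_device(dev);
		int ret;

		ret = xe_pm_runtime_get_ioctl(xe);
		if (ret >= 0)
			ret = xe_foo_do_work(xe, data);
		xe_pm_runtime_put(xe);	/* put unconditionally: the ref was taken even on error */

		return ret < 0 ? ret : 0;
	}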
+
+/**
+ * xe_pm_runtime_get_if_active - Get a runtime_pm reference if device active
+ * @xe: xe device instance
+ *
+ * Returns: Any number greater than or equal to 0 for success, negative error
+ * code otherwise.
+ */
int xe_pm_runtime_get_if_active(struct xe_device *xe)
{
return pm_runtime_get_if_active(xe->drm.dev);
}
+/**
+ * xe_pm_runtime_get_if_in_use - Get a runtime_pm reference and resume if needed
+ * @xe: xe device instance
+ *
+ * Returns: True if device is awake and the reference was taken, false otherwise.
+ */
+bool xe_pm_runtime_get_if_in_use(struct xe_device *xe)
+{
+ if (xe_pm_read_callback_task(xe) == current) {
+ /* The device is awake, grab the ref and move on */
+ pm_runtime_get_noresume(xe->drm.dev);
+ return true;
+ }
+
+ return pm_runtime_get_if_in_use(xe->drm.dev) > 0;
+}
+
+/**
+ * xe_pm_runtime_get_noresume - Bump runtime PM usage counter without resuming
+ * @xe: xe device instance
+ *
+ * This function should be used in inner places where it is surely already
+ * protected by outer-bound callers of `xe_pm_runtime_get`.
+ * It will warn if not protected.
+ * The reference should still be put back after this function, since it
+ * always bumps the usage counter, regardless of protection.
+ */
+void xe_pm_runtime_get_noresume(struct xe_device *xe)
+{
+ bool ref;
+
+ ref = xe_pm_runtime_get_if_in_use(xe);
+
+ if (drm_WARN(&xe->drm, !ref, "Missing outer runtime PM protection\n"))
+ pm_runtime_get_noresume(xe->drm.dev);
+}
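[Editor's note] A sketch of the intended inner/outer pairing, assuming the outer path already holds the real reference (both helpers are hypothetical):

	static void inner_helper(struct xe_device *xe)
	{
		xe_pm_runtime_get_noresume(xe);	/* warns if the outer get is missing */
		/* ... access device memory ... */
		xe_pm_runtime_put(xe);		/* the counter was bumped regardless */
	}

	static void outer_path(struct xe_device *xe)
	{
		xe_pm_runtime_get(xe);		/* the real, resuming reference */
		inner_helper(xe);
		xe_pm_runtime_put(xe);
	}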
+
+/**
+ * xe_pm_runtime_resume_and_get - Resume, then get a runtime_pm ref if awake.
+ * @xe: xe device instance
+ *
+ * Returns: True if device is awake and the reference was taken, false otherwise.
+ */
+bool xe_pm_runtime_resume_and_get(struct xe_device *xe)
+{
+ if (xe_pm_read_callback_task(xe) == current) {
+ /* The device is awake, grab the ref and move on */
+ pm_runtime_get_noresume(xe->drm.dev);
+ return true;
+ }
+
+ pm_runtime_lockdep_prime();
+ return pm_runtime_resume_and_get(xe->drm.dev) >= 0;
+}
+
+/**
+ * xe_pm_assert_unbounded_bridge - Disable PM on unbounded pcie parent bridge
+ * @xe: xe device instance
+ */
void xe_pm_assert_unbounded_bridge(struct xe_device *xe)
{
struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
@@ -371,6 +586,13 @@ void xe_pm_assert_unbounded_bridge(struct xe_device *xe)
}
}
+/**
+ * xe_pm_set_vram_threshold - Set a vram threshold for allowing/blocking D3Cold
+ * @xe: xe device instance
+ * @threshold: VRAM size in MiB for the D3cold threshold
+ *
+ * Returns 0 for success, negative error code otherwise.
+ */
int xe_pm_set_vram_threshold(struct xe_device *xe, u32 threshold)
{
struct ttm_resource_manager *man;
@@ -395,6 +617,13 @@ int xe_pm_set_vram_threshold(struct xe_device *xe, u32 threshold)
return 0;
}
+/**
+ * xe_pm_d3cold_allowed_toggle - Check conditions to toggle d3cold.allowed
+ * @xe: xe device instance
+ *
+ * To be called during the runtime_pm idle callback.
+ * Checks all the D3Cold conditions ahead of runtime suspend.
+ */
void xe_pm_d3cold_allowed_toggle(struct xe_device *xe)
{
struct ttm_resource_manager *man;
diff --git a/drivers/gpu/drm/xe/xe_pm.h b/drivers/gpu/drm/xe/xe_pm.h
index 64a97c6726a7..18b0613fe57b 100644
--- a/drivers/gpu/drm/xe/xe_pm.h
+++ b/drivers/gpu/drm/xe/xe_pm.h
@@ -20,14 +20,19 @@ struct xe_device;
int xe_pm_suspend(struct xe_device *xe);
int xe_pm_resume(struct xe_device *xe);
-void xe_pm_init_early(struct xe_device *xe);
-void xe_pm_init(struct xe_device *xe);
+int xe_pm_init_early(struct xe_device *xe);
+int xe_pm_init(struct xe_device *xe);
void xe_pm_runtime_fini(struct xe_device *xe);
+bool xe_pm_runtime_suspended(struct xe_device *xe);
int xe_pm_runtime_suspend(struct xe_device *xe);
int xe_pm_runtime_resume(struct xe_device *xe);
-int xe_pm_runtime_get(struct xe_device *xe);
-int xe_pm_runtime_put(struct xe_device *xe);
+void xe_pm_runtime_get(struct xe_device *xe);
+int xe_pm_runtime_get_ioctl(struct xe_device *xe);
+void xe_pm_runtime_put(struct xe_device *xe);
int xe_pm_runtime_get_if_active(struct xe_device *xe);
+bool xe_pm_runtime_get_if_in_use(struct xe_device *xe);
+void xe_pm_runtime_get_noresume(struct xe_device *xe);
+bool xe_pm_runtime_resume_and_get(struct xe_device *xe);
void xe_pm_assert_unbounded_bridge(struct xe_device *xe);
int xe_pm_set_vram_threshold(struct xe_device *xe, u32 threshold);
void xe_pm_d3cold_allowed_toggle(struct xe_device *xe);
diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
index 4efc8c1a3d7a..5b7930f46cf3 100644
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -5,6 +5,7 @@
#include "xe_pt.h"
+#include "regs/xe_gtt_defs.h"
#include "xe_bo.h"
#include "xe_device.h"
#include "xe_drm_client.h"
@@ -108,11 +109,11 @@ struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_tile *tile,
pt->level = level;
bo = xe_bo_create_pin_map(vm->xe, tile, vm, SZ_4K,
ttm_bo_type_kernel,
- XE_BO_CREATE_VRAM_IF_DGFX(tile) |
- XE_BO_CREATE_IGNORE_MIN_PAGE_SIZE_BIT |
- XE_BO_CREATE_PINNED_BIT |
- XE_BO_CREATE_NO_RESV_EVICT |
- XE_BO_PAGETABLE);
+ XE_BO_FLAG_VRAM_IF_DGFX(tile) |
+ XE_BO_FLAG_IGNORE_MIN_PAGE_SIZE |
+ XE_BO_FLAG_PINNED |
+ XE_BO_FLAG_NO_RESV_EVICT |
+ XE_BO_FLAG_PAGETABLE);
if (IS_ERR(bo)) {
err = PTR_ERR(bo);
goto err_kfree;
@@ -618,7 +619,7 @@ xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma,
struct xe_pt *pt = xe_vma_vm(vma)->pt_root[tile->id];
int ret;
- if (vma && (vma->gpuva.flags & XE_VMA_ATOMIC_PTE_BIT) &&
+ if ((vma->gpuva.flags & XE_VMA_ATOMIC_PTE_BIT) &&
(is_devmem || !IS_DGFX(xe)))
xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE;
diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c
index 075f9eaef031..df407d73e5f5 100644
--- a/drivers/gpu/drm/xe/xe_query.c
+++ b/drivers/gpu/drm/xe/xe_query.c
@@ -12,6 +12,7 @@
#include <drm/xe_drm.h>
#include "regs/xe_engine_regs.h"
+#include "regs/xe_gt_regs.h"
#include "xe_bo.h"
#include "xe_device.h"
#include "xe_exec_queue.h"
@@ -147,8 +148,8 @@ query_engine_cycles(struct xe_device *xe,
if (!hwe)
return -EINVAL;
- xe_device_mem_access_get(xe);
- xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+ if (xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL))
+ return -EIO;
__read_timestamps(gt,
RING_TIMESTAMP(hwe->mmio_base),
@@ -159,7 +160,6 @@ query_engine_cycles(struct xe_device *xe,
cpu_clock);
xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL);
- xe_device_mem_access_put(xe);
resp.width = 36;
/* Only write to the output fields of user query */
@@ -403,6 +403,13 @@ static int query_gt_list(struct xe_device *xe, struct drm_xe_device_query *query
BIT(gt_to_tile(gt)->id) << 1;
gt_list->gt_list[id].far_mem_regions = xe->info.mem_region_mask ^
gt_list->gt_list[id].near_mem_regions;
+
+ gt_list->gt_list[id].ip_ver_major =
+ REG_FIELD_GET(GMD_ID_ARCH_MASK, gt->info.gmdid);
+ gt_list->gt_list[id].ip_ver_minor =
+ REG_FIELD_GET(GMD_ID_RELEASE_MASK, gt->info.gmdid);
+ gt_list->gt_list[id].ip_ver_rev =
+ REG_FIELD_GET(GMD_ID_REVID, gt->info.gmdid);
}
if (copy_to_user(query_ptr, gt_list, size)) {
@@ -433,9 +440,7 @@ static int query_hwconfig(struct xe_device *xe,
if (!hwconfig)
return -ENOMEM;
- xe_device_mem_access_get(xe);
xe_guc_hwconfig_copy(&gt->uc.guc, hwconfig);
- xe_device_mem_access_put(xe);
if (copy_to_user(query_ptr, hwconfig, size)) {
kfree(hwconfig);
@@ -544,14 +549,44 @@ query_uc_fw_version(struct xe_device *xe, struct drm_xe_device_query *query)
version = &guc->fw.versions.found[XE_UC_FW_VER_COMPATIBILITY];
break;
}
+ case XE_QUERY_UC_TYPE_HUC: {
+ struct xe_gt *media_gt = NULL;
+ struct xe_huc *huc;
+
+ if (MEDIA_VER(xe) >= 13) {
+ struct xe_tile *tile;
+ u8 gt_id;
+
+ for_each_tile(tile, xe, gt_id) {
+ if (tile->media_gt) {
+ media_gt = tile->media_gt;
+ break;
+ }
+ }
+ } else {
+ media_gt = xe->tiles[0].primary_gt;
+ }
+
+ if (!media_gt)
+ break;
+
+ huc = &media_gt->uc.huc;
+ if (huc->fw.status == XE_UC_FIRMWARE_RUNNING)
+ version = &huc->fw.versions.found[XE_UC_FW_VER_RELEASE];
+ break;
+ }
default:
return -EINVAL;
}
- resp.branch_ver = 0;
- resp.major_ver = version->major;
- resp.minor_ver = version->minor;
- resp.patch_ver = version->patch;
+ if (version) {
+ resp.branch_ver = 0;
+ resp.major_ver = version->major;
+ resp.minor_ver = version->minor;
+ resp.patch_ver = version->patch;
+ } else {
+ return -ENODEV;
+ }
if (copy_to_user(query_ptr, &resp, size))
return -EFAULT;
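[Editor's note] For reference, a hedged userspace sketch of exercising the new HuC branch through the device query ioctl. The response field names match the kernel side in the hunk above; the query-struct plumbing follows the standard xe uAPI pattern, but includes and error handling are abbreviated, so treat this as an assumption-laden illustration rather than canonical usage:

	struct drm_xe_query_uc_fw_version resp = {
		.uc_type = XE_QUERY_UC_TYPE_HUC,
	};
	struct drm_xe_device_query query = {
		.query = DRM_XE_DEVICE_QUERY_UC_FW_VERSION,
		.size = sizeof(resp),
		.data = (uintptr_t)&resp,
	};

	/* fd is an open xe render node; -ENODEV means HuC is not running */
	if (ioctl(fd, DRM_IOCTL_XE_DEVICE_QUERY, &query) == 0)
		printf("HuC %u.%u.%u\n", resp.major_ver, resp.minor_ver,
		       resp.patch_ver);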
diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c
index 5b2b37b59813..d42b3f33bd7a 100644
--- a/drivers/gpu/drm/xe/xe_ring_ops.c
+++ b/drivers/gpu/drm/xe/xe_ring_ops.c
@@ -17,6 +17,7 @@
#include "xe_lrc.h"
#include "xe_macros.h"
#include "xe_sched_job.h"
+#include "xe_sriov.h"
#include "xe_vm_types.h"
#include "xe_vm.h"
#include "xe_wa.h"
@@ -367,10 +368,12 @@ static void emit_migration_job_gen12(struct xe_sched_job *job,
i = emit_bb_start(job->batch_addr[0], BIT(8), dw, i);
- /* XXX: Do we need this? Leaving for now. */
- dw[i++] = preparser_disable(true);
- i = emit_flush_invalidate(0, dw, i);
- dw[i++] = preparser_disable(false);
+ if (!IS_SRIOV_VF(gt_to_xe(job->q->gt))) {
+ /* XXX: Do we need this? Leaving for now. */
+ dw[i++] = preparser_disable(true);
+ i = emit_flush_invalidate(0, dw, i);
+ dw[i++] = preparser_disable(false);
+ }
i = emit_bb_start(job->batch_addr[1], BIT(8), dw, i);
diff --git a/drivers/gpu/drm/xe/xe_sa.c b/drivers/gpu/drm/xe/xe_sa.c
index 2c4632259edd..8941522b7705 100644
--- a/drivers/gpu/drm/xe/xe_sa.c
+++ b/drivers/gpu/drm/xe/xe_sa.c
@@ -48,8 +48,9 @@ struct xe_sa_manager *xe_sa_bo_manager_init(struct xe_tile *tile, u32 size, u32
sa_manager->bo = NULL;
bo = xe_bo_create_pin_map(xe, tile, NULL, size, ttm_bo_type_kernel,
- XE_BO_CREATE_VRAM_IF_DGFX(tile) |
- XE_BO_CREATE_GGTT_BIT);
+ XE_BO_FLAG_VRAM_IF_DGFX(tile) |
+ XE_BO_FLAG_GGTT |
+ XE_BO_FLAG_GGTT_INVALIDATE);
if (IS_ERR(bo)) {
drm_err(&xe->drm, "failed to allocate bo for sa manager: %ld\n",
PTR_ERR(bo));
diff --git a/drivers/gpu/drm/xe/xe_sched_job.c b/drivers/gpu/drm/xe/xe_sched_job.c
index b0c7fa4693cf..cd8a2fba5438 100644
--- a/drivers/gpu/drm/xe/xe_sched_job.c
+++ b/drivers/gpu/drm/xe/xe_sched_job.c
@@ -5,6 +5,7 @@
#include "xe_sched_job.h"
+#include <drm/xe_drm.h>
#include <linux/dma-fence-array.h>
#include <linux/slab.h>
@@ -15,6 +16,8 @@
#include "xe_hw_fence.h"
#include "xe_lrc.h"
#include "xe_macros.h"
+#include "xe_pm.h"
+#include "xe_sync_types.h"
#include "xe_trace.h"
#include "xe_vm.h"
@@ -157,7 +160,7 @@ struct xe_sched_job *xe_sched_job_create(struct xe_exec_queue *q,
/* All other jobs require a VM to be open which has a ref */
if (unlikely(q->flags & EXEC_QUEUE_FLAG_KERNEL))
- xe_device_mem_access_get(job_to_xe(job));
+ xe_pm_runtime_get_noresume(job_to_xe(job));
xe_device_assert_mem_access(job_to_xe(job));
trace_xe_sched_job_create(job);
@@ -190,7 +193,7 @@ void xe_sched_job_destroy(struct kref *ref)
container_of(ref, struct xe_sched_job, refcount);
if (unlikely(job->q->flags & EXEC_QUEUE_FLAG_KERNEL))
- xe_device_mem_access_put(job_to_xe(job));
+ xe_pm_runtime_put(job_to_xe(job));
xe_exec_queue_put(job->q);
dma_fence_put(job->fence);
drm_sched_job_cleanup(&job->drm);
@@ -288,6 +291,22 @@ int xe_sched_job_last_fence_add_dep(struct xe_sched_job *job, struct xe_vm *vm)
return drm_sched_job_add_dependency(&job->drm, fence);
}
+/**
+ * xe_sched_job_init_user_fence - Initialize user_fence for the job
+ * @job: job whose user_fence needs an init
+ * @sync: sync entry used to initialize the user_fence
+ */
+void xe_sched_job_init_user_fence(struct xe_sched_job *job,
+ struct xe_sync_entry *sync)
+{
+ if (sync->type != DRM_XE_SYNC_TYPE_USER_FENCE)
+ return;
+
+ job->user_fence.used = true;
+ job->user_fence.addr = sync->addr;
+ job->user_fence.value = sync->timeline_value;
+}
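[Editor's note] A hypothetical caller sketch: since the function filters on the sync type itself, a submission path can simply offer every entry (a later user fence overwrites an earlier one, matching the single user_fence slot):

	/* i, num_syncs and syncs[] come from the surrounding submission path */
	for (i = 0; i < num_syncs; i++)
		xe_sched_job_init_user_fence(job, &syncs[i]);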
+
struct xe_sched_job_snapshot *
xe_sched_job_snapshot_capture(struct xe_sched_job *job)
{
diff --git a/drivers/gpu/drm/xe/xe_sched_job.h b/drivers/gpu/drm/xe/xe_sched_job.h
index f1a660648cf0..c75018f4660d 100644
--- a/drivers/gpu/drm/xe/xe_sched_job.h
+++ b/drivers/gpu/drm/xe/xe_sched_job.h
@@ -10,6 +10,7 @@
struct drm_printer;
struct xe_vm;
+struct xe_sync_entry;
#define XE_SCHED_HANG_LIMIT 1
#define XE_SCHED_JOB_TIMEOUT LONG_MAX
@@ -58,6 +59,8 @@ void xe_sched_job_arm(struct xe_sched_job *job);
void xe_sched_job_push(struct xe_sched_job *job);
int xe_sched_job_last_fence_add_dep(struct xe_sched_job *job, struct xe_vm *vm);
+void xe_sched_job_init_user_fence(struct xe_sched_job *job,
+ struct xe_sync_entry *sync);
static inline struct xe_sched_job *
to_xe_sched_job(struct drm_sched_job *drm)
diff --git a/drivers/gpu/drm/xe/xe_sriov.c b/drivers/gpu/drm/xe/xe_sriov.c
index f295d91886b1..1c3fa84b6adb 100644
--- a/drivers/gpu/drm/xe/xe_sriov.c
+++ b/drivers/gpu/drm/xe/xe_sriov.c
@@ -5,8 +5,13 @@
#include <drm/drm_managed.h>
+#include "regs/xe_sriov_regs.h"
+
#include "xe_assert.h"
+#include "xe_device.h"
+#include "xe_mmio.h"
#include "xe_sriov.h"
+#include "xe_sriov_pf.h"
/**
* xe_sriov_mode_to_string - Convert enum value to string.
@@ -28,10 +33,16 @@ const char *xe_sriov_mode_to_string(enum xe_sriov_mode mode)
}
}
+static bool test_is_vf(struct xe_device *xe)
+{
+ u32 value = xe_mmio_read32(xe_root_mmio_gt(xe), VF_CAP_REG);
+
+ return value & VF_CAP;
+}
+
/**
* xe_sriov_probe_early - Probe a SR-IOV mode.
* @xe: the &xe_device to probe mode on
- * @has_sriov: flag indicating hardware support for SR-IOV
*
* This function should be called only once and as soon as possible during
* driver probe to detect whether we are running an SR-IOV Physical Function
@@ -40,12 +51,17 @@ const char *xe_sriov_mode_to_string(enum xe_sriov_mode mode)
* SR-IOV PF mode detection is based on the PCI dev_is_pf() function.
* SR-IOV VF mode detection is based on a dedicated MMIO register read.
*/
-void xe_sriov_probe_early(struct xe_device *xe, bool has_sriov)
+void xe_sriov_probe_early(struct xe_device *xe)
{
enum xe_sriov_mode mode = XE_SRIOV_MODE_NONE;
+ bool has_sriov = xe->info.has_sriov;
- /* TODO: replace with proper mode detection */
- xe_assert(xe, !has_sriov);
+ if (has_sriov) {
+ if (test_is_vf(xe))
+ mode = XE_SRIOV_MODE_VF;
+ else if (xe_sriov_pf_readiness(xe))
+ mode = XE_SRIOV_MODE_PF;
+ }
xe_assert(xe, !xe->sriov.__mode);
xe->sriov.__mode = mode;
@@ -78,6 +94,13 @@ int xe_sriov_init(struct xe_device *xe)
if (!IS_SRIOV(xe))
return 0;
+ if (IS_SRIOV_PF(xe)) {
+ int err = xe_sriov_pf_init_early(xe);
+
+ if (err)
+ return err;
+ }
+
xe_assert(xe, !xe->sriov.wq);
xe->sriov.wq = alloc_workqueue("xe-sriov-wq", 0, 0);
if (!xe->sriov.wq)
@@ -85,3 +108,34 @@ int xe_sriov_init(struct xe_device *xe)
return drmm_add_action_or_reset(&xe->drm, fini_sriov, xe);
}
+
+/**
+ * xe_sriov_print_info - Print basic SR-IOV information.
+ * @xe: the &xe_device to print info from
+ * @p: the &drm_printer
+ *
+ * Print SR-IOV related information into provided DRM printer.
+ */
+void xe_sriov_print_info(struct xe_device *xe, struct drm_printer *p)
+{
+ drm_printf(p, "supported: %s\n", str_yes_no(xe_device_has_sriov(xe)));
+ drm_printf(p, "enabled: %s\n", str_yes_no(IS_SRIOV(xe)));
+ drm_printf(p, "mode: %s\n", xe_sriov_mode_to_string(xe_device_sriov_mode(xe)));
+}
+
+/**
+ * xe_sriov_function_name() - Get SR-IOV Function name.
+ * @n: the Function number (identifier) to get name of
+ * @buf: the buffer to format to
+ * @size: size of the buffer (shall be at least 5 bytes)
+ *
+ * Return: formatted function name ("PF" or "VF%u").
+ */
+const char *xe_sriov_function_name(unsigned int n, char *buf, size_t size)
+{
+ if (n)
+ snprintf(buf, size, "VF%u", n);
+ else
+ strscpy(buf, "PF", size);
+ return buf;
+}
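[Editor's note] Usage sketch (log_function is a hypothetical caller; the buffer sizing follows the kernel-doc requirement above):

	static void log_function(struct xe_device *xe, unsigned int vfid)
	{
		char name[8];	/* >= 5 bytes, per the kernel-doc above */

		drm_info(&xe->drm, "configuring %s\n",
			 xe_sriov_function_name(vfid, name, sizeof(name)));
	}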
diff --git a/drivers/gpu/drm/xe/xe_sriov.h b/drivers/gpu/drm/xe/xe_sriov.h
index 1545552162c9..486bb21c3256 100644
--- a/drivers/gpu/drm/xe/xe_sriov.h
+++ b/drivers/gpu/drm/xe/xe_sriov.h
@@ -10,9 +10,13 @@
#include "xe_device_types.h"
#include "xe_sriov_types.h"
+struct drm_printer;
+
const char *xe_sriov_mode_to_string(enum xe_sriov_mode mode);
+const char *xe_sriov_function_name(unsigned int n, char *buf, size_t len);
-void xe_sriov_probe_early(struct xe_device *xe, bool has_sriov);
+void xe_sriov_probe_early(struct xe_device *xe);
+void xe_sriov_print_info(struct xe_device *xe, struct drm_printer *p);
int xe_sriov_init(struct xe_device *xe);
static inline enum xe_sriov_mode xe_device_sriov_mode(struct xe_device *xe)
diff --git a/drivers/gpu/drm/xe/xe_sriov_pf.c b/drivers/gpu/drm/xe/xe_sriov_pf.c
new file mode 100644
index 000000000000..0f721ae17b26
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_sriov_pf.c
@@ -0,0 +1,104 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023-2024 Intel Corporation
+ */
+
+#include <drm/drm_managed.h>
+
+#include "xe_assert.h"
+#include "xe_device.h"
+#include "xe_module.h"
+#include "xe_sriov.h"
+#include "xe_sriov_pf.h"
+#include "xe_sriov_printk.h"
+
+static unsigned int wanted_max_vfs(struct xe_device *xe)
+{
+ return xe_modparam.max_vfs;
+}
+
+static int pf_reduce_totalvfs(struct xe_device *xe, int limit)
+{
+ struct device *dev = xe->drm.dev;
+ struct pci_dev *pdev = to_pci_dev(dev);
+ int err;
+
+ err = pci_sriov_set_totalvfs(pdev, limit);
+ if (err)
+ xe_sriov_notice(xe, "Failed to set number of VFs to %d (%pe)\n",
+ limit, ERR_PTR(err));
+ return err;
+}
+
+static bool pf_continue_as_native(struct xe_device *xe, const char *why)
+{
+ xe_sriov_dbg(xe, "%s, continuing as native\n", why);
+ pf_reduce_totalvfs(xe, 0);
+ return false;
+}
+
+/**
+ * xe_sriov_pf_readiness - Check if PF functionality can be enabled.
+ * @xe: the &xe_device to check
+ *
+ * This function is called as part of the SR-IOV probe to validate if all
+ * PF prerequisites are satisfied and we can continue with enabling PF mode.
+ *
+ * Return: true if the PF mode can be turned on.
+ */
+bool xe_sriov_pf_readiness(struct xe_device *xe)
+{
+ struct device *dev = xe->drm.dev;
+ struct pci_dev *pdev = to_pci_dev(dev);
+ int totalvfs = pci_sriov_get_totalvfs(pdev);
+ int newlimit = min_t(u16, wanted_max_vfs(xe), totalvfs);
+
+ xe_assert(xe, totalvfs <= U16_MAX);
+
+ if (!dev_is_pf(dev))
+ return false;
+
+ if (!xe_device_uc_enabled(xe))
+ return pf_continue_as_native(xe, "Guc submission disabled");
+
+ if (!newlimit)
+ return pf_continue_as_native(xe, "all VFs disabled");
+
+ pf_reduce_totalvfs(xe, newlimit);
+
+ xe->sriov.pf.device_total_vfs = totalvfs;
+ xe->sriov.pf.driver_max_vfs = newlimit;
+
+ return true;
+}
+
+/**
+ * xe_sriov_pf_init_early - Initialize SR-IOV PF specific data.
+ * @xe: the &xe_device to initialize
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_sriov_pf_init_early(struct xe_device *xe)
+{
+ xe_assert(xe, IS_SRIOV_PF(xe));
+
+ return drmm_mutex_init(&xe->drm, &xe->sriov.pf.master_lock);
+}
+
+/**
+ * xe_sriov_pf_print_vfs_summary - Print SR-IOV PF information.
+ * @xe: the &xe_device to print info from
+ * @p: the &drm_printer
+ *
+ * Print SR-IOV PF related information into provided DRM printer.
+ */
+void xe_sriov_pf_print_vfs_summary(struct xe_device *xe, struct drm_printer *p)
+{
+ struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
+
+ xe_assert(xe, IS_SRIOV_PF(xe));
+
+ drm_printf(p, "total: %u\n", xe->sriov.pf.device_total_vfs);
+ drm_printf(p, "supported: %u\n", xe->sriov.pf.driver_max_vfs);
+ drm_printf(p, "enabled: %u\n", pci_num_vf(pdev));
+}
diff --git a/drivers/gpu/drm/xe/xe_sriov_pf.h b/drivers/gpu/drm/xe/xe_sriov_pf.h
new file mode 100644
index 000000000000..d1220e70e1c0
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_sriov_pf.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023-2024 Intel Corporation
+ */
+
+#ifndef _XE_SRIOV_PF_H_
+#define _XE_SRIOV_PF_H_
+
+#include <linux/types.h>
+
+struct drm_printer;
+struct xe_device;
+
+#ifdef CONFIG_PCI_IOV
+bool xe_sriov_pf_readiness(struct xe_device *xe);
+int xe_sriov_pf_init_early(struct xe_device *xe);
+void xe_sriov_pf_print_vfs_summary(struct xe_device *xe, struct drm_printer *p);
+#else
+static inline bool xe_sriov_pf_readiness(struct xe_device *xe)
+{
+ return false;
+}
+
+static inline int xe_sriov_pf_init_early(struct xe_device *xe)
+{
+ return 0;
+}
+#endif
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_helpers.h b/drivers/gpu/drm/xe/xe_sriov_pf_helpers.h
new file mode 100644
index 000000000000..7d156ba82479
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_sriov_pf_helpers.h
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023-2024 Intel Corporation
+ */
+
+#ifndef _XE_SRIOV_PF_HELPERS_H_
+#define _XE_SRIOV_PF_HELPERS_H_
+
+#include "xe_assert.h"
+#include "xe_device_types.h"
+#include "xe_sriov.h"
+#include "xe_sriov_types.h"
+
+/**
+ * xe_sriov_pf_assert_vfid() - warn if @vfid is not a supported VF number when debugging.
+ * @xe: the PF &xe_device to assert on
+ * @vfid: the VF number to assert
+ *
+ * Assert that @xe represents the Physical Function (PF) device and that the
+ * provided @vfid is within the range of supported VF numbers (up to the
+ * maximum number of VFs that the driver can support, including VF0 that
+ * represents the PF itself).
+ *
+ * Note: Effective only on debug builds. See `Xe ASSERTs`_ for more information.
+ */
+#define xe_sriov_pf_assert_vfid(xe, vfid) \
+ xe_assert((xe), (vfid) <= xe_sriov_pf_get_totalvfs(xe))
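[Editor's note] For example (a sketch; pf_config_vf is hypothetical), a PF-side helper taking a VF number would typically open with this assert:

	static void pf_config_vf(struct xe_device *xe, unsigned int vfid)
	{
		xe_sriov_pf_assert_vfid(xe, vfid);	/* compiled out on non-debug builds */

		/* ... per-VF configuration, under xe_sriov_pf_master_mutex(xe) ... */
	}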
+
+/**
+ * xe_sriov_pf_get_totalvfs() - Get maximum number of VFs that driver can support.
+ * @xe: the &xe_device to query (shall be PF)
+ *
+ * Return: Maximum number of VFs that this PF driver supports.
+ */
+static inline int xe_sriov_pf_get_totalvfs(struct xe_device *xe)
+{
+ xe_assert(xe, IS_SRIOV_PF(xe));
+ return xe->sriov.pf.driver_max_vfs;
+}
+
+static inline struct mutex *xe_sriov_pf_master_mutex(struct xe_device *xe)
+{
+ xe_assert(xe, IS_SRIOV_PF(xe));
+ return &xe->sriov.pf.master_lock;
+}
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_sriov_types.h b/drivers/gpu/drm/xe/xe_sriov_types.h
index 1a138108d139..c7b7ad4af5c8 100644
--- a/drivers/gpu/drm/xe/xe_sriov_types.h
+++ b/drivers/gpu/drm/xe/xe_sriov_types.h
@@ -7,6 +7,8 @@
#define _XE_SRIOV_TYPES_H_
#include <linux/build_bug.h>
+#include <linux/mutex.h>
+#include <linux/types.h>
/**
* VFID - Virtual Function Identifier
@@ -37,4 +39,21 @@ enum xe_sriov_mode {
};
static_assert(XE_SRIOV_MODE_NONE);
+/**
+ * struct xe_device_pf - Xe PF related data
+ *
+ * The data in this structure is valid only if the driver is running in the
+ * @XE_SRIOV_MODE_PF mode.
+ */
+struct xe_device_pf {
+ /** @device_total_vfs: Maximum number of VFs supported by the device. */
+ u16 device_total_vfs;
+
+ /** @driver_max_vfs: Maximum number of VFs supported by the driver. */
+ u16 driver_max_vfs;
+
+ /** @master_lock: protects all VF configurations across GTs */
+ struct mutex master_lock;
+};
+
#endif
diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c
index 02c9577fe418..65f1f1628235 100644
--- a/drivers/gpu/drm/xe/xe_sync.c
+++ b/drivers/gpu/drm/xe/xe_sync.c
@@ -224,8 +224,7 @@ int xe_sync_entry_add_deps(struct xe_sync_entry *sync, struct xe_sched_job *job)
return 0;
}
-void xe_sync_entry_signal(struct xe_sync_entry *sync, struct xe_sched_job *job,
- struct dma_fence *fence)
+void xe_sync_entry_signal(struct xe_sync_entry *sync, struct dma_fence *fence)
{
if (!(sync->flags & DRM_XE_SYNC_FLAG_SIGNAL))
return;
@@ -254,10 +253,6 @@ void xe_sync_entry_signal(struct xe_sync_entry *sync, struct xe_sched_job *job,
user_fence_put(sync->ufence);
dma_fence_put(fence);
}
- } else if (sync->type == DRM_XE_SYNC_TYPE_USER_FENCE) {
- job->user_fence.used = true;
- job->user_fence.addr = sync->addr;
- job->user_fence.value = sync->timeline_value;
}
}
diff --git a/drivers/gpu/drm/xe/xe_sync.h b/drivers/gpu/drm/xe/xe_sync.h
index 0fd0d51208e6..3e03396af2c6 100644
--- a/drivers/gpu/drm/xe/xe_sync.h
+++ b/drivers/gpu/drm/xe/xe_sync.h
@@ -26,7 +26,6 @@ int xe_sync_entry_wait(struct xe_sync_entry *sync);
int xe_sync_entry_add_deps(struct xe_sync_entry *sync,
struct xe_sched_job *job);
void xe_sync_entry_signal(struct xe_sync_entry *sync,
- struct xe_sched_job *job,
struct dma_fence *fence);
void xe_sync_entry_cleanup(struct xe_sync_entry *sync);
struct dma_fence *
diff --git a/drivers/gpu/drm/xe/xe_tile.c b/drivers/gpu/drm/xe/xe_tile.c
index 0650b2fa75ef..15ea0a942f67 100644
--- a/drivers/gpu/drm/xe/xe_tile.c
+++ b/drivers/gpu/drm/xe/xe_tile.c
@@ -160,24 +160,19 @@ int xe_tile_init_noalloc(struct xe_tile *tile)
{
int err;
- xe_device_mem_access_get(tile_to_xe(tile));
-
err = tile_ttm_mgr_init(tile);
if (err)
- goto err_mem_access;
+ return err;
tile->mem.kernel_bb_pool = xe_sa_bo_manager_init(tile, SZ_1M, 16);
- if (IS_ERR(tile->mem.kernel_bb_pool)) {
- err = PTR_ERR(tile->mem.kernel_bb_pool);
- goto err_mem_access;
- }
+ if (IS_ERR(tile->mem.kernel_bb_pool))
+ return PTR_ERR(tile->mem.kernel_bb_pool);
+
xe_wa_apply_tile_workarounds(tile);
- xe_tile_sysfs_init(tile);
+ err = xe_tile_sysfs_init(tile);
-err_mem_access:
- xe_device_mem_access_put(tile_to_xe(tile));
- return err;
+ return err;
}
void xe_tile_migrate_wait(struct xe_tile *tile)
diff --git a/drivers/gpu/drm/xe/xe_tile_sysfs.c b/drivers/gpu/drm/xe/xe_tile_sysfs.c
index 0662968d7bcb..64661403afcd 100644
--- a/drivers/gpu/drm/xe/xe_tile_sysfs.c
+++ b/drivers/gpu/drm/xe/xe_tile_sysfs.c
@@ -7,6 +7,7 @@
#include <linux/sysfs.h>
#include <drm/drm_managed.h>
+#include "xe_pm.h"
#include "xe_tile.h"
#include "xe_tile_sysfs.h"
#include "xe_vram_freq.h"
@@ -28,7 +29,7 @@ static void tile_sysfs_fini(struct drm_device *drm, void *arg)
kobject_put(tile->sysfs);
}
-void xe_tile_sysfs_init(struct xe_tile *tile)
+int xe_tile_sysfs_init(struct xe_tile *tile)
{
struct xe_device *xe = tile_to_xe(tile);
struct device *dev = xe->drm.dev;
@@ -37,7 +38,7 @@ void xe_tile_sysfs_init(struct xe_tile *tile)
kt = kzalloc(sizeof(*kt), GFP_KERNEL);
if (!kt)
- return;
+ return -ENOMEM;
kobject_init(&kt->base, &xe_tile_sysfs_kobj_type);
kt->tile = tile;
@@ -45,16 +46,14 @@ void xe_tile_sysfs_init(struct xe_tile *tile)
err = kobject_add(&kt->base, &dev->kobj, "tile%d", tile->id);
if (err) {
kobject_put(&kt->base);
- drm_warn(&xe->drm, "failed to register TILE sysfs directory, err: %d\n", err);
- return;
+ return err;
}
tile->sysfs = &kt->base;
- xe_vram_freq_sysfs_init(tile);
-
- err = drmm_add_action_or_reset(&xe->drm, tile_sysfs_fini, tile);
+ err = xe_vram_freq_sysfs_init(tile);
if (err)
- drm_warn(&xe->drm, "%s: drmm_add_action_or_reset failed, err: %d\n",
- __func__, err);
+ return err;
+
+ return drmm_add_action_or_reset(&xe->drm, tile_sysfs_fini, tile);
}
diff --git a/drivers/gpu/drm/xe/xe_tile_sysfs.h b/drivers/gpu/drm/xe/xe_tile_sysfs.h
index e4f065039eba..54a2ba8ba533 100644
--- a/drivers/gpu/drm/xe/xe_tile_sysfs.h
+++ b/drivers/gpu/drm/xe/xe_tile_sysfs.h
@@ -8,7 +8,7 @@
#include "xe_tile_sysfs_types.h"
-void xe_tile_sysfs_init(struct xe_tile *tile);
+int xe_tile_sysfs_init(struct xe_tile *tile);
static inline struct xe_tile *
kobj_to_tile(struct kobject *kobj)
diff --git a/drivers/gpu/drm/xe/xe_trace.h b/drivers/gpu/drm/xe/xe_trace.h
index 846f14507d5f..2d56cfc09e42 100644
--- a/drivers/gpu/drm/xe/xe_trace.h
+++ b/drivers/gpu/drm/xe/xe_trace.h
@@ -258,7 +258,7 @@ DECLARE_EVENT_CLASS(xe_sched_job,
__field(u32, guc_state)
__field(u32, flags)
__field(int, error)
- __field(u64, fence)
+ __field(struct dma_fence *, fence)
__field(u64, batch_addr)
),
@@ -269,11 +269,11 @@ DECLARE_EVENT_CLASS(xe_sched_job,
atomic_read(&job->q->guc->state);
__entry->flags = job->q->flags;
__entry->error = job->fence->error;
- __entry->fence = (unsigned long)job->fence;
+ __entry->fence = job->fence;
__entry->batch_addr = (u64)job->batch_addr[0];
),
- TP_printk("fence=0x%016llx, seqno=%u, guc_id=%d, batch_addr=0x%012llx, guc_state=0x%x, flags=0x%x, error=%d",
+ TP_printk("fence=%p, seqno=%u, guc_id=%d, batch_addr=0x%012llx, guc_state=0x%x, flags=0x%x, error=%d",
__entry->fence, __entry->seqno, __entry->guc_id,
__entry->batch_addr, __entry->guc_state,
__entry->flags, __entry->error)
diff --git a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
index 3107d2a12426..f77367329760 100644
--- a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
+++ b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
@@ -204,9 +204,14 @@ void xe_ttm_stolen_mgr_init(struct xe_device *xe)
{
struct xe_ttm_stolen_mgr *mgr = drmm_kzalloc(&xe->drm, sizeof(*mgr), GFP_KERNEL);
struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
- u64 stolen_size, io_size, pgsize;
+ u64 stolen_size, io_size;
int err;
+ if (!mgr) {
+ drm_dbg_kms(&xe->drm, "Stolen mgr init failed\n");
+ return;
+ }
+
if (IS_SRIOV_VF(xe))
stolen_size = 0;
else if (IS_DGFX(xe))
@@ -221,10 +226,6 @@ void xe_ttm_stolen_mgr_init(struct xe_device *xe)
return;
}
- pgsize = xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K ? SZ_64K : SZ_4K;
- if (pgsize < PAGE_SIZE)
- pgsize = PAGE_SIZE;
-
/*
* We don't try to attempt partial visible support for stolen vram,
* since stolen is always at the end of vram, and the BAR size is pretty
@@ -235,7 +236,7 @@ void xe_ttm_stolen_mgr_init(struct xe_device *xe)
io_size = stolen_size;
err = __xe_ttm_vram_mgr_init(xe, &mgr->base, XE_PL_STOLEN, stolen_size,
- io_size, pgsize);
+ io_size, PAGE_SIZE);
if (err) {
drm_dbg_kms(&xe->drm, "Stolen mgr init failed: %i\n", err);
return;
@@ -298,7 +299,7 @@ static int __xe_ttm_stolen_io_mem_reserve_stolen(struct xe_device *xe,
XE_WARN_ON(IS_DGFX(xe));
/* XXX: Require BO to be mapped to GGTT? */
- if (drm_WARN_ON(&xe->drm, !(bo->flags & XE_BO_CREATE_GGTT_BIT)))
+ if (drm_WARN_ON(&xe->drm, !(bo->flags & XE_BO_FLAG_GGTT)))
return -EIO;
/* GGTT is always contiguously mapped */
diff --git a/drivers/gpu/drm/xe/xe_ttm_sys_mgr.c b/drivers/gpu/drm/xe/xe_ttm_sys_mgr.c
index 3e1fa0c832ca..9844a8edbfe1 100644
--- a/drivers/gpu/drm/xe/xe_ttm_sys_mgr.c
+++ b/drivers/gpu/drm/xe/xe_ttm_sys_mgr.c
@@ -73,7 +73,10 @@ static void xe_ttm_sys_mgr_del(struct ttm_resource_manager *man,
static void xe_ttm_sys_mgr_debug(struct ttm_resource_manager *man,
struct drm_printer *printer)
{
-
+ /*
+ * This function is called from a debugfs entry and would require
+ * pm_runtime_{get,put} wrappers around any operation.
+ */
}
static const struct ttm_resource_manager_func xe_ttm_sys_mgr_func = {
diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
index 115ec745e502..8a1f460ff20b 100644
--- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
+++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
@@ -91,7 +91,7 @@ static int xe_ttm_vram_mgr_new(struct ttm_resource_manager *man,
min_page_size = mgr->default_page_size;
if (tbo->page_alignment)
- min_page_size = tbo->page_alignment << PAGE_SHIFT;
+ min_page_size = (u64)tbo->page_alignment << PAGE_SHIFT;
if (WARN_ON(min_page_size < mm->chunk_size)) {
err = -EINVAL;
@@ -478,3 +478,15 @@ void xe_ttm_vram_get_used(struct ttm_resource_manager *man,
*used_visible = mgr->visible_size - mgr->visible_avail;
mutex_unlock(&mgr->lock);
}
+
+u64 xe_ttm_vram_get_avail(struct ttm_resource_manager *man)
+{
+ struct xe_ttm_vram_mgr *mgr = to_xe_ttm_vram_mgr(man);
+ u64 avail;
+
+ mutex_lock(&mgr->lock);
+ avail = mgr->mm.avail;
+ mutex_unlock(&mgr->lock);
+
+ return avail;
+}
diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.h b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.h
index d184e19a9230..cc76050e376d 100644
--- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.h
+++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.h
@@ -25,6 +25,7 @@ int xe_ttm_vram_mgr_alloc_sgt(struct xe_device *xe,
void xe_ttm_vram_mgr_free_sgt(struct device *dev, enum dma_data_direction dir,
struct sg_table *sgt);
+u64 xe_ttm_vram_get_avail(struct ttm_resource_manager *man);
u64 xe_ttm_vram_get_cpu_visible_size(struct ttm_resource_manager *man);
void xe_ttm_vram_get_used(struct ttm_resource_manager *man,
u64 *used, u64 *used_visible);
diff --git a/drivers/gpu/drm/xe/xe_tuning.c b/drivers/gpu/drm/xe/xe_tuning.c
index 5c83c75bc497..d4e6fa918942 100644
--- a/drivers/gpu/drm/xe/xe_tuning.c
+++ b/drivers/gpu/drm/xe/xe_tuning.c
@@ -28,7 +28,7 @@ static const struct xe_rtp_entry_sr gt_tunings[] = {
/* Xe2 */
{ XE_RTP_NAME("Tuning: L3 cache"),
- XE_RTP_RULES(GRAPHICS_VERSION(2004)),
+ XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED)),
XE_RTP_ACTIONS(FIELD_SET(XEHP_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK,
REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f)))
},
@@ -38,11 +38,11 @@ static const struct xe_rtp_entry_sr gt_tunings[] = {
REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f)))
},
{ XE_RTP_NAME("Tuning: Compression Overfetch"),
- XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2004, XE_RTP_END_VERSION_UNDEFINED)),
+ XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED)),
XE_RTP_ACTIONS(CLR(CCCHKNREG1, ENCOMPPERFFIX)),
},
{ XE_RTP_NAME("Tuning: Enable compressible partial write overfetch in L3"),
- XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2004, XE_RTP_END_VERSION_UNDEFINED)),
+ XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED)),
XE_RTP_ACTIONS(SET(L3SQCREG3, COMPPWOVERFETCHEN))
},
{}
@@ -50,7 +50,7 @@ static const struct xe_rtp_entry_sr gt_tunings[] = {
static const struct xe_rtp_entry_sr engine_tunings[] = {
{ XE_RTP_NAME("Tuning: Set Indirect State Override"),
- XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1271),
+ XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1274),
ENGINE_CLASS(RENDER)),
XE_RTP_ACTIONS(SET(SAMPLER_MODE, INDIRECT_STATE_BASE_ADDR_OVERRIDE))
},
@@ -88,7 +88,7 @@ static const struct xe_rtp_entry_sr lrc_tunings[] = {
/* Xe_LPG */
{ XE_RTP_NAME("Tuning: L3 cache"),
- XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1271), ENGINE_CLASS(RENDER)),
+ XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1274), ENGINE_CLASS(RENDER)),
XE_RTP_ACTIONS(FIELD_SET(XEHP_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK,
REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f)))
},
diff --git a/drivers/gpu/drm/xe/xe_uc.c b/drivers/gpu/drm/xe/xe_uc.c
index 7033f8c1b431..4feb35c95a1c 100644
--- a/drivers/gpu/drm/xe/xe_uc.c
+++ b/drivers/gpu/drm/xe/xe_uc.c
@@ -32,11 +32,8 @@ uc_to_xe(struct xe_uc *uc)
/* Should be called once at driver load only */
int xe_uc_init(struct xe_uc *uc)
{
- struct xe_device *xe = uc_to_xe(uc);
int ret;
- xe_device_mem_access_get(xe);
-
/*
* We call the GuC/HuC/GSC init functions even if GuC submission is off
* to correctly move our tracking of the FW state to "disabled".
@@ -65,16 +62,8 @@ int xe_uc_init(struct xe_uc *uc)
goto err;
ret = xe_guc_db_mgr_init(&uc->guc.dbm, ~0);
- if (ret)
- goto err;
-
- xe_device_mem_access_put(xe);
-
- return 0;
err:
- xe_device_mem_access_put(xe);
-
return ret;
}
diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c
index a9d25b3fa67c..186f81640cef 100644
--- a/drivers/gpu/drm/xe/xe_uc_fw.c
+++ b/drivers/gpu/drm/xe/xe_uc_fw.c
@@ -17,6 +17,7 @@
#include "xe_map.h"
#include "xe_mmio.h"
#include "xe_module.h"
+#include "xe_sriov.h"
#include "xe_uc_fw.h"
/*
@@ -296,36 +297,28 @@ static void uc_fw_fini(struct drm_device *drm, void *arg)
xe_uc_fw_change_status(uc_fw, XE_UC_FIRMWARE_SELECTED);
}
-static void guc_read_css_info(struct xe_uc_fw *uc_fw, struct uc_css_header *css)
+static int guc_read_css_info(struct xe_uc_fw *uc_fw, struct uc_css_header *css)
{
struct xe_gt *gt = uc_fw_to_gt(uc_fw);
struct xe_uc_fw_version *release = &uc_fw->versions.found[XE_UC_FW_VER_RELEASE];
struct xe_uc_fw_version *compatibility = &uc_fw->versions.found[XE_UC_FW_VER_COMPATIBILITY];
xe_gt_assert(gt, uc_fw->type == XE_UC_FW_TYPE_GUC);
- xe_gt_assert(gt, release->major >= 70);
-
- if (release->major > 70 || release->minor >= 6) {
- /* v70.6.0 adds CSS header support */
- compatibility->major = FIELD_GET(CSS_SW_VERSION_UC_MAJOR,
- css->submission_version);
- compatibility->minor = FIELD_GET(CSS_SW_VERSION_UC_MINOR,
- css->submission_version);
- compatibility->patch = FIELD_GET(CSS_SW_VERSION_UC_PATCH,
- css->submission_version);
- } else if (release->minor >= 3) {
- /* v70.3.0 introduced v1.1.0 */
- compatibility->major = 1;
- compatibility->minor = 1;
- compatibility->patch = 0;
- } else {
- /* v70.0.0 introduced v1.0.0 */
- compatibility->major = 1;
- compatibility->minor = 0;
- compatibility->patch = 0;
+
+ /* We don't support GuC releases older than 70.19 */
+ if (release->major < 70 || (release->major == 70 && release->minor < 19)) {
+ xe_gt_err(gt, "Unsupported GuC v%u.%u! v70.19 or newer is required\n",
+ release->major, release->minor);
+ return -EINVAL;
}
+ compatibility->major = FIELD_GET(CSS_SW_VERSION_UC_MAJOR, css->submission_version);
+ compatibility->minor = FIELD_GET(CSS_SW_VERSION_UC_MINOR, css->submission_version);
+ compatibility->patch = FIELD_GET(CSS_SW_VERSION_UC_PATCH, css->submission_version);
+
uc_fw->private_data_size = css->private_data_size;
+
+ return 0;
}
int xe_uc_fw_check_version_requirements(struct xe_uc_fw *uc_fw)
@@ -424,7 +417,7 @@ static int parse_css_header(struct xe_uc_fw *uc_fw, const void *fw_data, size_t
release->patch = FIELD_GET(CSS_SW_VERSION_UC_PATCH, css->sw_version);
if (uc_fw->type == XE_UC_FW_TYPE_GUC)
- guc_read_css_info(uc_fw, css);
+ return guc_read_css_info(uc_fw, css);
return 0;
}
@@ -658,7 +651,17 @@ static int uc_fw_request(struct xe_uc_fw *uc_fw, const struct firmware **firmwar
xe_assert(xe, !uc_fw->path);
uc_fw_auto_select(xe, uc_fw);
+
+ if (IS_SRIOV_VF(xe)) {
+ /* VF will support only firmwares that driver can autoselect */
+ xe_uc_fw_change_status(uc_fw, uc_fw->path ?
+ XE_UC_FIRMWARE_PRELOADED :
+ XE_UC_FIRMWARE_NOT_SUPPORTED);
+ return 0;
+ }
+
uc_fw_override(uc_fw);
+
xe_uc_fw_change_status(uc_fw, uc_fw->path ?
XE_UC_FIRMWARE_SELECTED :
XE_UC_FIRMWARE_NOT_SUPPORTED);
@@ -771,7 +774,8 @@ int xe_uc_fw_init(struct xe_uc_fw *uc_fw)
return 0;
err = uc_fw_copy(uc_fw, fw->data, fw->size,
- XE_BO_CREATE_SYSTEM_BIT | XE_BO_CREATE_GGTT_BIT);
+ XE_BO_FLAG_SYSTEM | XE_BO_FLAG_GGTT |
+ XE_BO_FLAG_GGTT_INVALIDATE);
uc_fw_release(fw);
@@ -787,7 +791,8 @@ static int uc_fw_xfer(struct xe_uc_fw *uc_fw, u32 offset, u32 dma_flags)
{
struct xe_device *xe = uc_fw_to_xe(uc_fw);
struct xe_gt *gt = uc_fw_to_gt(uc_fw);
- u32 src_offset, dma_ctrl;
+ u64 src_offset;
+ u32 dma_ctrl;
int ret;
xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);
diff --git a/drivers/gpu/drm/xe/xe_uc_fw.h b/drivers/gpu/drm/xe/xe_uc_fw.h
index 85c20795d1f8..35078038797e 100644
--- a/drivers/gpu/drm/xe/xe_uc_fw.h
+++ b/drivers/gpu/drm/xe/xe_uc_fw.h
@@ -59,6 +59,8 @@ const char *xe_uc_fw_status_repr(enum xe_uc_fw_status status)
return "TRANSFERRED";
case XE_UC_FIRMWARE_RUNNING:
return "RUNNING";
+ case XE_UC_FIRMWARE_PRELOADED:
+ return "PRELOADED";
}
return "<invalid>";
}
@@ -85,6 +87,7 @@ static inline int xe_uc_fw_status_to_error(enum xe_uc_fw_status status)
case XE_UC_FIRMWARE_LOADABLE:
case XE_UC_FIRMWARE_TRANSFERRED:
case XE_UC_FIRMWARE_RUNNING:
+ case XE_UC_FIRMWARE_PRELOADED:
return 0;
}
return -EINVAL;
@@ -134,7 +137,8 @@ static inline bool xe_uc_fw_is_available(struct xe_uc_fw *uc_fw)
static inline bool xe_uc_fw_is_loadable(struct xe_uc_fw *uc_fw)
{
- return __xe_uc_fw_status(uc_fw) >= XE_UC_FIRMWARE_LOADABLE;
+ return __xe_uc_fw_status(uc_fw) >= XE_UC_FIRMWARE_LOADABLE &&
+ __xe_uc_fw_status(uc_fw) != XE_UC_FIRMWARE_PRELOADED;
}
static inline bool xe_uc_fw_is_loaded(struct xe_uc_fw *uc_fw)
@@ -144,7 +148,7 @@ static inline bool xe_uc_fw_is_loaded(struct xe_uc_fw *uc_fw)
static inline bool xe_uc_fw_is_running(struct xe_uc_fw *uc_fw)
{
- return __xe_uc_fw_status(uc_fw) == XE_UC_FIRMWARE_RUNNING;
+ return __xe_uc_fw_status(uc_fw) >= XE_UC_FIRMWARE_RUNNING;
}
static inline bool xe_uc_fw_is_overridden(const struct xe_uc_fw *uc_fw)
diff --git a/drivers/gpu/drm/xe/xe_uc_fw_types.h b/drivers/gpu/drm/xe/xe_uc_fw_types.h
index bc800b696866..0d8caa0e7354 100644
--- a/drivers/gpu/drm/xe/xe_uc_fw_types.h
+++ b/drivers/gpu/drm/xe/xe_uc_fw_types.h
@@ -50,7 +50,8 @@ enum xe_uc_fw_status {
XE_UC_FIRMWARE_LOADABLE, /* all fw-required objects are ready */
XE_UC_FIRMWARE_LOAD_FAIL, /* failed to xfer or init/auth the fw */
XE_UC_FIRMWARE_TRANSFERRED, /* dma xfer done */
- XE_UC_FIRMWARE_RUNNING /* init/auth done */
+ XE_UC_FIRMWARE_RUNNING, /* init/auth done */
+ XE_UC_FIRMWARE_PRELOADED, /* preloaded by the PF driver */
};
enum xe_uc_fw_type {
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 3d4c8f342e21..85d6f359142d 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -21,12 +21,12 @@
#include <generated/xe_wa_oob.h>
+#include "regs/xe_gtt_defs.h"
#include "xe_assert.h"
#include "xe_bo.h"
#include "xe_device.h"
#include "xe_drm_client.h"
#include "xe_exec_queue.h"
-#include "xe_gt.h"
#include "xe_gt_pagefault.h"
#include "xe_gt_tlb_invalidation.h"
#include "xe_migrate.h"
@@ -38,6 +38,7 @@
#include "xe_sync.h"
#include "xe_trace.h"
#include "xe_wa.h"
+#include "xe_hmm.h"
static struct drm_gem_object *xe_vm_obj(struct xe_vm *vm)
{
@@ -65,113 +66,14 @@ int xe_vma_userptr_check_repin(struct xe_userptr_vma *uvma)
int xe_vma_userptr_pin_pages(struct xe_userptr_vma *uvma)
{
- struct xe_userptr *userptr = &uvma->userptr;
struct xe_vma *vma = &uvma->vma;
struct xe_vm *vm = xe_vma_vm(vma);
struct xe_device *xe = vm->xe;
- const unsigned long num_pages = xe_vma_size(vma) >> PAGE_SHIFT;
- struct page **pages;
- bool in_kthread = !current->mm;
- unsigned long notifier_seq;
- int pinned, ret, i;
- bool read_only = xe_vma_read_only(vma);
lockdep_assert_held(&vm->lock);
xe_assert(xe, xe_vma_is_userptr(vma));
-retry:
- if (vma->gpuva.flags & XE_VMA_DESTROYED)
- return 0;
-
- notifier_seq = mmu_interval_read_begin(&userptr->notifier);
- if (notifier_seq == userptr->notifier_seq)
- return 0;
-
- pages = kvmalloc_array(num_pages, sizeof(*pages), GFP_KERNEL);
- if (!pages)
- return -ENOMEM;
-
- if (userptr->sg) {
- dma_unmap_sgtable(xe->drm.dev,
- userptr->sg,
- read_only ? DMA_TO_DEVICE :
- DMA_BIDIRECTIONAL, 0);
- sg_free_table(userptr->sg);
- userptr->sg = NULL;
- }
-
- pinned = ret = 0;
- if (in_kthread) {
- if (!mmget_not_zero(userptr->notifier.mm)) {
- ret = -EFAULT;
- goto mm_closed;
- }
- kthread_use_mm(userptr->notifier.mm);
- }
-
- while (pinned < num_pages) {
- ret = get_user_pages_fast(xe_vma_userptr(vma) +
- pinned * PAGE_SIZE,
- num_pages - pinned,
- read_only ? 0 : FOLL_WRITE,
- &pages[pinned]);
- if (ret < 0)
- break;
-
- pinned += ret;
- ret = 0;
- }
-
- if (in_kthread) {
- kthread_unuse_mm(userptr->notifier.mm);
- mmput(userptr->notifier.mm);
- }
-mm_closed:
- if (ret)
- goto out;
-
- ret = sg_alloc_table_from_pages_segment(&userptr->sgt, pages,
- pinned, 0,
- (u64)pinned << PAGE_SHIFT,
- xe_sg_segment_size(xe->drm.dev),
- GFP_KERNEL);
- if (ret) {
- userptr->sg = NULL;
- goto out;
- }
- userptr->sg = &userptr->sgt;
-
- ret = dma_map_sgtable(xe->drm.dev, userptr->sg,
- read_only ? DMA_TO_DEVICE :
- DMA_BIDIRECTIONAL,
- DMA_ATTR_SKIP_CPU_SYNC |
- DMA_ATTR_NO_KERNEL_MAPPING);
- if (ret) {
- sg_free_table(userptr->sg);
- userptr->sg = NULL;
- goto out;
- }
-
- for (i = 0; i < pinned; ++i) {
- if (!read_only) {
- lock_page(pages[i]);
- set_page_dirty(pages[i]);
- unlock_page(pages[i]);
- }
- mark_page_accessed(pages[i]);
- }
-
-out:
- release_pages(pages, pinned);
- kvfree(pages);
-
- if (!(ret < 0)) {
- userptr->notifier_seq = notifier_seq;
- if (xe_vma_userptr_check_repin(uvma) == -EAGAIN)
- goto retry;
- }
-
- return ret < 0 ? ret : 0;
+ return xe_hmm_userptr_populate_range(uvma, false);
}
static bool preempt_fences_waiting(struct xe_vm *vm)
@@ -682,6 +584,10 @@ static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni,
if (!mmu_notifier_range_blockable(range))
return false;
+ vm_dbg(&xe_vma_vm(vma)->xe->drm,
+ "NOTIFIER: addr=0x%016llx, range=0x%016llx",
+ xe_vma_start(vma), xe_vma_size(vma));
+
down_write(&vm->userptr.notifier_lock);
mmu_interval_set_seq(mni, cur_seq);
@@ -951,8 +857,11 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm,
static void xe_vma_destroy_late(struct xe_vma *vma)
{
struct xe_vm *vm = xe_vma_vm(vma);
- struct xe_device *xe = vm->xe;
- bool read_only = xe_vma_read_only(vma);
if (vma->ufence) {
xe_sync_ufence_put(vma->ufence);
@@ -960,16 +869,11 @@ static void xe_vma_destroy_late(struct xe_vma *vma)
}
if (xe_vma_is_userptr(vma)) {
- struct xe_userptr *userptr = &to_userptr_vma(vma)->userptr;
+ struct xe_userptr_vma *uvma = to_userptr_vma(vma);
+ struct xe_userptr *userptr = &uvma->userptr;
- if (userptr->sg) {
- dma_unmap_sgtable(xe->drm.dev,
- userptr->sg,
- read_only ? DMA_TO_DEVICE :
- DMA_BIDIRECTIONAL, 0);
- sg_free_table(userptr->sg);
- userptr->sg = NULL;
- }
+ if (userptr->sg)
+ xe_hmm_userptr_free_sg(uvma);
/*
* Since userptr pages are not pinned, we can't remove
@@ -1274,8 +1178,6 @@ static const struct xe_pt_ops xelp_pt_ops = {
.pde_encode_bo = xelp_pde_encode_bo,
};
-static void vm_destroy_work_func(struct work_struct *w);
-
/**
* xe_vm_create_scratch() - Setup a scratch memory pagetable tree for the
* given tile and vm.
@@ -1355,8 +1257,6 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
init_rwsem(&vm->userptr.notifier_lock);
spin_lock_init(&vm->userptr.invalidated_lock);
- INIT_WORK(&vm->destroy_work, vm_destroy_work_func);
-
INIT_LIST_HEAD(&vm->preempt.exec_queues);
vm->preempt.min_run_period_ms = 10; /* FIXME: Wire up to uAPI */
@@ -1366,7 +1266,7 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
vm->pt_ops = &xelp_pt_ops;
if (!(flags & XE_VM_FLAG_MIGRATION))
- xe_device_mem_access_get(xe);
+ xe_pm_runtime_get_noresume(xe);
vm_resv_obj = drm_gpuvm_resv_object_alloc(&xe->drm);
if (!vm_resv_obj) {
@@ -1411,9 +1311,8 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
vm->batch_invalidate_tlb = true;
}
- if (flags & XE_VM_FLAG_LR_MODE) {
+ if (vm->flags & XE_VM_FLAG_LR_MODE) {
INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func);
- vm->flags |= XE_VM_FLAG_LR_MODE;
vm->batch_invalidate_tlb = false;
}
@@ -1477,7 +1376,7 @@ err_no_resv:
xe_range_fence_tree_fini(&vm->rftree[id]);
kfree(vm);
if (!(flags & XE_VM_FLAG_MIGRATION))
- xe_device_mem_access_put(xe);
+ xe_pm_runtime_put(xe);
return ERR_PTR(err);
}
@@ -1595,10 +1494,9 @@ void xe_vm_close_and_put(struct xe_vm *vm)
xe_vm_put(vm);
}
-static void vm_destroy_work_func(struct work_struct *w)
+static void xe_vm_free(struct drm_gpuvm *gpuvm)
{
- struct xe_vm *vm =
- container_of(w, struct xe_vm, destroy_work);
+ struct xe_vm *vm = container_of(gpuvm, struct xe_vm, gpuvm);
struct xe_device *xe = vm->xe;
struct xe_tile *tile;
u8 id;
@@ -1609,7 +1507,7 @@ static void vm_destroy_work_func(struct work_struct *w)
mutex_destroy(&vm->snap_mutex);
if (!(vm->flags & XE_VM_FLAG_MIGRATION))
- xe_device_mem_access_put(xe);
+ xe_pm_runtime_put(xe);
for_each_tile(tile, xe, id)
XE_WARN_ON(vm->pt_root[id]);
@@ -1618,14 +1516,6 @@ static void vm_destroy_work_func(struct work_struct *w)
kfree(vm);
}
-static void xe_vm_free(struct drm_gpuvm *gpuvm)
-{
- struct xe_vm *vm = container_of(gpuvm, struct xe_vm, gpuvm);
-
- /* To destroy the VM we need to be able to sleep */
- queue_work(system_unbound_wq, &vm->destroy_work);
-}
-
struct xe_vm *xe_vm_lookup(struct xe_file *xef, u32 id)
{
struct xe_vm *vm;
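With the worker indirection gone, xe_vm_free() now serves directly as the drm_gpuvm free callback. Roughly how that hook is wired (illustrative; the actual ops table is defined elsewhere in xe_vm.c):

	static const struct drm_gpuvm_ops gpuvm_ops = {
		/* ... other callbacks elided ... */
		.vm_free = xe_vm_free,	/* runs on the final drm_gpuvm_put() */
	};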
@@ -1722,7 +1612,7 @@ next:
xe_exec_queue_last_fence_get(wait_exec_queue, vm) : fence;
if (last_op) {
for (i = 0; i < num_syncs; i++)
- xe_sync_entry_signal(&syncs[i], NULL, fence);
+ xe_sync_entry_signal(&syncs[i], fence);
}
return fence;
@@ -1796,7 +1686,7 @@ next:
if (last_op) {
for (i = 0; i < num_syncs; i++)
- xe_sync_entry_signal(&syncs[i], NULL,
+ xe_sync_entry_signal(&syncs[i],
cf ? &cf->base : fence);
}
@@ -1857,7 +1747,7 @@ static int __xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma,
fence = xe_exec_queue_last_fence_get(wait_exec_queue, vm);
if (last_op) {
for (i = 0; i < num_syncs; i++)
- xe_sync_entry_signal(&syncs[i], NULL, fence);
+ xe_sync_entry_signal(&syncs[i], fence);
}
}
@@ -2058,7 +1948,7 @@ static int xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma,
struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, q);
int err;
- xe_assert(vm->xe, region <= ARRAY_SIZE(region_to_mem_type));
+ xe_assert(vm->xe, region < ARRAY_SIZE(region_to_mem_type));
if (!xe_vma_has_no_bo(vma)) {
err = xe_bo_migrate(xe_vma_bo(vma), region_to_mem_type[region]);
@@ -2078,7 +1968,7 @@ static int xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma,
struct dma_fence *fence =
xe_exec_queue_last_fence_get(wait_exec_queue, vm);
- xe_sync_entry_signal(&syncs[i], NULL, fence);
+ xe_sync_entry_signal(&syncs[i], fence);
dma_fence_put(fence);
}
}
@@ -2823,7 +2713,10 @@ static int vm_bind_ioctl_ops_execute(struct xe_vm *vm,
return 0;
}
-#define SUPPORTED_FLAGS (DRM_XE_VM_BIND_FLAG_NULL | \
+#define SUPPORTED_FLAGS \
+ (DRM_XE_VM_BIND_FLAG_READONLY | \
+ DRM_XE_VM_BIND_FLAG_IMMEDIATE | \
+ DRM_XE_VM_BIND_FLAG_NULL | \
DRM_XE_VM_BIND_FLAG_DUMPABLE)
#define XE_64K_PAGE_MASK 0xffffull
#define ALL_DRM_XE_SYNCS_FLAGS (DRM_XE_SYNCS_FLAG_WAIT_FOR_OP)
@@ -2956,7 +2849,7 @@ static int vm_bind_ioctl_signal_fences(struct xe_vm *vm,
return PTR_ERR(fence);
for (i = 0; i < num_syncs; i++)
- xe_sync_entry_signal(&syncs[i], NULL, fence);
+ xe_sync_entry_signal(&syncs[i], fence);
xe_exec_queue_last_fence_set(to_wait_exec_queue(vm, q), vm,
fence);
@@ -3067,7 +2960,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
goto put_obj;
}
- if (bos[i]->flags & XE_BO_INTERNAL_64K) {
+ if (bos[i]->flags & XE_BO_FLAG_INTERNAL_64K) {
if (XE_IOCTL_DBG(xe, obj_offset &
XE_64K_PAGE_MASK) ||
XE_IOCTL_DBG(xe, addr & XE_64K_PAGE_MASK) ||
@@ -3259,6 +3152,10 @@ int xe_vm_invalidate_vma(struct xe_vma *vma)
xe_assert(xe, !xe_vma_is_null(vma));
trace_xe_vma_invalidate(vma);
+ vm_dbg(&xe_vma_vm(vma)->xe->drm,
+ "INVALIDATE: addr=0x%016llx, range=0x%016llx",
+ xe_vma_start(vma), xe_vma_size(vma));
+
/* Check that we don't race with page-table updates */
if (IS_ENABLED(CONFIG_PROVE_LOCKING)) {
if (xe_vma_is_userptr(vma)) {
@@ -3377,8 +3274,10 @@ struct xe_vm_snapshot *xe_vm_snapshot_capture(struct xe_vm *vm)
if (num_snaps)
snap = kvzalloc(offsetof(struct xe_vm_snapshot, snap[num_snaps]), GFP_NOWAIT);
- if (!snap)
+ if (!snap) {
+ snap = num_snaps ? ERR_PTR(-ENOMEM) : ERR_PTR(-ENODEV);
goto out_unlock;
+ }
snap->num_snaps = num_snaps;
i = 0;
@@ -3418,6 +3317,9 @@ out_unlock:
void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap)
{
+ if (IS_ERR_OR_NULL(snap))
+ return;
+
for (int i = 0; i < snap->num_snaps; i++) {
struct xe_bo *bo = snap->snap[i].bo;
struct iosys_map src;
@@ -3472,13 +3374,21 @@ void xe_vm_snapshot_print(struct xe_vm_snapshot *snap, struct drm_printer *p)
{
unsigned long i, j;
- for (i = 0; i < snap->num_snaps; i++) {
- if (IS_ERR(snap->snap[i].data))
- goto uncaptured;
+ if (IS_ERR_OR_NULL(snap)) {
+ drm_printf(p, "[0].error: %li\n", PTR_ERR(snap));
+ return;
+ }
+ for (i = 0; i < snap->num_snaps; i++) {
drm_printf(p, "[%llx].length: 0x%lx\n", snap->snap[i].ofs, snap->snap[i].len);
- drm_printf(p, "[%llx].data: ",
- snap->snap[i].ofs);
+
+ if (IS_ERR(snap->snap[i].data)) {
+ drm_printf(p, "[%llx].error: %li\n", snap->snap[i].ofs,
+ PTR_ERR(snap->snap[i].data));
+ continue;
+ }
+
+ drm_printf(p, "[%llx].data: ", snap->snap[i].ofs);
for (j = 0; j < snap->snap[i].len; j += sizeof(u32)) {
u32 *val = snap->snap[i].data + j;
@@ -3488,12 +3398,6 @@ void xe_vm_snapshot_print(struct xe_vm_snapshot *snap, struct drm_printer *p)
}
drm_puts(p, "\n");
- continue;
-
-uncaptured:
- drm_printf(p, "Unable to capture range [%llx-%llx]: %li\n",
- snap->snap[i].ofs, snap->snap[i].ofs + snap->snap[i].len - 1,
- PTR_ERR(snap->snap[i].data));
}
}
@@ -3501,7 +3405,7 @@ void xe_vm_snapshot_free(struct xe_vm_snapshot *snap)
{
unsigned long i;
- if (!snap)
+ if (IS_ERR_OR_NULL(snap))
return;
for (i = 0; i < snap->num_snaps; i++) {
diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
index badf3945083d..7570c2c6c463 100644
--- a/drivers/gpu/drm/xe/xe_vm_types.h
+++ b/drivers/gpu/drm/xe/xe_vm_types.h
@@ -178,13 +178,6 @@ struct xe_vm {
struct list_head rebind_list;
/**
- * @destroy_work: worker to destroy VM, needed as a dma_fence signaling
- * from an irq context can be last put and the destroy needs to be able
- * to sleep.
- */
- struct work_struct destroy_work;
-
- /**
* @rftree: range fence tree to track updates to page table structure.
* Used to implement conflict tracking between independent bind engines.
*/
diff --git a/drivers/gpu/drm/xe/xe_vram_freq.c b/drivers/gpu/drm/xe/xe_vram_freq.c
index c5f6b5a5d117..3e21ddc6e60c 100644
--- a/drivers/gpu/drm/xe/xe_vram_freq.c
+++ b/drivers/gpu/drm/xe/xe_vram_freq.c
@@ -100,31 +100,27 @@ static void vram_freq_sysfs_fini(struct drm_device *drm, void *arg)
* @tile: Xe Tile object
*
* It needs to be initialized after the main tile component is ready
+ *
+ * Returns: 0 on success, negative error code on failure.

*/
-void xe_vram_freq_sysfs_init(struct xe_tile *tile)
+int xe_vram_freq_sysfs_init(struct xe_tile *tile)
{
struct xe_device *xe = tile_to_xe(tile);
struct kobject *kobj;
int err;
if (xe->info.platform != XE_PVC)
- return;
+ return 0;
kobj = kobject_create_and_add("memory", tile->sysfs);
- if (!kobj) {
- drm_warn(&xe->drm, "failed to add memory directory, err: %d\n", -ENOMEM);
- return;
- }
+ if (!kobj)
+ return -ENOMEM;
err = sysfs_create_group(kobj, &freq_group_attrs);
if (err) {
kobject_put(kobj);
- drm_warn(&xe->drm, "failed to register vram freq sysfs, err: %d\n", err);
- return;
+ return err;
}
- err = drmm_add_action_or_reset(&xe->drm, vram_freq_sysfs_fini, kobj);
- if (err)
- drm_warn(&xe->drm, "%s: drmm_add_action_or_reset failed, err: %d\n",
- __func__, err);
+ return drmm_add_action_or_reset(&xe->drm, vram_freq_sysfs_fini, kobj);
}
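Callers are now expected to propagate the error rather than relying on the drm_warn() calls removed above; a hypothetical call site in the tile sysfs setup path:

	int err = xe_vram_freq_sysfs_init(tile);

	if (err)
		return err;	/* propagate instead of warning and continuing */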
diff --git a/drivers/gpu/drm/xe/xe_vram_freq.h b/drivers/gpu/drm/xe/xe_vram_freq.h
index cbe8c12fbd64..bf726bc5881f 100644
--- a/drivers/gpu/drm/xe/xe_vram_freq.h
+++ b/drivers/gpu/drm/xe/xe_vram_freq.h
@@ -8,6 +8,6 @@
struct xe_tile;
-void xe_vram_freq_sysfs_init(struct xe_tile *tile);
+int xe_vram_freq_sysfs_init(struct xe_tile *tile);
#endif /* _XE_VRAM_FREQ_H_ */
diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c
index a0264eedd443..dcf7ed51757c 100644
--- a/drivers/gpu/drm/xe/xe_wa.c
+++ b/drivers/gpu/drm/xe/xe_wa.c
@@ -173,11 +173,11 @@ static const struct xe_rtp_entry_sr gt_was[] = {
XE_RTP_ACTIONS(CLR(MISCCPCTL, DOP_CLOCK_GATE_RENDER_ENABLE))
},
{ XE_RTP_NAME("14018575942"),
- XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1271)),
+ XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1274)),
XE_RTP_ACTIONS(SET(COMP_MOD_CTRL, FORCE_MISS_FTLB))
},
{ XE_RTP_NAME("22016670082"),
- XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1271)),
+ XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1274)),
XE_RTP_ACTIONS(SET(SQCNT1, ENFORCE_RAR))
},
@@ -228,6 +228,28 @@ static const struct xe_rtp_entry_sr gt_was[] = {
XE_RTP_ENTRY_FLAG(FOREACH_ENGINE),
},
+ /* Xe2_HPM */
+
+ { XE_RTP_NAME("16021867713"),
+ XE_RTP_RULES(MEDIA_VERSION(1301),
+ ENGINE_CLASS(VIDEO_DECODE)),
+ XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F1C(0), MFXPIPE_CLKGATE_DIS)),
+ XE_RTP_ENTRY_FLAG(FOREACH_ENGINE),
+ },
+ { XE_RTP_NAME("14020316580"),
+ XE_RTP_RULES(MEDIA_VERSION(1301)),
+ XE_RTP_ACTIONS(CLR(PG_ENABLE,
+ VD0_HCP_POWERGATE_ENABLE |
+ VD0_MFXVDENC_POWERGATE_ENABLE |
+ VD2_HCP_POWERGATE_ENABLE |
+ VD2_MFXVDENC_POWERGATE_ENABLE)),
+ },
+ { XE_RTP_NAME("14019449301"),
+ XE_RTP_RULES(MEDIA_VERSION(1301), ENGINE_CLASS(VIDEO_DECODE)),
+ XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F08(0), CG3DDISHRS_CLKGATE_DIS)),
+ XE_RTP_ENTRY_FLAG(FOREACH_ENGINE),
+ },
+
{}
};
@@ -328,12 +350,6 @@ static const struct xe_rtp_entry_sr engine_was[] = {
FUNC(xe_rtp_match_first_render_or_compute)),
XE_RTP_ACTIONS(SET(ROW_CHICKEN4, XEHP_DIS_BBL_SYSPIPE))
},
- { XE_RTP_NAME("16015675438"),
- XE_RTP_RULES(PLATFORM(DG2),
- FUNC(xe_rtp_match_first_render_or_compute)),
- XE_RTP_ACTIONS(SET(FF_SLICE_CS_CHICKEN2(RENDER_RING_BASE),
- PERF_FIX_BALANCING_CFE_DISABLE))
- },
{ XE_RTP_NAME("18028616096"),
XE_RTP_RULES(PLATFORM(DG2),
FUNC(xe_rtp_match_first_render_or_compute)),
@@ -383,10 +399,10 @@ static const struct xe_rtp_entry_sr engine_was[] = {
XE_RTP_RULES(PLATFORM(PVC), FUNC(xe_rtp_match_first_render_or_compute)),
XE_RTP_ACTIONS(SET(ROW_CHICKEN4, XEHP_DIS_BBL_SYSPIPE))
},
- { XE_RTP_NAME("16015675438"),
- XE_RTP_RULES(PLATFORM(PVC), FUNC(xe_rtp_match_first_render_or_compute)),
- XE_RTP_ACTIONS(SET(FF_SLICE_CS_CHICKEN2(RENDER_RING_BASE),
- PERF_FIX_BALANCING_CFE_DISABLE))
+ { XE_RTP_NAME("18020744125"),
+ XE_RTP_RULES(PLATFORM(PVC), FUNC(xe_rtp_match_first_render_or_compute),
+ ENGINE_CLASS(COMPUTE)),
+ XE_RTP_ACTIONS(SET(RING_HWSTAM(RENDER_RING_BASE), ~0))
},
{ XE_RTP_NAME("14014999345"),
XE_RTP_RULES(PLATFORM(PVC), ENGINE_CLASS(COMPUTE),
@@ -397,7 +413,7 @@ static const struct xe_rtp_entry_sr engine_was[] = {
/* Xe_LPG */
{ XE_RTP_NAME("14017856879"),
- XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1271),
+ XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1274),
FUNC(xe_rtp_match_first_render_or_compute)),
XE_RTP_ACTIONS(SET(ROW_CHICKEN3, DIS_FIX_EOT1_FLUSH))
},
@@ -407,6 +423,11 @@ static const struct xe_rtp_entry_sr engine_was[] = {
XE_RTP_ACTIONS(SET(XEHP_HDC_CHICKEN0, DIS_ATOMIC_CHAINING_TYPED_WRITES,
XE_RTP_NOCHECK))
},
+ { XE_RTP_NAME("14020495402"),
+ XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1274),
+ FUNC(xe_rtp_match_first_render_or_compute)),
+ XE_RTP_ACTIONS(SET(ROW_CHICKEN2, DISABLE_TDL_SVHS_GATING))
+ },
/* Xe2_LPG */
@@ -424,11 +445,20 @@ static const struct xe_rtp_entry_sr engine_was[] = {
FUNC(xe_rtp_match_first_render_or_compute)),
XE_RTP_ACTIONS(SET(HALF_SLICE_CHICKEN5, DISABLE_SAMPLE_G_PERFORMANCE))
},
+ { XE_RTP_NAME("14020338487"),
+ XE_RTP_RULES(GRAPHICS_VERSION(2004), FUNC(xe_rtp_match_first_render_or_compute)),
+ XE_RTP_ACTIONS(SET(ROW_CHICKEN3, XE2_EUPEND_CHK_FLUSH_DIS))
+ },
{ XE_RTP_NAME("16021540221"),
XE_RTP_RULES(GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0),
FUNC(xe_rtp_match_first_render_or_compute)),
XE_RTP_ACTIONS(SET(ROW_CHICKEN4, DISABLE_TDL_PUSH))
},
+ { XE_RTP_NAME("18034896535"),
+ XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2004),
+ FUNC(xe_rtp_match_first_render_or_compute)),
+ XE_RTP_ACTIONS(SET(ROW_CHICKEN4, DISABLE_TDL_PUSH))
+ },
{ XE_RTP_NAME("14019322943"),
XE_RTP_RULES(GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0),
FUNC(xe_rtp_match_first_render_or_compute)),
@@ -460,6 +490,65 @@ static const struct xe_rtp_entry_sr engine_was[] = {
XE_RTP_RULES(GRAPHICS_VERSION(2004), FUNC(xe_rtp_match_first_render_or_compute)),
XE_RTP_ACTIONS(SET(TDL_TSL_CHICKEN, SLM_WMTP_RESTORE))
},
+
+ /* Xe2_HPG */
+
+ { XE_RTP_NAME("16018712365"),
+ XE_RTP_RULES(GRAPHICS_VERSION(2001), FUNC(xe_rtp_match_first_render_or_compute)),
+ XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0_UDW, XE2_ALLOC_DPA_STARVE_FIX_DIS))
+ },
+ { XE_RTP_NAME("16018737384"),
+ XE_RTP_RULES(GRAPHICS_VERSION(2001), FUNC(xe_rtp_match_first_render_or_compute)),
+ XE_RTP_ACTIONS(SET(ROW_CHICKEN, EARLY_EOT_DIS))
+ },
+ { XE_RTP_NAME("14019988906"),
+ XE_RTP_RULES(GRAPHICS_VERSION(2001), FUNC(xe_rtp_match_first_render_or_compute)),
+ XE_RTP_ACTIONS(SET(XEHP_PSS_CHICKEN, FLSH_IGNORES_PSD))
+ },
+ { XE_RTP_NAME("14019877138"),
+ XE_RTP_RULES(GRAPHICS_VERSION(2001), FUNC(xe_rtp_match_first_render_or_compute)),
+ XE_RTP_ACTIONS(SET(XEHP_PSS_CHICKEN, FD_END_COLLECT))
+ },
+ { XE_RTP_NAME("14020338487"),
+ XE_RTP_RULES(GRAPHICS_VERSION(2001), FUNC(xe_rtp_match_first_render_or_compute)),
+ XE_RTP_ACTIONS(SET(ROW_CHICKEN3, XE2_EUPEND_CHK_FLUSH_DIS))
+ },
+ { XE_RTP_NAME("18032247524"),
+ XE_RTP_RULES(GRAPHICS_VERSION(2001), FUNC(xe_rtp_match_first_render_or_compute)),
+ XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0, SEQUENTIAL_ACCESS_UPGRADE_DISABLE))
+ },
+ { XE_RTP_NAME("14018471104"),
+ XE_RTP_RULES(GRAPHICS_VERSION(2001), FUNC(xe_rtp_match_first_render_or_compute)),
+ XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0_UDW, ENABLE_SMP_LD_RENDER_SURFACE_CONTROL))
+ },
+ /*
+ * Although this workaround isn't required for the RCS, disabling these
+ * reports has no impact on our driver or the GuC, so we go ahead and
+ * apply this to all engines for simplicity.
+ */
+ { XE_RTP_NAME("16021639441"),
+ XE_RTP_RULES(GRAPHICS_VERSION(2001)),
+ XE_RTP_ACTIONS(SET(CSFE_CHICKEN1(0),
+ GHWSP_CSB_REPORT_DIS |
+ PPHWSP_CSB_AND_TIMESTAMP_REPORT_DIS,
+ XE_RTP_ACTION_FLAG(ENGINE_BASE)))
+ },
+ { XE_RTP_NAME("14019811474"),
+ XE_RTP_RULES(GRAPHICS_VERSION(2001),
+ FUNC(xe_rtp_match_first_render_or_compute)),
+ XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0, WR_REQ_CHAINING_DIS))
+ },
+
+ /* Xe2_HPM */
+
+ { XE_RTP_NAME("16021639441"),
+ XE_RTP_RULES(MEDIA_VERSION(1301)),
+ XE_RTP_ACTIONS(SET(CSFE_CHICKEN1(0),
+ GHWSP_CSB_REPORT_DIS |
+ PPHWSP_CSB_AND_TIMESTAMP_REPORT_DIS,
+ XE_RTP_ACTION_FLAG(ENGINE_BASE)))
+ },
+
{}
};
@@ -537,7 +626,7 @@ static const struct xe_rtp_entry_sr lrc_was[] = {
/* Xe_LPG */
{ XE_RTP_NAME("18019271663"),
- XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1271)),
+ XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1274)),
XE_RTP_ACTIONS(SET(CACHE_MODE_1, MSAA_OPTIMIZATION_REDUC_DISABLE))
},
{ XE_RTP_NAME("14019877138"),
@@ -580,6 +669,24 @@ static const struct xe_rtp_entry_sr lrc_was[] = {
ENGINE_CLASS(RENDER)),
XE_RTP_ACTIONS(SET(INSTPM(RENDER_RING_BASE), ENABLE_SEMAPHORE_POLL_BIT))
},
+ { XE_RTP_NAME("18033852989"),
+ XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2004), ENGINE_CLASS(RENDER)),
+ XE_RTP_ACTIONS(SET(COMMON_SLICE_CHICKEN1, DISABLE_BOTTOM_CLIP_RECTANGLE_TEST))
+ },
+
+ /* Xe2_HPG */
+ { XE_RTP_NAME("15010599737"),
+ XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)),
+ XE_RTP_ACTIONS(SET(CHICKEN_RASTER_1, DIS_SF_ROUND_NEAREST_EVEN))
+ },
+ { XE_RTP_NAME("14019386621"),
+ XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)),
+ XE_RTP_ACTIONS(SET(VF_SCRATCHPAD, XE2_VFG_TED_CREDIT_INTERFACE_DISABLE))
+ },
+ { XE_RTP_NAME("14020756599"),
+ XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)),
+ XE_RTP_ACTIONS(SET(WM_CHICKEN3, HIZ_PLANE_COMPRESSION_DIS))
+ },
{}
};
diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules
index b138cbd51bdb..12fe88796a49 100644
--- a/drivers/gpu/drm/xe/xe_wa_oob.rules
+++ b/drivers/gpu/drm/xe/xe_wa_oob.rules
@@ -4,9 +4,6 @@
22011391025 PLATFORM(DG2)
22012727170 SUBPLATFORM(DG2, G11)
22012727685 SUBPLATFORM(DG2, G11)
-16015675438 PLATFORM(PVC)
- SUBPLATFORM(DG2, G10)
- SUBPLATFORM(DG2, G12)
18020744125 PLATFORM(PVC)
1509372804 PLATFORM(PVC), GRAPHICS_STEP(A0, C0)
1409600907 GRAPHICS_VERSION_RANGE(1200, 1250)
@@ -22,3 +19,11 @@
GRAPHICS_VERSION_RANGE(1270, 1274)
MEDIA_VERSION(1300)
PLATFORM(DG2)
+14018094691 GRAPHICS_VERSION(2004)
+14019882105 GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0)
+18024947630 GRAPHICS_VERSION(2001)
+ GRAPHICS_VERSION(2004)
+ MEDIA_VERSION(2000)
+16022287689 GRAPHICS_VERSION(2001)
+ GRAPHICS_VERSION(2004)
+13011645652 GRAPHICS_VERSION(2004)
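These out-of-band rules are compiled into per-GT bitmaps that driver code tests with the XE_WA() macro from xe_wa.h, for example (illustrative; the helper is hypothetical):

	if (XE_WA(gt, 18024947630))
		apply_wa_18024947630(gt);	/* hypothetical WA-specific path */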
diff --git a/include/drm/xe_pciids.h b/include/drm/xe_pciids.h
index de1a344737bc..adb37bc541e4 100644
--- a/include/drm/xe_pciids.h
+++ b/include/drm/xe_pciids.h
@@ -134,7 +134,9 @@
MACRO__(0x5692, ## __VA_ARGS__), \
MACRO__(0x56A0, ## __VA_ARGS__), \
MACRO__(0x56A1, ## __VA_ARGS__), \
- MACRO__(0x56A2, ## __VA_ARGS__)
+ MACRO__(0x56A2, ## __VA_ARGS__), \
+ MACRO__(0x56BE, ## __VA_ARGS__), \
+ MACRO__(0x56BF, ## __VA_ARGS__)
#define XE_DG2_G11_IDS(MACRO__, ...) \
MACRO__(0x5693, ## __VA_ARGS__), \
@@ -176,10 +178,13 @@
/* MTL / ARL */
#define XE_MTL_IDS(MACRO__, ...) \
MACRO__(0x7D40, ## __VA_ARGS__), \
+ MACRO__(0x7D41, ## __VA_ARGS__), \
MACRO__(0x7D45, ## __VA_ARGS__), \
+ MACRO__(0x7D51, ## __VA_ARGS__), \
MACRO__(0x7D55, ## __VA_ARGS__), \
MACRO__(0x7D60, ## __VA_ARGS__), \
MACRO__(0x7D67, ## __VA_ARGS__), \
+ MACRO__(0x7DD1, ## __VA_ARGS__), \
MACRO__(0x7DD5, ## __VA_ARGS__)
#define XE_LNL_IDS(MACRO__, ...) \
diff --git a/include/linux/devcoredump.h b/include/linux/devcoredump.h
index c008169ed2c6..c8f7eb6cc191 100644
--- a/include/linux/devcoredump.h
+++ b/include/linux/devcoredump.h
@@ -63,6 +63,8 @@ void dev_coredumpm(struct device *dev, struct module *owner,
void dev_coredumpsg(struct device *dev, struct scatterlist *table,
size_t datalen, gfp_t gfp);
+
+void dev_coredump_put(struct device *dev);
#else
static inline void dev_coredumpv(struct device *dev, void *data,
size_t datalen, gfp_t gfp)
@@ -85,6 +87,9 @@ static inline void dev_coredumpsg(struct device *dev, struct scatterlist *table,
{
_devcd_free_sgtable(table);
}
+static inline void dev_coredump_put(struct device *dev)
+{
+}
#endif /* CONFIG_DEV_COREDUMP */
#endif /* __DEVCOREDUMP_H */
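The new dev_coredump_put() (a no-op stub without CONFIG_DEV_COREDUMP) lets a driver discard a pending crash dump when its device goes away; a hedged sketch of a removal path:

	static void my_pci_remove(struct pci_dev *pdev)	/* hypothetical driver */
	{
		/* Drop any devcoredump still pending for this device so the
		 * coredump virtual device does not outlive the real one. */
		dev_coredump_put(&pdev->dev);
	}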
diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index 538a3ac95c54..1446c3bae515 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -459,8 +459,16 @@ struct drm_xe_gt {
* by struct drm_xe_query_mem_regions' mem_class.
*/
__u64 far_mem_regions;
+ /** @ip_ver_major: Graphics/media IP major version on GMD_ID platforms */
+ __u16 ip_ver_major;
+ /** @ip_ver_minor: Graphics/media IP minor version on GMD_ID platforms */
+ __u16 ip_ver_minor;
+ /** @ip_ver_rev: Graphics/media IP revision version on GMD_ID platforms */
+ __u16 ip_ver_rev;
+ /** @pad2: MBZ */
+ __u16 pad2;
/** @reserved: Reserved */
- __u64 reserved[8];
+ __u64 reserved[7];
};
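Userspace can read the new per-GT version fields via the standard two-call query pattern; a minimal sketch (DRM_XE_DEVICE_QUERY_GT_LIST and the ioctl wrapper come from this same header; error handling omitted):

	#include <stdio.h>
	#include <stdlib.h>
	#include <stdint.h>
	#include <sys/ioctl.h>
	#include <drm/xe_drm.h>

	static void print_gt_ip_versions(int fd)
	{
		struct drm_xe_device_query q = {
			.query = DRM_XE_DEVICE_QUERY_GT_LIST,
		};
		struct drm_xe_query_gt_list *gts;

		ioctl(fd, DRM_IOCTL_XE_DEVICE_QUERY, &q);  /* 1st call: size */
		gts = calloc(1, q.size);
		q.data = (uintptr_t)gts;
		ioctl(fd, DRM_IOCTL_XE_DEVICE_QUERY, &q);  /* 2nd call: data */

		for (uint32_t i = 0; i < gts->num_gt; i++)
			printf("GT%u: IP %u.%02u rev %u\n",
			       gts->gt_list[i].gt_id,
			       gts->gt_list[i].ip_ver_major,
			       gts->gt_list[i].ip_ver_minor,
			       gts->gt_list[i].ip_ver_rev);
		free(gts);
	}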
/**
@@ -510,9 +518,9 @@ struct drm_xe_query_topology_mask {
/** @gt_id: GT ID the mask is associated with */
__u16 gt_id;
-#define DRM_XE_TOPO_DSS_GEOMETRY (1 << 0)
-#define DRM_XE_TOPO_DSS_COMPUTE (1 << 1)
-#define DRM_XE_TOPO_EU_PER_DSS (1 << 2)
+#define DRM_XE_TOPO_DSS_GEOMETRY 1
+#define DRM_XE_TOPO_DSS_COMPUTE 2
+#define DRM_XE_TOPO_EU_PER_DSS 4
/** @type: type of mask */
__u16 type;
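Since the topology types are now plain enumerated values rather than mask bits, userspace should match the returned @type with equality instead of bit tests, e.g. (illustrative; parse_dss_mask() is a hypothetical decoder):

	if (topo->type == DRM_XE_TOPO_DSS_GEOMETRY)
		parse_dss_mask(topo->mask);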
@@ -583,6 +591,7 @@ struct drm_xe_query_engine_cycles {
struct drm_xe_query_uc_fw_version {
/** @uc_type: The micro-controller type to query firmware version */
#define XE_QUERY_UC_TYPE_GUC_SUBMISSION 0
+#define XE_QUERY_UC_TYPE_HUC 1
__u16 uc_type;
/** @pad: MBZ */
@@ -862,6 +871,12 @@ struct drm_xe_vm_destroy {
* - %DRM_XE_VM_BIND_OP_PREFETCH
*
* and the @flags can be:
+ * - %DRM_XE_VM_BIND_FLAG_READONLY - Set up the page tables as read-only
+ * to ensure write protection
+ * - %DRM_XE_VM_BIND_FLAG_IMMEDIATE - On a faulting VM, do the
+ * MAP operation immediately rather than deferring the MAP to the page
+ * fault handler. This is implied on a non-faulting VM as there is no
+ * fault handler to defer to.
* - %DRM_XE_VM_BIND_FLAG_NULL - When the NULL flag is set, the page
* tables are setup with a special bit which indicates writes are
* dropped and all reads return zero. In the future, the NULL flags
@@ -954,6 +969,8 @@ struct drm_xe_vm_bind_op {
/** @op: Bind operation to perform */
__u32 op;
+#define DRM_XE_VM_BIND_FLAG_READONLY (1 << 0)
+#define DRM_XE_VM_BIND_FLAG_IMMEDIATE (1 << 1)
#define DRM_XE_VM_BIND_FLAG_NULL (1 << 2)
#define DRM_XE_VM_BIND_FLAG_DUMPABLE (1 << 3)
/** @flags: Bind flags */
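A hedged sketch of using the restored flags: map a BO read-only and resolve the bind immediately even on a faulting VM (field names per struct drm_xe_vm_bind / drm_xe_vm_bind_op in this header; pat_index selection and error handling omitted):

	static int bind_readonly(int fd, uint32_t vm_id, uint32_t bo_handle,
				 uint64_t gpu_addr, uint64_t size)
	{
		struct drm_xe_vm_bind args = {
			.vm_id = vm_id,
			.num_binds = 1,
			.bind = {
				.obj = bo_handle,
				.range = size,
				.addr = gpu_addr,
				.op = DRM_XE_VM_BIND_OP_MAP,
				.flags = DRM_XE_VM_BIND_FLAG_READONLY |
					 DRM_XE_VM_BIND_FLAG_IMMEDIATE,
			},
		};

		return ioctl(fd, DRM_IOCTL_XE_VM_BIND, &args);
	}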