author    Lucas De Marchi <lucas.demarchi@intel.com>    2024-10-24 16:15:44 -0500
committer Lucas De Marchi <lucas.demarchi@intel.com>    2024-10-24 16:15:44 -0500
commit    8ccf06f174811c962af31157c247cffbd9317f0a (patch)
tree      ccc3e0444827e756b889b9d5105f5ef1370d0661
parent    5db8be8fbfef01a71cf25202d940bab22f97b0b9 (diff)
2024y-10m-24d-21h-15m-10s UTC: drm-tip rerere cache update
git version 2.47.0
-rw-r--r--   rr-cache/0ace7964b1b2d66224889bcf0b6a7a221776d8c8/postimage.2   1853
-rw-r--r--   rr-cache/0ace7964b1b2d66224889bcf0b6a7a221776d8c8/preimage      1866
-rw-r--r--   rr-cache/0ace7964b1b2d66224889bcf0b6a7a221776d8c8/preimage.1    1866
-rw-r--r--   rr-cache/0ace7964b1b2d66224889bcf0b6a7a221776d8c8/preimage.2    1866
-rw-r--r--   rr-cache/3202f565d085f3a7e6151c8b138d91393b3520cd/postimage.2   1037
-rw-r--r--   rr-cache/3202f565d085f3a7e6151c8b138d91393b3520cd/preimage      1041
-rw-r--r--   rr-cache/3202f565d085f3a7e6151c8b138d91393b3520cd/preimage.1    1041
-rw-r--r--   rr-cache/3202f565d085f3a7e6151c8b138d91393b3520cd/preimage.2    1041
-rw-r--r--   rr-cache/5fdf7e6a138a878e8f03a52286f9d1bbf3929b0e/preimage       529
-rw-r--r--   rr-cache/64a8b1e687217096af72085e0d85f39e6d425bd5/preimage        86
-rw-r--r--   rr-cache/a34411237f3928e39f7e5868312a9debb5b6885f/postimage     1001
-rw-r--r--   rr-cache/a34411237f3928e39f7e5868312a9debb5b6885f/preimage      1005
-rw-r--r--   rr-cache/dc755e41b3920c4fd040291cf34f6f63263f016b/preimage       531
13 files changed, 14763 insertions, 0 deletions
diff --git a/rr-cache/0ace7964b1b2d66224889bcf0b6a7a221776d8c8/postimage.2 b/rr-cache/0ace7964b1b2d66224889bcf0b6a7a221776d8c8/postimage.2
new file mode 100644
index 000000000000..c260d8840990
--- /dev/null
+++ b/rr-cache/0ace7964b1b2d66224889bcf0b6a7a221776d8c8/postimage.2
@@ -0,0 +1,1853 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_guc_ct.h"
+
+#include <linux/bitfield.h>
+#include <linux/circ_buf.h>
+#include <linux/delay.h>
+#include <linux/fault-inject.h>
+
+#include <kunit/static_stub.h>
+
+#include <drm/drm_managed.h>
+
+#include "abi/guc_actions_abi.h"
+#include "abi/guc_actions_sriov_abi.h"
+#include "abi/guc_klvs_abi.h"
+#include "xe_bo.h"
+#include "xe_devcoredump.h"
+#include "xe_device.h"
+#include "xe_gt.h"
+#include "xe_gt_pagefault.h"
+#include "xe_gt_printk.h"
+#include "xe_gt_sriov_pf_control.h"
+#include "xe_gt_sriov_pf_monitor.h"
+#include "xe_gt_tlb_invalidation.h"
+#include "xe_guc.h"
+#include "xe_guc_log.h"
+#include "xe_guc_relay.h"
+#include "xe_guc_submit.h"
+#include "xe_map.h"
+#include "xe_pm.h"
+#include "xe_trace_guc.h"
+
+#if IS_ENABLED(CONFIG_DRM_XE_DEBUG)
+enum {
+ /* Internal states, not error conditions */
+ CT_DEAD_STATE_REARM, /* 0x0001 */
+ CT_DEAD_STATE_CAPTURE, /* 0x0002 */
+
+ /* Error conditions */
+ CT_DEAD_SETUP, /* 0x0004 */
+ CT_DEAD_H2G_WRITE, /* 0x0008 */
+ CT_DEAD_H2G_HAS_ROOM, /* 0x0010 */
+ CT_DEAD_G2H_READ, /* 0x0020 */
+ CT_DEAD_G2H_RECV, /* 0x0040 */
+ CT_DEAD_G2H_RELEASE, /* 0x0080 */
+ CT_DEAD_DEADLOCK, /* 0x0100 */
+ CT_DEAD_PROCESS_FAILED, /* 0x0200 */
+ CT_DEAD_FAST_G2H, /* 0x0400 */
+ CT_DEAD_PARSE_G2H_RESPONSE, /* 0x0800 */
+ CT_DEAD_PARSE_G2H_UNKNOWN, /* 0x1000 */
+ CT_DEAD_PARSE_G2H_ORIGIN, /* 0x2000 */
+ CT_DEAD_PARSE_G2H_TYPE, /* 0x4000 */
+};
+
+static void ct_dead_worker_func(struct work_struct *w);
+static void ct_dead_capture(struct xe_guc_ct *ct, struct guc_ctb *ctb, u32 reason_code);
+
+#define CT_DEAD(ct, ctb, reason_code) ct_dead_capture((ct), (ctb), CT_DEAD_##reason_code)
+#else
+#define CT_DEAD(ct, ctb, reason) \
+ do { \
+ struct guc_ctb *_ctb = (ctb); \
+ if (_ctb) \
+ _ctb->info.broken = true; \
+ } while (0)
+#endif
+
+/* Used when a CT send wants to block and / or receive data */
+struct g2h_fence {
+ u32 *response_buffer;
+ u32 seqno;
+ u32 response_data;
+ u16 response_len;
+ u16 error;
+ u16 hint;
+ u16 reason;
+ bool retry;
+ bool fail;
+ bool done;
+};
+
+static void g2h_fence_init(struct g2h_fence *g2h_fence, u32 *response_buffer)
+{
+ g2h_fence->response_buffer = response_buffer;
+ g2h_fence->response_data = 0;
+ g2h_fence->response_len = 0;
+ g2h_fence->fail = false;
+ g2h_fence->retry = false;
+ g2h_fence->done = false;
+ g2h_fence->seqno = ~0x0;
+}
+
+static bool g2h_fence_needs_alloc(struct g2h_fence *g2h_fence)
+{
+ return g2h_fence->seqno == ~0x0;
+}
+
+static struct xe_guc *
+ct_to_guc(struct xe_guc_ct *ct)
+{
+ return container_of(ct, struct xe_guc, ct);
+}
+
+static struct xe_gt *
+ct_to_gt(struct xe_guc_ct *ct)
+{
+ return container_of(ct, struct xe_gt, uc.guc.ct);
+}
+
+static struct xe_device *
+ct_to_xe(struct xe_guc_ct *ct)
+{
+ return gt_to_xe(ct_to_gt(ct));
+}
+
+/**
+ * DOC: GuC CTB Blob
+ *
+ * We allocate a single blob to hold both CTB descriptors and buffers:
+ *
+ * +--------+-----------------------------------------------+------+
+ * | offset | contents | size |
+ * +========+===============================================+======+
+ * | 0x0000 | H2G CTB Descriptor (send) | |
+ * +--------+-----------------------------------------------+ 4K |
+ * | 0x0800 | G2H CTB Descriptor (g2h) | |
+ * +--------+-----------------------------------------------+------+
+ * | 0x1000 | H2G CT Buffer (send) | n*4K |
+ * | | | |
+ * +--------+-----------------------------------------------+------+
+ * | 0x1000 | G2H CT Buffer (g2h) | m*4K |
+ * | + n*4K | | |
+ * +--------+-----------------------------------------------+------+
+ *
+ * The size of each ``CT Buffer`` must be a multiple of 4K.
+ * We don't expect too many messages in flight at any time, unless we are
+ * using GuC submission. In that case each request requires a minimum of
+ * 2 dwords, which gives us a maximum of 256 queued requests. Hopefully this
+ * is enough space to avoid backpressure on the driver. We increase the size
+ * of the receive buffer (relative to the send) to ensure a G2H response
+ * CTB has a landing spot.
+ *
+ * In addition to submissions, the G2H buffer needs to be able to hold
+ * enough space for recoverable page fault notifications. The number of
+ * page faults is interrupt driven and can be as much as the number of
+ * compute resources available. However, most of the actual work for these
+ * is in a separate page fault worker thread. Therefore we only need to
+ * make sure the queue has enough space to handle all of the submissions
+ * and responses and an extra buffer for incoming page faults.
+ */
+
+#define CTB_DESC_SIZE ALIGN(sizeof(struct guc_ct_buffer_desc), SZ_2K)
+#define CTB_H2G_BUFFER_SIZE (SZ_4K)
+#define CTB_G2H_BUFFER_SIZE (SZ_128K)
+#define G2H_ROOM_BUFFER_SIZE (CTB_G2H_BUFFER_SIZE / 2)
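+
+/*
+ * Illustrative layout (not used by the code): assuming struct
+ * guc_ct_buffer_desc fits within 2K, CTB_DESC_SIZE resolves to SZ_2K and the
+ * table above works out to:
+ *
+ *   H2G descriptor at 0x0000, G2H descriptor at 0x0800 (CTB_DESC_SIZE),
+ *   H2G buffer at 0x1000 (2 * CTB_DESC_SIZE), 4K long,
+ *   G2H buffer at 0x2000 (2 * CTB_DESC_SIZE + CTB_H2G_BUFFER_SIZE), 128K long,
+ *   giving a guc_ct_size() total of 0x22000 (136K).
+ */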
+
+/**
+ * xe_guc_ct_queue_proc_time_jiffies - Return maximum time to process a full
+ * CT command queue
+ * @ct: the &xe_guc_ct. Unused at this moment but will be used in the future.
+ *
+ * Observation is that a 4KiB buffer full of commands takes a little over a
+ * second to process. Use that to calculate maximum time to process a full CT
+ * command queue.
+ *
+ * Return: Maximum time to process a full CT queue in jiffies.
+ */
+long xe_guc_ct_queue_proc_time_jiffies(struct xe_guc_ct *ct)
+{
+ BUILD_BUG_ON(!IS_ALIGNED(CTB_H2G_BUFFER_SIZE, SZ_4));
+ return (CTB_H2G_BUFFER_SIZE / SZ_4K) * HZ;
+}
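+
+/*
+ * Illustrative only: with CTB_H2G_BUFFER_SIZE currently set to SZ_4K the
+ * helper above evaluates to 1 * HZ, i.e. roughly one second, matching the
+ * observation quoted in its kernel-doc.
+ */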
+
+static size_t guc_ct_size(void)
+{
+ return 2 * CTB_DESC_SIZE + CTB_H2G_BUFFER_SIZE +
+ CTB_G2H_BUFFER_SIZE;
+}
+
+static void guc_ct_fini(struct drm_device *drm, void *arg)
+{
+ struct xe_guc_ct *ct = arg;
+
+ destroy_workqueue(ct->g2h_wq);
+ xa_destroy(&ct->fence_lookup);
+}
+
+static void receive_g2h(struct xe_guc_ct *ct);
+static void g2h_worker_func(struct work_struct *w);
+static void safe_mode_worker_func(struct work_struct *w);
+
+static void primelockdep(struct xe_guc_ct *ct)
+{
+ if (!IS_ENABLED(CONFIG_LOCKDEP))
+ return;
+
+ fs_reclaim_acquire(GFP_KERNEL);
+ might_lock(&ct->lock);
+ fs_reclaim_release(GFP_KERNEL);
+}
+
+int xe_guc_ct_init(struct xe_guc_ct *ct)
+{
+ struct xe_device *xe = ct_to_xe(ct);
+ struct xe_gt *gt = ct_to_gt(ct);
+ struct xe_tile *tile = gt_to_tile(gt);
+ struct xe_bo *bo;
+ int err;
+
+ xe_gt_assert(gt, !(guc_ct_size() % PAGE_SIZE));
+
+ ct->g2h_wq = alloc_ordered_workqueue("xe-g2h-wq", 0);
+ if (!ct->g2h_wq)
+ return -ENOMEM;
+
+ spin_lock_init(&ct->fast_lock);
+ xa_init(&ct->fence_lookup);
+ INIT_WORK(&ct->g2h_worker, g2h_worker_func);
+ INIT_DELAYED_WORK(&ct->safe_mode_worker, safe_mode_worker_func);
+#if IS_ENABLED(CONFIG_DRM_XE_DEBUG)
+ spin_lock_init(&ct->dead.lock);
+ INIT_WORK(&ct->dead.worker, ct_dead_worker_func);
+#endif
+ init_waitqueue_head(&ct->wq);
+ init_waitqueue_head(&ct->g2h_fence_wq);
+
+ err = drmm_mutex_init(&xe->drm, &ct->lock);
+ if (err)
+ return err;
+
+ primelockdep(ct);
+
+ bo = xe_managed_bo_create_pin_map(xe, tile, guc_ct_size(),
+ XE_BO_FLAG_SYSTEM |
+ XE_BO_FLAG_GGTT |
+ XE_BO_FLAG_GGTT_INVALIDATE);
+ if (IS_ERR(bo))
+ return PTR_ERR(bo);
+
+ ct->bo = bo;
+
+ err = drmm_add_action_or_reset(&xe->drm, guc_ct_fini, ct);
+ if (err)
+ return err;
+
+ xe_gt_assert(gt, ct->state == XE_GUC_CT_STATE_NOT_INITIALIZED);
+ ct->state = XE_GUC_CT_STATE_DISABLED;
+ return 0;
+}
+ALLOW_ERROR_INJECTION(xe_guc_ct_init, ERRNO); /* See xe_pci_probe() */
+
+#define desc_read(xe_, guc_ctb__, field_) \
+ xe_map_rd_field(xe_, &guc_ctb__->desc, 0, \
+ struct guc_ct_buffer_desc, field_)
+
+#define desc_write(xe_, guc_ctb__, field_, val_) \
+ xe_map_wr_field(xe_, &guc_ctb__->desc, 0, \
+ struct guc_ct_buffer_desc, field_, val_)
+
+static void guc_ct_ctb_h2g_init(struct xe_device *xe, struct guc_ctb *h2g,
+ struct iosys_map *map)
+{
+ h2g->info.size = CTB_H2G_BUFFER_SIZE / sizeof(u32);
+ h2g->info.resv_space = 0;
+ h2g->info.tail = 0;
+ h2g->info.head = 0;
+ h2g->info.space = CIRC_SPACE(h2g->info.tail, h2g->info.head,
+ h2g->info.size) -
+ h2g->info.resv_space;
+ h2g->info.broken = false;
+
+ h2g->desc = *map;
+ xe_map_memset(xe, &h2g->desc, 0, 0, sizeof(struct guc_ct_buffer_desc));
+
+ h2g->cmds = IOSYS_MAP_INIT_OFFSET(map, CTB_DESC_SIZE * 2);
+}
+
+static void guc_ct_ctb_g2h_init(struct xe_device *xe, struct guc_ctb *g2h,
+ struct iosys_map *map)
+{
+ g2h->info.size = CTB_G2H_BUFFER_SIZE / sizeof(u32);
+ g2h->info.resv_space = G2H_ROOM_BUFFER_SIZE / sizeof(u32);
+ g2h->info.head = 0;
+ g2h->info.tail = 0;
+ g2h->info.space = CIRC_SPACE(g2h->info.tail, g2h->info.head,
+ g2h->info.size) -
+ g2h->info.resv_space;
+ g2h->info.broken = false;
+
+ g2h->desc = IOSYS_MAP_INIT_OFFSET(map, CTB_DESC_SIZE);
+ xe_map_memset(xe, &g2h->desc, 0, 0, sizeof(struct guc_ct_buffer_desc));
+
+ g2h->cmds = IOSYS_MAP_INIT_OFFSET(map, CTB_DESC_SIZE * 2 +
+ CTB_H2G_BUFFER_SIZE);
+}
+
+static int guc_ct_ctb_h2g_register(struct xe_guc_ct *ct)
+{
+ struct xe_guc *guc = ct_to_guc(ct);
+ u32 desc_addr, ctb_addr, size;
+ int err;
+
+ desc_addr = xe_bo_ggtt_addr(ct->bo);
+ ctb_addr = xe_bo_ggtt_addr(ct->bo) + CTB_DESC_SIZE * 2;
+ size = ct->ctbs.h2g.info.size * sizeof(u32);
+
+ err = xe_guc_self_cfg64(guc,
+ GUC_KLV_SELF_CFG_H2G_CTB_DESCRIPTOR_ADDR_KEY,
+ desc_addr);
+ if (err)
+ return err;
+
+ err = xe_guc_self_cfg64(guc,
+ GUC_KLV_SELF_CFG_H2G_CTB_ADDR_KEY,
+ ctb_addr);
+ if (err)
+ return err;
+
+ return xe_guc_self_cfg32(guc,
+ GUC_KLV_SELF_CFG_H2G_CTB_SIZE_KEY,
+ size);
+}
+
+static int guc_ct_ctb_g2h_register(struct xe_guc_ct *ct)
+{
+ struct xe_guc *guc = ct_to_guc(ct);
+ u32 desc_addr, ctb_addr, size;
+ int err;
+
+ desc_addr = xe_bo_ggtt_addr(ct->bo) + CTB_DESC_SIZE;
+ ctb_addr = xe_bo_ggtt_addr(ct->bo) + CTB_DESC_SIZE * 2 +
+ CTB_H2G_BUFFER_SIZE;
+ size = ct->ctbs.g2h.info.size * sizeof(u32);
+
+ err = xe_guc_self_cfg64(guc,
+ GUC_KLV_SELF_CFG_G2H_CTB_DESCRIPTOR_ADDR_KEY,
+ desc_addr);
+ if (err)
+ return err;
+
+ err = xe_guc_self_cfg64(guc,
+ GUC_KLV_SELF_CFG_G2H_CTB_ADDR_KEY,
+ ctb_addr);
+ if (err)
+ return err;
+
+ return xe_guc_self_cfg32(guc,
+ GUC_KLV_SELF_CFG_G2H_CTB_SIZE_KEY,
+ size);
+}
+
+static int guc_ct_control_toggle(struct xe_guc_ct *ct, bool enable)
+{
+ u32 request[HOST2GUC_CONTROL_CTB_REQUEST_MSG_LEN] = {
+ FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
+ FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) |
+ FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION,
+ GUC_ACTION_HOST2GUC_CONTROL_CTB),
+ FIELD_PREP(HOST2GUC_CONTROL_CTB_REQUEST_MSG_1_CONTROL,
+ enable ? GUC_CTB_CONTROL_ENABLE :
+ GUC_CTB_CONTROL_DISABLE),
+ };
+ int ret = xe_guc_mmio_send(ct_to_guc(ct), request, ARRAY_SIZE(request));
+
+ return ret > 0 ? -EPROTO : ret;
+}
+
+static void xe_guc_ct_set_state(struct xe_guc_ct *ct,
+ enum xe_guc_ct_state state)
+{
+ mutex_lock(&ct->lock); /* Serialise dequeue_one_g2h() */
+ spin_lock_irq(&ct->fast_lock); /* Serialise CT fast-path */
+
+ xe_gt_assert(ct_to_gt(ct), ct->g2h_outstanding == 0 ||
+ state == XE_GUC_CT_STATE_STOPPED);
+
+ if (ct->g2h_outstanding)
+ xe_pm_runtime_put(ct_to_xe(ct));
+ ct->g2h_outstanding = 0;
+ ct->state = state;
+
+ spin_unlock_irq(&ct->fast_lock);
+
+ /*
+	 * Lockdep doesn't like this under the fast lock, and the destroy only
+	 * needs to be serialized with the send path, which the CT lock provides.
+ */
+ xa_destroy(&ct->fence_lookup);
+
+ mutex_unlock(&ct->lock);
+}
+
+static bool ct_needs_safe_mode(struct xe_guc_ct *ct)
+{
+ return !pci_dev_msi_enabled(to_pci_dev(ct_to_xe(ct)->drm.dev));
+}
+
+static bool ct_restart_safe_mode_worker(struct xe_guc_ct *ct)
+{
+ if (!ct_needs_safe_mode(ct))
+ return false;
+
+ queue_delayed_work(ct->g2h_wq, &ct->safe_mode_worker, HZ / 10);
+ return true;
+}
+
+static void safe_mode_worker_func(struct work_struct *w)
+{
+ struct xe_guc_ct *ct = container_of(w, struct xe_guc_ct, safe_mode_worker.work);
+
+ receive_g2h(ct);
+
+ if (!ct_restart_safe_mode_worker(ct))
+ xe_gt_dbg(ct_to_gt(ct), "GuC CT safe-mode canceled\n");
+}
+
+static void ct_enter_safe_mode(struct xe_guc_ct *ct)
+{
+ if (ct_restart_safe_mode_worker(ct))
+ xe_gt_dbg(ct_to_gt(ct), "GuC CT safe-mode enabled\n");
+}
+
+static void ct_exit_safe_mode(struct xe_guc_ct *ct)
+{
+ if (cancel_delayed_work_sync(&ct->safe_mode_worker))
+ xe_gt_dbg(ct_to_gt(ct), "GuC CT safe-mode disabled\n");
+}
+
+int xe_guc_ct_enable(struct xe_guc_ct *ct)
+{
+ struct xe_device *xe = ct_to_xe(ct);
+ struct xe_gt *gt = ct_to_gt(ct);
+ int err;
+
+ xe_gt_assert(gt, !xe_guc_ct_enabled(ct));
+
+ xe_map_memset(xe, &ct->bo->vmap, 0, 0, ct->bo->size);
+ guc_ct_ctb_h2g_init(xe, &ct->ctbs.h2g, &ct->bo->vmap);
+ guc_ct_ctb_g2h_init(xe, &ct->ctbs.g2h, &ct->bo->vmap);
+
+ err = guc_ct_ctb_h2g_register(ct);
+ if (err)
+ goto err_out;
+
+ err = guc_ct_ctb_g2h_register(ct);
+ if (err)
+ goto err_out;
+
+ err = guc_ct_control_toggle(ct, true);
+ if (err)
+ goto err_out;
+
+ xe_guc_ct_set_state(ct, XE_GUC_CT_STATE_ENABLED);
+
+ smp_mb();
+ wake_up_all(&ct->wq);
+ xe_gt_dbg(gt, "GuC CT communication channel enabled\n");
+
+ if (ct_needs_safe_mode(ct))
+ ct_enter_safe_mode(ct);
+
+#if IS_ENABLED(CONFIG_DRM_XE_DEBUG)
+ /*
+ * The CT has now been reset so the dumper can be re-armed
+ * after any existing dead state has been dumped.
+ */
+ spin_lock_irq(&ct->dead.lock);
+ if (ct->dead.reason)
+ ct->dead.reason |= (1 << CT_DEAD_STATE_REARM);
+ spin_unlock_irq(&ct->dead.lock);
+#endif
+
+ return 0;
+
+err_out:
+ xe_gt_err(gt, "Failed to enable GuC CT (%pe)\n", ERR_PTR(err));
+ CT_DEAD(ct, NULL, SETUP);
+
+ return err;
+}
+
+static void stop_g2h_handler(struct xe_guc_ct *ct)
+{
+ cancel_work_sync(&ct->g2h_worker);
+}
+
+/**
+ * xe_guc_ct_disable - Set GuC to disabled state
+ * @ct: the &xe_guc_ct
+ *
+ * Set GuC CT to disabled state and stop g2h handler. No outstanding g2h expected
+ * in this transition.
+ */
+void xe_guc_ct_disable(struct xe_guc_ct *ct)
+{
+ xe_guc_ct_set_state(ct, XE_GUC_CT_STATE_DISABLED);
+ ct_exit_safe_mode(ct);
+ stop_g2h_handler(ct);
+}
+
+/**
+ * xe_guc_ct_stop - Set GuC to stopped state
+ * @ct: the &xe_guc_ct
+ *
+ * Set GuC CT to stopped state, stop g2h handler, and clear any outstanding g2h
+ */
+void xe_guc_ct_stop(struct xe_guc_ct *ct)
+{
+ xe_guc_ct_set_state(ct, XE_GUC_CT_STATE_STOPPED);
+ stop_g2h_handler(ct);
+}
+
+static bool h2g_has_room(struct xe_guc_ct *ct, u32 cmd_len)
+{
+ struct guc_ctb *h2g = &ct->ctbs.h2g;
+
+ lockdep_assert_held(&ct->lock);
+
+ if (cmd_len > h2g->info.space) {
+ h2g->info.head = desc_read(ct_to_xe(ct), h2g, head);
+
+ if (h2g->info.head > h2g->info.size) {
+ struct xe_device *xe = ct_to_xe(ct);
+ u32 desc_status = desc_read(xe, h2g, status);
+
+ desc_write(xe, h2g, status, desc_status | GUC_CTB_STATUS_OVERFLOW);
+
+ xe_gt_err(ct_to_gt(ct), "CT: invalid head offset %u >= %u)\n",
+ h2g->info.head, h2g->info.size);
+ CT_DEAD(ct, h2g, H2G_HAS_ROOM);
+ return false;
+ }
+
+ h2g->info.space = CIRC_SPACE(h2g->info.tail, h2g->info.head,
+ h2g->info.size) -
+ h2g->info.resv_space;
+ if (cmd_len > h2g->info.space)
+ return false;
+ }
+
+ return true;
+}
+
+static bool g2h_has_room(struct xe_guc_ct *ct, u32 g2h_len)
+{
+ if (!g2h_len)
+ return true;
+
+ lockdep_assert_held(&ct->fast_lock);
+
+ return ct->ctbs.g2h.info.space > g2h_len;
+}
+
+static int has_room(struct xe_guc_ct *ct, u32 cmd_len, u32 g2h_len)
+{
+ lockdep_assert_held(&ct->lock);
+
+ if (!g2h_has_room(ct, g2h_len) || !h2g_has_room(ct, cmd_len))
+ return -EBUSY;
+
+ return 0;
+}
+
+static void h2g_reserve_space(struct xe_guc_ct *ct, u32 cmd_len)
+{
+ lockdep_assert_held(&ct->lock);
+ ct->ctbs.h2g.info.space -= cmd_len;
+}
+
+static void __g2h_reserve_space(struct xe_guc_ct *ct, u32 g2h_len, u32 num_g2h)
+{
+ xe_gt_assert(ct_to_gt(ct), g2h_len <= ct->ctbs.g2h.info.space);
+ xe_gt_assert(ct_to_gt(ct), (!g2h_len && !num_g2h) ||
+ (g2h_len && num_g2h));
+
+ if (g2h_len) {
+ lockdep_assert_held(&ct->fast_lock);
+
+ if (!ct->g2h_outstanding)
+ xe_pm_runtime_get_noresume(ct_to_xe(ct));
+
+ ct->ctbs.g2h.info.space -= g2h_len;
+ ct->g2h_outstanding += num_g2h;
+ }
+}
+
+static void __g2h_release_space(struct xe_guc_ct *ct, u32 g2h_len)
+{
+ bool bad = false;
+
+ lockdep_assert_held(&ct->fast_lock);
+
+ bad = ct->ctbs.g2h.info.space + g2h_len >
+ ct->ctbs.g2h.info.size - ct->ctbs.g2h.info.resv_space;
+ bad |= !ct->g2h_outstanding;
+
+ if (bad) {
+ xe_gt_err(ct_to_gt(ct), "Invalid G2H release: %d + %d vs %d - %d -> %d vs %d, outstanding = %d!\n",
+ ct->ctbs.g2h.info.space, g2h_len,
+ ct->ctbs.g2h.info.size, ct->ctbs.g2h.info.resv_space,
+ ct->ctbs.g2h.info.space + g2h_len,
+ ct->ctbs.g2h.info.size - ct->ctbs.g2h.info.resv_space,
+ ct->g2h_outstanding);
+ CT_DEAD(ct, &ct->ctbs.g2h, G2H_RELEASE);
+ return;
+ }
+
+ ct->ctbs.g2h.info.space += g2h_len;
+ if (!--ct->g2h_outstanding)
+ xe_pm_runtime_put(ct_to_xe(ct));
+}
+
+static void g2h_release_space(struct xe_guc_ct *ct, u32 g2h_len)
+{
+ spin_lock_irq(&ct->fast_lock);
+ __g2h_release_space(ct, g2h_len);
+ spin_unlock_irq(&ct->fast_lock);
+}
+
+#define H2G_CT_HEADERS (GUC_CTB_HDR_LEN + 1) /* one DW CTB header and one DW HxG header */
+
+static int h2g_write(struct xe_guc_ct *ct, const u32 *action, u32 len,
+ u32 ct_fence_value, bool want_response)
+{
+ struct xe_device *xe = ct_to_xe(ct);
+ struct xe_gt *gt = ct_to_gt(ct);
+ struct guc_ctb *h2g = &ct->ctbs.h2g;
+ u32 cmd[H2G_CT_HEADERS];
+ u32 tail = h2g->info.tail;
+ u32 full_len;
+ struct iosys_map map = IOSYS_MAP_INIT_OFFSET(&h2g->cmds,
+ tail * sizeof(u32));
+ u32 desc_status;
+
+ full_len = len + GUC_CTB_HDR_LEN;
+
+ lockdep_assert_held(&ct->lock);
+ xe_gt_assert(gt, full_len <= GUC_CTB_MSG_MAX_LEN);
+
+ desc_status = desc_read(xe, h2g, status);
+ if (desc_status) {
+ xe_gt_err(gt, "CT write: non-zero status: %u\n", desc_status);
+ goto corrupted;
+ }
+
+ if (IS_ENABLED(CONFIG_DRM_XE_DEBUG)) {
+ u32 desc_tail = desc_read(xe, h2g, tail);
+ u32 desc_head = desc_read(xe, h2g, head);
+
+ if (tail != desc_tail) {
+ desc_write(xe, h2g, status, desc_status | GUC_CTB_STATUS_MISMATCH);
+ xe_gt_err(gt, "CT write: tail was modified %u != %u\n", desc_tail, tail);
+ goto corrupted;
+ }
+
+ if (tail > h2g->info.size) {
+ desc_write(xe, h2g, status, desc_status | GUC_CTB_STATUS_OVERFLOW);
+ xe_gt_err(gt, "CT write: tail out of range: %u vs %u\n",
+ tail, h2g->info.size);
+ goto corrupted;
+ }
+
+ if (desc_head >= h2g->info.size) {
+ desc_write(xe, h2g, status, desc_status | GUC_CTB_STATUS_OVERFLOW);
+ xe_gt_err(gt, "CT write: invalid head offset %u >= %u)\n",
+ desc_head, h2g->info.size);
+ goto corrupted;
+ }
+ }
+
+ /* Command will wrap, zero fill (NOPs), return and check credits again */
+ if (tail + full_len > h2g->info.size) {
+ xe_map_memset(xe, &map, 0, 0,
+ (h2g->info.size - tail) * sizeof(u32));
+ h2g_reserve_space(ct, (h2g->info.size - tail));
+ h2g->info.tail = 0;
+ desc_write(xe, h2g, tail, h2g->info.tail);
+
+ return -EAGAIN;
+ }
+
+ /*
+ * dw0: CT header (including fence)
+ * dw1: HXG header (including action code)
+ * dw2+: action data
+ */
+ cmd[0] = FIELD_PREP(GUC_CTB_MSG_0_FORMAT, GUC_CTB_FORMAT_HXG) |
+ FIELD_PREP(GUC_CTB_MSG_0_NUM_DWORDS, len) |
+ FIELD_PREP(GUC_CTB_MSG_0_FENCE, ct_fence_value);
+ if (want_response) {
+ cmd[1] =
+ FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) |
+ FIELD_PREP(GUC_HXG_EVENT_MSG_0_ACTION |
+ GUC_HXG_EVENT_MSG_0_DATA0, action[0]);
+ } else {
+ cmd[1] =
+ FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_FAST_REQUEST) |
+ FIELD_PREP(GUC_HXG_EVENT_MSG_0_ACTION |
+ GUC_HXG_EVENT_MSG_0_DATA0, action[0]);
+ }
+
+ /* H2G header in cmd[1] replaces action[0] so: */
+ --len;
+ ++action;
+
+	/* Write H2G, ensuring it is visible before the descriptor update */
+ xe_map_memcpy_to(xe, &map, 0, cmd, H2G_CT_HEADERS * sizeof(u32));
+ xe_map_memcpy_to(xe, &map, H2G_CT_HEADERS * sizeof(u32), action, len * sizeof(u32));
+ xe_device_wmb(xe);
+
+ /* Update local copies */
+ h2g->info.tail = (tail + full_len) % h2g->info.size;
+ h2g_reserve_space(ct, full_len);
+
+ /* Update descriptor */
+ desc_write(xe, h2g, tail, h2g->info.tail);
+
+ trace_xe_guc_ctb_h2g(xe, gt->info.id, *(action - 1), full_len,
+ desc_read(xe, h2g, head), h2g->info.tail);
+
+ return 0;
+
+corrupted:
+ CT_DEAD(ct, &ct->ctbs.h2g, H2G_WRITE);
+ return -EPIPE;
+}
+
+/*
+ * The CT protocol accepts a 16-bit fence. This field is fully owned by the
+ * driver; the GuC will just copy it to the reply message. Since we need to
+ * be able to distinguish between replies to REQUEST and FAST_REQUEST messages,
+ * we use one bit of the seqno as an indicator for that and a rolling counter
+ * for the remaining 15 bits.
+ */
+#define CT_SEQNO_MASK GENMASK(14, 0)
+#define CT_SEQNO_UNTRACKED BIT(15)
+static u16 next_ct_seqno(struct xe_guc_ct *ct, bool is_g2h_fence)
+{
+ u32 seqno = ct->fence_seqno++ & CT_SEQNO_MASK;
+
+ if (!is_g2h_fence)
+ seqno |= CT_SEQNO_UNTRACKED;
+
+ return seqno;
+}
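+
+/*
+ * Worked example (illustrative only): with fence_seqno at 5, a tracked send
+ * yields fence 0x0005, while an untracked (fire-and-forget) send yields
+ * 0x8005 with CT_SEQNO_UNTRACKED set; the masked counter wraps from 0x7fff
+ * back to 0.
+ */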
+
+static int __guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action,
+ u32 len, u32 g2h_len, u32 num_g2h,
+ struct g2h_fence *g2h_fence)
+{
+ struct xe_gt *gt __maybe_unused = ct_to_gt(ct);
+ u16 seqno;
+ int ret;
+
+ xe_gt_assert(gt, ct->state != XE_GUC_CT_STATE_NOT_INITIALIZED);
+ xe_gt_assert(gt, !g2h_len || !g2h_fence);
+ xe_gt_assert(gt, !num_g2h || !g2h_fence);
+ xe_gt_assert(gt, !g2h_len || num_g2h);
+ xe_gt_assert(gt, g2h_len || !num_g2h);
+ lockdep_assert_held(&ct->lock);
+
+ if (unlikely(ct->ctbs.h2g.info.broken)) {
+ ret = -EPIPE;
+ goto out;
+ }
+
+ if (ct->state == XE_GUC_CT_STATE_DISABLED) {
+ ret = -ENODEV;
+ goto out;
+ }
+
+ if (ct->state == XE_GUC_CT_STATE_STOPPED) {
+ ret = -ECANCELED;
+ goto out;
+ }
+
+ xe_gt_assert(gt, xe_guc_ct_enabled(ct));
+
+ if (g2h_fence) {
+ g2h_len = GUC_CTB_HXG_MSG_MAX_LEN;
+ num_g2h = 1;
+
+ if (g2h_fence_needs_alloc(g2h_fence)) {
+ g2h_fence->seqno = next_ct_seqno(ct, true);
+ ret = xa_err(xa_store(&ct->fence_lookup,
+ g2h_fence->seqno, g2h_fence,
+ GFP_ATOMIC));
+ if (ret)
+ goto out;
+ }
+
+ seqno = g2h_fence->seqno;
+ } else {
+ seqno = next_ct_seqno(ct, false);
+ }
+
+ if (g2h_len)
+ spin_lock_irq(&ct->fast_lock);
+retry:
+ ret = has_room(ct, len + GUC_CTB_HDR_LEN, g2h_len);
+ if (unlikely(ret))
+ goto out_unlock;
+
+ ret = h2g_write(ct, action, len, seqno, !!g2h_fence);
+ if (unlikely(ret)) {
+ if (ret == -EAGAIN)
+ goto retry;
+ goto out_unlock;
+ }
+
+ __g2h_reserve_space(ct, g2h_len, num_g2h);
+ xe_guc_notify(ct_to_guc(ct));
+out_unlock:
+ if (g2h_len)
+ spin_unlock_irq(&ct->fast_lock);
+out:
+ return ret;
+}
+
+static void kick_reset(struct xe_guc_ct *ct)
+{
+ xe_gt_reset_async(ct_to_gt(ct));
+}
+
+static int dequeue_one_g2h(struct xe_guc_ct *ct);
+
+static int guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action, u32 len,
+ u32 g2h_len, u32 num_g2h,
+ struct g2h_fence *g2h_fence)
+{
+ struct xe_device *xe = ct_to_xe(ct);
+ struct xe_gt *gt = ct_to_gt(ct);
+ unsigned int sleep_period_ms = 1;
+ int ret;
+
+ xe_gt_assert(gt, !g2h_len || !g2h_fence);
+ lockdep_assert_held(&ct->lock);
+ xe_device_assert_mem_access(ct_to_xe(ct));
+
+try_again:
+ ret = __guc_ct_send_locked(ct, action, len, g2h_len, num_g2h,
+ g2h_fence);
+
+ /*
+ * We wait to try to restore credits for about 1 second before bailing.
+ * In the case of H2G credits we have no choice but just to wait for the
+ * GuC to consume H2Gs in the channel so we use a wait / sleep loop. In
+ * the case of G2H we process any G2H in the channel, hopefully freeing
+ * credits as we consume the G2H messages.
+ */
+ if (unlikely(ret == -EBUSY &&
+ !h2g_has_room(ct, len + GUC_CTB_HDR_LEN))) {
+ struct guc_ctb *h2g = &ct->ctbs.h2g;
+
+ if (sleep_period_ms == 1024)
+ goto broken;
+
+ trace_xe_guc_ct_h2g_flow_control(xe, h2g->info.head, h2g->info.tail,
+ h2g->info.size,
+ h2g->info.space,
+ len + GUC_CTB_HDR_LEN);
+ msleep(sleep_period_ms);
+ sleep_period_ms <<= 1;
+
+ goto try_again;
+ } else if (unlikely(ret == -EBUSY)) {
+ struct xe_device *xe = ct_to_xe(ct);
+ struct guc_ctb *g2h = &ct->ctbs.g2h;
+
+ trace_xe_guc_ct_g2h_flow_control(xe, g2h->info.head,
+ desc_read(xe, g2h, tail),
+ g2h->info.size,
+ g2h->info.space,
+ g2h_fence ?
+ GUC_CTB_HXG_MSG_MAX_LEN :
+ g2h_len);
+
+#define g2h_avail(ct) \
+ (desc_read(ct_to_xe(ct), (&ct->ctbs.g2h), tail) != ct->ctbs.g2h.info.head)
+ if (!wait_event_timeout(ct->wq, !ct->g2h_outstanding ||
+ g2h_avail(ct), HZ))
+ goto broken;
+#undef g2h_avail
+
+ ret = dequeue_one_g2h(ct);
+ if (ret < 0) {
+ if (ret != -ECANCELED)
+ xe_gt_err(ct_to_gt(ct), "CTB receive failed (%pe)",
+ ERR_PTR(ret));
+ goto broken;
+ }
+
+ goto try_again;
+ }
+
+ return ret;
+
+broken:
+ xe_gt_err(gt, "No forward process on H2G, reset required\n");
+ CT_DEAD(ct, &ct->ctbs.h2g, DEADLOCK);
+
+ return -EDEADLK;
+}
+
+static int guc_ct_send(struct xe_guc_ct *ct, const u32 *action, u32 len,
+ u32 g2h_len, u32 num_g2h, struct g2h_fence *g2h_fence)
+{
+ int ret;
+
+ xe_gt_assert(ct_to_gt(ct), !g2h_len || !g2h_fence);
+
+ mutex_lock(&ct->lock);
+ ret = guc_ct_send_locked(ct, action, len, g2h_len, num_g2h, g2h_fence);
+ mutex_unlock(&ct->lock);
+
+ return ret;
+}
+
+int xe_guc_ct_send(struct xe_guc_ct *ct, const u32 *action, u32 len,
+ u32 g2h_len, u32 num_g2h)
+{
+ int ret;
+
+ ret = guc_ct_send(ct, action, len, g2h_len, num_g2h, NULL);
+ if (ret == -EDEADLK)
+ kick_reset(ct);
+
+ return ret;
+}
+
+int xe_guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action, u32 len,
+ u32 g2h_len, u32 num_g2h)
+{
+ int ret;
+
+ ret = guc_ct_send_locked(ct, action, len, g2h_len, num_g2h, NULL);
+ if (ret == -EDEADLK)
+ kick_reset(ct);
+
+ return ret;
+}
+
+int xe_guc_ct_send_g2h_handler(struct xe_guc_ct *ct, const u32 *action, u32 len)
+{
+ int ret;
+
+ lockdep_assert_held(&ct->lock);
+
+ ret = guc_ct_send_locked(ct, action, len, 0, 0, NULL);
+ if (ret == -EDEADLK)
+ kick_reset(ct);
+
+ return ret;
+}
+
+/*
+ * Check if a GT reset is in progress or will occur and if a GT reset brought the
+ * CT back up. Randomly picking 5 seconds as an upper limit for a GT reset.
+ */
+static bool retry_failure(struct xe_guc_ct *ct, int ret)
+{
+ if (!(ret == -EDEADLK || ret == -EPIPE || ret == -ENODEV))
+ return false;
+
+#define ct_alive(ct) \
+ (xe_guc_ct_enabled(ct) && !ct->ctbs.h2g.info.broken && \
+ !ct->ctbs.g2h.info.broken)
+ if (!wait_event_interruptible_timeout(ct->wq, ct_alive(ct), HZ * 5))
+ return false;
+#undef ct_alive
+
+ return true;
+}
+
+static int guc_ct_send_recv(struct xe_guc_ct *ct, const u32 *action, u32 len,
+ u32 *response_buffer, bool no_fail)
+{
+ struct xe_gt *gt = ct_to_gt(ct);
+ struct g2h_fence g2h_fence;
+ int ret = 0;
+
+ /*
+ * We use a fence to implement blocking sends / receiving response data.
+ * The seqno of the fence is sent in the H2G, returned in the G2H, and
+	 * an xarray is used as the storage medium with the seqno being the key.
+ * Fields in the fence hold success, failure, retry status and the
+ * response data. Safe to allocate on the stack as the xarray is the
+ * only reference and it cannot be present after this function exits.
+ */
+retry:
+ g2h_fence_init(&g2h_fence, response_buffer);
+retry_same_fence:
+ ret = guc_ct_send(ct, action, len, 0, 0, &g2h_fence);
+ if (unlikely(ret == -ENOMEM)) {
+ /* Retry allocation /w GFP_KERNEL */
+ ret = xa_err(xa_store(&ct->fence_lookup, g2h_fence.seqno,
+ &g2h_fence, GFP_KERNEL));
+ if (ret)
+ return ret;
+
+ goto retry_same_fence;
+ } else if (unlikely(ret)) {
+ if (ret == -EDEADLK)
+ kick_reset(ct);
+
+ if (no_fail && retry_failure(ct, ret))
+ goto retry_same_fence;
+
+ if (!g2h_fence_needs_alloc(&g2h_fence))
+ xa_erase(&ct->fence_lookup, g2h_fence.seqno);
+
+ return ret;
+ }
+
+ ret = wait_event_timeout(ct->g2h_fence_wq, g2h_fence.done, HZ);
+
+ /*
+ * Occasionally it is seen that the G2H worker starts running after a delay of more than
+ * a second even after being queued and activated by the Linux workqueue subsystem. This
+	 * leads to a G2H timeout error. The root cause of the issue lies with the scheduling
+	 * latency of the Lunarlake hybrid CPU. The issue disappears if the Lunarlake atom cores
+	 * are disabled from the BIOS, but that is beyond the xe KMD.
+ *
+ * TODO: Drop this change once workqueue scheduling delay issue is fixed on LNL Hybrid CPU.
+ */
+ if (!ret) {
+ flush_work(&ct->g2h_worker);
+ if (g2h_fence.done) {
+ xe_gt_warn(gt, "G2H fence %u, action %04x, done\n",
+ g2h_fence.seqno, action[0]);
+ ret = 1;
+ }
+ }
+
+ /*
+ * Ensure we serialize with completion side to prevent UAF with fence going out of scope on
+ * the stack, since we have no clue if it will fire after the timeout before we can erase
+ * from the xa. Also we have some dependent loads and stores below for which we need the
+ * correct ordering, and we lack the needed barriers.
+ */
+ mutex_lock(&ct->lock);
+ if (!ret) {
+ xe_gt_err(gt, "Timed out wait for G2H, fence %u, action %04x, done %s",
+ g2h_fence.seqno, action[0], str_yes_no(g2h_fence.done));
+ xa_erase(&ct->fence_lookup, g2h_fence.seqno);
+ mutex_unlock(&ct->lock);
+ return -ETIME;
+ }
+
+ if (g2h_fence.retry) {
+ xe_gt_dbg(gt, "H2G action %#x retrying: reason %#x\n",
+ action[0], g2h_fence.reason);
+ mutex_unlock(&ct->lock);
+ goto retry;
+ }
+ if (g2h_fence.fail) {
+ xe_gt_err(gt, "H2G request %#x failed: error %#x hint %#x\n",
+ action[0], g2h_fence.error, g2h_fence.hint);
+ ret = -EIO;
+ }
+
+ if (ret > 0)
+ ret = response_buffer ? g2h_fence.response_len : g2h_fence.response_data;
+
+ mutex_unlock(&ct->lock);
+
+ return ret;
+}
+
+/**
+ * xe_guc_ct_send_recv - Send and receive HXG to the GuC
+ * @ct: the &xe_guc_ct
+ * @action: the dword array with `HXG Request`_ message (can't be NULL)
+ * @len: length of the `HXG Request`_ message (in dwords, can't be 0)
+ * @response_buffer: placeholder for the `HXG Response`_ message (can be NULL)
+ *
+ * Send a `HXG Request`_ message to the GuC over CT communication channel and
+ * blocks until GuC replies with a `HXG Response`_ message.
+ *
+ * For non-blocking communication with GuC use xe_guc_ct_send().
+ *
+ * Note: The size of &response_buffer must be at least GUC_CTB_MAX_DWORDS_.
+ *
+ * Return: response length (in dwords) if &response_buffer was not NULL, or
+ * DATA0 from `HXG Response`_ if &response_buffer was NULL, or
+ * a negative error code on failure.
+ */
+int xe_guc_ct_send_recv(struct xe_guc_ct *ct, const u32 *action, u32 len,
+ u32 *response_buffer)
+{
+ KUNIT_STATIC_STUB_REDIRECT(xe_guc_ct_send_recv, ct, action, len, response_buffer);
+ return guc_ct_send_recv(ct, action, len, response_buffer, false);
+}
+
+int xe_guc_ct_send_recv_no_fail(struct xe_guc_ct *ct, const u32 *action,
+ u32 len, u32 *response_buffer)
+{
+ return guc_ct_send_recv(ct, action, len, response_buffer, true);
+}
+
+static u32 *msg_to_hxg(u32 *msg)
+{
+ return msg + GUC_CTB_MSG_MIN_LEN;
+}
+
+static u32 msg_len_to_hxg_len(u32 len)
+{
+ return len - GUC_CTB_MSG_MIN_LEN;
+}
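+
+/*
+ * For reference: G2H messages mirror the H2G layout documented in h2g_write().
+ * msg[0] is the CTB header carrying the fence, followed by the HXG header and
+ * payload, which is why the helpers above simply skip the first
+ * GUC_CTB_MSG_MIN_LEN dwords.
+ */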
+
+static int parse_g2h_event(struct xe_guc_ct *ct, u32 *msg, u32 len)
+{
+ u32 *hxg = msg_to_hxg(msg);
+ u32 action = FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, hxg[0]);
+
+ lockdep_assert_held(&ct->lock);
+
+ switch (action) {
+ case XE_GUC_ACTION_SCHED_CONTEXT_MODE_DONE:
+ case XE_GUC_ACTION_DEREGISTER_CONTEXT_DONE:
+ case XE_GUC_ACTION_SCHED_ENGINE_MODE_DONE:
+ case XE_GUC_ACTION_TLB_INVALIDATION_DONE:
+ g2h_release_space(ct, len);
+ }
+
+ return 0;
+}
+
+static int parse_g2h_response(struct xe_guc_ct *ct, u32 *msg, u32 len)
+{
+ struct xe_gt *gt = ct_to_gt(ct);
+ u32 *hxg = msg_to_hxg(msg);
+ u32 hxg_len = msg_len_to_hxg_len(len);
+ u32 fence = FIELD_GET(GUC_CTB_MSG_0_FENCE, msg[0]);
+ u32 type = FIELD_GET(GUC_HXG_MSG_0_TYPE, hxg[0]);
+ struct g2h_fence *g2h_fence;
+
+ lockdep_assert_held(&ct->lock);
+
+ /*
+ * Fences for FAST_REQUEST messages are not tracked in ct->fence_lookup.
+ * Those messages should never fail, so if we do get an error back it
+ * means we're likely doing an illegal operation and the GuC is
+ * rejecting it. We have no way to inform the code that submitted the
+ * H2G that the message was rejected, so we need to escalate the
+ * failure to trigger a reset.
+ */
+ if (fence & CT_SEQNO_UNTRACKED) {
+ if (type == GUC_HXG_TYPE_RESPONSE_FAILURE)
+ xe_gt_err(gt, "FAST_REQ H2G fence 0x%x failed! e=0x%x, h=%u\n",
+ fence,
+ FIELD_GET(GUC_HXG_FAILURE_MSG_0_ERROR, hxg[0]),
+ FIELD_GET(GUC_HXG_FAILURE_MSG_0_HINT, hxg[0]));
+ else
+ xe_gt_err(gt, "unexpected response %u for FAST_REQ H2G fence 0x%x!\n",
+ type, fence);
+ CT_DEAD(ct, NULL, PARSE_G2H_RESPONSE);
+
+ return -EPROTO;
+ }
+
+ g2h_fence = xa_erase(&ct->fence_lookup, fence);
+ if (unlikely(!g2h_fence)) {
+ /* Don't tear down channel, as send could've timed out */
+ /* CT_DEAD(ct, NULL, PARSE_G2H_UNKNOWN); */
+ xe_gt_warn(gt, "G2H fence (%u) not found!\n", fence);
+ g2h_release_space(ct, GUC_CTB_HXG_MSG_MAX_LEN);
+ return 0;
+ }
+
+ xe_gt_assert(gt, fence == g2h_fence->seqno);
+
+ if (type == GUC_HXG_TYPE_RESPONSE_FAILURE) {
+ g2h_fence->fail = true;
+ g2h_fence->error = FIELD_GET(GUC_HXG_FAILURE_MSG_0_ERROR, hxg[0]);
+ g2h_fence->hint = FIELD_GET(GUC_HXG_FAILURE_MSG_0_HINT, hxg[0]);
+ } else if (type == GUC_HXG_TYPE_NO_RESPONSE_RETRY) {
+ g2h_fence->retry = true;
+ g2h_fence->reason = FIELD_GET(GUC_HXG_RETRY_MSG_0_REASON, hxg[0]);
+ } else if (g2h_fence->response_buffer) {
+ g2h_fence->response_len = hxg_len;
+ memcpy(g2h_fence->response_buffer, hxg, hxg_len * sizeof(u32));
+ } else {
+ g2h_fence->response_data = FIELD_GET(GUC_HXG_RESPONSE_MSG_0_DATA0, hxg[0]);
+ }
+
+ g2h_release_space(ct, GUC_CTB_HXG_MSG_MAX_LEN);
+
+ g2h_fence->done = true;
+ smp_mb();
+
+ wake_up_all(&ct->g2h_fence_wq);
+
+ return 0;
+}
+
+static int parse_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len)
+{
+ struct xe_gt *gt = ct_to_gt(ct);
+ u32 *hxg = msg_to_hxg(msg);
+ u32 origin, type;
+ int ret;
+
+ lockdep_assert_held(&ct->lock);
+
+ origin = FIELD_GET(GUC_HXG_MSG_0_ORIGIN, hxg[0]);
+ if (unlikely(origin != GUC_HXG_ORIGIN_GUC)) {
+ xe_gt_err(gt, "G2H channel broken on read, origin=%u, reset required\n",
+ origin);
+ CT_DEAD(ct, &ct->ctbs.g2h, PARSE_G2H_ORIGIN);
+
+ return -EPROTO;
+ }
+
+ type = FIELD_GET(GUC_HXG_MSG_0_TYPE, hxg[0]);
+ switch (type) {
+ case GUC_HXG_TYPE_EVENT:
+ ret = parse_g2h_event(ct, msg, len);
+ break;
+ case GUC_HXG_TYPE_RESPONSE_SUCCESS:
+ case GUC_HXG_TYPE_RESPONSE_FAILURE:
+ case GUC_HXG_TYPE_NO_RESPONSE_RETRY:
+ ret = parse_g2h_response(ct, msg, len);
+ break;
+ default:
+ xe_gt_err(gt, "G2H channel broken on read, type=%u, reset required\n",
+ type);
+ CT_DEAD(ct, &ct->ctbs.g2h, PARSE_G2H_TYPE);
+
+ ret = -EOPNOTSUPP;
+ }
+
+ return ret;
+}
+
+static int process_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len)
+{
+ struct xe_guc *guc = ct_to_guc(ct);
+ struct xe_gt *gt = ct_to_gt(ct);
+ u32 hxg_len = msg_len_to_hxg_len(len);
+ u32 *hxg = msg_to_hxg(msg);
+ u32 action, adj_len;
+ u32 *payload;
+ int ret = 0;
+
+ if (FIELD_GET(GUC_HXG_MSG_0_TYPE, hxg[0]) != GUC_HXG_TYPE_EVENT)
+ return 0;
+
+ action = FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, hxg[0]);
+ payload = hxg + GUC_HXG_EVENT_MSG_MIN_LEN;
+ adj_len = hxg_len - GUC_HXG_EVENT_MSG_MIN_LEN;
+
+ switch (action) {
+ case XE_GUC_ACTION_SCHED_CONTEXT_MODE_DONE:
+ ret = xe_guc_sched_done_handler(guc, payload, adj_len);
+ break;
+ case XE_GUC_ACTION_DEREGISTER_CONTEXT_DONE:
+ ret = xe_guc_deregister_done_handler(guc, payload, adj_len);
+ break;
+ case XE_GUC_ACTION_CONTEXT_RESET_NOTIFICATION:
+ ret = xe_guc_exec_queue_reset_handler(guc, payload, adj_len);
+ break;
+ case XE_GUC_ACTION_ENGINE_FAILURE_NOTIFICATION:
+ ret = xe_guc_exec_queue_reset_failure_handler(guc, payload,
+ adj_len);
+ break;
+ case XE_GUC_ACTION_SCHED_ENGINE_MODE_DONE:
+ /* Selftest only at the moment */
+ break;
+ case XE_GUC_ACTION_STATE_CAPTURE_NOTIFICATION:
+ ret = xe_guc_error_capture_handler(guc, payload, adj_len);
+ break;
+ case XE_GUC_ACTION_NOTIFY_FLUSH_LOG_BUFFER_TO_FILE:
+ /* FIXME: Handle this */
+ break;
+ case XE_GUC_ACTION_NOTIFY_MEMORY_CAT_ERROR:
+ ret = xe_guc_exec_queue_memory_cat_error_handler(guc, payload,
+ adj_len);
+ break;
+ case XE_GUC_ACTION_REPORT_PAGE_FAULT_REQ_DESC:
+ ret = xe_guc_pagefault_handler(guc, payload, adj_len);
+ break;
+ case XE_GUC_ACTION_TLB_INVALIDATION_DONE:
+ ret = xe_guc_tlb_invalidation_done_handler(guc, payload,
+ adj_len);
+ break;
+ case XE_GUC_ACTION_ACCESS_COUNTER_NOTIFY:
+ ret = xe_guc_access_counter_notify_handler(guc, payload,
+ adj_len);
+ break;
+ case XE_GUC_ACTION_GUC2PF_RELAY_FROM_VF:
+ ret = xe_guc_relay_process_guc2pf(&guc->relay, hxg, hxg_len);
+ break;
+ case XE_GUC_ACTION_GUC2VF_RELAY_FROM_PF:
+ ret = xe_guc_relay_process_guc2vf(&guc->relay, hxg, hxg_len);
+ break;
+ case GUC_ACTION_GUC2PF_VF_STATE_NOTIFY:
+ ret = xe_gt_sriov_pf_control_process_guc2pf(gt, hxg, hxg_len);
+ break;
+ case GUC_ACTION_GUC2PF_ADVERSE_EVENT:
+ ret = xe_gt_sriov_pf_monitor_process_guc2pf(gt, hxg, hxg_len);
+ break;
+ default:
+ xe_gt_err(gt, "unexpected G2H action 0x%04x\n", action);
+ }
+
+ if (ret) {
+ xe_gt_err(gt, "G2H action 0x%04x failed (%pe)\n",
+ action, ERR_PTR(ret));
+ CT_DEAD(ct, NULL, PROCESS_FAILED);
+ }
+
+ return 0;
+}
+
+static int g2h_read(struct xe_guc_ct *ct, u32 *msg, bool fast_path)
+{
+ struct xe_device *xe = ct_to_xe(ct);
+ struct xe_gt *gt = ct_to_gt(ct);
+ struct guc_ctb *g2h = &ct->ctbs.g2h;
+ u32 tail, head, len, desc_status;
+ s32 avail;
+ u32 action;
+ u32 *hxg;
+
+ xe_gt_assert(gt, ct->state != XE_GUC_CT_STATE_NOT_INITIALIZED);
+ lockdep_assert_held(&ct->fast_lock);
+
+ if (ct->state == XE_GUC_CT_STATE_DISABLED)
+ return -ENODEV;
+
+ if (ct->state == XE_GUC_CT_STATE_STOPPED)
+ return -ECANCELED;
+
+ if (g2h->info.broken)
+ return -EPIPE;
+
+ xe_gt_assert(gt, xe_guc_ct_enabled(ct));
+
+ desc_status = desc_read(xe, g2h, status);
+ if (desc_status) {
+ if (desc_status & GUC_CTB_STATUS_DISABLED) {
+ /*
+ * Potentially valid if a CLIENT_RESET request resulted in
+ * contexts/engines being reset. But should never happen as
+ * no contexts should be active when CLIENT_RESET is sent.
+ */
+ xe_gt_err(gt, "CT read: unexpected G2H after GuC has stopped!\n");
+ desc_status &= ~GUC_CTB_STATUS_DISABLED;
+ }
+
+ if (desc_status) {
+ xe_gt_err(gt, "CT read: non-zero status: %u\n", desc_status);
+ goto corrupted;
+ }
+ }
+
+ if (IS_ENABLED(CONFIG_DRM_XE_DEBUG)) {
+ u32 desc_tail = desc_read(xe, g2h, tail);
+ /*
+ u32 desc_head = desc_read(xe, g2h, head);
+
+ * info.head and desc_head are updated back-to-back at the end of
+ * this function and nowhere else. Hence, they cannot be different
+ * unless two g2h_read calls are running concurrently. Which is not
+ * possible because it is guarded by ct->fast_lock. And yet, some
+	 * discrete platforms are regularly hitting this error :(.
+ *
+ * desc_head rolling backwards shouldn't cause any noticeable
+ * problems - just a delay in GuC being allowed to proceed past that
+ * point in the queue. So for now, just disable the error until it
+ * can be root caused.
+ *
+ if (g2h->info.head != desc_head) {
+ desc_write(xe, g2h, status, desc_status | GUC_CTB_STATUS_MISMATCH);
+ xe_gt_err(gt, "CT read: head was modified %u != %u\n",
+ desc_head, g2h->info.head);
+ goto corrupted;
+ }
+ */
+
+ if (g2h->info.head > g2h->info.size) {
+ desc_write(xe, g2h, status, desc_status | GUC_CTB_STATUS_OVERFLOW);
+ xe_gt_err(gt, "CT read: head out of range: %u vs %u\n",
+ g2h->info.head, g2h->info.size);
+ goto corrupted;
+ }
+
+ if (desc_tail >= g2h->info.size) {
+ desc_write(xe, g2h, status, desc_status | GUC_CTB_STATUS_OVERFLOW);
+ xe_gt_err(gt, "CT read: invalid tail offset %u >= %u)\n",
+ desc_tail, g2h->info.size);
+ goto corrupted;
+ }
+ }
+
+ /* Calculate DW available to read */
+ tail = desc_read(xe, g2h, tail);
+ avail = tail - g2h->info.head;
+ if (unlikely(avail == 0))
+ return 0;
+
+ if (avail < 0)
+ avail += g2h->info.size;
+
+ /* Read header */
+ xe_map_memcpy_from(xe, msg, &g2h->cmds, sizeof(u32) * g2h->info.head,
+ sizeof(u32));
+ len = FIELD_GET(GUC_CTB_MSG_0_NUM_DWORDS, msg[0]) + GUC_CTB_MSG_MIN_LEN;
+ if (len > avail) {
+ xe_gt_err(gt, "G2H channel broken on read, avail=%d, len=%d, reset required\n",
+ avail, len);
+ goto corrupted;
+ }
+
+ head = (g2h->info.head + 1) % g2h->info.size;
+ avail = len - 1;
+
+ /* Read G2H message */
+ if (avail + head > g2h->info.size) {
+ u32 avail_til_wrap = g2h->info.size - head;
+
+ xe_map_memcpy_from(xe, msg + 1,
+ &g2h->cmds, sizeof(u32) * head,
+ avail_til_wrap * sizeof(u32));
+ xe_map_memcpy_from(xe, msg + 1 + avail_til_wrap,
+ &g2h->cmds, 0,
+ (avail - avail_til_wrap) * sizeof(u32));
+ } else {
+ xe_map_memcpy_from(xe, msg + 1,
+ &g2h->cmds, sizeof(u32) * head,
+ avail * sizeof(u32));
+ }
+
+ hxg = msg_to_hxg(msg);
+ action = FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, hxg[0]);
+
+ if (fast_path) {
+ if (FIELD_GET(GUC_HXG_MSG_0_TYPE, hxg[0]) != GUC_HXG_TYPE_EVENT)
+ return 0;
+
+ switch (action) {
+ case XE_GUC_ACTION_REPORT_PAGE_FAULT_REQ_DESC:
+ case XE_GUC_ACTION_TLB_INVALIDATION_DONE:
+ break; /* Process these in fast-path */
+ default:
+ return 0;
+ }
+ }
+
+ /* Update local / descriptor header */
+ g2h->info.head = (head + avail) % g2h->info.size;
+ desc_write(xe, g2h, head, g2h->info.head);
+
+ trace_xe_guc_ctb_g2h(xe, ct_to_gt(ct)->info.id,
+ action, len, g2h->info.head, tail);
+
+ return len;
+
+corrupted:
+ CT_DEAD(ct, &ct->ctbs.g2h, G2H_READ);
+ return -EPROTO;
+}
+
+static void g2h_fast_path(struct xe_guc_ct *ct, u32 *msg, u32 len)
+{
+ struct xe_gt *gt = ct_to_gt(ct);
+ struct xe_guc *guc = ct_to_guc(ct);
+ u32 hxg_len = msg_len_to_hxg_len(len);
+ u32 *hxg = msg_to_hxg(msg);
+ u32 action = FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, hxg[0]);
+ u32 *payload = hxg + GUC_HXG_MSG_MIN_LEN;
+ u32 adj_len = hxg_len - GUC_HXG_MSG_MIN_LEN;
+ int ret = 0;
+
+ switch (action) {
+ case XE_GUC_ACTION_REPORT_PAGE_FAULT_REQ_DESC:
+ ret = xe_guc_pagefault_handler(guc, payload, adj_len);
+ break;
+ case XE_GUC_ACTION_TLB_INVALIDATION_DONE:
+ __g2h_release_space(ct, len);
+ ret = xe_guc_tlb_invalidation_done_handler(guc, payload,
+ adj_len);
+ break;
+ default:
+ xe_gt_warn(gt, "NOT_POSSIBLE");
+ }
+
+ if (ret) {
+ xe_gt_err(gt, "G2H action 0x%04x failed (%pe)\n",
+ action, ERR_PTR(ret));
+ CT_DEAD(ct, NULL, FAST_G2H);
+ }
+}
+
+/**
+ * xe_guc_ct_fast_path - process critical G2H in the IRQ handler
+ * @ct: GuC CT object
+ *
+ * Anything related to page faults is critical for performance, so process these
+ * critical G2H in the IRQ. This is safe as these handlers either just wake up
+ * waiters or queue another worker.
+ */
+void xe_guc_ct_fast_path(struct xe_guc_ct *ct)
+{
+ struct xe_device *xe = ct_to_xe(ct);
+ bool ongoing;
+ int len;
+
+ ongoing = xe_pm_runtime_get_if_active(ct_to_xe(ct));
+ if (!ongoing && xe_pm_read_callback_task(ct_to_xe(ct)) == NULL)
+ return;
+
+ spin_lock(&ct->fast_lock);
+ do {
+ len = g2h_read(ct, ct->fast_msg, true);
+ if (len > 0)
+ g2h_fast_path(ct, ct->fast_msg, len);
+ } while (len > 0);
+ spin_unlock(&ct->fast_lock);
+
+ if (ongoing)
+ xe_pm_runtime_put(xe);
+}
+
+/* Returns less than zero on error, 0 on done, 1 on more available */
+static int dequeue_one_g2h(struct xe_guc_ct *ct)
+{
+ int len;
+ int ret;
+
+ lockdep_assert_held(&ct->lock);
+
+ spin_lock_irq(&ct->fast_lock);
+ len = g2h_read(ct, ct->msg, false);
+ spin_unlock_irq(&ct->fast_lock);
+ if (len <= 0)
+ return len;
+
+ ret = parse_g2h_msg(ct, ct->msg, len);
+ if (unlikely(ret < 0))
+ return ret;
+
+ ret = process_g2h_msg(ct, ct->msg, len);
+ if (unlikely(ret < 0))
+ return ret;
+
+ return 1;
+}
+
+static void receive_g2h(struct xe_guc_ct *ct)
+{
+ bool ongoing;
+ int ret;
+
+ /*
+ * Normal users must always hold mem_access.ref around CT calls. However
+ * during the runtime pm callbacks we rely on CT to talk to the GuC, but
+ * at this stage we can't rely on mem_access.ref and even the
+ * callback_task will be different than current. For such cases we just
+ * need to ensure we always process the responses from any blocking
+ * ct_send requests or where we otherwise expect some response when
+ * initiated from those callbacks (which will need to wait for the below
+ * dequeue_one_g2h()). The dequeue_one_g2h() will gracefully fail if
+ * the device has suspended to the point that the CT communication has
+ * been disabled.
+ *
+ * If we are inside the runtime pm callback, we can be the only task
+ * still issuing CT requests (since that requires having the
+ * mem_access.ref). It seems like it might in theory be possible to
+ * receive unsolicited events from the GuC just as we are
+ * suspending-resuming, but those will currently anyway be lost when
+ * eventually exiting from suspend, hence no need to wake up the device
+ * here. If we ever need something stronger than get_if_ongoing() then
+ * we need to be careful with blocking the pm callbacks from getting CT
+ * responses, if the worker here is blocked on those callbacks
+ * completing, creating a deadlock.
+ */
+ ongoing = xe_pm_runtime_get_if_active(ct_to_xe(ct));
+ if (!ongoing && xe_pm_read_callback_task(ct_to_xe(ct)) == NULL)
+ return;
+
+ do {
+ mutex_lock(&ct->lock);
+ ret = dequeue_one_g2h(ct);
+ mutex_unlock(&ct->lock);
+
+ if (unlikely(ret == -EPROTO || ret == -EOPNOTSUPP)) {
+ xe_gt_err(ct_to_gt(ct), "CT dequeue failed: %d", ret);
+ CT_DEAD(ct, NULL, G2H_RECV);
+ kick_reset(ct);
+ }
+ } while (ret == 1);
+
+ if (ongoing)
+ xe_pm_runtime_put(ct_to_xe(ct));
+}
+
+static void g2h_worker_func(struct work_struct *w)
+{
+ struct xe_guc_ct *ct = container_of(w, struct xe_guc_ct, g2h_worker);
+
+ receive_g2h(ct);
+}
+
+struct xe_guc_ct_snapshot *xe_guc_ct_snapshot_alloc(struct xe_guc_ct *ct, bool atomic)
+{
+ struct xe_guc_ct_snapshot *snapshot;
+
+ snapshot = kzalloc(sizeof(*snapshot), atomic ? GFP_ATOMIC : GFP_KERNEL);
+ if (!snapshot)
+ return NULL;
+
+ if (ct->bo) {
+ snapshot->ctb_size = ct->bo->size;
+ snapshot->ctb = kmalloc(snapshot->ctb_size, atomic ? GFP_ATOMIC : GFP_KERNEL);
+ }
+
+ return snapshot;
+}
+
+static void guc_ctb_snapshot_capture(struct xe_device *xe, struct guc_ctb *ctb,
+ struct guc_ctb_snapshot *snapshot)
+{
+ xe_map_memcpy_from(xe, &snapshot->desc, &ctb->desc, 0,
+ sizeof(struct guc_ct_buffer_desc));
+ memcpy(&snapshot->info, &ctb->info, sizeof(struct guc_ctb_info));
+}
+
+static void guc_ctb_snapshot_print(struct guc_ctb_snapshot *snapshot,
+ struct drm_printer *p)
+{
+ drm_printf(p, "\tsize: %d\n", snapshot->info.size);
+ drm_printf(p, "\tresv_space: %d\n", snapshot->info.resv_space);
+ drm_printf(p, "\thead: %d\n", snapshot->info.head);
+ drm_printf(p, "\ttail: %d\n", snapshot->info.tail);
+ drm_printf(p, "\tspace: %d\n", snapshot->info.space);
+ drm_printf(p, "\tbroken: %d\n", snapshot->info.broken);
+ drm_printf(p, "\thead (memory): %d\n", snapshot->desc.head);
+ drm_printf(p, "\ttail (memory): %d\n", snapshot->desc.tail);
+ drm_printf(p, "\tstatus (memory): 0x%x\n", snapshot->desc.status);
+}
+
+/**
+ * xe_guc_ct_snapshot_capture - Take a quick snapshot of the CT state.
+ * @ct: GuC CT object.
+ * @atomic: Boolean to indicate if this is called from an atomic context, like
+ * reset or the CTB handler, or from some regular path like debugfs.
+ *
+ * This can be printed out in a later stage like during dev_coredump
+ * analysis.
+ *
+ * Returns: a GuC CT snapshot object that must be freed by the caller
+ * by using `xe_guc_ct_snapshot_free`.
+ */
+struct xe_guc_ct_snapshot *xe_guc_ct_snapshot_capture(struct xe_guc_ct *ct,
+ bool atomic)
+{
+ struct xe_device *xe = ct_to_xe(ct);
+ struct xe_guc_ct_snapshot *snapshot;
+
+ snapshot = xe_guc_ct_snapshot_alloc(ct, atomic);
+ if (!snapshot) {
+ xe_gt_err(ct_to_gt(ct), "Skipping CTB snapshot entirely.\n");
+ return NULL;
+ }
+
+ if (xe_guc_ct_enabled(ct) || ct->state == XE_GUC_CT_STATE_STOPPED) {
+ snapshot->ct_enabled = true;
+ snapshot->g2h_outstanding = READ_ONCE(ct->g2h_outstanding);
+ guc_ctb_snapshot_capture(xe, &ct->ctbs.h2g, &snapshot->h2g);
+ guc_ctb_snapshot_capture(xe, &ct->ctbs.g2h, &snapshot->g2h);
+ }
+
+ if (ct->bo && snapshot->ctb)
+ xe_map_memcpy_from(xe, snapshot->ctb, &ct->bo->vmap, 0, snapshot->ctb_size);
+
+ return snapshot;
+}
+
+/**
+ * xe_guc_ct_snapshot_print - Print out a given GuC CT snapshot.
+ * @snapshot: GuC CT snapshot object.
+ * @p: drm_printer where it will be printed out.
+ *
+ * This function prints out a given GuC CT snapshot object.
+ */
+void xe_guc_ct_snapshot_print(struct xe_guc_ct_snapshot *snapshot,
+ struct drm_printer *p)
+{
+ if (!snapshot)
+ return;
+
+ if (snapshot->ct_enabled) {
+ drm_puts(p, "H2G CTB (all sizes in DW):\n");
+ guc_ctb_snapshot_print(&snapshot->h2g, p);
+
+ drm_puts(p, "G2H CTB (all sizes in DW):\n");
+ guc_ctb_snapshot_print(&snapshot->g2h, p);
+ drm_printf(p, "\tg2h outstanding: %d\n",
+ snapshot->g2h_outstanding);
+
+ if (snapshot->ctb) {
+ xe_print_blob_ascii85(p, "CTB data", snapshot->ctb, 0, snapshot->ctb_size);
+ } else {
+ drm_printf(p, "CTB snapshot missing!\n");
+ return;
+ }
+ } else {
+ drm_puts(p, "CT disabled\n");
+ }
+}
+
+/**
+ * xe_guc_ct_snapshot_free - Free all allocated objects for a given snapshot.
+ * @snapshot: GuC CT snapshot object.
+ *
+ * This function frees all the memory that was allocated at capture time.
+ */
+void xe_guc_ct_snapshot_free(struct xe_guc_ct_snapshot *snapshot)
+{
+ if (!snapshot)
+ return;
+
+ kfree(snapshot->ctb);
+ kfree(snapshot);
+}
+
+/**
+ * xe_guc_ct_print - GuC CT Print.
+ * @ct: GuC CT.
+ * @p: drm_printer where it will be printed out.
+ *
+ * This function quickly captures a snapshot and immediately prints it out.
+ */
+void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p)
+{
+ struct xe_guc_ct_snapshot *snapshot;
+
+ snapshot = xe_guc_ct_snapshot_capture(ct, false);
+ xe_guc_ct_snapshot_print(snapshot, p);
+ xe_guc_ct_snapshot_free(snapshot);
+}
+
+#if IS_ENABLED(CONFIG_DRM_XE_DEBUG)
+static void ct_dead_capture(struct xe_guc_ct *ct, struct guc_ctb *ctb, u32 reason_code)
+{
+ struct xe_guc_log_snapshot *snapshot_log;
+ struct xe_guc_ct_snapshot *snapshot_ct;
+ struct xe_guc *guc = ct_to_guc(ct);
+ unsigned long flags;
+ bool have_capture;
+
+ if (ctb)
+ ctb->info.broken = true;
+
+ /* Ignore further errors after the first dump until a reset */
+ if (ct->dead.reported)
+ return;
+
+ spin_lock_irqsave(&ct->dead.lock, flags);
+
+ /* And only capture one dump at a time */
+ have_capture = ct->dead.reason & (1 << CT_DEAD_STATE_CAPTURE);
+ ct->dead.reason |= (1 << reason_code) |
+ (1 << CT_DEAD_STATE_CAPTURE);
+
+ spin_unlock_irqrestore(&ct->dead.lock, flags);
+
+ if (have_capture)
+ return;
+
+ snapshot_log = xe_guc_log_snapshot_capture(&guc->log, true);
+ snapshot_ct = xe_guc_ct_snapshot_capture((ct), true);
+
+ spin_lock_irqsave(&ct->dead.lock, flags);
+
+ if (ct->dead.snapshot_log || ct->dead.snapshot_ct) {
+ xe_gt_err(ct_to_gt(ct), "Got unexpected dead CT capture!\n");
+ xe_guc_log_snapshot_free(snapshot_log);
+ xe_guc_ct_snapshot_free(snapshot_ct);
+ } else {
+ ct->dead.snapshot_log = snapshot_log;
+ ct->dead.snapshot_ct = snapshot_ct;
+ }
+
+ spin_unlock_irqrestore(&ct->dead.lock, flags);
+
+ queue_work(system_unbound_wq, &(ct)->dead.worker);
+}
+
+static void ct_dead_print(struct xe_dead_ct *dead)
+{
+ struct xe_guc_ct *ct = container_of(dead, struct xe_guc_ct, dead);
+ struct xe_device *xe = ct_to_xe(ct);
+ struct xe_gt *gt = ct_to_gt(ct);
+ static int g_count;
+ struct drm_printer ip = xe_gt_info_printer(gt);
+ struct drm_printer lp = drm_line_printer(&ip, "Capture", ++g_count);
+
+ if (!dead->reason) {
+ xe_gt_err(gt, "CTB is dead for no reason!?\n");
+ return;
+ }
+
+ drm_printf(&lp, "CTB is dead - reason=0x%X\n", dead->reason);
+
+ /* Can't generate a genuine core dump at this point, so just do the good bits */
+ drm_puts(&lp, "**** Xe Device Coredump ****\n");
+ xe_device_snapshot_print(xe, &lp);
+
+ drm_printf(&lp, "**** GT #%d ****\n", gt->info.id);
+ drm_printf(&lp, "\tTile: %d\n", gt->tile->id);
+
+ drm_puts(&lp, "**** GuC Log ****\n");
+ xe_guc_log_snapshot_print(dead->snapshot_log, &lp);
+
+ drm_puts(&lp, "**** GuC CT ****\n");
+ xe_guc_ct_snapshot_print(dead->snapshot_ct, &lp);
+
+ drm_puts(&lp, "Done.\n");
+}
+
+static void ct_dead_worker_func(struct work_struct *w)
+{
+ struct xe_guc_ct *ct = container_of(w, struct xe_guc_ct, dead.worker);
+
+ if (!ct->dead.reported) {
+ ct->dead.reported = true;
+ ct_dead_print(&ct->dead);
+ }
+
+ spin_lock_irq(&ct->dead.lock);
+
+ xe_guc_log_snapshot_free(ct->dead.snapshot_log);
+ ct->dead.snapshot_log = NULL;
+ xe_guc_ct_snapshot_free(ct->dead.snapshot_ct);
+ ct->dead.snapshot_ct = NULL;
+
+ if (ct->dead.reason & (1 << CT_DEAD_STATE_REARM)) {
+ /* A reset has occurred so re-arm the error reporting */
+ ct->dead.reason = 0;
+ ct->dead.reported = false;
+ }
+
+ spin_unlock_irq(&ct->dead.lock);
+}
+#endif
diff --git a/rr-cache/0ace7964b1b2d66224889bcf0b6a7a221776d8c8/preimage b/rr-cache/0ace7964b1b2d66224889bcf0b6a7a221776d8c8/preimage
new file mode 100644
index 000000000000..d3838661f4b9
--- /dev/null
+++ b/rr-cache/0ace7964b1b2d66224889bcf0b6a7a221776d8c8/preimage
@@ -0,0 +1,1866 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_guc_ct.h"
+
+#include <linux/bitfield.h>
+#include <linux/circ_buf.h>
+#include <linux/delay.h>
+#include <linux/fault-inject.h>
+
+#include <kunit/static_stub.h>
+
+#include <drm/drm_managed.h>
+
+#include "abi/guc_actions_abi.h"
+#include "abi/guc_actions_sriov_abi.h"
+#include "abi/guc_klvs_abi.h"
+#include "xe_bo.h"
+#include "xe_devcoredump.h"
+#include "xe_device.h"
+#include "xe_gt.h"
+#include "xe_gt_pagefault.h"
+#include "xe_gt_printk.h"
+#include "xe_gt_sriov_pf_control.h"
+#include "xe_gt_sriov_pf_monitor.h"
+#include "xe_gt_tlb_invalidation.h"
+#include "xe_guc.h"
+#include "xe_guc_log.h"
+#include "xe_guc_relay.h"
+#include "xe_guc_submit.h"
+#include "xe_map.h"
+#include "xe_pm.h"
+#include "xe_trace_guc.h"
+
+#if IS_ENABLED(CONFIG_DRM_XE_DEBUG)
+enum {
+ /* Internal states, not error conditions */
+ CT_DEAD_STATE_REARM, /* 0x0001 */
+ CT_DEAD_STATE_CAPTURE, /* 0x0002 */
+
+ /* Error conditions */
+ CT_DEAD_SETUP, /* 0x0004 */
+ CT_DEAD_H2G_WRITE, /* 0x0008 */
+ CT_DEAD_H2G_HAS_ROOM, /* 0x0010 */
+ CT_DEAD_G2H_READ, /* 0x0020 */
+ CT_DEAD_G2H_RECV, /* 0x0040 */
+ CT_DEAD_G2H_RELEASE, /* 0x0080 */
+ CT_DEAD_DEADLOCK, /* 0x0100 */
+ CT_DEAD_PROCESS_FAILED, /* 0x0200 */
+ CT_DEAD_FAST_G2H, /* 0x0400 */
+ CT_DEAD_PARSE_G2H_RESPONSE, /* 0x0800 */
+ CT_DEAD_PARSE_G2H_UNKNOWN, /* 0x1000 */
+ CT_DEAD_PARSE_G2H_ORIGIN, /* 0x2000 */
+ CT_DEAD_PARSE_G2H_TYPE, /* 0x4000 */
+};
+
+static void ct_dead_worker_func(struct work_struct *w);
+static void ct_dead_capture(struct xe_guc_ct *ct, struct guc_ctb *ctb, u32 reason_code);
+
+#define CT_DEAD(ct, ctb, reason_code) ct_dead_capture((ct), (ctb), CT_DEAD_##reason_code)
+#else
+#define CT_DEAD(ct, ctb, reason) \
+ do { \
+ struct guc_ctb *_ctb = (ctb); \
+ if (_ctb) \
+ _ctb->info.broken = true; \
+ } while (0)
+#endif
+
+/* Used when a CT send wants to block and / or receive data */
+struct g2h_fence {
+ u32 *response_buffer;
+ u32 seqno;
+ u32 response_data;
+ u16 response_len;
+ u16 error;
+ u16 hint;
+ u16 reason;
+ bool retry;
+ bool fail;
+ bool done;
+};
+
+static void g2h_fence_init(struct g2h_fence *g2h_fence, u32 *response_buffer)
+{
+ g2h_fence->response_buffer = response_buffer;
+ g2h_fence->response_data = 0;
+ g2h_fence->response_len = 0;
+ g2h_fence->fail = false;
+ g2h_fence->retry = false;
+ g2h_fence->done = false;
+ g2h_fence->seqno = ~0x0;
+}
+
+static bool g2h_fence_needs_alloc(struct g2h_fence *g2h_fence)
+{
+ return g2h_fence->seqno == ~0x0;
+}
+
+static struct xe_guc *
+ct_to_guc(struct xe_guc_ct *ct)
+{
+ return container_of(ct, struct xe_guc, ct);
+}
+
+static struct xe_gt *
+ct_to_gt(struct xe_guc_ct *ct)
+{
+ return container_of(ct, struct xe_gt, uc.guc.ct);
+}
+
+static struct xe_device *
+ct_to_xe(struct xe_guc_ct *ct)
+{
+ return gt_to_xe(ct_to_gt(ct));
+}
+
+/**
+ * DOC: GuC CTB Blob
+ *
+ * We allocate single blob to hold both CTB descriptors and buffers:
+ *
+ * +--------+-----------------------------------------------+------+
+ * | offset | contents | size |
+ * +========+===============================================+======+
+ * | 0x0000 | H2G CTB Descriptor (send) | |
+ * +--------+-----------------------------------------------+ 4K |
+ * | 0x0800 | G2H CTB Descriptor (g2h) | |
+ * +--------+-----------------------------------------------+------+
+ * | 0x1000 | H2G CT Buffer (send) | n*4K |
+ * | | | |
+ * +--------+-----------------------------------------------+------+
+ * | 0x1000 | G2H CT Buffer (g2h) | m*4K |
+ * | + n*4K | | |
+ * +--------+-----------------------------------------------+------+
+ *
+ * The size of each ``CT Buffer`` must be a multiple of 4K.
+ * We don't expect too many messages in flight at any time, unless we are
+ * using GuC submission. In that case each request requires a minimum of
+ * 2 dwords, which gives us a maximum of 256 queued requests. Hopefully this
+ * is enough space to avoid backpressure on the driver. We increase the size
+ * of the receive buffer (relative to the send) to ensure a G2H response
+ * CTB has a landing spot.
+ *
+ * In addition to submissions, the G2H buffer needs to be able to hold
+ * enough space for recoverable page fault notifications. The number of
+ * page faults is interrupt driven and can be as much as the number of
+ * compute resources available. However, most of the actual work for these
+ * is in a separate page fault worker thread. Therefore we only need to
+ * make sure the queue has enough space to handle all of the submissions
+ * and responses and an extra buffer for incoming page faults.
+ */
+
+#define CTB_DESC_SIZE ALIGN(sizeof(struct guc_ct_buffer_desc), SZ_2K)
+#define CTB_H2G_BUFFER_SIZE (SZ_4K)
+#define CTB_G2H_BUFFER_SIZE (SZ_128K)
+#define G2H_ROOM_BUFFER_SIZE (CTB_G2H_BUFFER_SIZE / 2)
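+
+/*
+ * For reference, a sketch of the arithmetic implied above (assuming
+ * struct guc_ct_buffer_desc is no larger than 2K, so CTB_DESC_SIZE
+ * resolves to SZ_2K): the single blob is 2 * 2K (descriptors) + 4K (H2G
+ * buffer) + 128K (G2H buffer) = 136K in total, with half of the G2H
+ * buffer (G2H_ROOM_BUFFER_SIZE = 64K) kept in reserve so unsolicited
+ * G2H traffic such as page faults always has a landing spot.
+ */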
+
+/**
+ * xe_guc_ct_queue_proc_time_jiffies - Return maximum time to process a full
+ * CT command queue
+ * @ct: the &xe_guc_ct. Unused at this moment but will be used in the future.
+ *
+ * Observation is that a 4KiB buffer full of commands takes a little over a
+ * second to process. Use that to calculate maximum time to process a full CT
+ * command queue.
+ *
+ * Return: Maximum time to process a full CT queue in jiffies.
+ */
+long xe_guc_ct_queue_proc_time_jiffies(struct xe_guc_ct *ct)
+{
+ BUILD_BUG_ON(!IS_ALIGNED(CTB_H2G_BUFFER_SIZE, SZ_4));
+ return (CTB_H2G_BUFFER_SIZE / SZ_4K) * HZ;
+}
+
+static size_t guc_ct_size(void)
+{
+ return 2 * CTB_DESC_SIZE + CTB_H2G_BUFFER_SIZE +
+ CTB_G2H_BUFFER_SIZE;
+}
+
+static void guc_ct_fini(struct drm_device *drm, void *arg)
+{
+ struct xe_guc_ct *ct = arg;
+
+ destroy_workqueue(ct->g2h_wq);
+ xa_destroy(&ct->fence_lookup);
+}
+
+static void receive_g2h(struct xe_guc_ct *ct);
+static void g2h_worker_func(struct work_struct *w);
+static void safe_mode_worker_func(struct work_struct *w);
+
+static void primelockdep(struct xe_guc_ct *ct)
+{
+ if (!IS_ENABLED(CONFIG_LOCKDEP))
+ return;
+
+ fs_reclaim_acquire(GFP_KERNEL);
+ might_lock(&ct->lock);
+ fs_reclaim_release(GFP_KERNEL);
+}
+
+int xe_guc_ct_init(struct xe_guc_ct *ct)
+{
+ struct xe_device *xe = ct_to_xe(ct);
+ struct xe_gt *gt = ct_to_gt(ct);
+ struct xe_tile *tile = gt_to_tile(gt);
+ struct xe_bo *bo;
+ int err;
+
+ xe_gt_assert(gt, !(guc_ct_size() % PAGE_SIZE));
+
+ ct->g2h_wq = alloc_ordered_workqueue("xe-g2h-wq", 0);
+ if (!ct->g2h_wq)
+ return -ENOMEM;
+
+ spin_lock_init(&ct->fast_lock);
+ xa_init(&ct->fence_lookup);
+ INIT_WORK(&ct->g2h_worker, g2h_worker_func);
+ INIT_DELAYED_WORK(&ct->safe_mode_worker, safe_mode_worker_func);
+#if IS_ENABLED(CONFIG_DRM_XE_DEBUG)
+ spin_lock_init(&ct->dead.lock);
+ INIT_WORK(&ct->dead.worker, ct_dead_worker_func);
+#endif
+ init_waitqueue_head(&ct->wq);
+ init_waitqueue_head(&ct->g2h_fence_wq);
+
+ err = drmm_mutex_init(&xe->drm, &ct->lock);
+ if (err)
+ return err;
+
+ primelockdep(ct);
+
+ bo = xe_managed_bo_create_pin_map(xe, tile, guc_ct_size(),
+ XE_BO_FLAG_SYSTEM |
+ XE_BO_FLAG_GGTT |
+ XE_BO_FLAG_GGTT_INVALIDATE);
+ if (IS_ERR(bo))
+ return PTR_ERR(bo);
+
+ ct->bo = bo;
+
+ err = drmm_add_action_or_reset(&xe->drm, guc_ct_fini, ct);
+ if (err)
+ return err;
+
+ xe_gt_assert(gt, ct->state == XE_GUC_CT_STATE_NOT_INITIALIZED);
+ ct->state = XE_GUC_CT_STATE_DISABLED;
+ return 0;
+}
+ALLOW_ERROR_INJECTION(xe_guc_ct_init, ERRNO); /* See xe_pci_probe() */
+
+#define desc_read(xe_, guc_ctb__, field_) \
+ xe_map_rd_field(xe_, &guc_ctb__->desc, 0, \
+ struct guc_ct_buffer_desc, field_)
+
+#define desc_write(xe_, guc_ctb__, field_, val_) \
+ xe_map_wr_field(xe_, &guc_ctb__->desc, 0, \
+ struct guc_ct_buffer_desc, field_, val_)
+
+static void guc_ct_ctb_h2g_init(struct xe_device *xe, struct guc_ctb *h2g,
+ struct iosys_map *map)
+{
+ h2g->info.size = CTB_H2G_BUFFER_SIZE / sizeof(u32);
+ h2g->info.resv_space = 0;
+ h2g->info.tail = 0;
+ h2g->info.head = 0;
+ h2g->info.space = CIRC_SPACE(h2g->info.tail, h2g->info.head,
+ h2g->info.size) -
+ h2g->info.resv_space;
+ h2g->info.broken = false;
+
+ h2g->desc = *map;
+ xe_map_memset(xe, &h2g->desc, 0, 0, sizeof(struct guc_ct_buffer_desc));
+
+ h2g->cmds = IOSYS_MAP_INIT_OFFSET(map, CTB_DESC_SIZE * 2);
+}
+
+static void guc_ct_ctb_g2h_init(struct xe_device *xe, struct guc_ctb *g2h,
+ struct iosys_map *map)
+{
+ g2h->info.size = CTB_G2H_BUFFER_SIZE / sizeof(u32);
+ g2h->info.resv_space = G2H_ROOM_BUFFER_SIZE / sizeof(u32);
+ g2h->info.head = 0;
+ g2h->info.tail = 0;
+ g2h->info.space = CIRC_SPACE(g2h->info.tail, g2h->info.head,
+ g2h->info.size) -
+ g2h->info.resv_space;
+ g2h->info.broken = false;
+
+ g2h->desc = IOSYS_MAP_INIT_OFFSET(map, CTB_DESC_SIZE);
+ xe_map_memset(xe, &g2h->desc, 0, 0, sizeof(struct guc_ct_buffer_desc));
+
+ g2h->cmds = IOSYS_MAP_INIT_OFFSET(map, CTB_DESC_SIZE * 2 +
+ CTB_H2G_BUFFER_SIZE);
+}
+
+static int guc_ct_ctb_h2g_register(struct xe_guc_ct *ct)
+{
+ struct xe_guc *guc = ct_to_guc(ct);
+ u32 desc_addr, ctb_addr, size;
+ int err;
+
+ desc_addr = xe_bo_ggtt_addr(ct->bo);
+ ctb_addr = xe_bo_ggtt_addr(ct->bo) + CTB_DESC_SIZE * 2;
+ size = ct->ctbs.h2g.info.size * sizeof(u32);
+
+ err = xe_guc_self_cfg64(guc,
+ GUC_KLV_SELF_CFG_H2G_CTB_DESCRIPTOR_ADDR_KEY,
+ desc_addr);
+ if (err)
+ return err;
+
+ err = xe_guc_self_cfg64(guc,
+ GUC_KLV_SELF_CFG_H2G_CTB_ADDR_KEY,
+ ctb_addr);
+ if (err)
+ return err;
+
+ return xe_guc_self_cfg32(guc,
+ GUC_KLV_SELF_CFG_H2G_CTB_SIZE_KEY,
+ size);
+}
+
+static int guc_ct_ctb_g2h_register(struct xe_guc_ct *ct)
+{
+ struct xe_guc *guc = ct_to_guc(ct);
+ u32 desc_addr, ctb_addr, size;
+ int err;
+
+ desc_addr = xe_bo_ggtt_addr(ct->bo) + CTB_DESC_SIZE;
+ ctb_addr = xe_bo_ggtt_addr(ct->bo) + CTB_DESC_SIZE * 2 +
+ CTB_H2G_BUFFER_SIZE;
+ size = ct->ctbs.g2h.info.size * sizeof(u32);
+
+ err = xe_guc_self_cfg64(guc,
+ GUC_KLV_SELF_CFG_G2H_CTB_DESCRIPTOR_ADDR_KEY,
+ desc_addr);
+ if (err)
+ return err;
+
+ err = xe_guc_self_cfg64(guc,
+ GUC_KLV_SELF_CFG_G2H_CTB_ADDR_KEY,
+ ctb_addr);
+ if (err)
+ return err;
+
+ return xe_guc_self_cfg32(guc,
+ GUC_KLV_SELF_CFG_G2H_CTB_SIZE_KEY,
+ size);
+}
+
+static int guc_ct_control_toggle(struct xe_guc_ct *ct, bool enable)
+{
+ u32 request[HOST2GUC_CONTROL_CTB_REQUEST_MSG_LEN] = {
+ FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
+ FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) |
+ FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION,
+ GUC_ACTION_HOST2GUC_CONTROL_CTB),
+ FIELD_PREP(HOST2GUC_CONTROL_CTB_REQUEST_MSG_1_CONTROL,
+ enable ? GUC_CTB_CONTROL_ENABLE :
+ GUC_CTB_CONTROL_DISABLE),
+ };
+ int ret = xe_guc_mmio_send(ct_to_guc(ct), request, ARRAY_SIZE(request));
+
+ return ret > 0 ? -EPROTO : ret;
+}
+
+static void xe_guc_ct_set_state(struct xe_guc_ct *ct,
+ enum xe_guc_ct_state state)
+{
+ mutex_lock(&ct->lock); /* Serialise dequeue_one_g2h() */
+ spin_lock_irq(&ct->fast_lock); /* Serialise CT fast-path */
+
+ xe_gt_assert(ct_to_gt(ct), ct->g2h_outstanding == 0 ||
+ state == XE_GUC_CT_STATE_STOPPED);
+
+ if (ct->g2h_outstanding)
+ xe_pm_runtime_put(ct_to_xe(ct));
+ ct->g2h_outstanding = 0;
+ ct->state = state;
+
+ spin_unlock_irq(&ct->fast_lock);
+
+ /*
+	 * Lockdep doesn't like this under the fast lock, and the destroy only
+	 * needs to be serialized with the send path, which the ct lock provides.
+ */
+ xa_destroy(&ct->fence_lookup);
+
+ mutex_unlock(&ct->lock);
+}
+
+static bool ct_needs_safe_mode(struct xe_guc_ct *ct)
+{
+ return !pci_dev_msi_enabled(to_pci_dev(ct_to_xe(ct)->drm.dev));
+}
+
+static bool ct_restart_safe_mode_worker(struct xe_guc_ct *ct)
+{
+ if (!ct_needs_safe_mode(ct))
+ return false;
+
+ queue_delayed_work(ct->g2h_wq, &ct->safe_mode_worker, HZ / 10);
+ return true;
+}
+
+static void safe_mode_worker_func(struct work_struct *w)
+{
+ struct xe_guc_ct *ct = container_of(w, struct xe_guc_ct, safe_mode_worker.work);
+
+ receive_g2h(ct);
+
+ if (!ct_restart_safe_mode_worker(ct))
+ xe_gt_dbg(ct_to_gt(ct), "GuC CT safe-mode canceled\n");
+}
+
+static void ct_enter_safe_mode(struct xe_guc_ct *ct)
+{
+ if (ct_restart_safe_mode_worker(ct))
+ xe_gt_dbg(ct_to_gt(ct), "GuC CT safe-mode enabled\n");
+}
+
+static void ct_exit_safe_mode(struct xe_guc_ct *ct)
+{
+ if (cancel_delayed_work_sync(&ct->safe_mode_worker))
+ xe_gt_dbg(ct_to_gt(ct), "GuC CT safe-mode disabled\n");
+}
+
+int xe_guc_ct_enable(struct xe_guc_ct *ct)
+{
+ struct xe_device *xe = ct_to_xe(ct);
+ struct xe_gt *gt = ct_to_gt(ct);
+ int err;
+
+ xe_gt_assert(gt, !xe_guc_ct_enabled(ct));
+
+ xe_map_memset(xe, &ct->bo->vmap, 0, 0, ct->bo->size);
+ guc_ct_ctb_h2g_init(xe, &ct->ctbs.h2g, &ct->bo->vmap);
+ guc_ct_ctb_g2h_init(xe, &ct->ctbs.g2h, &ct->bo->vmap);
+
+ err = guc_ct_ctb_h2g_register(ct);
+ if (err)
+ goto err_out;
+
+ err = guc_ct_ctb_g2h_register(ct);
+ if (err)
+ goto err_out;
+
+ err = guc_ct_control_toggle(ct, true);
+ if (err)
+ goto err_out;
+
+ xe_guc_ct_set_state(ct, XE_GUC_CT_STATE_ENABLED);
+
+ smp_mb();
+ wake_up_all(&ct->wq);
+ xe_gt_dbg(gt, "GuC CT communication channel enabled\n");
+
+ if (ct_needs_safe_mode(ct))
+ ct_enter_safe_mode(ct);
+
+#if IS_ENABLED(CONFIG_DRM_XE_DEBUG)
+ /*
+ * The CT has now been reset so the dumper can be re-armed
+ * after any existing dead state has been dumped.
+ */
+ spin_lock_irq(&ct->dead.lock);
+ if (ct->dead.reason)
+ ct->dead.reason |= (1 << CT_DEAD_STATE_REARM);
+ spin_unlock_irq(&ct->dead.lock);
+#endif
+
+ return 0;
+
+err_out:
+ xe_gt_err(gt, "Failed to enable GuC CT (%pe)\n", ERR_PTR(err));
+ CT_DEAD(ct, NULL, SETUP);
+
+ return err;
+}
+
+static void stop_g2h_handler(struct xe_guc_ct *ct)
+{
+ cancel_work_sync(&ct->g2h_worker);
+}
+
+/**
+ * xe_guc_ct_disable - Set GuC to disabled state
+ * @ct: the &xe_guc_ct
+ *
+ * Set GuC CT to disabled state and stop g2h handler. No outstanding g2h expected
+ * in this transition.
+ */
+void xe_guc_ct_disable(struct xe_guc_ct *ct)
+{
+ xe_guc_ct_set_state(ct, XE_GUC_CT_STATE_DISABLED);
+ ct_exit_safe_mode(ct);
+ stop_g2h_handler(ct);
+}
+
+/**
+ * xe_guc_ct_stop - Set GuC to stopped state
+ * @ct: the &xe_guc_ct
+ *
+ * Set GuC CT to stopped state, stop g2h handler, and clear any outstanding g2h
+ */
+void xe_guc_ct_stop(struct xe_guc_ct *ct)
+{
+ xe_guc_ct_set_state(ct, XE_GUC_CT_STATE_STOPPED);
+ stop_g2h_handler(ct);
+}
+
+static bool h2g_has_room(struct xe_guc_ct *ct, u32 cmd_len)
+{
+ struct guc_ctb *h2g = &ct->ctbs.h2g;
+
+ lockdep_assert_held(&ct->lock);
+
+ if (cmd_len > h2g->info.space) {
+ h2g->info.head = desc_read(ct_to_xe(ct), h2g, head);
+
+ if (h2g->info.head > h2g->info.size) {
+ struct xe_device *xe = ct_to_xe(ct);
+ u32 desc_status = desc_read(xe, h2g, status);
+
+ desc_write(xe, h2g, status, desc_status | GUC_CTB_STATUS_OVERFLOW);
+
+			xe_gt_err(ct_to_gt(ct), "CT: invalid head offset %u >= %u\n",
+ h2g->info.head, h2g->info.size);
+ CT_DEAD(ct, h2g, H2G_HAS_ROOM);
+ return false;
+ }
+
+ h2g->info.space = CIRC_SPACE(h2g->info.tail, h2g->info.head,
+ h2g->info.size) -
+ h2g->info.resv_space;
+ if (cmd_len > h2g->info.space)
+ return false;
+ }
+
+ return true;
+}
+
+static bool g2h_has_room(struct xe_guc_ct *ct, u32 g2h_len)
+{
+ if (!g2h_len)
+ return true;
+
+ lockdep_assert_held(&ct->fast_lock);
+
+ return ct->ctbs.g2h.info.space > g2h_len;
+}
+
+static int has_room(struct xe_guc_ct *ct, u32 cmd_len, u32 g2h_len)
+{
+ lockdep_assert_held(&ct->lock);
+
+ if (!g2h_has_room(ct, g2h_len) || !h2g_has_room(ct, cmd_len))
+ return -EBUSY;
+
+ return 0;
+}
+
+static void h2g_reserve_space(struct xe_guc_ct *ct, u32 cmd_len)
+{
+ lockdep_assert_held(&ct->lock);
+ ct->ctbs.h2g.info.space -= cmd_len;
+}
+
+static void __g2h_reserve_space(struct xe_guc_ct *ct, u32 g2h_len, u32 num_g2h)
+{
+ xe_gt_assert(ct_to_gt(ct), g2h_len <= ct->ctbs.g2h.info.space);
+ xe_gt_assert(ct_to_gt(ct), (!g2h_len && !num_g2h) ||
+ (g2h_len && num_g2h));
+
+ if (g2h_len) {
+ lockdep_assert_held(&ct->fast_lock);
+
+ if (!ct->g2h_outstanding)
+ xe_pm_runtime_get_noresume(ct_to_xe(ct));
+
+ ct->ctbs.g2h.info.space -= g2h_len;
+ ct->g2h_outstanding += num_g2h;
+ }
+}
+
+static void __g2h_release_space(struct xe_guc_ct *ct, u32 g2h_len)
+{
+ bool bad = false;
+
+ lockdep_assert_held(&ct->fast_lock);
+
+ bad = ct->ctbs.g2h.info.space + g2h_len >
+ ct->ctbs.g2h.info.size - ct->ctbs.g2h.info.resv_space;
+ bad |= !ct->g2h_outstanding;
+
+ if (bad) {
+ xe_gt_err(ct_to_gt(ct), "Invalid G2H release: %d + %d vs %d - %d -> %d vs %d, outstanding = %d!\n",
+ ct->ctbs.g2h.info.space, g2h_len,
+ ct->ctbs.g2h.info.size, ct->ctbs.g2h.info.resv_space,
+ ct->ctbs.g2h.info.space + g2h_len,
+ ct->ctbs.g2h.info.size - ct->ctbs.g2h.info.resv_space,
+ ct->g2h_outstanding);
+ CT_DEAD(ct, &ct->ctbs.g2h, G2H_RELEASE);
+ return;
+ }
+
+ ct->ctbs.g2h.info.space += g2h_len;
+ if (!--ct->g2h_outstanding)
+ xe_pm_runtime_put(ct_to_xe(ct));
+}
+
+static void g2h_release_space(struct xe_guc_ct *ct, u32 g2h_len)
+{
+ spin_lock_irq(&ct->fast_lock);
+ __g2h_release_space(ct, g2h_len);
+ spin_unlock_irq(&ct->fast_lock);
+}
+
+#define H2G_CT_HEADERS (GUC_CTB_HDR_LEN + 1) /* one DW CTB header and one DW HxG header */
+
+static int h2g_write(struct xe_guc_ct *ct, const u32 *action, u32 len,
+ u32 ct_fence_value, bool want_response)
+{
+ struct xe_device *xe = ct_to_xe(ct);
+ struct xe_gt *gt = ct_to_gt(ct);
+ struct guc_ctb *h2g = &ct->ctbs.h2g;
+ u32 cmd[H2G_CT_HEADERS];
+ u32 tail = h2g->info.tail;
+ u32 full_len;
+ struct iosys_map map = IOSYS_MAP_INIT_OFFSET(&h2g->cmds,
+ tail * sizeof(u32));
+ u32 desc_status;
+
+ full_len = len + GUC_CTB_HDR_LEN;
+
+ lockdep_assert_held(&ct->lock);
+ xe_gt_assert(gt, full_len <= GUC_CTB_MSG_MAX_LEN);
+
+ desc_status = desc_read(xe, h2g, status);
+ if (desc_status) {
+ xe_gt_err(gt, "CT write: non-zero status: %u\n", desc_status);
+ goto corrupted;
+ }
+
+ if (IS_ENABLED(CONFIG_DRM_XE_DEBUG)) {
+ u32 desc_tail = desc_read(xe, h2g, tail);
+ u32 desc_head = desc_read(xe, h2g, head);
+
+ if (tail != desc_tail) {
+ desc_write(xe, h2g, status, desc_status | GUC_CTB_STATUS_MISMATCH);
+ xe_gt_err(gt, "CT write: tail was modified %u != %u\n", desc_tail, tail);
+ goto corrupted;
+ }
+
+ if (tail > h2g->info.size) {
+ desc_write(xe, h2g, status, desc_status | GUC_CTB_STATUS_OVERFLOW);
+ xe_gt_err(gt, "CT write: tail out of range: %u vs %u\n",
+ tail, h2g->info.size);
+ goto corrupted;
+ }
+
+ if (desc_head >= h2g->info.size) {
+ desc_write(xe, h2g, status, desc_status | GUC_CTB_STATUS_OVERFLOW);
+			xe_gt_err(gt, "CT write: invalid head offset %u >= %u\n",
+ desc_head, h2g->info.size);
+ goto corrupted;
+ }
+ }
+
+ /* Command will wrap, zero fill (NOPs), return and check credits again */
+ if (tail + full_len > h2g->info.size) {
+ xe_map_memset(xe, &map, 0, 0,
+ (h2g->info.size - tail) * sizeof(u32));
+ h2g_reserve_space(ct, (h2g->info.size - tail));
+ h2g->info.tail = 0;
+ desc_write(xe, h2g, tail, h2g->info.tail);
+
+ return -EAGAIN;
+ }
+
+ /*
+ * dw0: CT header (including fence)
+ * dw1: HXG header (including action code)
+ * dw2+: action data
+ */
+ cmd[0] = FIELD_PREP(GUC_CTB_MSG_0_FORMAT, GUC_CTB_FORMAT_HXG) |
+ FIELD_PREP(GUC_CTB_MSG_0_NUM_DWORDS, len) |
+ FIELD_PREP(GUC_CTB_MSG_0_FENCE, ct_fence_value);
+ if (want_response) {
+ cmd[1] =
+ FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) |
+ FIELD_PREP(GUC_HXG_EVENT_MSG_0_ACTION |
+ GUC_HXG_EVENT_MSG_0_DATA0, action[0]);
+ } else {
+ cmd[1] =
+ FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_FAST_REQUEST) |
+ FIELD_PREP(GUC_HXG_EVENT_MSG_0_ACTION |
+ GUC_HXG_EVENT_MSG_0_DATA0, action[0]);
+ }
+
+ /* H2G header in cmd[1] replaces action[0] so: */
+ --len;
+ ++action;
+
+	/* Write H2G, ensuring it is visible before the descriptor update */
+ xe_map_memcpy_to(xe, &map, 0, cmd, H2G_CT_HEADERS * sizeof(u32));
+ xe_map_memcpy_to(xe, &map, H2G_CT_HEADERS * sizeof(u32), action, len * sizeof(u32));
+ xe_device_wmb(xe);
+
+ /* Update local copies */
+ h2g->info.tail = (tail + full_len) % h2g->info.size;
+ h2g_reserve_space(ct, full_len);
+
+ /* Update descriptor */
+ desc_write(xe, h2g, tail, h2g->info.tail);
+
+ trace_xe_guc_ctb_h2g(xe, gt->info.id, *(action - 1), full_len,
+ desc_read(xe, h2g, head), h2g->info.tail);
+
+ return 0;
+
+corrupted:
+ CT_DEAD(ct, &ct->ctbs.h2g, H2G_WRITE);
+ return -EPIPE;
+}
+
+/*
+ * The CT protocol accepts a 16 bits fence. This field is fully owned by the
+ * driver, the GuC will just copy it to the reply message. Since we need to
+ * be able to distinguish between replies to REQUEST and FAST_REQUEST messages,
+ * we use one bit of the seqno as an indicator for that and a rolling counter
+ * for the remaining 15 bits.
+ */
+#define CT_SEQNO_MASK GENMASK(14, 0)
+#define CT_SEQNO_UNTRACKED BIT(15)
+static u16 next_ct_seqno(struct xe_guc_ct *ct, bool is_g2h_fence)
+{
+ u32 seqno = ct->fence_seqno++ & CT_SEQNO_MASK;
+
+ if (!is_g2h_fence)
+ seqno |= CT_SEQNO_UNTRACKED;
+
+ return seqno;
+}
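+
+/*
+ * Worked example of the encoding above (illustrative only): if
+ * ct->fence_seqno is currently 0x7fff, a tracked (blocking) send is
+ * assigned seqno 0x7fff and the counter then wraps, so the next tracked
+ * send gets 0x0000; an untracked FAST_REQUEST issued at the same point
+ * would be assigned 0x7fff | CT_SEQNO_UNTRACKED = 0xffff. Only tracked
+ * seqnos are stored in ct->fence_lookup.
+ */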
+
+static int __guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action,
+ u32 len, u32 g2h_len, u32 num_g2h,
+ struct g2h_fence *g2h_fence)
+{
+ struct xe_gt *gt __maybe_unused = ct_to_gt(ct);
+ u16 seqno;
+ int ret;
+
+ xe_gt_assert(gt, ct->state != XE_GUC_CT_STATE_NOT_INITIALIZED);
+ xe_gt_assert(gt, !g2h_len || !g2h_fence);
+ xe_gt_assert(gt, !num_g2h || !g2h_fence);
+ xe_gt_assert(gt, !g2h_len || num_g2h);
+ xe_gt_assert(gt, g2h_len || !num_g2h);
+ lockdep_assert_held(&ct->lock);
+
+ if (unlikely(ct->ctbs.h2g.info.broken)) {
+ ret = -EPIPE;
+ goto out;
+ }
+
+ if (ct->state == XE_GUC_CT_STATE_DISABLED) {
+ ret = -ENODEV;
+ goto out;
+ }
+
+ if (ct->state == XE_GUC_CT_STATE_STOPPED) {
+ ret = -ECANCELED;
+ goto out;
+ }
+
+ xe_gt_assert(gt, xe_guc_ct_enabled(ct));
+
+ if (g2h_fence) {
+ g2h_len = GUC_CTB_HXG_MSG_MAX_LEN;
+ num_g2h = 1;
+
+ if (g2h_fence_needs_alloc(g2h_fence)) {
+ g2h_fence->seqno = next_ct_seqno(ct, true);
+ ret = xa_err(xa_store(&ct->fence_lookup,
+ g2h_fence->seqno, g2h_fence,
+ GFP_ATOMIC));
+ if (ret)
+ goto out;
+ }
+
+ seqno = g2h_fence->seqno;
+ } else {
+ seqno = next_ct_seqno(ct, false);
+ }
+
+ if (g2h_len)
+ spin_lock_irq(&ct->fast_lock);
+retry:
+ ret = has_room(ct, len + GUC_CTB_HDR_LEN, g2h_len);
+ if (unlikely(ret))
+ goto out_unlock;
+
+ ret = h2g_write(ct, action, len, seqno, !!g2h_fence);
+ if (unlikely(ret)) {
+ if (ret == -EAGAIN)
+ goto retry;
+ goto out_unlock;
+ }
+
+ __g2h_reserve_space(ct, g2h_len, num_g2h);
+ xe_guc_notify(ct_to_guc(ct));
+out_unlock:
+ if (g2h_len)
+ spin_unlock_irq(&ct->fast_lock);
+out:
+ return ret;
+}
+
+static void kick_reset(struct xe_guc_ct *ct)
+{
+ xe_gt_reset_async(ct_to_gt(ct));
+}
+
+static int dequeue_one_g2h(struct xe_guc_ct *ct);
+
+static int guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action, u32 len,
+ u32 g2h_len, u32 num_g2h,
+ struct g2h_fence *g2h_fence)
+{
+ struct xe_device *xe = ct_to_xe(ct);
+ struct xe_gt *gt = ct_to_gt(ct);
+ unsigned int sleep_period_ms = 1;
+ int ret;
+
+ xe_gt_assert(gt, !g2h_len || !g2h_fence);
+ lockdep_assert_held(&ct->lock);
+ xe_device_assert_mem_access(ct_to_xe(ct));
+
+try_again:
+ ret = __guc_ct_send_locked(ct, action, len, g2h_len, num_g2h,
+ g2h_fence);
+
+ /*
+	 * We wait to try to restore credits for about 1 second before bailing
+	 * (the sleep period doubles from 1 ms up to 512 ms, ~1 s in total).
+	 * In the case of H2G credits we have no choice but to wait for the
+	 * GuC to consume H2Gs in the channel, so we use a wait / sleep loop. In
+	 * the case of G2H we process any G2H in the channel, hopefully freeing
+	 * credits as we consume the G2H messages.
+ */
+ if (unlikely(ret == -EBUSY &&
+ !h2g_has_room(ct, len + GUC_CTB_HDR_LEN))) {
+ struct guc_ctb *h2g = &ct->ctbs.h2g;
+
+ if (sleep_period_ms == 1024)
+ goto broken;
+
+ trace_xe_guc_ct_h2g_flow_control(xe, h2g->info.head, h2g->info.tail,
+ h2g->info.size,
+ h2g->info.space,
+ len + GUC_CTB_HDR_LEN);
+ msleep(sleep_period_ms);
+ sleep_period_ms <<= 1;
+
+ goto try_again;
+ } else if (unlikely(ret == -EBUSY)) {
+ struct xe_device *xe = ct_to_xe(ct);
+ struct guc_ctb *g2h = &ct->ctbs.g2h;
+
+ trace_xe_guc_ct_g2h_flow_control(xe, g2h->info.head,
+ desc_read(xe, g2h, tail),
+ g2h->info.size,
+ g2h->info.space,
+ g2h_fence ?
+ GUC_CTB_HXG_MSG_MAX_LEN :
+ g2h_len);
+
+#define g2h_avail(ct) \
+ (desc_read(ct_to_xe(ct), (&ct->ctbs.g2h), tail) != ct->ctbs.g2h.info.head)
+ if (!wait_event_timeout(ct->wq, !ct->g2h_outstanding ||
+ g2h_avail(ct), HZ))
+ goto broken;
+#undef g2h_avail
+
+ ret = dequeue_one_g2h(ct);
+ if (ret < 0) {
+ if (ret != -ECANCELED)
+ xe_gt_err(ct_to_gt(ct), "CTB receive failed (%pe)",
+ ERR_PTR(ret));
+ goto broken;
+ }
+
+ goto try_again;
+ }
+
+ return ret;
+
+broken:
+ xe_gt_err(gt, "No forward process on H2G, reset required\n");
+ CT_DEAD(ct, &ct->ctbs.h2g, DEADLOCK);
+
+ return -EDEADLK;
+}
+
+static int guc_ct_send(struct xe_guc_ct *ct, const u32 *action, u32 len,
+ u32 g2h_len, u32 num_g2h, struct g2h_fence *g2h_fence)
+{
+ int ret;
+
+ xe_gt_assert(ct_to_gt(ct), !g2h_len || !g2h_fence);
+
+ mutex_lock(&ct->lock);
+ ret = guc_ct_send_locked(ct, action, len, g2h_len, num_g2h, g2h_fence);
+ mutex_unlock(&ct->lock);
+
+ return ret;
+}
+
+int xe_guc_ct_send(struct xe_guc_ct *ct, const u32 *action, u32 len,
+ u32 g2h_len, u32 num_g2h)
+{
+ int ret;
+
+ ret = guc_ct_send(ct, action, len, g2h_len, num_g2h, NULL);
+ if (ret == -EDEADLK)
+ kick_reset(ct);
+
+ return ret;
+}
+
+int xe_guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action, u32 len,
+ u32 g2h_len, u32 num_g2h)
+{
+ int ret;
+
+ ret = guc_ct_send_locked(ct, action, len, g2h_len, num_g2h, NULL);
+ if (ret == -EDEADLK)
+ kick_reset(ct);
+
+ return ret;
+}
+
+int xe_guc_ct_send_g2h_handler(struct xe_guc_ct *ct, const u32 *action, u32 len)
+{
+ int ret;
+
+ lockdep_assert_held(&ct->lock);
+
+ ret = guc_ct_send_locked(ct, action, len, 0, 0, NULL);
+ if (ret == -EDEADLK)
+ kick_reset(ct);
+
+ return ret;
+}
+
+/*
+ * Check if a GT reset is in progress or will occur, and if the GT reset brought
+ * the CT back up. 5 seconds is an arbitrary upper limit for a GT reset to complete.
+ */
+static bool retry_failure(struct xe_guc_ct *ct, int ret)
+{
+ if (!(ret == -EDEADLK || ret == -EPIPE || ret == -ENODEV))
+ return false;
+
+#define ct_alive(ct) \
+ (xe_guc_ct_enabled(ct) && !ct->ctbs.h2g.info.broken && \
+ !ct->ctbs.g2h.info.broken)
+ if (!wait_event_interruptible_timeout(ct->wq, ct_alive(ct), HZ * 5))
+ return false;
+#undef ct_alive
+
+ return true;
+}
+
+static int guc_ct_send_recv(struct xe_guc_ct *ct, const u32 *action, u32 len,
+ u32 *response_buffer, bool no_fail)
+{
+ struct xe_gt *gt = ct_to_gt(ct);
+ struct g2h_fence g2h_fence;
+ int ret = 0;
+
+ /*
+ * We use a fence to implement blocking sends / receiving response data.
+ * The seqno of the fence is sent in the H2G, returned in the G2H, and
+	 * an xarray is used as storage media with the seqno being the key.
+ * Fields in the fence hold success, failure, retry status and the
+ * response data. Safe to allocate on the stack as the xarray is the
+ * only reference and it cannot be present after this function exits.
+ */
+retry:
+ g2h_fence_init(&g2h_fence, response_buffer);
+retry_same_fence:
+ ret = guc_ct_send(ct, action, len, 0, 0, &g2h_fence);
+ if (unlikely(ret == -ENOMEM)) {
+		/* Retry allocation with GFP_KERNEL */
+ ret = xa_err(xa_store(&ct->fence_lookup, g2h_fence.seqno,
+ &g2h_fence, GFP_KERNEL));
+ if (ret)
+ return ret;
+
+ goto retry_same_fence;
+ } else if (unlikely(ret)) {
+ if (ret == -EDEADLK)
+ kick_reset(ct);
+
+ if (no_fail && retry_failure(ct, ret))
+ goto retry_same_fence;
+
+ if (!g2h_fence_needs_alloc(&g2h_fence))
+ xa_erase(&ct->fence_lookup, g2h_fence.seqno);
+
+ return ret;
+ }
+
+ ret = wait_event_timeout(ct->g2h_fence_wq, g2h_fence.done, HZ);
+
+ /*
+<<<<<<<
+ * Ensure we serialize with completion side to prevent UAF with fence going out of scope on
+ * the stack, since we have no clue if it will fire after the timeout before we can erase
+ * from the xa. Also we have some dependent loads and stores below for which we need the
+ * correct ordering, and we lack the needed barriers.
+ */
+ mutex_lock(&ct->lock);
+ if (!ret) {
+ xe_gt_err(gt, "Timed out wait for G2H, fence %u, action %04x, done %s",
+ g2h_fence.seqno, action[0], str_yes_no(g2h_fence.done));
+ xa_erase(&ct->fence_lookup, g2h_fence.seqno);
+=======
+	 * Occasionally it is seen that the G2H worker starts running after a delay of more than
+	 * a second even after being queued and activated by the Linux workqueue subsystem. This
+	 * leads to a G2H timeout error. The root cause of the issue lies with the scheduling
+	 * latency of the Lunarlake hybrid CPU. The issue disappears if the Lunarlake atom cores
+	 * are disabled in the BIOS, which is beyond the control of the xe KMD.
+ *
+ * TODO: Drop this change once workqueue scheduling delay issue is fixed on LNL Hybrid CPU.
+ */
+ if (!ret) {
+ flush_work(&ct->g2h_worker);
+ if (g2h_fence.done) {
+ xe_gt_warn(gt, "G2H fence %u, action %04x, done\n",
+ g2h_fence.seqno, action[0]);
+ ret = 1;
+ }
+ }
+
+ /*
+ * Ensure we serialize with completion side to prevent UAF with fence going out of scope on
+ * the stack, since we have no clue if it will fire after the timeout before we can erase
+ * from the xa. Also we have some dependent loads and stores below for which we need the
+ * correct ordering, and we lack the needed barriers.
+ */
+ mutex_lock(&ct->lock);
+ if (!ret) {
+ xe_gt_err(gt, "Timed out wait for G2H, fence %u, action %04x, done %s",
+ g2h_fence.seqno, action[0], str_yes_no(g2h_fence.done));
+ xa_erase_irq(&ct->fence_lookup, g2h_fence.seqno);
+>>>>>>>
+ mutex_unlock(&ct->lock);
+ return -ETIME;
+ }
+
+ if (g2h_fence.retry) {
+ xe_gt_dbg(gt, "H2G action %#x retrying: reason %#x\n",
+ action[0], g2h_fence.reason);
+ mutex_unlock(&ct->lock);
+ goto retry;
+ }
+ if (g2h_fence.fail) {
+ xe_gt_err(gt, "H2G request %#x failed: error %#x hint %#x\n",
+ action[0], g2h_fence.error, g2h_fence.hint);
+ ret = -EIO;
+ }
+
+ if (ret > 0)
+ ret = response_buffer ? g2h_fence.response_len : g2h_fence.response_data;
+
+ mutex_unlock(&ct->lock);
+
+ return ret;
+}
+
+/**
+ * xe_guc_ct_send_recv - Send and receive HXG to the GuC
+ * @ct: the &xe_guc_ct
+ * @action: the dword array with `HXG Request`_ message (can't be NULL)
+ * @len: length of the `HXG Request`_ message (in dwords, can't be 0)
+ * @response_buffer: placeholder for the `HXG Response`_ message (can be NULL)
+ *
+ * Send a `HXG Request`_ message to the GuC over CT communication channel and
+ * blocks until GuC replies with a `HXG Response`_ message.
+ *
+ * For non-blocking communication with GuC use xe_guc_ct_send().
+ *
+ * Note: The size of &response_buffer must be at least GUC_CTB_MAX_DWORDS_.
+ *
+ * Return: response length (in dwords) if &response_buffer was not NULL, or
+ * DATA0 from `HXG Response`_ if &response_buffer was NULL, or
+ * a negative error code on failure.
+ */
+int xe_guc_ct_send_recv(struct xe_guc_ct *ct, const u32 *action, u32 len,
+ u32 *response_buffer)
+{
+ KUNIT_STATIC_STUB_REDIRECT(xe_guc_ct_send_recv, ct, action, len, response_buffer);
+ return guc_ct_send_recv(ct, action, len, response_buffer, false);
+}
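+
+/*
+ * Illustrative (hypothetical) usage sketch of xe_guc_ct_send_recv() with a
+ * made-up action that returns its result in DATA0 of the HXG response:
+ *
+ *	u32 action[] = { SOME_H2G_ACTION, param };
+ *	int ret = xe_guc_ct_send_recv(ct, action, ARRAY_SIZE(action), NULL);
+ *
+ *	if (ret < 0)
+ *		return ret;	// send failed, timed out or was rejected
+ *	// ret now holds DATA0 from the HXG response
+ */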
+
+int xe_guc_ct_send_recv_no_fail(struct xe_guc_ct *ct, const u32 *action,
+ u32 len, u32 *response_buffer)
+{
+ return guc_ct_send_recv(ct, action, len, response_buffer, true);
+}
+
+static u32 *msg_to_hxg(u32 *msg)
+{
+ return msg + GUC_CTB_MSG_MIN_LEN;
+}
+
+static u32 msg_len_to_hxg_len(u32 len)
+{
+ return len - GUC_CTB_MSG_MIN_LEN;
+}
+
+static int parse_g2h_event(struct xe_guc_ct *ct, u32 *msg, u32 len)
+{
+ u32 *hxg = msg_to_hxg(msg);
+ u32 action = FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, hxg[0]);
+
+ lockdep_assert_held(&ct->lock);
+
+ switch (action) {
+ case XE_GUC_ACTION_SCHED_CONTEXT_MODE_DONE:
+ case XE_GUC_ACTION_DEREGISTER_CONTEXT_DONE:
+ case XE_GUC_ACTION_SCHED_ENGINE_MODE_DONE:
+ case XE_GUC_ACTION_TLB_INVALIDATION_DONE:
+ g2h_release_space(ct, len);
+ }
+
+ return 0;
+}
+
+static int parse_g2h_response(struct xe_guc_ct *ct, u32 *msg, u32 len)
+{
+ struct xe_gt *gt = ct_to_gt(ct);
+ u32 *hxg = msg_to_hxg(msg);
+ u32 hxg_len = msg_len_to_hxg_len(len);
+ u32 fence = FIELD_GET(GUC_CTB_MSG_0_FENCE, msg[0]);
+ u32 type = FIELD_GET(GUC_HXG_MSG_0_TYPE, hxg[0]);
+ struct g2h_fence *g2h_fence;
+
+ lockdep_assert_held(&ct->lock);
+
+ /*
+ * Fences for FAST_REQUEST messages are not tracked in ct->fence_lookup.
+ * Those messages should never fail, so if we do get an error back it
+ * means we're likely doing an illegal operation and the GuC is
+ * rejecting it. We have no way to inform the code that submitted the
+ * H2G that the message was rejected, so we need to escalate the
+ * failure to trigger a reset.
+ */
+ if (fence & CT_SEQNO_UNTRACKED) {
+ if (type == GUC_HXG_TYPE_RESPONSE_FAILURE)
+ xe_gt_err(gt, "FAST_REQ H2G fence 0x%x failed! e=0x%x, h=%u\n",
+ fence,
+ FIELD_GET(GUC_HXG_FAILURE_MSG_0_ERROR, hxg[0]),
+ FIELD_GET(GUC_HXG_FAILURE_MSG_0_HINT, hxg[0]));
+ else
+ xe_gt_err(gt, "unexpected response %u for FAST_REQ H2G fence 0x%x!\n",
+ type, fence);
+ CT_DEAD(ct, NULL, PARSE_G2H_RESPONSE);
+
+ return -EPROTO;
+ }
+
+ g2h_fence = xa_erase(&ct->fence_lookup, fence);
+ if (unlikely(!g2h_fence)) {
+ /* Don't tear down channel, as send could've timed out */
+ /* CT_DEAD(ct, NULL, PARSE_G2H_UNKNOWN); */
+ xe_gt_warn(gt, "G2H fence (%u) not found!\n", fence);
+ g2h_release_space(ct, GUC_CTB_HXG_MSG_MAX_LEN);
+ return 0;
+ }
+
+ xe_gt_assert(gt, fence == g2h_fence->seqno);
+
+ if (type == GUC_HXG_TYPE_RESPONSE_FAILURE) {
+ g2h_fence->fail = true;
+ g2h_fence->error = FIELD_GET(GUC_HXG_FAILURE_MSG_0_ERROR, hxg[0]);
+ g2h_fence->hint = FIELD_GET(GUC_HXG_FAILURE_MSG_0_HINT, hxg[0]);
+ } else if (type == GUC_HXG_TYPE_NO_RESPONSE_RETRY) {
+ g2h_fence->retry = true;
+ g2h_fence->reason = FIELD_GET(GUC_HXG_RETRY_MSG_0_REASON, hxg[0]);
+ } else if (g2h_fence->response_buffer) {
+ g2h_fence->response_len = hxg_len;
+ memcpy(g2h_fence->response_buffer, hxg, hxg_len * sizeof(u32));
+ } else {
+ g2h_fence->response_data = FIELD_GET(GUC_HXG_RESPONSE_MSG_0_DATA0, hxg[0]);
+ }
+
+ g2h_release_space(ct, GUC_CTB_HXG_MSG_MAX_LEN);
+
+ g2h_fence->done = true;
+ smp_mb();
+
+ wake_up_all(&ct->g2h_fence_wq);
+
+ return 0;
+}
+
+static int parse_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len)
+{
+ struct xe_gt *gt = ct_to_gt(ct);
+ u32 *hxg = msg_to_hxg(msg);
+ u32 origin, type;
+ int ret;
+
+ lockdep_assert_held(&ct->lock);
+
+ origin = FIELD_GET(GUC_HXG_MSG_0_ORIGIN, hxg[0]);
+ if (unlikely(origin != GUC_HXG_ORIGIN_GUC)) {
+ xe_gt_err(gt, "G2H channel broken on read, origin=%u, reset required\n",
+ origin);
+ CT_DEAD(ct, &ct->ctbs.g2h, PARSE_G2H_ORIGIN);
+
+ return -EPROTO;
+ }
+
+ type = FIELD_GET(GUC_HXG_MSG_0_TYPE, hxg[0]);
+ switch (type) {
+ case GUC_HXG_TYPE_EVENT:
+ ret = parse_g2h_event(ct, msg, len);
+ break;
+ case GUC_HXG_TYPE_RESPONSE_SUCCESS:
+ case GUC_HXG_TYPE_RESPONSE_FAILURE:
+ case GUC_HXG_TYPE_NO_RESPONSE_RETRY:
+ ret = parse_g2h_response(ct, msg, len);
+ break;
+ default:
+ xe_gt_err(gt, "G2H channel broken on read, type=%u, reset required\n",
+ type);
+ CT_DEAD(ct, &ct->ctbs.g2h, PARSE_G2H_TYPE);
+
+ ret = -EOPNOTSUPP;
+ }
+
+ return ret;
+}
+
+static int process_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len)
+{
+ struct xe_guc *guc = ct_to_guc(ct);
+ struct xe_gt *gt = ct_to_gt(ct);
+ u32 hxg_len = msg_len_to_hxg_len(len);
+ u32 *hxg = msg_to_hxg(msg);
+ u32 action, adj_len;
+ u32 *payload;
+ int ret = 0;
+
+ if (FIELD_GET(GUC_HXG_MSG_0_TYPE, hxg[0]) != GUC_HXG_TYPE_EVENT)
+ return 0;
+
+ action = FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, hxg[0]);
+ payload = hxg + GUC_HXG_EVENT_MSG_MIN_LEN;
+ adj_len = hxg_len - GUC_HXG_EVENT_MSG_MIN_LEN;
+
+ switch (action) {
+ case XE_GUC_ACTION_SCHED_CONTEXT_MODE_DONE:
+ ret = xe_guc_sched_done_handler(guc, payload, adj_len);
+ break;
+ case XE_GUC_ACTION_DEREGISTER_CONTEXT_DONE:
+ ret = xe_guc_deregister_done_handler(guc, payload, adj_len);
+ break;
+ case XE_GUC_ACTION_CONTEXT_RESET_NOTIFICATION:
+ ret = xe_guc_exec_queue_reset_handler(guc, payload, adj_len);
+ break;
+ case XE_GUC_ACTION_ENGINE_FAILURE_NOTIFICATION:
+ ret = xe_guc_exec_queue_reset_failure_handler(guc, payload,
+ adj_len);
+ break;
+ case XE_GUC_ACTION_SCHED_ENGINE_MODE_DONE:
+ /* Selftest only at the moment */
+ break;
+ case XE_GUC_ACTION_STATE_CAPTURE_NOTIFICATION:
+ ret = xe_guc_error_capture_handler(guc, payload, adj_len);
+ break;
+ case XE_GUC_ACTION_NOTIFY_FLUSH_LOG_BUFFER_TO_FILE:
+ /* FIXME: Handle this */
+ break;
+ case XE_GUC_ACTION_NOTIFY_MEMORY_CAT_ERROR:
+ ret = xe_guc_exec_queue_memory_cat_error_handler(guc, payload,
+ adj_len);
+ break;
+ case XE_GUC_ACTION_REPORT_PAGE_FAULT_REQ_DESC:
+ ret = xe_guc_pagefault_handler(guc, payload, adj_len);
+ break;
+ case XE_GUC_ACTION_TLB_INVALIDATION_DONE:
+ ret = xe_guc_tlb_invalidation_done_handler(guc, payload,
+ adj_len);
+ break;
+ case XE_GUC_ACTION_ACCESS_COUNTER_NOTIFY:
+ ret = xe_guc_access_counter_notify_handler(guc, payload,
+ adj_len);
+ break;
+ case XE_GUC_ACTION_GUC2PF_RELAY_FROM_VF:
+ ret = xe_guc_relay_process_guc2pf(&guc->relay, hxg, hxg_len);
+ break;
+ case XE_GUC_ACTION_GUC2VF_RELAY_FROM_PF:
+ ret = xe_guc_relay_process_guc2vf(&guc->relay, hxg, hxg_len);
+ break;
+ case GUC_ACTION_GUC2PF_VF_STATE_NOTIFY:
+ ret = xe_gt_sriov_pf_control_process_guc2pf(gt, hxg, hxg_len);
+ break;
+ case GUC_ACTION_GUC2PF_ADVERSE_EVENT:
+ ret = xe_gt_sriov_pf_monitor_process_guc2pf(gt, hxg, hxg_len);
+ break;
+ default:
+ xe_gt_err(gt, "unexpected G2H action 0x%04x\n", action);
+ }
+
+ if (ret) {
+ xe_gt_err(gt, "G2H action 0x%04x failed (%pe)\n",
+ action, ERR_PTR(ret));
+ CT_DEAD(ct, NULL, PROCESS_FAILED);
+ }
+
+ return 0;
+}
+
+static int g2h_read(struct xe_guc_ct *ct, u32 *msg, bool fast_path)
+{
+ struct xe_device *xe = ct_to_xe(ct);
+ struct xe_gt *gt = ct_to_gt(ct);
+ struct guc_ctb *g2h = &ct->ctbs.g2h;
+ u32 tail, head, len, desc_status;
+ s32 avail;
+ u32 action;
+ u32 *hxg;
+
+ xe_gt_assert(gt, ct->state != XE_GUC_CT_STATE_NOT_INITIALIZED);
+ lockdep_assert_held(&ct->fast_lock);
+
+ if (ct->state == XE_GUC_CT_STATE_DISABLED)
+ return -ENODEV;
+
+ if (ct->state == XE_GUC_CT_STATE_STOPPED)
+ return -ECANCELED;
+
+ if (g2h->info.broken)
+ return -EPIPE;
+
+ xe_gt_assert(gt, xe_guc_ct_enabled(ct));
+
+ desc_status = desc_read(xe, g2h, status);
+ if (desc_status) {
+ if (desc_status & GUC_CTB_STATUS_DISABLED) {
+ /*
+ * Potentially valid if a CLIENT_RESET request resulted in
+ * contexts/engines being reset. But should never happen as
+ * no contexts should be active when CLIENT_RESET is sent.
+ */
+ xe_gt_err(gt, "CT read: unexpected G2H after GuC has stopped!\n");
+ desc_status &= ~GUC_CTB_STATUS_DISABLED;
+ }
+
+ if (desc_status) {
+ xe_gt_err(gt, "CT read: non-zero status: %u\n", desc_status);
+ goto corrupted;
+ }
+ }
+
+ if (IS_ENABLED(CONFIG_DRM_XE_DEBUG)) {
+ u32 desc_tail = desc_read(xe, g2h, tail);
+ /*
+ u32 desc_head = desc_read(xe, g2h, head);
+
+ * info.head and desc_head are updated back-to-back at the end of
+ * this function and nowhere else. Hence, they cannot be different
+ * unless two g2h_read calls are running concurrently. Which is not
+ * possible because it is guarded by ct->fast_lock. And yet, some
+		 * discrete platforms are regularly hitting this error :(.
+ *
+ * desc_head rolling backwards shouldn't cause any noticeable
+ * problems - just a delay in GuC being allowed to proceed past that
+ * point in the queue. So for now, just disable the error until it
+ * can be root caused.
+ *
+ if (g2h->info.head != desc_head) {
+ desc_write(xe, g2h, status, desc_status | GUC_CTB_STATUS_MISMATCH);
+ xe_gt_err(gt, "CT read: head was modified %u != %u\n",
+ desc_head, g2h->info.head);
+ goto corrupted;
+ }
+ */
+
+ if (g2h->info.head > g2h->info.size) {
+ desc_write(xe, g2h, status, desc_status | GUC_CTB_STATUS_OVERFLOW);
+ xe_gt_err(gt, "CT read: head out of range: %u vs %u\n",
+ g2h->info.head, g2h->info.size);
+ goto corrupted;
+ }
+
+ if (desc_tail >= g2h->info.size) {
+ desc_write(xe, g2h, status, desc_status | GUC_CTB_STATUS_OVERFLOW);
+			xe_gt_err(gt, "CT read: invalid tail offset %u >= %u\n",
+ desc_tail, g2h->info.size);
+ goto corrupted;
+ }
+ }
+
+ /* Calculate DW available to read */
+ tail = desc_read(xe, g2h, tail);
+ avail = tail - g2h->info.head;
+ if (unlikely(avail == 0))
+ return 0;
+
+ if (avail < 0)
+ avail += g2h->info.size;
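+	/*
+	 * Example of the wrap handling above: with info.size = 0x8000 dwords
+	 * (CTB_G2H_BUFFER_SIZE / sizeof(u32)), head = 0x7ff0 and tail = 0x0010
+	 * give avail = 0x0010 - 0x7ff0 + 0x8000 = 0x20 dwords to read.
+	 */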
+
+ /* Read header */
+ xe_map_memcpy_from(xe, msg, &g2h->cmds, sizeof(u32) * g2h->info.head,
+ sizeof(u32));
+ len = FIELD_GET(GUC_CTB_MSG_0_NUM_DWORDS, msg[0]) + GUC_CTB_MSG_MIN_LEN;
+ if (len > avail) {
+ xe_gt_err(gt, "G2H channel broken on read, avail=%d, len=%d, reset required\n",
+ avail, len);
+ goto corrupted;
+ }
+
+ head = (g2h->info.head + 1) % g2h->info.size;
+ avail = len - 1;
+
+ /* Read G2H message */
+ if (avail + head > g2h->info.size) {
+ u32 avail_til_wrap = g2h->info.size - head;
+
+ xe_map_memcpy_from(xe, msg + 1,
+ &g2h->cmds, sizeof(u32) * head,
+ avail_til_wrap * sizeof(u32));
+ xe_map_memcpy_from(xe, msg + 1 + avail_til_wrap,
+ &g2h->cmds, 0,
+ (avail - avail_til_wrap) * sizeof(u32));
+ } else {
+ xe_map_memcpy_from(xe, msg + 1,
+ &g2h->cmds, sizeof(u32) * head,
+ avail * sizeof(u32));
+ }
+
+ hxg = msg_to_hxg(msg);
+ action = FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, hxg[0]);
+
+ if (fast_path) {
+ if (FIELD_GET(GUC_HXG_MSG_0_TYPE, hxg[0]) != GUC_HXG_TYPE_EVENT)
+ return 0;
+
+ switch (action) {
+ case XE_GUC_ACTION_REPORT_PAGE_FAULT_REQ_DESC:
+ case XE_GUC_ACTION_TLB_INVALIDATION_DONE:
+ break; /* Process these in fast-path */
+ default:
+ return 0;
+ }
+ }
+
+ /* Update local / descriptor header */
+ g2h->info.head = (head + avail) % g2h->info.size;
+ desc_write(xe, g2h, head, g2h->info.head);
+
+ trace_xe_guc_ctb_g2h(xe, ct_to_gt(ct)->info.id,
+ action, len, g2h->info.head, tail);
+
+ return len;
+
+corrupted:
+ CT_DEAD(ct, &ct->ctbs.g2h, G2H_READ);
+ return -EPROTO;
+}
+
+static void g2h_fast_path(struct xe_guc_ct *ct, u32 *msg, u32 len)
+{
+ struct xe_gt *gt = ct_to_gt(ct);
+ struct xe_guc *guc = ct_to_guc(ct);
+ u32 hxg_len = msg_len_to_hxg_len(len);
+ u32 *hxg = msg_to_hxg(msg);
+ u32 action = FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, hxg[0]);
+ u32 *payload = hxg + GUC_HXG_MSG_MIN_LEN;
+ u32 adj_len = hxg_len - GUC_HXG_MSG_MIN_LEN;
+ int ret = 0;
+
+ switch (action) {
+ case XE_GUC_ACTION_REPORT_PAGE_FAULT_REQ_DESC:
+ ret = xe_guc_pagefault_handler(guc, payload, adj_len);
+ break;
+ case XE_GUC_ACTION_TLB_INVALIDATION_DONE:
+ __g2h_release_space(ct, len);
+ ret = xe_guc_tlb_invalidation_done_handler(guc, payload,
+ adj_len);
+ break;
+ default:
+ xe_gt_warn(gt, "NOT_POSSIBLE");
+ }
+
+ if (ret) {
+ xe_gt_err(gt, "G2H action 0x%04x failed (%pe)\n",
+ action, ERR_PTR(ret));
+ CT_DEAD(ct, NULL, FAST_G2H);
+ }
+}
+
+/**
+ * xe_guc_ct_fast_path - process critical G2H in the IRQ handler
+ * @ct: GuC CT object
+ *
+ * Anything related to page faults is critical for performance, so process these
+ * critical G2H in the IRQ. This is safe as these handlers either just wake up
+ * waiters or queue another worker.
+ */
+void xe_guc_ct_fast_path(struct xe_guc_ct *ct)
+{
+ struct xe_device *xe = ct_to_xe(ct);
+ bool ongoing;
+ int len;
+
+ ongoing = xe_pm_runtime_get_if_active(ct_to_xe(ct));
+ if (!ongoing && xe_pm_read_callback_task(ct_to_xe(ct)) == NULL)
+ return;
+
+ spin_lock(&ct->fast_lock);
+ do {
+ len = g2h_read(ct, ct->fast_msg, true);
+ if (len > 0)
+ g2h_fast_path(ct, ct->fast_msg, len);
+ } while (len > 0);
+ spin_unlock(&ct->fast_lock);
+
+ if (ongoing)
+ xe_pm_runtime_put(xe);
+}
+
+/* Returns less than zero on error, 0 on done, 1 on more available */
+static int dequeue_one_g2h(struct xe_guc_ct *ct)
+{
+ int len;
+ int ret;
+
+ lockdep_assert_held(&ct->lock);
+
+ spin_lock_irq(&ct->fast_lock);
+ len = g2h_read(ct, ct->msg, false);
+ spin_unlock_irq(&ct->fast_lock);
+ if (len <= 0)
+ return len;
+
+ ret = parse_g2h_msg(ct, ct->msg, len);
+ if (unlikely(ret < 0))
+ return ret;
+
+ ret = process_g2h_msg(ct, ct->msg, len);
+ if (unlikely(ret < 0))
+ return ret;
+
+ return 1;
+}
+
+static void receive_g2h(struct xe_guc_ct *ct)
+{
+ bool ongoing;
+ int ret;
+
+ /*
+ * Normal users must always hold mem_access.ref around CT calls. However
+ * during the runtime pm callbacks we rely on CT to talk to the GuC, but
+ * at this stage we can't rely on mem_access.ref and even the
+ * callback_task will be different than current. For such cases we just
+ * need to ensure we always process the responses from any blocking
+ * ct_send requests or where we otherwise expect some response when
+ * initiated from those callbacks (which will need to wait for the below
+ * dequeue_one_g2h()). The dequeue_one_g2h() will gracefully fail if
+ * the device has suspended to the point that the CT communication has
+ * been disabled.
+ *
+ * If we are inside the runtime pm callback, we can be the only task
+ * still issuing CT requests (since that requires having the
+ * mem_access.ref). It seems like it might in theory be possible to
+ * receive unsolicited events from the GuC just as we are
+ * suspending-resuming, but those will currently anyway be lost when
+ * eventually exiting from suspend, hence no need to wake up the device
+ * here. If we ever need something stronger than get_if_ongoing() then
+ * we need to be careful with blocking the pm callbacks from getting CT
+ * responses, if the worker here is blocked on those callbacks
+ * completing, creating a deadlock.
+ */
+ ongoing = xe_pm_runtime_get_if_active(ct_to_xe(ct));
+ if (!ongoing && xe_pm_read_callback_task(ct_to_xe(ct)) == NULL)
+ return;
+
+ do {
+ mutex_lock(&ct->lock);
+ ret = dequeue_one_g2h(ct);
+ mutex_unlock(&ct->lock);
+
+ if (unlikely(ret == -EPROTO || ret == -EOPNOTSUPP)) {
+ xe_gt_err(ct_to_gt(ct), "CT dequeue failed: %d", ret);
+ CT_DEAD(ct, NULL, G2H_RECV);
+ kick_reset(ct);
+ }
+ } while (ret == 1);
+
+ if (ongoing)
+ xe_pm_runtime_put(ct_to_xe(ct));
+}
+
+static void g2h_worker_func(struct work_struct *w)
+{
+ struct xe_guc_ct *ct = container_of(w, struct xe_guc_ct, g2h_worker);
+
+ receive_g2h(ct);
+}
+
+struct xe_guc_ct_snapshot *xe_guc_ct_snapshot_alloc(struct xe_guc_ct *ct, bool atomic)
+{
+ struct xe_guc_ct_snapshot *snapshot;
+
+ snapshot = kzalloc(sizeof(*snapshot), atomic ? GFP_ATOMIC : GFP_KERNEL);
+ if (!snapshot)
+ return NULL;
+
+ if (ct->bo) {
+ snapshot->ctb_size = ct->bo->size;
+ snapshot->ctb = kmalloc(snapshot->ctb_size, atomic ? GFP_ATOMIC : GFP_KERNEL);
+ }
+
+ return snapshot;
+}
+
+static void guc_ctb_snapshot_capture(struct xe_device *xe, struct guc_ctb *ctb,
+ struct guc_ctb_snapshot *snapshot)
+{
+ xe_map_memcpy_from(xe, &snapshot->desc, &ctb->desc, 0,
+ sizeof(struct guc_ct_buffer_desc));
+ memcpy(&snapshot->info, &ctb->info, sizeof(struct guc_ctb_info));
+}
+
+static void guc_ctb_snapshot_print(struct guc_ctb_snapshot *snapshot,
+ struct drm_printer *p)
+{
+ drm_printf(p, "\tsize: %d\n", snapshot->info.size);
+ drm_printf(p, "\tresv_space: %d\n", snapshot->info.resv_space);
+ drm_printf(p, "\thead: %d\n", snapshot->info.head);
+ drm_printf(p, "\ttail: %d\n", snapshot->info.tail);
+ drm_printf(p, "\tspace: %d\n", snapshot->info.space);
+ drm_printf(p, "\tbroken: %d\n", snapshot->info.broken);
+ drm_printf(p, "\thead (memory): %d\n", snapshot->desc.head);
+ drm_printf(p, "\ttail (memory): %d\n", snapshot->desc.tail);
+ drm_printf(p, "\tstatus (memory): 0x%x\n", snapshot->desc.status);
+}
+
+/**
+ * xe_guc_ct_snapshot_capture - Take a quick snapshot of the CT state.
+ * @ct: GuC CT object.
+ * @atomic: Boolean to indicate if this is called from atomic context like
+ * reset or CTB handler or from some regular path like debugfs.
+ *
+ * This can be printed out at a later stage, for example during dev_coredump
+ * analysis.
+ *
+ * Returns: a GuC CT snapshot object that must be freed by the caller
+ * by using `xe_guc_ct_snapshot_free`.
+ */
+struct xe_guc_ct_snapshot *xe_guc_ct_snapshot_capture(struct xe_guc_ct *ct,
+ bool atomic)
+{
+ struct xe_device *xe = ct_to_xe(ct);
+ struct xe_guc_ct_snapshot *snapshot;
+
+ snapshot = xe_guc_ct_snapshot_alloc(ct, atomic);
+ if (!snapshot) {
+ xe_gt_err(ct_to_gt(ct), "Skipping CTB snapshot entirely.\n");
+ return NULL;
+ }
+
+ if (xe_guc_ct_enabled(ct) || ct->state == XE_GUC_CT_STATE_STOPPED) {
+ snapshot->ct_enabled = true;
+ snapshot->g2h_outstanding = READ_ONCE(ct->g2h_outstanding);
+ guc_ctb_snapshot_capture(xe, &ct->ctbs.h2g, &snapshot->h2g);
+ guc_ctb_snapshot_capture(xe, &ct->ctbs.g2h, &snapshot->g2h);
+ }
+
+ if (ct->bo && snapshot->ctb)
+ xe_map_memcpy_from(xe, snapshot->ctb, &ct->bo->vmap, 0, snapshot->ctb_size);
+
+ return snapshot;
+}
+
+/**
+ * xe_guc_ct_snapshot_print - Print out a given GuC CT snapshot.
+ * @snapshot: GuC CT snapshot object.
+ * @p: drm_printer where it will be printed out.
+ *
+ * This function prints out a given GuC CT snapshot object.
+ */
+void xe_guc_ct_snapshot_print(struct xe_guc_ct_snapshot *snapshot,
+ struct drm_printer *p)
+{
+ if (!snapshot)
+ return;
+
+ if (snapshot->ct_enabled) {
+ drm_puts(p, "H2G CTB (all sizes in DW):\n");
+ guc_ctb_snapshot_print(&snapshot->h2g, p);
+
+ drm_puts(p, "G2H CTB (all sizes in DW):\n");
+ guc_ctb_snapshot_print(&snapshot->g2h, p);
+ drm_printf(p, "\tg2h outstanding: %d\n",
+ snapshot->g2h_outstanding);
+
+ if (snapshot->ctb) {
+ xe_print_blob_ascii85(p, "CTB data", snapshot->ctb, 0, snapshot->ctb_size);
+ } else {
+ drm_printf(p, "CTB snapshot missing!\n");
+ return;
+ }
+ } else {
+ drm_puts(p, "CT disabled\n");
+ }
+}
+
+/**
+ * xe_guc_ct_snapshot_free - Free all allocated objects for a given snapshot.
+ * @snapshot: GuC CT snapshot object.
+ *
+ * This function frees all the memory that was allocated at capture
+ * time.
+ */
+void xe_guc_ct_snapshot_free(struct xe_guc_ct_snapshot *snapshot)
+{
+ if (!snapshot)
+ return;
+
+ kfree(snapshot->ctb);
+ kfree(snapshot);
+}
+
+/**
+ * xe_guc_ct_print - GuC CT Print.
+ * @ct: GuC CT.
+ * @p: drm_printer where it will be printed out.
+ *
+ * This function quickly captures a snapshot and immediately prints it out.
+ */
+void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p)
+{
+ struct xe_guc_ct_snapshot *snapshot;
+
+ snapshot = xe_guc_ct_snapshot_capture(ct, false);
+ xe_guc_ct_snapshot_print(snapshot, p);
+ xe_guc_ct_snapshot_free(snapshot);
+}
+
+#if IS_ENABLED(CONFIG_DRM_XE_DEBUG)
+static void ct_dead_capture(struct xe_guc_ct *ct, struct guc_ctb *ctb, u32 reason_code)
+{
+ struct xe_guc_log_snapshot *snapshot_log;
+ struct xe_guc_ct_snapshot *snapshot_ct;
+ struct xe_guc *guc = ct_to_guc(ct);
+ unsigned long flags;
+ bool have_capture;
+
+ if (ctb)
+ ctb->info.broken = true;
+
+ /* Ignore further errors after the first dump until a reset */
+ if (ct->dead.reported)
+ return;
+
+ spin_lock_irqsave(&ct->dead.lock, flags);
+
+ /* And only capture one dump at a time */
+ have_capture = ct->dead.reason & (1 << CT_DEAD_STATE_CAPTURE);
+ ct->dead.reason |= (1 << reason_code) |
+ (1 << CT_DEAD_STATE_CAPTURE);
+
+ spin_unlock_irqrestore(&ct->dead.lock, flags);
+
+ if (have_capture)
+ return;
+
+ snapshot_log = xe_guc_log_snapshot_capture(&guc->log, true);
+ snapshot_ct = xe_guc_ct_snapshot_capture((ct), true);
+
+ spin_lock_irqsave(&ct->dead.lock, flags);
+
+ if (ct->dead.snapshot_log || ct->dead.snapshot_ct) {
+ xe_gt_err(ct_to_gt(ct), "Got unexpected dead CT capture!\n");
+ xe_guc_log_snapshot_free(snapshot_log);
+ xe_guc_ct_snapshot_free(snapshot_ct);
+ } else {
+ ct->dead.snapshot_log = snapshot_log;
+ ct->dead.snapshot_ct = snapshot_ct;
+ }
+
+ spin_unlock_irqrestore(&ct->dead.lock, flags);
+
+ queue_work(system_unbound_wq, &(ct)->dead.worker);
+}
+
+static void ct_dead_print(struct xe_dead_ct *dead)
+{
+ struct xe_guc_ct *ct = container_of(dead, struct xe_guc_ct, dead);
+ struct xe_device *xe = ct_to_xe(ct);
+ struct xe_gt *gt = ct_to_gt(ct);
+ static int g_count;
+ struct drm_printer ip = xe_gt_info_printer(gt);
+ struct drm_printer lp = drm_line_printer(&ip, "Capture", ++g_count);
+
+ if (!dead->reason) {
+ xe_gt_err(gt, "CTB is dead for no reason!?\n");
+ return;
+ }
+
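+	/*
+	 * The reason value is a bitmask of (1 << CT_DEAD_*) codes, so e.g.
+	 * 0x0102 would mean CT_DEAD_DEADLOCK (0x0100) plus the internal
+	 * CT_DEAD_STATE_CAPTURE bit (0x0002).
+	 */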
+ drm_printf(&lp, "CTB is dead - reason=0x%X\n", dead->reason);
+
+ /* Can't generate a genuine core dump at this point, so just do the good bits */
+ drm_puts(&lp, "**** Xe Device Coredump ****\n");
+ xe_device_snapshot_print(xe, &lp);
+
+ drm_printf(&lp, "**** GT #%d ****\n", gt->info.id);
+ drm_printf(&lp, "\tTile: %d\n", gt->tile->id);
+
+ drm_puts(&lp, "**** GuC Log ****\n");
+ xe_guc_log_snapshot_print(dead->snapshot_log, &lp);
+
+ drm_puts(&lp, "**** GuC CT ****\n");
+ xe_guc_ct_snapshot_print(dead->snapshot_ct, &lp);
+
+ drm_puts(&lp, "Done.\n");
+}
+
+static void ct_dead_worker_func(struct work_struct *w)
+{
+ struct xe_guc_ct *ct = container_of(w, struct xe_guc_ct, dead.worker);
+
+ if (!ct->dead.reported) {
+ ct->dead.reported = true;
+ ct_dead_print(&ct->dead);
+ }
+
+ spin_lock_irq(&ct->dead.lock);
+
+ xe_guc_log_snapshot_free(ct->dead.snapshot_log);
+ ct->dead.snapshot_log = NULL;
+ xe_guc_ct_snapshot_free(ct->dead.snapshot_ct);
+ ct->dead.snapshot_ct = NULL;
+
+ if (ct->dead.reason & (1 << CT_DEAD_STATE_REARM)) {
+ /* A reset has occurred so re-arm the error reporting */
+ ct->dead.reason = 0;
+ ct->dead.reported = false;
+ }
+
+ spin_unlock_irq(&ct->dead.lock);
+}
+#endif
diff --git a/rr-cache/0ace7964b1b2d66224889bcf0b6a7a221776d8c8/preimage.1 b/rr-cache/0ace7964b1b2d66224889bcf0b6a7a221776d8c8/preimage.1
new file mode 100644
index 000000000000..d3838661f4b9
--- /dev/null
+++ b/rr-cache/0ace7964b1b2d66224889bcf0b6a7a221776d8c8/preimage.1
@@ -0,0 +1,1866 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_guc_ct.h"
+
+#include <linux/bitfield.h>
+#include <linux/circ_buf.h>
+#include <linux/delay.h>
+#include <linux/fault-inject.h>
+
+#include <kunit/static_stub.h>
+
+#include <drm/drm_managed.h>
+
+#include "abi/guc_actions_abi.h"
+#include "abi/guc_actions_sriov_abi.h"
+#include "abi/guc_klvs_abi.h"
+#include "xe_bo.h"
+#include "xe_devcoredump.h"
+#include "xe_device.h"
+#include "xe_gt.h"
+#include "xe_gt_pagefault.h"
+#include "xe_gt_printk.h"
+#include "xe_gt_sriov_pf_control.h"
+#include "xe_gt_sriov_pf_monitor.h"
+#include "xe_gt_tlb_invalidation.h"
+#include "xe_guc.h"
+#include "xe_guc_log.h"
+#include "xe_guc_relay.h"
+#include "xe_guc_submit.h"
+#include "xe_map.h"
+#include "xe_pm.h"
+#include "xe_trace_guc.h"
+
+#if IS_ENABLED(CONFIG_DRM_XE_DEBUG)
+enum {
+ /* Internal states, not error conditions */
+ CT_DEAD_STATE_REARM, /* 0x0001 */
+ CT_DEAD_STATE_CAPTURE, /* 0x0002 */
+
+ /* Error conditions */
+ CT_DEAD_SETUP, /* 0x0004 */
+ CT_DEAD_H2G_WRITE, /* 0x0008 */
+ CT_DEAD_H2G_HAS_ROOM, /* 0x0010 */
+ CT_DEAD_G2H_READ, /* 0x0020 */
+ CT_DEAD_G2H_RECV, /* 0x0040 */
+ CT_DEAD_G2H_RELEASE, /* 0x0080 */
+ CT_DEAD_DEADLOCK, /* 0x0100 */
+ CT_DEAD_PROCESS_FAILED, /* 0x0200 */
+ CT_DEAD_FAST_G2H, /* 0x0400 */
+ CT_DEAD_PARSE_G2H_RESPONSE, /* 0x0800 */
+ CT_DEAD_PARSE_G2H_UNKNOWN, /* 0x1000 */
+ CT_DEAD_PARSE_G2H_ORIGIN, /* 0x2000 */
+ CT_DEAD_PARSE_G2H_TYPE, /* 0x4000 */
+};
+
+static void ct_dead_worker_func(struct work_struct *w);
+static void ct_dead_capture(struct xe_guc_ct *ct, struct guc_ctb *ctb, u32 reason_code);
+
+#define CT_DEAD(ct, ctb, reason_code) ct_dead_capture((ct), (ctb), CT_DEAD_##reason_code)
+#else
+#define CT_DEAD(ct, ctb, reason) \
+ do { \
+ struct guc_ctb *_ctb = (ctb); \
+ if (_ctb) \
+ _ctb->info.broken = true; \
+ } while (0)
+#endif
+
+/* Used when a CT send wants to block and / or receive data */
+struct g2h_fence {
+ u32 *response_buffer;
+ u32 seqno;
+ u32 response_data;
+ u16 response_len;
+ u16 error;
+ u16 hint;
+ u16 reason;
+ bool retry;
+ bool fail;
+ bool done;
+};
+
+static void g2h_fence_init(struct g2h_fence *g2h_fence, u32 *response_buffer)
+{
+ g2h_fence->response_buffer = response_buffer;
+ g2h_fence->response_data = 0;
+ g2h_fence->response_len = 0;
+ g2h_fence->fail = false;
+ g2h_fence->retry = false;
+ g2h_fence->done = false;
+ g2h_fence->seqno = ~0x0;
+}
+
+static bool g2h_fence_needs_alloc(struct g2h_fence *g2h_fence)
+{
+ return g2h_fence->seqno == ~0x0;
+}
+
+static struct xe_guc *
+ct_to_guc(struct xe_guc_ct *ct)
+{
+ return container_of(ct, struct xe_guc, ct);
+}
+
+static struct xe_gt *
+ct_to_gt(struct xe_guc_ct *ct)
+{
+ return container_of(ct, struct xe_gt, uc.guc.ct);
+}
+
+static struct xe_device *
+ct_to_xe(struct xe_guc_ct *ct)
+{
+ return gt_to_xe(ct_to_gt(ct));
+}
+
+/**
+ * DOC: GuC CTB Blob
+ *
+ * We allocate single blob to hold both CTB descriptors and buffers:
+ *
+ * +--------+-----------------------------------------------+------+
+ * | offset | contents | size |
+ * +========+===============================================+======+
+ * | 0x0000 | H2G CTB Descriptor (send) | |
+ * +--------+-----------------------------------------------+ 4K |
+ * | 0x0800 | G2H CTB Descriptor (g2h) | |
+ * +--------+-----------------------------------------------+------+
+ * | 0x1000 | H2G CT Buffer (send) | n*4K |
+ * | | | |
+ * +--------+-----------------------------------------------+------+
+ * | 0x1000 | G2H CT Buffer (g2h) | m*4K |
+ * | + n*4K | | |
+ * +--------+-----------------------------------------------+------+
+ *
+ * The size of each ``CT Buffer`` must be a multiple of 4K.
+ * We don't expect too many messages in flight at any time, unless we are
+ * using GuC submission. In that case each request requires a minimum of
+ * 2 dwords, which gives us a maximum of 256 queued requests. Hopefully this
+ * is enough space to avoid backpressure on the driver. We increase the size
+ * of the receive buffer (relative to the send) to ensure a G2H response
+ * CTB has a landing spot.
+ *
+ * In addition to submissions, the G2H buffer needs to be able to hold
+ * enough space for recoverable page fault notifications. The number of
+ * page faults is interrupt driven and can be as much as the number of
+ * compute resources available. However, most of the actual work for these
+ * is in a separate page fault worker thread. Therefore we only need to
+ * make sure the queue has enough space to handle all of the submissions
+ * and responses and an extra buffer for incoming page faults.
+ */
+
+#define CTB_DESC_SIZE ALIGN(sizeof(struct guc_ct_buffer_desc), SZ_2K)
+#define CTB_H2G_BUFFER_SIZE (SZ_4K)
+#define CTB_G2H_BUFFER_SIZE (SZ_128K)
+#define G2H_ROOM_BUFFER_SIZE (CTB_G2H_BUFFER_SIZE / 2)
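+
+/*
+ * For reference, a sketch of the arithmetic implied above (assuming
+ * struct guc_ct_buffer_desc is no larger than 2K, so CTB_DESC_SIZE
+ * resolves to SZ_2K): the single blob is 2 * 2K (descriptors) + 4K (H2G
+ * buffer) + 128K (G2H buffer) = 136K in total, with half of the G2H
+ * buffer (G2H_ROOM_BUFFER_SIZE = 64K) kept in reserve so unsolicited
+ * G2H traffic such as page faults always has a landing spot.
+ */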
+
+/**
+ * xe_guc_ct_queue_proc_time_jiffies - Return maximum time to process a full
+ * CT command queue
+ * @ct: the &xe_guc_ct. Unused at this moment but will be used in the future.
+ *
+ * Observation is that a 4KiB buffer full of commands takes a little over a
+ * second to process. Use that to calculate maximum time to process a full CT
+ * command queue.
+ *
+ * Return: Maximum time to process a full CT queue in jiffies.
+ */
+long xe_guc_ct_queue_proc_time_jiffies(struct xe_guc_ct *ct)
+{
+ BUILD_BUG_ON(!IS_ALIGNED(CTB_H2G_BUFFER_SIZE, SZ_4));
+ return (CTB_H2G_BUFFER_SIZE / SZ_4K) * HZ;
+}
+
+static size_t guc_ct_size(void)
+{
+ return 2 * CTB_DESC_SIZE + CTB_H2G_BUFFER_SIZE +
+ CTB_G2H_BUFFER_SIZE;
+}
+
+static void guc_ct_fini(struct drm_device *drm, void *arg)
+{
+ struct xe_guc_ct *ct = arg;
+
+ destroy_workqueue(ct->g2h_wq);
+ xa_destroy(&ct->fence_lookup);
+}
+
+static void receive_g2h(struct xe_guc_ct *ct);
+static void g2h_worker_func(struct work_struct *w);
+static void safe_mode_worker_func(struct work_struct *w);
+
+static void primelockdep(struct xe_guc_ct *ct)
+{
+ if (!IS_ENABLED(CONFIG_LOCKDEP))
+ return;
+
+ fs_reclaim_acquire(GFP_KERNEL);
+ might_lock(&ct->lock);
+ fs_reclaim_release(GFP_KERNEL);
+}
+
+int xe_guc_ct_init(struct xe_guc_ct *ct)
+{
+ struct xe_device *xe = ct_to_xe(ct);
+ struct xe_gt *gt = ct_to_gt(ct);
+ struct xe_tile *tile = gt_to_tile(gt);
+ struct xe_bo *bo;
+ int err;
+
+ xe_gt_assert(gt, !(guc_ct_size() % PAGE_SIZE));
+
+ ct->g2h_wq = alloc_ordered_workqueue("xe-g2h-wq", 0);
+ if (!ct->g2h_wq)
+ return -ENOMEM;
+
+ spin_lock_init(&ct->fast_lock);
+ xa_init(&ct->fence_lookup);
+ INIT_WORK(&ct->g2h_worker, g2h_worker_func);
+ INIT_DELAYED_WORK(&ct->safe_mode_worker, safe_mode_worker_func);
+#if IS_ENABLED(CONFIG_DRM_XE_DEBUG)
+ spin_lock_init(&ct->dead.lock);
+ INIT_WORK(&ct->dead.worker, ct_dead_worker_func);
+#endif
+ init_waitqueue_head(&ct->wq);
+ init_waitqueue_head(&ct->g2h_fence_wq);
+
+ err = drmm_mutex_init(&xe->drm, &ct->lock);
+ if (err)
+ return err;
+
+ primelockdep(ct);
+
+ bo = xe_managed_bo_create_pin_map(xe, tile, guc_ct_size(),
+ XE_BO_FLAG_SYSTEM |
+ XE_BO_FLAG_GGTT |
+ XE_BO_FLAG_GGTT_INVALIDATE);
+ if (IS_ERR(bo))
+ return PTR_ERR(bo);
+
+ ct->bo = bo;
+
+ err = drmm_add_action_or_reset(&xe->drm, guc_ct_fini, ct);
+ if (err)
+ return err;
+
+ xe_gt_assert(gt, ct->state == XE_GUC_CT_STATE_NOT_INITIALIZED);
+ ct->state = XE_GUC_CT_STATE_DISABLED;
+ return 0;
+}
+ALLOW_ERROR_INJECTION(xe_guc_ct_init, ERRNO); /* See xe_pci_probe() */
+
+#define desc_read(xe_, guc_ctb__, field_) \
+ xe_map_rd_field(xe_, &guc_ctb__->desc, 0, \
+ struct guc_ct_buffer_desc, field_)
+
+#define desc_write(xe_, guc_ctb__, field_, val_) \
+ xe_map_wr_field(xe_, &guc_ctb__->desc, 0, \
+ struct guc_ct_buffer_desc, field_, val_)
+
+static void guc_ct_ctb_h2g_init(struct xe_device *xe, struct guc_ctb *h2g,
+ struct iosys_map *map)
+{
+ h2g->info.size = CTB_H2G_BUFFER_SIZE / sizeof(u32);
+ h2g->info.resv_space = 0;
+ h2g->info.tail = 0;
+ h2g->info.head = 0;
+ h2g->info.space = CIRC_SPACE(h2g->info.tail, h2g->info.head,
+ h2g->info.size) -
+ h2g->info.resv_space;
+ h2g->info.broken = false;
+
+ h2g->desc = *map;
+ xe_map_memset(xe, &h2g->desc, 0, 0, sizeof(struct guc_ct_buffer_desc));
+
+ h2g->cmds = IOSYS_MAP_INIT_OFFSET(map, CTB_DESC_SIZE * 2);
+}
+
+static void guc_ct_ctb_g2h_init(struct xe_device *xe, struct guc_ctb *g2h,
+ struct iosys_map *map)
+{
+ g2h->info.size = CTB_G2H_BUFFER_SIZE / sizeof(u32);
+ g2h->info.resv_space = G2H_ROOM_BUFFER_SIZE / sizeof(u32);
+ g2h->info.head = 0;
+ g2h->info.tail = 0;
+ g2h->info.space = CIRC_SPACE(g2h->info.tail, g2h->info.head,
+ g2h->info.size) -
+ g2h->info.resv_space;
+ g2h->info.broken = false;
+
+ g2h->desc = IOSYS_MAP_INIT_OFFSET(map, CTB_DESC_SIZE);
+ xe_map_memset(xe, &g2h->desc, 0, 0, sizeof(struct guc_ct_buffer_desc));
+
+ g2h->cmds = IOSYS_MAP_INIT_OFFSET(map, CTB_DESC_SIZE * 2 +
+ CTB_H2G_BUFFER_SIZE);
+}
+
+static int guc_ct_ctb_h2g_register(struct xe_guc_ct *ct)
+{
+ struct xe_guc *guc = ct_to_guc(ct);
+ u32 desc_addr, ctb_addr, size;
+ int err;
+
+ desc_addr = xe_bo_ggtt_addr(ct->bo);
+ ctb_addr = xe_bo_ggtt_addr(ct->bo) + CTB_DESC_SIZE * 2;
+ size = ct->ctbs.h2g.info.size * sizeof(u32);
+
+ err = xe_guc_self_cfg64(guc,
+ GUC_KLV_SELF_CFG_H2G_CTB_DESCRIPTOR_ADDR_KEY,
+ desc_addr);
+ if (err)
+ return err;
+
+ err = xe_guc_self_cfg64(guc,
+ GUC_KLV_SELF_CFG_H2G_CTB_ADDR_KEY,
+ ctb_addr);
+ if (err)
+ return err;
+
+ return xe_guc_self_cfg32(guc,
+ GUC_KLV_SELF_CFG_H2G_CTB_SIZE_KEY,
+ size);
+}
+
+static int guc_ct_ctb_g2h_register(struct xe_guc_ct *ct)
+{
+ struct xe_guc *guc = ct_to_guc(ct);
+ u32 desc_addr, ctb_addr, size;
+ int err;
+
+ desc_addr = xe_bo_ggtt_addr(ct->bo) + CTB_DESC_SIZE;
+ ctb_addr = xe_bo_ggtt_addr(ct->bo) + CTB_DESC_SIZE * 2 +
+ CTB_H2G_BUFFER_SIZE;
+ size = ct->ctbs.g2h.info.size * sizeof(u32);
+
+ err = xe_guc_self_cfg64(guc,
+ GUC_KLV_SELF_CFG_G2H_CTB_DESCRIPTOR_ADDR_KEY,
+ desc_addr);
+ if (err)
+ return err;
+
+ err = xe_guc_self_cfg64(guc,
+ GUC_KLV_SELF_CFG_G2H_CTB_ADDR_KEY,
+ ctb_addr);
+ if (err)
+ return err;
+
+ return xe_guc_self_cfg32(guc,
+ GUC_KLV_SELF_CFG_G2H_CTB_SIZE_KEY,
+ size);
+}
+
+static int guc_ct_control_toggle(struct xe_guc_ct *ct, bool enable)
+{
+ u32 request[HOST2GUC_CONTROL_CTB_REQUEST_MSG_LEN] = {
+ FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
+ FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) |
+ FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION,
+ GUC_ACTION_HOST2GUC_CONTROL_CTB),
+ FIELD_PREP(HOST2GUC_CONTROL_CTB_REQUEST_MSG_1_CONTROL,
+ enable ? GUC_CTB_CONTROL_ENABLE :
+ GUC_CTB_CONTROL_DISABLE),
+ };
+ int ret = xe_guc_mmio_send(ct_to_guc(ct), request, ARRAY_SIZE(request));
+
+ return ret > 0 ? -EPROTO : ret;
+}
+
+static void xe_guc_ct_set_state(struct xe_guc_ct *ct,
+ enum xe_guc_ct_state state)
+{
+ mutex_lock(&ct->lock); /* Serialise dequeue_one_g2h() */
+ spin_lock_irq(&ct->fast_lock); /* Serialise CT fast-path */
+
+ xe_gt_assert(ct_to_gt(ct), ct->g2h_outstanding == 0 ||
+ state == XE_GUC_CT_STATE_STOPPED);
+
+ if (ct->g2h_outstanding)
+ xe_pm_runtime_put(ct_to_xe(ct));
+ ct->g2h_outstanding = 0;
+ ct->state = state;
+
+ spin_unlock_irq(&ct->fast_lock);
+
+ /*
+	 * Lockdep doesn't like this under the fast lock, and the destroy only
+	 * needs to be serialized with the send path, which the ct lock provides.
+ */
+ xa_destroy(&ct->fence_lookup);
+
+ mutex_unlock(&ct->lock);
+}
+
+static bool ct_needs_safe_mode(struct xe_guc_ct *ct)
+{
+ return !pci_dev_msi_enabled(to_pci_dev(ct_to_xe(ct)->drm.dev));
+}
+
+static bool ct_restart_safe_mode_worker(struct xe_guc_ct *ct)
+{
+ if (!ct_needs_safe_mode(ct))
+ return false;
+
+ queue_delayed_work(ct->g2h_wq, &ct->safe_mode_worker, HZ / 10);
+ return true;
+}
+
+static void safe_mode_worker_func(struct work_struct *w)
+{
+ struct xe_guc_ct *ct = container_of(w, struct xe_guc_ct, safe_mode_worker.work);
+
+ receive_g2h(ct);
+
+ if (!ct_restart_safe_mode_worker(ct))
+ xe_gt_dbg(ct_to_gt(ct), "GuC CT safe-mode canceled\n");
+}
+
+static void ct_enter_safe_mode(struct xe_guc_ct *ct)
+{
+ if (ct_restart_safe_mode_worker(ct))
+ xe_gt_dbg(ct_to_gt(ct), "GuC CT safe-mode enabled\n");
+}
+
+static void ct_exit_safe_mode(struct xe_guc_ct *ct)
+{
+ if (cancel_delayed_work_sync(&ct->safe_mode_worker))
+ xe_gt_dbg(ct_to_gt(ct), "GuC CT safe-mode disabled\n");
+}
+
+int xe_guc_ct_enable(struct xe_guc_ct *ct)
+{
+ struct xe_device *xe = ct_to_xe(ct);
+ struct xe_gt *gt = ct_to_gt(ct);
+ int err;
+
+ xe_gt_assert(gt, !xe_guc_ct_enabled(ct));
+
+ xe_map_memset(xe, &ct->bo->vmap, 0, 0, ct->bo->size);
+ guc_ct_ctb_h2g_init(xe, &ct->ctbs.h2g, &ct->bo->vmap);
+ guc_ct_ctb_g2h_init(xe, &ct->ctbs.g2h, &ct->bo->vmap);
+
+ err = guc_ct_ctb_h2g_register(ct);
+ if (err)
+ goto err_out;
+
+ err = guc_ct_ctb_g2h_register(ct);
+ if (err)
+ goto err_out;
+
+ err = guc_ct_control_toggle(ct, true);
+ if (err)
+ goto err_out;
+
+ xe_guc_ct_set_state(ct, XE_GUC_CT_STATE_ENABLED);
+
+ smp_mb();
+ wake_up_all(&ct->wq);
+ xe_gt_dbg(gt, "GuC CT communication channel enabled\n");
+
+ if (ct_needs_safe_mode(ct))
+ ct_enter_safe_mode(ct);
+
+#if IS_ENABLED(CONFIG_DRM_XE_DEBUG)
+ /*
+ * The CT has now been reset so the dumper can be re-armed
+ * after any existing dead state has been dumped.
+ */
+ spin_lock_irq(&ct->dead.lock);
+ if (ct->dead.reason)
+ ct->dead.reason |= (1 << CT_DEAD_STATE_REARM);
+ spin_unlock_irq(&ct->dead.lock);
+#endif
+
+ return 0;
+
+err_out:
+ xe_gt_err(gt, "Failed to enable GuC CT (%pe)\n", ERR_PTR(err));
+ CT_DEAD(ct, NULL, SETUP);
+
+ return err;
+}
+
+static void stop_g2h_handler(struct xe_guc_ct *ct)
+{
+ cancel_work_sync(&ct->g2h_worker);
+}
+
+/**
+ * xe_guc_ct_disable - Set GuC CT to disabled state
+ * @ct: the &xe_guc_ct
+ *
+ * Set the GuC CT to the disabled state and stop the g2h handler. No outstanding
+ * g2h messages are expected in this transition.
+ */
+void xe_guc_ct_disable(struct xe_guc_ct *ct)
+{
+ xe_guc_ct_set_state(ct, XE_GUC_CT_STATE_DISABLED);
+ ct_exit_safe_mode(ct);
+ stop_g2h_handler(ct);
+}
+
+/**
+ * xe_guc_ct_stop - Set GuC CT to stopped state
+ * @ct: the &xe_guc_ct
+ *
+ * Set the GuC CT to the stopped state, stop the g2h handler, and clear any
+ * outstanding g2h messages.
+ */
+void xe_guc_ct_stop(struct xe_guc_ct *ct)
+{
+ xe_guc_ct_set_state(ct, XE_GUC_CT_STATE_STOPPED);
+ stop_g2h_handler(ct);
+}
+
+static bool h2g_has_room(struct xe_guc_ct *ct, u32 cmd_len)
+{
+ struct guc_ctb *h2g = &ct->ctbs.h2g;
+
+ lockdep_assert_held(&ct->lock);
+
+ if (cmd_len > h2g->info.space) {
+ h2g->info.head = desc_read(ct_to_xe(ct), h2g, head);
+
+ if (h2g->info.head > h2g->info.size) {
+ struct xe_device *xe = ct_to_xe(ct);
+ u32 desc_status = desc_read(xe, h2g, status);
+
+ desc_write(xe, h2g, status, desc_status | GUC_CTB_STATUS_OVERFLOW);
+
+			xe_gt_err(ct_to_gt(ct), "CT: invalid head offset %u >= %u\n",
+ h2g->info.head, h2g->info.size);
+ CT_DEAD(ct, h2g, H2G_HAS_ROOM);
+ return false;
+ }
+
+ h2g->info.space = CIRC_SPACE(h2g->info.tail, h2g->info.head,
+ h2g->info.size) -
+ h2g->info.resv_space;
+ if (cmd_len > h2g->info.space)
+ return false;
+ }
+
+ return true;
+}
+
+static bool g2h_has_room(struct xe_guc_ct *ct, u32 g2h_len)
+{
+ if (!g2h_len)
+ return true;
+
+ lockdep_assert_held(&ct->fast_lock);
+
+ return ct->ctbs.g2h.info.space > g2h_len;
+}
+
+static int has_room(struct xe_guc_ct *ct, u32 cmd_len, u32 g2h_len)
+{
+ lockdep_assert_held(&ct->lock);
+
+ if (!g2h_has_room(ct, g2h_len) || !h2g_has_room(ct, cmd_len))
+ return -EBUSY;
+
+ return 0;
+}
+
+static void h2g_reserve_space(struct xe_guc_ct *ct, u32 cmd_len)
+{
+ lockdep_assert_held(&ct->lock);
+ ct->ctbs.h2g.info.space -= cmd_len;
+}
+
+static void __g2h_reserve_space(struct xe_guc_ct *ct, u32 g2h_len, u32 num_g2h)
+{
+ xe_gt_assert(ct_to_gt(ct), g2h_len <= ct->ctbs.g2h.info.space);
+ xe_gt_assert(ct_to_gt(ct), (!g2h_len && !num_g2h) ||
+ (g2h_len && num_g2h));
+
+ if (g2h_len) {
+ lockdep_assert_held(&ct->fast_lock);
+
+ if (!ct->g2h_outstanding)
+ xe_pm_runtime_get_noresume(ct_to_xe(ct));
+
+ ct->ctbs.g2h.info.space -= g2h_len;
+ ct->g2h_outstanding += num_g2h;
+ }
+}
+
+static void __g2h_release_space(struct xe_guc_ct *ct, u32 g2h_len)
+{
+ bool bad = false;
+
+ lockdep_assert_held(&ct->fast_lock);
+
+ bad = ct->ctbs.g2h.info.space + g2h_len >
+ ct->ctbs.g2h.info.size - ct->ctbs.g2h.info.resv_space;
+ bad |= !ct->g2h_outstanding;
+
+ if (bad) {
+ xe_gt_err(ct_to_gt(ct), "Invalid G2H release: %d + %d vs %d - %d -> %d vs %d, outstanding = %d!\n",
+ ct->ctbs.g2h.info.space, g2h_len,
+ ct->ctbs.g2h.info.size, ct->ctbs.g2h.info.resv_space,
+ ct->ctbs.g2h.info.space + g2h_len,
+ ct->ctbs.g2h.info.size - ct->ctbs.g2h.info.resv_space,
+ ct->g2h_outstanding);
+ CT_DEAD(ct, &ct->ctbs.g2h, G2H_RELEASE);
+ return;
+ }
+
+ ct->ctbs.g2h.info.space += g2h_len;
+ if (!--ct->g2h_outstanding)
+ xe_pm_runtime_put(ct_to_xe(ct));
+}
+
+static void g2h_release_space(struct xe_guc_ct *ct, u32 g2h_len)
+{
+ spin_lock_irq(&ct->fast_lock);
+ __g2h_release_space(ct, g2h_len);
+ spin_unlock_irq(&ct->fast_lock);
+}
+
+#define H2G_CT_HEADERS (GUC_CTB_HDR_LEN + 1) /* one DW CTB header and one DW HxG header */
+
+static int h2g_write(struct xe_guc_ct *ct, const u32 *action, u32 len,
+ u32 ct_fence_value, bool want_response)
+{
+ struct xe_device *xe = ct_to_xe(ct);
+ struct xe_gt *gt = ct_to_gt(ct);
+ struct guc_ctb *h2g = &ct->ctbs.h2g;
+ u32 cmd[H2G_CT_HEADERS];
+ u32 tail = h2g->info.tail;
+ u32 full_len;
+ struct iosys_map map = IOSYS_MAP_INIT_OFFSET(&h2g->cmds,
+ tail * sizeof(u32));
+ u32 desc_status;
+
+ full_len = len + GUC_CTB_HDR_LEN;
+
+ lockdep_assert_held(&ct->lock);
+ xe_gt_assert(gt, full_len <= GUC_CTB_MSG_MAX_LEN);
+
+ desc_status = desc_read(xe, h2g, status);
+ if (desc_status) {
+ xe_gt_err(gt, "CT write: non-zero status: %u\n", desc_status);
+ goto corrupted;
+ }
+
+ if (IS_ENABLED(CONFIG_DRM_XE_DEBUG)) {
+ u32 desc_tail = desc_read(xe, h2g, tail);
+ u32 desc_head = desc_read(xe, h2g, head);
+
+ if (tail != desc_tail) {
+ desc_write(xe, h2g, status, desc_status | GUC_CTB_STATUS_MISMATCH);
+ xe_gt_err(gt, "CT write: tail was modified %u != %u\n", desc_tail, tail);
+ goto corrupted;
+ }
+
+ if (tail > h2g->info.size) {
+ desc_write(xe, h2g, status, desc_status | GUC_CTB_STATUS_OVERFLOW);
+ xe_gt_err(gt, "CT write: tail out of range: %u vs %u\n",
+ tail, h2g->info.size);
+ goto corrupted;
+ }
+
+ if (desc_head >= h2g->info.size) {
+ desc_write(xe, h2g, status, desc_status | GUC_CTB_STATUS_OVERFLOW);
+			xe_gt_err(gt, "CT write: invalid head offset %u >= %u\n",
+ desc_head, h2g->info.size);
+ goto corrupted;
+ }
+ }
+
+ /* Command will wrap, zero fill (NOPs), return and check credits again */
+ if (tail + full_len > h2g->info.size) {
+ xe_map_memset(xe, &map, 0, 0,
+ (h2g->info.size - tail) * sizeof(u32));
+ h2g_reserve_space(ct, (h2g->info.size - tail));
+ h2g->info.tail = 0;
+ desc_write(xe, h2g, tail, h2g->info.tail);
+
+ return -EAGAIN;
+ }
+
+ /*
+ * dw0: CT header (including fence)
+ * dw1: HXG header (including action code)
+ * dw2+: action data
+ */
+ cmd[0] = FIELD_PREP(GUC_CTB_MSG_0_FORMAT, GUC_CTB_FORMAT_HXG) |
+ FIELD_PREP(GUC_CTB_MSG_0_NUM_DWORDS, len) |
+ FIELD_PREP(GUC_CTB_MSG_0_FENCE, ct_fence_value);
+ if (want_response) {
+ cmd[1] =
+ FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) |
+ FIELD_PREP(GUC_HXG_EVENT_MSG_0_ACTION |
+ GUC_HXG_EVENT_MSG_0_DATA0, action[0]);
+ } else {
+ cmd[1] =
+ FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_FAST_REQUEST) |
+ FIELD_PREP(GUC_HXG_EVENT_MSG_0_ACTION |
+ GUC_HXG_EVENT_MSG_0_DATA0, action[0]);
+ }
+
+ /* H2G header in cmd[1] replaces action[0] so: */
+ --len;
+ ++action;
+
+	/* Write H2G, ensuring it is visible before the descriptor update */
+ xe_map_memcpy_to(xe, &map, 0, cmd, H2G_CT_HEADERS * sizeof(u32));
+ xe_map_memcpy_to(xe, &map, H2G_CT_HEADERS * sizeof(u32), action, len * sizeof(u32));
+ xe_device_wmb(xe);
+
+ /* Update local copies */
+ h2g->info.tail = (tail + full_len) % h2g->info.size;
+ h2g_reserve_space(ct, full_len);
+
+ /* Update descriptor */
+ desc_write(xe, h2g, tail, h2g->info.tail);
+
+ trace_xe_guc_ctb_h2g(xe, gt->info.id, *(action - 1), full_len,
+ desc_read(xe, h2g, head), h2g->info.tail);
+
+ return 0;
+
+corrupted:
+ CT_DEAD(ct, &ct->ctbs.h2g, H2G_WRITE);
+ return -EPIPE;
+}
+
+/*
+ * The CT protocol accepts a 16 bits fence. This field is fully owned by the
+ * driver, the GuC will just copy it to the reply message. Since we need to
+ * be able to distinguish between replies to REQUEST and FAST_REQUEST messages,
+ * we use one bit of the seqno as an indicator for that and a rolling counter
+ * for the remaining 15 bits.
+ */
+#define CT_SEQNO_MASK GENMASK(14, 0)
+#define CT_SEQNO_UNTRACKED BIT(15)
+static u16 next_ct_seqno(struct xe_guc_ct *ct, bool is_g2h_fence)
+{
+ u32 seqno = ct->fence_seqno++ & CT_SEQNO_MASK;
+
+ if (!is_g2h_fence)
+ seqno |= CT_SEQNO_UNTRACKED;
+
+ return seqno;
+}
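+
+/*
+ * Example of the scheme above (hypothetical values): with fence_seqno at
+ * 0x7ffe, a tracked (g2h_fence) send gets seqno 0x7ffe, the next 0x7fff,
+ * and the 15-bit counter then wraps back to 0. An untracked FAST_REQUEST
+ * issued at that point gets 0x8000 (CT_SEQNO_UNTRACKED set), which is how
+ * parse_g2h_response() tells the two kinds of reply apart.
+ */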
+
+static int __guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action,
+ u32 len, u32 g2h_len, u32 num_g2h,
+ struct g2h_fence *g2h_fence)
+{
+ struct xe_gt *gt __maybe_unused = ct_to_gt(ct);
+ u16 seqno;
+ int ret;
+
+ xe_gt_assert(gt, ct->state != XE_GUC_CT_STATE_NOT_INITIALIZED);
+ xe_gt_assert(gt, !g2h_len || !g2h_fence);
+ xe_gt_assert(gt, !num_g2h || !g2h_fence);
+ xe_gt_assert(gt, !g2h_len || num_g2h);
+ xe_gt_assert(gt, g2h_len || !num_g2h);
+ lockdep_assert_held(&ct->lock);
+
+ if (unlikely(ct->ctbs.h2g.info.broken)) {
+ ret = -EPIPE;
+ goto out;
+ }
+
+ if (ct->state == XE_GUC_CT_STATE_DISABLED) {
+ ret = -ENODEV;
+ goto out;
+ }
+
+ if (ct->state == XE_GUC_CT_STATE_STOPPED) {
+ ret = -ECANCELED;
+ goto out;
+ }
+
+ xe_gt_assert(gt, xe_guc_ct_enabled(ct));
+
+ if (g2h_fence) {
+ g2h_len = GUC_CTB_HXG_MSG_MAX_LEN;
+ num_g2h = 1;
+
+ if (g2h_fence_needs_alloc(g2h_fence)) {
+ g2h_fence->seqno = next_ct_seqno(ct, true);
+ ret = xa_err(xa_store(&ct->fence_lookup,
+ g2h_fence->seqno, g2h_fence,
+ GFP_ATOMIC));
+ if (ret)
+ goto out;
+ }
+
+ seqno = g2h_fence->seqno;
+ } else {
+ seqno = next_ct_seqno(ct, false);
+ }
+
+ if (g2h_len)
+ spin_lock_irq(&ct->fast_lock);
+retry:
+ ret = has_room(ct, len + GUC_CTB_HDR_LEN, g2h_len);
+ if (unlikely(ret))
+ goto out_unlock;
+
+ ret = h2g_write(ct, action, len, seqno, !!g2h_fence);
+ if (unlikely(ret)) {
+ if (ret == -EAGAIN)
+ goto retry;
+ goto out_unlock;
+ }
+
+ __g2h_reserve_space(ct, g2h_len, num_g2h);
+ xe_guc_notify(ct_to_guc(ct));
+out_unlock:
+ if (g2h_len)
+ spin_unlock_irq(&ct->fast_lock);
+out:
+ return ret;
+}
+
+static void kick_reset(struct xe_guc_ct *ct)
+{
+ xe_gt_reset_async(ct_to_gt(ct));
+}
+
+static int dequeue_one_g2h(struct xe_guc_ct *ct);
+
+static int guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action, u32 len,
+ u32 g2h_len, u32 num_g2h,
+ struct g2h_fence *g2h_fence)
+{
+ struct xe_device *xe = ct_to_xe(ct);
+ struct xe_gt *gt = ct_to_gt(ct);
+ unsigned int sleep_period_ms = 1;
+ int ret;
+
+ xe_gt_assert(gt, !g2h_len || !g2h_fence);
+ lockdep_assert_held(&ct->lock);
+ xe_device_assert_mem_access(ct_to_xe(ct));
+
+try_again:
+ ret = __guc_ct_send_locked(ct, action, len, g2h_len, num_g2h,
+ g2h_fence);
+
+ /*
+ * We wait to try to restore credits for about 1 second before bailing.
+ * In the case of H2G credits we have no choice but just to wait for the
+ * GuC to consume H2Gs in the channel so we use a wait / sleep loop. In
+ * the case of G2H we process any G2H in the channel, hopefully freeing
+ * credits as we consume the G2H messages.
+ */
+ if (unlikely(ret == -EBUSY &&
+ !h2g_has_room(ct, len + GUC_CTB_HDR_LEN))) {
+ struct guc_ctb *h2g = &ct->ctbs.h2g;
+
+ if (sleep_period_ms == 1024)
+ goto broken;
+
+ trace_xe_guc_ct_h2g_flow_control(xe, h2g->info.head, h2g->info.tail,
+ h2g->info.size,
+ h2g->info.space,
+ len + GUC_CTB_HDR_LEN);
+ msleep(sleep_period_ms);
+ sleep_period_ms <<= 1;
+
+ goto try_again;
+ } else if (unlikely(ret == -EBUSY)) {
+ struct xe_device *xe = ct_to_xe(ct);
+ struct guc_ctb *g2h = &ct->ctbs.g2h;
+
+ trace_xe_guc_ct_g2h_flow_control(xe, g2h->info.head,
+ desc_read(xe, g2h, tail),
+ g2h->info.size,
+ g2h->info.space,
+ g2h_fence ?
+ GUC_CTB_HXG_MSG_MAX_LEN :
+ g2h_len);
+
+#define g2h_avail(ct) \
+ (desc_read(ct_to_xe(ct), (&ct->ctbs.g2h), tail) != ct->ctbs.g2h.info.head)
+ if (!wait_event_timeout(ct->wq, !ct->g2h_outstanding ||
+ g2h_avail(ct), HZ))
+ goto broken;
+#undef g2h_avail
+
+ ret = dequeue_one_g2h(ct);
+ if (ret < 0) {
+ if (ret != -ECANCELED)
+ xe_gt_err(ct_to_gt(ct), "CTB receive failed (%pe)",
+ ERR_PTR(ret));
+ goto broken;
+ }
+
+ goto try_again;
+ }
+
+ return ret;
+
+broken:
+ xe_gt_err(gt, "No forward process on H2G, reset required\n");
+ CT_DEAD(ct, &ct->ctbs.h2g, DEADLOCK);
+
+ return -EDEADLK;
+}
+
+static int guc_ct_send(struct xe_guc_ct *ct, const u32 *action, u32 len,
+ u32 g2h_len, u32 num_g2h, struct g2h_fence *g2h_fence)
+{
+ int ret;
+
+ xe_gt_assert(ct_to_gt(ct), !g2h_len || !g2h_fence);
+
+ mutex_lock(&ct->lock);
+ ret = guc_ct_send_locked(ct, action, len, g2h_len, num_g2h, g2h_fence);
+ mutex_unlock(&ct->lock);
+
+ return ret;
+}
+
+int xe_guc_ct_send(struct xe_guc_ct *ct, const u32 *action, u32 len,
+ u32 g2h_len, u32 num_g2h)
+{
+ int ret;
+
+ ret = guc_ct_send(ct, action, len, g2h_len, num_g2h, NULL);
+ if (ret == -EDEADLK)
+ kick_reset(ct);
+
+ return ret;
+}
+
+int xe_guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action, u32 len,
+ u32 g2h_len, u32 num_g2h)
+{
+ int ret;
+
+ ret = guc_ct_send_locked(ct, action, len, g2h_len, num_g2h, NULL);
+ if (ret == -EDEADLK)
+ kick_reset(ct);
+
+ return ret;
+}
+
+int xe_guc_ct_send_g2h_handler(struct xe_guc_ct *ct, const u32 *action, u32 len)
+{
+ int ret;
+
+ lockdep_assert_held(&ct->lock);
+
+ ret = guc_ct_send_locked(ct, action, len, 0, 0, NULL);
+ if (ret == -EDEADLK)
+ kick_reset(ct);
+
+ return ret;
+}
+
+/*
+ * Check if a GT reset is in progress or will occur, and if a GT reset brought
+ * the CT back up. Arbitrarily picking 5 seconds as an upper limit for a GT
+ * reset to complete.
+ */
+static bool retry_failure(struct xe_guc_ct *ct, int ret)
+{
+ if (!(ret == -EDEADLK || ret == -EPIPE || ret == -ENODEV))
+ return false;
+
+#define ct_alive(ct) \
+ (xe_guc_ct_enabled(ct) && !ct->ctbs.h2g.info.broken && \
+ !ct->ctbs.g2h.info.broken)
+ if (!wait_event_interruptible_timeout(ct->wq, ct_alive(ct), HZ * 5))
+ return false;
+#undef ct_alive
+
+ return true;
+}
+
+static int guc_ct_send_recv(struct xe_guc_ct *ct, const u32 *action, u32 len,
+ u32 *response_buffer, bool no_fail)
+{
+ struct xe_gt *gt = ct_to_gt(ct);
+ struct g2h_fence g2h_fence;
+ int ret = 0;
+
+ /*
+ * We use a fence to implement blocking sends / receiving response data.
+ * The seqno of the fence is sent in the H2G, returned in the G2H, and
+	 * an xarray is used as the storage medium with the seqno being the key.
+ * Fields in the fence hold success, failure, retry status and the
+ * response data. Safe to allocate on the stack as the xarray is the
+ * only reference and it cannot be present after this function exits.
+ */
+retry:
+ g2h_fence_init(&g2h_fence, response_buffer);
+retry_same_fence:
+ ret = guc_ct_send(ct, action, len, 0, 0, &g2h_fence);
+ if (unlikely(ret == -ENOMEM)) {
+		/* Retry allocation with GFP_KERNEL */
+ ret = xa_err(xa_store(&ct->fence_lookup, g2h_fence.seqno,
+ &g2h_fence, GFP_KERNEL));
+ if (ret)
+ return ret;
+
+ goto retry_same_fence;
+ } else if (unlikely(ret)) {
+ if (ret == -EDEADLK)
+ kick_reset(ct);
+
+ if (no_fail && retry_failure(ct, ret))
+ goto retry_same_fence;
+
+ if (!g2h_fence_needs_alloc(&g2h_fence))
+ xa_erase(&ct->fence_lookup, g2h_fence.seqno);
+
+ return ret;
+ }
+
+ ret = wait_event_timeout(ct->g2h_fence_wq, g2h_fence.done, HZ);
+
+ /*
+<<<<<<<
+ * Ensure we serialize with completion side to prevent UAF with fence going out of scope on
+ * the stack, since we have no clue if it will fire after the timeout before we can erase
+ * from the xa. Also we have some dependent loads and stores below for which we need the
+ * correct ordering, and we lack the needed barriers.
+ */
+ mutex_lock(&ct->lock);
+ if (!ret) {
+ xe_gt_err(gt, "Timed out wait for G2H, fence %u, action %04x, done %s",
+ g2h_fence.seqno, action[0], str_yes_no(g2h_fence.done));
+ xa_erase(&ct->fence_lookup, g2h_fence.seqno);
+=======
+	 * Occasionally the G2H worker is seen to start running after a delay of more than
+	 * a second, even after being queued and activated by the Linux workqueue subsystem.
+	 * This leads to a G2H timeout error. The root cause lies in the scheduling latency
+	 * of the Lunarlake hybrid CPU. The issue disappears if the Lunarlake Atom cores are
+	 * disabled in the BIOS, but that is beyond the control of the xe KMD.
+ *
+ * TODO: Drop this change once workqueue scheduling delay issue is fixed on LNL Hybrid CPU.
+ */
+ if (!ret) {
+ flush_work(&ct->g2h_worker);
+ if (g2h_fence.done) {
+ xe_gt_warn(gt, "G2H fence %u, action %04x, done\n",
+ g2h_fence.seqno, action[0]);
+ ret = 1;
+ }
+ }
+
+ /*
+ * Ensure we serialize with completion side to prevent UAF with fence going out of scope on
+ * the stack, since we have no clue if it will fire after the timeout before we can erase
+ * from the xa. Also we have some dependent loads and stores below for which we need the
+ * correct ordering, and we lack the needed barriers.
+ */
+ mutex_lock(&ct->lock);
+ if (!ret) {
+ xe_gt_err(gt, "Timed out wait for G2H, fence %u, action %04x, done %s",
+ g2h_fence.seqno, action[0], str_yes_no(g2h_fence.done));
+ xa_erase_irq(&ct->fence_lookup, g2h_fence.seqno);
+>>>>>>>
+ mutex_unlock(&ct->lock);
+ return -ETIME;
+ }
+
+ if (g2h_fence.retry) {
+ xe_gt_dbg(gt, "H2G action %#x retrying: reason %#x\n",
+ action[0], g2h_fence.reason);
+ mutex_unlock(&ct->lock);
+ goto retry;
+ }
+ if (g2h_fence.fail) {
+ xe_gt_err(gt, "H2G request %#x failed: error %#x hint %#x\n",
+ action[0], g2h_fence.error, g2h_fence.hint);
+ ret = -EIO;
+ }
+
+ if (ret > 0)
+ ret = response_buffer ? g2h_fence.response_len : g2h_fence.response_data;
+
+ mutex_unlock(&ct->lock);
+
+ return ret;
+}
+
+/**
+ * xe_guc_ct_send_recv - Send and receive HXG to the GuC
+ * @ct: the &xe_guc_ct
+ * @action: the dword array with `HXG Request`_ message (can't be NULL)
+ * @len: length of the `HXG Request`_ message (in dwords, can't be 0)
+ * @response_buffer: placeholder for the `HXG Response`_ message (can be NULL)
+ *
+ * Send a `HXG Request`_ message to the GuC over CT communication channel and
+ * blocks until GuC replies with a `HXG Response`_ message.
+ *
+ * For non-blocking communication with GuC use xe_guc_ct_send().
+ *
+ * Note: The size of &response_buffer must be at least GUC_CTB_MAX_DWORDS_.
+ *
+ * Return: response length (in dwords) if &response_buffer was not NULL, or
+ * DATA0 from `HXG Response`_ if &response_buffer was NULL, or
+ * a negative error code on failure.
+ */
+int xe_guc_ct_send_recv(struct xe_guc_ct *ct, const u32 *action, u32 len,
+ u32 *response_buffer)
+{
+ KUNIT_STATIC_STUB_REDIRECT(xe_guc_ct_send_recv, ct, action, len, response_buffer);
+ return guc_ct_send_recv(ct, action, len, response_buffer, false);
+}
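+
+/*
+ * Minimal usage sketch for xe_guc_ct_send_recv(); the action code and data
+ * below are hypothetical, real callers use the appropriate XE_GUC_ACTION_*
+ * values:
+ *
+ *	u32 action[] = {
+ *		XE_GUC_ACTION_FOO_QUERY,	// dword 0: action code
+ *		some_param,			// dword 1+: action data
+ *	};
+ *	u32 response[GUC_CTB_MAX_DWORDS];
+ *	int ret;
+ *
+ *	ret = xe_guc_ct_send_recv(ct, action, ARRAY_SIZE(action), response);
+ *	if (ret < 0)
+ *		return ret;	// CT send failed or GuC returned a failure
+ *	// ret is the response length in dwords since a buffer was supplied
+ */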
+
+int xe_guc_ct_send_recv_no_fail(struct xe_guc_ct *ct, const u32 *action,
+ u32 len, u32 *response_buffer)
+{
+ return guc_ct_send_recv(ct, action, len, response_buffer, true);
+}
+
+static u32 *msg_to_hxg(u32 *msg)
+{
+ return msg + GUC_CTB_MSG_MIN_LEN;
+}
+
+static u32 msg_len_to_hxg_len(u32 len)
+{
+ return len - GUC_CTB_MSG_MIN_LEN;
+}
+
+static int parse_g2h_event(struct xe_guc_ct *ct, u32 *msg, u32 len)
+{
+ u32 *hxg = msg_to_hxg(msg);
+ u32 action = FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, hxg[0]);
+
+ lockdep_assert_held(&ct->lock);
+
+ switch (action) {
+ case XE_GUC_ACTION_SCHED_CONTEXT_MODE_DONE:
+ case XE_GUC_ACTION_DEREGISTER_CONTEXT_DONE:
+ case XE_GUC_ACTION_SCHED_ENGINE_MODE_DONE:
+ case XE_GUC_ACTION_TLB_INVALIDATION_DONE:
+ g2h_release_space(ct, len);
+ }
+
+ return 0;
+}
+
+static int parse_g2h_response(struct xe_guc_ct *ct, u32 *msg, u32 len)
+{
+ struct xe_gt *gt = ct_to_gt(ct);
+ u32 *hxg = msg_to_hxg(msg);
+ u32 hxg_len = msg_len_to_hxg_len(len);
+ u32 fence = FIELD_GET(GUC_CTB_MSG_0_FENCE, msg[0]);
+ u32 type = FIELD_GET(GUC_HXG_MSG_0_TYPE, hxg[0]);
+ struct g2h_fence *g2h_fence;
+
+ lockdep_assert_held(&ct->lock);
+
+ /*
+ * Fences for FAST_REQUEST messages are not tracked in ct->fence_lookup.
+ * Those messages should never fail, so if we do get an error back it
+ * means we're likely doing an illegal operation and the GuC is
+ * rejecting it. We have no way to inform the code that submitted the
+ * H2G that the message was rejected, so we need to escalate the
+ * failure to trigger a reset.
+ */
+ if (fence & CT_SEQNO_UNTRACKED) {
+ if (type == GUC_HXG_TYPE_RESPONSE_FAILURE)
+ xe_gt_err(gt, "FAST_REQ H2G fence 0x%x failed! e=0x%x, h=%u\n",
+ fence,
+ FIELD_GET(GUC_HXG_FAILURE_MSG_0_ERROR, hxg[0]),
+ FIELD_GET(GUC_HXG_FAILURE_MSG_0_HINT, hxg[0]));
+ else
+ xe_gt_err(gt, "unexpected response %u for FAST_REQ H2G fence 0x%x!\n",
+ type, fence);
+ CT_DEAD(ct, NULL, PARSE_G2H_RESPONSE);
+
+ return -EPROTO;
+ }
+
+ g2h_fence = xa_erase(&ct->fence_lookup, fence);
+ if (unlikely(!g2h_fence)) {
+ /* Don't tear down channel, as send could've timed out */
+ /* CT_DEAD(ct, NULL, PARSE_G2H_UNKNOWN); */
+ xe_gt_warn(gt, "G2H fence (%u) not found!\n", fence);
+ g2h_release_space(ct, GUC_CTB_HXG_MSG_MAX_LEN);
+ return 0;
+ }
+
+ xe_gt_assert(gt, fence == g2h_fence->seqno);
+
+ if (type == GUC_HXG_TYPE_RESPONSE_FAILURE) {
+ g2h_fence->fail = true;
+ g2h_fence->error = FIELD_GET(GUC_HXG_FAILURE_MSG_0_ERROR, hxg[0]);
+ g2h_fence->hint = FIELD_GET(GUC_HXG_FAILURE_MSG_0_HINT, hxg[0]);
+ } else if (type == GUC_HXG_TYPE_NO_RESPONSE_RETRY) {
+ g2h_fence->retry = true;
+ g2h_fence->reason = FIELD_GET(GUC_HXG_RETRY_MSG_0_REASON, hxg[0]);
+ } else if (g2h_fence->response_buffer) {
+ g2h_fence->response_len = hxg_len;
+ memcpy(g2h_fence->response_buffer, hxg, hxg_len * sizeof(u32));
+ } else {
+ g2h_fence->response_data = FIELD_GET(GUC_HXG_RESPONSE_MSG_0_DATA0, hxg[0]);
+ }
+
+ g2h_release_space(ct, GUC_CTB_HXG_MSG_MAX_LEN);
+
+ g2h_fence->done = true;
+ smp_mb();
+
+ wake_up_all(&ct->g2h_fence_wq);
+
+ return 0;
+}
+
+static int parse_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len)
+{
+ struct xe_gt *gt = ct_to_gt(ct);
+ u32 *hxg = msg_to_hxg(msg);
+ u32 origin, type;
+ int ret;
+
+ lockdep_assert_held(&ct->lock);
+
+ origin = FIELD_GET(GUC_HXG_MSG_0_ORIGIN, hxg[0]);
+ if (unlikely(origin != GUC_HXG_ORIGIN_GUC)) {
+ xe_gt_err(gt, "G2H channel broken on read, origin=%u, reset required\n",
+ origin);
+ CT_DEAD(ct, &ct->ctbs.g2h, PARSE_G2H_ORIGIN);
+
+ return -EPROTO;
+ }
+
+ type = FIELD_GET(GUC_HXG_MSG_0_TYPE, hxg[0]);
+ switch (type) {
+ case GUC_HXG_TYPE_EVENT:
+ ret = parse_g2h_event(ct, msg, len);
+ break;
+ case GUC_HXG_TYPE_RESPONSE_SUCCESS:
+ case GUC_HXG_TYPE_RESPONSE_FAILURE:
+ case GUC_HXG_TYPE_NO_RESPONSE_RETRY:
+ ret = parse_g2h_response(ct, msg, len);
+ break;
+ default:
+ xe_gt_err(gt, "G2H channel broken on read, type=%u, reset required\n",
+ type);
+ CT_DEAD(ct, &ct->ctbs.g2h, PARSE_G2H_TYPE);
+
+ ret = -EOPNOTSUPP;
+ }
+
+ return ret;
+}
+
+static int process_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len)
+{
+ struct xe_guc *guc = ct_to_guc(ct);
+ struct xe_gt *gt = ct_to_gt(ct);
+ u32 hxg_len = msg_len_to_hxg_len(len);
+ u32 *hxg = msg_to_hxg(msg);
+ u32 action, adj_len;
+ u32 *payload;
+ int ret = 0;
+
+ if (FIELD_GET(GUC_HXG_MSG_0_TYPE, hxg[0]) != GUC_HXG_TYPE_EVENT)
+ return 0;
+
+ action = FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, hxg[0]);
+ payload = hxg + GUC_HXG_EVENT_MSG_MIN_LEN;
+ adj_len = hxg_len - GUC_HXG_EVENT_MSG_MIN_LEN;
+
+ switch (action) {
+ case XE_GUC_ACTION_SCHED_CONTEXT_MODE_DONE:
+ ret = xe_guc_sched_done_handler(guc, payload, adj_len);
+ break;
+ case XE_GUC_ACTION_DEREGISTER_CONTEXT_DONE:
+ ret = xe_guc_deregister_done_handler(guc, payload, adj_len);
+ break;
+ case XE_GUC_ACTION_CONTEXT_RESET_NOTIFICATION:
+ ret = xe_guc_exec_queue_reset_handler(guc, payload, adj_len);
+ break;
+ case XE_GUC_ACTION_ENGINE_FAILURE_NOTIFICATION:
+ ret = xe_guc_exec_queue_reset_failure_handler(guc, payload,
+ adj_len);
+ break;
+ case XE_GUC_ACTION_SCHED_ENGINE_MODE_DONE:
+ /* Selftest only at the moment */
+ break;
+ case XE_GUC_ACTION_STATE_CAPTURE_NOTIFICATION:
+ ret = xe_guc_error_capture_handler(guc, payload, adj_len);
+ break;
+ case XE_GUC_ACTION_NOTIFY_FLUSH_LOG_BUFFER_TO_FILE:
+ /* FIXME: Handle this */
+ break;
+ case XE_GUC_ACTION_NOTIFY_MEMORY_CAT_ERROR:
+ ret = xe_guc_exec_queue_memory_cat_error_handler(guc, payload,
+ adj_len);
+ break;
+ case XE_GUC_ACTION_REPORT_PAGE_FAULT_REQ_DESC:
+ ret = xe_guc_pagefault_handler(guc, payload, adj_len);
+ break;
+ case XE_GUC_ACTION_TLB_INVALIDATION_DONE:
+ ret = xe_guc_tlb_invalidation_done_handler(guc, payload,
+ adj_len);
+ break;
+ case XE_GUC_ACTION_ACCESS_COUNTER_NOTIFY:
+ ret = xe_guc_access_counter_notify_handler(guc, payload,
+ adj_len);
+ break;
+ case XE_GUC_ACTION_GUC2PF_RELAY_FROM_VF:
+ ret = xe_guc_relay_process_guc2pf(&guc->relay, hxg, hxg_len);
+ break;
+ case XE_GUC_ACTION_GUC2VF_RELAY_FROM_PF:
+ ret = xe_guc_relay_process_guc2vf(&guc->relay, hxg, hxg_len);
+ break;
+ case GUC_ACTION_GUC2PF_VF_STATE_NOTIFY:
+ ret = xe_gt_sriov_pf_control_process_guc2pf(gt, hxg, hxg_len);
+ break;
+ case GUC_ACTION_GUC2PF_ADVERSE_EVENT:
+ ret = xe_gt_sriov_pf_monitor_process_guc2pf(gt, hxg, hxg_len);
+ break;
+ default:
+ xe_gt_err(gt, "unexpected G2H action 0x%04x\n", action);
+ }
+
+ if (ret) {
+ xe_gt_err(gt, "G2H action 0x%04x failed (%pe)\n",
+ action, ERR_PTR(ret));
+ CT_DEAD(ct, NULL, PROCESS_FAILED);
+ }
+
+ return 0;
+}
+
+static int g2h_read(struct xe_guc_ct *ct, u32 *msg, bool fast_path)
+{
+ struct xe_device *xe = ct_to_xe(ct);
+ struct xe_gt *gt = ct_to_gt(ct);
+ struct guc_ctb *g2h = &ct->ctbs.g2h;
+ u32 tail, head, len, desc_status;
+ s32 avail;
+ u32 action;
+ u32 *hxg;
+
+ xe_gt_assert(gt, ct->state != XE_GUC_CT_STATE_NOT_INITIALIZED);
+ lockdep_assert_held(&ct->fast_lock);
+
+ if (ct->state == XE_GUC_CT_STATE_DISABLED)
+ return -ENODEV;
+
+ if (ct->state == XE_GUC_CT_STATE_STOPPED)
+ return -ECANCELED;
+
+ if (g2h->info.broken)
+ return -EPIPE;
+
+ xe_gt_assert(gt, xe_guc_ct_enabled(ct));
+
+ desc_status = desc_read(xe, g2h, status);
+ if (desc_status) {
+ if (desc_status & GUC_CTB_STATUS_DISABLED) {
+ /*
+ * Potentially valid if a CLIENT_RESET request resulted in
+ * contexts/engines being reset. But should never happen as
+ * no contexts should be active when CLIENT_RESET is sent.
+ */
+ xe_gt_err(gt, "CT read: unexpected G2H after GuC has stopped!\n");
+ desc_status &= ~GUC_CTB_STATUS_DISABLED;
+ }
+
+ if (desc_status) {
+ xe_gt_err(gt, "CT read: non-zero status: %u\n", desc_status);
+ goto corrupted;
+ }
+ }
+
+ if (IS_ENABLED(CONFIG_DRM_XE_DEBUG)) {
+ u32 desc_tail = desc_read(xe, g2h, tail);
+ /*
+ u32 desc_head = desc_read(xe, g2h, head);
+
+ * info.head and desc_head are updated back-to-back at the end of
+ * this function and nowhere else. Hence, they cannot be different
+ * unless two g2h_read calls are running concurrently. Which is not
+ * possible because it is guarded by ct->fast_lock. And yet, some
+		 * discrete platforms are regularly hitting this error :(.
+ *
+ * desc_head rolling backwards shouldn't cause any noticeable
+ * problems - just a delay in GuC being allowed to proceed past that
+ * point in the queue. So for now, just disable the error until it
+ * can be root caused.
+ *
+ if (g2h->info.head != desc_head) {
+ desc_write(xe, g2h, status, desc_status | GUC_CTB_STATUS_MISMATCH);
+ xe_gt_err(gt, "CT read: head was modified %u != %u\n",
+ desc_head, g2h->info.head);
+ goto corrupted;
+ }
+ */
+
+ if (g2h->info.head > g2h->info.size) {
+ desc_write(xe, g2h, status, desc_status | GUC_CTB_STATUS_OVERFLOW);
+ xe_gt_err(gt, "CT read: head out of range: %u vs %u\n",
+ g2h->info.head, g2h->info.size);
+ goto corrupted;
+ }
+
+ if (desc_tail >= g2h->info.size) {
+ desc_write(xe, g2h, status, desc_status | GUC_CTB_STATUS_OVERFLOW);
+			xe_gt_err(gt, "CT read: invalid tail offset %u >= %u\n",
+ desc_tail, g2h->info.size);
+ goto corrupted;
+ }
+ }
+
+ /* Calculate DW available to read */
+ tail = desc_read(xe, g2h, tail);
+ avail = tail - g2h->info.head;
+ if (unlikely(avail == 0))
+ return 0;
+
+ if (avail < 0)
+ avail += g2h->info.size;
+
+ /* Read header */
+ xe_map_memcpy_from(xe, msg, &g2h->cmds, sizeof(u32) * g2h->info.head,
+ sizeof(u32));
+ len = FIELD_GET(GUC_CTB_MSG_0_NUM_DWORDS, msg[0]) + GUC_CTB_MSG_MIN_LEN;
+ if (len > avail) {
+ xe_gt_err(gt, "G2H channel broken on read, avail=%d, len=%d, reset required\n",
+ avail, len);
+ goto corrupted;
+ }
+
+ head = (g2h->info.head + 1) % g2h->info.size;
+ avail = len - 1;
+
+ /* Read G2H message */
+ if (avail + head > g2h->info.size) {
+ u32 avail_til_wrap = g2h->info.size - head;
+
+ xe_map_memcpy_from(xe, msg + 1,
+ &g2h->cmds, sizeof(u32) * head,
+ avail_til_wrap * sizeof(u32));
+ xe_map_memcpy_from(xe, msg + 1 + avail_til_wrap,
+ &g2h->cmds, 0,
+ (avail - avail_til_wrap) * sizeof(u32));
+ } else {
+ xe_map_memcpy_from(xe, msg + 1,
+ &g2h->cmds, sizeof(u32) * head,
+ avail * sizeof(u32));
+ }
+
+ hxg = msg_to_hxg(msg);
+ action = FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, hxg[0]);
+
+ if (fast_path) {
+ if (FIELD_GET(GUC_HXG_MSG_0_TYPE, hxg[0]) != GUC_HXG_TYPE_EVENT)
+ return 0;
+
+ switch (action) {
+ case XE_GUC_ACTION_REPORT_PAGE_FAULT_REQ_DESC:
+ case XE_GUC_ACTION_TLB_INVALIDATION_DONE:
+ break; /* Process these in fast-path */
+ default:
+ return 0;
+ }
+ }
+
+ /* Update local / descriptor header */
+ g2h->info.head = (head + avail) % g2h->info.size;
+ desc_write(xe, g2h, head, g2h->info.head);
+
+ trace_xe_guc_ctb_g2h(xe, ct_to_gt(ct)->info.id,
+ action, len, g2h->info.head, tail);
+
+ return len;
+
+corrupted:
+ CT_DEAD(ct, &ct->ctbs.g2h, G2H_READ);
+ return -EPROTO;
+}
+
+static void g2h_fast_path(struct xe_guc_ct *ct, u32 *msg, u32 len)
+{
+ struct xe_gt *gt = ct_to_gt(ct);
+ struct xe_guc *guc = ct_to_guc(ct);
+ u32 hxg_len = msg_len_to_hxg_len(len);
+ u32 *hxg = msg_to_hxg(msg);
+ u32 action = FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, hxg[0]);
+ u32 *payload = hxg + GUC_HXG_MSG_MIN_LEN;
+ u32 adj_len = hxg_len - GUC_HXG_MSG_MIN_LEN;
+ int ret = 0;
+
+ switch (action) {
+ case XE_GUC_ACTION_REPORT_PAGE_FAULT_REQ_DESC:
+ ret = xe_guc_pagefault_handler(guc, payload, adj_len);
+ break;
+ case XE_GUC_ACTION_TLB_INVALIDATION_DONE:
+ __g2h_release_space(ct, len);
+ ret = xe_guc_tlb_invalidation_done_handler(guc, payload,
+ adj_len);
+ break;
+ default:
+ xe_gt_warn(gt, "NOT_POSSIBLE");
+ }
+
+ if (ret) {
+ xe_gt_err(gt, "G2H action 0x%04x failed (%pe)\n",
+ action, ERR_PTR(ret));
+ CT_DEAD(ct, NULL, FAST_G2H);
+ }
+}
+
+/**
+ * xe_guc_ct_fast_path - process critical G2H in the IRQ handler
+ * @ct: GuC CT object
+ *
+ * Anything related to page faults is critical for performance, so process these
+ * critical G2H messages in the IRQ handler. This is safe as these handlers either
+ * just wake up waiters or queue another worker.
+ */
+void xe_guc_ct_fast_path(struct xe_guc_ct *ct)
+{
+ struct xe_device *xe = ct_to_xe(ct);
+ bool ongoing;
+ int len;
+
+ ongoing = xe_pm_runtime_get_if_active(ct_to_xe(ct));
+ if (!ongoing && xe_pm_read_callback_task(ct_to_xe(ct)) == NULL)
+ return;
+
+ spin_lock(&ct->fast_lock);
+ do {
+ len = g2h_read(ct, ct->fast_msg, true);
+ if (len > 0)
+ g2h_fast_path(ct, ct->fast_msg, len);
+ } while (len > 0);
+ spin_unlock(&ct->fast_lock);
+
+ if (ongoing)
+ xe_pm_runtime_put(xe);
+}
+
+/* Returns less than zero on error, 0 on done, 1 on more available */
+static int dequeue_one_g2h(struct xe_guc_ct *ct)
+{
+ int len;
+ int ret;
+
+ lockdep_assert_held(&ct->lock);
+
+ spin_lock_irq(&ct->fast_lock);
+ len = g2h_read(ct, ct->msg, false);
+ spin_unlock_irq(&ct->fast_lock);
+ if (len <= 0)
+ return len;
+
+ ret = parse_g2h_msg(ct, ct->msg, len);
+ if (unlikely(ret < 0))
+ return ret;
+
+ ret = process_g2h_msg(ct, ct->msg, len);
+ if (unlikely(ret < 0))
+ return ret;
+
+ return 1;
+}
+
+static void receive_g2h(struct xe_guc_ct *ct)
+{
+ bool ongoing;
+ int ret;
+
+ /*
+ * Normal users must always hold mem_access.ref around CT calls. However
+ * during the runtime pm callbacks we rely on CT to talk to the GuC, but
+ * at this stage we can't rely on mem_access.ref and even the
+ * callback_task will be different than current. For such cases we just
+ * need to ensure we always process the responses from any blocking
+ * ct_send requests or where we otherwise expect some response when
+ * initiated from those callbacks (which will need to wait for the below
+ * dequeue_one_g2h()). The dequeue_one_g2h() will gracefully fail if
+ * the device has suspended to the point that the CT communication has
+ * been disabled.
+ *
+ * If we are inside the runtime pm callback, we can be the only task
+ * still issuing CT requests (since that requires having the
+ * mem_access.ref). It seems like it might in theory be possible to
+ * receive unsolicited events from the GuC just as we are
+ * suspending-resuming, but those will currently anyway be lost when
+ * eventually exiting from suspend, hence no need to wake up the device
+ * here. If we ever need something stronger than get_if_ongoing() then
+ * we need to be careful with blocking the pm callbacks from getting CT
+ * responses, if the worker here is blocked on those callbacks
+ * completing, creating a deadlock.
+ */
+ ongoing = xe_pm_runtime_get_if_active(ct_to_xe(ct));
+ if (!ongoing && xe_pm_read_callback_task(ct_to_xe(ct)) == NULL)
+ return;
+
+ do {
+ mutex_lock(&ct->lock);
+ ret = dequeue_one_g2h(ct);
+ mutex_unlock(&ct->lock);
+
+ if (unlikely(ret == -EPROTO || ret == -EOPNOTSUPP)) {
+ xe_gt_err(ct_to_gt(ct), "CT dequeue failed: %d", ret);
+ CT_DEAD(ct, NULL, G2H_RECV);
+ kick_reset(ct);
+ }
+ } while (ret == 1);
+
+ if (ongoing)
+ xe_pm_runtime_put(ct_to_xe(ct));
+}
+
+static void g2h_worker_func(struct work_struct *w)
+{
+ struct xe_guc_ct *ct = container_of(w, struct xe_guc_ct, g2h_worker);
+
+ receive_g2h(ct);
+}
+
+struct xe_guc_ct_snapshot *xe_guc_ct_snapshot_alloc(struct xe_guc_ct *ct, bool atomic)
+{
+ struct xe_guc_ct_snapshot *snapshot;
+
+ snapshot = kzalloc(sizeof(*snapshot), atomic ? GFP_ATOMIC : GFP_KERNEL);
+ if (!snapshot)
+ return NULL;
+
+ if (ct->bo) {
+ snapshot->ctb_size = ct->bo->size;
+ snapshot->ctb = kmalloc(snapshot->ctb_size, atomic ? GFP_ATOMIC : GFP_KERNEL);
+ }
+
+ return snapshot;
+}
+
+static void guc_ctb_snapshot_capture(struct xe_device *xe, struct guc_ctb *ctb,
+ struct guc_ctb_snapshot *snapshot)
+{
+ xe_map_memcpy_from(xe, &snapshot->desc, &ctb->desc, 0,
+ sizeof(struct guc_ct_buffer_desc));
+ memcpy(&snapshot->info, &ctb->info, sizeof(struct guc_ctb_info));
+}
+
+static void guc_ctb_snapshot_print(struct guc_ctb_snapshot *snapshot,
+ struct drm_printer *p)
+{
+ drm_printf(p, "\tsize: %d\n", snapshot->info.size);
+ drm_printf(p, "\tresv_space: %d\n", snapshot->info.resv_space);
+ drm_printf(p, "\thead: %d\n", snapshot->info.head);
+ drm_printf(p, "\ttail: %d\n", snapshot->info.tail);
+ drm_printf(p, "\tspace: %d\n", snapshot->info.space);
+ drm_printf(p, "\tbroken: %d\n", snapshot->info.broken);
+ drm_printf(p, "\thead (memory): %d\n", snapshot->desc.head);
+ drm_printf(p, "\ttail (memory): %d\n", snapshot->desc.tail);
+ drm_printf(p, "\tstatus (memory): 0x%x\n", snapshot->desc.status);
+}
+
+/**
+ * xe_guc_ct_snapshot_capture - Take a quick snapshot of the CT state.
+ * @ct: GuC CT object.
+ * @atomic: Boolean to indicate if this is called from an atomic context like
+ * a reset or the CTB handler, or from some regular path like debugfs.
+ *
+ * The snapshot can be printed out at a later stage, e.g. during dev_coredump
+ * analysis.
+ *
+ * Returns: a GuC CT snapshot object that must be freed by the caller
+ * by using `xe_guc_ct_snapshot_free`.
+ */
+struct xe_guc_ct_snapshot *xe_guc_ct_snapshot_capture(struct xe_guc_ct *ct,
+ bool atomic)
+{
+ struct xe_device *xe = ct_to_xe(ct);
+ struct xe_guc_ct_snapshot *snapshot;
+
+ snapshot = xe_guc_ct_snapshot_alloc(ct, atomic);
+ if (!snapshot) {
+ xe_gt_err(ct_to_gt(ct), "Skipping CTB snapshot entirely.\n");
+ return NULL;
+ }
+
+ if (xe_guc_ct_enabled(ct) || ct->state == XE_GUC_CT_STATE_STOPPED) {
+ snapshot->ct_enabled = true;
+ snapshot->g2h_outstanding = READ_ONCE(ct->g2h_outstanding);
+ guc_ctb_snapshot_capture(xe, &ct->ctbs.h2g, &snapshot->h2g);
+ guc_ctb_snapshot_capture(xe, &ct->ctbs.g2h, &snapshot->g2h);
+ }
+
+ if (ct->bo && snapshot->ctb)
+ xe_map_memcpy_from(xe, snapshot->ctb, &ct->bo->vmap, 0, snapshot->ctb_size);
+
+ return snapshot;
+}
+
+/**
+ * xe_guc_ct_snapshot_print - Print out a given GuC CT snapshot.
+ * @snapshot: GuC CT snapshot object.
+ * @p: drm_printer where it will be printed out.
+ *
+ * This function prints out a given GuC CT snapshot object.
+ */
+void xe_guc_ct_snapshot_print(struct xe_guc_ct_snapshot *snapshot,
+ struct drm_printer *p)
+{
+ if (!snapshot)
+ return;
+
+ if (snapshot->ct_enabled) {
+ drm_puts(p, "H2G CTB (all sizes in DW):\n");
+ guc_ctb_snapshot_print(&snapshot->h2g, p);
+
+ drm_puts(p, "G2H CTB (all sizes in DW):\n");
+ guc_ctb_snapshot_print(&snapshot->g2h, p);
+ drm_printf(p, "\tg2h outstanding: %d\n",
+ snapshot->g2h_outstanding);
+
+ if (snapshot->ctb) {
+ xe_print_blob_ascii85(p, "CTB data", snapshot->ctb, 0, snapshot->ctb_size);
+ } else {
+ drm_printf(p, "CTB snapshot missing!\n");
+ return;
+ }
+ } else {
+ drm_puts(p, "CT disabled\n");
+ }
+}
+
+/**
+ * xe_guc_ct_snapshot_free - Free all allocated objects for a given snapshot.
+ * @snapshot: GuC CT snapshot object.
+ *
+ * This function frees all the memory that was allocated at capture time.
+ */
+void xe_guc_ct_snapshot_free(struct xe_guc_ct_snapshot *snapshot)
+{
+ if (!snapshot)
+ return;
+
+ kfree(snapshot->ctb);
+ kfree(snapshot);
+}
+
+/**
+ * xe_guc_ct_print - GuC CT Print.
+ * @ct: GuC CT.
+ * @p: drm_printer where it will be printed out.
+ *
+ * This function quickly captures a snapshot and immediately prints it out.
+ */
+void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p)
+{
+ struct xe_guc_ct_snapshot *snapshot;
+
+ snapshot = xe_guc_ct_snapshot_capture(ct, false);
+ xe_guc_ct_snapshot_print(snapshot, p);
+ xe_guc_ct_snapshot_free(snapshot);
+}
+
+#if IS_ENABLED(CONFIG_DRM_XE_DEBUG)
+static void ct_dead_capture(struct xe_guc_ct *ct, struct guc_ctb *ctb, u32 reason_code)
+{
+ struct xe_guc_log_snapshot *snapshot_log;
+ struct xe_guc_ct_snapshot *snapshot_ct;
+ struct xe_guc *guc = ct_to_guc(ct);
+ unsigned long flags;
+ bool have_capture;
+
+ if (ctb)
+ ctb->info.broken = true;
+
+ /* Ignore further errors after the first dump until a reset */
+ if (ct->dead.reported)
+ return;
+
+ spin_lock_irqsave(&ct->dead.lock, flags);
+
+ /* And only capture one dump at a time */
+ have_capture = ct->dead.reason & (1 << CT_DEAD_STATE_CAPTURE);
+ ct->dead.reason |= (1 << reason_code) |
+ (1 << CT_DEAD_STATE_CAPTURE);
+
+ spin_unlock_irqrestore(&ct->dead.lock, flags);
+
+ if (have_capture)
+ return;
+
+ snapshot_log = xe_guc_log_snapshot_capture(&guc->log, true);
+ snapshot_ct = xe_guc_ct_snapshot_capture((ct), true);
+
+ spin_lock_irqsave(&ct->dead.lock, flags);
+
+ if (ct->dead.snapshot_log || ct->dead.snapshot_ct) {
+ xe_gt_err(ct_to_gt(ct), "Got unexpected dead CT capture!\n");
+ xe_guc_log_snapshot_free(snapshot_log);
+ xe_guc_ct_snapshot_free(snapshot_ct);
+ } else {
+ ct->dead.snapshot_log = snapshot_log;
+ ct->dead.snapshot_ct = snapshot_ct;
+ }
+
+ spin_unlock_irqrestore(&ct->dead.lock, flags);
+
+ queue_work(system_unbound_wq, &(ct)->dead.worker);
+}
+
+static void ct_dead_print(struct xe_dead_ct *dead)
+{
+ struct xe_guc_ct *ct = container_of(dead, struct xe_guc_ct, dead);
+ struct xe_device *xe = ct_to_xe(ct);
+ struct xe_gt *gt = ct_to_gt(ct);
+ static int g_count;
+ struct drm_printer ip = xe_gt_info_printer(gt);
+ struct drm_printer lp = drm_line_printer(&ip, "Capture", ++g_count);
+
+ if (!dead->reason) {
+ xe_gt_err(gt, "CTB is dead for no reason!?\n");
+ return;
+ }
+
+ drm_printf(&lp, "CTB is dead - reason=0x%X\n", dead->reason);
+
+ /* Can't generate a genuine core dump at this point, so just do the good bits */
+ drm_puts(&lp, "**** Xe Device Coredump ****\n");
+ xe_device_snapshot_print(xe, &lp);
+
+ drm_printf(&lp, "**** GT #%d ****\n", gt->info.id);
+ drm_printf(&lp, "\tTile: %d\n", gt->tile->id);
+
+ drm_puts(&lp, "**** GuC Log ****\n");
+ xe_guc_log_snapshot_print(dead->snapshot_log, &lp);
+
+ drm_puts(&lp, "**** GuC CT ****\n");
+ xe_guc_ct_snapshot_print(dead->snapshot_ct, &lp);
+
+ drm_puts(&lp, "Done.\n");
+}
+
+static void ct_dead_worker_func(struct work_struct *w)
+{
+ struct xe_guc_ct *ct = container_of(w, struct xe_guc_ct, dead.worker);
+
+ if (!ct->dead.reported) {
+ ct->dead.reported = true;
+ ct_dead_print(&ct->dead);
+ }
+
+ spin_lock_irq(&ct->dead.lock);
+
+ xe_guc_log_snapshot_free(ct->dead.snapshot_log);
+ ct->dead.snapshot_log = NULL;
+ xe_guc_ct_snapshot_free(ct->dead.snapshot_ct);
+ ct->dead.snapshot_ct = NULL;
+
+ if (ct->dead.reason & (1 << CT_DEAD_STATE_REARM)) {
+ /* A reset has occurred so re-arm the error reporting */
+ ct->dead.reason = 0;
+ ct->dead.reported = false;
+ }
+
+ spin_unlock_irq(&ct->dead.lock);
+}
+#endif
diff --git a/rr-cache/0ace7964b1b2d66224889bcf0b6a7a221776d8c8/preimage.2 b/rr-cache/0ace7964b1b2d66224889bcf0b6a7a221776d8c8/preimage.2
new file mode 100644
index 000000000000..d3838661f4b9
--- /dev/null
+++ b/rr-cache/0ace7964b1b2d66224889bcf0b6a7a221776d8c8/preimage.2
@@ -0,0 +1,1866 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_guc_ct.h"
+
+#include <linux/bitfield.h>
+#include <linux/circ_buf.h>
+#include <linux/delay.h>
+#include <linux/fault-inject.h>
+
+#include <kunit/static_stub.h>
+
+#include <drm/drm_managed.h>
+
+#include "abi/guc_actions_abi.h"
+#include "abi/guc_actions_sriov_abi.h"
+#include "abi/guc_klvs_abi.h"
+#include "xe_bo.h"
+#include "xe_devcoredump.h"
+#include "xe_device.h"
+#include "xe_gt.h"
+#include "xe_gt_pagefault.h"
+#include "xe_gt_printk.h"
+#include "xe_gt_sriov_pf_control.h"
+#include "xe_gt_sriov_pf_monitor.h"
+#include "xe_gt_tlb_invalidation.h"
+#include "xe_guc.h"
+#include "xe_guc_log.h"
+#include "xe_guc_relay.h"
+#include "xe_guc_submit.h"
+#include "xe_map.h"
+#include "xe_pm.h"
+#include "xe_trace_guc.h"
+
+#if IS_ENABLED(CONFIG_DRM_XE_DEBUG)
+enum {
+ /* Internal states, not error conditions */
+ CT_DEAD_STATE_REARM, /* 0x0001 */
+ CT_DEAD_STATE_CAPTURE, /* 0x0002 */
+
+ /* Error conditions */
+ CT_DEAD_SETUP, /* 0x0004 */
+ CT_DEAD_H2G_WRITE, /* 0x0008 */
+ CT_DEAD_H2G_HAS_ROOM, /* 0x0010 */
+ CT_DEAD_G2H_READ, /* 0x0020 */
+ CT_DEAD_G2H_RECV, /* 0x0040 */
+ CT_DEAD_G2H_RELEASE, /* 0x0080 */
+ CT_DEAD_DEADLOCK, /* 0x0100 */
+ CT_DEAD_PROCESS_FAILED, /* 0x0200 */
+ CT_DEAD_FAST_G2H, /* 0x0400 */
+ CT_DEAD_PARSE_G2H_RESPONSE, /* 0x0800 */
+ CT_DEAD_PARSE_G2H_UNKNOWN, /* 0x1000 */
+ CT_DEAD_PARSE_G2H_ORIGIN, /* 0x2000 */
+ CT_DEAD_PARSE_G2H_TYPE, /* 0x4000 */
+};
+
+static void ct_dead_worker_func(struct work_struct *w);
+static void ct_dead_capture(struct xe_guc_ct *ct, struct guc_ctb *ctb, u32 reason_code);
+
+#define CT_DEAD(ct, ctb, reason_code) ct_dead_capture((ct), (ctb), CT_DEAD_##reason_code)
+#else
+#define CT_DEAD(ct, ctb, reason) \
+ do { \
+ struct guc_ctb *_ctb = (ctb); \
+ if (_ctb) \
+ _ctb->info.broken = true; \
+ } while (0)
+#endif
+
+/* Used when a CT send wants to block and / or receive data */
+struct g2h_fence {
+ u32 *response_buffer;
+ u32 seqno;
+ u32 response_data;
+ u16 response_len;
+ u16 error;
+ u16 hint;
+ u16 reason;
+ bool retry;
+ bool fail;
+ bool done;
+};
+
+static void g2h_fence_init(struct g2h_fence *g2h_fence, u32 *response_buffer)
+{
+ g2h_fence->response_buffer = response_buffer;
+ g2h_fence->response_data = 0;
+ g2h_fence->response_len = 0;
+ g2h_fence->fail = false;
+ g2h_fence->retry = false;
+ g2h_fence->done = false;
+ g2h_fence->seqno = ~0x0;
+}
+
+static bool g2h_fence_needs_alloc(struct g2h_fence *g2h_fence)
+{
+ return g2h_fence->seqno == ~0x0;
+}
+
+static struct xe_guc *
+ct_to_guc(struct xe_guc_ct *ct)
+{
+ return container_of(ct, struct xe_guc, ct);
+}
+
+static struct xe_gt *
+ct_to_gt(struct xe_guc_ct *ct)
+{
+ return container_of(ct, struct xe_gt, uc.guc.ct);
+}
+
+static struct xe_device *
+ct_to_xe(struct xe_guc_ct *ct)
+{
+ return gt_to_xe(ct_to_gt(ct));
+}
+
+/**
+ * DOC: GuC CTB Blob
+ *
+ * We allocate single blob to hold both CTB descriptors and buffers:
+ *
+ * +--------+-----------------------------------------------+------+
+ * | offset | contents | size |
+ * +========+===============================================+======+
+ * | 0x0000 | H2G CTB Descriptor (send) | |
+ * +--------+-----------------------------------------------+ 4K |
+ * | 0x0800 | G2H CTB Descriptor (g2h) | |
+ * +--------+-----------------------------------------------+------+
+ * | 0x1000 | H2G CT Buffer (send) | n*4K |
+ * | | | |
+ * +--------+-----------------------------------------------+------+
+ * | 0x1000 | G2H CT Buffer (g2h) | m*4K |
+ * | + n*4K | | |
+ * +--------+-----------------------------------------------+------+
+ *
+ * Size of each ``CT Buffer`` must be a multiple of 4K.
+ * We don't expect too many messages in flight at any time, unless we are
+ * using GuC submission. In that case each request requires a minimum of
+ * 2 dwords, which gives us a maximum of 256 queued requests. Hopefully this
+ * is enough space to avoid backpressure on the driver. We increase the size
+ * of the receive buffer (relative to the send) to ensure a G2H response
+ * CTB has a landing spot.
+ *
+ * In addition to submissions, the G2H buffer needs to be able to hold
+ * enough space for recoverable page fault notifications. The number of
+ * page faults is interrupt driven and can be as much as the number of
+ * compute resources available. However, most of the actual work for these
+ * is in a separate page fault worker thread. Therefore we only need to
+ * make sure the queue has enough space to handle all of the submissions
+ * and responses and an extra buffer for incoming page faults.
+ */
+
+#define CTB_DESC_SIZE ALIGN(sizeof(struct guc_ct_buffer_desc), SZ_2K)
+#define CTB_H2G_BUFFER_SIZE (SZ_4K)
+#define CTB_G2H_BUFFER_SIZE (SZ_128K)
+#define G2H_ROOM_BUFFER_SIZE (CTB_G2H_BUFFER_SIZE / 2)
+
+/**
+ * xe_guc_ct_queue_proc_time_jiffies - Return maximum time to process a full
+ * CT command queue
+ * @ct: the &xe_guc_ct. Unused at this moment but will be used in the future.
+ *
+ * Observation is that a 4KiB buffer full of commands takes a little over a
+ * second to process. Use that to calculate maximum time to process a full CT
+ * command queue.
+ *
+ * Return: Maximum time to process a full CT queue in jiffies.
+ */
+long xe_guc_ct_queue_proc_time_jiffies(struct xe_guc_ct *ct)
+{
+ BUILD_BUG_ON(!IS_ALIGNED(CTB_H2G_BUFFER_SIZE, SZ_4));
+ return (CTB_H2G_BUFFER_SIZE / SZ_4K) * HZ;
+}
+
+static size_t guc_ct_size(void)
+{
+ return 2 * CTB_DESC_SIZE + CTB_H2G_BUFFER_SIZE +
+ CTB_G2H_BUFFER_SIZE;
+}
+
+static void guc_ct_fini(struct drm_device *drm, void *arg)
+{
+ struct xe_guc_ct *ct = arg;
+
+ destroy_workqueue(ct->g2h_wq);
+ xa_destroy(&ct->fence_lookup);
+}
+
+static void receive_g2h(struct xe_guc_ct *ct);
+static void g2h_worker_func(struct work_struct *w);
+static void safe_mode_worker_func(struct work_struct *w);
+
+static void primelockdep(struct xe_guc_ct *ct)
+{
+ if (!IS_ENABLED(CONFIG_LOCKDEP))
+ return;
+
+ fs_reclaim_acquire(GFP_KERNEL);
+ might_lock(&ct->lock);
+ fs_reclaim_release(GFP_KERNEL);
+}
+
+int xe_guc_ct_init(struct xe_guc_ct *ct)
+{
+ struct xe_device *xe = ct_to_xe(ct);
+ struct xe_gt *gt = ct_to_gt(ct);
+ struct xe_tile *tile = gt_to_tile(gt);
+ struct xe_bo *bo;
+ int err;
+
+ xe_gt_assert(gt, !(guc_ct_size() % PAGE_SIZE));
+
+ ct->g2h_wq = alloc_ordered_workqueue("xe-g2h-wq", 0);
+ if (!ct->g2h_wq)
+ return -ENOMEM;
+
+ spin_lock_init(&ct->fast_lock);
+ xa_init(&ct->fence_lookup);
+ INIT_WORK(&ct->g2h_worker, g2h_worker_func);
+ INIT_DELAYED_WORK(&ct->safe_mode_worker, safe_mode_worker_func);
+#if IS_ENABLED(CONFIG_DRM_XE_DEBUG)
+ spin_lock_init(&ct->dead.lock);
+ INIT_WORK(&ct->dead.worker, ct_dead_worker_func);
+#endif
+ init_waitqueue_head(&ct->wq);
+ init_waitqueue_head(&ct->g2h_fence_wq);
+
+ err = drmm_mutex_init(&xe->drm, &ct->lock);
+ if (err)
+ return err;
+
+ primelockdep(ct);
+
+ bo = xe_managed_bo_create_pin_map(xe, tile, guc_ct_size(),
+ XE_BO_FLAG_SYSTEM |
+ XE_BO_FLAG_GGTT |
+ XE_BO_FLAG_GGTT_INVALIDATE);
+ if (IS_ERR(bo))
+ return PTR_ERR(bo);
+
+ ct->bo = bo;
+
+ err = drmm_add_action_or_reset(&xe->drm, guc_ct_fini, ct);
+ if (err)
+ return err;
+
+ xe_gt_assert(gt, ct->state == XE_GUC_CT_STATE_NOT_INITIALIZED);
+ ct->state = XE_GUC_CT_STATE_DISABLED;
+ return 0;
+}
+ALLOW_ERROR_INJECTION(xe_guc_ct_init, ERRNO); /* See xe_pci_probe() */
+
+#define desc_read(xe_, guc_ctb__, field_) \
+ xe_map_rd_field(xe_, &guc_ctb__->desc, 0, \
+ struct guc_ct_buffer_desc, field_)
+
+#define desc_write(xe_, guc_ctb__, field_, val_) \
+ xe_map_wr_field(xe_, &guc_ctb__->desc, 0, \
+ struct guc_ct_buffer_desc, field_, val_)
+
+static void guc_ct_ctb_h2g_init(struct xe_device *xe, struct guc_ctb *h2g,
+ struct iosys_map *map)
+{
+ h2g->info.size = CTB_H2G_BUFFER_SIZE / sizeof(u32);
+ h2g->info.resv_space = 0;
+ h2g->info.tail = 0;
+ h2g->info.head = 0;
+ h2g->info.space = CIRC_SPACE(h2g->info.tail, h2g->info.head,
+ h2g->info.size) -
+ h2g->info.resv_space;
+ h2g->info.broken = false;
+
+ h2g->desc = *map;
+ xe_map_memset(xe, &h2g->desc, 0, 0, sizeof(struct guc_ct_buffer_desc));
+
+ h2g->cmds = IOSYS_MAP_INIT_OFFSET(map, CTB_DESC_SIZE * 2);
+}
+
+static void guc_ct_ctb_g2h_init(struct xe_device *xe, struct guc_ctb *g2h,
+ struct iosys_map *map)
+{
+ g2h->info.size = CTB_G2H_BUFFER_SIZE / sizeof(u32);
+ g2h->info.resv_space = G2H_ROOM_BUFFER_SIZE / sizeof(u32);
+ g2h->info.head = 0;
+ g2h->info.tail = 0;
+ g2h->info.space = CIRC_SPACE(g2h->info.tail, g2h->info.head,
+ g2h->info.size) -
+ g2h->info.resv_space;
+ g2h->info.broken = false;
+
+ g2h->desc = IOSYS_MAP_INIT_OFFSET(map, CTB_DESC_SIZE);
+ xe_map_memset(xe, &g2h->desc, 0, 0, sizeof(struct guc_ct_buffer_desc));
+
+ g2h->cmds = IOSYS_MAP_INIT_OFFSET(map, CTB_DESC_SIZE * 2 +
+ CTB_H2G_BUFFER_SIZE);
+}
+
+static int guc_ct_ctb_h2g_register(struct xe_guc_ct *ct)
+{
+ struct xe_guc *guc = ct_to_guc(ct);
+ u32 desc_addr, ctb_addr, size;
+ int err;
+
+ desc_addr = xe_bo_ggtt_addr(ct->bo);
+ ctb_addr = xe_bo_ggtt_addr(ct->bo) + CTB_DESC_SIZE * 2;
+ size = ct->ctbs.h2g.info.size * sizeof(u32);
+
+ err = xe_guc_self_cfg64(guc,
+ GUC_KLV_SELF_CFG_H2G_CTB_DESCRIPTOR_ADDR_KEY,
+ desc_addr);
+ if (err)
+ return err;
+
+ err = xe_guc_self_cfg64(guc,
+ GUC_KLV_SELF_CFG_H2G_CTB_ADDR_KEY,
+ ctb_addr);
+ if (err)
+ return err;
+
+ return xe_guc_self_cfg32(guc,
+ GUC_KLV_SELF_CFG_H2G_CTB_SIZE_KEY,
+ size);
+}
+
+static int guc_ct_ctb_g2h_register(struct xe_guc_ct *ct)
+{
+ struct xe_guc *guc = ct_to_guc(ct);
+ u32 desc_addr, ctb_addr, size;
+ int err;
+
+ desc_addr = xe_bo_ggtt_addr(ct->bo) + CTB_DESC_SIZE;
+ ctb_addr = xe_bo_ggtt_addr(ct->bo) + CTB_DESC_SIZE * 2 +
+ CTB_H2G_BUFFER_SIZE;
+ size = ct->ctbs.g2h.info.size * sizeof(u32);
+
+ err = xe_guc_self_cfg64(guc,
+ GUC_KLV_SELF_CFG_G2H_CTB_DESCRIPTOR_ADDR_KEY,
+ desc_addr);
+ if (err)
+ return err;
+
+ err = xe_guc_self_cfg64(guc,
+ GUC_KLV_SELF_CFG_G2H_CTB_ADDR_KEY,
+ ctb_addr);
+ if (err)
+ return err;
+
+ return xe_guc_self_cfg32(guc,
+ GUC_KLV_SELF_CFG_G2H_CTB_SIZE_KEY,
+ size);
+}
+
+static int guc_ct_control_toggle(struct xe_guc_ct *ct, bool enable)
+{
+ u32 request[HOST2GUC_CONTROL_CTB_REQUEST_MSG_LEN] = {
+ FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
+ FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) |
+ FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION,
+ GUC_ACTION_HOST2GUC_CONTROL_CTB),
+ FIELD_PREP(HOST2GUC_CONTROL_CTB_REQUEST_MSG_1_CONTROL,
+ enable ? GUC_CTB_CONTROL_ENABLE :
+ GUC_CTB_CONTROL_DISABLE),
+ };
+ int ret = xe_guc_mmio_send(ct_to_guc(ct), request, ARRAY_SIZE(request));
+
+ return ret > 0 ? -EPROTO : ret;
+}
+
+static void xe_guc_ct_set_state(struct xe_guc_ct *ct,
+ enum xe_guc_ct_state state)
+{
+ mutex_lock(&ct->lock); /* Serialise dequeue_one_g2h() */
+ spin_lock_irq(&ct->fast_lock); /* Serialise CT fast-path */
+
+ xe_gt_assert(ct_to_gt(ct), ct->g2h_outstanding == 0 ||
+ state == XE_GUC_CT_STATE_STOPPED);
+
+ if (ct->g2h_outstanding)
+ xe_pm_runtime_put(ct_to_xe(ct));
+ ct->g2h_outstanding = 0;
+ ct->state = state;
+
+ spin_unlock_irq(&ct->fast_lock);
+
+ /*
+	 * Lockdep doesn't like this under the fast lock, and the destroy only
+	 * needs to be serialized with the send path, which the CT lock provides.
+ */
+ xa_destroy(&ct->fence_lookup);
+
+ mutex_unlock(&ct->lock);
+}
+
+static bool ct_needs_safe_mode(struct xe_guc_ct *ct)
+{
+ return !pci_dev_msi_enabled(to_pci_dev(ct_to_xe(ct)->drm.dev));
+}
+
+static bool ct_restart_safe_mode_worker(struct xe_guc_ct *ct)
+{
+ if (!ct_needs_safe_mode(ct))
+ return false;
+
+ queue_delayed_work(ct->g2h_wq, &ct->safe_mode_worker, HZ / 10);
+ return true;
+}
+
+static void safe_mode_worker_func(struct work_struct *w)
+{
+ struct xe_guc_ct *ct = container_of(w, struct xe_guc_ct, safe_mode_worker.work);
+
+ receive_g2h(ct);
+
+ if (!ct_restart_safe_mode_worker(ct))
+ xe_gt_dbg(ct_to_gt(ct), "GuC CT safe-mode canceled\n");
+}
+
+static void ct_enter_safe_mode(struct xe_guc_ct *ct)
+{
+ if (ct_restart_safe_mode_worker(ct))
+ xe_gt_dbg(ct_to_gt(ct), "GuC CT safe-mode enabled\n");
+}
+
+static void ct_exit_safe_mode(struct xe_guc_ct *ct)
+{
+ if (cancel_delayed_work_sync(&ct->safe_mode_worker))
+ xe_gt_dbg(ct_to_gt(ct), "GuC CT safe-mode disabled\n");
+}
+
+int xe_guc_ct_enable(struct xe_guc_ct *ct)
+{
+ struct xe_device *xe = ct_to_xe(ct);
+ struct xe_gt *gt = ct_to_gt(ct);
+ int err;
+
+ xe_gt_assert(gt, !xe_guc_ct_enabled(ct));
+
+ xe_map_memset(xe, &ct->bo->vmap, 0, 0, ct->bo->size);
+ guc_ct_ctb_h2g_init(xe, &ct->ctbs.h2g, &ct->bo->vmap);
+ guc_ct_ctb_g2h_init(xe, &ct->ctbs.g2h, &ct->bo->vmap);
+
+ err = guc_ct_ctb_h2g_register(ct);
+ if (err)
+ goto err_out;
+
+ err = guc_ct_ctb_g2h_register(ct);
+ if (err)
+ goto err_out;
+
+ err = guc_ct_control_toggle(ct, true);
+ if (err)
+ goto err_out;
+
+ xe_guc_ct_set_state(ct, XE_GUC_CT_STATE_ENABLED);
+
+ smp_mb();
+ wake_up_all(&ct->wq);
+ xe_gt_dbg(gt, "GuC CT communication channel enabled\n");
+
+ if (ct_needs_safe_mode(ct))
+ ct_enter_safe_mode(ct);
+
+#if IS_ENABLED(CONFIG_DRM_XE_DEBUG)
+ /*
+ * The CT has now been reset so the dumper can be re-armed
+ * after any existing dead state has been dumped.
+ */
+ spin_lock_irq(&ct->dead.lock);
+ if (ct->dead.reason)
+ ct->dead.reason |= (1 << CT_DEAD_STATE_REARM);
+ spin_unlock_irq(&ct->dead.lock);
+#endif
+
+ return 0;
+
+err_out:
+ xe_gt_err(gt, "Failed to enable GuC CT (%pe)\n", ERR_PTR(err));
+ CT_DEAD(ct, NULL, SETUP);
+
+ return err;
+}
+
+static void stop_g2h_handler(struct xe_guc_ct *ct)
+{
+ cancel_work_sync(&ct->g2h_worker);
+}
+
+/**
+ * xe_guc_ct_disable - Set GuC to disabled state
+ * @ct: the &xe_guc_ct
+ *
+ * Set GuC CT to disabled state and stop g2h handler. No outstanding g2h expected
+ * in this transition.
+ */
+void xe_guc_ct_disable(struct xe_guc_ct *ct)
+{
+ xe_guc_ct_set_state(ct, XE_GUC_CT_STATE_DISABLED);
+ ct_exit_safe_mode(ct);
+ stop_g2h_handler(ct);
+}
+
+/**
+ * xe_guc_ct_stop - Set GuC to stopped state
+ * @ct: the &xe_guc_ct
+ *
+ * Set GuC CT to stopped state, stop g2h handler, and clear any outstanding g2h
+ */
+void xe_guc_ct_stop(struct xe_guc_ct *ct)
+{
+ xe_guc_ct_set_state(ct, XE_GUC_CT_STATE_STOPPED);
+ stop_g2h_handler(ct);
+}
+
+static bool h2g_has_room(struct xe_guc_ct *ct, u32 cmd_len)
+{
+ struct guc_ctb *h2g = &ct->ctbs.h2g;
+
+ lockdep_assert_held(&ct->lock);
+
+ if (cmd_len > h2g->info.space) {
+ h2g->info.head = desc_read(ct_to_xe(ct), h2g, head);
+
+ if (h2g->info.head > h2g->info.size) {
+ struct xe_device *xe = ct_to_xe(ct);
+ u32 desc_status = desc_read(xe, h2g, status);
+
+ desc_write(xe, h2g, status, desc_status | GUC_CTB_STATUS_OVERFLOW);
+
+			xe_gt_err(ct_to_gt(ct), "CT: invalid head offset %u >= %u\n",
+ h2g->info.head, h2g->info.size);
+ CT_DEAD(ct, h2g, H2G_HAS_ROOM);
+ return false;
+ }
+
+ h2g->info.space = CIRC_SPACE(h2g->info.tail, h2g->info.head,
+ h2g->info.size) -
+ h2g->info.resv_space;
+ if (cmd_len > h2g->info.space)
+ return false;
+ }
+
+ return true;
+}
+
+static bool g2h_has_room(struct xe_guc_ct *ct, u32 g2h_len)
+{
+ if (!g2h_len)
+ return true;
+
+ lockdep_assert_held(&ct->fast_lock);
+
+ return ct->ctbs.g2h.info.space > g2h_len;
+}
+
+static int has_room(struct xe_guc_ct *ct, u32 cmd_len, u32 g2h_len)
+{
+ lockdep_assert_held(&ct->lock);
+
+ if (!g2h_has_room(ct, g2h_len) || !h2g_has_room(ct, cmd_len))
+ return -EBUSY;
+
+ return 0;
+}
+
+static void h2g_reserve_space(struct xe_guc_ct *ct, u32 cmd_len)
+{
+ lockdep_assert_held(&ct->lock);
+ ct->ctbs.h2g.info.space -= cmd_len;
+}
+
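+/*
+ * G2H credit accounting: __g2h_reserve_space() debits g2h_len dwords from
+ * ctbs.g2h.info.space and bumps g2h_outstanding when an H2G is expected to
+ * generate G2H traffic; __g2h_release_space() credits the space back once
+ * the G2H has been consumed. A runtime PM reference is held for as long as
+ * any G2H is outstanding, so the device cannot suspend while replies are
+ * still pending.
+ */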
+static void __g2h_reserve_space(struct xe_guc_ct *ct, u32 g2h_len, u32 num_g2h)
+{
+ xe_gt_assert(ct_to_gt(ct), g2h_len <= ct->ctbs.g2h.info.space);
+ xe_gt_assert(ct_to_gt(ct), (!g2h_len && !num_g2h) ||
+ (g2h_len && num_g2h));
+
+ if (g2h_len) {
+ lockdep_assert_held(&ct->fast_lock);
+
+ if (!ct->g2h_outstanding)
+ xe_pm_runtime_get_noresume(ct_to_xe(ct));
+
+ ct->ctbs.g2h.info.space -= g2h_len;
+ ct->g2h_outstanding += num_g2h;
+ }
+}
+
+static void __g2h_release_space(struct xe_guc_ct *ct, u32 g2h_len)
+{
+ bool bad = false;
+
+ lockdep_assert_held(&ct->fast_lock);
+
+ bad = ct->ctbs.g2h.info.space + g2h_len >
+ ct->ctbs.g2h.info.size - ct->ctbs.g2h.info.resv_space;
+ bad |= !ct->g2h_outstanding;
+
+ if (bad) {
+ xe_gt_err(ct_to_gt(ct), "Invalid G2H release: %d + %d vs %d - %d -> %d vs %d, outstanding = %d!\n",
+ ct->ctbs.g2h.info.space, g2h_len,
+ ct->ctbs.g2h.info.size, ct->ctbs.g2h.info.resv_space,
+ ct->ctbs.g2h.info.space + g2h_len,
+ ct->ctbs.g2h.info.size - ct->ctbs.g2h.info.resv_space,
+ ct->g2h_outstanding);
+ CT_DEAD(ct, &ct->ctbs.g2h, G2H_RELEASE);
+ return;
+ }
+
+ ct->ctbs.g2h.info.space += g2h_len;
+ if (!--ct->g2h_outstanding)
+ xe_pm_runtime_put(ct_to_xe(ct));
+}
+
+static void g2h_release_space(struct xe_guc_ct *ct, u32 g2h_len)
+{
+ spin_lock_irq(&ct->fast_lock);
+ __g2h_release_space(ct, g2h_len);
+ spin_unlock_irq(&ct->fast_lock);
+}
+
+#define H2G_CT_HEADERS (GUC_CTB_HDR_LEN + 1) /* one DW CTB header and one DW HxG header */
+
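+/*
+ * Example layout (illustrative): sending a 3 dword action with fence 7
+ * writes full_len = 3 + GUC_CTB_HDR_LEN dwords at the current tail:
+ *
+ *   dw0: CTB header (FORMAT=HXG, NUM_DWORDS=3, FENCE=7)
+ *   dw1: HXG header (TYPE=REQUEST or FAST_REQUEST, ACTION from action[0])
+ *   dw2: action[1]
+ *   dw3: action[2]
+ *
+ * i.e. the HXG header replaces action[0], so only action[1..] is copied as
+ * payload data.
+ */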
+static int h2g_write(struct xe_guc_ct *ct, const u32 *action, u32 len,
+ u32 ct_fence_value, bool want_response)
+{
+ struct xe_device *xe = ct_to_xe(ct);
+ struct xe_gt *gt = ct_to_gt(ct);
+ struct guc_ctb *h2g = &ct->ctbs.h2g;
+ u32 cmd[H2G_CT_HEADERS];
+ u32 tail = h2g->info.tail;
+ u32 full_len;
+ struct iosys_map map = IOSYS_MAP_INIT_OFFSET(&h2g->cmds,
+ tail * sizeof(u32));
+ u32 desc_status;
+
+ full_len = len + GUC_CTB_HDR_LEN;
+
+ lockdep_assert_held(&ct->lock);
+ xe_gt_assert(gt, full_len <= GUC_CTB_MSG_MAX_LEN);
+
+ desc_status = desc_read(xe, h2g, status);
+ if (desc_status) {
+ xe_gt_err(gt, "CT write: non-zero status: %u\n", desc_status);
+ goto corrupted;
+ }
+
+ if (IS_ENABLED(CONFIG_DRM_XE_DEBUG)) {
+ u32 desc_tail = desc_read(xe, h2g, tail);
+ u32 desc_head = desc_read(xe, h2g, head);
+
+ if (tail != desc_tail) {
+ desc_write(xe, h2g, status, desc_status | GUC_CTB_STATUS_MISMATCH);
+ xe_gt_err(gt, "CT write: tail was modified %u != %u\n", desc_tail, tail);
+ goto corrupted;
+ }
+
+ if (tail > h2g->info.size) {
+ desc_write(xe, h2g, status, desc_status | GUC_CTB_STATUS_OVERFLOW);
+ xe_gt_err(gt, "CT write: tail out of range: %u vs %u\n",
+ tail, h2g->info.size);
+ goto corrupted;
+ }
+
+ if (desc_head >= h2g->info.size) {
+ desc_write(xe, h2g, status, desc_status | GUC_CTB_STATUS_OVERFLOW);
+			xe_gt_err(gt, "CT write: invalid head offset %u >= %u\n",
+ desc_head, h2g->info.size);
+ goto corrupted;
+ }
+ }
+
+ /* Command will wrap, zero fill (NOPs), return and check credits again */
+ if (tail + full_len > h2g->info.size) {
+ xe_map_memset(xe, &map, 0, 0,
+ (h2g->info.size - tail) * sizeof(u32));
+ h2g_reserve_space(ct, (h2g->info.size - tail));
+ h2g->info.tail = 0;
+ desc_write(xe, h2g, tail, h2g->info.tail);
+
+ return -EAGAIN;
+ }
+
+ /*
+ * dw0: CT header (including fence)
+ * dw1: HXG header (including action code)
+ * dw2+: action data
+ */
+ cmd[0] = FIELD_PREP(GUC_CTB_MSG_0_FORMAT, GUC_CTB_FORMAT_HXG) |
+ FIELD_PREP(GUC_CTB_MSG_0_NUM_DWORDS, len) |
+ FIELD_PREP(GUC_CTB_MSG_0_FENCE, ct_fence_value);
+ if (want_response) {
+ cmd[1] =
+ FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) |
+ FIELD_PREP(GUC_HXG_EVENT_MSG_0_ACTION |
+ GUC_HXG_EVENT_MSG_0_DATA0, action[0]);
+ } else {
+ cmd[1] =
+ FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_FAST_REQUEST) |
+ FIELD_PREP(GUC_HXG_EVENT_MSG_0_ACTION |
+ GUC_HXG_EVENT_MSG_0_DATA0, action[0]);
+ }
+
+ /* H2G header in cmd[1] replaces action[0] so: */
+ --len;
+ ++action;
+
+	/* Write H2G, ensuring it is visible before the descriptor update */
+ xe_map_memcpy_to(xe, &map, 0, cmd, H2G_CT_HEADERS * sizeof(u32));
+ xe_map_memcpy_to(xe, &map, H2G_CT_HEADERS * sizeof(u32), action, len * sizeof(u32));
+ xe_device_wmb(xe);
+
+ /* Update local copies */
+ h2g->info.tail = (tail + full_len) % h2g->info.size;
+ h2g_reserve_space(ct, full_len);
+
+ /* Update descriptor */
+ desc_write(xe, h2g, tail, h2g->info.tail);
+
+ trace_xe_guc_ctb_h2g(xe, gt->info.id, *(action - 1), full_len,
+ desc_read(xe, h2g, head), h2g->info.tail);
+
+ return 0;
+
+corrupted:
+ CT_DEAD(ct, &ct->ctbs.h2g, H2G_WRITE);
+ return -EPIPE;
+}
+
+/*
+ * The CT protocol accepts a 16-bit fence. This field is fully owned by the
+ * driver; the GuC will just copy it to the reply message. Since we need to
+ * be able to distinguish between replies to REQUEST and FAST_REQUEST messages,
+ * we use one bit of the seqno as an indicator for that and a rolling counter
+ * for the remaining 15 bits.
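+ *
+ * For example, with ct->fence_seqno == 5, a tracked send (one with a
+ * g2h_fence) uses seqno 0x0005, while an untracked send uses
+ * 0x0005 | CT_SEQNO_UNTRACKED == 0x8005. The 15-bit counter wraps from
+ * 0x7fff back to 0.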
+ */
+#define CT_SEQNO_MASK GENMASK(14, 0)
+#define CT_SEQNO_UNTRACKED BIT(15)
+static u16 next_ct_seqno(struct xe_guc_ct *ct, bool is_g2h_fence)
+{
+ u32 seqno = ct->fence_seqno++ & CT_SEQNO_MASK;
+
+ if (!is_g2h_fence)
+ seqno |= CT_SEQNO_UNTRACKED;
+
+ return seqno;
+}
+
+static int __guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action,
+ u32 len, u32 g2h_len, u32 num_g2h,
+ struct g2h_fence *g2h_fence)
+{
+ struct xe_gt *gt __maybe_unused = ct_to_gt(ct);
+ u16 seqno;
+ int ret;
+
+ xe_gt_assert(gt, ct->state != XE_GUC_CT_STATE_NOT_INITIALIZED);
+ xe_gt_assert(gt, !g2h_len || !g2h_fence);
+ xe_gt_assert(gt, !num_g2h || !g2h_fence);
+ xe_gt_assert(gt, !g2h_len || num_g2h);
+ xe_gt_assert(gt, g2h_len || !num_g2h);
+ lockdep_assert_held(&ct->lock);
+
+ if (unlikely(ct->ctbs.h2g.info.broken)) {
+ ret = -EPIPE;
+ goto out;
+ }
+
+ if (ct->state == XE_GUC_CT_STATE_DISABLED) {
+ ret = -ENODEV;
+ goto out;
+ }
+
+ if (ct->state == XE_GUC_CT_STATE_STOPPED) {
+ ret = -ECANCELED;
+ goto out;
+ }
+
+ xe_gt_assert(gt, xe_guc_ct_enabled(ct));
+
+ if (g2h_fence) {
+ g2h_len = GUC_CTB_HXG_MSG_MAX_LEN;
+ num_g2h = 1;
+
+ if (g2h_fence_needs_alloc(g2h_fence)) {
+ g2h_fence->seqno = next_ct_seqno(ct, true);
+ ret = xa_err(xa_store(&ct->fence_lookup,
+ g2h_fence->seqno, g2h_fence,
+ GFP_ATOMIC));
+ if (ret)
+ goto out;
+ }
+
+ seqno = g2h_fence->seqno;
+ } else {
+ seqno = next_ct_seqno(ct, false);
+ }
+
+ if (g2h_len)
+ spin_lock_irq(&ct->fast_lock);
+retry:
+ ret = has_room(ct, len + GUC_CTB_HDR_LEN, g2h_len);
+ if (unlikely(ret))
+ goto out_unlock;
+
+ ret = h2g_write(ct, action, len, seqno, !!g2h_fence);
+ if (unlikely(ret)) {
+ if (ret == -EAGAIN)
+ goto retry;
+ goto out_unlock;
+ }
+
+ __g2h_reserve_space(ct, g2h_len, num_g2h);
+ xe_guc_notify(ct_to_guc(ct));
+out_unlock:
+ if (g2h_len)
+ spin_unlock_irq(&ct->fast_lock);
+out:
+ return ret;
+}
+
+static void kick_reset(struct xe_guc_ct *ct)
+{
+ xe_gt_reset_async(ct_to_gt(ct));
+}
+
+static int dequeue_one_g2h(struct xe_guc_ct *ct);
+
+static int guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action, u32 len,
+ u32 g2h_len, u32 num_g2h,
+ struct g2h_fence *g2h_fence)
+{
+ struct xe_device *xe = ct_to_xe(ct);
+ struct xe_gt *gt = ct_to_gt(ct);
+ unsigned int sleep_period_ms = 1;
+ int ret;
+
+ xe_gt_assert(gt, !g2h_len || !g2h_fence);
+ lockdep_assert_held(&ct->lock);
+ xe_device_assert_mem_access(ct_to_xe(ct));
+
+try_again:
+ ret = __guc_ct_send_locked(ct, action, len, g2h_len, num_g2h,
+ g2h_fence);
+
+ /*
+	 * We wait for credits to be restored for about 1 second before bailing.
+	 * In the case of H2G credits we have no choice but to wait for the
+	 * GuC to consume H2Gs in the channel, so we use a wait / sleep loop. In
+ * the case of G2H we process any G2H in the channel, hopefully freeing
+ * credits as we consume the G2H messages.
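+	 *
+	 * Note: sleep_period_ms doubles on each H2G retry (1, 2, 4, ... 512 ms),
+	 * so roughly 1023 ms of sleeping happens before the 1024 ms check below
+	 * declares the channel broken, which is where the 1 second figure above
+	 * comes from.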
+ */
+ if (unlikely(ret == -EBUSY &&
+ !h2g_has_room(ct, len + GUC_CTB_HDR_LEN))) {
+ struct guc_ctb *h2g = &ct->ctbs.h2g;
+
+ if (sleep_period_ms == 1024)
+ goto broken;
+
+ trace_xe_guc_ct_h2g_flow_control(xe, h2g->info.head, h2g->info.tail,
+ h2g->info.size,
+ h2g->info.space,
+ len + GUC_CTB_HDR_LEN);
+ msleep(sleep_period_ms);
+ sleep_period_ms <<= 1;
+
+ goto try_again;
+ } else if (unlikely(ret == -EBUSY)) {
+ struct xe_device *xe = ct_to_xe(ct);
+ struct guc_ctb *g2h = &ct->ctbs.g2h;
+
+ trace_xe_guc_ct_g2h_flow_control(xe, g2h->info.head,
+ desc_read(xe, g2h, tail),
+ g2h->info.size,
+ g2h->info.space,
+ g2h_fence ?
+ GUC_CTB_HXG_MSG_MAX_LEN :
+ g2h_len);
+
+#define g2h_avail(ct) \
+ (desc_read(ct_to_xe(ct), (&ct->ctbs.g2h), tail) != ct->ctbs.g2h.info.head)
+ if (!wait_event_timeout(ct->wq, !ct->g2h_outstanding ||
+ g2h_avail(ct), HZ))
+ goto broken;
+#undef g2h_avail
+
+ ret = dequeue_one_g2h(ct);
+ if (ret < 0) {
+ if (ret != -ECANCELED)
+ xe_gt_err(ct_to_gt(ct), "CTB receive failed (%pe)",
+ ERR_PTR(ret));
+ goto broken;
+ }
+
+ goto try_again;
+ }
+
+ return ret;
+
+broken:
+ xe_gt_err(gt, "No forward process on H2G, reset required\n");
+ CT_DEAD(ct, &ct->ctbs.h2g, DEADLOCK);
+
+ return -EDEADLK;
+}
+
+static int guc_ct_send(struct xe_guc_ct *ct, const u32 *action, u32 len,
+ u32 g2h_len, u32 num_g2h, struct g2h_fence *g2h_fence)
+{
+ int ret;
+
+ xe_gt_assert(ct_to_gt(ct), !g2h_len || !g2h_fence);
+
+ mutex_lock(&ct->lock);
+ ret = guc_ct_send_locked(ct, action, len, g2h_len, num_g2h, g2h_fence);
+ mutex_unlock(&ct->lock);
+
+ return ret;
+}
+
+int xe_guc_ct_send(struct xe_guc_ct *ct, const u32 *action, u32 len,
+ u32 g2h_len, u32 num_g2h)
+{
+ int ret;
+
+ ret = guc_ct_send(ct, action, len, g2h_len, num_g2h, NULL);
+ if (ret == -EDEADLK)
+ kick_reset(ct);
+
+ return ret;
+}
+
+int xe_guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action, u32 len,
+ u32 g2h_len, u32 num_g2h)
+{
+ int ret;
+
+ ret = guc_ct_send_locked(ct, action, len, g2h_len, num_g2h, NULL);
+ if (ret == -EDEADLK)
+ kick_reset(ct);
+
+ return ret;
+}
+
+int xe_guc_ct_send_g2h_handler(struct xe_guc_ct *ct, const u32 *action, u32 len)
+{
+ int ret;
+
+ lockdep_assert_held(&ct->lock);
+
+ ret = guc_ct_send_locked(ct, action, len, 0, 0, NULL);
+ if (ret == -EDEADLK)
+ kick_reset(ct);
+
+ return ret;
+}
+
+/*
+ * Check if a GT reset is in progress or will occur, and if the GT reset brought
+ * the CT back up. Randomly picking 5 seconds as an upper limit for a GT reset.
+ */
+static bool retry_failure(struct xe_guc_ct *ct, int ret)
+{
+ if (!(ret == -EDEADLK || ret == -EPIPE || ret == -ENODEV))
+ return false;
+
+#define ct_alive(ct) \
+ (xe_guc_ct_enabled(ct) && !ct->ctbs.h2g.info.broken && \
+ !ct->ctbs.g2h.info.broken)
+ if (!wait_event_interruptible_timeout(ct->wq, ct_alive(ct), HZ * 5))
+ return false;
+#undef ct_alive
+
+ return true;
+}
+
+static int guc_ct_send_recv(struct xe_guc_ct *ct, const u32 *action, u32 len,
+ u32 *response_buffer, bool no_fail)
+{
+ struct xe_gt *gt = ct_to_gt(ct);
+ struct g2h_fence g2h_fence;
+ int ret = 0;
+
+ /*
+ * We use a fence to implement blocking sends / receiving response data.
+ * The seqno of the fence is sent in the H2G, returned in the G2H, and
+	 * an xarray is used as storage media with the seqno being the key.
+ * Fields in the fence hold success, failure, retry status and the
+ * response data. Safe to allocate on the stack as the xarray is the
+ * only reference and it cannot be present after this function exits.
+ */
+retry:
+ g2h_fence_init(&g2h_fence, response_buffer);
+retry_same_fence:
+ ret = guc_ct_send(ct, action, len, 0, 0, &g2h_fence);
+ if (unlikely(ret == -ENOMEM)) {
+ /* Retry allocation /w GFP_KERNEL */
+ ret = xa_err(xa_store(&ct->fence_lookup, g2h_fence.seqno,
+ &g2h_fence, GFP_KERNEL));
+ if (ret)
+ return ret;
+
+ goto retry_same_fence;
+ } else if (unlikely(ret)) {
+ if (ret == -EDEADLK)
+ kick_reset(ct);
+
+ if (no_fail && retry_failure(ct, ret))
+ goto retry_same_fence;
+
+ if (!g2h_fence_needs_alloc(&g2h_fence))
+ xa_erase(&ct->fence_lookup, g2h_fence.seqno);
+
+ return ret;
+ }
+
+ ret = wait_event_timeout(ct->g2h_fence_wq, g2h_fence.done, HZ);
+
+ /*
+<<<<<<<
+ * Ensure we serialize with completion side to prevent UAF with fence going out of scope on
+ * the stack, since we have no clue if it will fire after the timeout before we can erase
+ * from the xa. Also we have some dependent loads and stores below for which we need the
+ * correct ordering, and we lack the needed barriers.
+ */
+ mutex_lock(&ct->lock);
+ if (!ret) {
+ xe_gt_err(gt, "Timed out wait for G2H, fence %u, action %04x, done %s",
+ g2h_fence.seqno, action[0], str_yes_no(g2h_fence.done));
+ xa_erase(&ct->fence_lookup, g2h_fence.seqno);
+=======
+	 * Occasionally it is seen that the G2H worker starts running after a delay of more than
+	 * a second even after being queued and activated by the Linux workqueue subsystem. This
+	 * leads to a G2H timeout error. The root cause of the issue lies in the scheduling latency
+	 * of the Lunarlake hybrid CPU. The issue disappears if we disable the Lunarlake atom cores
+	 * in the BIOS, and this is beyond the xe KMD.
+ *
+ * TODO: Drop this change once workqueue scheduling delay issue is fixed on LNL Hybrid CPU.
+ */
+ if (!ret) {
+ flush_work(&ct->g2h_worker);
+ if (g2h_fence.done) {
+ xe_gt_warn(gt, "G2H fence %u, action %04x, done\n",
+ g2h_fence.seqno, action[0]);
+ ret = 1;
+ }
+ }
+
+ /*
+ * Ensure we serialize with completion side to prevent UAF with fence going out of scope on
+ * the stack, since we have no clue if it will fire after the timeout before we can erase
+ * from the xa. Also we have some dependent loads and stores below for which we need the
+ * correct ordering, and we lack the needed barriers.
+ */
+ mutex_lock(&ct->lock);
+ if (!ret) {
+ xe_gt_err(gt, "Timed out wait for G2H, fence %u, action %04x, done %s",
+ g2h_fence.seqno, action[0], str_yes_no(g2h_fence.done));
+ xa_erase_irq(&ct->fence_lookup, g2h_fence.seqno);
+>>>>>>>
+ mutex_unlock(&ct->lock);
+ return -ETIME;
+ }
+
+ if (g2h_fence.retry) {
+ xe_gt_dbg(gt, "H2G action %#x retrying: reason %#x\n",
+ action[0], g2h_fence.reason);
+ mutex_unlock(&ct->lock);
+ goto retry;
+ }
+ if (g2h_fence.fail) {
+ xe_gt_err(gt, "H2G request %#x failed: error %#x hint %#x\n",
+ action[0], g2h_fence.error, g2h_fence.hint);
+ ret = -EIO;
+ }
+
+ if (ret > 0)
+ ret = response_buffer ? g2h_fence.response_len : g2h_fence.response_data;
+
+ mutex_unlock(&ct->lock);
+
+ return ret;
+}
+
+/**
+ * xe_guc_ct_send_recv - Send and receive HXG to the GuC
+ * @ct: the &xe_guc_ct
+ * @action: the dword array with `HXG Request`_ message (can't be NULL)
+ * @len: length of the `HXG Request`_ message (in dwords, can't be 0)
+ * @response_buffer: placeholder for the `HXG Response`_ message (can be NULL)
+ *
+ * Send a `HXG Request`_ message to the GuC over CT communication channel and
+ * blocks until GuC replies with a `HXG Response`_ message.
+ *
+ * For non-blocking communication with GuC use xe_guc_ct_send().
+ *
+ * Note: The size of &response_buffer must be at least GUC_CTB_MAX_DWORDS_.
+ *
+ * Return: response length (in dwords) if &response_buffer was not NULL, or
+ * DATA0 from `HXG Response`_ if &response_buffer was NULL, or
+ * a negative error code on failure.
+ */
+int xe_guc_ct_send_recv(struct xe_guc_ct *ct, const u32 *action, u32 len,
+ u32 *response_buffer)
+{
+ KUNIT_STATIC_STUB_REDIRECT(xe_guc_ct_send_recv, ct, action, len, response_buffer);
+ return guc_ct_send_recv(ct, action, len, response_buffer, false);
+}
+
+int xe_guc_ct_send_recv_no_fail(struct xe_guc_ct *ct, const u32 *action,
+ u32 len, u32 *response_buffer)
+{
+ return guc_ct_send_recv(ct, action, len, response_buffer, true);
+}
+
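+/*
+ * A CTB message is a GUC_CTB_MSG_MIN_LEN dword CTB header followed by the
+ * HXG message proper; the two helpers below skip over / subtract that
+ * header.
+ */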
+static u32 *msg_to_hxg(u32 *msg)
+{
+ return msg + GUC_CTB_MSG_MIN_LEN;
+}
+
+static u32 msg_len_to_hxg_len(u32 len)
+{
+ return len - GUC_CTB_MSG_MIN_LEN;
+}
+
+static int parse_g2h_event(struct xe_guc_ct *ct, u32 *msg, u32 len)
+{
+ u32 *hxg = msg_to_hxg(msg);
+ u32 action = FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, hxg[0]);
+
+ lockdep_assert_held(&ct->lock);
+
+ switch (action) {
+ case XE_GUC_ACTION_SCHED_CONTEXT_MODE_DONE:
+ case XE_GUC_ACTION_DEREGISTER_CONTEXT_DONE:
+ case XE_GUC_ACTION_SCHED_ENGINE_MODE_DONE:
+ case XE_GUC_ACTION_TLB_INVALIDATION_DONE:
+ g2h_release_space(ct, len);
+ }
+
+ return 0;
+}
+
+static int parse_g2h_response(struct xe_guc_ct *ct, u32 *msg, u32 len)
+{
+ struct xe_gt *gt = ct_to_gt(ct);
+ u32 *hxg = msg_to_hxg(msg);
+ u32 hxg_len = msg_len_to_hxg_len(len);
+ u32 fence = FIELD_GET(GUC_CTB_MSG_0_FENCE, msg[0]);
+ u32 type = FIELD_GET(GUC_HXG_MSG_0_TYPE, hxg[0]);
+ struct g2h_fence *g2h_fence;
+
+ lockdep_assert_held(&ct->lock);
+
+ /*
+ * Fences for FAST_REQUEST messages are not tracked in ct->fence_lookup.
+ * Those messages should never fail, so if we do get an error back it
+ * means we're likely doing an illegal operation and the GuC is
+ * rejecting it. We have no way to inform the code that submitted the
+ * H2G that the message was rejected, so we need to escalate the
+ * failure to trigger a reset.
+ */
+ if (fence & CT_SEQNO_UNTRACKED) {
+ if (type == GUC_HXG_TYPE_RESPONSE_FAILURE)
+ xe_gt_err(gt, "FAST_REQ H2G fence 0x%x failed! e=0x%x, h=%u\n",
+ fence,
+ FIELD_GET(GUC_HXG_FAILURE_MSG_0_ERROR, hxg[0]),
+ FIELD_GET(GUC_HXG_FAILURE_MSG_0_HINT, hxg[0]));
+ else
+ xe_gt_err(gt, "unexpected response %u for FAST_REQ H2G fence 0x%x!\n",
+ type, fence);
+ CT_DEAD(ct, NULL, PARSE_G2H_RESPONSE);
+
+ return -EPROTO;
+ }
+
+ g2h_fence = xa_erase(&ct->fence_lookup, fence);
+ if (unlikely(!g2h_fence)) {
+ /* Don't tear down channel, as send could've timed out */
+ /* CT_DEAD(ct, NULL, PARSE_G2H_UNKNOWN); */
+ xe_gt_warn(gt, "G2H fence (%u) not found!\n", fence);
+ g2h_release_space(ct, GUC_CTB_HXG_MSG_MAX_LEN);
+ return 0;
+ }
+
+ xe_gt_assert(gt, fence == g2h_fence->seqno);
+
+ if (type == GUC_HXG_TYPE_RESPONSE_FAILURE) {
+ g2h_fence->fail = true;
+ g2h_fence->error = FIELD_GET(GUC_HXG_FAILURE_MSG_0_ERROR, hxg[0]);
+ g2h_fence->hint = FIELD_GET(GUC_HXG_FAILURE_MSG_0_HINT, hxg[0]);
+ } else if (type == GUC_HXG_TYPE_NO_RESPONSE_RETRY) {
+ g2h_fence->retry = true;
+ g2h_fence->reason = FIELD_GET(GUC_HXG_RETRY_MSG_0_REASON, hxg[0]);
+ } else if (g2h_fence->response_buffer) {
+ g2h_fence->response_len = hxg_len;
+ memcpy(g2h_fence->response_buffer, hxg, hxg_len * sizeof(u32));
+ } else {
+ g2h_fence->response_data = FIELD_GET(GUC_HXG_RESPONSE_MSG_0_DATA0, hxg[0]);
+ }
+
+ g2h_release_space(ct, GUC_CTB_HXG_MSG_MAX_LEN);
+
+ g2h_fence->done = true;
+ smp_mb();
+
+ wake_up_all(&ct->g2h_fence_wq);
+
+ return 0;
+}
+
+static int parse_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len)
+{
+ struct xe_gt *gt = ct_to_gt(ct);
+ u32 *hxg = msg_to_hxg(msg);
+ u32 origin, type;
+ int ret;
+
+ lockdep_assert_held(&ct->lock);
+
+ origin = FIELD_GET(GUC_HXG_MSG_0_ORIGIN, hxg[0]);
+ if (unlikely(origin != GUC_HXG_ORIGIN_GUC)) {
+ xe_gt_err(gt, "G2H channel broken on read, origin=%u, reset required\n",
+ origin);
+ CT_DEAD(ct, &ct->ctbs.g2h, PARSE_G2H_ORIGIN);
+
+ return -EPROTO;
+ }
+
+ type = FIELD_GET(GUC_HXG_MSG_0_TYPE, hxg[0]);
+ switch (type) {
+ case GUC_HXG_TYPE_EVENT:
+ ret = parse_g2h_event(ct, msg, len);
+ break;
+ case GUC_HXG_TYPE_RESPONSE_SUCCESS:
+ case GUC_HXG_TYPE_RESPONSE_FAILURE:
+ case GUC_HXG_TYPE_NO_RESPONSE_RETRY:
+ ret = parse_g2h_response(ct, msg, len);
+ break;
+ default:
+ xe_gt_err(gt, "G2H channel broken on read, type=%u, reset required\n",
+ type);
+ CT_DEAD(ct, &ct->ctbs.g2h, PARSE_G2H_TYPE);
+
+ ret = -EOPNOTSUPP;
+ }
+
+ return ret;
+}
+
+static int process_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len)
+{
+ struct xe_guc *guc = ct_to_guc(ct);
+ struct xe_gt *gt = ct_to_gt(ct);
+ u32 hxg_len = msg_len_to_hxg_len(len);
+ u32 *hxg = msg_to_hxg(msg);
+ u32 action, adj_len;
+ u32 *payload;
+ int ret = 0;
+
+ if (FIELD_GET(GUC_HXG_MSG_0_TYPE, hxg[0]) != GUC_HXG_TYPE_EVENT)
+ return 0;
+
+ action = FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, hxg[0]);
+ payload = hxg + GUC_HXG_EVENT_MSG_MIN_LEN;
+ adj_len = hxg_len - GUC_HXG_EVENT_MSG_MIN_LEN;
+
+ switch (action) {
+ case XE_GUC_ACTION_SCHED_CONTEXT_MODE_DONE:
+ ret = xe_guc_sched_done_handler(guc, payload, adj_len);
+ break;
+ case XE_GUC_ACTION_DEREGISTER_CONTEXT_DONE:
+ ret = xe_guc_deregister_done_handler(guc, payload, adj_len);
+ break;
+ case XE_GUC_ACTION_CONTEXT_RESET_NOTIFICATION:
+ ret = xe_guc_exec_queue_reset_handler(guc, payload, adj_len);
+ break;
+ case XE_GUC_ACTION_ENGINE_FAILURE_NOTIFICATION:
+ ret = xe_guc_exec_queue_reset_failure_handler(guc, payload,
+ adj_len);
+ break;
+ case XE_GUC_ACTION_SCHED_ENGINE_MODE_DONE:
+ /* Selftest only at the moment */
+ break;
+ case XE_GUC_ACTION_STATE_CAPTURE_NOTIFICATION:
+ ret = xe_guc_error_capture_handler(guc, payload, adj_len);
+ break;
+ case XE_GUC_ACTION_NOTIFY_FLUSH_LOG_BUFFER_TO_FILE:
+ /* FIXME: Handle this */
+ break;
+ case XE_GUC_ACTION_NOTIFY_MEMORY_CAT_ERROR:
+ ret = xe_guc_exec_queue_memory_cat_error_handler(guc, payload,
+ adj_len);
+ break;
+ case XE_GUC_ACTION_REPORT_PAGE_FAULT_REQ_DESC:
+ ret = xe_guc_pagefault_handler(guc, payload, adj_len);
+ break;
+ case XE_GUC_ACTION_TLB_INVALIDATION_DONE:
+ ret = xe_guc_tlb_invalidation_done_handler(guc, payload,
+ adj_len);
+ break;
+ case XE_GUC_ACTION_ACCESS_COUNTER_NOTIFY:
+ ret = xe_guc_access_counter_notify_handler(guc, payload,
+ adj_len);
+ break;
+ case XE_GUC_ACTION_GUC2PF_RELAY_FROM_VF:
+ ret = xe_guc_relay_process_guc2pf(&guc->relay, hxg, hxg_len);
+ break;
+ case XE_GUC_ACTION_GUC2VF_RELAY_FROM_PF:
+ ret = xe_guc_relay_process_guc2vf(&guc->relay, hxg, hxg_len);
+ break;
+ case GUC_ACTION_GUC2PF_VF_STATE_NOTIFY:
+ ret = xe_gt_sriov_pf_control_process_guc2pf(gt, hxg, hxg_len);
+ break;
+ case GUC_ACTION_GUC2PF_ADVERSE_EVENT:
+ ret = xe_gt_sriov_pf_monitor_process_guc2pf(gt, hxg, hxg_len);
+ break;
+ default:
+ xe_gt_err(gt, "unexpected G2H action 0x%04x\n", action);
+ }
+
+ if (ret) {
+ xe_gt_err(gt, "G2H action 0x%04x failed (%pe)\n",
+ action, ERR_PTR(ret));
+ CT_DEAD(ct, NULL, PROCESS_FAILED);
+ }
+
+ return 0;
+}
+
+static int g2h_read(struct xe_guc_ct *ct, u32 *msg, bool fast_path)
+{
+ struct xe_device *xe = ct_to_xe(ct);
+ struct xe_gt *gt = ct_to_gt(ct);
+ struct guc_ctb *g2h = &ct->ctbs.g2h;
+ u32 tail, head, len, desc_status;
+ s32 avail;
+ u32 action;
+ u32 *hxg;
+
+ xe_gt_assert(gt, ct->state != XE_GUC_CT_STATE_NOT_INITIALIZED);
+ lockdep_assert_held(&ct->fast_lock);
+
+ if (ct->state == XE_GUC_CT_STATE_DISABLED)
+ return -ENODEV;
+
+ if (ct->state == XE_GUC_CT_STATE_STOPPED)
+ return -ECANCELED;
+
+ if (g2h->info.broken)
+ return -EPIPE;
+
+ xe_gt_assert(gt, xe_guc_ct_enabled(ct));
+
+ desc_status = desc_read(xe, g2h, status);
+ if (desc_status) {
+ if (desc_status & GUC_CTB_STATUS_DISABLED) {
+ /*
+ * Potentially valid if a CLIENT_RESET request resulted in
+ * contexts/engines being reset. But should never happen as
+ * no contexts should be active when CLIENT_RESET is sent.
+ */
+ xe_gt_err(gt, "CT read: unexpected G2H after GuC has stopped!\n");
+ desc_status &= ~GUC_CTB_STATUS_DISABLED;
+ }
+
+ if (desc_status) {
+ xe_gt_err(gt, "CT read: non-zero status: %u\n", desc_status);
+ goto corrupted;
+ }
+ }
+
+ if (IS_ENABLED(CONFIG_DRM_XE_DEBUG)) {
+ u32 desc_tail = desc_read(xe, g2h, tail);
+ /*
+ u32 desc_head = desc_read(xe, g2h, head);
+
+ * info.head and desc_head are updated back-to-back at the end of
+		 * this function and nowhere else. Hence, they cannot be different
+		 * unless two g2h_read calls are running concurrently, which is not
+		 * possible because it is guarded by ct->fast_lock. And yet, some
+		 * discrete platforms are regularly hitting this error :(.
+ *
+ * desc_head rolling backwards shouldn't cause any noticeable
+ * problems - just a delay in GuC being allowed to proceed past that
+ * point in the queue. So for now, just disable the error until it
+ * can be root caused.
+ *
+ if (g2h->info.head != desc_head) {
+ desc_write(xe, g2h, status, desc_status | GUC_CTB_STATUS_MISMATCH);
+ xe_gt_err(gt, "CT read: head was modified %u != %u\n",
+ desc_head, g2h->info.head);
+ goto corrupted;
+ }
+ */
+
+ if (g2h->info.head > g2h->info.size) {
+ desc_write(xe, g2h, status, desc_status | GUC_CTB_STATUS_OVERFLOW);
+ xe_gt_err(gt, "CT read: head out of range: %u vs %u\n",
+ g2h->info.head, g2h->info.size);
+ goto corrupted;
+ }
+
+ if (desc_tail >= g2h->info.size) {
+ desc_write(xe, g2h, status, desc_status | GUC_CTB_STATUS_OVERFLOW);
+			xe_gt_err(gt, "CT read: invalid tail offset %u >= %u\n",
+ desc_tail, g2h->info.size);
+ goto corrupted;
+ }
+ }
+
+ /* Calculate DW available to read */
+ tail = desc_read(xe, g2h, tail);
+ avail = tail - g2h->info.head;
+ if (unlikely(avail == 0))
+ return 0;
+
+ if (avail < 0)
+ avail += g2h->info.size;
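+	/*
+	 * e.g. with size == 1024 dwords, head == 1000 and tail == 8 the raw
+	 * difference is -992, which wraps back to 32 dwords available to read.
+	 */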
+
+ /* Read header */
+ xe_map_memcpy_from(xe, msg, &g2h->cmds, sizeof(u32) * g2h->info.head,
+ sizeof(u32));
+ len = FIELD_GET(GUC_CTB_MSG_0_NUM_DWORDS, msg[0]) + GUC_CTB_MSG_MIN_LEN;
+ if (len > avail) {
+ xe_gt_err(gt, "G2H channel broken on read, avail=%d, len=%d, reset required\n",
+ avail, len);
+ goto corrupted;
+ }
+
+ head = (g2h->info.head + 1) % g2h->info.size;
+ avail = len - 1;
+
+ /* Read G2H message */
+ if (avail + head > g2h->info.size) {
+ u32 avail_til_wrap = g2h->info.size - head;
+
+ xe_map_memcpy_from(xe, msg + 1,
+ &g2h->cmds, sizeof(u32) * head,
+ avail_til_wrap * sizeof(u32));
+ xe_map_memcpy_from(xe, msg + 1 + avail_til_wrap,
+ &g2h->cmds, 0,
+ (avail - avail_til_wrap) * sizeof(u32));
+ } else {
+ xe_map_memcpy_from(xe, msg + 1,
+ &g2h->cmds, sizeof(u32) * head,
+ avail * sizeof(u32));
+ }
+
+ hxg = msg_to_hxg(msg);
+ action = FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, hxg[0]);
+
+ if (fast_path) {
+ if (FIELD_GET(GUC_HXG_MSG_0_TYPE, hxg[0]) != GUC_HXG_TYPE_EVENT)
+ return 0;
+
+ switch (action) {
+ case XE_GUC_ACTION_REPORT_PAGE_FAULT_REQ_DESC:
+ case XE_GUC_ACTION_TLB_INVALIDATION_DONE:
+ break; /* Process these in fast-path */
+ default:
+ return 0;
+ }
+ }
+
+ /* Update local / descriptor header */
+ g2h->info.head = (head + avail) % g2h->info.size;
+ desc_write(xe, g2h, head, g2h->info.head);
+
+ trace_xe_guc_ctb_g2h(xe, ct_to_gt(ct)->info.id,
+ action, len, g2h->info.head, tail);
+
+ return len;
+
+corrupted:
+ CT_DEAD(ct, &ct->ctbs.g2h, G2H_READ);
+ return -EPROTO;
+}
+
+static void g2h_fast_path(struct xe_guc_ct *ct, u32 *msg, u32 len)
+{
+ struct xe_gt *gt = ct_to_gt(ct);
+ struct xe_guc *guc = ct_to_guc(ct);
+ u32 hxg_len = msg_len_to_hxg_len(len);
+ u32 *hxg = msg_to_hxg(msg);
+ u32 action = FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, hxg[0]);
+ u32 *payload = hxg + GUC_HXG_MSG_MIN_LEN;
+ u32 adj_len = hxg_len - GUC_HXG_MSG_MIN_LEN;
+ int ret = 0;
+
+ switch (action) {
+ case XE_GUC_ACTION_REPORT_PAGE_FAULT_REQ_DESC:
+ ret = xe_guc_pagefault_handler(guc, payload, adj_len);
+ break;
+ case XE_GUC_ACTION_TLB_INVALIDATION_DONE:
+ __g2h_release_space(ct, len);
+ ret = xe_guc_tlb_invalidation_done_handler(guc, payload,
+ adj_len);
+ break;
+ default:
+ xe_gt_warn(gt, "NOT_POSSIBLE");
+ }
+
+ if (ret) {
+ xe_gt_err(gt, "G2H action 0x%04x failed (%pe)\n",
+ action, ERR_PTR(ret));
+ CT_DEAD(ct, NULL, FAST_G2H);
+ }
+}
+
+/**
+ * xe_guc_ct_fast_path - process critical G2H in the IRQ handler
+ * @ct: GuC CT object
+ *
+ * Anything related to page faults is critical for performance, so process these
+ * critical G2H messages in the IRQ. This is safe as these handlers either just wake up
+ * waiters or queue another worker.
+ */
+void xe_guc_ct_fast_path(struct xe_guc_ct *ct)
+{
+ struct xe_device *xe = ct_to_xe(ct);
+ bool ongoing;
+ int len;
+
+ ongoing = xe_pm_runtime_get_if_active(ct_to_xe(ct));
+ if (!ongoing && xe_pm_read_callback_task(ct_to_xe(ct)) == NULL)
+ return;
+
+ spin_lock(&ct->fast_lock);
+ do {
+ len = g2h_read(ct, ct->fast_msg, true);
+ if (len > 0)
+ g2h_fast_path(ct, ct->fast_msg, len);
+ } while (len > 0);
+ spin_unlock(&ct->fast_lock);
+
+ if (ongoing)
+ xe_pm_runtime_put(xe);
+}
+
+/* Returns less than zero on error, 0 on done, 1 on more available */
+static int dequeue_one_g2h(struct xe_guc_ct *ct)
+{
+ int len;
+ int ret;
+
+ lockdep_assert_held(&ct->lock);
+
+ spin_lock_irq(&ct->fast_lock);
+ len = g2h_read(ct, ct->msg, false);
+ spin_unlock_irq(&ct->fast_lock);
+ if (len <= 0)
+ return len;
+
+ ret = parse_g2h_msg(ct, ct->msg, len);
+ if (unlikely(ret < 0))
+ return ret;
+
+ ret = process_g2h_msg(ct, ct->msg, len);
+ if (unlikely(ret < 0))
+ return ret;
+
+ return 1;
+}
+
+static void receive_g2h(struct xe_guc_ct *ct)
+{
+ bool ongoing;
+ int ret;
+
+ /*
+ * Normal users must always hold mem_access.ref around CT calls. However
+ * during the runtime pm callbacks we rely on CT to talk to the GuC, but
+ * at this stage we can't rely on mem_access.ref and even the
+ * callback_task will be different than current. For such cases we just
+ * need to ensure we always process the responses from any blocking
+ * ct_send requests or where we otherwise expect some response when
+ * initiated from those callbacks (which will need to wait for the below
+ * dequeue_one_g2h()). The dequeue_one_g2h() will gracefully fail if
+ * the device has suspended to the point that the CT communication has
+ * been disabled.
+ *
+ * If we are inside the runtime pm callback, we can be the only task
+ * still issuing CT requests (since that requires having the
+ * mem_access.ref). It seems like it might in theory be possible to
+ * receive unsolicited events from the GuC just as we are
+ * suspending-resuming, but those will currently anyway be lost when
+ * eventually exiting from suspend, hence no need to wake up the device
+	 * here. If we ever need something stronger than get_if_ongoing() then
+	 * we need to be careful not to block the pm callbacks from getting CT
+	 * responses: if the worker here is blocked waiting on those callbacks
+	 * to complete, that would create a deadlock.
+ */
+ ongoing = xe_pm_runtime_get_if_active(ct_to_xe(ct));
+ if (!ongoing && xe_pm_read_callback_task(ct_to_xe(ct)) == NULL)
+ return;
+
+ do {
+ mutex_lock(&ct->lock);
+ ret = dequeue_one_g2h(ct);
+ mutex_unlock(&ct->lock);
+
+ if (unlikely(ret == -EPROTO || ret == -EOPNOTSUPP)) {
+ xe_gt_err(ct_to_gt(ct), "CT dequeue failed: %d", ret);
+ CT_DEAD(ct, NULL, G2H_RECV);
+ kick_reset(ct);
+ }
+ } while (ret == 1);
+
+ if (ongoing)
+ xe_pm_runtime_put(ct_to_xe(ct));
+}
+
+static void g2h_worker_func(struct work_struct *w)
+{
+ struct xe_guc_ct *ct = container_of(w, struct xe_guc_ct, g2h_worker);
+
+ receive_g2h(ct);
+}
+
+struct xe_guc_ct_snapshot *xe_guc_ct_snapshot_alloc(struct xe_guc_ct *ct, bool atomic)
+{
+ struct xe_guc_ct_snapshot *snapshot;
+
+ snapshot = kzalloc(sizeof(*snapshot), atomic ? GFP_ATOMIC : GFP_KERNEL);
+ if (!snapshot)
+ return NULL;
+
+ if (ct->bo) {
+ snapshot->ctb_size = ct->bo->size;
+ snapshot->ctb = kmalloc(snapshot->ctb_size, atomic ? GFP_ATOMIC : GFP_KERNEL);
+ }
+
+ return snapshot;
+}
+
+static void guc_ctb_snapshot_capture(struct xe_device *xe, struct guc_ctb *ctb,
+ struct guc_ctb_snapshot *snapshot)
+{
+ xe_map_memcpy_from(xe, &snapshot->desc, &ctb->desc, 0,
+ sizeof(struct guc_ct_buffer_desc));
+ memcpy(&snapshot->info, &ctb->info, sizeof(struct guc_ctb_info));
+}
+
+static void guc_ctb_snapshot_print(struct guc_ctb_snapshot *snapshot,
+ struct drm_printer *p)
+{
+ drm_printf(p, "\tsize: %d\n", snapshot->info.size);
+ drm_printf(p, "\tresv_space: %d\n", snapshot->info.resv_space);
+ drm_printf(p, "\thead: %d\n", snapshot->info.head);
+ drm_printf(p, "\ttail: %d\n", snapshot->info.tail);
+ drm_printf(p, "\tspace: %d\n", snapshot->info.space);
+ drm_printf(p, "\tbroken: %d\n", snapshot->info.broken);
+ drm_printf(p, "\thead (memory): %d\n", snapshot->desc.head);
+ drm_printf(p, "\ttail (memory): %d\n", snapshot->desc.tail);
+ drm_printf(p, "\tstatus (memory): 0x%x\n", snapshot->desc.status);
+}
+
+/**
+ * xe_guc_ct_snapshot_capture - Take a quick snapshot of the CT state.
+ * @ct: GuC CT object.
+ * @atomic: Boolean to indicate if this is called from atomic context like
+ * reset or CTB handler or from some regular path like debugfs.
+ *
+ * This can be printed out at a later stage, e.g. during dev_coredump
+ * analysis.
+ *
+ * Returns: a GuC CT snapshot object that must be freed by the caller
+ * by using `xe_guc_ct_snapshot_free`.
+ */
+struct xe_guc_ct_snapshot *xe_guc_ct_snapshot_capture(struct xe_guc_ct *ct,
+ bool atomic)
+{
+ struct xe_device *xe = ct_to_xe(ct);
+ struct xe_guc_ct_snapshot *snapshot;
+
+ snapshot = xe_guc_ct_snapshot_alloc(ct, atomic);
+ if (!snapshot) {
+ xe_gt_err(ct_to_gt(ct), "Skipping CTB snapshot entirely.\n");
+ return NULL;
+ }
+
+ if (xe_guc_ct_enabled(ct) || ct->state == XE_GUC_CT_STATE_STOPPED) {
+ snapshot->ct_enabled = true;
+ snapshot->g2h_outstanding = READ_ONCE(ct->g2h_outstanding);
+ guc_ctb_snapshot_capture(xe, &ct->ctbs.h2g, &snapshot->h2g);
+ guc_ctb_snapshot_capture(xe, &ct->ctbs.g2h, &snapshot->g2h);
+ }
+
+ if (ct->bo && snapshot->ctb)
+ xe_map_memcpy_from(xe, snapshot->ctb, &ct->bo->vmap, 0, snapshot->ctb_size);
+
+ return snapshot;
+}
+
+/**
+ * xe_guc_ct_snapshot_print - Print out a given GuC CT snapshot.
+ * @snapshot: GuC CT snapshot object.
+ * @p: drm_printer where it will be printed out.
+ *
+ * This function prints out a given GuC CT snapshot object.
+ */
+void xe_guc_ct_snapshot_print(struct xe_guc_ct_snapshot *snapshot,
+ struct drm_printer *p)
+{
+ if (!snapshot)
+ return;
+
+ if (snapshot->ct_enabled) {
+ drm_puts(p, "H2G CTB (all sizes in DW):\n");
+ guc_ctb_snapshot_print(&snapshot->h2g, p);
+
+ drm_puts(p, "G2H CTB (all sizes in DW):\n");
+ guc_ctb_snapshot_print(&snapshot->g2h, p);
+ drm_printf(p, "\tg2h outstanding: %d\n",
+ snapshot->g2h_outstanding);
+
+ if (snapshot->ctb) {
+ xe_print_blob_ascii85(p, "CTB data", snapshot->ctb, 0, snapshot->ctb_size);
+ } else {
+ drm_printf(p, "CTB snapshot missing!\n");
+ return;
+ }
+ } else {
+ drm_puts(p, "CT disabled\n");
+ }
+}
+
+/**
+ * xe_guc_ct_snapshot_free - Free all allocated objects for a given snapshot.
+ * @snapshot: GuC CT snapshot object.
+ *
+ * This function frees all the memory that was allocated at capture
+ * time.
+ */
+void xe_guc_ct_snapshot_free(struct xe_guc_ct_snapshot *snapshot)
+{
+ if (!snapshot)
+ return;
+
+ kfree(snapshot->ctb);
+ kfree(snapshot);
+}
+
+/**
+ * xe_guc_ct_print - GuC CT Print.
+ * @ct: GuC CT.
+ * @p: drm_printer where it will be printed out.
+ *
+ * This function quickly captures a snapshot and immediately prints it out.
+ */
+void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p)
+{
+ struct xe_guc_ct_snapshot *snapshot;
+
+ snapshot = xe_guc_ct_snapshot_capture(ct, false);
+ xe_guc_ct_snapshot_print(snapshot, p);
+ xe_guc_ct_snapshot_free(snapshot);
+}
+
+#if IS_ENABLED(CONFIG_DRM_XE_DEBUG)
+static void ct_dead_capture(struct xe_guc_ct *ct, struct guc_ctb *ctb, u32 reason_code)
+{
+ struct xe_guc_log_snapshot *snapshot_log;
+ struct xe_guc_ct_snapshot *snapshot_ct;
+ struct xe_guc *guc = ct_to_guc(ct);
+ unsigned long flags;
+ bool have_capture;
+
+ if (ctb)
+ ctb->info.broken = true;
+
+ /* Ignore further errors after the first dump until a reset */
+ if (ct->dead.reported)
+ return;
+
+ spin_lock_irqsave(&ct->dead.lock, flags);
+
+ /* And only capture one dump at a time */
+ have_capture = ct->dead.reason & (1 << CT_DEAD_STATE_CAPTURE);
+ ct->dead.reason |= (1 << reason_code) |
+ (1 << CT_DEAD_STATE_CAPTURE);
+
+ spin_unlock_irqrestore(&ct->dead.lock, flags);
+
+ if (have_capture)
+ return;
+
+ snapshot_log = xe_guc_log_snapshot_capture(&guc->log, true);
+ snapshot_ct = xe_guc_ct_snapshot_capture((ct), true);
+
+ spin_lock_irqsave(&ct->dead.lock, flags);
+
+ if (ct->dead.snapshot_log || ct->dead.snapshot_ct) {
+ xe_gt_err(ct_to_gt(ct), "Got unexpected dead CT capture!\n");
+ xe_guc_log_snapshot_free(snapshot_log);
+ xe_guc_ct_snapshot_free(snapshot_ct);
+ } else {
+ ct->dead.snapshot_log = snapshot_log;
+ ct->dead.snapshot_ct = snapshot_ct;
+ }
+
+ spin_unlock_irqrestore(&ct->dead.lock, flags);
+
+ queue_work(system_unbound_wq, &(ct)->dead.worker);
+}
+
+static void ct_dead_print(struct xe_dead_ct *dead)
+{
+ struct xe_guc_ct *ct = container_of(dead, struct xe_guc_ct, dead);
+ struct xe_device *xe = ct_to_xe(ct);
+ struct xe_gt *gt = ct_to_gt(ct);
+ static int g_count;
+ struct drm_printer ip = xe_gt_info_printer(gt);
+ struct drm_printer lp = drm_line_printer(&ip, "Capture", ++g_count);
+
+ if (!dead->reason) {
+ xe_gt_err(gt, "CTB is dead for no reason!?\n");
+ return;
+ }
+
+ drm_printf(&lp, "CTB is dead - reason=0x%X\n", dead->reason);
+
+ /* Can't generate a genuine core dump at this point, so just do the good bits */
+ drm_puts(&lp, "**** Xe Device Coredump ****\n");
+ xe_device_snapshot_print(xe, &lp);
+
+ drm_printf(&lp, "**** GT #%d ****\n", gt->info.id);
+ drm_printf(&lp, "\tTile: %d\n", gt->tile->id);
+
+ drm_puts(&lp, "**** GuC Log ****\n");
+ xe_guc_log_snapshot_print(dead->snapshot_log, &lp);
+
+ drm_puts(&lp, "**** GuC CT ****\n");
+ xe_guc_ct_snapshot_print(dead->snapshot_ct, &lp);
+
+ drm_puts(&lp, "Done.\n");
+}
+
+static void ct_dead_worker_func(struct work_struct *w)
+{
+ struct xe_guc_ct *ct = container_of(w, struct xe_guc_ct, dead.worker);
+
+ if (!ct->dead.reported) {
+ ct->dead.reported = true;
+ ct_dead_print(&ct->dead);
+ }
+
+ spin_lock_irq(&ct->dead.lock);
+
+ xe_guc_log_snapshot_free(ct->dead.snapshot_log);
+ ct->dead.snapshot_log = NULL;
+ xe_guc_ct_snapshot_free(ct->dead.snapshot_ct);
+ ct->dead.snapshot_ct = NULL;
+
+ if (ct->dead.reason & (1 << CT_DEAD_STATE_REARM)) {
+ /* A reset has occurred so re-arm the error reporting */
+ ct->dead.reason = 0;
+ ct->dead.reported = false;
+ }
+
+ spin_unlock_irq(&ct->dead.lock);
+}
+#endif
diff --git a/rr-cache/3202f565d085f3a7e6151c8b138d91393b3520cd/postimage.2 b/rr-cache/3202f565d085f3a7e6151c8b138d91393b3520cd/postimage.2
new file mode 100644
index 000000000000..f6c87d14f78c
--- /dev/null
+++ b/rr-cache/3202f565d085f3a7e6151c8b138d91393b3520cd/postimage.2
@@ -0,0 +1,1037 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#include "xe_device.h"
+
+#include <linux/aperture.h>
+#include <linux/delay.h>
+#include <linux/fault-inject.h>
+#include <linux/units.h>
+
+#include <drm/drm_atomic_helper.h>
+#include <drm/drm_client.h>
+#include <drm/drm_gem_ttm_helper.h>
+#include <drm/drm_ioctl.h>
+#include <drm/drm_managed.h>
+#include <drm/drm_print.h>
+#include <uapi/drm/xe_drm.h>
+
+#include "display/xe_display.h"
+#include "instructions/xe_gpu_commands.h"
+#include "regs/xe_gt_regs.h"
+#include "regs/xe_regs.h"
+#include "xe_bo.h"
+#include "xe_debugfs.h"
+#include "xe_devcoredump.h"
+#include "xe_dma_buf.h"
+#include "xe_drm_client.h"
+#include "xe_drv.h"
+#include "xe_exec.h"
+#include "xe_exec_queue.h"
+#include "xe_force_wake.h"
+#include "xe_ggtt.h"
+#include "xe_gsc_proxy.h"
+#include "xe_gt.h"
+#include "xe_gt_mcr.h"
+#include "xe_gt_printk.h"
+#include "xe_gt_sriov_vf.h"
+#include "xe_guc.h"
+#include "xe_hw_engine_group.h"
+#include "xe_hwmon.h"
+#include "xe_irq.h"
+#include "xe_memirq.h"
+#include "xe_mmio.h"
+#include "xe_module.h"
+#include "xe_observation.h"
+#include "xe_pat.h"
+#include "xe_pcode.h"
+#include "xe_pm.h"
+#include "xe_query.h"
+#include "xe_sriov.h"
+#include "xe_tile.h"
+#include "xe_ttm_stolen_mgr.h"
+#include "xe_ttm_sys_mgr.h"
+#include "xe_vm.h"
+#include "xe_vram.h"
+#include "xe_wait_user_fence.h"
+#include "xe_wa.h"
+
+#include <generated/xe_wa_oob.h>
+
+static int xe_file_open(struct drm_device *dev, struct drm_file *file)
+{
+ struct xe_device *xe = to_xe_device(dev);
+ struct xe_drm_client *client;
+ struct xe_file *xef;
+ int ret = -ENOMEM;
+ struct task_struct *task = NULL;
+
+ xef = kzalloc(sizeof(*xef), GFP_KERNEL);
+ if (!xef)
+ return ret;
+
+ client = xe_drm_client_alloc();
+ if (!client) {
+ kfree(xef);
+ return ret;
+ }
+
+ xef->drm = file;
+ xef->client = client;
+ xef->xe = xe;
+
+ mutex_init(&xef->vm.lock);
+ xa_init_flags(&xef->vm.xa, XA_FLAGS_ALLOC1);
+
+ mutex_init(&xef->exec_queue.lock);
+ xa_init_flags(&xef->exec_queue.xa, XA_FLAGS_ALLOC1);
+
+ spin_lock(&xe->clients.lock);
+ xe->clients.count++;
+ spin_unlock(&xe->clients.lock);
+
+ file->driver_priv = xef;
+ kref_init(&xef->refcount);
+
+ task = get_pid_task(rcu_access_pointer(file->pid), PIDTYPE_PID);
+ if (task) {
+ xef->process_name = kstrdup(task->comm, GFP_KERNEL);
+ xef->pid = task->pid;
+ put_task_struct(task);
+ }
+
+ return 0;
+}
+
+static void xe_file_destroy(struct kref *ref)
+{
+ struct xe_file *xef = container_of(ref, struct xe_file, refcount);
+ struct xe_device *xe = xef->xe;
+
+ xa_destroy(&xef->exec_queue.xa);
+ mutex_destroy(&xef->exec_queue.lock);
+ xa_destroy(&xef->vm.xa);
+ mutex_destroy(&xef->vm.lock);
+
+ spin_lock(&xe->clients.lock);
+ xe->clients.count--;
+ spin_unlock(&xe->clients.lock);
+
+ xe_drm_client_put(xef->client);
+ kfree(xef->process_name);
+ kfree(xef);
+}
+
+/**
+ * xe_file_get() - Take a reference to the xe file object
+ * @xef: Pointer to the xe file
+ *
+ * Anyone with a pointer to xef must take a reference to the xe file
+ * object using this call.
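+ *
+ * A typical (illustrative) pattern pairs it with xe_file_put():
+ *
+ *	xef = xe_file_get(xef);
+ *	... use xef beyond the lifetime of the current reference ...
+ *	xe_file_put(xef);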
+ *
+ * Return: xe file pointer
+ */
+struct xe_file *xe_file_get(struct xe_file *xef)
+{
+ kref_get(&xef->refcount);
+ return xef;
+}
+
+/**
+ * xe_file_put() - Drop a reference to the xe file object
+ * @xef: Pointer to the xe file
+ *
+ * Used to drop reference to the xef object
+ */
+void xe_file_put(struct xe_file *xef)
+{
+ kref_put(&xef->refcount, xe_file_destroy);
+}
+
+static void xe_file_close(struct drm_device *dev, struct drm_file *file)
+{
+ struct xe_device *xe = to_xe_device(dev);
+ struct xe_file *xef = file->driver_priv;
+ struct xe_vm *vm;
+ struct xe_exec_queue *q;
+ unsigned long idx;
+
+ xe_pm_runtime_get(xe);
+
+ /*
+ * No need for exec_queue.lock here as there is no contention for it
+ * when FD is closing as IOCTLs presumably can't be modifying the
+ * xarray. Taking exec_queue.lock here causes undue dependency on
+ * vm->lock taken during xe_exec_queue_kill().
+ */
+ xa_for_each(&xef->exec_queue.xa, idx, q) {
+ if (q->vm && q->hwe->hw_engine_group)
+ xe_hw_engine_group_del_exec_queue(q->hwe->hw_engine_group, q);
+ xe_exec_queue_kill(q);
+ xe_exec_queue_put(q);
+ }
+ xa_for_each(&xef->vm.xa, idx, vm)
+ xe_vm_close_and_put(vm);
+
+ xe_file_put(xef);
+
+ xe_pm_runtime_put(xe);
+}
+
+static const struct drm_ioctl_desc xe_ioctls[] = {
+ DRM_IOCTL_DEF_DRV(XE_DEVICE_QUERY, xe_query_ioctl, DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(XE_GEM_CREATE, xe_gem_create_ioctl, DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(XE_GEM_MMAP_OFFSET, xe_gem_mmap_offset_ioctl,
+ DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(XE_VM_CREATE, xe_vm_create_ioctl, DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(XE_VM_DESTROY, xe_vm_destroy_ioctl, DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(XE_VM_BIND, xe_vm_bind_ioctl, DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(XE_EXEC, xe_exec_ioctl, DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(XE_EXEC_QUEUE_CREATE, xe_exec_queue_create_ioctl,
+ DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(XE_EXEC_QUEUE_DESTROY, xe_exec_queue_destroy_ioctl,
+ DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(XE_EXEC_QUEUE_GET_PROPERTY, xe_exec_queue_get_property_ioctl,
+ DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(XE_WAIT_USER_FENCE, xe_wait_user_fence_ioctl,
+ DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(XE_OBSERVATION, xe_observation_ioctl, DRM_RENDER_ALLOW),
+};
+
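+/*
+ * Every ioctl on an xe DRM file goes through here (or the compat variant
+ * below): requests are rejected with -ECANCELED once the device is wedged,
+ * and each call is wrapped in a runtime PM get/put so the device stays
+ * awake for the duration of the ioctl.
+ */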
+static long xe_drm_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+ struct drm_file *file_priv = file->private_data;
+ struct xe_device *xe = to_xe_device(file_priv->minor->dev);
+ long ret;
+
+ if (xe_device_wedged(xe))
+ return -ECANCELED;
+
+ ret = xe_pm_runtime_get_ioctl(xe);
+ if (ret >= 0)
+ ret = drm_ioctl(file, cmd, arg);
+ xe_pm_runtime_put(xe);
+
+ return ret;
+}
+
+#ifdef CONFIG_COMPAT
+static long xe_drm_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+ struct drm_file *file_priv = file->private_data;
+ struct xe_device *xe = to_xe_device(file_priv->minor->dev);
+ long ret;
+
+ if (xe_device_wedged(xe))
+ return -ECANCELED;
+
+ ret = xe_pm_runtime_get_ioctl(xe);
+ if (ret >= 0)
+ ret = drm_compat_ioctl(file, cmd, arg);
+ xe_pm_runtime_put(xe);
+
+ return ret;
+}
+#else
+/* similarly to drm_compat_ioctl, let it be assigned to .compat_ioctl unconditionally */
+#define xe_drm_compat_ioctl NULL
+#endif
+
+static const struct file_operations xe_driver_fops = {
+ .owner = THIS_MODULE,
+ .open = drm_open,
+ .release = drm_release_noglobal,
+ .unlocked_ioctl = xe_drm_ioctl,
+ .mmap = drm_gem_mmap,
+ .poll = drm_poll,
+ .read = drm_read,
+ .compat_ioctl = xe_drm_compat_ioctl,
+ .llseek = noop_llseek,
+#ifdef CONFIG_PROC_FS
+ .show_fdinfo = drm_show_fdinfo,
+#endif
+ .fop_flags = FOP_UNSIGNED_OFFSET,
+};
+
+static struct drm_driver driver = {
+ /* Don't use MTRRs here; the Xserver or userspace app should
+ * deal with them for Intel hardware.
+ */
+ .driver_features =
+ DRIVER_GEM |
+ DRIVER_RENDER | DRIVER_SYNCOBJ |
+ DRIVER_SYNCOBJ_TIMELINE | DRIVER_GEM_GPUVA,
+ .open = xe_file_open,
+ .postclose = xe_file_close,
+
+ .gem_prime_import = xe_gem_prime_import,
+
+ .dumb_create = xe_bo_dumb_create,
+ .dumb_map_offset = drm_gem_ttm_dumb_map_offset,
+#ifdef CONFIG_PROC_FS
+ .show_fdinfo = xe_drm_client_fdinfo,
+#endif
+ .ioctls = xe_ioctls,
+ .num_ioctls = ARRAY_SIZE(xe_ioctls),
+ .fops = &xe_driver_fops,
+ .name = DRIVER_NAME,
+ .desc = DRIVER_DESC,
+ .date = DRIVER_DATE,
+ .major = DRIVER_MAJOR,
+ .minor = DRIVER_MINOR,
+ .patchlevel = DRIVER_PATCHLEVEL,
+};
+
+static void xe_device_destroy(struct drm_device *dev, void *dummy)
+{
+ struct xe_device *xe = to_xe_device(dev);
+
+ if (xe->preempt_fence_wq)
+ destroy_workqueue(xe->preempt_fence_wq);
+
+ if (xe->ordered_wq)
+ destroy_workqueue(xe->ordered_wq);
+
+ if (xe->unordered_wq)
+ destroy_workqueue(xe->unordered_wq);
+
+ if (xe->destroy_wq)
+ destroy_workqueue(xe->destroy_wq);
+
+ ttm_device_fini(&xe->ttm);
+}
+
+struct xe_device *xe_device_create(struct pci_dev *pdev,
+ const struct pci_device_id *ent)
+{
+ struct xe_device *xe;
+ int err;
+
+ xe_display_driver_set_hooks(&driver);
+
+ err = aperture_remove_conflicting_pci_devices(pdev, driver.name);
+ if (err)
+ return ERR_PTR(err);
+
+ xe = devm_drm_dev_alloc(&pdev->dev, &driver, struct xe_device, drm);
+ if (IS_ERR(xe))
+ return xe;
+
+ err = ttm_device_init(&xe->ttm, &xe_ttm_funcs, xe->drm.dev,
+ xe->drm.anon_inode->i_mapping,
+ xe->drm.vma_offset_manager, false, false);
+ if (WARN_ON(err))
+ goto err;
+
+ err = drmm_add_action_or_reset(&xe->drm, xe_device_destroy, NULL);
+ if (err)
+ goto err;
+
+ xe->info.devid = pdev->device;
+ xe->info.revid = pdev->revision;
+ xe->info.force_execlist = xe_modparam.force_execlist;
+
+ spin_lock_init(&xe->irq.lock);
+ spin_lock_init(&xe->clients.lock);
+
+ init_waitqueue_head(&xe->ufence_wq);
+
+ init_rwsem(&xe->usm.lock);
+
+ xa_init_flags(&xe->usm.asid_to_vm, XA_FLAGS_ALLOC);
+
+ if (IS_ENABLED(CONFIG_DRM_XE_DEBUG)) {
+ /* Trigger a large asid and an early asid wrap. */
+ u32 asid;
+
+ BUILD_BUG_ON(XE_MAX_ASID < 2);
+ err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, NULL,
+ XA_LIMIT(XE_MAX_ASID - 2, XE_MAX_ASID - 1),
+ &xe->usm.next_asid, GFP_KERNEL);
+ drm_WARN_ON(&xe->drm, err);
+ if (err >= 0)
+ xa_erase(&xe->usm.asid_to_vm, asid);
+ }
+
+ spin_lock_init(&xe->pinned.lock);
+ INIT_LIST_HEAD(&xe->pinned.kernel_bo_present);
+ INIT_LIST_HEAD(&xe->pinned.external_vram);
+ INIT_LIST_HEAD(&xe->pinned.evicted);
+
+ xe->preempt_fence_wq = alloc_ordered_workqueue("xe-preempt-fence-wq", 0);
+ xe->ordered_wq = alloc_ordered_workqueue("xe-ordered-wq", 0);
+ xe->unordered_wq = alloc_workqueue("xe-unordered-wq", 0, 0);
+ xe->destroy_wq = alloc_workqueue("xe-destroy-wq", 0, 0);
+ if (!xe->ordered_wq || !xe->unordered_wq ||
+ !xe->preempt_fence_wq || !xe->destroy_wq) {
+ /*
+ * Cleanup done in xe_device_destroy via
+		 * drmm_add_action_or_reset registered above
+ */
+ drm_err(&xe->drm, "Failed to allocate xe workqueues\n");
+ err = -ENOMEM;
+ goto err;
+ }
+
+ err = xe_display_create(xe);
+ if (WARN_ON(err))
+ goto err;
+
+ return xe;
+
+err:
+ return ERR_PTR(err);
+}
+ALLOW_ERROR_INJECTION(xe_device_create, ERRNO); /* See xe_pci_probe() */
+
+static bool xe_driver_flr_disabled(struct xe_device *xe)
+{
+ return xe_mmio_read32(xe_root_tile_mmio(xe), GU_CNTL_PROTECTED) & DRIVERINT_FLR_DIS;
+}
+
+/*
+ * The driver-initiated FLR is the highest level of reset that we can trigger
+ * from within the driver. It is different from the PCI FLR in that it doesn't
+ * fully reset the SGUnit and doesn't modify the PCI config space and therefore
+ * it doesn't require a re-enumeration of the PCI BARs. However, the
+ * driver-initiated FLR does still cause a reset of both GT and display and a
+ * memory wipe of local and stolen memory, so recovery would require a full HW
+ * re-init and saving/restoring (or re-populating) the wiped memory. Since we
+ * perform the FLR as the very last action before releasing access to the HW
+ * during the driver release flow, we don't attempt recovery at all, because
+ * if/when a new instance of i915 is bound to the device it will do a full
+ * re-init anyway.
+ */
+static void __xe_driver_flr(struct xe_device *xe)
+{
+ const unsigned int flr_timeout = 3 * MICRO; /* specs recommend a 3s wait */
+ struct xe_mmio *mmio = xe_root_tile_mmio(xe);
+ int ret;
+
+ drm_dbg(&xe->drm, "Triggering Driver-FLR\n");
+
+ /*
+ * Make sure any pending FLR requests have cleared by waiting for the
+ * FLR trigger bit to go to zero. Also clear GU_DEBUG's DRIVERFLR_STATUS
+ * to make sure it's not still set from a prior attempt (it's a write to
+ * clear bit).
+ * Note that we should never be in a situation where a previous attempt
+ * is still pending (unless the HW is totally dead), but better to be
+ * safe in case something unexpected happens
+ */
+ ret = xe_mmio_wait32(mmio, GU_CNTL, DRIVERFLR, 0, flr_timeout, NULL, false);
+ if (ret) {
+ drm_err(&xe->drm, "Driver-FLR-prepare wait for ready failed! %d\n", ret);
+ return;
+ }
+ xe_mmio_write32(mmio, GU_DEBUG, DRIVERFLR_STATUS);
+
+ /* Trigger the actual Driver-FLR */
+ xe_mmio_rmw32(mmio, GU_CNTL, 0, DRIVERFLR);
+
+ /* Wait for hardware teardown to complete */
+ ret = xe_mmio_wait32(mmio, GU_CNTL, DRIVERFLR, 0, flr_timeout, NULL, false);
+ if (ret) {
+ drm_err(&xe->drm, "Driver-FLR-teardown wait completion failed! %d\n", ret);
+ return;
+ }
+
+ /* Wait for hardware/firmware re-init to complete */
+ ret = xe_mmio_wait32(mmio, GU_DEBUG, DRIVERFLR_STATUS, DRIVERFLR_STATUS,
+ flr_timeout, NULL, false);
+ if (ret) {
+ drm_err(&xe->drm, "Driver-FLR-reinit wait completion failed! %d\n", ret);
+ return;
+ }
+
+ /* Clear sticky completion status */
+ xe_mmio_write32(mmio, GU_DEBUG, DRIVERFLR_STATUS);
+}
+
+static void xe_driver_flr(struct xe_device *xe)
+{
+ if (xe_driver_flr_disabled(xe)) {
+ drm_info_once(&xe->drm, "BIOS Disabled Driver-FLR\n");
+ return;
+ }
+
+ __xe_driver_flr(xe);
+}
+
+static void xe_driver_flr_fini(void *arg)
+{
+ struct xe_device *xe = arg;
+
+ if (xe->needs_flr_on_fini)
+ xe_driver_flr(xe);
+}
+
+static void xe_device_sanitize(void *arg)
+{
+ struct xe_device *xe = arg;
+ struct xe_gt *gt;
+ u8 id;
+
+ for_each_gt(gt, xe, id)
+ xe_gt_sanitize(gt);
+}
+
+static int xe_set_dma_info(struct xe_device *xe)
+{
+ unsigned int mask_size = xe->info.dma_mask_size;
+ int err;
+
+ dma_set_max_seg_size(xe->drm.dev, xe_sg_segment_size(xe->drm.dev));
+
+ err = dma_set_mask(xe->drm.dev, DMA_BIT_MASK(mask_size));
+ if (err)
+ goto mask_err;
+
+ err = dma_set_coherent_mask(xe->drm.dev, DMA_BIT_MASK(mask_size));
+ if (err)
+ goto mask_err;
+
+ return 0;
+
+mask_err:
+ drm_err(&xe->drm, "Can't set DMA mask/consistent mask (%d)\n", err);
+ return err;
+}
+
+static bool verify_lmem_ready(struct xe_device *xe)
+{
+ u32 val = xe_mmio_read32(xe_root_tile_mmio(xe), GU_CNTL) & LMEM_INIT;
+
+ return !!val;
+}
+
+static int wait_for_lmem_ready(struct xe_device *xe)
+{
+ unsigned long timeout, start;
+
+ if (!IS_DGFX(xe))
+ return 0;
+
+ if (IS_SRIOV_VF(xe))
+ return 0;
+
+ if (verify_lmem_ready(xe))
+ return 0;
+
+ drm_dbg(&xe->drm, "Waiting for lmem initialization\n");
+
+ start = jiffies;
+ timeout = start + msecs_to_jiffies(60 * 1000); /* 60 sec! */
+
+ do {
+ if (signal_pending(current))
+ return -EINTR;
+
+ /*
+ * The boot firmware initializes local memory and
+ * assesses its health. If memory training fails, the punit will
+ * have been instructed to keep the GT powered down and we won't
+ * be able to communicate with it.
+ *
+ * If the status check is done before the punit updates the register,
+ * it can lead to the system being unusable. Use a timeout and
+ * defer the probe to prevent this.
+ */
+ if (time_after(jiffies, timeout)) {
+ drm_dbg(&xe->drm, "lmem not initialized by firmware\n");
+ return -EPROBE_DEFER;
+ }
+
+ msleep(20);
+
+ } while (!verify_lmem_ready(xe));
+
+ drm_dbg(&xe->drm, "lmem ready after %ums",
+ jiffies_to_msecs(jiffies - start));
+
+ return 0;
+}
+ALLOW_ERROR_INJECTION(wait_for_lmem_ready, ERRNO); /* See xe_pci_probe() */
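
The readiness wait above is a bounded poll: bail out on a pending signal, re-check every 20 ms, and give up once the 60 s deadline passes so the probe can be retried later via -EPROBE_DEFER. Below is a minimal userspace sketch of that shape; probe_ready() and the -EAGAIN return are stand-ins for the real LMEM_INIT check and -EPROBE_DEFER.

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>
#include <time.h>
#include <unistd.h>

static int polls;

/* Stand-in for the LMEM_INIT register check; pretend it latches on poll 5. */
static bool probe_ready(void)
{
	return ++polls >= 5;
}

/* Poll at a fixed cadence until ready or the deadline passes. */
static int wait_until_ready(long timeout_ms)
{
	struct timespec start, now;

	clock_gettime(CLOCK_MONOTONIC, &start);
	while (!probe_ready()) {
		clock_gettime(CLOCK_MONOTONIC, &now);
		long elapsed_ms = (now.tv_sec - start.tv_sec) * 1000 +
				  (now.tv_nsec - start.tv_nsec) / 1000000;

		if (elapsed_ms > timeout_ms)
			return -EAGAIN;	/* the driver returns -EPROBE_DEFER here */
		usleep(20 * 1000);	/* same 20 ms cadence as the loop above */
	}
	return 0;
}

int main(void)
{
	int ret = wait_until_ready(60 * 1000);

	printf("wait_until_ready() = %d after %d polls\n", ret, polls);
	return 0;
}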
+
+static void update_device_info(struct xe_device *xe)
+{
+ /* disable features that are not available/applicable to VFs */
+ if (IS_SRIOV_VF(xe)) {
+ xe->info.probe_display = 0;
+ xe->info.has_heci_gscfi = 0;
+ xe->info.skip_guc_pc = 1;
+ xe->info.skip_pcode = 1;
+ }
+}
+
+/**
+ * xe_device_probe_early() - Device early probe
+ * @xe: xe device instance
+ *
+ * Initialize MMIO resources that don't require any
+ * knowledge about tile count. Also initialize pcode and
+ * check vram initialization on root tile.
+ *
+ * Return: 0 on success, error code on failure
+ */
+int xe_device_probe_early(struct xe_device *xe)
+{
+ int err;
+
+ err = xe_mmio_init(xe);
+ if (err)
+ return err;
+
+ xe_sriov_probe_early(xe);
+
+ update_device_info(xe);
+
+ err = xe_pcode_probe_early(xe);
+ if (err)
+ return err;
+
+ err = wait_for_lmem_ready(xe);
+ if (err)
+ return err;
+
+ xe->wedged.mode = xe_modparam.wedged_mode;
+
+ return 0;
+}
+
+static int probe_has_flat_ccs(struct xe_device *xe)
+{
+ struct xe_gt *gt;
+ u32 reg;
+ int err;
+
+ /* Always enabled/disabled, no runtime check to do */
+ if (GRAPHICS_VER(xe) < 20 || !xe->info.has_flat_ccs)
+ return 0;
+
+ gt = xe_root_mmio_gt(xe);
+
+ err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+ if (err)
+ return err;
+
+ reg = xe_gt_mcr_unicast_read_any(gt, XE2_FLAT_CCS_BASE_RANGE_LOWER);
+ xe->info.has_flat_ccs = (reg & XE2_FLAT_CCS_ENABLE);
+
+ if (!xe->info.has_flat_ccs)
+ drm_dbg(&xe->drm,
+ "Flat CCS has been disabled in bios, May lead to performance impact");
+
+ return xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+}
+
+int xe_device_probe(struct xe_device *xe)
+{
+ struct xe_tile *tile;
+ struct xe_gt *gt;
+ int err;
+ u8 last_gt;
+ u8 id;
+
+ xe_pat_init_early(xe);
+
+ err = xe_sriov_init(xe);
+ if (err)
+ return err;
+
+ xe->info.mem_region_mask = 1;
+ err = xe_display_init_nommio(xe);
+ if (err)
+ return err;
+
+ err = xe_set_dma_info(xe);
+ if (err)
+ return err;
+
+ err = xe_mmio_probe_tiles(xe);
+ if (err)
+ return err;
+
+ xe_ttm_sys_mgr_init(xe);
+
+ for_each_gt(gt, xe, id) {
+ err = xe_gt_init_early(gt);
+ if (err)
+ return err;
+
+ /*
+ * Only after this point can GT-specific MMIO operations
+ * (including things like communication with the GuC)
+ * be performed.
+ */
+ xe_gt_mmio_init(gt);
+ }
+
+ for_each_tile(tile, xe, id) {
+ if (IS_SRIOV_VF(xe)) {
+ xe_guc_comm_init_early(&tile->primary_gt->uc.guc);
+ err = xe_gt_sriov_vf_bootstrap(tile->primary_gt);
+ if (err)
+ return err;
+ err = xe_gt_sriov_vf_query_config(tile->primary_gt);
+ if (err)
+ return err;
+ }
+ err = xe_ggtt_init_early(tile->mem.ggtt);
+ if (err)
+ return err;
+ err = xe_memirq_init(&tile->memirq);
+ if (err)
+ return err;
+ }
+
+ for_each_gt(gt, xe, id) {
+ err = xe_gt_init_hwconfig(gt);
+ if (err)
+ return err;
+ }
+
+ err = xe_devcoredump_init(xe);
+ if (err)
+ return err;
+ err = devm_add_action_or_reset(xe->drm.dev, xe_driver_flr_fini, xe);
+ if (err)
+ return err;
+
+ err = xe_display_init_noirq(xe);
+ if (err)
+ return err;
+
+ err = xe_irq_install(xe);
+ if (err)
+ goto err;
+
+ err = probe_has_flat_ccs(xe);
+ if (err)
+ goto err;
+
+ err = xe_vram_probe(xe);
+ if (err)
+ goto err;
+
+ for_each_tile(tile, xe, id) {
+ err = xe_tile_init_noalloc(tile);
+ if (err)
+ goto err;
+ }
+
+ /* Allocate and map stolen after potential VRAM resize */
+ xe_ttm_stolen_mgr_init(xe);
+
+ /*
+ * Now that GT is initialized (TTM in particular),
+ * we can try to init display, and inherit the initial fb.
+ * This is the reason the first allocation needs to be done
+ * inside display.
+ */
+ err = xe_display_init_noaccel(xe);
+ if (err)
+ goto err;
+
+ for_each_gt(gt, xe, id) {
+ last_gt = id;
+
+ err = xe_gt_init(gt);
+ if (err)
+ goto err_fini_gt;
+ }
+
+ xe_heci_gsc_init(xe);
+
+ err = xe_oa_init(xe);
+ if (err)
+ goto err_fini_gt;
+
+ err = xe_display_init(xe);
+ if (err)
+ goto err_fini_oa;
+
+ err = drm_dev_register(&xe->drm, 0);
+ if (err)
+ goto err_fini_display;
+
+ xe_display_register(xe);
+
+ xe_oa_register(xe);
+
+ xe_debugfs_register(xe);
+
+ xe_hwmon_register(xe);
+
+ for_each_gt(gt, xe, id)
+ xe_gt_sanitize_freq(gt);
+
+ return devm_add_action_or_reset(xe->drm.dev, xe_device_sanitize, xe);
+
+err_fini_display:
+ xe_display_driver_remove(xe);
+
+err_fini_oa:
+ xe_oa_fini(xe);
+
+err_fini_gt:
+ for_each_gt(gt, xe, id) {
+ if (id < last_gt)
+ xe_gt_remove(gt);
+ else
+ break;
+ }
+
+err:
+ xe_display_fini(xe);
+ return err;
+}
+
+static void xe_device_remove_display(struct xe_device *xe)
+{
+ xe_display_unregister(xe);
+
+ drm_dev_unplug(&xe->drm);
+ xe_display_driver_remove(xe);
+}
+
+void xe_device_remove(struct xe_device *xe)
+{
+ struct xe_gt *gt;
+ u8 id;
+
+ xe_oa_unregister(xe);
+
+ xe_device_remove_display(xe);
+
+ xe_display_fini(xe);
+
+ xe_oa_fini(xe);
+
+ xe_heci_gsc_fini(xe);
+
+ for_each_gt(gt, xe, id)
+ xe_gt_remove(gt);
+}
+
+void xe_device_shutdown(struct xe_device *xe)
+{
+ struct xe_gt *gt;
+ u8 id;
+
+ drm_dbg(&xe->drm, "Shutting down device\n");
+
+ if (xe_driver_flr_disabled(xe)) {
+ xe_display_pm_shutdown(xe);
+
+ xe_irq_suspend(xe);
+
+ for_each_gt(gt, xe, id)
+ xe_gt_shutdown(gt);
+
+ xe_display_pm_shutdown_late(xe);
+ } else {
+ /* BOOM! */
+ __xe_driver_flr(xe);
+ }
+}
+
+/**
+ * xe_device_wmb() - Device specific write memory barrier
+ * @xe: the &xe_device
+ *
+ * While wmb() is sufficient for a barrier if we use system memory, on discrete
+ * platforms with device memory we additionally need to issue a register write.
+ * Since it doesn't matter which register we write to, use the read-only VF_CAP
+ * register that is also marked as accessible by the VFs.
+ */
+void xe_device_wmb(struct xe_device *xe)
+{
+ wmb();
+ if (IS_DGFX(xe))
+ xe_mmio_write32(xe_root_tile_mmio(xe), VF_CAP_REG, 0);
+}
+
+/**
+ * xe_device_td_flush() - Flush transient L3 cache entries
+ * @xe: The device
+ *
+ * Display engine has direct access to memory and is never coherent with L3/L4
+ * caches (or CPU caches), however KMD is responsible for specifically flushing
+ * transient L3 GPU cache entries prior to the flip sequence to ensure scanout
+ * can happen from such a surface without seeing corruption.
+ *
+ * Display surfaces can be tagged as transient by mapping them using one of the
+ * various L3:XD PAT index modes on Xe2.
+ *
+ * Note: On non-discrete xe2 platforms, like LNL, the entire L3 cache is flushed
+ * at the end of each submission via PIPE_CONTROL for compute/render, since SA
+ * Media is not coherent with L3 and we want to support render-vs-media
+ * use cases. For other engines like copy/blt the HW internally forces uncached
+ * behaviour, hence we can skip the TDF on such platforms.
+ */
+void xe_device_td_flush(struct xe_device *xe)
+{
+ struct xe_gt *gt;
+ u8 id;
+
+ if (!IS_DGFX(xe) || GRAPHICS_VER(xe) < 20)
+ return;
+
+ if (XE_WA(xe_root_mmio_gt(xe), 16023588340)) {
+ xe_device_l2_flush(xe);
+ return;
+ }
+
+ for_each_gt(gt, xe, id) {
+ if (xe_gt_is_media_type(gt))
+ continue;
+
+ if (xe_force_wake_get(gt_to_fw(gt), XE_FW_GT))
+ return;
+
+ xe_mmio_write32(&gt->mmio, XE2_TDF_CTRL, TRANSIENT_FLUSH_REQUEST);
+ /*
+ * FIXME: We can likely do better here with our choice of
+ * timeout. Currently we just assume the worst case, i.e. 150us,
+ * which is believed to be sufficient to cover the worst case
+ * scenario on current platforms if all cache entries are
+ * transient and need to be flushed.
+ */
+ if (xe_mmio_wait32(&gt->mmio, XE2_TDF_CTRL, TRANSIENT_FLUSH_REQUEST, 0,
+ 150, NULL, false))
+ xe_gt_err_once(gt, "TD flush timeout\n");
+
+ xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+ }
+}
+
+void xe_device_l2_flush(struct xe_device *xe)
+{
+ struct xe_gt *gt;
+ int err;
+
+ gt = xe_root_mmio_gt(xe);
+
+ if (!XE_WA(gt, 16023588340))
+ return;
+
+ err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+ if (err)
+ return;
+
+ spin_lock(&gt->global_invl_lock);
+ xe_mmio_write32(&gt->mmio, XE2_GLOBAL_INVAL, 0x1);
+
+ if (xe_mmio_wait32(&gt->mmio, XE2_GLOBAL_INVAL, 0x1, 0x0, 500, NULL, true))
+ xe_gt_err_once(gt, "Global invalidation timeout\n");
+ spin_unlock(&gt->global_invl_lock);
+
+ xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+}
+
+u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size)
+{
+ return xe_device_has_flat_ccs(xe) ?
+ DIV_ROUND_UP_ULL(size, NUM_BYTES_PER_CCS_BYTE(xe)) : 0;
+}
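
The CCS sizing above is a plain round-up division of the surface size by the platform's main-memory-to-CCS byte ratio. The worked example below assumes a 1:256 ratio purely for illustration; the real value comes from NUM_BYTES_PER_CCS_BYTE(xe).

#include <stdint.h>
#include <stdio.h>

#define DIV_ROUND_UP_ULL(n, d)	(((n) + (d) - 1) / (d))

int main(void)
{
	const uint64_t bytes_per_ccs_byte = 256;	/* assumed ratio for the example */
	const uint64_t sizes[] = { 4096, 1ULL << 20, (1ULL << 20) + 1 };

	for (unsigned int i = 0; i < 3; i++)
		printf("%llu surface bytes -> %llu CCS bytes\n",
		       (unsigned long long)sizes[i],
		       (unsigned long long)DIV_ROUND_UP_ULL(sizes[i], bytes_per_ccs_byte));
	return 0;
}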
+
+/**
+ * xe_device_assert_mem_access - Inspect the current runtime_pm state.
+ * @xe: xe device instance
+ *
+ * To be used before any kind of memory access. It will splat a debug warning
+ * if the device is currently sleeping. But it doesn't guarantee in any way
+ * that the device is going to remain awake. Xe PM runtime get and put
+ * functions might be added to the outer bound of the memory access, while
+ * this check is intended for inner usage to splat some warning if the worst
+ * case has just happened.
+ */
+void xe_device_assert_mem_access(struct xe_device *xe)
+{
+ xe_assert(xe, !xe_pm_runtime_suspended(xe));
+}
+
+void xe_device_snapshot_print(struct xe_device *xe, struct drm_printer *p)
+{
+ struct xe_gt *gt;
+ u8 id;
+
+ drm_printf(p, "PCI ID: 0x%04x\n", xe->info.devid);
+ drm_printf(p, "PCI revision: 0x%02x\n", xe->info.revid);
+
+ for_each_gt(gt, xe, id) {
+ drm_printf(p, "GT id: %u\n", id);
+ drm_printf(p, "\tTile: %u\n", gt->tile->id);
+ drm_printf(p, "\tType: %s\n",
+ gt->info.type == XE_GT_TYPE_MAIN ? "main" : "media");
+ drm_printf(p, "\tIP ver: %u.%u.%u\n",
+ REG_FIELD_GET(GMD_ID_ARCH_MASK, gt->info.gmdid),
+ REG_FIELD_GET(GMD_ID_RELEASE_MASK, gt->info.gmdid),
+ REG_FIELD_GET(GMD_ID_REVID, gt->info.gmdid));
+ drm_printf(p, "\tCS reference clock: %u\n", gt->info.reference_clock);
+ }
+}
+
+u64 xe_device_canonicalize_addr(struct xe_device *xe, u64 address)
+{
+ return sign_extend64(address, xe->info.va_bits - 1);
+}
+
+u64 xe_device_uncanonicalize_addr(struct xe_device *xe, u64 address)
+{
+ return address & GENMASK_ULL(xe->info.va_bits - 1, 0);
+}
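
Canonicalization here is just sign extension from the device's VA width, and uncanonicalization masks the high bits back off. A userspace sketch, assuming a 48-bit VA width for illustration (the driver uses xe->info.va_bits):

#include <stdint.h>
#include <stdio.h>

/* Same semantics as the kernel's sign_extend64(): 'index' is the bit
 * position of the sign bit. */
static int64_t sign_extend64(uint64_t value, int index)
{
	int shift = 63 - index;

	return (int64_t)(value << shift) >> shift;
}

int main(void)
{
	const unsigned int va_bits = 48;		/* assumed for the example */
	uint64_t addr = 0x0000800000001000ULL;		/* bit 47 set */
	uint64_t canonical = (uint64_t)sign_extend64(addr, va_bits - 1);
	uint64_t stripped = canonical & ((1ULL << va_bits) - 1);

	printf("raw       0x%016llx\n", (unsigned long long)addr);
	printf("canonical 0x%016llx\n", (unsigned long long)canonical);
	printf("stripped  0x%016llx\n", (unsigned long long)stripped);
	return 0;
}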
+
+static void xe_device_wedged_fini(struct drm_device *drm, void *arg)
+{
+ struct xe_device *xe = arg;
+
+ xe_pm_runtime_put(xe);
+}
+
+/**
+ * xe_device_declare_wedged - Declare device wedged
+ * @xe: xe device instance
+ *
+ * This is a final state that can only be cleared with a module
+ * re-probe (unbind + bind).
+ * In this state every IOCTL will be blocked so the GT cannot be used.
+ * In general it will be called upon any critical error such as gt reset
+ * failure or guc loading failure.
+ * If xe.wedged module parameter is set to 2, this function will be called
+ * on every single execution timeout (a.k.a. GPU hang) right after devcoredump
+ * snapshot capture. In this mode, GT reset won't be attempted so the state of
+ * the issue is preserved for further debugging.
+ */
+void xe_device_declare_wedged(struct xe_device *xe)
+{
+ struct xe_gt *gt;
+ u8 id;
+
+ if (xe->wedged.mode == 0) {
+ drm_dbg(&xe->drm, "Wedged mode is forcibly disabled\n");
+ return;
+ }
+
+ xe_pm_runtime_get_noresume(xe);
+
+ if (drmm_add_action_or_reset(&xe->drm, xe_device_wedged_fini, xe)) {
+ drm_err(&xe->drm, "Failed to register xe_device_wedged_fini clean-up. Although device is wedged.\n");
+ return;
+ }
+
+ if (!atomic_xchg(&xe->wedged.flag, 1)) {
+ xe->needs_flr_on_fini = true;
+ drm_err(&xe->drm,
+ "CRITICAL: Xe has declared device %s as wedged.\n"
+ "IOCTLs and executions are blocked. Only a rebind may clear the failure\n"
+ "Please file a _new_ bug report at https://gitlab.freedesktop.org/drm/xe/kernel/issues/new\n",
+ dev_name(xe->drm.dev));
+ }
+
+ for_each_gt(gt, xe, id)
+ xe_gt_declare_wedged(gt);
+}
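
Note how the atomic_xchg() above makes the wedging declaration idempotent: only the first caller sees the flag transition from 0 and emits the CRITICAL message, no matter how many error paths race into this function. A minimal C11 sketch of that declare-once pattern, with stand-in names rather than the driver's state:

#include <stdatomic.h>
#include <stdio.h>

static atomic_int wedged_flag;

static void declare_wedged(void)
{
	/* atomic_exchange returns the previous value: only the first
	 * caller observes 0 and performs the one-time actions. */
	if (!atomic_exchange(&wedged_flag, 1))
		printf("CRITICAL: device declared wedged (first caller only)\n");
}

int main(void)
{
	declare_wedged();
	declare_wedged();	/* second call is a no-op */
	return 0;
}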
diff --git a/rr-cache/3202f565d085f3a7e6151c8b138d91393b3520cd/preimage b/rr-cache/3202f565d085f3a7e6151c8b138d91393b3520cd/preimage
new file mode 100644
index 000000000000..daa2b53c6258
--- /dev/null
+++ b/rr-cache/3202f565d085f3a7e6151c8b138d91393b3520cd/preimage
@@ -0,0 +1,1041 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#include "xe_device.h"
+
+#include <linux/aperture.h>
+#include <linux/delay.h>
+#include <linux/fault-inject.h>
+#include <linux/units.h>
+
+#include <drm/drm_atomic_helper.h>
+#include <drm/drm_client.h>
+#include <drm/drm_gem_ttm_helper.h>
+#include <drm/drm_ioctl.h>
+#include <drm/drm_managed.h>
+#include <drm/drm_print.h>
+#include <uapi/drm/xe_drm.h>
+
+#include "display/xe_display.h"
+#include "instructions/xe_gpu_commands.h"
+#include "regs/xe_gt_regs.h"
+#include "regs/xe_regs.h"
+#include "xe_bo.h"
+#include "xe_debugfs.h"
+#include "xe_devcoredump.h"
+#include "xe_dma_buf.h"
+#include "xe_drm_client.h"
+#include "xe_drv.h"
+#include "xe_exec.h"
+#include "xe_exec_queue.h"
+#include "xe_force_wake.h"
+#include "xe_ggtt.h"
+#include "xe_gsc_proxy.h"
+#include "xe_gt.h"
+#include "xe_gt_mcr.h"
+#include "xe_gt_printk.h"
+#include "xe_gt_sriov_vf.h"
+#include "xe_guc.h"
+#include "xe_hw_engine_group.h"
+#include "xe_hwmon.h"
+#include "xe_irq.h"
+#include "xe_memirq.h"
+#include "xe_mmio.h"
+#include "xe_module.h"
+#include "xe_observation.h"
+#include "xe_pat.h"
+#include "xe_pcode.h"
+#include "xe_pm.h"
+#include "xe_query.h"
+#include "xe_sriov.h"
+#include "xe_tile.h"
+#include "xe_ttm_stolen_mgr.h"
+#include "xe_ttm_sys_mgr.h"
+#include "xe_vm.h"
+#include "xe_vram.h"
+#include "xe_wait_user_fence.h"
+#include "xe_wa.h"
+
+#include <generated/xe_wa_oob.h>
+
+static int xe_file_open(struct drm_device *dev, struct drm_file *file)
+{
+ struct xe_device *xe = to_xe_device(dev);
+ struct xe_drm_client *client;
+ struct xe_file *xef;
+ int ret = -ENOMEM;
+ struct task_struct *task = NULL;
+
+ xef = kzalloc(sizeof(*xef), GFP_KERNEL);
+ if (!xef)
+ return ret;
+
+ client = xe_drm_client_alloc();
+ if (!client) {
+ kfree(xef);
+ return ret;
+ }
+
+ xef->drm = file;
+ xef->client = client;
+ xef->xe = xe;
+
+ mutex_init(&xef->vm.lock);
+ xa_init_flags(&xef->vm.xa, XA_FLAGS_ALLOC1);
+
+ mutex_init(&xef->exec_queue.lock);
+ xa_init_flags(&xef->exec_queue.xa, XA_FLAGS_ALLOC1);
+
+ spin_lock(&xe->clients.lock);
+ xe->clients.count++;
+ spin_unlock(&xe->clients.lock);
+
+ file->driver_priv = xef;
+ kref_init(&xef->refcount);
+
+ task = get_pid_task(rcu_access_pointer(file->pid), PIDTYPE_PID);
+ if (task) {
+ xef->process_name = kstrdup(task->comm, GFP_KERNEL);
+ xef->pid = task->pid;
+ put_task_struct(task);
+ }
+
+ return 0;
+}
+
+static void xe_file_destroy(struct kref *ref)
+{
+ struct xe_file *xef = container_of(ref, struct xe_file, refcount);
+ struct xe_device *xe = xef->xe;
+
+ xa_destroy(&xef->exec_queue.xa);
+ mutex_destroy(&xef->exec_queue.lock);
+ xa_destroy(&xef->vm.xa);
+ mutex_destroy(&xef->vm.lock);
+
+ spin_lock(&xe->clients.lock);
+ xe->clients.count--;
+ spin_unlock(&xe->clients.lock);
+
+ xe_drm_client_put(xef->client);
+ kfree(xef->process_name);
+ kfree(xef);
+}
+
+/**
+ * xe_file_get() - Take a reference to the xe file object
+ * @xef: Pointer to the xe file
+ *
+ * Anyone with a pointer to xef must take a reference to the xe file
+ * object using this call.
+ *
+ * Return: xe file pointer
+ */
+struct xe_file *xe_file_get(struct xe_file *xef)
+{
+ kref_get(&xef->refcount);
+ return xef;
+}
+
+/**
+ * xe_file_put() - Drop a reference to the xe file object
+ * @xef: Pointer to the xe file
+ *
+ * Used to drop reference to the xef object
+ */
+void xe_file_put(struct xe_file *xef)
+{
+ kref_put(&xef->refcount, xe_file_destroy);
+}
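
xe_file_get()/xe_file_put() follow the usual kref rule: every holder of the pointer takes a reference, and the release callback runs exactly once when the last reference is dropped. A small C11-atomics sketch of that lifetime rule, with illustrative types rather than the driver's struct xe_file:

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct object {
	atomic_int refcount;
};

static struct object *object_get(struct object *obj)
{
	atomic_fetch_add(&obj->refcount, 1);
	return obj;
}

static void object_put(struct object *obj)
{
	/* Destroy only when the last reference goes away. */
	if (atomic_fetch_sub(&obj->refcount, 1) == 1) {
		printf("last reference dropped, destroying\n");
		free(obj);
	}
}

int main(void)
{
	struct object *obj = calloc(1, sizeof(*obj));

	atomic_init(&obj->refcount, 1);	/* creator's reference */
	object_get(obj);		/* a second holder */
	object_put(obj);		/* second holder done */
	object_put(obj);		/* creator done: destroyed here */
	return 0;
}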
+
+static void xe_file_close(struct drm_device *dev, struct drm_file *file)
+{
+ struct xe_device *xe = to_xe_device(dev);
+ struct xe_file *xef = file->driver_priv;
+ struct xe_vm *vm;
+ struct xe_exec_queue *q;
+ unsigned long idx;
+
+ xe_pm_runtime_get(xe);
+
+ /*
+ * No need for exec_queue.lock here as there is no contention for it
+ * when FD is closing as IOCTLs presumably can't be modifying the
+ * xarray. Taking exec_queue.lock here causes undue dependency on
+ * vm->lock taken during xe_exec_queue_kill().
+ */
+ xa_for_each(&xef->exec_queue.xa, idx, q) {
+ if (q->vm && q->hwe->hw_engine_group)
+ xe_hw_engine_group_del_exec_queue(q->hwe->hw_engine_group, q);
+ xe_exec_queue_kill(q);
+ xe_exec_queue_put(q);
+ }
+ xa_for_each(&xef->vm.xa, idx, vm)
+ xe_vm_close_and_put(vm);
+
+ xe_file_put(xef);
+
+ xe_pm_runtime_put(xe);
+}
+
+static const struct drm_ioctl_desc xe_ioctls[] = {
+ DRM_IOCTL_DEF_DRV(XE_DEVICE_QUERY, xe_query_ioctl, DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(XE_GEM_CREATE, xe_gem_create_ioctl, DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(XE_GEM_MMAP_OFFSET, xe_gem_mmap_offset_ioctl,
+ DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(XE_VM_CREATE, xe_vm_create_ioctl, DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(XE_VM_DESTROY, xe_vm_destroy_ioctl, DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(XE_VM_BIND, xe_vm_bind_ioctl, DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(XE_EXEC, xe_exec_ioctl, DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(XE_EXEC_QUEUE_CREATE, xe_exec_queue_create_ioctl,
+ DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(XE_EXEC_QUEUE_DESTROY, xe_exec_queue_destroy_ioctl,
+ DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(XE_EXEC_QUEUE_GET_PROPERTY, xe_exec_queue_get_property_ioctl,
+ DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(XE_WAIT_USER_FENCE, xe_wait_user_fence_ioctl,
+ DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(XE_OBSERVATION, xe_observation_ioctl, DRM_RENDER_ALLOW),
+};
+
+static long xe_drm_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+ struct drm_file *file_priv = file->private_data;
+ struct xe_device *xe = to_xe_device(file_priv->minor->dev);
+ long ret;
+
+ if (xe_device_wedged(xe))
+ return -ECANCELED;
+
+ ret = xe_pm_runtime_get_ioctl(xe);
+ if (ret >= 0)
+ ret = drm_ioctl(file, cmd, arg);
+ xe_pm_runtime_put(xe);
+
+ return ret;
+}
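
Both ioctl entry points above share the same guard shape: refuse everything once the device is wedged, take a runtime-PM reference for the duration of the call, dispatch, then drop the reference (the code pairs the put with the get unconditionally). A self-contained sketch of that dispatch pattern with stand-in names, not the driver's API:

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

struct fake_dev {
	bool wedged;
	int pm_refs;
};

static int pm_get(struct fake_dev *d)
{
	d->pm_refs++;
	return 0;
}

static void pm_put(struct fake_dev *d)
{
	d->pm_refs--;
}

static long guarded_ioctl(struct fake_dev *d, long (*handler)(struct fake_dev *))
{
	long ret;

	if (d->wedged)
		return -ECANCELED;	/* every IOCTL is blocked once wedged */

	ret = pm_get(d);		/* keep the device awake for the call */
	if (ret >= 0)
		ret = handler(d);
	pm_put(d);			/* always balances the get above */

	return ret;
}

static long dummy_handler(struct fake_dev *d)
{
	(void)d;
	return 0;
}

int main(void)
{
	struct fake_dev d = { .wedged = false };

	printf("healthy: %ld\n", guarded_ioctl(&d, dummy_handler));
	d.wedged = true;
	printf("wedged:  %ld\n", guarded_ioctl(&d, dummy_handler));
	return 0;
}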
+
+#ifdef CONFIG_COMPAT
+static long xe_drm_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+ struct drm_file *file_priv = file->private_data;
+ struct xe_device *xe = to_xe_device(file_priv->minor->dev);
+ long ret;
+
+ if (xe_device_wedged(xe))
+ return -ECANCELED;
+
+ ret = xe_pm_runtime_get_ioctl(xe);
+ if (ret >= 0)
+ ret = drm_compat_ioctl(file, cmd, arg);
+ xe_pm_runtime_put(xe);
+
+ return ret;
+}
+#else
+/* similarly to drm_compat_ioctl, let it be assigned to .compat_ioctl unconditionally */
+#define xe_drm_compat_ioctl NULL
+#endif
+
+static const struct file_operations xe_driver_fops = {
+ .owner = THIS_MODULE,
+ .open = drm_open,
+ .release = drm_release_noglobal,
+ .unlocked_ioctl = xe_drm_ioctl,
+ .mmap = drm_gem_mmap,
+ .poll = drm_poll,
+ .read = drm_read,
+ .compat_ioctl = xe_drm_compat_ioctl,
+ .llseek = noop_llseek,
+#ifdef CONFIG_PROC_FS
+ .show_fdinfo = drm_show_fdinfo,
+#endif
+ .fop_flags = FOP_UNSIGNED_OFFSET,
+};
+
+static struct drm_driver driver = {
+ /* Don't use MTRRs here; the Xserver or userspace app should
+ * deal with them for Intel hardware.
+ */
+ .driver_features =
+ DRIVER_GEM |
+ DRIVER_RENDER | DRIVER_SYNCOBJ |
+ DRIVER_SYNCOBJ_TIMELINE | DRIVER_GEM_GPUVA,
+ .open = xe_file_open,
+ .postclose = xe_file_close,
+
+ .gem_prime_import = xe_gem_prime_import,
+
+ .dumb_create = xe_bo_dumb_create,
+ .dumb_map_offset = drm_gem_ttm_dumb_map_offset,
+#ifdef CONFIG_PROC_FS
+ .show_fdinfo = xe_drm_client_fdinfo,
+#endif
+ .ioctls = xe_ioctls,
+ .num_ioctls = ARRAY_SIZE(xe_ioctls),
+ .fops = &xe_driver_fops,
+ .name = DRIVER_NAME,
+ .desc = DRIVER_DESC,
+ .date = DRIVER_DATE,
+ .major = DRIVER_MAJOR,
+ .minor = DRIVER_MINOR,
+ .patchlevel = DRIVER_PATCHLEVEL,
+};
+
+static void xe_device_destroy(struct drm_device *dev, void *dummy)
+{
+ struct xe_device *xe = to_xe_device(dev);
+
+ if (xe->preempt_fence_wq)
+ destroy_workqueue(xe->preempt_fence_wq);
+
+ if (xe->ordered_wq)
+ destroy_workqueue(xe->ordered_wq);
+
+ if (xe->unordered_wq)
+ destroy_workqueue(xe->unordered_wq);
+
+ if (xe->destroy_wq)
+ destroy_workqueue(xe->destroy_wq);
+
+ ttm_device_fini(&xe->ttm);
+}
+
+struct xe_device *xe_device_create(struct pci_dev *pdev,
+ const struct pci_device_id *ent)
+{
+ struct xe_device *xe;
+ int err;
+
+ xe_display_driver_set_hooks(&driver);
+
+ err = aperture_remove_conflicting_pci_devices(pdev, driver.name);
+ if (err)
+ return ERR_PTR(err);
+
+ xe = devm_drm_dev_alloc(&pdev->dev, &driver, struct xe_device, drm);
+ if (IS_ERR(xe))
+ return xe;
+
+ err = ttm_device_init(&xe->ttm, &xe_ttm_funcs, xe->drm.dev,
+ xe->drm.anon_inode->i_mapping,
+ xe->drm.vma_offset_manager, false, false);
+ if (WARN_ON(err))
+ goto err;
+
+ err = drmm_add_action_or_reset(&xe->drm, xe_device_destroy, NULL);
+ if (err)
+ goto err;
+
+ xe->info.devid = pdev->device;
+ xe->info.revid = pdev->revision;
+ xe->info.force_execlist = xe_modparam.force_execlist;
+
+ spin_lock_init(&xe->irq.lock);
+ spin_lock_init(&xe->clients.lock);
+
+ init_waitqueue_head(&xe->ufence_wq);
+
+ init_rwsem(&xe->usm.lock);
+
+ xa_init_flags(&xe->usm.asid_to_vm, XA_FLAGS_ALLOC);
+
+ if (IS_ENABLED(CONFIG_DRM_XE_DEBUG)) {
+ /* Trigger a large asid and an early asid wrap. */
+ u32 asid;
+
+ BUILD_BUG_ON(XE_MAX_ASID < 2);
+ err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, NULL,
+ XA_LIMIT(XE_MAX_ASID - 2, XE_MAX_ASID - 1),
+ &xe->usm.next_asid, GFP_KERNEL);
+ drm_WARN_ON(&xe->drm, err);
+ if (err >= 0)
+ xa_erase(&xe->usm.asid_to_vm, asid);
+ }
+
+ spin_lock_init(&xe->pinned.lock);
+ INIT_LIST_HEAD(&xe->pinned.kernel_bo_present);
+ INIT_LIST_HEAD(&xe->pinned.external_vram);
+ INIT_LIST_HEAD(&xe->pinned.evicted);
+
+ xe->preempt_fence_wq = alloc_ordered_workqueue("xe-preempt-fence-wq", 0);
+ xe->ordered_wq = alloc_ordered_workqueue("xe-ordered-wq", 0);
+ xe->unordered_wq = alloc_workqueue("xe-unordered-wq", 0, 0);
+ xe->destroy_wq = alloc_workqueue("xe-destroy-wq", 0, 0);
+ if (!xe->ordered_wq || !xe->unordered_wq ||
+ !xe->preempt_fence_wq || !xe->destroy_wq) {
+ /*
+ * Cleanup done in xe_device_destroy via
+ * drmm_add_action_or_reset register above
+ */
+ drm_err(&xe->drm, "Failed to allocate xe workqueues\n");
+ err = -ENOMEM;
+ goto err;
+ }
+
+ err = xe_display_create(xe);
+ if (WARN_ON(err))
+ goto err;
+
+ return xe;
+
+err:
+ return ERR_PTR(err);
+}
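
The error handling above leans on the managed-cleanup idea: once xe_device_destroy() is registered via drmm_add_action_or_reset(), every later "goto err" can simply return and the registered action tears down whatever was allocated. The sketch below illustrates that register-a-teardown-action pattern in plain C with stand-in names; it is not the drmm_* implementation.

#include <stdio.h>
#include <stdlib.h>

struct cleanup {
	void (*fn)(void *);
	void *arg;
	struct cleanup *next;
};

static struct cleanup *actions;

/* Register a teardown callback; on allocation failure run it right away,
 * mirroring the "or_reset" behaviour. */
static int add_action(void (*fn)(void *), void *arg)
{
	struct cleanup *c = malloc(sizeof(*c));

	if (!c) {
		fn(arg);
		return -1;
	}
	c->fn = fn;
	c->arg = arg;
	c->next = actions;
	actions = c;
	return 0;
}

/* Run registered actions in LIFO order at teardown time. */
static void run_actions(void)
{
	while (actions) {
		struct cleanup *c = actions;

		actions = c->next;
		c->fn(c->arg);
		free(c);
	}
}

static void free_buf(void *p)
{
	printf("releasing %p\n", p);
	free(p);
}

int main(void)
{
	void *buf = malloc(64);

	if (add_action(free_buf, buf))
		return 1;
	run_actions();
	return 0;
}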
+ALLOW_ERROR_INJECTION(xe_device_create, ERRNO); /* See xe_pci_probe() */
+
+static bool xe_driver_flr_disabled(struct xe_device *xe)
+{
+ return xe_mmio_read32(xe_root_tile_mmio(xe), GU_CNTL_PROTECTED) & DRIVERINT_FLR_DIS;
+}
+
+/*
+ * The driver-initiated FLR is the highest level of reset that we can trigger
+ * from within the driver. It is different from the PCI FLR in that it doesn't
+ * fully reset the SGUnit and doesn't modify the PCI config space and therefore
+ * it doesn't require a re-enumeration of the PCI BARs. However, the
+ * driver-initiated FLR does still cause a reset of both GT and display and a
+ * memory wipe of local and stolen memory, so recovery would require a full HW
+ * re-init and saving/restoring (or re-populating) the wiped memory. Since we
+ * perform the FLR as the very last action before releasing access to the HW
+ * during the driver release flow, we don't attempt recovery at all, because
+ * if/when a new instance of i915 is bound to the device it will do a full
+ * re-init anyway.
+ */
+static void __xe_driver_flr(struct xe_device *xe)
+{
+ const unsigned int flr_timeout = 3 * MICRO; /* specs recommend a 3s wait */
+ struct xe_mmio *mmio = xe_root_tile_mmio(xe);
+ int ret;
+
+ drm_dbg(&xe->drm, "Triggering Driver-FLR\n");
+
+ /*
+ * Make sure any pending FLR requests have cleared by waiting for the
+ * FLR trigger bit to go to zero. Also clear GU_DEBUG's DRIVERFLR_STATUS
+ * to make sure it's not still set from a prior attempt (it's a
+ * write-to-clear bit).
+ * Note that we should never be in a situation where a previous attempt
+ * is still pending (unless the HW is totally dead), but better to be
+ * safe in case something unexpected happens.
+ */
+ ret = xe_mmio_wait32(mmio, GU_CNTL, DRIVERFLR, 0, flr_timeout, NULL, false);
+ if (ret) {
+ drm_err(&xe->drm, "Driver-FLR-prepare wait for ready failed! %d\n", ret);
+ return;
+ }
+ xe_mmio_write32(mmio, GU_DEBUG, DRIVERFLR_STATUS);
+
+ /* Trigger the actual Driver-FLR */
+ xe_mmio_rmw32(mmio, GU_CNTL, 0, DRIVERFLR);
+
+ /* Wait for hardware teardown to complete */
+ ret = xe_mmio_wait32(mmio, GU_CNTL, DRIVERFLR, 0, flr_timeout, NULL, false);
+ if (ret) {
+ drm_err(&xe->drm, "Driver-FLR-teardown wait completion failed! %d\n", ret);
+ return;
+ }
+
+ /* Wait for hardware/firmware re-init to complete */
+ ret = xe_mmio_wait32(mmio, GU_DEBUG, DRIVERFLR_STATUS, DRIVERFLR_STATUS,
+ flr_timeout, NULL, false);
+ if (ret) {
+ drm_err(&xe->drm, "Driver-FLR-reinit wait completion failed! %d\n", ret);
+ return;
+ }
+
+ /* Clear sticky completion status */
+ xe_mmio_write32(mmio, GU_DEBUG, DRIVERFLR_STATUS);
+}
+
+static void xe_driver_flr(struct xe_device *xe)
+{
+ if (xe_driver_flr_disabled(xe)) {
+ drm_info_once(&xe->drm, "BIOS Disabled Driver-FLR\n");
+ return;
+ }
+
+ __xe_driver_flr(xe);
+}
+
+static void xe_driver_flr_fini(void *arg)
+{
+ struct xe_device *xe = arg;
+
+ if (xe->needs_flr_on_fini)
+ xe_driver_flr(xe);
+}
+
+static void xe_device_sanitize(void *arg)
+{
+ struct xe_device *xe = arg;
+ struct xe_gt *gt;
+ u8 id;
+
+ for_each_gt(gt, xe, id)
+ xe_gt_sanitize(gt);
+}
+
+static int xe_set_dma_info(struct xe_device *xe)
+{
+ unsigned int mask_size = xe->info.dma_mask_size;
+ int err;
+
+ dma_set_max_seg_size(xe->drm.dev, xe_sg_segment_size(xe->drm.dev));
+
+ err = dma_set_mask(xe->drm.dev, DMA_BIT_MASK(mask_size));
+ if (err)
+ goto mask_err;
+
+ err = dma_set_coherent_mask(xe->drm.dev, DMA_BIT_MASK(mask_size));
+ if (err)
+ goto mask_err;
+
+ return 0;
+
+mask_err:
+ drm_err(&xe->drm, "Can't set DMA mask/consistent mask (%d)\n", err);
+ return err;
+}
+
+static bool verify_lmem_ready(struct xe_device *xe)
+{
+ u32 val = xe_mmio_read32(xe_root_tile_mmio(xe), GU_CNTL) & LMEM_INIT;
+
+ return !!val;
+}
+
+static int wait_for_lmem_ready(struct xe_device *xe)
+{
+ unsigned long timeout, start;
+
+ if (!IS_DGFX(xe))
+ return 0;
+
+ if (IS_SRIOV_VF(xe))
+ return 0;
+
+ if (verify_lmem_ready(xe))
+ return 0;
+
+ drm_dbg(&xe->drm, "Waiting for lmem initialization\n");
+
+ start = jiffies;
+ timeout = start + msecs_to_jiffies(60 * 1000); /* 60 sec! */
+
+ do {
+ if (signal_pending(current))
+ return -EINTR;
+
+ /*
+ * The boot firmware initializes local memory and
+ * assesses its health. If memory training fails, the punit will
+ * have been instructed to keep the GT powered down and we won't
+ * be able to communicate with it.
+ *
+ * If the status check is done before the punit updates the register,
+ * it can lead to the system being unusable. Use a timeout and
+ * defer the probe to prevent this.
+ */
+ if (time_after(jiffies, timeout)) {
+ drm_dbg(&xe->drm, "lmem not initialized by firmware\n");
+ return -EPROBE_DEFER;
+ }
+
+ msleep(20);
+
+ } while (!verify_lmem_ready(xe));
+
+ drm_dbg(&xe->drm, "lmem ready after %ums",
+ jiffies_to_msecs(jiffies - start));
+
+ return 0;
+}
+ALLOW_ERROR_INJECTION(wait_for_lmem_ready, ERRNO); /* See xe_pci_probe() */
+
+static void update_device_info(struct xe_device *xe)
+{
+ /* disable features that are not available/applicable to VFs */
+ if (IS_SRIOV_VF(xe)) {
+ xe->info.probe_display = 0;
+ xe->info.has_heci_gscfi = 0;
+ xe->info.skip_guc_pc = 1;
+ xe->info.skip_pcode = 1;
+ }
+}
+
+/**
+ * xe_device_probe_early() - Device early probe
+ * @xe: xe device instance
+ *
+ * Initialize MMIO resources that don't require any
+ * knowledge about tile count. Also initialize pcode and
+ * check vram initialization on root tile.
+ *
+ * Return: 0 on success, error code on failure
+ */
+int xe_device_probe_early(struct xe_device *xe)
+{
+ int err;
+
+ err = xe_mmio_init(xe);
+ if (err)
+ return err;
+
+ xe_sriov_probe_early(xe);
+
+ update_device_info(xe);
+
+ err = xe_pcode_probe_early(xe);
+ if (err)
+ return err;
+
+ err = wait_for_lmem_ready(xe);
+ if (err)
+ return err;
+
+ xe->wedged.mode = xe_modparam.wedged_mode;
+
+ return 0;
+}
+
+static int probe_has_flat_ccs(struct xe_device *xe)
+{
+ struct xe_gt *gt;
+ u32 reg;
+ int err;
+
+ /* Always enabled/disabled, no runtime check to do */
+ if (GRAPHICS_VER(xe) < 20 || !xe->info.has_flat_ccs)
+ return 0;
+
+ gt = xe_root_mmio_gt(xe);
+
+ err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+ if (err)
+ return err;
+
+ reg = xe_gt_mcr_unicast_read_any(gt, XE2_FLAT_CCS_BASE_RANGE_LOWER);
+ xe->info.has_flat_ccs = (reg & XE2_FLAT_CCS_ENABLE);
+
+ if (!xe->info.has_flat_ccs)
+ drm_dbg(&xe->drm,
+ "Flat CCS has been disabled in bios, May lead to performance impact");
+
+ return xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+}
+
+int xe_device_probe(struct xe_device *xe)
+{
+ struct xe_tile *tile;
+ struct xe_gt *gt;
+ int err;
+ u8 last_gt;
+ u8 id;
+
+ xe_pat_init_early(xe);
+
+ err = xe_sriov_init(xe);
+ if (err)
+ return err;
+
+ xe->info.mem_region_mask = 1;
+ err = xe_display_init_nommio(xe);
+ if (err)
+ return err;
+
+ err = xe_set_dma_info(xe);
+ if (err)
+ return err;
+
+ err = xe_mmio_probe_tiles(xe);
+ if (err)
+ return err;
+
+ xe_ttm_sys_mgr_init(xe);
+
+ for_each_gt(gt, xe, id) {
+ err = xe_gt_init_early(gt);
+ if (err)
+ return err;
+
+ /*
+ * Only after this point can GT-specific MMIO operations
+ * (including things like communication with the GuC)
+ * be performed.
+ */
+ xe_gt_mmio_init(gt);
+ }
+
+ for_each_tile(tile, xe, id) {
+ if (IS_SRIOV_VF(xe)) {
+ xe_guc_comm_init_early(&tile->primary_gt->uc.guc);
+ err = xe_gt_sriov_vf_bootstrap(tile->primary_gt);
+ if (err)
+ return err;
+ err = xe_gt_sriov_vf_query_config(tile->primary_gt);
+ if (err)
+ return err;
+ }
+ err = xe_ggtt_init_early(tile->mem.ggtt);
+ if (err)
+ return err;
+ err = xe_memirq_init(&tile->memirq);
+ if (err)
+ return err;
+ }
+
+ for_each_gt(gt, xe, id) {
+ err = xe_gt_init_hwconfig(gt);
+ if (err)
+ return err;
+ }
+
+ err = xe_devcoredump_init(xe);
+ if (err)
+ return err;
+ err = devm_add_action_or_reset(xe->drm.dev, xe_driver_flr_fini, xe);
+ if (err)
+ return err;
+
+ err = xe_display_init_noirq(xe);
+ if (err)
+ return err;
+
+ err = xe_irq_install(xe);
+ if (err)
+ goto err;
+
+ err = probe_has_flat_ccs(xe);
+ if (err)
+ goto err;
+
+ err = xe_vram_probe(xe);
+ if (err)
+ goto err;
+
+ for_each_tile(tile, xe, id) {
+ err = xe_tile_init_noalloc(tile);
+ if (err)
+ goto err;
+ }
+
+ /* Allocate and map stolen after potential VRAM resize */
+ xe_ttm_stolen_mgr_init(xe);
+
+ /*
+ * Now that GT is initialized (TTM in particular),
+ * we can try to init display, and inherit the initial fb.
+ * This is the reason the first allocation needs to be done
+ * inside display.
+ */
+ err = xe_display_init_noaccel(xe);
+ if (err)
+ goto err;
+
+ for_each_gt(gt, xe, id) {
+ last_gt = id;
+
+ err = xe_gt_init(gt);
+ if (err)
+ goto err_fini_gt;
+ }
+
+ xe_heci_gsc_init(xe);
+
+ err = xe_oa_init(xe);
+ if (err)
+ goto err_fini_gt;
+
+ err = xe_display_init(xe);
+ if (err)
+ goto err_fini_oa;
+
+ err = drm_dev_register(&xe->drm, 0);
+ if (err)
+ goto err_fini_display;
+
+ xe_display_register(xe);
+
+ xe_oa_register(xe);
+
+ xe_debugfs_register(xe);
+
+ xe_hwmon_register(xe);
+
+ for_each_gt(gt, xe, id)
+ xe_gt_sanitize_freq(gt);
+
+ return devm_add_action_or_reset(xe->drm.dev, xe_device_sanitize, xe);
+
+err_fini_display:
+ xe_display_driver_remove(xe);
+
+err_fini_oa:
+ xe_oa_fini(xe);
+
+err_fini_gt:
+ for_each_gt(gt, xe, id) {
+ if (id < last_gt)
+ xe_gt_remove(gt);
+ else
+ break;
+ }
+
+err:
+ xe_display_fini(xe);
+ return err;
+}
+
+static void xe_device_remove_display(struct xe_device *xe)
+{
+ xe_display_unregister(xe);
+
+ drm_dev_unplug(&xe->drm);
+ xe_display_driver_remove(xe);
+}
+
+void xe_device_remove(struct xe_device *xe)
+{
+ struct xe_gt *gt;
+ u8 id;
+
+ xe_oa_unregister(xe);
+
+ xe_device_remove_display(xe);
+
+ xe_display_fini(xe);
+
+ xe_oa_fini(xe);
+
+ xe_heci_gsc_fini(xe);
+
+ for_each_gt(gt, xe, id)
+ xe_gt_remove(gt);
+}
+
+void xe_device_shutdown(struct xe_device *xe)
+{
+ struct xe_gt *gt;
+ u8 id;
+
+ drm_dbg(&xe->drm, "Shutting down device\n");
+
+ if (xe_driver_flr_disabled(xe)) {
+ xe_display_pm_shutdown(xe);
+
+ xe_irq_suspend(xe);
+
+ for_each_gt(gt, xe, id)
+ xe_gt_shutdown(gt);
+
+ xe_display_pm_shutdown_late(xe);
+ } else {
+ /* BOOM! */
+ __xe_driver_flr(xe);
+ }
+}
+
+/**
+ * xe_device_wmb() - Device specific write memory barrier
+ * @xe: the &xe_device
+ *
+ * While wmb() is sufficient for a barrier if we use system memory, on discrete
+ * platforms with device memory we additionally need to issue a register write.
+ * Since it doesn't matter which register we write to, use the read-only VF_CAP
+ * register that is also marked as accessible by the VFs.
+ */
+void xe_device_wmb(struct xe_device *xe)
+{
+ wmb();
+ if (IS_DGFX(xe))
+ xe_mmio_write32(xe_root_tile_mmio(xe), VF_CAP_REG, 0);
+}
+
+/**
+ * xe_device_td_flush() - Flush transient L3 cache entries
+ * @xe: The device
+ *
+ * Display engine has direct access to memory and is never coherent with L3/L4
+ * caches (or CPU caches), however KMD is responsible for specifically flushing
+ * transient L3 GPU cache entries prior to the flip sequence to ensure scanout
+ * can happen from such a surface without seeing corruption.
+ *
+ * Display surfaces can be tagged as transient by mapping them using one of the
+ * various L3:XD PAT index modes on Xe2.
+ *
+ * Note: On non-discrete xe2 platforms, like LNL, the entire L3 cache is flushed
+ * at the end of each submission via PIPE_CONTROL for compute/render, since SA
+ * Media is not coherent with L3 and we want to support render-vs-media
+ * use cases. For other engines like copy/blt the HW internally forces uncached
+ * behaviour, hence we can skip the TDF on such platforms.
+ */
+void xe_device_td_flush(struct xe_device *xe)
+{
+ struct xe_gt *gt;
+ u8 id;
+
+ if (!IS_DGFX(xe) || GRAPHICS_VER(xe) < 20)
+ return;
+
+ if (XE_WA(xe_root_mmio_gt(xe), 16023588340)) {
+ xe_device_l2_flush(xe);
+ return;
+ }
+
+ for_each_gt(gt, xe, id) {
+ if (xe_gt_is_media_type(gt))
+ continue;
+
+ if (xe_force_wake_get(gt_to_fw(gt), XE_FW_GT))
+ return;
+
+ xe_mmio_write32(&gt->mmio, XE2_TDF_CTRL, TRANSIENT_FLUSH_REQUEST);
+ /*
+ * FIXME: We can likely do better here with our choice of
+ * timeout. Currently we just assume the worst case, i.e. 150us,
+ * which is believed to be sufficient to cover the worst case
+ * scenario on current platforms if all cache entries are
+ * transient and need to be flushed.
+ */
+ if (xe_mmio_wait32(&gt->mmio, XE2_TDF_CTRL, TRANSIENT_FLUSH_REQUEST, 0,
+ 150, NULL, false))
+ xe_gt_err_once(gt, "TD flush timeout\n");
+
+ xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+ }
+}
+
+void xe_device_l2_flush(struct xe_device *xe)
+{
+ struct xe_gt *gt;
+ int err;
+
+ gt = xe_root_mmio_gt(xe);
+
+ if (!XE_WA(gt, 16023588340))
+ return;
+
+ err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+ if (err)
+ return;
+
+ spin_lock(&gt->global_invl_lock);
+ xe_mmio_write32(&gt->mmio, XE2_GLOBAL_INVAL, 0x1);
+
+<<<<<<<
+ if (xe_mmio_wait32(&gt->mmio, XE2_GLOBAL_INVAL, 0x1, 0x0, 500, NULL, true))
+=======
+ if (xe_mmio_wait32(gt, XE2_GLOBAL_INVAL, 0x1, 0x0, 500, NULL, true))
+>>>>>>>
+ xe_gt_err_once(gt, "Global invalidation timeout\n");
+ spin_unlock(&gt->global_invl_lock);
+
+ xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+}
+
+u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size)
+{
+ return xe_device_has_flat_ccs(xe) ?
+ DIV_ROUND_UP_ULL(size, NUM_BYTES_PER_CCS_BYTE(xe)) : 0;
+}
+
+/**
+ * xe_device_assert_mem_access - Inspect the current runtime_pm state.
+ * @xe: xe device instance
+ *
+ * To be used before any kind of memory access. It will splat a debug warning
+ * if the device is currently sleeping. But it doesn't guarantee in any way
+ * that the device is going to remain awake. Xe PM runtime get and put
+ * functions might be added to the outer bound of the memory access, while
+ * this check is intended for inner usage to splat some warning if the worst
+ * case has just happened.
+ */
+void xe_device_assert_mem_access(struct xe_device *xe)
+{
+ xe_assert(xe, !xe_pm_runtime_suspended(xe));
+}
+
+void xe_device_snapshot_print(struct xe_device *xe, struct drm_printer *p)
+{
+ struct xe_gt *gt;
+ u8 id;
+
+ drm_printf(p, "PCI ID: 0x%04x\n", xe->info.devid);
+ drm_printf(p, "PCI revision: 0x%02x\n", xe->info.revid);
+
+ for_each_gt(gt, xe, id) {
+ drm_printf(p, "GT id: %u\n", id);
+ drm_printf(p, "\tTile: %u\n", gt->tile->id);
+ drm_printf(p, "\tType: %s\n",
+ gt->info.type == XE_GT_TYPE_MAIN ? "main" : "media");
+ drm_printf(p, "\tIP ver: %u.%u.%u\n",
+ REG_FIELD_GET(GMD_ID_ARCH_MASK, gt->info.gmdid),
+ REG_FIELD_GET(GMD_ID_RELEASE_MASK, gt->info.gmdid),
+ REG_FIELD_GET(GMD_ID_REVID, gt->info.gmdid));
+ drm_printf(p, "\tCS reference clock: %u\n", gt->info.reference_clock);
+ }
+}
+
+u64 xe_device_canonicalize_addr(struct xe_device *xe, u64 address)
+{
+ return sign_extend64(address, xe->info.va_bits - 1);
+}
+
+u64 xe_device_uncanonicalize_addr(struct xe_device *xe, u64 address)
+{
+ return address & GENMASK_ULL(xe->info.va_bits - 1, 0);
+}
+
+static void xe_device_wedged_fini(struct drm_device *drm, void *arg)
+{
+ struct xe_device *xe = arg;
+
+ xe_pm_runtime_put(xe);
+}
+
+/**
+ * xe_device_declare_wedged - Declare device wedged
+ * @xe: xe device instance
+ *
+ * This is a final state that can only be cleared with a module
+ * re-probe (unbind + bind).
+ * In this state every IOCTL will be blocked so the GT cannot be used.
+ * In general it will be called upon any critical error such as gt reset
+ * failure or guc loading failure.
+ * If xe.wedged module parameter is set to 2, this function will be called
+ * on every single execution timeout (a.k.a. GPU hang) right after devcoredump
+ * snapshot capture. In this mode, GT reset won't be attempted so the state of
+ * the issue is preserved for further debugging.
+ */
+void xe_device_declare_wedged(struct xe_device *xe)
+{
+ struct xe_gt *gt;
+ u8 id;
+
+ if (xe->wedged.mode == 0) {
+ drm_dbg(&xe->drm, "Wedged mode is forcibly disabled\n");
+ return;
+ }
+
+ xe_pm_runtime_get_noresume(xe);
+
+ if (drmm_add_action_or_reset(&xe->drm, xe_device_wedged_fini, xe)) {
+ drm_err(&xe->drm, "Failed to register xe_device_wedged_fini clean-up. Although device is wedged.\n");
+ return;
+ }
+
+ if (!atomic_xchg(&xe->wedged.flag, 1)) {
+ xe->needs_flr_on_fini = true;
+ drm_err(&xe->drm,
+ "CRITICAL: Xe has declared device %s as wedged.\n"
+ "IOCTLs and executions are blocked. Only a rebind may clear the failure\n"
+ "Please file a _new_ bug report at https://gitlab.freedesktop.org/drm/xe/kernel/issues/new\n",
+ dev_name(xe->drm.dev));
+ }
+
+ for_each_gt(gt, xe, id)
+ xe_gt_declare_wedged(gt);
+}
diff --git a/rr-cache/3202f565d085f3a7e6151c8b138d91393b3520cd/preimage.1 b/rr-cache/3202f565d085f3a7e6151c8b138d91393b3520cd/preimage.1
new file mode 100644
index 000000000000..daa2b53c6258
--- /dev/null
+++ b/rr-cache/3202f565d085f3a7e6151c8b138d91393b3520cd/preimage.1
@@ -0,0 +1,1041 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#include "xe_device.h"
+
+#include <linux/aperture.h>
+#include <linux/delay.h>
+#include <linux/fault-inject.h>
+#include <linux/units.h>
+
+#include <drm/drm_atomic_helper.h>
+#include <drm/drm_client.h>
+#include <drm/drm_gem_ttm_helper.h>
+#include <drm/drm_ioctl.h>
+#include <drm/drm_managed.h>
+#include <drm/drm_print.h>
+#include <uapi/drm/xe_drm.h>
+
+#include "display/xe_display.h"
+#include "instructions/xe_gpu_commands.h"
+#include "regs/xe_gt_regs.h"
+#include "regs/xe_regs.h"
+#include "xe_bo.h"
+#include "xe_debugfs.h"
+#include "xe_devcoredump.h"
+#include "xe_dma_buf.h"
+#include "xe_drm_client.h"
+#include "xe_drv.h"
+#include "xe_exec.h"
+#include "xe_exec_queue.h"
+#include "xe_force_wake.h"
+#include "xe_ggtt.h"
+#include "xe_gsc_proxy.h"
+#include "xe_gt.h"
+#include "xe_gt_mcr.h"
+#include "xe_gt_printk.h"
+#include "xe_gt_sriov_vf.h"
+#include "xe_guc.h"
+#include "xe_hw_engine_group.h"
+#include "xe_hwmon.h"
+#include "xe_irq.h"
+#include "xe_memirq.h"
+#include "xe_mmio.h"
+#include "xe_module.h"
+#include "xe_observation.h"
+#include "xe_pat.h"
+#include "xe_pcode.h"
+#include "xe_pm.h"
+#include "xe_query.h"
+#include "xe_sriov.h"
+#include "xe_tile.h"
+#include "xe_ttm_stolen_mgr.h"
+#include "xe_ttm_sys_mgr.h"
+#include "xe_vm.h"
+#include "xe_vram.h"
+#include "xe_wait_user_fence.h"
+#include "xe_wa.h"
+
+#include <generated/xe_wa_oob.h>
+
+static int xe_file_open(struct drm_device *dev, struct drm_file *file)
+{
+ struct xe_device *xe = to_xe_device(dev);
+ struct xe_drm_client *client;
+ struct xe_file *xef;
+ int ret = -ENOMEM;
+ struct task_struct *task = NULL;
+
+ xef = kzalloc(sizeof(*xef), GFP_KERNEL);
+ if (!xef)
+ return ret;
+
+ client = xe_drm_client_alloc();
+ if (!client) {
+ kfree(xef);
+ return ret;
+ }
+
+ xef->drm = file;
+ xef->client = client;
+ xef->xe = xe;
+
+ mutex_init(&xef->vm.lock);
+ xa_init_flags(&xef->vm.xa, XA_FLAGS_ALLOC1);
+
+ mutex_init(&xef->exec_queue.lock);
+ xa_init_flags(&xef->exec_queue.xa, XA_FLAGS_ALLOC1);
+
+ spin_lock(&xe->clients.lock);
+ xe->clients.count++;
+ spin_unlock(&xe->clients.lock);
+
+ file->driver_priv = xef;
+ kref_init(&xef->refcount);
+
+ task = get_pid_task(rcu_access_pointer(file->pid), PIDTYPE_PID);
+ if (task) {
+ xef->process_name = kstrdup(task->comm, GFP_KERNEL);
+ xef->pid = task->pid;
+ put_task_struct(task);
+ }
+
+ return 0;
+}
+
+static void xe_file_destroy(struct kref *ref)
+{
+ struct xe_file *xef = container_of(ref, struct xe_file, refcount);
+ struct xe_device *xe = xef->xe;
+
+ xa_destroy(&xef->exec_queue.xa);
+ mutex_destroy(&xef->exec_queue.lock);
+ xa_destroy(&xef->vm.xa);
+ mutex_destroy(&xef->vm.lock);
+
+ spin_lock(&xe->clients.lock);
+ xe->clients.count--;
+ spin_unlock(&xe->clients.lock);
+
+ xe_drm_client_put(xef->client);
+ kfree(xef->process_name);
+ kfree(xef);
+}
+
+/**
+ * xe_file_get() - Take a reference to the xe file object
+ * @xef: Pointer to the xe file
+ *
+ * Anyone with a pointer to xef must take a reference to the xe file
+ * object using this call.
+ *
+ * Return: xe file pointer
+ */
+struct xe_file *xe_file_get(struct xe_file *xef)
+{
+ kref_get(&xef->refcount);
+ return xef;
+}
+
+/**
+ * xe_file_put() - Drop a reference to the xe file object
+ * @xef: Pointer to the xe file
+ *
+ * Used to drop reference to the xef object
+ */
+void xe_file_put(struct xe_file *xef)
+{
+ kref_put(&xef->refcount, xe_file_destroy);
+}
+
+static void xe_file_close(struct drm_device *dev, struct drm_file *file)
+{
+ struct xe_device *xe = to_xe_device(dev);
+ struct xe_file *xef = file->driver_priv;
+ struct xe_vm *vm;
+ struct xe_exec_queue *q;
+ unsigned long idx;
+
+ xe_pm_runtime_get(xe);
+
+ /*
+ * No need for exec_queue.lock here as there is no contention for it
+ * when FD is closing as IOCTLs presumably can't be modifying the
+ * xarray. Taking exec_queue.lock here causes undue dependency on
+ * vm->lock taken during xe_exec_queue_kill().
+ */
+ xa_for_each(&xef->exec_queue.xa, idx, q) {
+ if (q->vm && q->hwe->hw_engine_group)
+ xe_hw_engine_group_del_exec_queue(q->hwe->hw_engine_group, q);
+ xe_exec_queue_kill(q);
+ xe_exec_queue_put(q);
+ }
+ xa_for_each(&xef->vm.xa, idx, vm)
+ xe_vm_close_and_put(vm);
+
+ xe_file_put(xef);
+
+ xe_pm_runtime_put(xe);
+}
+
+static const struct drm_ioctl_desc xe_ioctls[] = {
+ DRM_IOCTL_DEF_DRV(XE_DEVICE_QUERY, xe_query_ioctl, DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(XE_GEM_CREATE, xe_gem_create_ioctl, DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(XE_GEM_MMAP_OFFSET, xe_gem_mmap_offset_ioctl,
+ DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(XE_VM_CREATE, xe_vm_create_ioctl, DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(XE_VM_DESTROY, xe_vm_destroy_ioctl, DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(XE_VM_BIND, xe_vm_bind_ioctl, DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(XE_EXEC, xe_exec_ioctl, DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(XE_EXEC_QUEUE_CREATE, xe_exec_queue_create_ioctl,
+ DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(XE_EXEC_QUEUE_DESTROY, xe_exec_queue_destroy_ioctl,
+ DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(XE_EXEC_QUEUE_GET_PROPERTY, xe_exec_queue_get_property_ioctl,
+ DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(XE_WAIT_USER_FENCE, xe_wait_user_fence_ioctl,
+ DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(XE_OBSERVATION, xe_observation_ioctl, DRM_RENDER_ALLOW),
+};
+
+static long xe_drm_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+ struct drm_file *file_priv = file->private_data;
+ struct xe_device *xe = to_xe_device(file_priv->minor->dev);
+ long ret;
+
+ if (xe_device_wedged(xe))
+ return -ECANCELED;
+
+ ret = xe_pm_runtime_get_ioctl(xe);
+ if (ret >= 0)
+ ret = drm_ioctl(file, cmd, arg);
+ xe_pm_runtime_put(xe);
+
+ return ret;
+}
+
+#ifdef CONFIG_COMPAT
+static long xe_drm_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+ struct drm_file *file_priv = file->private_data;
+ struct xe_device *xe = to_xe_device(file_priv->minor->dev);
+ long ret;
+
+ if (xe_device_wedged(xe))
+ return -ECANCELED;
+
+ ret = xe_pm_runtime_get_ioctl(xe);
+ if (ret >= 0)
+ ret = drm_compat_ioctl(file, cmd, arg);
+ xe_pm_runtime_put(xe);
+
+ return ret;
+}
+#else
+/* similarly to drm_compat_ioctl, let it be assigned to .compat_ioctl unconditionally */
+#define xe_drm_compat_ioctl NULL
+#endif
+
+static const struct file_operations xe_driver_fops = {
+ .owner = THIS_MODULE,
+ .open = drm_open,
+ .release = drm_release_noglobal,
+ .unlocked_ioctl = xe_drm_ioctl,
+ .mmap = drm_gem_mmap,
+ .poll = drm_poll,
+ .read = drm_read,
+ .compat_ioctl = xe_drm_compat_ioctl,
+ .llseek = noop_llseek,
+#ifdef CONFIG_PROC_FS
+ .show_fdinfo = drm_show_fdinfo,
+#endif
+ .fop_flags = FOP_UNSIGNED_OFFSET,
+};
+
+static struct drm_driver driver = {
+ /* Don't use MTRRs here; the Xserver or userspace app should
+ * deal with them for Intel hardware.
+ */
+ .driver_features =
+ DRIVER_GEM |
+ DRIVER_RENDER | DRIVER_SYNCOBJ |
+ DRIVER_SYNCOBJ_TIMELINE | DRIVER_GEM_GPUVA,
+ .open = xe_file_open,
+ .postclose = xe_file_close,
+
+ .gem_prime_import = xe_gem_prime_import,
+
+ .dumb_create = xe_bo_dumb_create,
+ .dumb_map_offset = drm_gem_ttm_dumb_map_offset,
+#ifdef CONFIG_PROC_FS
+ .show_fdinfo = xe_drm_client_fdinfo,
+#endif
+ .ioctls = xe_ioctls,
+ .num_ioctls = ARRAY_SIZE(xe_ioctls),
+ .fops = &xe_driver_fops,
+ .name = DRIVER_NAME,
+ .desc = DRIVER_DESC,
+ .date = DRIVER_DATE,
+ .major = DRIVER_MAJOR,
+ .minor = DRIVER_MINOR,
+ .patchlevel = DRIVER_PATCHLEVEL,
+};
+
+static void xe_device_destroy(struct drm_device *dev, void *dummy)
+{
+ struct xe_device *xe = to_xe_device(dev);
+
+ if (xe->preempt_fence_wq)
+ destroy_workqueue(xe->preempt_fence_wq);
+
+ if (xe->ordered_wq)
+ destroy_workqueue(xe->ordered_wq);
+
+ if (xe->unordered_wq)
+ destroy_workqueue(xe->unordered_wq);
+
+ if (xe->destroy_wq)
+ destroy_workqueue(xe->destroy_wq);
+
+ ttm_device_fini(&xe->ttm);
+}
+
+struct xe_device *xe_device_create(struct pci_dev *pdev,
+ const struct pci_device_id *ent)
+{
+ struct xe_device *xe;
+ int err;
+
+ xe_display_driver_set_hooks(&driver);
+
+ err = aperture_remove_conflicting_pci_devices(pdev, driver.name);
+ if (err)
+ return ERR_PTR(err);
+
+ xe = devm_drm_dev_alloc(&pdev->dev, &driver, struct xe_device, drm);
+ if (IS_ERR(xe))
+ return xe;
+
+ err = ttm_device_init(&xe->ttm, &xe_ttm_funcs, xe->drm.dev,
+ xe->drm.anon_inode->i_mapping,
+ xe->drm.vma_offset_manager, false, false);
+ if (WARN_ON(err))
+ goto err;
+
+ err = drmm_add_action_or_reset(&xe->drm, xe_device_destroy, NULL);
+ if (err)
+ goto err;
+
+ xe->info.devid = pdev->device;
+ xe->info.revid = pdev->revision;
+ xe->info.force_execlist = xe_modparam.force_execlist;
+
+ spin_lock_init(&xe->irq.lock);
+ spin_lock_init(&xe->clients.lock);
+
+ init_waitqueue_head(&xe->ufence_wq);
+
+ init_rwsem(&xe->usm.lock);
+
+ xa_init_flags(&xe->usm.asid_to_vm, XA_FLAGS_ALLOC);
+
+ if (IS_ENABLED(CONFIG_DRM_XE_DEBUG)) {
+ /* Trigger a large asid and an early asid wrap. */
+ u32 asid;
+
+ BUILD_BUG_ON(XE_MAX_ASID < 2);
+ err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, NULL,
+ XA_LIMIT(XE_MAX_ASID - 2, XE_MAX_ASID - 1),
+ &xe->usm.next_asid, GFP_KERNEL);
+ drm_WARN_ON(&xe->drm, err);
+ if (err >= 0)
+ xa_erase(&xe->usm.asid_to_vm, asid);
+ }
+
+ spin_lock_init(&xe->pinned.lock);
+ INIT_LIST_HEAD(&xe->pinned.kernel_bo_present);
+ INIT_LIST_HEAD(&xe->pinned.external_vram);
+ INIT_LIST_HEAD(&xe->pinned.evicted);
+
+ xe->preempt_fence_wq = alloc_ordered_workqueue("xe-preempt-fence-wq", 0);
+ xe->ordered_wq = alloc_ordered_workqueue("xe-ordered-wq", 0);
+ xe->unordered_wq = alloc_workqueue("xe-unordered-wq", 0, 0);
+ xe->destroy_wq = alloc_workqueue("xe-destroy-wq", 0, 0);
+ if (!xe->ordered_wq || !xe->unordered_wq ||
+ !xe->preempt_fence_wq || !xe->destroy_wq) {
+ /*
+ * Cleanup done in xe_device_destroy via
+ * drmm_add_action_or_reset register above
+ */
+ drm_err(&xe->drm, "Failed to allocate xe workqueues\n");
+ err = -ENOMEM;
+ goto err;
+ }
+
+ err = xe_display_create(xe);
+ if (WARN_ON(err))
+ goto err;
+
+ return xe;
+
+err:
+ return ERR_PTR(err);
+}
+ALLOW_ERROR_INJECTION(xe_device_create, ERRNO); /* See xe_pci_probe() */
+
+static bool xe_driver_flr_disabled(struct xe_device *xe)
+{
+ return xe_mmio_read32(xe_root_tile_mmio(xe), GU_CNTL_PROTECTED) & DRIVERINT_FLR_DIS;
+}
+
+/*
+ * The driver-initiated FLR is the highest level of reset that we can trigger
+ * from within the driver. It is different from the PCI FLR in that it doesn't
+ * fully reset the SGUnit and doesn't modify the PCI config space and therefore
+ * it doesn't require a re-enumeration of the PCI BARs. However, the
+ * driver-initiated FLR does still cause a reset of both GT and display and a
+ * memory wipe of local and stolen memory, so recovery would require a full HW
+ * re-init and saving/restoring (or re-populating) the wiped memory. Since we
+ * perform the FLR as the very last action before releasing access to the HW
+ * during the driver release flow, we don't attempt recovery at all, because
+ * if/when a new instance of i915 is bound to the device it will do a full
+ * re-init anyway.
+ */
+static void __xe_driver_flr(struct xe_device *xe)
+{
+ const unsigned int flr_timeout = 3 * MICRO; /* specs recommend a 3s wait */
+ struct xe_mmio *mmio = xe_root_tile_mmio(xe);
+ int ret;
+
+ drm_dbg(&xe->drm, "Triggering Driver-FLR\n");
+
+ /*
+ * Make sure any pending FLR requests have cleared by waiting for the
+ * FLR trigger bit to go to zero. Also clear GU_DEBUG's DRIVERFLR_STATUS
+ * to make sure it's not still set from a prior attempt (it's a
+ * write-to-clear bit).
+ * Note that we should never be in a situation where a previous attempt
+ * is still pending (unless the HW is totally dead), but better to be
+ * safe in case something unexpected happens.
+ */
+ ret = xe_mmio_wait32(mmio, GU_CNTL, DRIVERFLR, 0, flr_timeout, NULL, false);
+ if (ret) {
+ drm_err(&xe->drm, "Driver-FLR-prepare wait for ready failed! %d\n", ret);
+ return;
+ }
+ xe_mmio_write32(mmio, GU_DEBUG, DRIVERFLR_STATUS);
+
+ /* Trigger the actual Driver-FLR */
+ xe_mmio_rmw32(mmio, GU_CNTL, 0, DRIVERFLR);
+
+ /* Wait for hardware teardown to complete */
+ ret = xe_mmio_wait32(mmio, GU_CNTL, DRIVERFLR, 0, flr_timeout, NULL, false);
+ if (ret) {
+ drm_err(&xe->drm, "Driver-FLR-teardown wait completion failed! %d\n", ret);
+ return;
+ }
+
+ /* Wait for hardware/firmware re-init to complete */
+ ret = xe_mmio_wait32(mmio, GU_DEBUG, DRIVERFLR_STATUS, DRIVERFLR_STATUS,
+ flr_timeout, NULL, false);
+ if (ret) {
+ drm_err(&xe->drm, "Driver-FLR-reinit wait completion failed! %d\n", ret);
+ return;
+ }
+
+ /* Clear sticky completion status */
+ xe_mmio_write32(mmio, GU_DEBUG, DRIVERFLR_STATUS);
+}
+
+static void xe_driver_flr(struct xe_device *xe)
+{
+ if (xe_driver_flr_disabled(xe)) {
+ drm_info_once(&xe->drm, "BIOS Disabled Driver-FLR\n");
+ return;
+ }
+
+ __xe_driver_flr(xe);
+}
+
+static void xe_driver_flr_fini(void *arg)
+{
+ struct xe_device *xe = arg;
+
+ if (xe->needs_flr_on_fini)
+ xe_driver_flr(xe);
+}
+
+static void xe_device_sanitize(void *arg)
+{
+ struct xe_device *xe = arg;
+ struct xe_gt *gt;
+ u8 id;
+
+ for_each_gt(gt, xe, id)
+ xe_gt_sanitize(gt);
+}
+
+static int xe_set_dma_info(struct xe_device *xe)
+{
+ unsigned int mask_size = xe->info.dma_mask_size;
+ int err;
+
+ dma_set_max_seg_size(xe->drm.dev, xe_sg_segment_size(xe->drm.dev));
+
+ err = dma_set_mask(xe->drm.dev, DMA_BIT_MASK(mask_size));
+ if (err)
+ goto mask_err;
+
+ err = dma_set_coherent_mask(xe->drm.dev, DMA_BIT_MASK(mask_size));
+ if (err)
+ goto mask_err;
+
+ return 0;
+
+mask_err:
+ drm_err(&xe->drm, "Can't set DMA mask/consistent mask (%d)\n", err);
+ return err;
+}
+
+static bool verify_lmem_ready(struct xe_device *xe)
+{
+ u32 val = xe_mmio_read32(xe_root_tile_mmio(xe), GU_CNTL) & LMEM_INIT;
+
+ return !!val;
+}
+
+static int wait_for_lmem_ready(struct xe_device *xe)
+{
+ unsigned long timeout, start;
+
+ if (!IS_DGFX(xe))
+ return 0;
+
+ if (IS_SRIOV_VF(xe))
+ return 0;
+
+ if (verify_lmem_ready(xe))
+ return 0;
+
+ drm_dbg(&xe->drm, "Waiting for lmem initialization\n");
+
+ start = jiffies;
+ timeout = start + msecs_to_jiffies(60 * 1000); /* 60 sec! */
+
+ do {
+ if (signal_pending(current))
+ return -EINTR;
+
+ /*
+ * The boot firmware initializes local memory and
+ * assesses its health. If memory training fails, the punit will
+ * have been instructed to keep the GT powered down and we won't
+ * be able to communicate with it.
+ *
+ * If the status check is done before the punit updates the register,
+ * it can lead to the system being unusable. Use a timeout and
+ * defer the probe to prevent this.
+ */
+ if (time_after(jiffies, timeout)) {
+ drm_dbg(&xe->drm, "lmem not initialized by firmware\n");
+ return -EPROBE_DEFER;
+ }
+
+ msleep(20);
+
+ } while (!verify_lmem_ready(xe));
+
+ drm_dbg(&xe->drm, "lmem ready after %ums",
+ jiffies_to_msecs(jiffies - start));
+
+ return 0;
+}
+ALLOW_ERROR_INJECTION(wait_for_lmem_ready, ERRNO); /* See xe_pci_probe() */
+
+static void update_device_info(struct xe_device *xe)
+{
+ /* disable features that are not available/applicable to VFs */
+ if (IS_SRIOV_VF(xe)) {
+ xe->info.probe_display = 0;
+ xe->info.has_heci_gscfi = 0;
+ xe->info.skip_guc_pc = 1;
+ xe->info.skip_pcode = 1;
+ }
+}
+
+/**
+ * xe_device_probe_early() - Device early probe
+ * @xe: xe device instance
+ *
+ * Initialize MMIO resources that don't require any
+ * knowledge about tile count. Also initialize pcode and
+ * check vram initialization on root tile.
+ *
+ * Return: 0 on success, error code on failure
+ */
+int xe_device_probe_early(struct xe_device *xe)
+{
+ int err;
+
+ err = xe_mmio_init(xe);
+ if (err)
+ return err;
+
+ xe_sriov_probe_early(xe);
+
+ update_device_info(xe);
+
+ err = xe_pcode_probe_early(xe);
+ if (err)
+ return err;
+
+ err = wait_for_lmem_ready(xe);
+ if (err)
+ return err;
+
+ xe->wedged.mode = xe_modparam.wedged_mode;
+
+ return 0;
+}
+
+static int probe_has_flat_ccs(struct xe_device *xe)
+{
+ struct xe_gt *gt;
+ u32 reg;
+ int err;
+
+ /* Always enabled/disabled, no runtime check to do */
+ if (GRAPHICS_VER(xe) < 20 || !xe->info.has_flat_ccs)
+ return 0;
+
+ gt = xe_root_mmio_gt(xe);
+
+ err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+ if (err)
+ return err;
+
+ reg = xe_gt_mcr_unicast_read_any(gt, XE2_FLAT_CCS_BASE_RANGE_LOWER);
+ xe->info.has_flat_ccs = (reg & XE2_FLAT_CCS_ENABLE);
+
+ if (!xe->info.has_flat_ccs)
+ drm_dbg(&xe->drm,
+ "Flat CCS has been disabled in bios, May lead to performance impact");
+
+ return xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+}
+
+int xe_device_probe(struct xe_device *xe)
+{
+ struct xe_tile *tile;
+ struct xe_gt *gt;
+ int err;
+ u8 last_gt;
+ u8 id;
+
+ xe_pat_init_early(xe);
+
+ err = xe_sriov_init(xe);
+ if (err)
+ return err;
+
+ xe->info.mem_region_mask = 1;
+ err = xe_display_init_nommio(xe);
+ if (err)
+ return err;
+
+ err = xe_set_dma_info(xe);
+ if (err)
+ return err;
+
+ err = xe_mmio_probe_tiles(xe);
+ if (err)
+ return err;
+
+ xe_ttm_sys_mgr_init(xe);
+
+ for_each_gt(gt, xe, id) {
+ err = xe_gt_init_early(gt);
+ if (err)
+ return err;
+
+ /*
+ * Only after this point can GT-specific MMIO operations
+ * (including things like communication with the GuC)
+ * be performed.
+ */
+ xe_gt_mmio_init(gt);
+ }
+
+ for_each_tile(tile, xe, id) {
+ if (IS_SRIOV_VF(xe)) {
+ xe_guc_comm_init_early(&tile->primary_gt->uc.guc);
+ err = xe_gt_sriov_vf_bootstrap(tile->primary_gt);
+ if (err)
+ return err;
+ err = xe_gt_sriov_vf_query_config(tile->primary_gt);
+ if (err)
+ return err;
+ }
+ err = xe_ggtt_init_early(tile->mem.ggtt);
+ if (err)
+ return err;
+ err = xe_memirq_init(&tile->memirq);
+ if (err)
+ return err;
+ }
+
+ for_each_gt(gt, xe, id) {
+ err = xe_gt_init_hwconfig(gt);
+ if (err)
+ return err;
+ }
+
+ err = xe_devcoredump_init(xe);
+ if (err)
+ return err;
+ err = devm_add_action_or_reset(xe->drm.dev, xe_driver_flr_fini, xe);
+ if (err)
+ return err;
+
+ err = xe_display_init_noirq(xe);
+ if (err)
+ return err;
+
+ err = xe_irq_install(xe);
+ if (err)
+ goto err;
+
+ err = probe_has_flat_ccs(xe);
+ if (err)
+ goto err;
+
+ err = xe_vram_probe(xe);
+ if (err)
+ goto err;
+
+ for_each_tile(tile, xe, id) {
+ err = xe_tile_init_noalloc(tile);
+ if (err)
+ goto err;
+ }
+
+ /* Allocate and map stolen after potential VRAM resize */
+ xe_ttm_stolen_mgr_init(xe);
+
+ /*
+ * Now that GT is initialized (TTM in particular),
+ * we can try to init display, and inherit the initial fb.
+ * This is the reason the first allocation needs to be done
+ * inside display.
+ */
+ err = xe_display_init_noaccel(xe);
+ if (err)
+ goto err;
+
+ for_each_gt(gt, xe, id) {
+ last_gt = id;
+
+ err = xe_gt_init(gt);
+ if (err)
+ goto err_fini_gt;
+ }
+
+ xe_heci_gsc_init(xe);
+
+ err = xe_oa_init(xe);
+ if (err)
+ goto err_fini_gt;
+
+ err = xe_display_init(xe);
+ if (err)
+ goto err_fini_oa;
+
+ err = drm_dev_register(&xe->drm, 0);
+ if (err)
+ goto err_fini_display;
+
+ xe_display_register(xe);
+
+ xe_oa_register(xe);
+
+ xe_debugfs_register(xe);
+
+ xe_hwmon_register(xe);
+
+ for_each_gt(gt, xe, id)
+ xe_gt_sanitize_freq(gt);
+
+ return devm_add_action_or_reset(xe->drm.dev, xe_device_sanitize, xe);
+
+err_fini_display:
+ xe_display_driver_remove(xe);
+
+err_fini_oa:
+ xe_oa_fini(xe);
+
+err_fini_gt:
+ for_each_gt(gt, xe, id) {
+ if (id < last_gt)
+ xe_gt_remove(gt);
+ else
+ break;
+ }
+
+err:
+ xe_display_fini(xe);
+ return err;
+}
+
+static void xe_device_remove_display(struct xe_device *xe)
+{
+ xe_display_unregister(xe);
+
+ drm_dev_unplug(&xe->drm);
+ xe_display_driver_remove(xe);
+}
+
+void xe_device_remove(struct xe_device *xe)
+{
+ struct xe_gt *gt;
+ u8 id;
+
+ xe_oa_unregister(xe);
+
+ xe_device_remove_display(xe);
+
+ xe_display_fini(xe);
+
+ xe_oa_fini(xe);
+
+ xe_heci_gsc_fini(xe);
+
+ for_each_gt(gt, xe, id)
+ xe_gt_remove(gt);
+}
+
+void xe_device_shutdown(struct xe_device *xe)
+{
+ struct xe_gt *gt;
+ u8 id;
+
+ drm_dbg(&xe->drm, "Shutting down device\n");
+
+ if (xe_driver_flr_disabled(xe)) {
+ xe_display_pm_shutdown(xe);
+
+ xe_irq_suspend(xe);
+
+ for_each_gt(gt, xe, id)
+ xe_gt_shutdown(gt);
+
+ xe_display_pm_shutdown_late(xe);
+ } else {
+ /* BOOM! */
+ __xe_driver_flr(xe);
+ }
+}
+
+/**
+ * xe_device_wmb() - Device specific write memory barrier
+ * @xe: the &xe_device
+ *
+ * While wmb() is sufficient for a barrier if we use system memory, on discrete
+ * platforms with device memory we additionally need to issue a register write.
+ * Since it doesn't matter which register we write to, use the read-only VF_CAP
+ * register that is also marked as accessible by the VFs.
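+ *
+ * For example, a caller that fills a buffer in device memory and then writes
+ * a doorbell register would do (illustrative sequence only):
+ *
+ *	write payload to device memory;
+ *	xe_device_wmb(xe);
+ *	write doorbell/tail register;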
+ */
+void xe_device_wmb(struct xe_device *xe)
+{
+ wmb();
+ if (IS_DGFX(xe))
+ xe_mmio_write32(xe_root_tile_mmio(xe), VF_CAP_REG, 0);
+}
+
+/**
+ * xe_device_td_flush() - Flush transient L3 cache entries
+ * @xe: The device
+ *
+ * Display engine has direct access to memory and is never coherent with L3/L4
+ * caches (or CPU caches), however KMD is responsible for specifically flushing
+ * transient L3 GPU cache entries prior to the flip sequence to ensure scanout
+ * can happen from such a surface without seeing corruption.
+ *
+ * Display surfaces can be tagged as transient by mapping them using one of the
+ * various L3:XD PAT index modes on Xe2.
+ *
+ * Note: On non-discrete xe2 platforms, like LNL, the entire L3 cache is flushed
+ * at the end of each submission via PIPE_CONTROL for compute/render, since SA
+ * Media is not coherent with L3 and we want to support render-vs-media
+ * use cases. For other engines like copy/blt the HW internally forces uncached
+ * behaviour, hence why we can skip the TDF on such platforms.
+ */
+void xe_device_td_flush(struct xe_device *xe)
+{
+ struct xe_gt *gt;
+ u8 id;
+
+ if (!IS_DGFX(xe) || GRAPHICS_VER(xe) < 20)
+ return;
+
+ if (XE_WA(xe_root_mmio_gt(xe), 16023588340)) {
+ xe_device_l2_flush(xe);
+ return;
+ }
+
+ for_each_gt(gt, xe, id) {
+ if (xe_gt_is_media_type(gt))
+ continue;
+
+ if (xe_force_wake_get(gt_to_fw(gt), XE_FW_GT))
+ return;
+
+ xe_mmio_write32(&gt->mmio, XE2_TDF_CTRL, TRANSIENT_FLUSH_REQUEST);
+ /*
+ * FIXME: We can likely do better here with our choice of
+ * timeout. Currently we just assume the worst case, i.e. 150us,
+ * which is believed to be sufficient to cover the worst case
+ * scenario on current platforms if all cache entries are
+		 * transient and need to be flushed.
+ */
+ if (xe_mmio_wait32(&gt->mmio, XE2_TDF_CTRL, TRANSIENT_FLUSH_REQUEST, 0,
+ 150, NULL, false))
+ xe_gt_err_once(gt, "TD flush timeout\n");
+
+ xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+ }
+}
+
+void xe_device_l2_flush(struct xe_device *xe)
+{
+ struct xe_gt *gt;
+ int err;
+
+ gt = xe_root_mmio_gt(xe);
+
+ if (!XE_WA(gt, 16023588340))
+ return;
+
+ err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+ if (err)
+ return;
+
+ spin_lock(&gt->global_invl_lock);
+ xe_mmio_write32(&gt->mmio, XE2_GLOBAL_INVAL, 0x1);
+
+<<<<<<<
+ if (xe_mmio_wait32(&gt->mmio, XE2_GLOBAL_INVAL, 0x1, 0x0, 500, NULL, true))
+=======
+ if (xe_mmio_wait32(gt, XE2_GLOBAL_INVAL, 0x1, 0x0, 500, NULL, true))
+>>>>>>>
+ xe_gt_err_once(gt, "Global invalidation timeout\n");
+ spin_unlock(&gt->global_invl_lock);
+
+ xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+}
+
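+/*
+ * Example: assuming a 256:1 main-to-CCS ratio (i.e. NUM_BYTES_PER_CCS_BYTE()
+ * evaluating to 256, used here purely for illustration), a 1 MiB buffer needs
+ * DIV_ROUND_UP_ULL(SZ_1M, 256) == 4096 bytes of CCS metadata.
+ */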
+u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size)
+{
+ return xe_device_has_flat_ccs(xe) ?
+ DIV_ROUND_UP_ULL(size, NUM_BYTES_PER_CCS_BYTE(xe)) : 0;
+}
+
+/**
+ * xe_device_assert_mem_access - Inspect the current runtime_pm state.
+ * @xe: xe device instance
+ *
+ * To be used before any kind of memory access. It will splat a debug warning
+ * if the device is currently sleeping. But it doesn't guarantee in any way
+ * that the device is going to remain awake. Xe PM runtime get and put
+ * functions might be added to the outer bound of the memory access, while
+ * this check is intended for inner usage to splat some warning if the worst
+ * case has just happened.
+ */
+void xe_device_assert_mem_access(struct xe_device *xe)
+{
+ xe_assert(xe, !xe_pm_runtime_suspended(xe));
+}
+
+void xe_device_snapshot_print(struct xe_device *xe, struct drm_printer *p)
+{
+ struct xe_gt *gt;
+ u8 id;
+
+ drm_printf(p, "PCI ID: 0x%04x\n", xe->info.devid);
+ drm_printf(p, "PCI revision: 0x%02x\n", xe->info.revid);
+
+ for_each_gt(gt, xe, id) {
+ drm_printf(p, "GT id: %u\n", id);
+ drm_printf(p, "\tTile: %u\n", gt->tile->id);
+ drm_printf(p, "\tType: %s\n",
+ gt->info.type == XE_GT_TYPE_MAIN ? "main" : "media");
+ drm_printf(p, "\tIP ver: %u.%u.%u\n",
+ REG_FIELD_GET(GMD_ID_ARCH_MASK, gt->info.gmdid),
+ REG_FIELD_GET(GMD_ID_RELEASE_MASK, gt->info.gmdid),
+ REG_FIELD_GET(GMD_ID_REVID, gt->info.gmdid));
+ drm_printf(p, "\tCS reference clock: %u\n", gt->info.reference_clock);
+ }
+}
+
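+/*
+ * Example: with xe->info.va_bits == 48, the address 0x0000800000000000 has
+ * bit 47 set, so xe_device_canonicalize_addr() sign-extends it to
+ * 0xffff800000000000, and xe_device_uncanonicalize_addr() masks it back to
+ * 0x0000800000000000.
+ */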
+u64 xe_device_canonicalize_addr(struct xe_device *xe, u64 address)
+{
+ return sign_extend64(address, xe->info.va_bits - 1);
+}
+
+u64 xe_device_uncanonicalize_addr(struct xe_device *xe, u64 address)
+{
+ return address & GENMASK_ULL(xe->info.va_bits - 1, 0);
+}
+
+static void xe_device_wedged_fini(struct drm_device *drm, void *arg)
+{
+ struct xe_device *xe = arg;
+
+ xe_pm_runtime_put(xe);
+}
+
+/**
+ * xe_device_declare_wedged - Declare device wedged
+ * @xe: xe device instance
+ *
+ * This is a final state that can only be cleared with a module
+ * re-probe (unbind + bind).
+ * In this state every IOCTL will be blocked so the GT cannot be used.
+ * In general it will be called upon any critical error such as gt reset
+ * failure or guc loading failure.
+ * If xe.wedged module parameter is set to 2, this function will be called
+ * on every single execution timeout (a.k.a. GPU hang) right after devcoredump
+ * snapshot capture. In this mode, GT reset won't be attempted so the state of
+ * the issue is preserved for further debugging.
+ */
+void xe_device_declare_wedged(struct xe_device *xe)
+{
+ struct xe_gt *gt;
+ u8 id;
+
+ if (xe->wedged.mode == 0) {
+ drm_dbg(&xe->drm, "Wedged mode is forcibly disabled\n");
+ return;
+ }
+
+ xe_pm_runtime_get_noresume(xe);
+
+ if (drmm_add_action_or_reset(&xe->drm, xe_device_wedged_fini, xe)) {
+		drm_err(&xe->drm, "Failed to register xe_device_wedged_fini clean-up. Device is still wedged.\n");
+ return;
+ }
+
+ if (!atomic_xchg(&xe->wedged.flag, 1)) {
+ xe->needs_flr_on_fini = true;
+ drm_err(&xe->drm,
+ "CRITICAL: Xe has declared device %s as wedged.\n"
+ "IOCTLs and executions are blocked. Only a rebind may clear the failure\n"
+ "Please file a _new_ bug report at https://gitlab.freedesktop.org/drm/xe/kernel/issues/new\n",
+ dev_name(xe->drm.dev));
+ }
+
+ for_each_gt(gt, xe, id)
+ xe_gt_declare_wedged(gt);
+}
diff --git a/rr-cache/3202f565d085f3a7e6151c8b138d91393b3520cd/preimage.2 b/rr-cache/3202f565d085f3a7e6151c8b138d91393b3520cd/preimage.2
new file mode 100644
index 000000000000..daa2b53c6258
--- /dev/null
+++ b/rr-cache/3202f565d085f3a7e6151c8b138d91393b3520cd/preimage.2
@@ -0,0 +1,1041 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#include "xe_device.h"
+
+#include <linux/aperture.h>
+#include <linux/delay.h>
+#include <linux/fault-inject.h>
+#include <linux/units.h>
+
+#include <drm/drm_atomic_helper.h>
+#include <drm/drm_client.h>
+#include <drm/drm_gem_ttm_helper.h>
+#include <drm/drm_ioctl.h>
+#include <drm/drm_managed.h>
+#include <drm/drm_print.h>
+#include <uapi/drm/xe_drm.h>
+
+#include "display/xe_display.h"
+#include "instructions/xe_gpu_commands.h"
+#include "regs/xe_gt_regs.h"
+#include "regs/xe_regs.h"
+#include "xe_bo.h"
+#include "xe_debugfs.h"
+#include "xe_devcoredump.h"
+#include "xe_dma_buf.h"
+#include "xe_drm_client.h"
+#include "xe_drv.h"
+#include "xe_exec.h"
+#include "xe_exec_queue.h"
+#include "xe_force_wake.h"
+#include "xe_ggtt.h"
+#include "xe_gsc_proxy.h"
+#include "xe_gt.h"
+#include "xe_gt_mcr.h"
+#include "xe_gt_printk.h"
+#include "xe_gt_sriov_vf.h"
+#include "xe_guc.h"
+#include "xe_hw_engine_group.h"
+#include "xe_hwmon.h"
+#include "xe_irq.h"
+#include "xe_memirq.h"
+#include "xe_mmio.h"
+#include "xe_module.h"
+#include "xe_observation.h"
+#include "xe_pat.h"
+#include "xe_pcode.h"
+#include "xe_pm.h"
+#include "xe_query.h"
+#include "xe_sriov.h"
+#include "xe_tile.h"
+#include "xe_ttm_stolen_mgr.h"
+#include "xe_ttm_sys_mgr.h"
+#include "xe_vm.h"
+#include "xe_vram.h"
+#include "xe_wait_user_fence.h"
+#include "xe_wa.h"
+
+#include <generated/xe_wa_oob.h>
+
+static int xe_file_open(struct drm_device *dev, struct drm_file *file)
+{
+ struct xe_device *xe = to_xe_device(dev);
+ struct xe_drm_client *client;
+ struct xe_file *xef;
+ int ret = -ENOMEM;
+ struct task_struct *task = NULL;
+
+ xef = kzalloc(sizeof(*xef), GFP_KERNEL);
+ if (!xef)
+ return ret;
+
+ client = xe_drm_client_alloc();
+ if (!client) {
+ kfree(xef);
+ return ret;
+ }
+
+ xef->drm = file;
+ xef->client = client;
+ xef->xe = xe;
+
+ mutex_init(&xef->vm.lock);
+ xa_init_flags(&xef->vm.xa, XA_FLAGS_ALLOC1);
+
+ mutex_init(&xef->exec_queue.lock);
+ xa_init_flags(&xef->exec_queue.xa, XA_FLAGS_ALLOC1);
+
+ spin_lock(&xe->clients.lock);
+ xe->clients.count++;
+ spin_unlock(&xe->clients.lock);
+
+ file->driver_priv = xef;
+ kref_init(&xef->refcount);
+
+ task = get_pid_task(rcu_access_pointer(file->pid), PIDTYPE_PID);
+ if (task) {
+ xef->process_name = kstrdup(task->comm, GFP_KERNEL);
+ xef->pid = task->pid;
+ put_task_struct(task);
+ }
+
+ return 0;
+}
+
+static void xe_file_destroy(struct kref *ref)
+{
+ struct xe_file *xef = container_of(ref, struct xe_file, refcount);
+ struct xe_device *xe = xef->xe;
+
+ xa_destroy(&xef->exec_queue.xa);
+ mutex_destroy(&xef->exec_queue.lock);
+ xa_destroy(&xef->vm.xa);
+ mutex_destroy(&xef->vm.lock);
+
+ spin_lock(&xe->clients.lock);
+ xe->clients.count--;
+ spin_unlock(&xe->clients.lock);
+
+ xe_drm_client_put(xef->client);
+ kfree(xef->process_name);
+ kfree(xef);
+}
+
+/**
+ * xe_file_get() - Take a reference to the xe file object
+ * @xef: Pointer to the xe file
+ *
+ * Anyone with a pointer to xef must take a reference to the xe file
+ * object using this call.
+ *
+ * Return: xe file pointer
+ */
+struct xe_file *xe_file_get(struct xe_file *xef)
+{
+ kref_get(&xef->refcount);
+ return xef;
+}
+
+/**
+ * xe_file_put() - Drop a reference to the xe file object
+ * @xef: Pointer to the xe file
+ *
+ * Used to drop reference to the xef object
+ */
+void xe_file_put(struct xe_file *xef)
+{
+ kref_put(&xef->refcount, xe_file_destroy);
+}
+
+static void xe_file_close(struct drm_device *dev, struct drm_file *file)
+{
+ struct xe_device *xe = to_xe_device(dev);
+ struct xe_file *xef = file->driver_priv;
+ struct xe_vm *vm;
+ struct xe_exec_queue *q;
+ unsigned long idx;
+
+ xe_pm_runtime_get(xe);
+
+ /*
+ * No need for exec_queue.lock here as there is no contention for it
+ * when FD is closing as IOCTLs presumably can't be modifying the
+ * xarray. Taking exec_queue.lock here causes undue dependency on
+ * vm->lock taken during xe_exec_queue_kill().
+ */
+ xa_for_each(&xef->exec_queue.xa, idx, q) {
+ if (q->vm && q->hwe->hw_engine_group)
+ xe_hw_engine_group_del_exec_queue(q->hwe->hw_engine_group, q);
+ xe_exec_queue_kill(q);
+ xe_exec_queue_put(q);
+ }
+ xa_for_each(&xef->vm.xa, idx, vm)
+ xe_vm_close_and_put(vm);
+
+ xe_file_put(xef);
+
+ xe_pm_runtime_put(xe);
+}
+
+static const struct drm_ioctl_desc xe_ioctls[] = {
+ DRM_IOCTL_DEF_DRV(XE_DEVICE_QUERY, xe_query_ioctl, DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(XE_GEM_CREATE, xe_gem_create_ioctl, DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(XE_GEM_MMAP_OFFSET, xe_gem_mmap_offset_ioctl,
+ DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(XE_VM_CREATE, xe_vm_create_ioctl, DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(XE_VM_DESTROY, xe_vm_destroy_ioctl, DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(XE_VM_BIND, xe_vm_bind_ioctl, DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(XE_EXEC, xe_exec_ioctl, DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(XE_EXEC_QUEUE_CREATE, xe_exec_queue_create_ioctl,
+ DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(XE_EXEC_QUEUE_DESTROY, xe_exec_queue_destroy_ioctl,
+ DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(XE_EXEC_QUEUE_GET_PROPERTY, xe_exec_queue_get_property_ioctl,
+ DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(XE_WAIT_USER_FENCE, xe_wait_user_fence_ioctl,
+ DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(XE_OBSERVATION, xe_observation_ioctl, DRM_RENDER_ALLOW),
+};
+
+static long xe_drm_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+ struct drm_file *file_priv = file->private_data;
+ struct xe_device *xe = to_xe_device(file_priv->minor->dev);
+ long ret;
+
+ if (xe_device_wedged(xe))
+ return -ECANCELED;
+
+ ret = xe_pm_runtime_get_ioctl(xe);
+ if (ret >= 0)
+ ret = drm_ioctl(file, cmd, arg);
+ xe_pm_runtime_put(xe);
+
+ return ret;
+}
+
+#ifdef CONFIG_COMPAT
+static long xe_drm_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+ struct drm_file *file_priv = file->private_data;
+ struct xe_device *xe = to_xe_device(file_priv->minor->dev);
+ long ret;
+
+ if (xe_device_wedged(xe))
+ return -ECANCELED;
+
+ ret = xe_pm_runtime_get_ioctl(xe);
+ if (ret >= 0)
+ ret = drm_compat_ioctl(file, cmd, arg);
+ xe_pm_runtime_put(xe);
+
+ return ret;
+}
+#else
+/* similarly to drm_compat_ioctl, let it be assigned to .compat_ioctl unconditionally */
+#define xe_drm_compat_ioctl NULL
+#endif
+
+static const struct file_operations xe_driver_fops = {
+ .owner = THIS_MODULE,
+ .open = drm_open,
+ .release = drm_release_noglobal,
+ .unlocked_ioctl = xe_drm_ioctl,
+ .mmap = drm_gem_mmap,
+ .poll = drm_poll,
+ .read = drm_read,
+ .compat_ioctl = xe_drm_compat_ioctl,
+ .llseek = noop_llseek,
+#ifdef CONFIG_PROC_FS
+ .show_fdinfo = drm_show_fdinfo,
+#endif
+ .fop_flags = FOP_UNSIGNED_OFFSET,
+};
+
+static struct drm_driver driver = {
+ /* Don't use MTRRs here; the Xserver or userspace app should
+ * deal with them for Intel hardware.
+ */
+ .driver_features =
+ DRIVER_GEM |
+ DRIVER_RENDER | DRIVER_SYNCOBJ |
+ DRIVER_SYNCOBJ_TIMELINE | DRIVER_GEM_GPUVA,
+ .open = xe_file_open,
+ .postclose = xe_file_close,
+
+ .gem_prime_import = xe_gem_prime_import,
+
+ .dumb_create = xe_bo_dumb_create,
+ .dumb_map_offset = drm_gem_ttm_dumb_map_offset,
+#ifdef CONFIG_PROC_FS
+ .show_fdinfo = xe_drm_client_fdinfo,
+#endif
+ .ioctls = xe_ioctls,
+ .num_ioctls = ARRAY_SIZE(xe_ioctls),
+ .fops = &xe_driver_fops,
+ .name = DRIVER_NAME,
+ .desc = DRIVER_DESC,
+ .date = DRIVER_DATE,
+ .major = DRIVER_MAJOR,
+ .minor = DRIVER_MINOR,
+ .patchlevel = DRIVER_PATCHLEVEL,
+};
+
+static void xe_device_destroy(struct drm_device *dev, void *dummy)
+{
+ struct xe_device *xe = to_xe_device(dev);
+
+ if (xe->preempt_fence_wq)
+ destroy_workqueue(xe->preempt_fence_wq);
+
+ if (xe->ordered_wq)
+ destroy_workqueue(xe->ordered_wq);
+
+ if (xe->unordered_wq)
+ destroy_workqueue(xe->unordered_wq);
+
+ if (xe->destroy_wq)
+ destroy_workqueue(xe->destroy_wq);
+
+ ttm_device_fini(&xe->ttm);
+}
+
+struct xe_device *xe_device_create(struct pci_dev *pdev,
+ const struct pci_device_id *ent)
+{
+ struct xe_device *xe;
+ int err;
+
+ xe_display_driver_set_hooks(&driver);
+
+ err = aperture_remove_conflicting_pci_devices(pdev, driver.name);
+ if (err)
+ return ERR_PTR(err);
+
+ xe = devm_drm_dev_alloc(&pdev->dev, &driver, struct xe_device, drm);
+ if (IS_ERR(xe))
+ return xe;
+
+ err = ttm_device_init(&xe->ttm, &xe_ttm_funcs, xe->drm.dev,
+ xe->drm.anon_inode->i_mapping,
+ xe->drm.vma_offset_manager, false, false);
+ if (WARN_ON(err))
+ goto err;
+
+ err = drmm_add_action_or_reset(&xe->drm, xe_device_destroy, NULL);
+ if (err)
+ goto err;
+
+ xe->info.devid = pdev->device;
+ xe->info.revid = pdev->revision;
+ xe->info.force_execlist = xe_modparam.force_execlist;
+
+ spin_lock_init(&xe->irq.lock);
+ spin_lock_init(&xe->clients.lock);
+
+ init_waitqueue_head(&xe->ufence_wq);
+
+ init_rwsem(&xe->usm.lock);
+
+ xa_init_flags(&xe->usm.asid_to_vm, XA_FLAGS_ALLOC);
+
+ if (IS_ENABLED(CONFIG_DRM_XE_DEBUG)) {
+ /* Trigger a large asid and an early asid wrap. */
+ u32 asid;
+
+ BUILD_BUG_ON(XE_MAX_ASID < 2);
+ err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, NULL,
+ XA_LIMIT(XE_MAX_ASID - 2, XE_MAX_ASID - 1),
+ &xe->usm.next_asid, GFP_KERNEL);
+ drm_WARN_ON(&xe->drm, err);
+ if (err >= 0)
+ xa_erase(&xe->usm.asid_to_vm, asid);
+ }
+
+ spin_lock_init(&xe->pinned.lock);
+ INIT_LIST_HEAD(&xe->pinned.kernel_bo_present);
+ INIT_LIST_HEAD(&xe->pinned.external_vram);
+ INIT_LIST_HEAD(&xe->pinned.evicted);
+
+ xe->preempt_fence_wq = alloc_ordered_workqueue("xe-preempt-fence-wq", 0);
+ xe->ordered_wq = alloc_ordered_workqueue("xe-ordered-wq", 0);
+ xe->unordered_wq = alloc_workqueue("xe-unordered-wq", 0, 0);
+ xe->destroy_wq = alloc_workqueue("xe-destroy-wq", 0, 0);
+ if (!xe->ordered_wq || !xe->unordered_wq ||
+ !xe->preempt_fence_wq || !xe->destroy_wq) {
+ /*
+ * Cleanup done in xe_device_destroy via
+ * drmm_add_action_or_reset register above
+ */
+ drm_err(&xe->drm, "Failed to allocate xe workqueues\n");
+ err = -ENOMEM;
+ goto err;
+ }
+
+ err = xe_display_create(xe);
+ if (WARN_ON(err))
+ goto err;
+
+ return xe;
+
+err:
+ return ERR_PTR(err);
+}
+ALLOW_ERROR_INJECTION(xe_device_create, ERRNO); /* See xe_pci_probe() */
+
+static bool xe_driver_flr_disabled(struct xe_device *xe)
+{
+ return xe_mmio_read32(xe_root_tile_mmio(xe), GU_CNTL_PROTECTED) & DRIVERINT_FLR_DIS;
+}
+
+/*
+ * The driver-initiated FLR is the highest level of reset that we can trigger
+ * from within the driver. It is different from the PCI FLR in that it doesn't
+ * fully reset the SGUnit and doesn't modify the PCI config space and therefore
+ * it doesn't require a re-enumeration of the PCI BARs. However, the
+ * driver-initiated FLR does still cause a reset of both GT and display and a
+ * memory wipe of local and stolen memory, so recovery would require a full HW
+ * re-init and saving/restoring (or re-populating) the wiped memory. Since we
+ * perform the FLR as the very last action before releasing access to the HW
+ * during the driver release flow, we don't attempt recovery at all, because
+ * if/when a new instance of i915 is bound to the device it will do a full
+ * re-init anyway.
+ */
+static void __xe_driver_flr(struct xe_device *xe)
+{
+ const unsigned int flr_timeout = 3 * MICRO; /* specs recommend a 3s wait */
+ struct xe_mmio *mmio = xe_root_tile_mmio(xe);
+ int ret;
+
+ drm_dbg(&xe->drm, "Triggering Driver-FLR\n");
+
+ /*
+ * Make sure any pending FLR requests have cleared by waiting for the
+ * FLR trigger bit to go to zero. Also clear GU_DEBUG's DRIVERFLR_STATUS
+	 * to make sure it's not still set from a prior attempt (it's a
+	 * write-to-clear bit).
+ * Note that we should never be in a situation where a previous attempt
+ * is still pending (unless the HW is totally dead), but better to be
+ * safe in case something unexpected happens
+ */
+ ret = xe_mmio_wait32(mmio, GU_CNTL, DRIVERFLR, 0, flr_timeout, NULL, false);
+ if (ret) {
+ drm_err(&xe->drm, "Driver-FLR-prepare wait for ready failed! %d\n", ret);
+ return;
+ }
+ xe_mmio_write32(mmio, GU_DEBUG, DRIVERFLR_STATUS);
+
+ /* Trigger the actual Driver-FLR */
+ xe_mmio_rmw32(mmio, GU_CNTL, 0, DRIVERFLR);
+
+ /* Wait for hardware teardown to complete */
+ ret = xe_mmio_wait32(mmio, GU_CNTL, DRIVERFLR, 0, flr_timeout, NULL, false);
+ if (ret) {
+ drm_err(&xe->drm, "Driver-FLR-teardown wait completion failed! %d\n", ret);
+ return;
+ }
+
+ /* Wait for hardware/firmware re-init to complete */
+ ret = xe_mmio_wait32(mmio, GU_DEBUG, DRIVERFLR_STATUS, DRIVERFLR_STATUS,
+ flr_timeout, NULL, false);
+ if (ret) {
+ drm_err(&xe->drm, "Driver-FLR-reinit wait completion failed! %d\n", ret);
+ return;
+ }
+
+ /* Clear sticky completion status */
+ xe_mmio_write32(mmio, GU_DEBUG, DRIVERFLR_STATUS);
+}
+
+static void xe_driver_flr(struct xe_device *xe)
+{
+ if (xe_driver_flr_disabled(xe)) {
+ drm_info_once(&xe->drm, "BIOS Disabled Driver-FLR\n");
+ return;
+ }
+
+ __xe_driver_flr(xe);
+}
+
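+/*
+ * Registered as a devm action in xe_device_probe(); runs on driver removal and
+ * triggers the driver FLR only when needs_flr_on_fini has been set (e.g. by
+ * xe_device_declare_wedged()).
+ */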
+static void xe_driver_flr_fini(void *arg)
+{
+ struct xe_device *xe = arg;
+
+ if (xe->needs_flr_on_fini)
+ xe_driver_flr(xe);
+}
+
+static void xe_device_sanitize(void *arg)
+{
+ struct xe_device *xe = arg;
+ struct xe_gt *gt;
+ u8 id;
+
+ for_each_gt(gt, xe, id)
+ xe_gt_sanitize(gt);
+}
+
+static int xe_set_dma_info(struct xe_device *xe)
+{
+ unsigned int mask_size = xe->info.dma_mask_size;
+ int err;
+
+ dma_set_max_seg_size(xe->drm.dev, xe_sg_segment_size(xe->drm.dev));
+
+ err = dma_set_mask(xe->drm.dev, DMA_BIT_MASK(mask_size));
+ if (err)
+ goto mask_err;
+
+ err = dma_set_coherent_mask(xe->drm.dev, DMA_BIT_MASK(mask_size));
+ if (err)
+ goto mask_err;
+
+ return 0;
+
+mask_err:
+ drm_err(&xe->drm, "Can't set DMA mask/consistent mask (%d)\n", err);
+ return err;
+}
+
+static bool verify_lmem_ready(struct xe_device *xe)
+{
+ u32 val = xe_mmio_read32(xe_root_tile_mmio(xe), GU_CNTL) & LMEM_INIT;
+
+ return !!val;
+}
+
+static int wait_for_lmem_ready(struct xe_device *xe)
+{
+ unsigned long timeout, start;
+
+ if (!IS_DGFX(xe))
+ return 0;
+
+ if (IS_SRIOV_VF(xe))
+ return 0;
+
+ if (verify_lmem_ready(xe))
+ return 0;
+
+ drm_dbg(&xe->drm, "Waiting for lmem initialization\n");
+
+ start = jiffies;
+ timeout = start + msecs_to_jiffies(60 * 1000); /* 60 sec! */
+
+ do {
+ if (signal_pending(current))
+ return -EINTR;
+
+ /*
+		 * The boot firmware initializes local memory and assesses its
+		 * health. If memory training fails, the punit will have been
+		 * instructed to keep the GT powered down and we won't be able
+		 * to communicate with it.
+		 *
+		 * If the status check is done before the punit updates the
+		 * register, it can lead to the system being unusable. Use a
+		 * timeout and defer the probe to prevent this.
+ */
+ if (time_after(jiffies, timeout)) {
+ drm_dbg(&xe->drm, "lmem not initialized by firmware\n");
+ return -EPROBE_DEFER;
+ }
+
+ msleep(20);
+
+ } while (!verify_lmem_ready(xe));
+
+ drm_dbg(&xe->drm, "lmem ready after %ums",
+ jiffies_to_msecs(jiffies - start));
+
+ return 0;
+}
+ALLOW_ERROR_INJECTION(wait_for_lmem_ready, ERRNO); /* See xe_pci_probe() */
+
+static void update_device_info(struct xe_device *xe)
+{
+ /* disable features that are not available/applicable to VFs */
+ if (IS_SRIOV_VF(xe)) {
+ xe->info.probe_display = 0;
+ xe->info.has_heci_gscfi = 0;
+ xe->info.skip_guc_pc = 1;
+ xe->info.skip_pcode = 1;
+ }
+}
+
+/**
+ * xe_device_probe_early() - Device early probe
+ * @xe: xe device instance
+ *
+ * Initialize MMIO resources that don't require any
+ * knowledge about tile count. Also initialize pcode and
+ * check vram initialization on root tile.
+ *
+ * Return: 0 on success, error code on failure
+ */
+int xe_device_probe_early(struct xe_device *xe)
+{
+ int err;
+
+ err = xe_mmio_init(xe);
+ if (err)
+ return err;
+
+ xe_sriov_probe_early(xe);
+
+ update_device_info(xe);
+
+ err = xe_pcode_probe_early(xe);
+ if (err)
+ return err;
+
+ err = wait_for_lmem_ready(xe);
+ if (err)
+ return err;
+
+ xe->wedged.mode = xe_modparam.wedged_mode;
+
+ return 0;
+}
+
+static int probe_has_flat_ccs(struct xe_device *xe)
+{
+ struct xe_gt *gt;
+ u32 reg;
+ int err;
+
+ /* Always enabled/disabled, no runtime check to do */
+ if (GRAPHICS_VER(xe) < 20 || !xe->info.has_flat_ccs)
+ return 0;
+
+ gt = xe_root_mmio_gt(xe);
+
+ err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+ if (err)
+ return err;
+
+ reg = xe_gt_mcr_unicast_read_any(gt, XE2_FLAT_CCS_BASE_RANGE_LOWER);
+ xe->info.has_flat_ccs = (reg & XE2_FLAT_CCS_ENABLE);
+
+ if (!xe->info.has_flat_ccs)
+ drm_dbg(&xe->drm,
+			"Flat CCS has been disabled in BIOS, may lead to performance impact");
+
+ return xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+}
+
+int xe_device_probe(struct xe_device *xe)
+{
+ struct xe_tile *tile;
+ struct xe_gt *gt;
+ int err;
+ u8 last_gt;
+ u8 id;
+
+ xe_pat_init_early(xe);
+
+ err = xe_sriov_init(xe);
+ if (err)
+ return err;
+
+ xe->info.mem_region_mask = 1;
+ err = xe_display_init_nommio(xe);
+ if (err)
+ return err;
+
+ err = xe_set_dma_info(xe);
+ if (err)
+ return err;
+
+ err = xe_mmio_probe_tiles(xe);
+ if (err)
+ return err;
+
+ xe_ttm_sys_mgr_init(xe);
+
+ for_each_gt(gt, xe, id) {
+ err = xe_gt_init_early(gt);
+ if (err)
+ return err;
+
+ /*
+ * Only after this point can GT-specific MMIO operations
+ * (including things like communication with the GuC)
+ * be performed.
+ */
+ xe_gt_mmio_init(gt);
+ }
+
+ for_each_tile(tile, xe, id) {
+ if (IS_SRIOV_VF(xe)) {
+ xe_guc_comm_init_early(&tile->primary_gt->uc.guc);
+ err = xe_gt_sriov_vf_bootstrap(tile->primary_gt);
+ if (err)
+ return err;
+ err = xe_gt_sriov_vf_query_config(tile->primary_gt);
+ if (err)
+ return err;
+ }
+ err = xe_ggtt_init_early(tile->mem.ggtt);
+ if (err)
+ return err;
+ err = xe_memirq_init(&tile->memirq);
+ if (err)
+ return err;
+ }
+
+ for_each_gt(gt, xe, id) {
+ err = xe_gt_init_hwconfig(gt);
+ if (err)
+ return err;
+ }
+
+ err = xe_devcoredump_init(xe);
+ if (err)
+ return err;
+ err = devm_add_action_or_reset(xe->drm.dev, xe_driver_flr_fini, xe);
+ if (err)
+ return err;
+
+ err = xe_display_init_noirq(xe);
+ if (err)
+ return err;
+
+ err = xe_irq_install(xe);
+ if (err)
+ goto err;
+
+ err = probe_has_flat_ccs(xe);
+ if (err)
+ goto err;
+
+ err = xe_vram_probe(xe);
+ if (err)
+ goto err;
+
+ for_each_tile(tile, xe, id) {
+ err = xe_tile_init_noalloc(tile);
+ if (err)
+ goto err;
+ }
+
+ /* Allocate and map stolen after potential VRAM resize */
+ xe_ttm_stolen_mgr_init(xe);
+
+ /*
+ * Now that GT is initialized (TTM in particular),
+ * we can try to init display, and inherit the initial fb.
+ * This is the reason the first allocation needs to be done
+ * inside display.
+ */
+ err = xe_display_init_noaccel(xe);
+ if (err)
+ goto err;
+
+ for_each_gt(gt, xe, id) {
+ last_gt = id;
+
+ err = xe_gt_init(gt);
+ if (err)
+ goto err_fini_gt;
+ }
+
+ xe_heci_gsc_init(xe);
+
+ err = xe_oa_init(xe);
+ if (err)
+ goto err_fini_gt;
+
+ err = xe_display_init(xe);
+ if (err)
+ goto err_fini_oa;
+
+ err = drm_dev_register(&xe->drm, 0);
+ if (err)
+ goto err_fini_display;
+
+ xe_display_register(xe);
+
+ xe_oa_register(xe);
+
+ xe_debugfs_register(xe);
+
+ xe_hwmon_register(xe);
+
+ for_each_gt(gt, xe, id)
+ xe_gt_sanitize_freq(gt);
+
+ return devm_add_action_or_reset(xe->drm.dev, xe_device_sanitize, xe);
+
+err_fini_display:
+ xe_display_driver_remove(xe);
+
+err_fini_oa:
+ xe_oa_fini(xe);
+
+err_fini_gt:
+ for_each_gt(gt, xe, id) {
+ if (id < last_gt)
+ xe_gt_remove(gt);
+ else
+ break;
+ }
+
+err:
+ xe_display_fini(xe);
+ return err;
+}
+
+static void xe_device_remove_display(struct xe_device *xe)
+{
+ xe_display_unregister(xe);
+
+ drm_dev_unplug(&xe->drm);
+ xe_display_driver_remove(xe);
+}
+
+void xe_device_remove(struct xe_device *xe)
+{
+ struct xe_gt *gt;
+ u8 id;
+
+ xe_oa_unregister(xe);
+
+ xe_device_remove_display(xe);
+
+ xe_display_fini(xe);
+
+ xe_oa_fini(xe);
+
+ xe_heci_gsc_fini(xe);
+
+ for_each_gt(gt, xe, id)
+ xe_gt_remove(gt);
+}
+
+void xe_device_shutdown(struct xe_device *xe)
+{
+ struct xe_gt *gt;
+ u8 id;
+
+ drm_dbg(&xe->drm, "Shutting down device\n");
+
+ if (xe_driver_flr_disabled(xe)) {
+ xe_display_pm_shutdown(xe);
+
+ xe_irq_suspend(xe);
+
+ for_each_gt(gt, xe, id)
+ xe_gt_shutdown(gt);
+
+ xe_display_pm_shutdown_late(xe);
+ } else {
+ /* BOOM! */
+ __xe_driver_flr(xe);
+ }
+}
+
+/**
+ * xe_device_wmb() - Device specific write memory barrier
+ * @xe: the &xe_device
+ *
+ * While wmb() is sufficient for a barrier if we use system memory, on discrete
+ * platforms with device memory we additionally need to issue a register write.
+ * Since it doesn't matter which register we write to, use the read-only VF_CAP
+ * register that is also marked as accessible by the VFs.
+ */
+void xe_device_wmb(struct xe_device *xe)
+{
+ wmb();
+ if (IS_DGFX(xe))
+ xe_mmio_write32(xe_root_tile_mmio(xe), VF_CAP_REG, 0);
+}
+
+/**
+ * xe_device_td_flush() - Flush transient L3 cache entries
+ * @xe: The device
+ *
+ * Display engine has direct access to memory and is never coherent with L3/L4
+ * caches (or CPU caches), however KMD is responsible for specifically flushing
+ * transient L3 GPU cache entries prior to the flip sequence to ensure scanout
+ * can happen from such a surface without seeing corruption.
+ *
+ * Display surfaces can be tagged as transient by mapping them using one of the
+ * various L3:XD PAT index modes on Xe2.
+ *
+ * Note: On non-discrete xe2 platforms, like LNL, the entire L3 cache is flushed
+ * at the end of each submission via PIPE_CONTROL for compute/render, since SA
+ * Media is not coherent with L3 and we want to support render-vs-media
+ * use cases. For other engines like copy/blt the HW internally forces uncached
+ * behaviour, hence why we can skip the TDF on such platforms.
+ */
+void xe_device_td_flush(struct xe_device *xe)
+{
+ struct xe_gt *gt;
+ u8 id;
+
+ if (!IS_DGFX(xe) || GRAPHICS_VER(xe) < 20)
+ return;
+
+ if (XE_WA(xe_root_mmio_gt(xe), 16023588340)) {
+ xe_device_l2_flush(xe);
+ return;
+ }
+
+ for_each_gt(gt, xe, id) {
+ if (xe_gt_is_media_type(gt))
+ continue;
+
+ if (xe_force_wake_get(gt_to_fw(gt), XE_FW_GT))
+ return;
+
+ xe_mmio_write32(&gt->mmio, XE2_TDF_CTRL, TRANSIENT_FLUSH_REQUEST);
+ /*
+ * FIXME: We can likely do better here with our choice of
+ * timeout. Currently we just assume the worst case, i.e. 150us,
+ * which is believed to be sufficient to cover the worst case
+ * scenario on current platforms if all cache entries are
+		 * transient and need to be flushed.
+ */
+ if (xe_mmio_wait32(&gt->mmio, XE2_TDF_CTRL, TRANSIENT_FLUSH_REQUEST, 0,
+ 150, NULL, false))
+ xe_gt_err_once(gt, "TD flush timeout\n");
+
+ xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+ }
+}
+
+void xe_device_l2_flush(struct xe_device *xe)
+{
+ struct xe_gt *gt;
+ int err;
+
+ gt = xe_root_mmio_gt(xe);
+
+ if (!XE_WA(gt, 16023588340))
+ return;
+
+ err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+ if (err)
+ return;
+
+ spin_lock(&gt->global_invl_lock);
+ xe_mmio_write32(&gt->mmio, XE2_GLOBAL_INVAL, 0x1);
+
+<<<<<<<
+ if (xe_mmio_wait32(&gt->mmio, XE2_GLOBAL_INVAL, 0x1, 0x0, 500, NULL, true))
+=======
+ if (xe_mmio_wait32(gt, XE2_GLOBAL_INVAL, 0x1, 0x0, 500, NULL, true))
+>>>>>>>
+ xe_gt_err_once(gt, "Global invalidation timeout\n");
+ spin_unlock(&gt->global_invl_lock);
+
+ xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+}
+
+u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size)
+{
+ return xe_device_has_flat_ccs(xe) ?
+ DIV_ROUND_UP_ULL(size, NUM_BYTES_PER_CCS_BYTE(xe)) : 0;
+}
+
+/**
+ * xe_device_assert_mem_access - Inspect the current runtime_pm state.
+ * @xe: xe device instance
+ *
+ * To be used before any kind of memory access. It will splat a debug warning
+ * if the device is currently sleeping. But it doesn't guarantee in any way
+ * that the device is going to remain awake. Xe PM runtime get and put
+ * functions might be added to the outer bound of the memory access, while
+ * this check is intended for inner usage to splat some warning if the worst
+ * case has just happened.
+ */
+void xe_device_assert_mem_access(struct xe_device *xe)
+{
+ xe_assert(xe, !xe_pm_runtime_suspended(xe));
+}
+
+void xe_device_snapshot_print(struct xe_device *xe, struct drm_printer *p)
+{
+ struct xe_gt *gt;
+ u8 id;
+
+ drm_printf(p, "PCI ID: 0x%04x\n", xe->info.devid);
+ drm_printf(p, "PCI revision: 0x%02x\n", xe->info.revid);
+
+ for_each_gt(gt, xe, id) {
+ drm_printf(p, "GT id: %u\n", id);
+ drm_printf(p, "\tTile: %u\n", gt->tile->id);
+ drm_printf(p, "\tType: %s\n",
+ gt->info.type == XE_GT_TYPE_MAIN ? "main" : "media");
+ drm_printf(p, "\tIP ver: %u.%u.%u\n",
+ REG_FIELD_GET(GMD_ID_ARCH_MASK, gt->info.gmdid),
+ REG_FIELD_GET(GMD_ID_RELEASE_MASK, gt->info.gmdid),
+ REG_FIELD_GET(GMD_ID_REVID, gt->info.gmdid));
+ drm_printf(p, "\tCS reference clock: %u\n", gt->info.reference_clock);
+ }
+}
+
+u64 xe_device_canonicalize_addr(struct xe_device *xe, u64 address)
+{
+ return sign_extend64(address, xe->info.va_bits - 1);
+}
+
+u64 xe_device_uncanonicalize_addr(struct xe_device *xe, u64 address)
+{
+ return address & GENMASK_ULL(xe->info.va_bits - 1, 0);
+}
+
+static void xe_device_wedged_fini(struct drm_device *drm, void *arg)
+{
+ struct xe_device *xe = arg;
+
+ xe_pm_runtime_put(xe);
+}
+
+/**
+ * xe_device_declare_wedged - Declare device wedged
+ * @xe: xe device instance
+ *
+ * This is a final state that can only be cleared with a module
+ * re-probe (unbind + bind).
+ * In this state every IOCTL will be blocked so the GT cannot be used.
+ * In general it will be called upon any critical error such as gt reset
+ * failure or guc loading failure.
+ * If xe.wedged module parameter is set to 2, this function will be called
+ * on every single execution timeout (a.k.a. GPU hang) right after devcoredump
+ * snapshot capture. In this mode, GT reset won't be attempted so the state of
+ * the issue is preserved for further debugging.
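+ *
+ * The blocking of IOCTLs mentioned above is enforced in xe_drm_ioctl() and
+ * xe_drm_compat_ioctl(), which return -ECANCELED once the wedged flag is set.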
+ */
+void xe_device_declare_wedged(struct xe_device *xe)
+{
+ struct xe_gt *gt;
+ u8 id;
+
+ if (xe->wedged.mode == 0) {
+ drm_dbg(&xe->drm, "Wedged mode is forcibly disabled\n");
+ return;
+ }
+
+ xe_pm_runtime_get_noresume(xe);
+
+ if (drmm_add_action_or_reset(&xe->drm, xe_device_wedged_fini, xe)) {
+		drm_err(&xe->drm, "Failed to register xe_device_wedged_fini clean-up. Device is still wedged.\n");
+ return;
+ }
+
+ if (!atomic_xchg(&xe->wedged.flag, 1)) {
+ xe->needs_flr_on_fini = true;
+ drm_err(&xe->drm,
+ "CRITICAL: Xe has declared device %s as wedged.\n"
+ "IOCTLs and executions are blocked. Only a rebind may clear the failure\n"
+ "Please file a _new_ bug report at https://gitlab.freedesktop.org/drm/xe/kernel/issues/new\n",
+ dev_name(xe->drm.dev));
+ }
+
+ for_each_gt(gt, xe, id)
+ xe_gt_declare_wedged(gt);
+}
diff --git a/rr-cache/5fdf7e6a138a878e8f03a52286f9d1bbf3929b0e/preimage b/rr-cache/5fdf7e6a138a878e8f03a52286f9d1bbf3929b0e/preimage
new file mode 100644
index 000000000000..cb7c47d1c9a3
--- /dev/null
+++ b/rr-cache/5fdf7e6a138a878e8f03a52286f9d1bbf3929b0e/preimage
@@ -0,0 +1,529 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include "xe_display.h"
+#include "regs/xe_regs.h"
+
+#include <linux/fb.h>
+
+#include <drm/drm_drv.h>
+#include <drm/drm_managed.h>
+#include <uapi/drm/xe_drm.h>
+
+#include "soc/intel_dram.h"
+#include "i915_drv.h" /* FIXME: HAS_DISPLAY() depends on this */
+#include "intel_acpi.h"
+#include "intel_audio.h"
+#include "intel_bw.h"
+#include "intel_display.h"
+#include "intel_display_driver.h"
+#include "intel_display_irq.h"
+#include "intel_display_types.h"
+#include "intel_dmc.h"
+#include "intel_dp.h"
+#include "intel_encoder.h"
+#include "intel_fbdev.h"
+#include "intel_hdcp.h"
+#include "intel_hotplug.h"
+#include "intel_opregion.h"
+#include "xe_module.h"
+
+/* Xe device functions */
+
+static bool has_display(struct xe_device *xe)
+{
+ return HAS_DISPLAY(xe);
+}
+
+/**
+ * xe_display_driver_probe_defer - Detect if we need to wait for other drivers
+ * early on
+ * @pdev: PCI device
+ *
+ * Returns: true if probe needs to be deferred, false otherwise
+ */
+bool xe_display_driver_probe_defer(struct pci_dev *pdev)
+{
+ if (!xe_modparam.probe_display)
+		return false;
+
+ return intel_display_driver_probe_defer(pdev);
+}
+
+/**
+ * xe_display_driver_set_hooks - Add driver flags and hooks for display
+ * @driver: DRM device driver
+ *
+ * Set features and function hooks in @driver that are needed for driving the
+ * display IP. This sets the driver's capability of driving display, regardless
+ * of whether the device has it enabled.
+ */
+void xe_display_driver_set_hooks(struct drm_driver *driver)
+{
+ if (!xe_modparam.probe_display)
+ return;
+
+ driver->driver_features |= DRIVER_MODESET | DRIVER_ATOMIC;
+}
+
+static void unset_display_features(struct xe_device *xe)
+{
+ xe->drm.driver_features &= ~(DRIVER_MODESET | DRIVER_ATOMIC);
+}
+
+static void display_destroy(struct drm_device *dev, void *dummy)
+{
+ struct xe_device *xe = to_xe_device(dev);
+
+ destroy_workqueue(xe->display.hotplug.dp_wq);
+}
+
+/**
+ * xe_display_create - create display struct
+ * @xe: XE device instance
+ *
+ * Initialize all fields used by the display part.
+ *
+ * TODO: once everything can be inside a single struct, make the struct opaque
+ * to the rest of xe and return it to be xe->display.
+ *
+ * Returns: 0 on success
+ */
+int xe_display_create(struct xe_device *xe)
+{
+ spin_lock_init(&xe->display.fb_tracking.lock);
+
+ xe->display.hotplug.dp_wq = alloc_ordered_workqueue("xe-dp", 0);
+
+ return drmm_add_action_or_reset(&xe->drm, display_destroy, NULL);
+}
+
+static void xe_display_fini_nommio(struct drm_device *dev, void *dummy)
+{
+ struct xe_device *xe = to_xe_device(dev);
+
+ if (!xe->info.probe_display)
+ return;
+
+ intel_power_domains_cleanup(xe);
+}
+
+int xe_display_init_nommio(struct xe_device *xe)
+{
+ if (!xe->info.probe_display)
+ return 0;
+
+ /* Fake uncore lock */
+ spin_lock_init(&xe->uncore.lock);
+
+ /* This must be called before any calls to HAS_PCH_* */
+ intel_detect_pch(xe);
+
+ return drmm_add_action_or_reset(&xe->drm, xe_display_fini_nommio, xe);
+}
+
+static void xe_display_fini_noirq(void *arg)
+{
+ struct xe_device *xe = arg;
+ struct intel_display *display = &xe->display;
+
+ if (!xe->info.probe_display)
+ return;
+
+ intel_display_driver_remove_noirq(xe);
+ intel_opregion_cleanup(display);
+}
+
+int xe_display_init_noirq(struct xe_device *xe)
+{
+ struct intel_display *display = &xe->display;
+ int err;
+
+ if (!xe->info.probe_display)
+ return 0;
+
+ intel_display_driver_early_probe(xe);
+
+	/* Early display init. */
+ intel_opregion_setup(display);
+
+ /*
+ * Fill the dram structure to get the system dram info. This will be
+ * used for memory latency calculation.
+ */
+ intel_dram_detect(xe);
+
+ intel_bw_init_hw(xe);
+
+ intel_display_device_info_runtime_init(xe);
+
+ err = intel_display_driver_probe_noirq(xe);
+ if (err) {
+ intel_opregion_cleanup(display);
+ return err;
+ }
+
+ return devm_add_action_or_reset(xe->drm.dev, xe_display_fini_noirq, xe);
+}
+
+static void xe_display_fini_noaccel(void *arg)
+{
+ struct xe_device *xe = arg;
+
+ if (!xe->info.probe_display)
+ return;
+
+ intel_display_driver_remove_nogem(xe);
+}
+
+int xe_display_init_noaccel(struct xe_device *xe)
+{
+ int err;
+
+ if (!xe->info.probe_display)
+ return 0;
+
+ err = intel_display_driver_probe_nogem(xe);
+ if (err)
+ return err;
+
+ return devm_add_action_or_reset(xe->drm.dev, xe_display_fini_noaccel, xe);
+}
+
+int xe_display_init(struct xe_device *xe)
+{
+ if (!xe->info.probe_display)
+ return 0;
+
+ return intel_display_driver_probe(xe);
+}
+
+void xe_display_fini(struct xe_device *xe)
+{
+ if (!xe->info.probe_display)
+ return;
+
+ intel_hpd_poll_fini(xe);
+
+ intel_hdcp_component_fini(xe);
+ intel_audio_deinit(xe);
+}
+
+void xe_display_register(struct xe_device *xe)
+{
+ if (!xe->info.probe_display)
+ return;
+
+ intel_display_driver_register(xe);
+ intel_register_dsm_handler();
+ intel_power_domains_enable(xe);
+}
+
+void xe_display_unregister(struct xe_device *xe)
+{
+ if (!xe->info.probe_display)
+ return;
+
+ intel_unregister_dsm_handler();
+ intel_power_domains_disable(xe);
+ intel_display_driver_unregister(xe);
+}
+
+void xe_display_driver_remove(struct xe_device *xe)
+{
+ if (!xe->info.probe_display)
+ return;
+
+ intel_display_driver_remove(xe);
+}
+
+/* IRQ-related functions */
+
+void xe_display_irq_handler(struct xe_device *xe, u32 master_ctl)
+{
+ if (!xe->info.probe_display)
+ return;
+
+ if (master_ctl & DISPLAY_IRQ)
+ gen11_display_irq_handler(xe);
+}
+
+void xe_display_irq_enable(struct xe_device *xe, u32 gu_misc_iir)
+{
+ struct intel_display *display = &xe->display;
+
+ if (!xe->info.probe_display)
+ return;
+
+ if (gu_misc_iir & GU_MISC_GSE)
+ intel_opregion_asle_intr(display);
+}
+
+void xe_display_irq_reset(struct xe_device *xe)
+{
+ if (!xe->info.probe_display)
+ return;
+
+ gen11_display_irq_reset(xe);
+}
+
+void xe_display_irq_postinstall(struct xe_device *xe, struct xe_gt *gt)
+{
+ if (!xe->info.probe_display)
+ return;
+
+ if (gt->info.id == XE_GT0)
+ gen11_de_irq_postinstall(xe);
+}
+
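+/*
+ * Example: a suspend-to-idle (s2idle) transition targets ACPI_STATE_S0, which
+ * is below ACPI_STATE_S3, so this returns true; a full S3 suspend targets
+ * ACPI_STATE_S3 and returns false.
+ */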
+static bool suspend_to_idle(void)
+{
+#if IS_ENABLED(CONFIG_ACPI_SLEEP)
+ if (acpi_target_system_state() < ACPI_STATE_S3)
+ return true;
+#endif
+ return false;
+}
+
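+/*
+ * Wait for the earliest pending commit on each CRTC to finish its cleanup
+ * work, so no commit cleanup is still running when the caller suspends or
+ * shuts down the display.
+ */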
+static void xe_display_flush_cleanup_work(struct xe_device *xe)
+{
+ struct intel_crtc *crtc;
+
+ for_each_intel_crtc(&xe->drm, crtc) {
+ struct drm_crtc_commit *commit;
+
+ spin_lock(&crtc->base.commit_lock);
+ commit = list_first_entry_or_null(&crtc->base.commit_list,
+ struct drm_crtc_commit, commit_entry);
+ if (commit)
+ drm_crtc_commit_get(commit);
+ spin_unlock(&crtc->base.commit_lock);
+
+ if (commit) {
+ wait_for_completion(&commit->cleanup_done);
+ drm_crtc_commit_put(commit);
+ }
+ }
+}
+
+/* TODO: System and runtime suspend/resume sequences will be sanitized as a follow-up. */
+void xe_display_pm_runtime_suspend(struct xe_device *xe)
+{
+ if (!xe->info.probe_display)
+ return;
+
+ if (xe->d3cold.allowed)
+ xe_display_pm_suspend(xe, true);
+
+ intel_hpd_poll_enable(xe);
+}
+
+void xe_display_pm_suspend(struct xe_device *xe, bool runtime)
+{
+ struct intel_display *display = &xe->display;
+ bool s2idle = suspend_to_idle();
+ if (!xe->info.probe_display)
+ return;
+
+ /*
+ * We do a lot of poking in a lot of registers, make sure they work
+ * properly.
+ */
+ intel_power_domains_disable(xe);
+ intel_fbdev_set_suspend(&xe->drm, FBINFO_STATE_SUSPENDED, true);
+ if (!runtime && has_display(xe)) {
+ drm_kms_helper_poll_disable(&xe->drm);
+ intel_display_driver_disable_user_access(xe);
+ intel_display_driver_suspend(xe);
+ }
+
+ xe_display_flush_cleanup_work(xe);
+
+ intel_dp_mst_suspend(xe);
+
+ intel_hpd_cancel_work(xe);
+
+ if (!runtime && has_display(xe)) {
+ intel_display_driver_suspend_access(xe);
+ intel_encoder_suspend_all(&xe->display);
+ }
+
+ intel_opregion_suspend(display, s2idle ? PCI_D1 : PCI_D3cold);
+
+ intel_dmc_suspend(xe);
+
+ if (runtime && has_display(xe))
+ intel_hpd_poll_enable(xe);
+}
+
+<<<<<<<
+=======
+void xe_display_pm_suspend(struct xe_device *xe)
+{
+ __xe_display_pm_suspend(xe, false);
+}
+
+void xe_display_pm_shutdown(struct xe_device *xe)
+{
+ struct intel_display *display = &xe->display;
+
+ if (!xe->info.probe_display)
+ return;
+
+ intel_power_domains_disable(xe);
+ intel_fbdev_set_suspend(&xe->drm, FBINFO_STATE_SUSPENDED, true);
+ if (has_display(xe)) {
+ drm_kms_helper_poll_disable(&xe->drm);
+ intel_display_driver_disable_user_access(xe);
+ intel_display_driver_suspend(xe);
+ }
+
+ xe_display_flush_cleanup_work(xe);
+ intel_dp_mst_suspend(xe);
+ intel_hpd_cancel_work(xe);
+
+ if (has_display(xe))
+ intel_display_driver_suspend_access(xe);
+
+ intel_encoder_suspend_all(display);
+ intel_encoder_shutdown_all(display);
+
+ intel_opregion_suspend(display, PCI_D3cold);
+
+ intel_dmc_suspend(xe);
+}
+
+void xe_display_pm_runtime_suspend(struct xe_device *xe)
+{
+ if (!xe->info.probe_display)
+ return;
+
+ if (xe->d3cold.allowed) {
+ __xe_display_pm_suspend(xe, true);
+ return;
+ }
+
+ intel_hpd_poll_enable(xe);
+}
+
+>>>>>>>
+void xe_display_pm_suspend_late(struct xe_device *xe)
+{
+ bool s2idle = suspend_to_idle();
+ if (!xe->info.probe_display)
+ return;
+
+ intel_power_domains_suspend(xe, s2idle);
+
+ intel_display_power_suspend_late(xe);
+}
+
+void xe_display_pm_runtime_resume(struct xe_device *xe)
+{
+ if (!xe->info.probe_display)
+ return;
+
+ intel_hpd_poll_disable(xe);
+
+ if (xe->d3cold.allowed)
+ xe_display_pm_resume(xe, true);
+}
+
+void xe_display_pm_resume_early(struct xe_device *xe)
+{
+ if (!xe->info.probe_display)
+ return;
+
+ intel_display_power_resume_early(xe);
+
+ intel_power_domains_resume(xe);
+}
+
+void xe_display_pm_resume(struct xe_device *xe, bool runtime)
+{
+ struct intel_display *display = &xe->display;
+
+ if (!xe->info.probe_display)
+ return;
+
+ intel_dmc_resume(xe);
+
+ if (has_display(xe))
+ drm_mode_config_reset(&xe->drm);
+
+ intel_display_driver_init_hw(xe);
+ intel_hpd_init(xe);
+
+ if (!runtime && has_display(xe))
+ intel_display_driver_resume_access(xe);
+
+ /* MST sideband requires HPD interrupts enabled */
+ intel_dp_mst_resume(xe);
+ if (!runtime && has_display(xe)) {
+ intel_display_driver_resume(xe);
+ drm_kms_helper_poll_enable(&xe->drm);
+ intel_display_driver_enable_user_access(xe);
+ }
+
+ if (has_display(xe))
+ intel_hpd_poll_disable(xe);
+
+ intel_opregion_resume(display);
+
+ intel_fbdev_set_suspend(&xe->drm, FBINFO_STATE_RUNNING, false);
+
+ intel_power_domains_enable(xe);
+}
+
+<<<<<<<
+=======
+void xe_display_pm_resume(struct xe_device *xe)
+{
+ __xe_display_pm_resume(xe, false);
+}
+
+void xe_display_pm_runtime_resume(struct xe_device *xe)
+{
+ if (!xe->info.probe_display)
+ return;
+
+ if (xe->d3cold.allowed) {
+ __xe_display_pm_resume(xe, true);
+ return;
+ }
+
+ intel_hpd_poll_disable(xe);
+}
+
+
+>>>>>>>
+static void display_device_remove(struct drm_device *dev, void *arg)
+{
+ struct xe_device *xe = arg;
+
+ intel_display_device_remove(xe);
+}
+
+int xe_display_probe(struct xe_device *xe)
+{
+ int err;
+
+ if (!xe->info.probe_display)
+ goto no_display;
+
+ intel_display_device_probe(xe);
+
+ err = drmm_add_action_or_reset(&xe->drm, display_device_remove, xe);
+ if (err)
+ return err;
+
+ if (has_display(xe))
+ return 0;
+
+no_display:
+ xe->info.probe_display = false;
+ unset_display_features(xe);
+ return 0;
+}
diff --git a/rr-cache/64a8b1e687217096af72085e0d85f39e6d425bd5/preimage b/rr-cache/64a8b1e687217096af72085e0d85f39e6d425bd5/preimage
new file mode 100644
index 000000000000..bf64eecd1621
--- /dev/null
+++ b/rr-cache/64a8b1e687217096af72085e0d85f39e6d425bd5/preimage
@@ -0,0 +1,86 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_DISPLAY_H_
+#define _XE_DISPLAY_H_
+
+#include "xe_device.h"
+
+struct drm_driver;
+
+#if IS_ENABLED(CONFIG_DRM_XE_DISPLAY)
+
+bool xe_display_driver_probe_defer(struct pci_dev *pdev);
+void xe_display_driver_set_hooks(struct drm_driver *driver);
+void xe_display_driver_remove(struct xe_device *xe);
+
+int xe_display_create(struct xe_device *xe);
+
+int xe_display_probe(struct xe_device *xe);
+
+int xe_display_init_nommio(struct xe_device *xe);
+int xe_display_init_noirq(struct xe_device *xe);
+int xe_display_init_noaccel(struct xe_device *xe);
+int xe_display_init(struct xe_device *xe);
+void xe_display_fini(struct xe_device *xe);
+
+void xe_display_register(struct xe_device *xe);
+void xe_display_unregister(struct xe_device *xe);
+
+void xe_display_irq_handler(struct xe_device *xe, u32 master_ctl);
+void xe_display_irq_enable(struct xe_device *xe, u32 gu_misc_iir);
+void xe_display_irq_reset(struct xe_device *xe);
+void xe_display_irq_postinstall(struct xe_device *xe, struct xe_gt *gt);
+
+void xe_display_pm_suspend(struct xe_device *xe);
+<<<<<<<
+=======
+void xe_display_pm_shutdown(struct xe_device *xe);
+>>>>>>>
+void xe_display_pm_suspend_late(struct xe_device *xe);
+void xe_display_pm_shutdown_late(struct xe_device *xe);
+void xe_display_pm_resume_early(struct xe_device *xe);
+void xe_display_pm_resume(struct xe_device *xe);
+void xe_display_pm_runtime_suspend(struct xe_device *xe);
+void xe_display_pm_runtime_resume(struct xe_device *xe);
+
+#else
+
+static inline int xe_display_driver_probe_defer(struct pci_dev *pdev) { return 0; }
+static inline void xe_display_driver_set_hooks(struct drm_driver *driver) { }
+static inline void xe_display_driver_remove(struct xe_device *xe) {}
+
+static inline int xe_display_create(struct xe_device *xe) { return 0; }
+
+static inline int xe_display_probe(struct xe_device *xe) { return 0; }
+
+static inline int xe_display_init_nommio(struct xe_device *xe) { return 0; }
+static inline int xe_display_init_noirq(struct xe_device *xe) { return 0; }
+static inline int xe_display_init_noaccel(struct xe_device *xe) { return 0; }
+static inline int xe_display_init(struct xe_device *xe) { return 0; }
+static inline void xe_display_fini(struct xe_device *xe) {}
+
+static inline void xe_display_register(struct xe_device *xe) {}
+static inline void xe_display_unregister(struct xe_device *xe) {}
+
+static inline void xe_display_irq_handler(struct xe_device *xe, u32 master_ctl) {}
+static inline void xe_display_irq_enable(struct xe_device *xe, u32 gu_misc_iir) {}
+static inline void xe_display_irq_reset(struct xe_device *xe) {}
+static inline void xe_display_irq_postinstall(struct xe_device *xe, struct xe_gt *gt) {}
+
+static inline void xe_display_pm_suspend(struct xe_device *xe) {}
+<<<<<<<
+=======
+static inline void xe_display_pm_shutdown(struct xe_device *xe) {}
+>>>>>>>
+static inline void xe_display_pm_suspend_late(struct xe_device *xe) {}
+static inline void xe_display_pm_shutdown_late(struct xe_device *xe) {}
+static inline void xe_display_pm_resume_early(struct xe_device *xe) {}
+static inline void xe_display_pm_resume(struct xe_device *xe) {}
+static inline void xe_display_pm_runtime_suspend(struct xe_device *xe) {}
+static inline void xe_display_pm_runtime_resume(struct xe_device *xe) {}
+
+#endif /* CONFIG_DRM_XE_DISPLAY */
+#endif /* _XE_DISPLAY_H_ */
diff --git a/rr-cache/a34411237f3928e39f7e5868312a9debb5b6885f/postimage b/rr-cache/a34411237f3928e39f7e5868312a9debb5b6885f/postimage
new file mode 100644
index 000000000000..10fd4601b9f2
--- /dev/null
+++ b/rr-cache/a34411237f3928e39f7e5868312a9debb5b6885f/postimage
@@ -0,0 +1,1001 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#include "xe_device.h"
+
+#include <linux/delay.h>
+#include <linux/units.h>
+
+#include <drm/drm_aperture.h>
+#include <drm/drm_atomic_helper.h>
+#include <drm/drm_client.h>
+#include <drm/drm_gem_ttm_helper.h>
+#include <drm/drm_ioctl.h>
+#include <drm/drm_managed.h>
+#include <drm/drm_print.h>
+#include <uapi/drm/xe_drm.h>
+
+#include "display/xe_display.h"
+#include "instructions/xe_gpu_commands.h"
+#include "regs/xe_gt_regs.h"
+#include "regs/xe_regs.h"
+#include "xe_bo.h"
+#include "xe_debugfs.h"
+#include "xe_devcoredump.h"
+#include "xe_dma_buf.h"
+#include "xe_drm_client.h"
+#include "xe_drv.h"
+#include "xe_exec.h"
+#include "xe_exec_queue.h"
+#include "xe_force_wake.h"
+#include "xe_ggtt.h"
+#include "xe_gsc_proxy.h"
+#include "xe_gt.h"
+#include "xe_gt_mcr.h"
+#include "xe_gt_printk.h"
+#include "xe_gt_sriov_vf.h"
+#include "xe_guc.h"
+#include "xe_hw_engine_group.h"
+#include "xe_hwmon.h"
+#include "xe_irq.h"
+#include "xe_memirq.h"
+#include "xe_mmio.h"
+#include "xe_module.h"
+#include "xe_observation.h"
+#include "xe_pat.h"
+#include "xe_pcode.h"
+#include "xe_pm.h"
+#include "xe_query.h"
+#include "xe_sriov.h"
+#include "xe_tile.h"
+#include "xe_ttm_stolen_mgr.h"
+#include "xe_ttm_sys_mgr.h"
+#include "xe_vm.h"
+#include "xe_vram.h"
+#include "xe_wait_user_fence.h"
+#include "xe_wa.h"
+
+#include <generated/xe_wa_oob.h>
+
+static int xe_file_open(struct drm_device *dev, struct drm_file *file)
+{
+ struct xe_device *xe = to_xe_device(dev);
+ struct xe_drm_client *client;
+ struct xe_file *xef;
+ int ret = -ENOMEM;
+ struct task_struct *task = NULL;
+
+ xef = kzalloc(sizeof(*xef), GFP_KERNEL);
+ if (!xef)
+ return ret;
+
+ client = xe_drm_client_alloc();
+ if (!client) {
+ kfree(xef);
+ return ret;
+ }
+
+ xef->drm = file;
+ xef->client = client;
+ xef->xe = xe;
+
+ mutex_init(&xef->vm.lock);
+ xa_init_flags(&xef->vm.xa, XA_FLAGS_ALLOC1);
+
+ mutex_init(&xef->exec_queue.lock);
+ xa_init_flags(&xef->exec_queue.xa, XA_FLAGS_ALLOC1);
+
+ spin_lock(&xe->clients.lock);
+ xe->clients.count++;
+ spin_unlock(&xe->clients.lock);
+
+ file->driver_priv = xef;
+ kref_init(&xef->refcount);
+
+ task = get_pid_task(rcu_access_pointer(file->pid), PIDTYPE_PID);
+ if (task) {
+ xef->process_name = kstrdup(task->comm, GFP_KERNEL);
+ xef->pid = task->pid;
+ put_task_struct(task);
+ }
+
+ return 0;
+}
+
+static void xe_file_destroy(struct kref *ref)
+{
+ struct xe_file *xef = container_of(ref, struct xe_file, refcount);
+ struct xe_device *xe = xef->xe;
+
+ xa_destroy(&xef->exec_queue.xa);
+ mutex_destroy(&xef->exec_queue.lock);
+ xa_destroy(&xef->vm.xa);
+ mutex_destroy(&xef->vm.lock);
+
+ spin_lock(&xe->clients.lock);
+ xe->clients.count--;
+ spin_unlock(&xe->clients.lock);
+
+ xe_drm_client_put(xef->client);
+ kfree(xef->process_name);
+ kfree(xef);
+}
+
+/**
+ * xe_file_get() - Take a reference to the xe file object
+ * @xef: Pointer to the xe file
+ *
+ * Anyone with a pointer to xef must take a reference to the xe file
+ * object using this call.
+ *
+ * Return: xe file pointer
+ */
+struct xe_file *xe_file_get(struct xe_file *xef)
+{
+ kref_get(&xef->refcount);
+ return xef;
+}
+
+/**
+ * xe_file_put() - Drop a reference to the xe file object
+ * @xef: Pointer to the xe file
+ *
+ * Used to drop reference to the xef object
+ */
+void xe_file_put(struct xe_file *xef)
+{
+ kref_put(&xef->refcount, xe_file_destroy);
+}
+
+static void xe_file_close(struct drm_device *dev, struct drm_file *file)
+{
+ struct xe_device *xe = to_xe_device(dev);
+ struct xe_file *xef = file->driver_priv;
+ struct xe_vm *vm;
+ struct xe_exec_queue *q;
+ unsigned long idx;
+
+ xe_pm_runtime_get(xe);
+
+ /*
+ * No need for exec_queue.lock here as there is no contention for it
+ * when FD is closing as IOCTLs presumably can't be modifying the
+ * xarray. Taking exec_queue.lock here causes undue dependency on
+ * vm->lock taken during xe_exec_queue_kill().
+ */
+ xa_for_each(&xef->exec_queue.xa, idx, q) {
+ if (q->vm && q->hwe->hw_engine_group)
+ xe_hw_engine_group_del_exec_queue(q->hwe->hw_engine_group, q);
+ xe_exec_queue_kill(q);
+ xe_exec_queue_put(q);
+ }
+ xa_for_each(&xef->vm.xa, idx, vm)
+ xe_vm_close_and_put(vm);
+
+ xe_file_put(xef);
+
+ xe_pm_runtime_put(xe);
+}
+
+static const struct drm_ioctl_desc xe_ioctls[] = {
+ DRM_IOCTL_DEF_DRV(XE_DEVICE_QUERY, xe_query_ioctl, DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(XE_GEM_CREATE, xe_gem_create_ioctl, DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(XE_GEM_MMAP_OFFSET, xe_gem_mmap_offset_ioctl,
+ DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(XE_VM_CREATE, xe_vm_create_ioctl, DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(XE_VM_DESTROY, xe_vm_destroy_ioctl, DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(XE_VM_BIND, xe_vm_bind_ioctl, DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(XE_EXEC, xe_exec_ioctl, DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(XE_EXEC_QUEUE_CREATE, xe_exec_queue_create_ioctl,
+ DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(XE_EXEC_QUEUE_DESTROY, xe_exec_queue_destroy_ioctl,
+ DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(XE_EXEC_QUEUE_GET_PROPERTY, xe_exec_queue_get_property_ioctl,
+ DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(XE_WAIT_USER_FENCE, xe_wait_user_fence_ioctl,
+ DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(XE_OBSERVATION, xe_observation_ioctl, DRM_RENDER_ALLOW),
+};
+
+static long xe_drm_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+ struct drm_file *file_priv = file->private_data;
+ struct xe_device *xe = to_xe_device(file_priv->minor->dev);
+ long ret;
+
+ if (xe_device_wedged(xe))
+ return -ECANCELED;
+
+ ret = xe_pm_runtime_get_ioctl(xe);
+ if (ret >= 0)
+ ret = drm_ioctl(file, cmd, arg);
+ xe_pm_runtime_put(xe);
+
+ return ret;
+}
+
+#ifdef CONFIG_COMPAT
+static long xe_drm_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+ struct drm_file *file_priv = file->private_data;
+ struct xe_device *xe = to_xe_device(file_priv->minor->dev);
+ long ret;
+
+ if (xe_device_wedged(xe))
+ return -ECANCELED;
+
+ ret = xe_pm_runtime_get_ioctl(xe);
+ if (ret >= 0)
+ ret = drm_compat_ioctl(file, cmd, arg);
+ xe_pm_runtime_put(xe);
+
+ return ret;
+}
+#else
+/* similarly to drm_compat_ioctl, let it be assigned to .compat_ioctl unconditionally */
+#define xe_drm_compat_ioctl NULL
+#endif
+
+static const struct file_operations xe_driver_fops = {
+ .owner = THIS_MODULE,
+ .open = drm_open,
+ .release = drm_release_noglobal,
+ .unlocked_ioctl = xe_drm_ioctl,
+ .mmap = drm_gem_mmap,
+ .poll = drm_poll,
+ .read = drm_read,
+ .compat_ioctl = xe_drm_compat_ioctl,
+ .llseek = noop_llseek,
+#ifdef CONFIG_PROC_FS
+ .show_fdinfo = drm_show_fdinfo,
+#endif
+ .fop_flags = FOP_UNSIGNED_OFFSET,
+};
+
+static struct drm_driver driver = {
+ /* Don't use MTRRs here; the Xserver or userspace app should
+ * deal with them for Intel hardware.
+ */
+ .driver_features =
+ DRIVER_GEM |
+ DRIVER_RENDER | DRIVER_SYNCOBJ |
+ DRIVER_SYNCOBJ_TIMELINE | DRIVER_GEM_GPUVA,
+ .open = xe_file_open,
+ .postclose = xe_file_close,
+
+ .gem_prime_import = xe_gem_prime_import,
+
+ .dumb_create = xe_bo_dumb_create,
+ .dumb_map_offset = drm_gem_ttm_dumb_map_offset,
+#ifdef CONFIG_PROC_FS
+ .show_fdinfo = xe_drm_client_fdinfo,
+#endif
+ .ioctls = xe_ioctls,
+ .num_ioctls = ARRAY_SIZE(xe_ioctls),
+ .fops = &xe_driver_fops,
+ .name = DRIVER_NAME,
+ .desc = DRIVER_DESC,
+ .date = DRIVER_DATE,
+ .major = DRIVER_MAJOR,
+ .minor = DRIVER_MINOR,
+ .patchlevel = DRIVER_PATCHLEVEL,
+};
+
+static void xe_device_destroy(struct drm_device *dev, void *dummy)
+{
+ struct xe_device *xe = to_xe_device(dev);
+
+ if (xe->preempt_fence_wq)
+ destroy_workqueue(xe->preempt_fence_wq);
+
+ if (xe->ordered_wq)
+ destroy_workqueue(xe->ordered_wq);
+
+ if (xe->unordered_wq)
+ destroy_workqueue(xe->unordered_wq);
+
+ if (xe->destroy_wq)
+ destroy_workqueue(xe->destroy_wq);
+
+ ttm_device_fini(&xe->ttm);
+}
+
+struct xe_device *xe_device_create(struct pci_dev *pdev,
+ const struct pci_device_id *ent)
+{
+ struct xe_device *xe;
+ int err;
+
+ xe_display_driver_set_hooks(&driver);
+
+ err = drm_aperture_remove_conflicting_pci_framebuffers(pdev, &driver);
+ if (err)
+ return ERR_PTR(err);
+
+ xe = devm_drm_dev_alloc(&pdev->dev, &driver, struct xe_device, drm);
+ if (IS_ERR(xe))
+ return xe;
+
+ err = ttm_device_init(&xe->ttm, &xe_ttm_funcs, xe->drm.dev,
+ xe->drm.anon_inode->i_mapping,
+ xe->drm.vma_offset_manager, false, false);
+ if (WARN_ON(err))
+ goto err;
+
+ err = drmm_add_action_or_reset(&xe->drm, xe_device_destroy, NULL);
+ if (err)
+ goto err;
+
+ xe->info.devid = pdev->device;
+ xe->info.revid = pdev->revision;
+ xe->info.force_execlist = xe_modparam.force_execlist;
+
+ spin_lock_init(&xe->irq.lock);
+ spin_lock_init(&xe->clients.lock);
+
+ init_waitqueue_head(&xe->ufence_wq);
+
+ init_rwsem(&xe->usm.lock);
+
+ xa_init_flags(&xe->usm.asid_to_vm, XA_FLAGS_ALLOC);
+
+ if (IS_ENABLED(CONFIG_DRM_XE_DEBUG)) {
+ /* Trigger a large asid and an early asid wrap. */
+ u32 asid;
+
+ BUILD_BUG_ON(XE_MAX_ASID < 2);
+ err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, NULL,
+ XA_LIMIT(XE_MAX_ASID - 2, XE_MAX_ASID - 1),
+ &xe->usm.next_asid, GFP_KERNEL);
+ drm_WARN_ON(&xe->drm, err);
+ if (err >= 0)
+ xa_erase(&xe->usm.asid_to_vm, asid);
+ }
+
+ spin_lock_init(&xe->pinned.lock);
+ INIT_LIST_HEAD(&xe->pinned.kernel_bo_present);
+ INIT_LIST_HEAD(&xe->pinned.external_vram);
+ INIT_LIST_HEAD(&xe->pinned.evicted);
+
+ xe->preempt_fence_wq = alloc_ordered_workqueue("xe-preempt-fence-wq", 0);
+ xe->ordered_wq = alloc_ordered_workqueue("xe-ordered-wq", 0);
+ xe->unordered_wq = alloc_workqueue("xe-unordered-wq", 0, 0);
+ xe->destroy_wq = alloc_workqueue("xe-destroy-wq", 0, 0);
+ if (!xe->ordered_wq || !xe->unordered_wq ||
+ !xe->preempt_fence_wq || !xe->destroy_wq) {
+ /*
+		 * Cleanup done in xe_device_destroy via the
+		 * drmm_add_action_or_reset registered above
+ */
+ drm_err(&xe->drm, "Failed to allocate xe workqueues\n");
+ err = -ENOMEM;
+ goto err;
+ }
+
+ err = xe_display_create(xe);
+ if (WARN_ON(err))
+ goto err;
+
+ return xe;
+
+err:
+ return ERR_PTR(err);
+}
+
+/*
+ * The driver-initiated FLR is the highest level of reset that we can trigger
+ * from within the driver. It is different from the PCI FLR in that it doesn't
+ * fully reset the SGUnit and doesn't modify the PCI config space and therefore
+ * it doesn't require a re-enumeration of the PCI BARs. However, the
+ * driver-initiated FLR does still cause a reset of both GT and display and a
+ * memory wipe of local and stolen memory, so recovery would require a full HW
+ * re-init and saving/restoring (or re-populating) the wiped memory. Since we
+ * perform the FLR as the very last action before releasing access to the HW
+ * during the driver release flow, we don't attempt recovery at all, because
+ * if/when a new instance of i915 is bound to the device it will do a full
+ * re-init anyway.
+ */
+static void xe_driver_flr(struct xe_device *xe)
+{
+ const unsigned int flr_timeout = 3 * MICRO; /* specs recommend a 3s wait */
+ struct xe_gt *gt = xe_root_mmio_gt(xe);
+ int ret;
+
+ if (xe_mmio_read32(gt, GU_CNTL_PROTECTED) & DRIVERINT_FLR_DIS) {
+ drm_info_once(&xe->drm, "BIOS Disabled Driver-FLR\n");
+ return;
+ }
+
+ drm_dbg(&xe->drm, "Triggering Driver-FLR\n");
+
+ /*
+ * Make sure any pending FLR requests have cleared by waiting for the
+ * FLR trigger bit to go to zero. Also clear GU_DEBUG's DRIVERFLR_STATUS
+	 * to make sure it's not still set from a prior attempt (it's a
+	 * write-to-clear bit).
+	 * Note that we should never be in a situation where a previous attempt
+	 * is still pending (unless the HW is totally dead), but better to be
+	 * safe in case something unexpected happens.
+ */
+ ret = xe_mmio_wait32(gt, GU_CNTL, DRIVERFLR, 0, flr_timeout, NULL, false);
+ if (ret) {
+ drm_err(&xe->drm, "Driver-FLR-prepare wait for ready failed! %d\n", ret);
+ return;
+ }
+ xe_mmio_write32(gt, GU_DEBUG, DRIVERFLR_STATUS);
+
+ /* Trigger the actual Driver-FLR */
+ xe_mmio_rmw32(gt, GU_CNTL, 0, DRIVERFLR);
+
+ /* Wait for hardware teardown to complete */
+ ret = xe_mmio_wait32(gt, GU_CNTL, DRIVERFLR, 0, flr_timeout, NULL, false);
+ if (ret) {
+ drm_err(&xe->drm, "Driver-FLR-teardown wait completion failed! %d\n", ret);
+ return;
+ }
+
+ /* Wait for hardware/firmware re-init to complete */
+ ret = xe_mmio_wait32(gt, GU_DEBUG, DRIVERFLR_STATUS, DRIVERFLR_STATUS,
+ flr_timeout, NULL, false);
+ if (ret) {
+ drm_err(&xe->drm, "Driver-FLR-reinit wait completion failed! %d\n", ret);
+ return;
+ }
+
+ /* Clear sticky completion status */
+ xe_mmio_write32(gt, GU_DEBUG, DRIVERFLR_STATUS);
+}
+
+static void xe_driver_flr_fini(void *arg)
+{
+ struct xe_device *xe = arg;
+
+ if (xe->needs_flr_on_fini)
+ xe_driver_flr(xe);
+}
+
+static void xe_device_sanitize(void *arg)
+{
+ struct xe_device *xe = arg;
+ struct xe_gt *gt;
+ u8 id;
+
+ for_each_gt(gt, xe, id)
+ xe_gt_sanitize(gt);
+}
+
+static int xe_set_dma_info(struct xe_device *xe)
+{
+ unsigned int mask_size = xe->info.dma_mask_size;
+ int err;
+
+ dma_set_max_seg_size(xe->drm.dev, xe_sg_segment_size(xe->drm.dev));
+
+ err = dma_set_mask(xe->drm.dev, DMA_BIT_MASK(mask_size));
+ if (err)
+ goto mask_err;
+
+ err = dma_set_coherent_mask(xe->drm.dev, DMA_BIT_MASK(mask_size));
+ if (err)
+ goto mask_err;
+
+ return 0;
+
+mask_err:
+ drm_err(&xe->drm, "Can't set DMA mask/consistent mask (%d)\n", err);
+ return err;
+}
+
+static bool verify_lmem_ready(struct xe_gt *gt)
+{
+ u32 val = xe_mmio_read32(gt, GU_CNTL) & LMEM_INIT;
+
+ return !!val;
+}
+
+static int wait_for_lmem_ready(struct xe_device *xe)
+{
+ struct xe_gt *gt = xe_root_mmio_gt(xe);
+ unsigned long timeout, start;
+
+ if (!IS_DGFX(xe))
+ return 0;
+
+ if (IS_SRIOV_VF(xe))
+ return 0;
+
+ if (verify_lmem_ready(gt))
+ return 0;
+
+ drm_dbg(&xe->drm, "Waiting for lmem initialization\n");
+
+ start = jiffies;
+ timeout = start + msecs_to_jiffies(60 * 1000); /* 60 sec! */
+
+ do {
+ if (signal_pending(current))
+ return -EINTR;
+
+ /*
+ * The boot firmware initializes local memory and
+		 * assesses its health. If memory training fails, the punit
+		 * will have been instructed to keep the GT powered down and
+		 * we won't be able to communicate with it.
+		 *
+		 * If the status check is done before the punit updates the
+		 * register, it can lead to the system being unusable. Use a
+		 * timeout and defer the probe to prevent this.
+ */
+ if (time_after(jiffies, timeout)) {
+ drm_dbg(&xe->drm, "lmem not initialized by firmware\n");
+ return -EPROBE_DEFER;
+ }
+
+ msleep(20);
+
+ } while (!verify_lmem_ready(gt));
+
+ drm_dbg(&xe->drm, "lmem ready after %ums",
+ jiffies_to_msecs(jiffies - start));
+
+ return 0;
+}
+
+static void update_device_info(struct xe_device *xe)
+{
+ /* disable features that are not available/applicable to VFs */
+ if (IS_SRIOV_VF(xe)) {
+ xe->info.probe_display = 0;
+ xe->info.has_heci_gscfi = 0;
+ xe->info.skip_guc_pc = 1;
+ xe->info.skip_pcode = 1;
+ }
+}
+
+/**
+ * xe_device_probe_early() - Device early probe
+ * @xe: xe device instance
+ *
+ * Initialize MMIO resources that don't require any
+ * knowledge about tile count. Also initialize pcode and
+ * check vram initialization on root tile.
+ *
+ * Return: 0 on success, error code on failure
+ */
+int xe_device_probe_early(struct xe_device *xe)
+{
+ int err;
+
+ err = xe_mmio_init(xe);
+ if (err)
+ return err;
+
+ xe_sriov_probe_early(xe);
+
+ update_device_info(xe);
+
+ err = xe_pcode_probe_early(xe);
+ if (err)
+ return err;
+
+ err = wait_for_lmem_ready(xe);
+ if (err)
+ return err;
+
+ xe->wedged.mode = xe_modparam.wedged_mode;
+
+ return 0;
+}
+
+static int xe_device_set_has_flat_ccs(struct xe_device *xe)
+{
+ u32 reg;
+ int err;
+
+ if (GRAPHICS_VER(xe) < 20 || !xe->info.has_flat_ccs)
+ return 0;
+
+ struct xe_gt *gt = xe_root_mmio_gt(xe);
+
+ err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+ if (err)
+ return err;
+
+ reg = xe_gt_mcr_unicast_read_any(gt, XE2_FLAT_CCS_BASE_RANGE_LOWER);
+ xe->info.has_flat_ccs = (reg & XE2_FLAT_CCS_ENABLE);
+
+ if (!xe->info.has_flat_ccs)
+ drm_dbg(&xe->drm,
+			"Flat CCS has been disabled in BIOS, may lead to performance impact");
+
+ return xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+}
+
+int xe_device_probe(struct xe_device *xe)
+{
+ struct xe_tile *tile;
+ struct xe_gt *gt;
+ int err;
+ u8 last_gt;
+ u8 id;
+
+ xe_pat_init_early(xe);
+
+ err = xe_sriov_init(xe);
+ if (err)
+ return err;
+
+ xe->info.mem_region_mask = 1;
+ err = xe_display_init_nommio(xe);
+ if (err)
+ return err;
+
+ err = xe_set_dma_info(xe);
+ if (err)
+ return err;
+
+ err = xe_mmio_probe_tiles(xe);
+ if (err)
+ return err;
+
+ xe_ttm_sys_mgr_init(xe);
+
+ for_each_gt(gt, xe, id) {
+ err = xe_gt_init_early(gt);
+ if (err)
+ return err;
+ }
+
+ for_each_tile(tile, xe, id) {
+ if (IS_SRIOV_VF(xe)) {
+ xe_guc_comm_init_early(&tile->primary_gt->uc.guc);
+ err = xe_gt_sriov_vf_bootstrap(tile->primary_gt);
+ if (err)
+ return err;
+ err = xe_gt_sriov_vf_query_config(tile->primary_gt);
+ if (err)
+ return err;
+ }
+ err = xe_ggtt_init_early(tile->mem.ggtt);
+ if (err)
+ return err;
+ if (IS_SRIOV_VF(xe)) {
+ err = xe_memirq_init(&tile->sriov.vf.memirq);
+ if (err)
+ return err;
+ }
+ }
+
+ for_each_gt(gt, xe, id) {
+ err = xe_gt_init_hwconfig(gt);
+ if (err)
+ return err;
+ }
+
+ err = xe_devcoredump_init(xe);
+ if (err)
+ return err;
+ err = devm_add_action_or_reset(xe->drm.dev, xe_driver_flr_fini, xe);
+ if (err)
+ return err;
+
+ err = xe_display_init_noirq(xe);
+ if (err)
+ return err;
+
+ err = xe_irq_install(xe);
+ if (err)
+ goto err;
+
+ err = xe_device_set_has_flat_ccs(xe);
+ if (err)
+ goto err;
+
+ err = xe_vram_probe(xe);
+ if (err)
+ goto err;
+
+ for_each_tile(tile, xe, id) {
+ err = xe_tile_init_noalloc(tile);
+ if (err)
+ goto err;
+ }
+
+ /* Allocate and map stolen after potential VRAM resize */
+ xe_ttm_stolen_mgr_init(xe);
+
+ /*
+ * Now that GT is initialized (TTM in particular),
+ * we can try to init display, and inherit the initial fb.
+ * This is the reason the first allocation needs to be done
+ * inside display.
+ */
+ err = xe_display_init_noaccel(xe);
+ if (err)
+ goto err;
+
+ for_each_gt(gt, xe, id) {
+ last_gt = id;
+
+ err = xe_gt_init(gt);
+ if (err)
+ goto err_fini_gt;
+ }
+
+ xe_heci_gsc_init(xe);
+
+ err = xe_oa_init(xe);
+ if (err)
+ goto err_fini_gt;
+
+ err = xe_display_init(xe);
+ if (err)
+ goto err_fini_oa;
+
+ err = drm_dev_register(&xe->drm, 0);
+ if (err)
+ goto err_fini_display;
+
+ xe_display_register(xe);
+
+ xe_oa_register(xe);
+
+ xe_debugfs_register(xe);
+
+ xe_hwmon_register(xe);
+
+ for_each_gt(gt, xe, id)
+ xe_gt_sanitize_freq(gt);
+
+ return devm_add_action_or_reset(xe->drm.dev, xe_device_sanitize, xe);
+
+err_fini_display:
+ xe_display_driver_remove(xe);
+
+err_fini_oa:
+ xe_oa_fini(xe);
+
+err_fini_gt:
+ for_each_gt(gt, xe, id) {
+ if (id < last_gt)
+ xe_gt_remove(gt);
+ else
+ break;
+ }
+
+err:
+ xe_display_fini(xe);
+ return err;
+}
+
+static void xe_device_remove_display(struct xe_device *xe)
+{
+ xe_display_unregister(xe);
+
+ drm_dev_unplug(&xe->drm);
+ xe_display_driver_remove(xe);
+}
+
+void xe_device_remove(struct xe_device *xe)
+{
+ struct xe_gt *gt;
+ u8 id;
+
+ xe_oa_unregister(xe);
+
+ xe_device_remove_display(xe);
+
+ xe_display_fini(xe);
+
+ xe_oa_fini(xe);
+
+ xe_heci_gsc_fini(xe);
+
+ for_each_gt(gt, xe, id)
+ xe_gt_remove(gt);
+}
+
+void xe_device_shutdown(struct xe_device *xe)
+{
+}
+
+/**
+ * xe_device_wmb() - Device specific write memory barrier
+ * @xe: the &xe_device
+ *
+ * While wmb() is sufficient for a barrier if we use system memory, on discrete
+ * platforms with device memory we additionally need to issue a register write.
+ * Since it doesn't matter which register we write to, use the read-only VF_CAP
+ * register that is also marked as accessible by the VFs.
+ */
+void xe_device_wmb(struct xe_device *xe)
+{
+ struct xe_gt *gt = xe_root_mmio_gt(xe);
+
+ wmb();
+ if (IS_DGFX(xe))
+ xe_mmio_write32(gt, VF_CAP_REG, 0);
+}
+
+/**
+ * xe_device_td_flush() - Flush transient L3 cache entries
+ * @xe: The device
+ *
+ * Display engine has direct access to memory and is never coherent with L3/L4
+ * caches (or CPU caches), however KMD is responsible for specifically flushing
+ * transient L3 GPU cache entries prior to the flip sequence to ensure scanout
+ * can happen from such a surface without seeing corruption.
+ *
+ * Display surfaces can be tagged as transient by mapping them using one of the
+ * various L3:XD PAT index modes on Xe2.
+ *
+ * Note: On non-discrete xe2 platforms, like LNL, the entire L3 cache is flushed
+ * at the end of each submission via PIPE_CONTROL for compute/render, since SA
+ * Media is not coherent with L3 and we want to support render-vs-media
+ * use cases. For other engines like copy/blt the HW internally forces uncached
+ * behaviour, hence why we can skip the TDF on such platforms.
+ */
+void xe_device_td_flush(struct xe_device *xe)
+{
+ struct xe_gt *gt;
+ u8 id;
+
+ if (!IS_DGFX(xe) || GRAPHICS_VER(xe) < 20)
+ return;
+
+ if (XE_WA(xe_root_mmio_gt(xe), 16023588340)) {
+ xe_device_l2_flush(xe);
+ return;
+ }
+
+ for_each_gt(gt, xe, id) {
+ if (xe_gt_is_media_type(gt))
+ continue;
+
+ if (xe_force_wake_get(gt_to_fw(gt), XE_FW_GT))
+ return;
+
+ xe_mmio_write32(gt, XE2_TDF_CTRL, TRANSIENT_FLUSH_REQUEST);
+ /*
+ * FIXME: We can likely do better here with our choice of
+ * timeout. Currently we just assume the worst case, i.e. 150us,
+ * which is believed to be sufficient to cover the worst case
+ * scenario on current platforms if all cache entries are
+		 * transient and need to be flushed.
+ */
+ if (xe_mmio_wait32(gt, XE2_TDF_CTRL, TRANSIENT_FLUSH_REQUEST, 0,
+ 150, NULL, false))
+ xe_gt_err_once(gt, "TD flush timeout\n");
+
+ xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+ }
+}
+
+void xe_device_l2_flush(struct xe_device *xe)
+{
+ struct xe_gt *gt;
+ int err;
+
+ gt = xe_root_mmio_gt(xe);
+
+ if (!XE_WA(gt, 16023588340))
+ return;
+
+ err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+ if (err)
+ return;
+
+ spin_lock(&gt->global_invl_lock);
+ xe_mmio_write32(gt, XE2_GLOBAL_INVAL, 0x1);
+
+ if (xe_mmio_wait32(gt, XE2_GLOBAL_INVAL, 0x1, 0x0, 500, NULL, true))
+ xe_gt_err_once(gt, "Global invalidation timeout\n");
+ spin_unlock(&gt->global_invl_lock);
+
+ xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+}
+
+u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size)
+{
+ return xe_device_has_flat_ccs(xe) ?
+ DIV_ROUND_UP_ULL(size, NUM_BYTES_PER_CCS_BYTE(xe)) : 0;
+}
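+
+/*
+ * Worked example, illustrative only: assuming NUM_BYTES_PER_CCS_BYTE()
+ * evaluates to 256 on a given platform, a 64 KiB buffer with flat CCS
+ * enabled needs DIV_ROUND_UP_ULL(65536, 256) == 256 bytes of CCS
+ * metadata, while the function returns 0 when flat CCS is disabled.
+ */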
+
+/**
+ * xe_device_assert_mem_access - Inspect the current runtime_pm state.
+ * @xe: xe device instance
+ *
+ * To be used before any kind of memory access. It will splat a debug warning
+ * if the device is currently sleeping. But it doesn't guarantee in any way
+ * that the device is going to remain awake. Xe PM runtime get and put
+ * functions might be added to the outer bound of the memory access, while
+ * this check is intended for inner usage to splat some warning if the worst
+ * case has just happened.
+ */
+void xe_device_assert_mem_access(struct xe_device *xe)
+{
+ xe_assert(xe, !xe_pm_runtime_suspended(xe));
+}
+
+void xe_device_snapshot_print(struct xe_device *xe, struct drm_printer *p)
+{
+ struct xe_gt *gt;
+ u8 id;
+
+ drm_printf(p, "PCI ID: 0x%04x\n", xe->info.devid);
+ drm_printf(p, "PCI revision: 0x%02x\n", xe->info.revid);
+
+ for_each_gt(gt, xe, id) {
+ drm_printf(p, "GT id: %u\n", id);
+ drm_printf(p, "\tType: %s\n",
+ gt->info.type == XE_GT_TYPE_MAIN ? "main" : "media");
+ drm_printf(p, "\tIP ver: %u.%u.%u\n",
+ REG_FIELD_GET(GMD_ID_ARCH_MASK, gt->info.gmdid),
+ REG_FIELD_GET(GMD_ID_RELEASE_MASK, gt->info.gmdid),
+ REG_FIELD_GET(GMD_ID_REVID, gt->info.gmdid));
+ drm_printf(p, "\tCS reference clock: %u\n", gt->info.reference_clock);
+ }
+}
+
+u64 xe_device_canonicalize_addr(struct xe_device *xe, u64 address)
+{
+ return sign_extend64(address, xe->info.va_bits - 1);
+}
+
+u64 xe_device_uncanonicalize_addr(struct xe_device *xe, u64 address)
+{
+ return address & GENMASK_ULL(xe->info.va_bits - 1, 0);
+}
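+
+/*
+ * Illustrative sketch only, not part of the driver: a worked example of
+ * the canonicalize/uncanonicalize round trip above, assuming a
+ * hypothetical 48-bit VA configuration.
+ */
+static inline void xe_device_canonical_addr_example(struct xe_device *xe)
+{
+	/* With va_bits == 48, bit 47 is replicated into the upper bits. */
+	u64 addr = 0x0000800000000000ull;
+	u64 canonical = xe_device_canonicalize_addr(xe, addr);
+
+	/* canonical is 0xffff800000000000 when xe->info.va_bits == 48. */
+	drm_WARN_ON(&xe->drm,
+		    xe_device_uncanonicalize_addr(xe, canonical) != addr);
+}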
+
+static void xe_device_wedged_fini(struct drm_device *drm, void *arg)
+{
+ struct xe_device *xe = arg;
+
+ xe_pm_runtime_put(xe);
+}
+
+/**
+ * xe_device_declare_wedged - Declare device wedged
+ * @xe: xe device instance
+ *
+ * This is a final state that can only be cleared with a module
+ * re-probe (unbind + bind).
+ * In this state every IOCTL will be blocked so the GT cannot be used.
+ * In general it will be called upon any critical error such as gt reset
+ * failure or guc loading failure.
+ * If xe.wedged module parameter is set to 2, this function will be called
+ * on every single execution timeout (a.k.a. GPU hang) right after devcoredump
+ * snapshot capture. In this mode, GT reset won't be attempted so the state of
+ * the issue is preserved for further debugging.
+ */
+void xe_device_declare_wedged(struct xe_device *xe)
+{
+ struct xe_gt *gt;
+ u8 id;
+
+ if (xe->wedged.mode == 0) {
+ drm_dbg(&xe->drm, "Wedged mode is forcibly disabled\n");
+ return;
+ }
+
+ xe_pm_runtime_get_noresume(xe);
+
+ if (drmm_add_action_or_reset(&xe->drm, xe_device_wedged_fini, xe)) {
+		drm_err(&xe->drm, "Failed to register xe_device_wedged_fini clean-up; device remains wedged.\n");
+ return;
+ }
+
+ if (!atomic_xchg(&xe->wedged.flag, 1)) {
+ xe->needs_flr_on_fini = true;
+ drm_err(&xe->drm,
+ "CRITICAL: Xe has declared device %s as wedged.\n"
+ "IOCTLs and executions are blocked. Only a rebind may clear the failure\n"
+ "Please file a _new_ bug report at https://gitlab.freedesktop.org/drm/xe/kernel/issues/new\n",
+ dev_name(xe->drm.dev));
+ }
+
+ for_each_gt(gt, xe, id)
+ xe_gt_declare_wedged(gt);
+}
diff --git a/rr-cache/a34411237f3928e39f7e5868312a9debb5b6885f/preimage b/rr-cache/a34411237f3928e39f7e5868312a9debb5b6885f/preimage
new file mode 100644
index 000000000000..b707f9f74054
--- /dev/null
+++ b/rr-cache/a34411237f3928e39f7e5868312a9debb5b6885f/preimage
@@ -0,0 +1,1005 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#include "xe_device.h"
+
+#include <linux/delay.h>
+#include <linux/units.h>
+
+#include <drm/drm_aperture.h>
+#include <drm/drm_atomic_helper.h>
+#include <drm/drm_client.h>
+#include <drm/drm_gem_ttm_helper.h>
+#include <drm/drm_ioctl.h>
+#include <drm/drm_managed.h>
+#include <drm/drm_print.h>
+#include <uapi/drm/xe_drm.h>
+
+#include "display/xe_display.h"
+#include "instructions/xe_gpu_commands.h"
+#include "regs/xe_gt_regs.h"
+#include "regs/xe_regs.h"
+#include "xe_bo.h"
+#include "xe_debugfs.h"
+#include "xe_devcoredump.h"
+#include "xe_dma_buf.h"
+#include "xe_drm_client.h"
+#include "xe_drv.h"
+#include "xe_exec.h"
+#include "xe_exec_queue.h"
+#include "xe_force_wake.h"
+#include "xe_ggtt.h"
+#include "xe_gsc_proxy.h"
+#include "xe_gt.h"
+#include "xe_gt_mcr.h"
+#include "xe_gt_printk.h"
+#include "xe_gt_sriov_vf.h"
+#include "xe_guc.h"
+#include "xe_hw_engine_group.h"
+#include "xe_hwmon.h"
+#include "xe_irq.h"
+#include "xe_memirq.h"
+#include "xe_mmio.h"
+#include "xe_module.h"
+#include "xe_observation.h"
+#include "xe_pat.h"
+#include "xe_pcode.h"
+#include "xe_pm.h"
+#include "xe_query.h"
+#include "xe_sriov.h"
+#include "xe_tile.h"
+#include "xe_ttm_stolen_mgr.h"
+#include "xe_ttm_sys_mgr.h"
+#include "xe_vm.h"
+#include "xe_vram.h"
+#include "xe_wait_user_fence.h"
+#include "xe_wa.h"
+
+#include <generated/xe_wa_oob.h>
+
+static int xe_file_open(struct drm_device *dev, struct drm_file *file)
+{
+ struct xe_device *xe = to_xe_device(dev);
+ struct xe_drm_client *client;
+ struct xe_file *xef;
+ int ret = -ENOMEM;
+ struct task_struct *task = NULL;
+
+ xef = kzalloc(sizeof(*xef), GFP_KERNEL);
+ if (!xef)
+ return ret;
+
+ client = xe_drm_client_alloc();
+ if (!client) {
+ kfree(xef);
+ return ret;
+ }
+
+ xef->drm = file;
+ xef->client = client;
+ xef->xe = xe;
+
+ mutex_init(&xef->vm.lock);
+ xa_init_flags(&xef->vm.xa, XA_FLAGS_ALLOC1);
+
+ mutex_init(&xef->exec_queue.lock);
+ xa_init_flags(&xef->exec_queue.xa, XA_FLAGS_ALLOC1);
+
+ spin_lock(&xe->clients.lock);
+ xe->clients.count++;
+ spin_unlock(&xe->clients.lock);
+
+ file->driver_priv = xef;
+ kref_init(&xef->refcount);
+
+ task = get_pid_task(rcu_access_pointer(file->pid), PIDTYPE_PID);
+ if (task) {
+ xef->process_name = kstrdup(task->comm, GFP_KERNEL);
+ xef->pid = task->pid;
+ put_task_struct(task);
+ }
+
+ return 0;
+}
+
+static void xe_file_destroy(struct kref *ref)
+{
+ struct xe_file *xef = container_of(ref, struct xe_file, refcount);
+ struct xe_device *xe = xef->xe;
+
+ xa_destroy(&xef->exec_queue.xa);
+ mutex_destroy(&xef->exec_queue.lock);
+ xa_destroy(&xef->vm.xa);
+ mutex_destroy(&xef->vm.lock);
+
+ spin_lock(&xe->clients.lock);
+ xe->clients.count--;
+ spin_unlock(&xe->clients.lock);
+
+ xe_drm_client_put(xef->client);
+ kfree(xef->process_name);
+ kfree(xef);
+}
+
+/**
+ * xe_file_get() - Take a reference to the xe file object
+ * @xef: Pointer to the xe file
+ *
+ * Anyone with a pointer to xef must take a reference to the xe file
+ * object using this call.
+ *
+ * Return: xe file pointer
+ */
+struct xe_file *xe_file_get(struct xe_file *xef)
+{
+ kref_get(&xef->refcount);
+ return xef;
+}
+
+/**
+ * xe_file_put() - Drop a reference to the xe file object
+ * @xef: Pointer to the xe file
+ *
+ * Used to drop a reference to the xef object.
+ */
+void xe_file_put(struct xe_file *xef)
+{
+ kref_put(&xef->refcount, xe_file_destroy);
+}
+
+static void xe_file_close(struct drm_device *dev, struct drm_file *file)
+{
+ struct xe_device *xe = to_xe_device(dev);
+ struct xe_file *xef = file->driver_priv;
+ struct xe_vm *vm;
+ struct xe_exec_queue *q;
+ unsigned long idx;
+
+ xe_pm_runtime_get(xe);
+
+ /*
+ * No need for exec_queue.lock here as there is no contention for it
+ * when FD is closing as IOCTLs presumably can't be modifying the
+ * xarray. Taking exec_queue.lock here causes undue dependency on
+ * vm->lock taken during xe_exec_queue_kill().
+ */
+ xa_for_each(&xef->exec_queue.xa, idx, q) {
+ if (q->vm && q->hwe->hw_engine_group)
+ xe_hw_engine_group_del_exec_queue(q->hwe->hw_engine_group, q);
+ xe_exec_queue_kill(q);
+ xe_exec_queue_put(q);
+ }
+ xa_for_each(&xef->vm.xa, idx, vm)
+ xe_vm_close_and_put(vm);
+
+ xe_file_put(xef);
+
+ xe_pm_runtime_put(xe);
+}
+
+static const struct drm_ioctl_desc xe_ioctls[] = {
+ DRM_IOCTL_DEF_DRV(XE_DEVICE_QUERY, xe_query_ioctl, DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(XE_GEM_CREATE, xe_gem_create_ioctl, DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(XE_GEM_MMAP_OFFSET, xe_gem_mmap_offset_ioctl,
+ DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(XE_VM_CREATE, xe_vm_create_ioctl, DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(XE_VM_DESTROY, xe_vm_destroy_ioctl, DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(XE_VM_BIND, xe_vm_bind_ioctl, DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(XE_EXEC, xe_exec_ioctl, DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(XE_EXEC_QUEUE_CREATE, xe_exec_queue_create_ioctl,
+ DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(XE_EXEC_QUEUE_DESTROY, xe_exec_queue_destroy_ioctl,
+ DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(XE_EXEC_QUEUE_GET_PROPERTY, xe_exec_queue_get_property_ioctl,
+ DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(XE_WAIT_USER_FENCE, xe_wait_user_fence_ioctl,
+ DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(XE_OBSERVATION, xe_observation_ioctl, DRM_RENDER_ALLOW),
+};
+
+static long xe_drm_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+ struct drm_file *file_priv = file->private_data;
+ struct xe_device *xe = to_xe_device(file_priv->minor->dev);
+ long ret;
+
+ if (xe_device_wedged(xe))
+ return -ECANCELED;
+
+ ret = xe_pm_runtime_get_ioctl(xe);
+ if (ret >= 0)
+ ret = drm_ioctl(file, cmd, arg);
+ xe_pm_runtime_put(xe);
+
+ return ret;
+}
+
+#ifdef CONFIG_COMPAT
+static long xe_drm_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+ struct drm_file *file_priv = file->private_data;
+ struct xe_device *xe = to_xe_device(file_priv->minor->dev);
+ long ret;
+
+ if (xe_device_wedged(xe))
+ return -ECANCELED;
+
+ ret = xe_pm_runtime_get_ioctl(xe);
+ if (ret >= 0)
+ ret = drm_compat_ioctl(file, cmd, arg);
+ xe_pm_runtime_put(xe);
+
+ return ret;
+}
+#else
+/* similarly to drm_compat_ioctl, let it be assigned to .compat_ioctl unconditionally */
+#define xe_drm_compat_ioctl NULL
+#endif
+
+static const struct file_operations xe_driver_fops = {
+ .owner = THIS_MODULE,
+ .open = drm_open,
+ .release = drm_release_noglobal,
+ .unlocked_ioctl = xe_drm_ioctl,
+ .mmap = drm_gem_mmap,
+ .poll = drm_poll,
+ .read = drm_read,
+ .compat_ioctl = xe_drm_compat_ioctl,
+ .llseek = noop_llseek,
+#ifdef CONFIG_PROC_FS
+ .show_fdinfo = drm_show_fdinfo,
+#endif
+ .fop_flags = FOP_UNSIGNED_OFFSET,
+};
+
+static struct drm_driver driver = {
+ /* Don't use MTRRs here; the Xserver or userspace app should
+ * deal with them for Intel hardware.
+ */
+ .driver_features =
+ DRIVER_GEM |
+ DRIVER_RENDER | DRIVER_SYNCOBJ |
+ DRIVER_SYNCOBJ_TIMELINE | DRIVER_GEM_GPUVA,
+ .open = xe_file_open,
+ .postclose = xe_file_close,
+
+ .gem_prime_import = xe_gem_prime_import,
+
+ .dumb_create = xe_bo_dumb_create,
+ .dumb_map_offset = drm_gem_ttm_dumb_map_offset,
+#ifdef CONFIG_PROC_FS
+ .show_fdinfo = xe_drm_client_fdinfo,
+#endif
+ .ioctls = xe_ioctls,
+ .num_ioctls = ARRAY_SIZE(xe_ioctls),
+ .fops = &xe_driver_fops,
+ .name = DRIVER_NAME,
+ .desc = DRIVER_DESC,
+ .date = DRIVER_DATE,
+ .major = DRIVER_MAJOR,
+ .minor = DRIVER_MINOR,
+ .patchlevel = DRIVER_PATCHLEVEL,
+};
+
+static void xe_device_destroy(struct drm_device *dev, void *dummy)
+{
+ struct xe_device *xe = to_xe_device(dev);
+
+ if (xe->preempt_fence_wq)
+ destroy_workqueue(xe->preempt_fence_wq);
+
+ if (xe->ordered_wq)
+ destroy_workqueue(xe->ordered_wq);
+
+ if (xe->unordered_wq)
+ destroy_workqueue(xe->unordered_wq);
+
+ if (xe->destroy_wq)
+ destroy_workqueue(xe->destroy_wq);
+
+ ttm_device_fini(&xe->ttm);
+}
+
+struct xe_device *xe_device_create(struct pci_dev *pdev,
+ const struct pci_device_id *ent)
+{
+ struct xe_device *xe;
+ int err;
+
+ xe_display_driver_set_hooks(&driver);
+
+ err = drm_aperture_remove_conflicting_pci_framebuffers(pdev, &driver);
+ if (err)
+ return ERR_PTR(err);
+
+ xe = devm_drm_dev_alloc(&pdev->dev, &driver, struct xe_device, drm);
+ if (IS_ERR(xe))
+ return xe;
+
+ err = ttm_device_init(&xe->ttm, &xe_ttm_funcs, xe->drm.dev,
+ xe->drm.anon_inode->i_mapping,
+ xe->drm.vma_offset_manager, false, false);
+ if (WARN_ON(err))
+ goto err;
+
+ err = drmm_add_action_or_reset(&xe->drm, xe_device_destroy, NULL);
+ if (err)
+ goto err;
+
+ xe->info.devid = pdev->device;
+ xe->info.revid = pdev->revision;
+ xe->info.force_execlist = xe_modparam.force_execlist;
+
+ spin_lock_init(&xe->irq.lock);
+ spin_lock_init(&xe->clients.lock);
+
+ init_waitqueue_head(&xe->ufence_wq);
+
+ init_rwsem(&xe->usm.lock);
+
+ xa_init_flags(&xe->usm.asid_to_vm, XA_FLAGS_ALLOC);
+
+ if (IS_ENABLED(CONFIG_DRM_XE_DEBUG)) {
+ /* Trigger a large asid and an early asid wrap. */
+ u32 asid;
+
+ BUILD_BUG_ON(XE_MAX_ASID < 2);
+ err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, NULL,
+ XA_LIMIT(XE_MAX_ASID - 2, XE_MAX_ASID - 1),
+ &xe->usm.next_asid, GFP_KERNEL);
+ drm_WARN_ON(&xe->drm, err);
+ if (err >= 0)
+ xa_erase(&xe->usm.asid_to_vm, asid);
+ }
+
+ spin_lock_init(&xe->pinned.lock);
+ INIT_LIST_HEAD(&xe->pinned.kernel_bo_present);
+ INIT_LIST_HEAD(&xe->pinned.external_vram);
+ INIT_LIST_HEAD(&xe->pinned.evicted);
+
+ xe->preempt_fence_wq = alloc_ordered_workqueue("xe-preempt-fence-wq", 0);
+ xe->ordered_wq = alloc_ordered_workqueue("xe-ordered-wq", 0);
+ xe->unordered_wq = alloc_workqueue("xe-unordered-wq", 0, 0);
+ xe->destroy_wq = alloc_workqueue("xe-destroy-wq", 0, 0);
+ if (!xe->ordered_wq || !xe->unordered_wq ||
+ !xe->preempt_fence_wq || !xe->destroy_wq) {
+ /*
+		 * Cleanup done in xe_device_destroy via the
+		 * drmm_add_action_or_reset registered above
+ */
+ drm_err(&xe->drm, "Failed to allocate xe workqueues\n");
+ err = -ENOMEM;
+ goto err;
+ }
+
+ err = xe_display_create(xe);
+ if (WARN_ON(err))
+ goto err;
+
+ return xe;
+
+err:
+ return ERR_PTR(err);
+}
+
+/*
+ * The driver-initiated FLR is the highest level of reset that we can trigger
+ * from within the driver. It is different from the PCI FLR in that it doesn't
+ * fully reset the SGUnit and doesn't modify the PCI config space and therefore
+ * it doesn't require a re-enumeration of the PCI BARs. However, the
+ * driver-initiated FLR does still cause a reset of both GT and display and a
+ * memory wipe of local and stolen memory, so recovery would require a full HW
+ * re-init and saving/restoring (or re-populating) the wiped memory. Since we
+ * perform the FLR as the very last action before releasing access to the HW
+ * during the driver release flow, we don't attempt recovery at all, because
+ * if/when a new instance of i915 is bound to the device it will do a full
+ * re-init anyway.
+ */
+static void xe_driver_flr(struct xe_device *xe)
+{
+ const unsigned int flr_timeout = 3 * MICRO; /* specs recommend a 3s wait */
+ struct xe_gt *gt = xe_root_mmio_gt(xe);
+ int ret;
+
+ if (xe_mmio_read32(gt, GU_CNTL_PROTECTED) & DRIVERINT_FLR_DIS) {
+ drm_info_once(&xe->drm, "BIOS Disabled Driver-FLR\n");
+ return;
+ }
+
+ drm_dbg(&xe->drm, "Triggering Driver-FLR\n");
+
+ /*
+ * Make sure any pending FLR requests have cleared by waiting for the
+ * FLR trigger bit to go to zero. Also clear GU_DEBUG's DRIVERFLR_STATUS
+	 * to make sure it's not still set from a prior attempt (it's a
+	 * write-to-clear bit).
+	 * Note that we should never be in a situation where a previous attempt
+	 * is still pending (unless the HW is totally dead), but better to be
+	 * safe in case something unexpected happens.
+ */
+ ret = xe_mmio_wait32(gt, GU_CNTL, DRIVERFLR, 0, flr_timeout, NULL, false);
+ if (ret) {
+ drm_err(&xe->drm, "Driver-FLR-prepare wait for ready failed! %d\n", ret);
+ return;
+ }
+ xe_mmio_write32(gt, GU_DEBUG, DRIVERFLR_STATUS);
+
+ /* Trigger the actual Driver-FLR */
+ xe_mmio_rmw32(gt, GU_CNTL, 0, DRIVERFLR);
+
+ /* Wait for hardware teardown to complete */
+ ret = xe_mmio_wait32(gt, GU_CNTL, DRIVERFLR, 0, flr_timeout, NULL, false);
+ if (ret) {
+ drm_err(&xe->drm, "Driver-FLR-teardown wait completion failed! %d\n", ret);
+ return;
+ }
+
+ /* Wait for hardware/firmware re-init to complete */
+ ret = xe_mmio_wait32(gt, GU_DEBUG, DRIVERFLR_STATUS, DRIVERFLR_STATUS,
+ flr_timeout, NULL, false);
+ if (ret) {
+ drm_err(&xe->drm, "Driver-FLR-reinit wait completion failed! %d\n", ret);
+ return;
+ }
+
+ /* Clear sticky completion status */
+ xe_mmio_write32(gt, GU_DEBUG, DRIVERFLR_STATUS);
+}
+
+static void xe_driver_flr_fini(void *arg)
+{
+ struct xe_device *xe = arg;
+
+ if (xe->needs_flr_on_fini)
+ xe_driver_flr(xe);
+}
+
+static void xe_device_sanitize(void *arg)
+{
+ struct xe_device *xe = arg;
+ struct xe_gt *gt;
+ u8 id;
+
+ for_each_gt(gt, xe, id)
+ xe_gt_sanitize(gt);
+}
+
+static int xe_set_dma_info(struct xe_device *xe)
+{
+ unsigned int mask_size = xe->info.dma_mask_size;
+ int err;
+
+ dma_set_max_seg_size(xe->drm.dev, xe_sg_segment_size(xe->drm.dev));
+
+ err = dma_set_mask(xe->drm.dev, DMA_BIT_MASK(mask_size));
+ if (err)
+ goto mask_err;
+
+ err = dma_set_coherent_mask(xe->drm.dev, DMA_BIT_MASK(mask_size));
+ if (err)
+ goto mask_err;
+
+ return 0;
+
+mask_err:
+ drm_err(&xe->drm, "Can't set DMA mask/consistent mask (%d)\n", err);
+ return err;
+}
+
+static bool verify_lmem_ready(struct xe_gt *gt)
+{
+ u32 val = xe_mmio_read32(gt, GU_CNTL) & LMEM_INIT;
+
+ return !!val;
+}
+
+static int wait_for_lmem_ready(struct xe_device *xe)
+{
+ struct xe_gt *gt = xe_root_mmio_gt(xe);
+ unsigned long timeout, start;
+
+ if (!IS_DGFX(xe))
+ return 0;
+
+ if (IS_SRIOV_VF(xe))
+ return 0;
+
+ if (verify_lmem_ready(gt))
+ return 0;
+
+ drm_dbg(&xe->drm, "Waiting for lmem initialization\n");
+
+ start = jiffies;
+ timeout = start + msecs_to_jiffies(60 * 1000); /* 60 sec! */
+
+ do {
+ if (signal_pending(current))
+ return -EINTR;
+
+ /*
+ * The boot firmware initializes local memory and
+		 * assesses its health. If memory training fails, the punit
+		 * will have been instructed to keep the GT powered down and
+		 * we won't be able to communicate with it.
+		 *
+		 * If the status check is done before the punit updates the
+		 * register, it can lead to the system being unusable. Use a
+		 * timeout and defer the probe to prevent this.
+ */
+ if (time_after(jiffies, timeout)) {
+ drm_dbg(&xe->drm, "lmem not initialized by firmware\n");
+ return -EPROBE_DEFER;
+ }
+
+ msleep(20);
+
+ } while (!verify_lmem_ready(gt));
+
+ drm_dbg(&xe->drm, "lmem ready after %ums",
+ jiffies_to_msecs(jiffies - start));
+
+ return 0;
+}
+
+static void update_device_info(struct xe_device *xe)
+{
+ /* disable features that are not available/applicable to VFs */
+ if (IS_SRIOV_VF(xe)) {
+ xe->info.probe_display = 0;
+ xe->info.has_heci_gscfi = 0;
+ xe->info.skip_guc_pc = 1;
+ xe->info.skip_pcode = 1;
+ }
+}
+
+/**
+ * xe_device_probe_early() - Device early probe
+ * @xe: xe device instance
+ *
+ * Initialize MMIO resources that don't require any
+ * knowledge about tile count. Also initialize pcode and
+ * check vram initialization on root tile.
+ *
+ * Return: 0 on success, error code on failure
+ */
+int xe_device_probe_early(struct xe_device *xe)
+{
+ int err;
+
+ err = xe_mmio_init(xe);
+ if (err)
+ return err;
+
+ xe_sriov_probe_early(xe);
+
+ update_device_info(xe);
+
+ err = xe_pcode_probe_early(xe);
+ if (err)
+ return err;
+
+ err = wait_for_lmem_ready(xe);
+ if (err)
+ return err;
+
+ xe->wedged.mode = xe_modparam.wedged_mode;
+
+ return 0;
+}
+
+static int xe_device_set_has_flat_ccs(struct xe_device *xe)
+{
+ u32 reg;
+ int err;
+
+ if (GRAPHICS_VER(xe) < 20 || !xe->info.has_flat_ccs)
+ return 0;
+
+ struct xe_gt *gt = xe_root_mmio_gt(xe);
+
+ err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+ if (err)
+ return err;
+
+ reg = xe_gt_mcr_unicast_read_any(gt, XE2_FLAT_CCS_BASE_RANGE_LOWER);
+ xe->info.has_flat_ccs = (reg & XE2_FLAT_CCS_ENABLE);
+
+ if (!xe->info.has_flat_ccs)
+ drm_dbg(&xe->drm,
+			"Flat CCS has been disabled in BIOS, may lead to performance impact");
+
+ return xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+}
+
+int xe_device_probe(struct xe_device *xe)
+{
+ struct xe_tile *tile;
+ struct xe_gt *gt;
+ int err;
+ u8 last_gt;
+ u8 id;
+
+ xe_pat_init_early(xe);
+
+ err = xe_sriov_init(xe);
+ if (err)
+ return err;
+
+ xe->info.mem_region_mask = 1;
+ err = xe_display_init_nommio(xe);
+ if (err)
+ return err;
+
+ err = xe_set_dma_info(xe);
+ if (err)
+ return err;
+
+ err = xe_mmio_probe_tiles(xe);
+ if (err)
+ return err;
+
+ xe_ttm_sys_mgr_init(xe);
+
+ for_each_gt(gt, xe, id) {
+ err = xe_gt_init_early(gt);
+ if (err)
+ return err;
+ }
+
+ for_each_tile(tile, xe, id) {
+ if (IS_SRIOV_VF(xe)) {
+ xe_guc_comm_init_early(&tile->primary_gt->uc.guc);
+ err = xe_gt_sriov_vf_bootstrap(tile->primary_gt);
+ if (err)
+ return err;
+ err = xe_gt_sriov_vf_query_config(tile->primary_gt);
+ if (err)
+ return err;
+ }
+ err = xe_ggtt_init_early(tile->mem.ggtt);
+ if (err)
+ return err;
+ if (IS_SRIOV_VF(xe)) {
+ err = xe_memirq_init(&tile->sriov.vf.memirq);
+ if (err)
+ return err;
+ }
+ }
+
+ for_each_gt(gt, xe, id) {
+ err = xe_gt_init_hwconfig(gt);
+ if (err)
+ return err;
+ }
+
+ err = xe_devcoredump_init(xe);
+ if (err)
+ return err;
+ err = devm_add_action_or_reset(xe->drm.dev, xe_driver_flr_fini, xe);
+ if (err)
+ return err;
+
+ err = xe_display_init_noirq(xe);
+ if (err)
+ return err;
+
+ err = xe_irq_install(xe);
+ if (err)
+ goto err;
+
+ err = xe_device_set_has_flat_ccs(xe);
+ if (err)
+ goto err;
+
+ err = xe_vram_probe(xe);
+ if (err)
+ goto err;
+
+ for_each_tile(tile, xe, id) {
+ err = xe_tile_init_noalloc(tile);
+ if (err)
+ goto err;
+ }
+
+ /* Allocate and map stolen after potential VRAM resize */
+ xe_ttm_stolen_mgr_init(xe);
+
+ /*
+ * Now that GT is initialized (TTM in particular),
+ * we can try to init display, and inherit the initial fb.
+ * This is the reason the first allocation needs to be done
+ * inside display.
+ */
+ err = xe_display_init_noaccel(xe);
+ if (err)
+ goto err;
+
+ for_each_gt(gt, xe, id) {
+ last_gt = id;
+
+ err = xe_gt_init(gt);
+ if (err)
+ goto err_fini_gt;
+ }
+
+ xe_heci_gsc_init(xe);
+
+ err = xe_oa_init(xe);
+ if (err)
+ goto err_fini_gt;
+
+ err = xe_display_init(xe);
+ if (err)
+ goto err_fini_oa;
+
+ err = drm_dev_register(&xe->drm, 0);
+ if (err)
+ goto err_fini_display;
+
+ xe_display_register(xe);
+
+ xe_oa_register(xe);
+
+ xe_debugfs_register(xe);
+
+ xe_hwmon_register(xe);
+
+ for_each_gt(gt, xe, id)
+ xe_gt_sanitize_freq(gt);
+
+ return devm_add_action_or_reset(xe->drm.dev, xe_device_sanitize, xe);
+
+err_fini_display:
+ xe_display_driver_remove(xe);
+
+err_fini_oa:
+ xe_oa_fini(xe);
+
+err_fini_gt:
+ for_each_gt(gt, xe, id) {
+ if (id < last_gt)
+ xe_gt_remove(gt);
+ else
+ break;
+ }
+
+err:
+ xe_display_fini(xe);
+ return err;
+}
+
+static void xe_device_remove_display(struct xe_device *xe)
+{
+ xe_display_unregister(xe);
+
+ drm_dev_unplug(&xe->drm);
+ xe_display_driver_remove(xe);
+}
+
+void xe_device_remove(struct xe_device *xe)
+{
+ struct xe_gt *gt;
+ u8 id;
+
+ xe_oa_unregister(xe);
+
+ xe_device_remove_display(xe);
+
+ xe_display_fini(xe);
+
+ xe_oa_fini(xe);
+
+ xe_heci_gsc_fini(xe);
+
+ for_each_gt(gt, xe, id)
+ xe_gt_remove(gt);
+}
+
+void xe_device_shutdown(struct xe_device *xe)
+{
+}
+
+/**
+ * xe_device_wmb() - Device specific write memory barrier
+ * @xe: the &xe_device
+ *
+ * While wmb() is sufficient for a barrier if we use system memory, on discrete
+ * platforms with device memory we additionally need to issue a register write.
+ * Since it doesn't matter which register we write to, use the read-only VF_CAP
+ * register that is also marked as accessible by the VFs.
+ */
+void xe_device_wmb(struct xe_device *xe)
+{
+ struct xe_gt *gt = xe_root_mmio_gt(xe);
+
+ wmb();
+ if (IS_DGFX(xe))
+ xe_mmio_write32(gt, VF_CAP_REG, 0);
+}
+
+/**
+ * xe_device_td_flush() - Flush transient L3 cache entries
+ * @xe: The device
+ *
+ * Display engine has direct access to memory and is never coherent with L3/L4
+ * caches (or CPU caches), however KMD is responsible for specifically flushing
+ * transient L3 GPU cache entries prior to the flip sequence to ensure scanout
+ * can happen from such a surface without seeing corruption.
+ *
+ * Display surfaces can be tagged as transient by mapping them using one of the
+ * various L3:XD PAT index modes on Xe2.
+ *
+ * Note: On non-discrete xe2 platforms, like LNL, the entire L3 cache is flushed
+ * at the end of each submission via PIPE_CONTROL for compute/render, since SA
+ * Media is not coherent with L3 and we want to support render-vs-media
+ * use cases. For other engines like copy/blt the HW internally forces uncached
+ * behaviour, hence why we can skip the TDF on such platforms.
+ */
+void xe_device_td_flush(struct xe_device *xe)
+{
+ struct xe_gt *gt;
+ u8 id;
+
+ if (!IS_DGFX(xe) || GRAPHICS_VER(xe) < 20)
+ return;
+
+ if (XE_WA(xe_root_mmio_gt(xe), 16023588340)) {
+ xe_device_l2_flush(xe);
+ return;
+ }
+
+ for_each_gt(gt, xe, id) {
+ if (xe_gt_is_media_type(gt))
+ continue;
+
+ if (xe_force_wake_get(gt_to_fw(gt), XE_FW_GT))
+ return;
+
+ xe_mmio_write32(gt, XE2_TDF_CTRL, TRANSIENT_FLUSH_REQUEST);
+ /*
+ * FIXME: We can likely do better here with our choice of
+ * timeout. Currently we just assume the worst case, i.e. 150us,
+ * which is believed to be sufficient to cover the worst case
+ * scenario on current platforms if all cache entries are
+		 * transient and need to be flushed.
+ */
+ if (xe_mmio_wait32(gt, XE2_TDF_CTRL, TRANSIENT_FLUSH_REQUEST, 0,
+ 150, NULL, false))
+ xe_gt_err_once(gt, "TD flush timeout\n");
+
+ xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+ }
+}
+
+void xe_device_l2_flush(struct xe_device *xe)
+{
+ struct xe_gt *gt;
+ int err;
+
+ gt = xe_root_mmio_gt(xe);
+
+ if (!XE_WA(gt, 16023588340))
+ return;
+
+ err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+ if (err)
+ return;
+
+ spin_lock(&gt->global_invl_lock);
+ xe_mmio_write32(gt, XE2_GLOBAL_INVAL, 0x1);
+
+<<<<<<<
+ if (xe_mmio_wait32(&gt->mmio, XE2_GLOBAL_INVAL, 0x1, 0x0, 500, NULL, true))
+=======
+ if (xe_mmio_wait32(gt, XE2_GLOBAL_INVAL, 0x1, 0x0, 150, NULL, true))
+>>>>>>>
+ xe_gt_err_once(gt, "Global invalidation timeout\n");
+ spin_unlock(&gt->global_invl_lock);
+
+ xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+}
+
+u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size)
+{
+ return xe_device_has_flat_ccs(xe) ?
+ DIV_ROUND_UP_ULL(size, NUM_BYTES_PER_CCS_BYTE(xe)) : 0;
+}
+
+/**
+ * xe_device_assert_mem_access - Inspect the current runtime_pm state.
+ * @xe: xe device instance
+ *
+ * To be used before any kind of memory access. It will splat a debug warning
+ * if the device is currently sleeping. But it doesn't guarantee in any way
+ * that the device is going to remain awake. Xe PM runtime get and put
+ * functions might be added to the outer bound of the memory access, while
+ * this check is intended for inner usage to splat some warning if the worst
+ * case has just happened.
+ */
+void xe_device_assert_mem_access(struct xe_device *xe)
+{
+ xe_assert(xe, !xe_pm_runtime_suspended(xe));
+}
+
+void xe_device_snapshot_print(struct xe_device *xe, struct drm_printer *p)
+{
+ struct xe_gt *gt;
+ u8 id;
+
+ drm_printf(p, "PCI ID: 0x%04x\n", xe->info.devid);
+ drm_printf(p, "PCI revision: 0x%02x\n", xe->info.revid);
+
+ for_each_gt(gt, xe, id) {
+ drm_printf(p, "GT id: %u\n", id);
+ drm_printf(p, "\tType: %s\n",
+ gt->info.type == XE_GT_TYPE_MAIN ? "main" : "media");
+ drm_printf(p, "\tIP ver: %u.%u.%u\n",
+ REG_FIELD_GET(GMD_ID_ARCH_MASK, gt->info.gmdid),
+ REG_FIELD_GET(GMD_ID_RELEASE_MASK, gt->info.gmdid),
+ REG_FIELD_GET(GMD_ID_REVID, gt->info.gmdid));
+ drm_printf(p, "\tCS reference clock: %u\n", gt->info.reference_clock);
+ }
+}
+
+u64 xe_device_canonicalize_addr(struct xe_device *xe, u64 address)
+{
+ return sign_extend64(address, xe->info.va_bits - 1);
+}
+
+u64 xe_device_uncanonicalize_addr(struct xe_device *xe, u64 address)
+{
+ return address & GENMASK_ULL(xe->info.va_bits - 1, 0);
+}
+
+static void xe_device_wedged_fini(struct drm_device *drm, void *arg)
+{
+ struct xe_device *xe = arg;
+
+ xe_pm_runtime_put(xe);
+}
+
+/**
+ * xe_device_declare_wedged - Declare device wedged
+ * @xe: xe device instance
+ *
+ * This is a final state that can only be cleared with a module
+ * re-probe (unbind + bind).
+ * In this state every IOCTL will be blocked so the GT cannot be used.
+ * In general it will be called upon any critical error such as gt reset
+ * failure or guc loading failure.
+ * If xe.wedged module parameter is set to 2, this function will be called
+ * on every single execution timeout (a.k.a. GPU hang) right after devcoredump
+ * snapshot capture. In this mode, GT reset won't be attempted so the state of
+ * the issue is preserved for further debugging.
+ */
+void xe_device_declare_wedged(struct xe_device *xe)
+{
+ struct xe_gt *gt;
+ u8 id;
+
+ if (xe->wedged.mode == 0) {
+ drm_dbg(&xe->drm, "Wedged mode is forcibly disabled\n");
+ return;
+ }
+
+ xe_pm_runtime_get_noresume(xe);
+
+ if (drmm_add_action_or_reset(&xe->drm, xe_device_wedged_fini, xe)) {
+		drm_err(&xe->drm, "Failed to register xe_device_wedged_fini clean-up; device remains wedged.\n");
+ return;
+ }
+
+ if (!atomic_xchg(&xe->wedged.flag, 1)) {
+ xe->needs_flr_on_fini = true;
+ drm_err(&xe->drm,
+ "CRITICAL: Xe has declared device %s as wedged.\n"
+ "IOCTLs and executions are blocked. Only a rebind may clear the failure\n"
+ "Please file a _new_ bug report at https://gitlab.freedesktop.org/drm/xe/kernel/issues/new\n",
+ dev_name(xe->drm.dev));
+ }
+
+ for_each_gt(gt, xe, id)
+ xe_gt_declare_wedged(gt);
+}
diff --git a/rr-cache/dc755e41b3920c4fd040291cf34f6f63263f016b/preimage b/rr-cache/dc755e41b3920c4fd040291cf34f6f63263f016b/preimage
new file mode 100644
index 000000000000..a71c059b6d65
--- /dev/null
+++ b/rr-cache/dc755e41b3920c4fd040291cf34f6f63263f016b/preimage
@@ -0,0 +1,531 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include "xe_display.h"
+#include "regs/xe_irq_regs.h"
+
+#include <linux/fb.h>
+
+#include <drm/drm_drv.h>
+#include <drm/drm_managed.h>
+#include <drm/drm_probe_helper.h>
+#include <uapi/drm/xe_drm.h>
+
+#include "soc/intel_dram.h"
+#include "intel_acpi.h"
+#include "intel_audio.h"
+#include "intel_bw.h"
+#include "intel_display.h"
+#include "intel_display_driver.h"
+#include "intel_display_irq.h"
+#include "intel_display_types.h"
+#include "intel_dmc.h"
+#include "intel_dp.h"
+#include "intel_encoder.h"
+#include "intel_fbdev.h"
+#include "intel_hdcp.h"
+#include "intel_hotplug.h"
+#include "intel_opregion.h"
+#include "xe_module.h"
+
+/* Xe device functions */
+
+static bool has_display(struct xe_device *xe)
+{
+ return HAS_DISPLAY(&xe->display);
+}
+
+/**
+ * xe_display_driver_probe_defer - Detect if we need to wait for other drivers
+ * early on
+ * @pdev: PCI device
+ *
+ * Returns: true if probe needs to be deferred, false otherwise
+ */
+bool xe_display_driver_probe_defer(struct pci_dev *pdev)
+{
+ if (!xe_modparam.probe_display)
+		return false;
+
+ return intel_display_driver_probe_defer(pdev);
+}
+
+/**
+ * xe_display_driver_set_hooks - Add driver flags and hooks for display
+ * @driver: DRM device driver
+ *
+ * Set features and function hooks in @driver that are needed for driving the
+ * display IP. This sets the driver's capability of driving display, regardless
+ * of whether the device has it enabled.
+ */
+void xe_display_driver_set_hooks(struct drm_driver *driver)
+{
+ if (!xe_modparam.probe_display)
+ return;
+
+ driver->driver_features |= DRIVER_MODESET | DRIVER_ATOMIC;
+}
+
+static void unset_display_features(struct xe_device *xe)
+{
+ xe->drm.driver_features &= ~(DRIVER_MODESET | DRIVER_ATOMIC);
+}
+
+static void display_destroy(struct drm_device *dev, void *dummy)
+{
+ struct xe_device *xe = to_xe_device(dev);
+
+ destroy_workqueue(xe->display.hotplug.dp_wq);
+}
+
+/**
+ * xe_display_create - create display struct
+ * @xe: XE device instance
+ *
+ * Initialize all fields used by the display part.
+ *
+ * TODO: once everything can be inside a single struct, make the struct opaque
+ * to the rest of xe and return it to be xe->display.
+ *
+ * Returns: 0 on success
+ */
+int xe_display_create(struct xe_device *xe)
+{
+ spin_lock_init(&xe->display.fb_tracking.lock);
+
+ xe->display.hotplug.dp_wq = alloc_ordered_workqueue("xe-dp", 0);
+
+ return drmm_add_action_or_reset(&xe->drm, display_destroy, NULL);
+}
+
+static void xe_display_fini_nommio(struct drm_device *dev, void *dummy)
+{
+ struct xe_device *xe = to_xe_device(dev);
+
+ if (!xe->info.probe_display)
+ return;
+
+ intel_power_domains_cleanup(xe);
+}
+
+int xe_display_init_nommio(struct xe_device *xe)
+{
+ if (!xe->info.probe_display)
+ return 0;
+
+ /* Fake uncore lock */
+ spin_lock_init(&xe->uncore.lock);
+
+ /* This must be called before any calls to HAS_PCH_* */
+ intel_detect_pch(xe);
+
+ return drmm_add_action_or_reset(&xe->drm, xe_display_fini_nommio, xe);
+}
+
+static void xe_display_fini_noirq(void *arg)
+{
+ struct xe_device *xe = arg;
+ struct intel_display *display = &xe->display;
+
+ if (!xe->info.probe_display)
+ return;
+
+ intel_display_driver_remove_noirq(xe);
+ intel_opregion_cleanup(display);
+}
+
+int xe_display_init_noirq(struct xe_device *xe)
+{
+ struct intel_display *display = &xe->display;
+ int err;
+
+ if (!xe->info.probe_display)
+ return 0;
+
+ intel_display_driver_early_probe(xe);
+
+	/* Early display init. */
+ intel_opregion_setup(display);
+
+ /*
+ * Fill the dram structure to get the system dram info. This will be
+ * used for memory latency calculation.
+ */
+ intel_dram_detect(xe);
+
+ intel_bw_init_hw(xe);
+
+ intel_display_device_info_runtime_init(xe);
+
+ err = intel_display_driver_probe_noirq(xe);
+ if (err) {
+ intel_opregion_cleanup(display);
+ return err;
+ }
+
+ return devm_add_action_or_reset(xe->drm.dev, xe_display_fini_noirq, xe);
+}
+
+static void xe_display_fini_noaccel(void *arg)
+{
+ struct xe_device *xe = arg;
+
+ if (!xe->info.probe_display)
+ return;
+
+ intel_display_driver_remove_nogem(xe);
+}
+
+int xe_display_init_noaccel(struct xe_device *xe)
+{
+ int err;
+
+ if (!xe->info.probe_display)
+ return 0;
+
+ err = intel_display_driver_probe_nogem(xe);
+ if (err)
+ return err;
+
+ return devm_add_action_or_reset(xe->drm.dev, xe_display_fini_noaccel, xe);
+}
+
+int xe_display_init(struct xe_device *xe)
+{
+ if (!xe->info.probe_display)
+ return 0;
+
+ return intel_display_driver_probe(xe);
+}
+
+void xe_display_fini(struct xe_device *xe)
+{
+ if (!xe->info.probe_display)
+ return;
+
+ intel_hpd_poll_fini(xe);
+
+ intel_hdcp_component_fini(xe);
+ intel_audio_deinit(xe);
+}
+
+void xe_display_register(struct xe_device *xe)
+{
+ if (!xe->info.probe_display)
+ return;
+
+ intel_display_driver_register(xe);
+ intel_register_dsm_handler();
+ intel_power_domains_enable(xe);
+}
+
+void xe_display_unregister(struct xe_device *xe)
+{
+ if (!xe->info.probe_display)
+ return;
+
+ intel_unregister_dsm_handler();
+ intel_power_domains_disable(xe);
+ intel_display_driver_unregister(xe);
+}
+
+void xe_display_driver_remove(struct xe_device *xe)
+{
+ if (!xe->info.probe_display)
+ return;
+
+ intel_display_driver_remove(xe);
+}
+
+/* IRQ-related functions */
+
+void xe_display_irq_handler(struct xe_device *xe, u32 master_ctl)
+{
+ if (!xe->info.probe_display)
+ return;
+
+ if (master_ctl & DISPLAY_IRQ)
+ gen11_display_irq_handler(xe);
+}
+
+void xe_display_irq_enable(struct xe_device *xe, u32 gu_misc_iir)
+{
+ struct intel_display *display = &xe->display;
+
+ if (!xe->info.probe_display)
+ return;
+
+ if (gu_misc_iir & GU_MISC_GSE)
+ intel_opregion_asle_intr(display);
+}
+
+void xe_display_irq_reset(struct xe_device *xe)
+{
+ if (!xe->info.probe_display)
+ return;
+
+ gen11_display_irq_reset(xe);
+}
+
+void xe_display_irq_postinstall(struct xe_device *xe, struct xe_gt *gt)
+{
+ if (!xe->info.probe_display)
+ return;
+
+ if (gt->info.id == XE_GT0)
+ gen11_de_irq_postinstall(xe);
+}
+
+static bool suspend_to_idle(void)
+{
+#if IS_ENABLED(CONFIG_ACPI_SLEEP)
+ if (acpi_target_system_state() < ACPI_STATE_S3)
+ return true;
+#endif
+ return false;
+}
+
+static void xe_display_flush_cleanup_work(struct xe_device *xe)
+{
+ struct intel_crtc *crtc;
+
+ for_each_intel_crtc(&xe->drm, crtc) {
+ struct drm_crtc_commit *commit;
+
+ spin_lock(&crtc->base.commit_lock);
+ commit = list_first_entry_or_null(&crtc->base.commit_list,
+ struct drm_crtc_commit, commit_entry);
+ if (commit)
+ drm_crtc_commit_get(commit);
+ spin_unlock(&crtc->base.commit_lock);
+
+ if (commit) {
+ wait_for_completion(&commit->cleanup_done);
+ drm_crtc_commit_put(commit);
+ }
+ }
+}
+
+/* TODO: System and runtime suspend/resume sequences will be sanitized as a follow-up. */
+static void __xe_display_pm_suspend(struct xe_device *xe, bool runtime)
+{
+ struct intel_display *display = &xe->display;
+	bool s2idle = suspend_to_idle();
+
+	if (!xe->info.probe_display)
+ return;
+
+ /*
+ * We do a lot of poking in a lot of registers, make sure they work
+ * properly.
+ */
+ intel_power_domains_disable(xe);
+ if (!runtime)
+ intel_fbdev_set_suspend(&xe->drm, FBINFO_STATE_SUSPENDED, true);
+
+ if (!runtime && has_display(xe)) {
+ drm_kms_helper_poll_disable(&xe->drm);
+ intel_display_driver_disable_user_access(xe);
+ intel_display_driver_suspend(xe);
+ }
+
+ xe_display_flush_cleanup_work(xe);
+
+ if (!runtime)
+ intel_dp_mst_suspend(xe);
+
+ intel_hpd_cancel_work(xe);
+
+ if (!runtime && has_display(xe)) {
+ intel_display_driver_suspend_access(xe);
+ intel_encoder_suspend_all(&xe->display);
+ }
+
+ intel_opregion_suspend(display, s2idle ? PCI_D1 : PCI_D3cold);
+
+<<<<<<<
+ intel_dmc_suspend(display);
+=======
+ intel_dmc_suspend(xe);
+>>>>>>>
+
+ if (runtime && has_display(xe))
+ intel_hpd_poll_enable(xe);
+}
+
+void xe_display_pm_suspend(struct xe_device *xe)
+{
+ __xe_display_pm_suspend(xe, false);
+}
+
+<<<<<<<
+=======
+void xe_display_pm_shutdown(struct xe_device *xe)
+{
+ struct intel_display *display = &xe->display;
+
+ if (!xe->info.probe_display)
+ return;
+
+ intel_power_domains_disable(xe);
+ intel_fbdev_set_suspend(&xe->drm, FBINFO_STATE_SUSPENDED, true);
+ if (has_display(xe)) {
+ drm_kms_helper_poll_disable(&xe->drm);
+ intel_display_driver_disable_user_access(xe);
+ intel_display_driver_suspend(xe);
+ }
+
+ xe_display_flush_cleanup_work(xe);
+ intel_dp_mst_suspend(xe);
+ intel_hpd_cancel_work(xe);
+
+ if (has_display(xe))
+ intel_display_driver_suspend_access(xe);
+
+ intel_encoder_suspend_all(display);
+ intel_encoder_shutdown_all(display);
+
+ intel_opregion_suspend(display, PCI_D3cold);
+
+ intel_dmc_suspend(display);
+}
+
+>>>>>>>
+void xe_display_pm_runtime_suspend(struct xe_device *xe)
+{
+ if (!xe->info.probe_display)
+ return;
+
+ if (xe->d3cold.allowed) {
+ __xe_display_pm_suspend(xe, true);
+ return;
+ }
+
+ intel_hpd_poll_enable(xe);
+}
+
+void xe_display_pm_suspend_late(struct xe_device *xe)
+{
+	bool s2idle = suspend_to_idle();
+
+	if (!xe->info.probe_display)
+ return;
+
+ intel_power_domains_suspend(xe, s2idle);
+
+ intel_display_power_suspend_late(xe);
+}
+
+<<<<<<<
+=======
+void xe_display_pm_shutdown_late(struct xe_device *xe)
+{
+ if (!xe->info.probe_display)
+ return;
+
+ /*
+ * The only requirement is to reboot with display DC states disabled,
+ * for now leaving all display power wells in the INIT power domain
+ * enabled.
+ */
+ intel_power_domains_driver_remove(xe);
+}
+
+>>>>>>>
+void xe_display_pm_resume_early(struct xe_device *xe)
+{
+ if (!xe->info.probe_display)
+ return;
+
+ intel_display_power_resume_early(xe);
+
+ intel_power_domains_resume(xe);
+}
+
+static void __xe_display_pm_resume(struct xe_device *xe, bool runtime)
+{
+ struct intel_display *display = &xe->display;
+
+ if (!xe->info.probe_display)
+ return;
+
+ intel_dmc_resume(display);
+
+ if (has_display(xe))
+ drm_mode_config_reset(&xe->drm);
+
+ intel_display_driver_init_hw(xe);
+ intel_hpd_init(xe);
+
+ if (!runtime && has_display(xe))
+ intel_display_driver_resume_access(xe);
+
+ /* MST sideband requires HPD interrupts enabled */
+ if (!runtime)
+ intel_dp_mst_resume(xe);
+
+ if (!runtime && has_display(xe)) {
+ intel_display_driver_resume(xe);
+ drm_kms_helper_poll_enable(&xe->drm);
+ intel_display_driver_enable_user_access(xe);
+ }
+
+ if (has_display(xe))
+ intel_hpd_poll_disable(xe);
+
+ intel_opregion_resume(display);
+
+ if (!runtime)
+ intel_fbdev_set_suspend(&xe->drm, FBINFO_STATE_RUNNING, false);
+
+ intel_power_domains_enable(xe);
+}
+
+void xe_display_pm_resume(struct xe_device *xe)
+{
+ __xe_display_pm_resume(xe, false);
+}
+
+void xe_display_pm_runtime_resume(struct xe_device *xe)
+{
+ if (!xe->info.probe_display)
+ return;
+
+ if (xe->d3cold.allowed) {
+ __xe_display_pm_resume(xe, true);
+ return;
+ }
+
+ intel_hpd_init(xe);
+ intel_hpd_poll_disable(xe);
+}
+
+static void display_device_remove(struct drm_device *dev, void *arg)
+{
+ struct xe_device *xe = arg;
+
+ intel_display_device_remove(xe);
+}
+
+int xe_display_probe(struct xe_device *xe)
+{
+ int err;
+
+ if (!xe->info.probe_display)
+ goto no_display;
+
+ intel_display_device_probe(xe);
+
+ err = drmm_add_action_or_reset(&xe->drm, display_device_remove, xe);
+ if (err)
+ return err;
+
+ if (has_display(xe))
+ return 0;
+
+no_display:
+ xe->info.probe_display = false;
+ unset_display_features(xe);
+ return 0;
+}